From 3e2bf59e2aedbebbfa40a8728c357c3fa90b1501 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 2 Feb 2023 23:25:43 -0500 Subject: [PATCH 1/4] add pre-commit configs --- .pre-commit-config.yaml | 55 +++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 4 +++ 2 files changed, 59 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000..1a5413e26a --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,55 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + exclude: "^.+\\.pbtxt$" + - id: end-of-file-fixer + exclude: "^.+\\.pbtxt$" + - id: check-yaml + #- id: check-json + - id: check-added-large-files + - id: check-merge-conflict + - id: check-symlinks + - id: check-toml +# Python +- repo: https://github.com/psf/black + rev: 22.12.0 + hooks: + - id: black-jupyter +- repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + files: \.py$ +# Python inside docs +- repo: https://github.com/asottile/blacken-docs + rev: 1.13.0 + hooks: + - id: blacken-docs +# C++ +- repo: https://github.com/pre-commit/mirrors-clang-format + rev: v15.0.7 + hooks: + - id: clang-format + exclude: ^source/3rdparty|source/lib/src/cuda/cudart/.+\.inc +# CSS +- repo: https://github.com/pre-commit/mirrors-csslint + rev: v1.0.5 + hooks: + - id: csslint +# Shell +- repo: https://github.com/scop/pre-commit-shfmt + rev: v3.6.0-1 + hooks: + - id: shfmt +# CMake +- repo: https://github.com/cheshirekow/cmake-format-precommit + rev: v0.6.13 + hooks: + - id: cmake-format + #- id: cmake-lint +ci: + autoupdate_branch: devel diff --git a/pyproject.toml b/pyproject.toml index 49f46e2cfb..81a041d3cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -104,3 +104,7 @@ ignore = "W504" # of space, start on the first 
line [tool.pydocstyle] ignore = "D413, D416, D203, D107, D213" + +[tool.isort] +profile = "black" +force_grid_wrap = 1 From 9a9b8367fa62692757aeb08bd29e157008add79c Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 2 Feb 2023 23:26:47 -0500 Subject: [PATCH 2/4] run pre-commit for github actions --- .github/workflows/build_cc.yml | 4 ++-- .github/workflows/labeler.yml | 2 +- .github/workflows/test_python.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml index ff3342490f..e1dcf089a1 100644 --- a/.github/workflows/build_cc.yml +++ b/.github/workflows/build_cc.yml @@ -7,7 +7,7 @@ jobs: name: Build C++ runs-on: ubuntu-latest container: ghcr.io/deepmodeling/deepmd-kit-test-cc:latest - strategy: + strategy: matrix: include: - variant: cpu @@ -47,7 +47,7 @@ jobs: && apt-get install -y rocm-dev hipcub-dev if: matrix.variant == 'rocm' - run: apt-get update && apt-get install -y clang - if: matrix.variant == 'clang' + if: matrix.variant == 'clang' - run: source/install/build_cc.sh env: DP_VARIANT: ${{ matrix.dp_variant }} diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 51dced0ed7..2c8ba30ba1 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -11,4 +11,4 @@ jobs: steps: - uses: actions/labeler@v4 with: - repo-token: "${{ secrets.GITHUB_TOKEN }}" \ No newline at end of file + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index 94a9fbd15b..5cbe80320f 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -6,7 +6,7 @@ jobs: testpython: name: Test Python runs-on: ubuntu-latest - strategy: + strategy: matrix: include: - python: 3.7 From 66144eb412504976bf4b5d87eb3fb16a87c5469d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 3 Feb 2023 04:27:33 +0000 
Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .github/ISSUE_TEMPLATE/bug-report.yml | 8 +- .github/ISSUE_TEMPLATE/feature-request.yml | 6 +- .github/ISSUE_TEMPLATE/generic-issue.yml | 6 +- .github/ISSUE_TEMPLATE/parameters.yml | 6 +- CONTRIBUTING.md | 14 +- README.md | 10 +- backend/dp_backend.py | 13 +- backend/find_tensorflow.py | 76 +- data/json/json2yaml.py | 25 +- data/raw/copy_raw.py | 143 +- data/raw/raw_to_set.sh | 141 +- data/raw/shuffle_raw.py | 83 +- deepmd/.gitignore | 2 +- deepmd/__about__.py | 2 +- deepmd/__init__.py | 35 +- deepmd/__main__.py | 6 +- deepmd/calculator.py | 34 +- deepmd/cluster/__init__.py | 9 +- deepmd/cluster/local.py | 36 +- deepmd/cluster/slurm.py | 13 +- deepmd/common.py | 86 +- deepmd/descriptor/__init__.py | 38 +- deepmd/descriptor/descriptor.py | 203 +- deepmd/descriptor/hybrid.py | 232 +- deepmd/descriptor/loc_frame.py | 402 +- deepmd/descriptor/se.py | 83 +- deepmd/descriptor/se_a.py | 1110 ++-- deepmd/descriptor/se_a_ebd.py | 694 ++- deepmd/descriptor/se_a_ef.py | 526 +- deepmd/descriptor/se_atten.py | 891 +-- deepmd/descriptor/se_r.py | 614 +- deepmd/descriptor/se_t.py | 652 +- deepmd/entrypoints/__init__.py | 37 +- deepmd/entrypoints/compress.py | 85 +- deepmd/entrypoints/config.py | 15 +- deepmd/entrypoints/convert.py | 21 +- deepmd/entrypoints/doc.py | 5 +- deepmd/entrypoints/freeze.py | 284 +- deepmd/entrypoints/main.py | 133 +- deepmd/entrypoints/neighbor_stat.py | 19 +- deepmd/entrypoints/test.py | 102 +- deepmd/entrypoints/train.py | 259 +- deepmd/entrypoints/transfer.py | 34 +- deepmd/env.py | 99 +- deepmd/fit/__init__.py | 14 +- deepmd/fit/dipole.py | 237 +- deepmd/fit/ener.py | 637 +- deepmd/fit/fitting.py | 25 +- deepmd/fit/polar.py | 524 +- deepmd/infer/__init__.py | 43 +- deepmd/infer/data_modifier.py | 379 +- deepmd/infer/deep_dipole.py | 17 +- deepmd/infer/deep_eval.py | 119 +- deepmd/infer/deep_polar.py | 20 +- 
deepmd/infer/deep_pot.py | 289 +- deepmd/infer/deep_tensor.py | 178 +- deepmd/infer/deep_wfc.py | 17 +- deepmd/infer/ewald_recp.py | 102 +- deepmd/infer/model_devi.py | 113 +- deepmd/lmp.py | 42 +- deepmd/loggers/__init__.py | 4 +- deepmd/loggers/loggers.py | 31 +- deepmd/loss/__init__.py | 11 +- deepmd/loss/ener.py | 382 +- deepmd/loss/loss.py | 42 +- deepmd/loss/tensor.py | 160 +- deepmd/model/__init__.py | 18 +- deepmd/model/ener.py | 367 +- deepmd/model/model.py | 129 +- deepmd/model/model_stat.py | 38 +- deepmd/model/multi.py | 517 +- deepmd/model/tensor.py | 220 +- deepmd/nvnmd/__init__.py | 9 +- deepmd/nvnmd/data/__init__.py | 30 +- deepmd/nvnmd/data/data.py | 146 +- deepmd/nvnmd/descriptor/se_a.py | 212 +- deepmd/nvnmd/entrypoints/__init__.py | 18 +- deepmd/nvnmd/entrypoints/freeze.py | 43 +- deepmd/nvnmd/entrypoints/mapt.py | 305 +- deepmd/nvnmd/entrypoints/train.py | 179 +- deepmd/nvnmd/entrypoints/wrap.py | 281 +- deepmd/nvnmd/fit/__init__.py | 2 +- deepmd/nvnmd/fit/ener.py | 11 +- deepmd/nvnmd/utils/__init__.py | 32 +- deepmd/nvnmd/utils/argcheck.py | 64 +- deepmd/nvnmd/utils/config.py | 339 +- deepmd/nvnmd/utils/encode.py | 128 +- deepmd/nvnmd/utils/fio.py | 112 +- deepmd/nvnmd/utils/network.py | 148 +- deepmd/nvnmd/utils/op.py | 8 +- deepmd/nvnmd/utils/weight.py | 17 +- deepmd/op/__init__.py | 4 +- deepmd/train/__init__.py | 1 - deepmd/train/run_options.py | 43 +- deepmd/train/trainer.py | 994 +-- deepmd/utils/__init__.py | 21 +- deepmd/utils/argcheck.py | 1436 +++-- deepmd/utils/batch_size.py | 101 +- deepmd/utils/compat.py | 57 +- deepmd/utils/convert.py | 235 +- deepmd/utils/data.py | 485 +- deepmd/utils/data_system.py | 408 +- deepmd/utils/errors.py | 4 +- deepmd/utils/finetune.py | 133 +- deepmd/utils/graph.py | 203 +- deepmd/utils/learning_rate.py | 87 +- deepmd/utils/multi_init.py | 156 +- deepmd/utils/neighbor_stat.py | 125 +- deepmd/utils/network.py | 304 +- deepmd/utils/pair_tab.py | 89 +- deepmd/utils/parallel_op.py | 42 +- 
deepmd/utils/path.py | 111 +- deepmd/utils/plugin.py | 36 +- deepmd/utils/random.py | 7 +- deepmd/utils/sess.py | 14 +- deepmd/utils/tabulate.py | 504 +- deepmd/utils/type_embed.py | 142 +- deepmd/utils/weight_avg.py | 15 +- doc/conf.py | 163 +- doc/data/data-conv.md | 6 +- doc/data/dpdata.md | 9 +- doc/data/system.md | 22 +- doc/development/coding-conventions.rst | 12 +- doc/development/type-embedding.md | 8 +- doc/freeze/compress.md | 4 +- doc/freeze/freeze.md | 8 +- doc/freeze/index.rst | 2 +- doc/getting-started/index.rst | 2 +- doc/index.rst | 8 +- doc/inference/index.md | 2 +- doc/inference/index.rst | 2 +- doc/inference/python.md | 11 +- doc/install/index.md | 2 +- doc/install/install-from-source.md | 22 +- doc/install/install-gromacs.md | 4 +- doc/install/install-ipi.md | 2 +- doc/install/install-lammps.md | 4 +- doc/install/install-tf.1.12.md | 5 +- doc/install/install-tf.1.14-gpu.md | 6 +- doc/install/install-tf.1.14.md | 4 +- doc/install/install-tf.1.8.md | 2 +- doc/install/install-tf.2.3.md | 4 +- doc/install/install-tf.2.8.md | 2 +- doc/license.rst | 2 +- doc/model/dplr.md | 28 +- doc/model/overall.md | 2 +- doc/model/train-energy.md | 6 +- doc/model/train-fitting-tensor.md | 8 +- doc/model/train-hybrid.md | 2 +- doc/model/train-se-atten.md | 21 +- doc/model/train-se-e2-a-tebd.md | 12 +- doc/model/train-se-e2-a.md | 11 +- doc/model/train-se-e2-r.md | 2 +- doc/nvnmd/index.rst | 2 +- doc/nvnmd/nvnmd.md | 14 +- doc/sphinx_contrib_exhale_multiproject.py | 33 +- doc/test/index.md | 2 +- doc/test/test.md | 4 +- doc/third-party/ase.md | 15 +- doc/third-party/gromacs.md | 32 +- doc/third-party/index.md | 2 +- doc/third-party/index.rst | 4 +- doc/third-party/ipi.md | 4 +- doc/third-party/lammps-command.md | 22 +- doc/train-input-auto.rst | 719 ++- doc/train/finetuning.md | 22 +- doc/train/gpu-limitations.md | 2 +- doc/train/index.rst | 2 +- doc/train/multi-task-training.md | 36 +- doc/train/tensorboard.md | 4 +- doc/train/train-input.rst | 1 - 
doc/train/training-advanced.md | 6 +- doc/train/training.md | 8 +- doc/troubleshooting/howtoset_netsize.md | 76 +- doc/troubleshooting/howtoset_num_nodes.md | 2 +- doc/troubleshooting/howtoset_sel.md | 2 +- doc/troubleshooting/index.rst | 4 +- doc/troubleshooting/installation.md | 2 +- .../md-version-compatibility.md | 2 +- doc/troubleshooting/model-compatability.md | 2 +- doc/troubleshooting/precision.md | 2 +- examples/data_conv/OUTCAR | 408 +- examples/fparam/train/.gitignore | 1 - examples/fparam/train/input.json | 120 +- examples/fparam/train/input_aparam.json | 120 +- examples/infer_water/infer_water.c | 21 +- examples/infer_water/infer_water.cpp | 14 +- examples/infer_water/infer_water_hpp.cpp | 22 +- examples/infer_water/infer_water_nlist.cpp | 32 +- examples/methane/index.raw | 2 +- examples/methane/input.json | 10 +- examples/methane/methane.itp | 8 +- examples/methane/run.sh | 4 +- examples/methane/topol.top | 2 +- examples/methane/type.raw | 2 +- examples/nopbc/README.md | 1 - examples/nopbc/train/input.json | 121 +- examples/nvnmd/train/train_cnn.json | 77 +- examples/nvnmd/train/train_qnn.json | 79 +- examples/water/dplr/lmp/conf.lmp | 2 +- examples/water/dplr/lmp/in.lammps | 2 - examples/water/dplr/train/dw.json | 133 +- examples/water/dplr/train/ener.json | 142 +- examples/water/gmx/index.raw | 2 +- examples/water/gmx/input.json | 8 +- examples/water/gmx/md.mdp | 1 - examples/water/gmx/md.sh | 2 +- examples/water/gmx/type.raw | 2 +- examples/water/hybrid/input.json | 172 +- examples/water/ipi/input.xml | 8 +- examples/water/ipi/water.json | 22 +- examples/water/lmp/in.lammps | 2 +- examples/water/lmp/in.plugin.lammps | 2 +- examples/water/lmp/water.lmp | 2 +- examples/water/se_atten/input.json | 142 +- examples/water/se_e2_a/input.json | 137 +- examples/water/se_e2_a_mixed_prec/input.json | 145 +- examples/water/se_e2_a_tebd/input.json | 153 +- examples/water/se_e2_r/input.json | 133 +- examples/water/se_e3/input.json | 144 +- 
.../water_multi_task/ener_dipole/input.json | 198 +- .../water_tensor/dipole/dipole_input.json | 156 +- .../training_data/atomic_system/type.raw | 2 +- .../training_data/global_system/type.raw | 2 +- .../validation_data/atomic_system/type.raw | 2 +- .../validation_data/global_system/type.raw | 2 +- examples/water_tensor/polar/polar_input.json | 145 +- .../training_data/atomic_system/type.raw | 2 +- .../training_data/global_system/type.raw | 2 +- .../validation_data/atomic_system/type.raw | 2 +- .../validation_data/global_system/type.raw | 2 +- setup.py | 21 +- source/3rdparty/json.hpp | 2 +- source/CMakeLists.txt | 211 +- source/api_c/CMakeLists.txt | 63 +- source/api_c/include/c_api.h | 1119 ++-- source/api_c/include/c_api_internal.h | 4 +- source/api_c/include/deepmd.hpp | 2539 ++++---- source/api_c/src/c_api.cc | 1665 +++-- source/api_c/tests/CMakeLists.txt | 19 +- source/api_c/tests/test_deepdipole_hpp.cc | 478 +- source/api_c/tests/test_deeppolar_hpp.cc | 568 +- source/api_c/tests/test_deeppot_a.cc | 279 +- source/api_c/tests/test_deeppot_a_hpp.cc | 433 +- .../tests/test_deeppot_model_devi_hpp.cc | 213 +- source/api_c/tests/test_dipolecharge.cc | 193 +- source/api_c/tests/test_utils.h | 150 +- source/api_cc/CMakeLists.txt | 67 +- source/api_cc/include/AtomMap.h | 34 +- source/api_cc/include/DataModifier.h | 148 +- source/api_cc/include/DeepPot.h | 757 +-- source/api_cc/include/DeepTensor.h | 420 +- source/api_cc/include/common.h | 385 +- source/api_cc/include/tf_private.h | 9 +- source/api_cc/include/tf_public.h | 17 +- source/api_cc/src/AtomMap.cc | 93 +- source/api_cc/src/DataModifier.cc | 354 +- source/api_cc/src/DeepPot.cc | 1840 +++--- source/api_cc/src/DeepTensor.cc | 1078 ++-- source/api_cc/src/common.cc | 1423 ++--- source/api_cc/tests/CMakeLists.txt | 18 +- source/api_cc/tests/test_deepdipole.cc | 485 +- source/api_cc/tests/test_deeppolar.cc | 575 +- source/api_cc/tests/test_deeppot_a.cc | 487 +- .../api_cc/tests/test_deeppot_model_devi.cc | 344 +- 
source/api_cc/tests/test_deeppot_r.cc | 443 +- source/api_cc/tests/test_dipolecharge.cc | 198 +- source/api_cc/tests/test_ewald.cc | 70 +- source/api_cc/tests/test_utils.h | 153 +- source/cmake/FindROCM.cmake | 104 +- source/cmake/Findtensorflow.cmake | 494 +- source/cmake/Findxdrfile.cmake | 73 +- source/cmake/cmake_lammps.cmake.in | 14 +- source/cmake/coverage_config/CMakeLists.txt | 25 +- source/cmake/test_cxx_abi.cpp | 5 +- source/cmake/tf_cxx_abi.cpp | 6 +- source/cmake/tf_version.cpp | 7 +- source/config/CMakeLists.txt | 8 +- source/gmx/.gitignore | 2 +- source/gmx/CMakeLists.txt | 49 +- source/gmx/include/gmx_plugin.h | 32 +- .../patches/2020.2/CMakeLists.txt.patch.in | 2 +- .../src/gromacs/mdlib/forcerec.cpp.patch | 4 +- .../2020.2/src/gromacs/mdlib/forcerec.h.patch | 2 +- .../src/gromacs/mdlib/sim_util.cpp.patch | 8 +- source/gmx/src/gmx_plugin.cpp | 203 +- source/install/build_cc.sh | 20 +- source/install/build_lammps.sh | 9 +- source/install/build_tf.py | 364 +- source/install/docker_package_c.sh | 4 +- source/install/docker_test_package_c.sh | 4 +- source/install/install_tf.sh | 25 +- source/install/package_c.sh | 13 +- source/ipi/CMakeLists.txt | 77 +- source/ipi/driver.cc | 268 +- source/ipi/include/Convert.h | 32 +- source/ipi/include/StringSplit.h | 36 +- source/ipi/include/XyzFileManager.h | 15 +- source/ipi/include/sockets.h | 18 +- source/ipi/src/Convert.cc | 55 +- source/ipi/src/XyzFileManager.cc | 119 +- source/ipi/src/sockets.c | 118 +- source/lib/CMakeLists.txt | 44 +- source/lib/include/ComputeDescriptor.h | 1346 +++-- source/lib/include/DeviceFunctor.h | 136 +- source/lib/include/SimulationRegion.h | 269 +- source/lib/include/SimulationRegion_Impl.h | 564 +- source/lib/include/coord.h | 140 +- source/lib/include/device.h | 7 +- source/lib/include/env_mat.h | 105 +- source/lib/include/env_mat_nvnmd.h | 74 +- source/lib/include/errors.h | 34 +- source/lib/include/ewald.h | 31 +- source/lib/include/fmt_nlist.h | 152 +- 
source/lib/include/gelu.h | 111 +- source/lib/include/gpu_cuda.h | 172 +- source/lib/include/gpu_rocm.h | 127 +- source/lib/include/map_aparam.h | 18 +- source/lib/include/neighbor_list.h | 316 +- source/lib/include/pair_tab.h | 32 +- source/lib/include/prod_env_mat.h | 256 +- source/lib/include/prod_env_mat_nvnmd.h | 49 +- source/lib/include/prod_force.h | 112 +- source/lib/include/prod_force_grad.h | 100 +- source/lib/include/prod_virial.h | 135 +- source/lib/include/prod_virial_grad.h | 112 +- source/lib/include/region.cuh | 82 +- source/lib/include/region.h | 114 +- source/lib/include/soft_min_switch.h | 23 +- source/lib/include/soft_min_switch_force.h | 21 +- .../lib/include/soft_min_switch_force_grad.h | 19 +- source/lib/include/soft_min_switch_virial.h | 25 +- .../lib/include/soft_min_switch_virial_grad.h | 21 +- source/lib/include/switcher.h | 98 +- source/lib/include/tabulate.h | 636 +- source/lib/include/utilities.h | 60 +- source/lib/src/SimulationRegion.cpp | 1 + source/lib/src/coord.cc | 214 +- source/lib/src/cuda/CMakeLists.txt | 343 +- source/lib/src/cuda/coord.cu | 756 ++- source/lib/src/cuda/cudart/CMakeLists.txt | 32 +- .../lib/src/cuda/cudart/cuda_runtime_11_8.inc | 2 +- .../lib/src/cuda/cudart/cuda_runtime_12_0.inc | 2 +- source/lib/src/cuda/cudart/cudart_stub.cc | 34 +- source/lib/src/cuda/gelu.cu | 142 +- source/lib/src/cuda/neighbor_list.cu | 464 +- source/lib/src/cuda/prod_env_mat.cu | 958 +-- source/lib/src/cuda/prod_force.cu | 218 +- source/lib/src/cuda/prod_force_grad.cu | 266 +- source/lib/src/cuda/prod_virial.cu | 265 +- source/lib/src/cuda/prod_virial_grad.cu | 267 +- source/lib/src/cuda/region.cu | 107 +- source/lib/src/cuda/tabulate.cu | 1121 ++-- source/lib/src/env_mat.cc | 603 +- source/lib/src/env_mat_nvnmd.cc | 223 +- source/lib/src/ewald.cc | 344 +- source/lib/src/fmt_nlist.cc | 352 +- source/lib/src/gelu.cc | 94 +- source/lib/src/map_aparam.cc | 60 +- source/lib/src/neighbor_list.cc | 1069 ++-- source/lib/src/pair_tab.cc | 268 
+- source/lib/src/prod_env_mat.cc | 334 +- source/lib/src/prod_env_mat_nvnmd.cc | 151 +- source/lib/src/prod_force.cc | 193 +- source/lib/src/prod_force_grad.cc | 202 +- source/lib/src/prod_virial.cc | 227 +- source/lib/src/prod_virial_grad.cc | 201 +- source/lib/src/region.cc | 242 +- source/lib/src/rocm/CMakeLists.txt | 27 +- source/lib/src/rocm/coord.hip.cu | 762 +-- source/lib/src/rocm/gelu.hip.cu | 200 +- source/lib/src/rocm/neighbor_list.hip.cu | 465 +- source/lib/src/rocm/prod_env_mat.hip.cu | 971 +-- source/lib/src/rocm/prod_force.hip.cu | 274 +- source/lib/src/rocm/prod_force_grad.hip.cu | 266 +- source/lib/src/rocm/prod_virial.hip.cu | 287 +- source/lib/src/rocm/prod_virial_grad.hip.cu | 266 +- source/lib/src/rocm/region.hip.cu | 109 +- source/lib/src/rocm/tabulate.hip.cu | 1154 ++-- source/lib/src/soft_min_switch.cc | 109 +- source/lib/src/soft_min_switch_force.cc | 60 +- source/lib/src/soft_min_switch_force_grad.cc | 54 +- source/lib/src/soft_min_switch_virial.cc | 89 +- source/lib/src/soft_min_switch_virial_grad.cc | 69 +- source/lib/src/tabulate.cc | 886 +-- source/lib/src/utilities.cc | 9 +- source/lib/tests/CMakeLists.txt | 15 +- source/lib/tests/test_coord.cc | 1024 ++-- source/lib/tests/test_env_mat_a.cc | 1028 ++-- source/lib/tests/test_env_mat_a_mix.cc | 1127 ++-- source/lib/tests/test_env_mat_a_nvnmd.cc | 403 +- source/lib/tests/test_env_mat_r.cc | 730 ++- source/lib/tests/test_ewald.cc | 56 +- source/lib/tests/test_fmt_nlist.cc | 609 +- source/lib/tests/test_gelu.cc | 331 +- source/lib/tests/test_map_aparam.cc | 91 +- source/lib/tests/test_neighbor_list.cc | 298 +- source/lib/tests/test_pair_tab.cc | 1090 ++-- source/lib/tests/test_prod_force_a.cc | 150 +- source/lib/tests/test_prod_force_grad_a.cc | 161 +- source/lib/tests/test_prod_force_grad_r.cc | 135 +- source/lib/tests/test_prod_force_r.cc | 147 +- source/lib/tests/test_prod_virial_a.cc | 250 +- source/lib/tests/test_prod_virial_grad_a.cc | 168 +- 
source/lib/tests/test_prod_virial_grad_r.cc | 141 +- source/lib/tests/test_prod_virial_r.cc | 244 +- source/lib/tests/test_simulation_region.cc | 123 +- source/lib/tests/test_soft_min_switch.cc | 152 +- .../lib/tests/test_soft_min_switch_force.cc | 118 +- .../tests/test_soft_min_switch_force_grad.cc | 97 +- .../lib/tests/test_soft_min_switch_virial.cc | 175 +- .../tests/test_soft_min_switch_virial_grad.cc | 98 +- source/lib/tests/test_tabulate_se_a.cc | 826 ++- source/lib/tests/test_tabulate_se_r.cc | 704 ++- source/lib/tests/test_tabulate_se_t.cc | 5339 ++++++++++++++++- source/lmp/CMakeLists.txt | 14 +- source/lmp/Install.sh | 56 +- source/lmp/compute_deeptensor_atom.cpp | 120 +- source/lmp/compute_deeptensor_atom.h | 7 +- source/lmp/fix_dplr.cpp | 419 +- source/lmp/fix_dplr.h | 79 +- source/lmp/fix_ttm_dp.h | 7 +- source/lmp/lmp_version.sh | 4 +- source/lmp/pair_deepmd.cpp | 1338 ++--- source/lmp/pair_deepmd.h.in | 8 +- source/lmp/plugin/CMakeLists.txt | 137 +- source/lmp/plugin/deepmdplugin.cpp | 45 +- source/lmp/pppm_dplr.cpp | 232 +- source/lmp/pppm_dplr.h | 44 +- source/lmp/tests/.gitignore | 2 +- source/lmp/tests/data.lmp | 2 +- source/lmp/tests/data_type_map.lmp | 2 +- source/lmp/tests/test_lammps.py | 317 +- source/md/CMakeLists.txt | 131 +- source/md/include/AdWeight.h | 107 +- source/md/include/Convert.h | 62 +- source/md/include/CosSwitch.h | 64 +- source/md/include/Gaussian.h | 11 +- source/md/include/GroFileManager.h | 73 +- source/md/include/HarmonicAngle.h | 44 +- source/md/include/HarmonicBond.h | 44 +- source/md/include/Integrator.h | 78 +- source/md/include/Interpolation.h | 102 +- source/md/include/LJInter.h | 45 +- source/md/include/LJTab.h | 43 +- source/md/include/MaxShift.h | 29 +- source/md/include/Poly.h | 136 +- source/md/include/RandomGenerator.h | 19 +- source/md/include/Statistics.h | 52 +- source/md/include/StringSplit.h | 19 +- source/md/include/TF.h | 36 +- source/md/include/TableFileLoader.h | 32 +- source/md/include/Tabulated.h | 
73 +- source/md/include/Trajectory.h | 75 +- source/md/include/UnitManager.h | 17 +- source/md/include/XyzFileManager.h | 21 +- source/md/include/ZM.h | 69 +- source/md/include/ZMFunctions.h | 70 +- source/md/include/common.h | 46 +- source/md/include/mymath.h | 68 +- source/md/mdnn.cc | 179 +- source/md/src/AdWeight.cc | 237 +- source/md/src/Convert.cc | 114 +- source/md/src/Gaussian.cc | 18 +- source/md/src/GroFileManager.cc | 349 +- source/md/src/HarmonicAngle.cc | 85 +- source/md/src/HarmonicBond.cc | 55 +- source/md/src/Integrator.cc | 127 +- source/md/src/Interpolation.cpp | 598 +- source/md/src/LJInter.cc | 95 +- source/md/src/LJTab.cc | 33 +- source/md/src/MaxShift.cc | 41 +- source/md/src/Poly.cpp | 276 +- source/md/src/RandomGenerator_MT19937.cc | 194 +- source/md/src/Statistics.cc | 109 +- source/md/src/StringSplit.cpp | 45 +- source/md/src/TF.cc | 62 +- source/md/src/TableFileLoader.cpp | 109 +- source/md/src/Tabulated.cc | 209 +- source/md/src/Trajectory.cc | 167 +- source/md/src/UnitManager.cc | 47 +- source/md/src/XyzFileManager.cc | 152 +- source/md/src/ZM.cc | 93 +- source/md/src/ZMFunctions.cpp | 280 +- source/op/CMakeLists.txt | 99 +- source/op/_add_flt_nvnmd_grad.py | 13 +- source/op/_copy_flt_nvnmd_grad.py | 13 +- source/op/_dotmul_flt_nvnmd_grad.py | 12 +- source/op/_flt_nvnmd_grad.py | 13 +- source/op/_gelu.py | 33 +- source/op/_map_flt_nvnmd_grad.py | 13 +- source/op/_matmul_fitnet_nvnmd_grad.py | 12 +- source/op/_matmul_flt2fix_nvnmd.py | 12 +- source/op/_matmul_flt_nvnmd_grad.py | 12 +- source/op/_mul_flt_nvnmd_grad.py | 12 +- source/op/_prod_force_grad.py | 32 +- source/op/_prod_force_se_a_grad.py | 30 +- source/op/_prod_force_se_r_grad.py | 22 +- source/op/_prod_virial_grad.py | 34 +- source/op/_prod_virial_se_a_grad.py | 32 +- source/op/_prod_virial_se_r_grad.py | 23 +- source/op/_quantize_nvnmd_grad.py | 12 +- source/op/_soft_min_force_grad.py | 29 +- source/op/_soft_min_virial_grad.py | 31 +- source/op/_tabulate_grad.py | 56 +- 
source/op/_tanh4_flt_nvnmd_grad.py | 26 +- source/op/add_flt_nvnmd.cc | 163 +- source/op/copy_flt_nvnmd.cc | 151 +- source/op/custom_op.cc | 30 +- source/op/custom_op.h | 29 +- source/op/descrpt.cc | 883 +-- source/op/descrpt_se_a_ef.cc | 519 +- source/op/descrpt_se_a_ef_para.cc | 522 +- source/op/descrpt_se_a_ef_vert.cc | 524 +- source/op/dotmul_flt_nvnmd.cc | 239 +- source/op/ewald_recp.cc | 151 +- source/op/flt_nvnmd.cc | 142 +- source/op/gelu_multi_device.cc | 243 +- source/op/map_aparam.cc | 91 +- source/op/map_flt_nvnmd.cc | 227 +- source/op/matmul_fitnet_nvnmd.cc | 273 +- source/op/matmul_flt2fix_nvnmd.cc | 235 +- source/op/matmul_flt_nvnmd.cc | 294 +- source/op/mul_flt_nvnmd.cc | 187 +- source/op/neighbor_stat.cc | 331 +- source/op/optimizer/parallel.cc | 21 +- source/op/optimizer/parallel.h | 11 +- source/op/pair_tab.cc | 217 +- source/op/prod_env_mat_multi_device.cc | 2717 ++++----- source/op/prod_env_mat_multi_device_nvnmd.cc | 643 +- source/op/prod_force.cc | 262 +- source/op/prod_force_grad.cc | 288 +- source/op/prod_force_grad_multi_device.cc | 370 +- source/op/prod_force_multi_device.cc | 325 +- source/op/prod_force_se_a_grad.cc | 168 +- source/op/prod_force_se_r_grad.cc | 137 +- source/op/prod_virial.cc | 285 +- source/op/prod_virial_grad.cc | 307 +- source/op/prod_virial_grad_multi_device.cc | 417 +- source/op/prod_virial_multi_device.cc | 336 +- source/op/prod_virial_se_a_grad.cc | 188 +- source/op/prod_virial_se_r_grad.cc | 161 +- source/op/quantize_nvnmd.cc | 200 +- source/op/soft_min.cc | 151 +- source/op/soft_min_force.cc | 100 +- source/op/soft_min_force_grad.cc | 142 +- source/op/soft_min_virial.cc | 128 +- source/op/soft_min_virial_grad.cc | 178 +- source/op/tabulate_multi_device.cc | 939 +-- source/op/tanh4_flt_nvnmd.cc | 206 +- source/op/unaggregated_grad.cc | 773 +-- source/tests/CMakeLists.txt | 6 +- source/tests/common.py | 920 +-- source/tests/compat_inputs/water_se_a_v0.json | 94 +- source/tests/compat_inputs/water_se_a_v1.json | 108 
+- source/tests/compat_inputs/water_v0.json | 106 +- source/tests/compat_inputs/water_v1.json | 114 +- source/tests/compat_inputs/water_v2.json | 118 +- source/tests/data_modifier/dipole.json | 127 +- source/tests/finetune/input_finetune.json | 104 +- source/tests/finetune/input_pretrain.json | 144 +- source/tests/infer/convert2pb.py | 26 +- source/tests/infer/in.test | 2 +- source/tests/init_frz_model/data/type.raw | 10 +- source/tests/init_frz_model/input.json | 137 +- source/tests/model_compression/data/type.raw | 10 +- source/tests/model_compression/input.json | 137 +- source/tests/nvnmd/train.json | 77 +- source/tests/nvnmd/train_cnn.json | 146 +- source/tests/polar_se_a.json | 127 +- source/tests/polar_se_a_tebd.json | 141 +- source/tests/test_activation_fn_gelu.py | 111 +- source/tests/test_adjust_sel.py | 218 +- source/tests/test_argument_parser.py | 43 +- source/tests/test_auto_batch_size.py | 23 +- source/tests/test_cluster.py | 130 +- source/tests/test_common.py | 61 +- source/tests/test_compat_input.py | 41 +- source/tests/test_data_large_batch.py | 304 +- source/tests/test_data_modifier.py | 176 +- source/tests/test_data_modifier_shuffle.py | 261 +- source/tests/test_data_requirement.py | 29 +- source/tests/test_deepdipole.py | 970 ++- source/tests/test_deepmd_data.py | 341 +- source/tests/test_deepmd_data_sys.py | 467 +- source/tests/test_deeppolar.py | 1023 +++- source/tests/test_deeppot_a.py | 732 ++- source/tests/test_deeppot_r.py | 631 +- source/tests/test_descrpt_nonsmth.py | 379 +- source/tests/test_descrpt_se_a_type.py | 463 +- source/tests/test_descrpt_se_atten.py | 435 +- source/tests/test_descrpt_se_r.py | 353 +- source/tests/test_descrpt_sea_ef.py | 245 +- source/tests/test_descrpt_sea_ef_para.py | 245 +- source/tests/test_descrpt_sea_ef_rot.py | 627 +- source/tests/test_descrpt_sea_ef_vert.py | 245 +- source/tests/test_descrpt_smooth.py | 371 +- source/tests/test_dipole_se_a.py | 232 +- source/tests/test_dipole_se_a_tebd.py | 237 +- 
source/tests/test_dipolecharge.py | 174 +- source/tests/test_embedding_net.py | 149 +- source/tests/test_env.py | 25 +- source/tests/test_ewald.py | 272 +- source/tests/test_examples.py | 15 +- source/tests/test_finetune_se_atten.py | 183 +- source/tests/test_fitting_ener_type.py | 270 +- source/tests/test_fitting_stat.py | 95 +- source/tests/test_gen_stat_data.py | 159 +- source/tests/test_get_potential.py | 27 +- source/tests/test_init_frz_model_multi.py | 222 +- source/tests/test_init_frz_model_se_a.py | 179 +- source/tests/test_init_frz_model_se_a_type.py | 185 +- source/tests/test_init_frz_model_se_atten.py | 195 +- source/tests/test_init_frz_model_se_r.py | 177 +- source/tests/test_lammps.py | 20 +- source/tests/test_layer_name.py | 174 +- source/tests/test_mixed_prec_training.py | 49 +- source/tests/test_model_compression_se_a.py | 513 +- ...ession_se_a_type_one_side_exclude_types.py | 172 +- source/tests/test_model_compression_se_r.py | 517 +- source/tests/test_model_compression_se_t.py | 515 +- source/tests/test_model_devi.py | 60 +- source/tests/test_model_loc_frame.py | 206 +- source/tests/test_model_multi.py | 298 +- source/tests/test_model_se_a.py | 478 +- source/tests/test_model_se_a_aparam.py | 205 +- source/tests/test_model_se_a_fparam.py | 203 +- source/tests/test_model_se_a_srtab.py | 228 +- source/tests/test_model_se_a_type.py | 216 +- source/tests/test_model_se_atten.py | 219 +- source/tests/test_model_se_r.py | 207 +- source/tests/test_model_se_t.py | 201 +- source/tests/test_neighbor_stat.py | 46 +- source/tests/test_nvnmd_entrypoints.py | 175 +- source/tests/test_nvnmd_op.py | 600 +- source/tests/test_nvnmd_se_a.py | 149 +- source/tests/test_nvnmd_utils.py | 65 +- source/tests/test_parallel_training.py | 41 +- source/tests/test_polar_se_a.py | 244 +- source/tests/test_polar_se_a_tebd.py | 256 +- source/tests/test_prod_env_mat.py | 1229 +++- source/tests/test_prod_force.py | 1094 +++- source/tests/test_prod_force_grad.py | 1158 +++- 
source/tests/test_prod_virial.py | 1547 ++++- source/tests/test_prod_virial_grad.py | 1347 ++++- source/tests/test_sel_idx.py | 21 +- source/tests/test_tab_nonsmth.py | 274 +- source/tests/test_tab_smooth.py | 270 +- source/tests/test_tabulate.py | 114 +- source/tests/test_train.py | 189 +- source/tests/test_transfer.py | 270 +- source/tests/test_type_embed.py | 54 +- source/tests/test_type_one_side.py | 303 +- source/tests/water.json | 118 +- source/tests/water_layer_name.json | 142 +- source/tests/water_multi.json | 136 +- source/tests/water_se_a.json | 105 +- source/tests/water_se_a_afparam.json | 109 +- source/tests/water_se_a_aparam.json | 107 +- source/tests/water_se_a_fparam.json | 107 +- source/tests/water_se_a_srtab.json | 113 +- source/tests/water_se_a_type.json | 121 +- source/tests/water_se_atten.json | 127 +- source/tests/water_se_atten_mixed_type.json | 127 +- source/tests/water_se_r.json | 102 +- source/tests/water_se_t.json | 102 +- source/tests/wfc.json | 137 +- source/tests/yaml_inputs/water_se_a_v1.json | 108 +- source/tests/yaml_inputs/water_v1.json | 114 +- 669 files changed, 79243 insertions(+), 55393 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index e13eb9daf7..f13b187dfb 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -9,7 +9,7 @@ body: attributes: label: Bug summary description: Please provide a clear and concise description of what the bug is. - placeholder: + placeholder: value: validations: required: true @@ -45,7 +45,7 @@ body: attributes: label: Input Files, Running Commands, Error Log, etc. description: "Please provide necessary information including input file, running commands, error log , etc., AS DETAILED AS POSSIBLE to help locate and reproduce your problem. WARNING: Do not use image to show error log! Paste texts in a code block instead." 
- placeholder: + placeholder: value: validations: required: true @@ -54,7 +54,7 @@ body: attributes: label: Steps to Reproduce description: "Describe the steps required to (quickly) reproduce the issue. You can attach (small) files to the section below or add URLs where to download an archive with all necessary files. Please try to create an input set that is as minimal and small as possible and reproduces the bug as quickly as possible. **NOTE:** the less effort and time it takes to reproduce your reported bug, the more likely it becomes, that somebody will look into it and fix the problem." - placeholder: + placeholder: value: validations: required: true @@ -63,7 +63,7 @@ body: attributes: label: Further Information, Files, and Links description: Put any additional information here, attach relevant text or image files and URLs to external sites, e.g. relevant publications - placeholder: + placeholder: value: validations: required: false diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml index ec76a91927..6cf73fecb2 100644 --- a/.github/ISSUE_TEMPLATE/feature-request.yml +++ b/.github/ISSUE_TEMPLATE/feature-request.yml @@ -9,7 +9,7 @@ body: attributes: label: Summary description: "Please provide a brief and concise description of the suggested feature or change" - placeholder: + placeholder: value: validations: required: true @@ -18,7 +18,7 @@ body: attributes: label: Detailed Description description: "Please explain how you would like to see deepmd-kit enhanced, what feature(s) you are looking for, what specific problems this will solve. If possible, provide references to relevant background information like publications or web pages, and whether you are planning to implement the enhancement yourself or would like to participate in the implementation. If applicable add a reference to an existing bug report or issue that this will address." 
- placeholder: + placeholder: value: validations: required: true @@ -27,7 +27,7 @@ body: attributes: label: Further Information, Files, and Links description: Put any additional information here, attach relevant text or image files and URLs to external sites, e.g. relevant publications - placeholder: + placeholder: value: validations: required: false diff --git a/.github/ISSUE_TEMPLATE/generic-issue.yml b/.github/ISSUE_TEMPLATE/generic-issue.yml index 5fe3cd8851..af9f01c64d 100644 --- a/.github/ISSUE_TEMPLATE/generic-issue.yml +++ b/.github/ISSUE_TEMPLATE/generic-issue.yml @@ -9,7 +9,7 @@ body: attributes: label: Summary description: "Please provide a clear and concise description of what the question is." - placeholder: + placeholder: value: validations: required: true @@ -32,7 +32,7 @@ body: attributes: label: Python Version, CUDA Version, GCC Version, LAMMPS Version, etc description: "If applicable, specify what platform you are running on." - placeholder: + placeholder: value: validations: required: false @@ -41,7 +41,7 @@ body: attributes: label: Details description: "Please explain the issue in detail here." - placeholder: + placeholder: value: validations: required: true diff --git a/.github/ISSUE_TEMPLATE/parameters.yml b/.github/ISSUE_TEMPLATE/parameters.yml index df7397d782..275740d31f 100644 --- a/.github/ISSUE_TEMPLATE/parameters.yml +++ b/.github/ISSUE_TEMPLATE/parameters.yml @@ -9,7 +9,7 @@ body: attributes: label: Summary description: "Please provide a clear and concise description of what your request is." - placeholder: + placeholder: value: validations: required: true @@ -18,7 +18,7 @@ body: attributes: label: Detailed Description description: "Please explain how you would like to see deepmd-kit enhanced. Specify your material system, and exactly what behaviors or properties you are looking for, or what specific problems this will solve. 
If possible, provide references to relevant background information like publications or web pages, and whether you are planning to implement the enhancement yourself or would like to participate in the implementation." - placeholder: + placeholder: value: validations: required: true @@ -27,7 +27,7 @@ body: attributes: label: Further Information, Files, and Links description: Put any additional information here, attach relevant text or image files and URLs to external sites, e.g. relevant publications - placeholder: + placeholder: value: validations: required: false diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 86199588d7..e43e23beb6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -13,7 +13,7 @@ You can start from any one of the following items to help improve deepmd-kit - Implement a feature or add a patch, whatever you think deepmd-kit is missing - Browse [issues](https://github.com/deepmodeling/deepmd-kit/issues), find an issue labeled enhancement or bug, and help to solve it. -See [here](#before-you-contribute) for some before-hand heads-up. +See [here](#before-you-contribute) for some before-hand heads-up. See [here](#how-to-contribute) to learn how to contribute. @@ -54,7 +54,7 @@ Please perform the following steps to create your Pull Request to this repositor ```bash git clone https://github.com/$username/deepmd-kit.git # Replace `$username` with your GitHub ID - + git checkout devel ``` @@ -62,9 +62,9 @@ Please perform the following steps to create your Pull Request to this repositor ```bash git remote add upstream https://github.com/deepmodeling/deepmd-kit.git # After you add a remote repo, your local repo will be automatically named "origin". - + git fetch upstream - + # If your current codes are behind the latest codes, you should merge latest codes first. # Notice you should merge from "devel"! git merge upstream/devel @@ -78,10 +78,10 @@ Please perform the following steps to create your Pull Request to this repositor git add ... 
# Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.` git commit -m "commit-message: update the xx" ``` - -5. Push the changed codes to your original repo on github. + +5. Push the changed codes to your original repo on github. ```bash - git push origin devel + git push origin devel ``` ### Alternatively: Create a new branch diff --git a/README.md b/README.md index fe242e2775..ff28678895 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ - [Troubleshooting](#troubleshooting) # About DeePMD-kit -DeePMD-kit is a package written in Python/C++, designed to minimize the effort required to build deep learning-based model of interatomic potential energy and force field and to perform molecular dynamics (MD). This brings new hopes to addressing the accuracy-versus-efficiency dilemma in molecular simulations. Applications of DeePMD-kit span from finite molecules to extended systems and from metallic systems to chemically bonded systems. +DeePMD-kit is a package written in Python/C++, designed to minimize the effort required to build deep learning-based model of interatomic potential energy and force field and to perform molecular dynamics (MD). This brings new hopes to addressing the accuracy-versus-efficiency dilemma in molecular simulations. Applications of DeePMD-kit span from finite molecules to extended systems and from metallic systems to chemically bonded systems. For more information, check the [documentation](https://deepmd.readthedocs.io/). @@ -32,18 +32,18 @@ For more information, check the [documentation](https://deepmd.readthedocs.io/). * [Atom type embedding](doc/model/train-se-e2-a-tebd.md). Enable atom-type embedding to decline training complexity and refine performance. * Training and inference of the dipole (vector) and polarizability (matrix). * Split of training and validation dataset. -* Optimized training on GPUs. +* Optimized training on GPUs. 
## Highlighted features * **interfaced with TensorFlow**, one of the most popular deep learning frameworks, making the training process highly automatic and efficient, in addition, Tensorboard can be used to visualize training procedures. -* **interfaced with high-performance classical MD and quantum (path-integral) MD packages**, i.e., LAMMPS and i-PI, respectively. +* **interfaced with high-performance classical MD and quantum (path-integral) MD packages**, i.e., LAMMPS and i-PI, respectively. * **implements the Deep Potential series models**, which have been successfully applied to finite and extended systems including organic molecules, metals, semiconductors, insulators, etc. * **implements MPI and GPU supports**, making it highly efficient for high-performance parallel and distributed computing. * **highly modularized**, easy to adapt to different descriptors for deep learning-based potential energy models. ## License and credits The project DeePMD-kit is licensed under [GNU LGPLv3.0](./LICENSE). -If you use this code in any future publications, please cite this using +If you use this code in any future publications, please cite this using ``Han Wang, Linfeng Zhang, Jiequn Han, and Weinan E. "DeePMD-kit: A deep learning package for many-body potential energy representation and molecular dynamics." Computer Physics Communications 228 (2018): 178-184.`` ## Deep Potential in a nutshell @@ -59,7 +59,7 @@ In addition to building up potential energy models, DeePMD-kit can also be used Please follow our [GitHub](https://github.com/deepmodeling/deepmd-kit) webpage to download the [latest released version](https://github.com/deepmodeling/deepmd-kit/tree/master) and [development version](https://github.com/deepmodeling/deepmd-kit/tree/devel). -DeePMD-kit offers multiple installation methods. 
It is recommended to use easy methods like [offline packages](doc/install/easy-install.md#offline-packages), [conda](doc/install/easy-install.md#with-conda) and [docker](doc/install/easy-install.md#with-docker). +DeePMD-kit offers multiple installation methods. It is recommended to use easy methods like [offline packages](doc/install/easy-install.md#offline-packages), [conda](doc/install/easy-install.md#with-conda) and [docker](doc/install/easy-install.md#with-docker). One may manually install DeePMD-kit by following the instructions on [installing the Python interface](doc/install/install-from-source.md#install-the-python-interface) and [installing the C++ interface](doc/install/install-from-source.md#install-the-c-interface). The C++ interface is necessary when using DeePMD-kit with LAMMPS, i-PI or GROMACS. diff --git a/backend/dp_backend.py b/backend/dp_backend.py index 71b8046d6e..97fa1578c7 100644 --- a/backend/dp_backend.py +++ b/backend/dp_backend.py @@ -1,10 +1,15 @@ """A PEP-517 backend to find TensorFlow.""" -from typing import List +from typing import ( + List, +) + +from find_tensorflow import ( + find_tensorflow, +) + # TODO: switch to scikit_build_core after it is available from setuptools import build_meta as _orig -from find_tensorflow import find_tensorflow - __all__ = [ "build_sdist", "build_wheel", @@ -23,11 +28,13 @@ def __dir__() -> List[str]: build_sdist = _orig.build_sdist get_requires_for_build_sdist = _orig.get_requires_for_build_sdist + def get_requires_for_build_wheel( config_settings: dict, ) -> List[str]: return _orig.get_requires_for_build_wheel(config_settings) + find_tensorflow()[1] + # TODO: export get_requires_for_build_editable, prepare_metadata_for_build_editable, build_editable # after scikit-build is ready # See https://github.com/scikit-build/scikit-build/issues/740 diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index d0998d0460..fbac958baa 100644 --- a/backend/find_tensorflow.py +++ 
b/backend/find_tensorflow.py @@ -1,16 +1,32 @@ -import site import os -from importlib.util import find_spec -from importlib.machinery import FileFinder -from sysconfig import get_path -from pathlib import Path -from typing import List, Optional, Tuple, Union -from packaging.specifiers import SpecifierSet +import site +from importlib.machinery import ( + FileFinder, +) +from importlib.util import ( + find_spec, +) +from pathlib import ( + Path, +) +from sysconfig import ( + get_path, +) +from typing import ( + List, + Optional, + Tuple, + Union, +) + +from packaging.specifiers import ( + SpecifierSet, +) def find_tensorflow() -> Tuple[Optional[str], List[str]]: """Find TensorFlow library. - + Tries to find TensorFlow in the order of: 1. Environment variable `TENSORFLOW_ROOT` if set @@ -62,7 +78,7 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: # TypeError if submodule_search_locations are None # IndexError if submodule_search_locations is an empty list except (AttributeError, TypeError, IndexError): - requires.extend(get_tf_requirement()['cpu']) + requires.extend(get_tf_requirement()["cpu"]) # setuptools will re-find tensorflow after installing setup_requires tf_install_dir = None return tf_install_dir, requires @@ -70,7 +86,7 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: def get_tf_requirement(tf_version: str = "") -> dict: """Get TensorFlow requirement (CPU) when TF is not installed. - + If tf_version is not given and the environment variable `TENSORFLOW_VERSION` is set, use it as the requirement. 
Parameters @@ -88,24 +104,44 @@ def get_tf_requirement(tf_version: str = "") -> dict: if tf_version == "": return { - "cpu": ["tensorflow-cpu; platform_machine!='aarch64'", "tensorflow; platform_machine=='aarch64'"], - "gpu": ["tensorflow; platform_machine!='aarch64'", "tensorflow; platform_machine=='aarch64'"], + "cpu": [ + "tensorflow-cpu; platform_machine!='aarch64'", + "tensorflow; platform_machine=='aarch64'", + ], + "gpu": [ + "tensorflow; platform_machine!='aarch64'", + "tensorflow; platform_machine=='aarch64'", + ], } - elif tf_version in SpecifierSet("<1.15") or tf_version in SpecifierSet(">=2.0,<2.1"): + elif tf_version in SpecifierSet("<1.15") or tf_version in SpecifierSet( + ">=2.0,<2.1" + ): return { - "cpu": [f"tensorflow=={tf_version}; platform_machine!='aarch64'", f"tensorflow=={tf_version}; platform_machine=='aarch64'"], - "gpu": [f"tensorflow-gpu=={tf_version}; platform_machine!='aarch64'", f"tensorflow=={tf_version}; platform_machine=='aarch64'"], + "cpu": [ + f"tensorflow=={tf_version}; platform_machine!='aarch64'", + f"tensorflow=={tf_version}; platform_machine=='aarch64'", + ], + "gpu": [ + f"tensorflow-gpu=={tf_version}; platform_machine!='aarch64'", + f"tensorflow=={tf_version}; platform_machine=='aarch64'", + ], } else: return { - "cpu": [f"tensorflow-cpu=={tf_version}; platform_machine!='aarch64'", f"tensorflow=={tf_version}; platform_machine=='aarch64'"], - "gpu": [f"tensorflow=={tf_version}; platform_machine!='aarch64'", f"tensorflow=={tf_version}; platform_machine=='aarch64'"], + "cpu": [ + f"tensorflow-cpu=={tf_version}; platform_machine!='aarch64'", + f"tensorflow=={tf_version}; platform_machine=='aarch64'", + ], + "gpu": [ + f"tensorflow=={tf_version}; platform_machine!='aarch64'", + f"tensorflow=={tf_version}; platform_machine=='aarch64'", + ], } def get_tf_version(tf_path: Union[str, Path]) -> str: """Get TF version from a TF Python library path. 
- + Parameters ---------- tf_path : str or Path @@ -118,7 +154,9 @@ def get_tf_version(tf_path: Union[str, Path]) -> str: """ if tf_path is None or tf_path == "": return "" - version_file = Path(tf_path) / "include" / "tensorflow" / "core" / "public" / "version.h" + version_file = ( + Path(tf_path) / "include" / "tensorflow" / "core" / "public" / "version.h" + ) major = minor = patch = None with open(version_file) as f: for line in f: diff --git a/data/json/json2yaml.py b/data/json/json2yaml.py index f601928427..6c97771bec 100644 --- a/data/json/json2yaml.py +++ b/data/json/json2yaml.py @@ -2,8 +2,12 @@ import argparse import json -from pathlib import Path -from warnings import warn +from pathlib import ( + Path, +) +from warnings import ( + warn, +) import yaml @@ -11,7 +15,8 @@ def _main(): parser = argparse.ArgumentParser( description="convert json config file to yaml", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) # get all json files in dir jsons = [p for p in Path.cwd().glob("*.json")] @@ -20,18 +25,20 @@ def _main(): jfile = jsons[0] yfile = jfile.with_suffix(".yaml") - parser.add_argument("INPUT", default=jfile, type=Path, nargs="?", - help="input json file") - parser.add_argument("OUTPUT", default=yfile, type=Path, nargs="?", - help="output yaml file") + parser.add_argument( + "INPUT", default=jfile, type=Path, nargs="?", help="input json file" + ) + parser.add_argument( + "OUTPUT", default=yfile, type=Path, nargs="?", help="output yaml file" + ) args = parser.parse_args() with args.INPUT.open("r") as infile, args.OUTPUT.open("w") as outfile: - yaml.dump(json.load(infile), outfile, default_flow_style=False, - sort_keys=False) + yaml.dump(json.load(infile), outfile, default_flow_style=False, sort_keys=False) warn("The order of the keys won't be preserved!", SyntaxWarning) warn("_comment keys will also be lostt in the conversion") + if __name__ == "__main__": _main() diff --git 
a/data/raw/copy_raw.py b/data/raw/copy_raw.py index 0099482891..073b0fbfd7 100755 --- a/data/raw/copy_raw.py +++ b/data/raw/copy_raw.py @@ -1,88 +1,95 @@ #!/usr/bin/env python3 -import numpy as np -import argparse, os +import argparse +import os import os.path -def copy (in_dir, - out_dir, - ncopies = [1,1,1]) : - has_energy = os.path.isfile (in_dir + "/energy.raw") - has_force = os.path.isfile (in_dir + "/force.raw") - has_virial = os.path.isfile (in_dir + "/virial.raw") - - i_box = np.loadtxt (in_dir + "/box.raw") - i_coord = np.loadtxt (in_dir + "/coord.raw") - if has_energy : - i_energy = np.loadtxt (in_dir + "/energy.raw") - if has_force : - i_force = np.loadtxt (in_dir + "/force.raw") - if has_virial: - i_virial = np.loadtxt (in_dir + "/virial.raw") - i_type = np.loadtxt (in_dir + "/type.raw") +import numpy as np + + +def copy(in_dir, out_dir, ncopies=[1, 1, 1]): + has_energy = os.path.isfile(in_dir + "/energy.raw") + has_force = os.path.isfile(in_dir + "/force.raw") + has_virial = os.path.isfile(in_dir + "/virial.raw") + + i_box = np.loadtxt(in_dir + "/box.raw") + i_coord = np.loadtxt(in_dir + "/coord.raw") + if has_energy: + i_energy = np.loadtxt(in_dir + "/energy.raw") + if has_force: + i_force = np.loadtxt(in_dir + "/force.raw") + if has_virial: + i_virial = np.loadtxt(in_dir + "/virial.raw") + i_type = np.loadtxt(in_dir + "/type.raw") nsys = ncopies[0] * ncopies[1] * ncopies[2] nframes = i_coord.shape[0] natoms = i_coord.shape[1] // 3 - if has_energy : + if has_energy: o_energy = i_energy * nsys - if has_virial : - o_virial = i_virial * nsys + if has_virial: + o_virial = i_virial * nsys o_box = np.zeros(i_box.shape) - for ii in range (3) : - o_box[:, ii*3:ii*3+3] = i_box[:, ii*3:ii*3+3] * ncopies[ii] - + for ii in range(3): + o_box[:, ii * 3 : ii * 3 + 3] = i_box[:, ii * 3 : ii * 3 + 3] * ncopies[ii] + o_coord = i_coord - if has_force : + if has_force: o_force = i_force - i_type = np.reshape (i_type, [-1, natoms]) + i_type = np.reshape(i_type, [-1, 
natoms]) o_type = i_type - for ii in range (ncopies[0]) : - for jj in range (ncopies[1]) : - for kk in range (ncopies[2]) : - if ii == 0 and jj == 0 and kk == 0 : + for ii in range(ncopies[0]): + for jj in range(ncopies[1]): + for kk in range(ncopies[2]): + if ii == 0 and jj == 0 and kk == 0: continue - citer = np.array ([ii, jj, kk]) - shift = np.zeros ([nframes, 3]) - for dd in range (3) : - shift += i_box[:, dd*3:dd*3+3] * citer[dd] - ashift = np.tile (shift, natoms) - o_coord = np.concatenate ((o_coord, i_coord + ashift), axis = 1) - if has_force : - o_force = np.concatenate ((o_force, i_force), axis = 1) - o_type = np.concatenate ((o_type, i_type), axis = 1) - - if not os.path.exists (out_dir) : - os.makedirs (out_dir) - - np.savetxt (out_dir + "/box.raw", o_box) - np.savetxt (out_dir + "/coord.raw", o_coord) - if has_energy : - np.savetxt (out_dir + "/energy.raw", o_energy) - if has_force : - np.savetxt (out_dir + "/force.raw", o_force) - if has_virial : - np.savetxt (out_dir + "/virial.raw", o_virial) - np.savetxt (out_dir + "/type.raw", o_type, fmt = '%d') - np.savetxt (out_dir + "/ncopies.raw", ncopies, fmt = "%d") - -def _main () : - parser = argparse.ArgumentParser ( - description = "parse copy raw args" ) - parser.add_argument ("INPUT", default = ".", - help = "input dir of raw files") - parser.add_argument ("OUTPUT", default = ".", - help = "output dir of copied raw files") - parser.add_argument ("-n", "--ncopies", nargs = 3, default = [1,1,1], type = int, - help = "the number of copies") + citer = np.array([ii, jj, kk]) + shift = np.zeros([nframes, 3]) + for dd in range(3): + shift += i_box[:, dd * 3 : dd * 3 + 3] * citer[dd] + ashift = np.tile(shift, natoms) + o_coord = np.concatenate((o_coord, i_coord + ashift), axis=1) + if has_force: + o_force = np.concatenate((o_force, i_force), axis=1) + o_type = np.concatenate((o_type, i_type), axis=1) + + if not os.path.exists(out_dir): + os.makedirs(out_dir) + + np.savetxt(out_dir + "/box.raw", o_box) + 
np.savetxt(out_dir + "/coord.raw", o_coord) + if has_energy: + np.savetxt(out_dir + "/energy.raw", o_energy) + if has_force: + np.savetxt(out_dir + "/force.raw", o_force) + if has_virial: + np.savetxt(out_dir + "/virial.raw", o_virial) + np.savetxt(out_dir + "/type.raw", o_type, fmt="%d") + np.savetxt(out_dir + "/ncopies.raw", ncopies, fmt="%d") + + +def _main(): + parser = argparse.ArgumentParser(description="parse copy raw args") + parser.add_argument("INPUT", default=".", help="input dir of raw files") + parser.add_argument("OUTPUT", default=".", help="output dir of copied raw files") + parser.add_argument( + "-n", + "--ncopies", + nargs=3, + default=[1, 1, 1], + type=int, + help="the number of copies", + ) args = parser.parse_args() - print ("# copy the system by %s copies" % args.ncopies) - assert (np.all(np.array(args.ncopies, dtype = int) >= np.array([1,1,1], dtype=int))), \ - "number of copies should be larger than or equal to 1" - copy (args.INPUT, args.OUTPUT, args.ncopies) + print("# copy the system by %s copies" % args.ncopies) + assert np.all( + np.array(args.ncopies, dtype=int) >= np.array([1, 1, 1], dtype=int) + ), "number of copies should be larger than or equal to 1" + copy(args.INPUT, args.OUTPUT, args.ncopies) + -if __name__ == "__main__" : +if __name__ == "__main__": _main() diff --git a/data/raw/raw_to_set.sh b/data/raw/raw_to_set.sh index a89ef3a872..1752f8d641 100755 --- a/data/raw/raw_to_set.sh +++ b/data/raw/raw_to_set.sh @@ -3,17 +3,17 @@ nline_per_set=2000 if test $# -ge 1; then - nline_per_set=$1 + nline_per_set=$1 fi rm -fr set.* -echo nframe is `cat box.raw | wc -l` +echo nframe is $(cat box.raw | wc -l) echo nline per set is $nline_per_set -split box.raw -l $nline_per_set -d -a 3 box.raw -split coord.raw -l $nline_per_set -d -a 3 coord.raw +split box.raw -l $nline_per_set -d -a 3 box.raw +split coord.raw -l $nline_per_set -d -a 3 coord.raw test -f energy.raw && split energy.raw -l $nline_per_set -d -a 3 energy.raw -test -f force.raw 
&& split force.raw -l $nline_per_set -d -a 3 force.raw +test -f force.raw && split force.raw -l $nline_per_set -d -a 3 force.raw test -f virial.raw && split virial.raw -l $nline_per_set -d -a 3 virial.raw test -f atom_ener.raw && split atom_ener.raw -l $nline_per_set -d -a 3 atom_ener.raw test -f fparam.raw && split fparam.raw -l $nline_per_set -d -a 3 fparam.raw @@ -22,91 +22,90 @@ test -f polarizability.raw && split polarizability.raw -l $nline_per_set -d -a 3 test -f atomic_dipole.raw && split atomic_dipole.raw -l $nline_per_set -d -a 3 atomic_dipole.raw test -f atomic_polarizability.raw && split atomic_polarizability.raw -l $nline_per_set -d -a 3 atomic_polarizability.raw -nset=`ls | grep box.raw[0-9] | wc -l` -nset_1=$(($nset-1)) +nset=$(ls | grep box.raw[0-9] | wc -l) +nset_1=$((nset - 1)) echo will make $nset sets -for ii in `seq 0 $nset_1` -do - echo making set $ii ... - pi=`printf %03d $ii` - mkdir set.$pi - mv box.raw$pi set.$pi/box.raw - mv coord.raw$pi set.$pi/coord.raw - test -f energy.raw$pi && mv energy.raw$pi set.$pi/energy.raw - test -f force.raw$pi && mv force.raw$pi set.$pi/force.raw - test -f virial.raw$pi && mv virial.raw$pi set.$pi/virial.raw - test -f atom_ener.raw$pi && mv atom_ener.raw$pi set.$pi/atom_ener.raw - test -f fparam.raw$pi && mv fparam.raw$pi set.$pi/fparam.raw - test -f atomic_dipole.raw$pi && mv atomic_dipole.raw$pi set.$pi/atomic_dipole.raw - test -f atomic_polarizability.raw$pi && mv atomic_polarizability.raw$pi set.$pi/atomic_polarizability.raw +for ii in $(seq 0 $nset_1); do + echo making set $ii ... 
+ pi=$(printf %03d $ii) + mkdir set.$pi + mv box.raw$pi set.$pi/box.raw + mv coord.raw$pi set.$pi/coord.raw + test -f energy.raw$pi && mv energy.raw$pi set.$pi/energy.raw + test -f force.raw$pi && mv force.raw$pi set.$pi/force.raw + test -f virial.raw$pi && mv virial.raw$pi set.$pi/virial.raw + test -f atom_ener.raw$pi && mv atom_ener.raw$pi set.$pi/atom_ener.raw + test -f fparam.raw$pi && mv fparam.raw$pi set.$pi/fparam.raw + test -f atomic_dipole.raw$pi && mv atomic_dipole.raw$pi set.$pi/atomic_dipole.raw + test -f atomic_polarizability.raw$pi && mv atomic_polarizability.raw$pi set.$pi/atomic_polarizability.raw - cd set.$pi - python -c 'import numpy as np; data = np.loadtxt("box.raw" , ndmin = 2); data = data.astype (np.float32); np.save ("box", data)' - python -c 'import numpy as np; data = np.loadtxt("coord.raw" , ndmin = 2); data = data.astype (np.float32); np.save ("coord", data)' - python -c \ -'import numpy as np; import os.path; -if os.path.isfile("energy.raw"): - data = np.loadtxt("energy.raw"); - data = data.astype (np.float32); + cd set.$pi + python -c 'import numpy as np; data = np.loadtxt("box.raw" , ndmin = 2); data = data.astype (np.float32); np.save ("box", data)' + python -c 'import numpy as np; data = np.loadtxt("coord.raw" , ndmin = 2); data = data.astype (np.float32); np.save ("coord", data)' + python -c \ + 'import numpy as np; import os.path; +if os.path.isfile("energy.raw"): + data = np.loadtxt("energy.raw"); + data = data.astype (np.float32); np.save ("energy", data) ' - python -c \ -'import numpy as np; import os.path; -if os.path.isfile("force.raw" ): - data = np.loadtxt("force.raw", ndmin = 2); - data = data.astype (np.float32); + python -c \ + 'import numpy as np; import os.path; +if os.path.isfile("force.raw" ): + data = np.loadtxt("force.raw", ndmin = 2); + data = data.astype (np.float32); np.save ("force", data) ' - python -c \ -'import numpy as np; import os.path; -if os.path.isfile("virial.raw"): - data = np.loadtxt("virial.raw", 
ndmin = 2); - data = data.astype (np.float32); + python -c \ + 'import numpy as np; import os.path; +if os.path.isfile("virial.raw"): + data = np.loadtxt("virial.raw", ndmin = 2); + data = data.astype (np.float32); np.save ("virial", data) ' - python -c \ -'import numpy as np; import os.path; -if os.path.isfile("atom_ener.raw"): - data = np.loadtxt("atom_ener.raw", ndmin = 2); - data = data.astype (np.float32); + python -c \ + 'import numpy as np; import os.path; +if os.path.isfile("atom_ener.raw"): + data = np.loadtxt("atom_ener.raw", ndmin = 2); + data = data.astype (np.float32); np.save ("atom_ener", data) ' - python -c \ -'import numpy as np; import os.path; -if os.path.isfile("fparam.raw"): - data = np.loadtxt("fparam.raw", ndmin = 2); - data = data.astype (np.float32); + python -c \ + 'import numpy as np; import os.path; +if os.path.isfile("fparam.raw"): + data = np.loadtxt("fparam.raw", ndmin = 2); + data = data.astype (np.float32); np.save ("fparam", data) ' - python -c \ -'import numpy as np; import os.path; -if os.path.isfile("dipole.raw"): - data = np.loadtxt("dipole.raw", ndmin = 2); - data = data.astype (np.float32); + python -c \ + 'import numpy as np; import os.path; +if os.path.isfile("dipole.raw"): + data = np.loadtxt("dipole.raw", ndmin = 2); + data = data.astype (np.float32); np.save ("dipole", data) ' - python -c \ -'import numpy as np; import os.path; -if os.path.isfile("polarizability.raw"): - data = np.loadtxt("polarizability.raw", ndmin = 2); - data = data.astype (np.float32); + python -c \ + 'import numpy as np; import os.path; +if os.path.isfile("polarizability.raw"): + data = np.loadtxt("polarizability.raw", ndmin = 2); + data = data.astype (np.float32); np.save ("polarizability", data) ' - python -c \ -'import numpy as np; import os.path; -if os.path.isfile("atomic_dipole.raw"): - data = np.loadtxt("atomic_dipole.raw", ndmin = 2); - data = data.astype (np.float32); + python -c \ + 'import numpy as np; import os.path; +if 
os.path.isfile("atomic_dipole.raw"): + data = np.loadtxt("atomic_dipole.raw", ndmin = 2); + data = data.astype (np.float32); np.save ("atomic_dipole", data) ' - python -c \ -'import numpy as np; import os.path; -if os.path.isfile("atomic_polarizability.raw"): - data = np.loadtxt("atomic_polarizability.raw", ndmin = 2); - data = data.astype (np.float32); + python -c \ + 'import numpy as np; import os.path; +if os.path.isfile("atomic_polarizability.raw"): + data = np.loadtxt("atomic_polarizability.raw", ndmin = 2); + data = data.astype (np.float32); np.save ("atomic_polarizability", data) ' - rm *.raw - cd ../ + rm *.raw + cd ../ done diff --git a/data/raw/shuffle_raw.py b/data/raw/shuffle_raw.py index a114cf7576..2c99188aa4 100755 --- a/data/raw/shuffle_raw.py +++ b/data/raw/shuffle_raw.py @@ -1,68 +1,81 @@ #!/usr/bin/env python3 +import argparse import os + import numpy as np -import argparse -def _parse_args () : - parser = argparse.ArgumentParser ( - description = "parse shuffle args" ) - parser.add_argument ("INPUT", default = ".", - help = "input dir of raw files") - parser.add_argument ("OUTPUT", default = ".", - help = "output dir of shuffled raw files") - parser.add_argument ('-r', '--raws', nargs = '+', default = [], - help = "raw files, if not set, then auto detect") + +def _parse_args(): + parser = argparse.ArgumentParser(description="parse shuffle args") + parser.add_argument("INPUT", default=".", help="input dir of raw files") + parser.add_argument("OUTPUT", default=".", help="output dir of shuffled raw files") + parser.add_argument( + "-r", + "--raws", + nargs="+", + default=[], + help="raw files, if not set, then auto detect", + ) return parser.parse_args() -def detect_raw (path) : + +def detect_raw(path): raws = [] names = ["box.raw", "coord.raw", "energy.raw", "force.raw", "virial.raw"] - for ff in names : - if os.path.isfile (path + "/" + ff) : raws.append (ff) + for ff in names: + if os.path.isfile(path + "/" + ff): + raws.append(ff) return raws 
-def _main () : - args = _parse_args () + +def _main(): + args = _parse_args() raws = args.raws inpath = args.INPUT outpath = args.OUTPUT - if not os.path.isdir (inpath): - print ("# no input dir " + inpath + ", exit") + if not os.path.isdir(inpath): + print("# no input dir " + inpath + ", exit") return - - if not os.path.isdir (outpath) : - os.mkdir (outpath) - if len(raws) == 0 : - raws = detect_raw (inpath) + if not os.path.isdir(outpath): + os.mkdir(outpath) + + if len(raws) == 0: + raws = detect_raw(inpath) - if len(raws) == 0 : - print ("# no file to shuffle, exit") + if len(raws) == 0: + print("# no file to shuffle, exit") return - assert ("box.raw" in raws) + assert "box.raw" in raws tmp = np.loadtxt(os.path.join(inpath, "box.raw")) tmp = np.reshape(tmp, [-1, 9]) nframe = tmp.shape[0] print(nframe) - print ("# will shuffle raw files " + str(raws) + - " in dir " + inpath + - " and output to dir " + outpath) + print( + "# will shuffle raw files " + + str(raws) + + " in dir " + + inpath + + " and output to dir " + + outpath + ) - tmp = np.loadtxt (inpath + "/" + raws[0]) + tmp = np.loadtxt(inpath + "/" + raws[0]) tmp = np.reshape(tmp, [nframe, -1]) nframe = tmp.shape[0] - idx = np.arange (nframe) + idx = np.arange(nframe) np.random.shuffle(idx) - - for ii in raws : + + for ii in raws: data = np.loadtxt(inpath + "/" + ii) data = np.reshape(data, [nframe, -1]) - data = data [idx] - np.savetxt (outpath + "/" + ii, data) + data = data[idx] + np.savetxt(outpath + "/" + ii, data) + -if __name__ == "__main__" : +if __name__ == "__main__": _main() diff --git a/deepmd/.gitignore b/deepmd/.gitignore index 5ac6bafb1c..b2b9057ea2 100644 --- a/deepmd/.gitignore +++ b/deepmd/.gitignore @@ -1,4 +1,4 @@ op/_*.py pkg_config run_config.ini -!op/__init__.py \ No newline at end of file +!op/__init__.py diff --git a/deepmd/__about__.py b/deepmd/__about__.py index 2c6b9a1374..d5cfca6473 100644 --- a/deepmd/__about__.py +++ b/deepmd/__about__.py @@ -1 +1 @@ -__version__ = 'unknown' 
+__version__ = "unknown" diff --git a/deepmd/__init__.py b/deepmd/__init__.py index 25e94757fc..6b3d4ae310 100644 --- a/deepmd/__init__.py +++ b/deepmd/__init__.py @@ -1,28 +1,47 @@ """Root of the deepmd package, exposes all public classes and submodules.""" try: - from importlib import metadata + from importlib import ( + metadata, + ) except ImportError: # for Python<3.8 import importlib_metadata as metadata + import deepmd.utils.network as network -from . import cluster, descriptor, fit, loss, utils, nvnmd -from .env import set_mkl -from .infer import DeepEval, DeepPotential -from .infer.data_modifier import DipoleChargeModifier +from . import ( + cluster, + descriptor, + fit, + loss, + nvnmd, + utils, +) +from .env import ( + set_mkl, +) +from .infer import ( + DeepEval, + DeepPotential, +) +from .infer.data_modifier import ( + DipoleChargeModifier, +) set_mkl() try: from ._version import version as __version__ except ImportError: - from .__about__ import __version__ + from .__about__ import ( + __version__, + ) # load third-party plugins try: - eps = metadata.entry_points(group='deepmd') + eps = metadata.entry_points(group="deepmd") except TypeError: - eps = metadata.entry_points().get('deepmd', []) + eps = metadata.entry_points().get("deepmd", []) for ep in eps: ep.load() diff --git a/deepmd/__main__.py b/deepmd/__main__.py index 2dea15ee78..8e3011bc7b 100644 --- a/deepmd/__main__.py +++ b/deepmd/__main__.py @@ -1,6 +1,8 @@ """Package dp entry point.""" -from .entrypoints.main import main +from .entrypoints.main import ( + main, +) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/deepmd/calculator.py b/deepmd/calculator.py index 60b22b868a..cd361eed5f 100644 --- a/deepmd/calculator.py +++ b/deepmd/calculator.py @@ -1,16 +1,30 @@ """ASE calculator interface module.""" -from pathlib import Path -from typing import TYPE_CHECKING, Dict, List, Optional, Union +from pathlib import ( + Path, +) +from typing import ( + TYPE_CHECKING, + 
Dict, + List, + Optional, + Union, +) from ase.calculators.calculator import ( - Calculator, all_changes, PropertyNotImplementedError + Calculator, + PropertyNotImplementedError, + all_changes, ) -from deepmd import DeepPotential +from deepmd import ( + DeepPotential, +) if TYPE_CHECKING: - from ase import Atoms + from ase import ( + Atoms, + ) __all__ = ["DP"] @@ -101,11 +115,11 @@ def calculate( symbols = self.atoms.get_chemical_symbols() atype = [self.type_dict[k] for k in symbols] e, f, v = self.dp.eval(coords=coord, cells=cell, atom_types=atype) - self.results['energy'] = e[0][0] + self.results["energy"] = e[0][0] # see https://gitlab.com/ase/ase/-/merge_requests/2485 - self.results['free_energy'] = e[0][0] - self.results['forces'] = f[0] - self.results['virial'] = v[0].reshape(3, 3) + self.results["free_energy"] = e[0][0] + self.results["forces"] = f[0] + self.results["virial"] = v[0].reshape(3, 3) # convert virial into stress for lattice relaxation if "stress" in properties: @@ -114,6 +128,6 @@ def calculate( # stress = -virial / volume stress = -0.5 * (v[0].copy() + v[0].copy().T) / atoms.get_volume() # Voigt notation - self.results['stress'] = stress.flat[[0, 4, 8, 5, 2, 1]] + self.results["stress"] = stress.flat[[0, 4, 8, 5, 2, 1]] else: raise PropertyNotImplementedError diff --git a/deepmd/cluster/__init__.py b/deepmd/cluster/__init__.py index 1875b21f9b..4cba0c10b9 100644 --- a/deepmd/cluster/__init__.py +++ b/deepmd/cluster/__init__.py @@ -1,9 +1,14 @@ """Module that reads node resources, auto detects if running local or on SLURM.""" +import os +from typing import ( + List, + Optional, + Tuple, +) + from .local import get_resource as get_local_res from .slurm import get_resource as get_slurm_res -import os -from typing import List, Tuple, Optional __all__ = ["get_resource"] diff --git a/deepmd/cluster/local.py b/deepmd/cluster/local.py index 69af55040d..4b15af6e6d 100644 --- a/deepmd/cluster/local.py +++ b/deepmd/cluster/local.py @@ -4,10 +4,15 @@ 
import socket import subprocess as sp import sys +from typing import ( + List, + Optional, + Tuple, +) -from deepmd.env import tf -from typing import List, Tuple, Optional - +from deepmd.env import ( + tf, +) __all__ = ["get_gpus", "get_resource"] @@ -21,20 +26,25 @@ def get_gpus(): Optional[List[int]] List of available GPU IDs. Otherwise, None. """ - if (not tf.test.is_built_with_cuda() and - not (hasattr(tf.test, 'is_built_with_rocm') and tf.test.is_built_with_rocm())): + if not tf.test.is_built_with_cuda() and not ( + hasattr(tf.test, "is_built_with_rocm") and tf.test.is_built_with_rocm() + ): # TF is built with CPU only, skip expensive subprocess call return None - test_cmd = 'from tensorflow.python.client import device_lib; ' \ - 'devices = device_lib.list_local_devices(); ' \ - 'gpus = [d.name for d in devices if d.device_type == "GPU"]; ' \ - 'print(len(gpus))' - with sp.Popen([sys.executable, "-c", test_cmd], stderr=sp.PIPE, stdout=sp.PIPE) as p: + test_cmd = ( + "from tensorflow.python.client import device_lib; " + "devices = device_lib.list_local_devices(); " + 'gpus = [d.name for d in devices if d.device_type == "GPU"]; ' + "print(len(gpus))" + ) + with sp.Popen( + [sys.executable, "-c", test_cmd], stderr=sp.PIPE, stdout=sp.PIPE + ) as p: stdout, stderr = p.communicate() if p.returncode != 0: - decoded = stderr.decode('UTF-8') - raise RuntimeError('Failed to detect availbe GPUs due to:\n%s' % decoded) - decoded = stdout.decode('UTF-8').strip() + decoded = stderr.decode("UTF-8") + raise RuntimeError("Failed to detect availbe GPUs due to:\n%s" % decoded) + decoded = stdout.decode("UTF-8").strip() num_gpus = int(decoded) return list(range(num_gpus)) if num_gpus > 0 else None diff --git a/deepmd/cluster/slurm.py b/deepmd/cluster/slurm.py index feafd84117..2be6b438f2 100644 --- a/deepmd/cluster/slurm.py +++ b/deepmd/cluster/slurm.py @@ -5,11 +5,18 @@ https://github.com/deepsense-ai/tensorflow_on_slurm #### """ -import hostlist import os +from typing import ( 
+ List, + Optional, + Tuple, +) + +import hostlist -from deepmd.cluster import local -from typing import List, Tuple, Optional +from deepmd.cluster import ( + local, +) __all__ = ["get_resource"] diff --git a/deepmd/common.py b/deepmd/common.py index 0286125ec5..4037bdd6f0 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -2,9 +2,12 @@ import json import warnings -import tensorflow -from functools import wraps -from pathlib import Path +from functools import ( + wraps, +) +from pathlib import ( + Path, +) from typing import ( TYPE_CHECKING, Any, @@ -18,14 +21,27 @@ ) import numpy as np +import tensorflow import yaml +from tensorflow.python.framework import ( + tensor_util, +) -from deepmd.env import op_module, tf -from tensorflow.python.framework import tensor_util -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION, GLOBAL_NP_FLOAT_PRECISION -from deepmd.utils.sess import run_sess -from deepmd.utils.errors import GraphWithoutTensorError -from deepmd.utils.path import DPPath +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + op_module, + tf, +) +from deepmd.utils.errors import ( + GraphWithoutTensorError, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.sess import ( + run_sess, +) if TYPE_CHECKING: _DICT_VAL = TypeVar("_DICT_VAL") @@ -34,7 +50,9 @@ from typing import Literal # python >3.6 except ImportError: from typing_extensions import Literal # type: ignore - _ACTIVATION = Literal["relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf"] + _ACTIVATION = Literal[ + "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu", "gelu_tf" + ] _PRECISION = Literal["default", "float16", "float32", "float64"] # define constants @@ -90,14 +108,19 @@ def gelu_tf(x: tf.Tensor) -> tf.Tensor: Original paper https://arxiv.org/abs/1606.08415 """ + def gelu_wrapper(x): try: return tensorflow.nn.gelu(x, approximate=True) except AttributeError: - warnings.warn("TensorFlow does not provide an implementation of gelu, 
please upgrade your TensorFlow version. Fallback to the custom gelu operator.") + warnings.warn( + "TensorFlow does not provide an implementation of gelu, please upgrade your TensorFlow version. Fallback to the custom gelu operator." + ) return op_module.gelu_custom(x) + return (lambda x: gelu_wrapper(x))(x) + # TODO this is not a good way to do things. This is some global variable to which # TODO anyone can write and there is no good way to keep track of the changes data_requirement = {} @@ -121,7 +144,7 @@ def add_data_requirement( high_prec: bool = False, type_sel: Optional[bool] = None, repeat: int = 1, - default: float = 0., + default: float = 0.0, dtype: Optional[np.dtype] = None, ): """Specify data requirements for training. @@ -161,9 +184,7 @@ def add_data_requirement( } -def select_idx_map( - atom_types: np.ndarray, select_types: np.ndarray -) -> np.ndarray: +def select_idx_map(atom_types: np.ndarray, select_types: np.ndarray) -> np.ndarray: """Build map of indices for element supplied element types from all atoms list. Parameters @@ -190,9 +211,7 @@ def select_idx_map( # TODO not really sure if the docstring is right the purpose of this is a bit unclear -def make_default_mesh( - test_box: np.ndarray, cell_size: float = 3.0 -) -> np.ndarray: +def make_default_mesh(test_box: np.ndarray, cell_size: float = 3.0) -> np.ndarray: """Get number of cells of size=`cell_size` fit into average box. 
Parameters @@ -294,7 +313,7 @@ def get_activation_func( RuntimeError if unknown activation function is specified """ - if activation_fn is None or activation_fn in ['none', 'None']: + if activation_fn is None or activation_fn in ["none", "None"]: return None if activation_fn not in ACTIVATION_FN_DICT: raise RuntimeError(f"{activation_fn} is not a valid activation function") @@ -375,9 +394,9 @@ def get_np_precision(precision: "_PRECISION") -> np.dtype: raise RuntimeError(f"{precision} is not a valid precision") -def safe_cast_tensor(input: tf.Tensor, - from_precision: tf.DType, - to_precision: tf.DType) -> tf.Tensor: +def safe_cast_tensor( + input: tf.Tensor, from_precision: tf.DType, to_precision: tf.DType +) -> tf.Tensor: """Convert a Tensor from a precision to another precision. If input is not a Tensor or without the specific precision, the method will not @@ -434,18 +453,33 @@ def cast_precision(func: Callable) -> Callable: ... def f(x: tf.Tensor, y: tf.Tensor) -> tf.Tensor: ... return x ** 2 + y """ + @wraps(func) def wrapper(self, *args, **kwargs): # only convert tensors returned_tensor = func( self, - *[safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision) for vv in args], - **{kk: safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision) for kk, vv in kwargs.items()}, + *[ + safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision) + for vv in args + ], + **{ + kk: safe_cast_tensor(vv, GLOBAL_TF_FLOAT_PRECISION, self.precision) + for kk, vv in kwargs.items() + }, ) if isinstance(returned_tensor, tuple): - return tuple((safe_cast_tensor(vv, self.precision, GLOBAL_TF_FLOAT_PRECISION) for vv in returned_tensor)) + return tuple( + ( + safe_cast_tensor(vv, self.precision, GLOBAL_TF_FLOAT_PRECISION) + for vv in returned_tensor + ) + ) else: - return safe_cast_tensor(returned_tensor, self.precision, GLOBAL_TF_FLOAT_PRECISION) + return safe_cast_tensor( + returned_tensor, self.precision, GLOBAL_TF_FLOAT_PRECISION + ) + return wrapper diff 
--git a/deepmd/descriptor/__init__.py b/deepmd/descriptor/__init__.py index e4df063a81..3e2c0e59b0 100644 --- a/deepmd/descriptor/__init__.py +++ b/deepmd/descriptor/__init__.py @@ -1,10 +1,28 @@ -from .descriptor import Descriptor -from .hybrid import DescrptHybrid -from .se_a import DescrptSeA -from .se_r import DescrptSeR -from .se_t import DescrptSeT -from .se_a_ebd import DescrptSeAEbd -from .se_a_ef import DescrptSeAEf -from .se_a_ef import DescrptSeAEfLower -from .loc_frame import DescrptLocFrame -from .se_atten import DescrptSeAtten +from .descriptor import ( + Descriptor, +) +from .hybrid import ( + DescrptHybrid, +) +from .loc_frame import ( + DescrptLocFrame, +) +from .se_a import ( + DescrptSeA, +) +from .se_a_ebd import ( + DescrptSeAEbd, +) +from .se_a_ef import ( + DescrptSeAEf, + DescrptSeAEfLower, +) +from .se_atten import ( + DescrptSeAtten, +) +from .se_r import ( + DescrptSeR, +) +from .se_t import ( + DescrptSeT, +) diff --git a/deepmd/descriptor/descriptor.py b/deepmd/descriptor/descriptor.py index a024783ec4..fa31a39a63 100644 --- a/deepmd/descriptor/descriptor.py +++ b/deepmd/descriptor/descriptor.py @@ -1,9 +1,25 @@ -from abc import ABC, abstractmethod -from typing import Optional, Any, Dict, List, Tuple +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + Any, + Dict, + List, + Optional, + Tuple, +) import numpy as np -from deepmd.env import tf, GLOBAL_TF_FLOAT_PRECISION -from deepmd.utils import Plugin, PluginVariant + +from deepmd.env import ( + GLOBAL_TF_FLOAT_PRECISION, + tf, +) +from deepmd.utils import ( + Plugin, + PluginVariant, +) class Descriptor(PluginVariant): @@ -52,13 +68,13 @@ class SomeDescript(Descriptor): def __new__(cls, *args, **kwargs): if cls is Descriptor: try: - descrpt_type = kwargs['type'] + descrpt_type = kwargs["type"] except KeyError: - raise KeyError('the type of descriptor should be set by `type`') + raise KeyError("the type of descriptor should be set by `type`") if descrpt_type in 
Descriptor.__plugins.plugins: cls = Descriptor.__plugins.plugins[descrpt_type] else: - raise RuntimeError('Unknown descriptor type: ' + descrpt_type) + raise RuntimeError("Unknown descriptor type: " + descrpt_type) return super().__new__(cls) @abstractmethod @@ -141,14 +157,15 @@ def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: raise NotImplementedError @abstractmethod - def compute_input_stats(self, - data_coord: List[np.ndarray], - data_box: List[np.ndarray], - data_atype: List[np.ndarray], - natoms_vec: List[np.ndarray], - mesh: List[np.ndarray], - input_dict: Dict[str, List[np.ndarray]] - ) -> None: + def compute_input_stats( + self, + data_coord: List[np.ndarray], + data_box: List[np.ndarray], + data_atype: List[np.ndarray], + natoms_vec: List[np.ndarray], + mesh: List[np.ndarray], + input_dict: Dict[str, List[np.ndarray]], + ) -> None: """ Compute the statisitcs (avg and std) of the training data. The input will be normalized by the statistics. @@ -178,16 +195,17 @@ def compute_input_stats(self, """ @abstractmethod - def build(self, - coord_: tf.Tensor, - atype_: tf.Tensor, - natoms: tf.Tensor, - box_: tf.Tensor, - mesh: tf.Tensor, - input_dict: Dict[str, Any], - reuse: Optional[bool] = None, - suffix: str = '', - ) -> tf.Tensor: + def build( + self, + coord_: tf.Tensor, + atype_: tf.Tensor, + natoms: tf.Tensor, + box_: tf.Tensor, + mesh: tf.Tensor, + input_dict: Dict[str, Any], + reuse: Optional[bool] = None, + suffix: str = "", + ) -> tf.Tensor: """ Build the computational graph for the descriptor. @@ -225,16 +243,17 @@ def build(self, This method must be implemented, as it's called by other classes. 
""" - def enable_compression(self, - min_nbor_dist: float, - graph: tf.Graph, - graph_def: tf.GraphDef, - table_extrapolate: float = 5., - table_stride_1: float = 0.01, - table_stride_2: float = 0.1, - check_frequency: int = -1, - suffix: str = "", - ) -> None: + def enable_compression( + self, + min_nbor_dist: float, + graph: tf.Graph, + graph_def: tf.GraphDef, + table_extrapolate: float = 5.0, + table_stride_1: float = 0.01, + table_stride_2: float = 0.1, + check_frequency: int = -1, + suffix: str = "", + ) -> None: """ Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the training data. @@ -263,7 +282,8 @@ def enable_compression(self, This method is called by others when the descriptor supported compression. """ raise NotImplementedError( - "Descriptor %s doesn't support compression!" % type(self).__name__) + "Descriptor %s doesn't support compression!" % type(self).__name__ + ) def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None: """ @@ -284,10 +304,9 @@ def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None: ) @abstractmethod - def prod_force_virial(self, - atom_ener: tf.Tensor, - natoms: tf.Tensor - ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + def prod_force_virial( + self, atom_ener: tf.Tensor, natoms: tf.Tensor + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """ Compute force and virial. @@ -311,13 +330,14 @@ def prod_force_virial(self, The atomic virial """ - def get_feed_dict(self, - coord_: tf.Tensor, - atype_: tf.Tensor, - natoms: tf.Tensor, - box: tf.Tensor, - mesh: tf.Tensor - ) -> Dict[str, tf.Tensor]: + def get_feed_dict( + self, + coord_: tf.Tensor, + atype_: tf.Tensor, + natoms: tf.Tensor, + box: tf.Tensor, + mesh: tf.Tensor, + ) -> Dict[str, tf.Tensor]: """ Generate the feed_dict for current descriptor @@ -336,8 +356,8 @@ def get_feed_dict(self, The box. Can be generated by deepmd.model.make_stat_input mesh : tf.Tensor For historical reasons, only the length of the Tensor matters. 
- if size of mesh == 6, pbc is assumed. - if size of mesh == 0, no-pbc is assumed. + if size of mesh == 6, pbc is assumed. + if size of mesh == 0, no-pbc is assumed. Returns ------- @@ -345,18 +365,19 @@ def get_feed_dict(self, The output feed_dict of current descriptor """ feed_dict = { - 't_coord:0' :coord_, - 't_type:0' :atype_, - 't_natoms:0' :natoms, - 't_box:0' :box, - 't_mesh:0' :mesh + "t_coord:0": coord_, + "t_type:0": atype_, + "t_natoms:0": natoms, + "t_box:0": box, + "t_mesh:0": mesh, } return feed_dict - def init_variables(self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix : str = "", + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix: str = "", ) -> None: """ Init the embedding net variables with the given dict @@ -369,17 +390,19 @@ def init_variables(self, The input frozen model graph_def suffix : str, optional The suffix of the scope - + Notes ----- This method is called by others when the descriptor supported initialization from the given variables. """ raise NotImplementedError( - "Descriptor %s doesn't support initialization from the given variables!" % type(self).__name__) + "Descriptor %s doesn't support initialization from the given variables!" + % type(self).__name__ + ) - def get_tensor_names(self, suffix : str = "") -> Tuple[str]: + def get_tensor_names(self, suffix: str = "") -> Tuple[str]: """Get names of tensors. - + Parameters ---------- suffix : str @@ -390,10 +413,13 @@ def get_tensor_names(self, suffix : str = "") -> Tuple[str]: Tuple[str] Names of tensors """ - raise NotImplementedError("Descriptor %s doesn't support this property!" % type(self).__name__) + raise NotImplementedError( + "Descriptor %s doesn't support this property!" 
% type(self).__name__ + ) - def pass_tensors_from_frz_model(self, - *tensors : tf.Tensor, + def pass_tensors_from_frz_model( + self, + *tensors: tf.Tensor, ) -> None: """ Pass the descrpt_reshape tensor as well as descrpt_deriv tensor from the frz graph_def @@ -402,21 +428,25 @@ def pass_tensors_from_frz_model(self, ---------- *tensors : tf.Tensor passed tensors - + Notes ----- The number of parameters in the method must be equal to the numbers of returns in :meth:`get_tensor_names`. """ - raise NotImplementedError("Descriptor %s doesn't support this method!" % type(self).__name__) - - def build_type_exclude_mask(self, - exclude_types: List[Tuple[int, int]], - ntypes: int, - sel: List[int], - ndescrpt: int, - atype: tf.Tensor, - shape0: tf.Tensor) -> tf.Tensor: + raise NotImplementedError( + "Descriptor %s doesn't support this method!" % type(self).__name__ + ) + + def build_type_exclude_mask( + self, + exclude_types: List[Tuple[int, int]], + ntypes: int, + sel: List[int], + ndescrpt: int, + atype: tf.Tensor, + shape0: tf.Tensor, + ) -> tf.Tensor: r"""Build the type exclude mask for the descriptor. Notes @@ -470,12 +500,17 @@ def build_type_exclude_mask(self, 17 (11), 6993-7009. 
""" # generate a mask - type_mask = np.array([ - [1 if (tt_i, tt_j) not in exclude_types else 0 - for tt_i in range(ntypes)] - for tt_j in range(ntypes) - ], dtype = bool) - type_mask = tf.convert_to_tensor(type_mask, dtype = GLOBAL_TF_FLOAT_PRECISION) + type_mask = np.array( + [ + [ + 1 if (tt_i, tt_j) not in exclude_types else 0 + for tt_i in range(ntypes) + ] + for tt_j in range(ntypes) + ], + dtype=bool, + ) + type_mask = tf.convert_to_tensor(type_mask, dtype=GLOBAL_TF_FLOAT_PRECISION) type_mask = tf.reshape(type_mask, [-1]) # (nsamples * natoms, 1) @@ -485,8 +520,10 @@ def build_type_exclude_mask(self, ndescrpt_per_neighbor = ndescrpt // np.sum(sel) # assume the number of neighbors for each type is the same assert ndescrpt_per_neighbor * np.sum(sel) == ndescrpt - atype_descrpt = np.repeat(np.arange(ntypes), np.array(sel) * ndescrpt_per_neighbor) - atype_descrpt = tf.convert_to_tensor(atype_descrpt, dtype = tf.int32) + atype_descrpt = np.repeat( + np.arange(ntypes), np.array(sel) * ndescrpt_per_neighbor + ) + atype_descrpt = tf.convert_to_tensor(atype_descrpt, dtype=tf.int32) # (1, ndescrpt) atype_descrpt = tf.reshape(atype_descrpt, (1, ndescrpt)) # (nsamples * natoms, ndescrpt) diff --git a/deepmd/descriptor/hybrid.py b/deepmd/descriptor/hybrid.py index 060e1400d8..2795271eed 100644 --- a/deepmd/descriptor/hybrid.py +++ b/deepmd/descriptor/hybrid.py @@ -1,26 +1,48 @@ +from typing import ( + List, + Tuple, +) + import numpy as np -from typing import Tuple, List -from deepmd.env import tf -from deepmd.env import op_module -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + op_module, + tf, +) + # from deepmd.descriptor import DescrptLocFrame # from deepmd.descriptor import DescrptSeA # from deepmd.descriptor import DescrptSeT # from deepmd.descriptor import DescrptSeAEbd # from deepmd.descriptor import DescrptSeAEf # from 
deepmd.descriptor import DescrptSeR -from .descriptor import Descriptor -from .se_a import DescrptSeA -from .se_r import DescrptSeR -from .se_t import DescrptSeT -from .se_a_ebd import DescrptSeAEbd -from .se_a_ef import DescrptSeAEf -from .loc_frame import DescrptLocFrame +from .descriptor import ( + Descriptor, +) +from .loc_frame import ( + DescrptLocFrame, +) +from .se_a import ( + DescrptSeA, +) +from .se_a_ebd import ( + DescrptSeAEbd, +) +from .se_a_ef import ( + DescrptSeAEf, +) +from .se_r import ( + DescrptSeR, +) +from .se_t import ( + DescrptSeT, +) + @Descriptor.register("hybrid") -class DescrptHybrid (Descriptor): +class DescrptHybrid(Descriptor): """Concate a list of descriptors to form a new descriptor. Parameters @@ -28,17 +50,17 @@ class DescrptHybrid (Descriptor): list : list Build a descriptor from the concatenation of the list of descriptors. """ - def __init__ (self, - list : list, - multi_task: bool = False - ) -> None : + + def __init__(self, list: list, multi_task: bool = False) -> None: """ Constructor """ # warning: list is conflict with built-in list descrpt_list = list if descrpt_list == [] or descrpt_list is None: - raise RuntimeError('cannot build descriptor from an empty list of descriptors.') + raise RuntimeError( + "cannot build descriptor from an empty list of descriptors." 
+ ) formatted_descript_list = [] self.multi_task = multi_task for ii in descrpt_list: @@ -46,43 +68,39 @@ def __init__ (self, formatted_descript_list.append(ii) elif isinstance(ii, dict): if multi_task: - ii['multi_task'] = True + ii["multi_task"] = True formatted_descript_list.append(Descriptor(**ii)) else: raise NotImplementedError self.descrpt_list = formatted_descript_list self.numb_descrpt = len(self.descrpt_list) for ii in range(1, self.numb_descrpt): - assert(self.descrpt_list[ii].get_ntypes() == - self.descrpt_list[ 0].get_ntypes()), \ - f'number of atom types in {ii}th descrptor does not match others' - + assert ( + self.descrpt_list[ii].get_ntypes() == self.descrpt_list[0].get_ntypes() + ), f"number of atom types in {ii}th descrptor does not match others" - def get_rcut (self) -> float: + def get_rcut(self) -> float: """ Returns the cut-off radius """ all_rcut = [ii.get_rcut() for ii in self.descrpt_list] return np.max(all_rcut) - - def get_ntypes (self) -> int: + def get_ntypes(self) -> int: """ Returns the number of atom types """ return self.descrpt_list[0].get_ntypes() - - def get_dim_out (self) -> int: + def get_dim_out(self) -> int: """ Returns the output dimension of this descriptor """ all_dim_out = [ii.get_dim_out() for ii in self.descrpt_list] return sum(all_dim_out) - def get_nlist( - self, + self, ) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: """Get the neighbor information of the descriptor, returns the nlist of the descriptor with the largest cut-off radius. 
@@ -101,10 +119,7 @@ def get_nlist( maxr_idx = np.argmax([ii.get_rcut() for ii in self.descrpt_list]) return self.get_nlist_i(maxr_idx) - - def get_nlist_i(self, - ii : int - ) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + def get_nlist_i(self, ii: int) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: """Get the neighbor information of the ii-th descriptor Parameters @@ -123,20 +138,25 @@ def get_nlist_i(self, sel_r The number of neighbors with only radial information """ - return self.descrpt_list[ii].nlist, self.descrpt_list[ii].rij, self.descrpt_list[ii].sel_a, self.descrpt_list[ii].sel_r - - - def compute_input_stats (self, - data_coord : list, - data_box : list, - data_atype : list, - natoms_vec : list, - mesh : list, - input_dict : dict - ) -> None : + return ( + self.descrpt_list[ii].nlist, + self.descrpt_list[ii].rij, + self.descrpt_list[ii].sel_a, + self.descrpt_list[ii].sel_r, + ) + + def compute_input_stats( + self, + data_coord: list, + data_box: list, + data_atype: list, + natoms_vec: list, + mesh: list, + input_dict: dict, + ) -> None: """ Compute the statisitcs (avg and std) of the training data. The input will be normalized by the statistics. 
- + Parameters ---------- data_coord @@ -153,7 +173,9 @@ def compute_input_stats (self, Dictionary for additional input """ for ii in self.descrpt_list: - ii.compute_input_stats(data_coord, data_box, data_atype, natoms_vec, mesh, input_dict) + ii.compute_input_stats( + data_coord, data_box, data_atype, natoms_vec, mesh, input_dict + ) def merge_input_stats(self, stat_dict): """ @@ -177,15 +199,16 @@ def merge_input_stats(self, stat_dict): for ii in self.descrpt_list: ii.merge_input_stats(stat_dict) - def build (self, - coord_ : tf.Tensor, - atype_ : tf.Tensor, - natoms : tf.Tensor, - box_ : tf.Tensor, - mesh : tf.Tensor, - input_dict : dict, - reuse : bool = None, - suffix : str = '' + def build( + self, + coord_: tf.Tensor, + atype_: tf.Tensor, + natoms: tf.Tensor, + box_: tf.Tensor, + mesh: tf.Tensor, + input_dict: dict, + reuse: bool = None, + suffix: str = "", ) -> tf.Tensor: """ Build the computational graph for the descriptor @@ -203,8 +226,8 @@ def build (self, natoms[i]: 2 <= i < Ntypes+2, number of type i atoms mesh For historical reasons, only the length of the Tensor matters. - if size of mesh == 6, pbc is assumed. - if size of mesh == 0, no-pbc is assumed. + if size of mesh == 6, pbc is assumed. + if size of mesh == 0, no-pbc is assumed. 
input_dict Dictionary for additional inputs reuse @@ -217,26 +240,31 @@ def build (self, descriptor The output descriptor """ - with tf.variable_scope('descrpt_attr' + suffix, reuse = reuse) : - t_rcut = tf.constant(self.get_rcut(), - name = 'rcut', - dtype = GLOBAL_TF_FLOAT_PRECISION) - t_ntypes = tf.constant(self.get_ntypes(), - name = 'ntypes', - dtype = tf.int32) + with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse): + t_rcut = tf.constant( + self.get_rcut(), name="rcut", dtype=GLOBAL_TF_FLOAT_PRECISION + ) + t_ntypes = tf.constant(self.get_ntypes(), name="ntypes", dtype=tf.int32) all_dout = [] - for idx,ii in enumerate(self.descrpt_list): - dout = ii.build(coord_, atype_, natoms, box_, mesh, input_dict, suffix=suffix+f'_{idx}', reuse=reuse) + for idx, ii in enumerate(self.descrpt_list): + dout = ii.build( + coord_, + atype_, + natoms, + box_, + mesh, + input_dict, + suffix=suffix + f"_{idx}", + reuse=reuse, + ) dout = tf.reshape(dout, [-1, ii.get_dim_out()]) all_dout.append(dout) - dout = tf.concat(all_dout, axis = 1) + dout = tf.concat(all_dout, axis=1) dout = tf.reshape(dout, [-1, natoms[0], self.get_dim_out()]) return dout - - def prod_force_virial(self, - atom_ener : tf.Tensor, - natoms : tf.Tensor + def prod_force_virial( + self, atom_ener: tf.Tensor, natoms: tf.Tensor ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """ Compute force and virial @@ -260,7 +288,7 @@ def prod_force_virial(self, atom_virial The atomic virial """ - for idx,ii in enumerate(self.descrpt_list): + for idx, ii in enumerate(self.descrpt_list): ff, vv, av = ii.prod_force_virial(atom_ener, natoms) if idx == 0: force = ff @@ -272,16 +300,17 @@ def prod_force_virial(self, atom_virial += av return force, virial, atom_virial - def enable_compression(self, - min_nbor_dist: float, - graph: tf.Graph, - graph_def: tf.GraphDef, - table_extrapolate: float = 5., - table_stride_1: float = 0.01, - table_stride_2: float = 0.1, - check_frequency: int = -1, - suffix: str = "" - ) -> None: + def 
enable_compression( + self, + min_nbor_dist: float, + graph: tf.Graph, + graph_def: tf.GraphDef, + table_extrapolate: float = 5.0, + table_stride_1: float = 0.01, + table_stride_2: float = 0.1, + check_frequency: int = -1, + suffix: str = "", + ) -> None: """ Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the training data. @@ -306,10 +335,18 @@ def enable_compression(self, The suffix of the scope """ for idx, ii in enumerate(self.descrpt_list): - ii.enable_compression(min_nbor_dist, graph, graph_def, table_extrapolate, table_stride_1, table_stride_2, check_frequency, suffix=f"{suffix}_{idx}") - - - def enable_mixed_precision(self, mixed_prec : dict = None) -> None: + ii.enable_compression( + min_nbor_dist, + graph, + graph_def, + table_extrapolate, + table_stride_1, + table_stride_2, + check_frequency, + suffix=f"{suffix}_{idx}", + ) + + def enable_mixed_precision(self, mixed_prec: dict = None) -> None: """ Reveive the mixed precision setting. @@ -321,11 +358,11 @@ def enable_mixed_precision(self, mixed_prec : dict = None) -> None: for idx, ii in enumerate(self.descrpt_list): ii.enable_mixed_precision(mixed_prec) - - def init_variables(self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix : str = "", + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix: str = "", ) -> None: """ Init the embedding net variables with the given dict @@ -342,9 +379,9 @@ def init_variables(self, for idx, ii in enumerate(self.descrpt_list): ii.init_variables(graph, graph_def, suffix=f"{suffix}_{idx}") - def get_tensor_names(self, suffix : str = "") -> Tuple[str]: + def get_tensor_names(self, suffix: str = "") -> Tuple[str]: """Get names of tensors. 
- + Parameters ---------- suffix : str @@ -360,8 +397,9 @@ def get_tensor_names(self, suffix : str = "") -> Tuple[str]: tensor_names.extend(ii.get_tensor_names(suffix=f"{suffix}_{idx}")) return tuple(tensor_names) - def pass_tensors_from_frz_model(self, - *tensors : tf.Tensor, + def pass_tensors_from_frz_model( + self, + *tensors: tf.Tensor, ) -> None: """ Pass the descrpt_reshape tensor as well as descrpt_deriv tensor from the frz graph_def @@ -374,5 +412,5 @@ def pass_tensors_from_frz_model(self, jj = 0 for ii in self.descrpt_list: n_tensors = len(ii.get_tensor_names()) - ii.pass_tensors_from_frz_model(*tensors[jj:jj+n_tensors]) + ii.pass_tensors_from_frz_model(*tensors[jj : jj + n_tensors]) jj += n_tensors diff --git a/deepmd/descriptor/loc_frame.py b/deepmd/descriptor/loc_frame.py index d96b38385c..76ae49dd78 100644 --- a/deepmd/descriptor/loc_frame.py +++ b/deepmd/descriptor/loc_frame.py @@ -1,17 +1,31 @@ +from typing import ( + List, + Tuple, +) + import numpy as np -from typing import Tuple, List -from deepmd.env import tf -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION -from deepmd.env import op_module -from deepmd.env import default_tf_session_config -from deepmd.utils.sess import run_sess -from .descriptor import Descriptor -from deepmd.utils.graph import get_tensor_by_name_from_graph +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + default_tf_session_config, + op_module, + tf, +) +from deepmd.utils.graph import ( + get_tensor_by_name_from_graph, +) +from deepmd.utils.sess import ( + run_sess, +) + +from .descriptor import ( + Descriptor, +) + @Descriptor.register("loc_frame") -class DescrptLocFrame (Descriptor) : +class DescrptLocFrame(Descriptor): """Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame. 
@@ -20,16 +34,16 @@ class DescrptLocFrame (Descriptor) : rcut The cut-off radius sel_a : list[str] - The length of the list should be the same as the number of atom types in the system. - `sel_a[i]` gives the selected number of type-i neighbors. + The length of the list should be the same as the number of atom types in the system. + `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor. sel_r : list[str] - The length of the list should be the same as the number of atom types in the system. - `sel_r[i]` gives the selected number of type-i neighbors. + The length of the list should be the same as the number of atom types in the system. + `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. - sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. + sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. axis_rule: list[int] - The length should be 6 times of the number of types. + The length should be 6 times of the number of types. - axis_rule[i*6+0]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\ - axis_rule[i*6+1]: type of the atom defining the first axis of type-i atom.\n\n\ - axis_rule[i*6+2]: index of the axis atom defining the first axis. Note that the neighbors with the same class and type are sorted according to their relative distance.\n\n\ @@ -37,14 +51,12 @@ class DescrptLocFrame (Descriptor) : - axis_rule[i*6+4]: type of the atom defining the second axis of type-i atom.\n\n\ - axis_rule[i*6+5]: index of the axis atom defining the second axis. Note that the neighbors with the same class and type are sorted according to their relative distance. 
""" - def __init__(self, - rcut: float, - sel_a : List[int], - sel_r : List[int], - axis_rule : List[int] + + def __init__( + self, rcut: float, sel_a: List[int], sel_r: List[int], axis_rule: List[int] ) -> None: """ - Constructor + Constructor """ self.sel_a = sel_a self.sel_r = sel_r @@ -52,7 +64,7 @@ def __init__(self, self.rcut_r = rcut # ntypes and rcut_a === -1 self.ntypes = len(self.sel_a) - assert(self.ntypes == len(self.sel_r)) + assert self.ntypes == len(self.sel_r) self.rcut_a = -1 # numb of neighbors and numb of descrptors self.nnei_a = np.cumsum(self.sel_a)[-1] @@ -65,51 +77,70 @@ def __init__(self, self.dstd = None self.place_holders = {} - avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) - std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) + avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) + std_ones = np.ones([self.ntypes, self.ndescrpt]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) sub_graph = tf.Graph() with sub_graph.as_default(): - name_pfx = 'd_lf_' - for ii in ['coord', 'box']: - self.place_holders[ii] = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None], name = name_pfx+'t_'+ii) - self.place_holders['type'] = tf.placeholder(tf.int32, [None, None], name=name_pfx+'t_type') - self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes+2], name=name_pfx+'t_natoms') - self.place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name=name_pfx+'t_mesh') - self.stat_descrpt, descrpt_deriv, rij, nlist, axis, rot_mat \ - = op_module.descrpt (self.place_holders['coord'], - self.place_holders['type'], - self.place_holders['natoms_vec'], - self.place_holders['box'], - self.place_holders['default_mesh'], - tf.constant(avg_zero), - tf.constant(std_ones), - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - sel_a = self.sel_a, - sel_r = self.sel_r, - axis_rule = self.axis_rule) - self.sub_sess = tf.Session(graph = sub_graph, 
config=default_tf_session_config) - + name_pfx = "d_lf_" + for ii in ["coord", "box"]: + self.place_holders[ii] = tf.placeholder( + GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_" + ii + ) + self.place_holders["type"] = tf.placeholder( + tf.int32, [None, None], name=name_pfx + "t_type" + ) + self.place_holders["natoms_vec"] = tf.placeholder( + tf.int32, [self.ntypes + 2], name=name_pfx + "t_natoms" + ) + self.place_holders["default_mesh"] = tf.placeholder( + tf.int32, [None], name=name_pfx + "t_mesh" + ) + ( + self.stat_descrpt, + descrpt_deriv, + rij, + nlist, + axis, + rot_mat, + ) = op_module.descrpt( + self.place_holders["coord"], + self.place_holders["type"], + self.place_holders["natoms_vec"], + self.place_holders["box"], + self.place_holders["default_mesh"], + tf.constant(avg_zero), + tf.constant(std_ones), + rcut_a=self.rcut_a, + rcut_r=self.rcut_r, + sel_a=self.sel_a, + sel_r=self.sel_r, + axis_rule=self.axis_rule, + ) + self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) - def get_rcut (self) -> float: + def get_rcut(self) -> float: """ Returns the cut-off radius """ return self.rcut_r - def get_ntypes (self) -> int: + def get_ntypes(self) -> int: """ Returns the number of atom types """ return self.ntypes - def get_dim_out (self) -> int: + def get_dim_out(self) -> int: """ Returns the output dimension of this descriptor """ return self.ndescrpt - def get_nlist (self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: """ Returns ------- @@ -124,17 +155,18 @@ def get_nlist (self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: """ return self.nlist, self.rij, self.sel_a, self.sel_r - def compute_input_stats (self, - data_coord : list, - data_box : list, - data_atype : list, - natoms_vec : list, - mesh : list, - input_dict : dict - ) -> None : + def compute_input_stats( + self, + data_coord: list, + data_box: list, + data_atype: 
list, + natoms_vec: list, + mesh: list, + input_dict: dict, + ) -> None: """ Compute the statisitcs (avg and std) of the training data. The input will be normalized by the statistics. - + Parameters ---------- data_coord @@ -156,36 +188,37 @@ def compute_input_stats (self, sumv = [] sumn = [] sumv2 = [] - for cc,bb,tt,nn,mm in zip(data_coord,data_box,data_atype,natoms_vec,mesh) : - sysv,sysv2,sysn \ - = self._compute_dstats_sys_nonsmth(cc,bb,tt,nn,mm) + for cc, bb, tt, nn, mm in zip( + data_coord, data_box, data_atype, natoms_vec, mesh + ): + sysv, sysv2, sysn = self._compute_dstats_sys_nonsmth(cc, bb, tt, nn, mm) sumv.append(sysv) sumn.append(sysn) sumv2.append(sysv2) - sumv = np.sum(sumv, axis = 0) - sumn = np.sum(sumn, axis = 0) - sumv2 = np.sum(sumv2, axis = 0) - for type_i in range(self.ntypes) : - davg = sumv[type_i] / sumn[type_i] + sumv = np.sum(sumv, axis=0) + sumn = np.sum(sumn, axis=0) + sumv2 = np.sum(sumv2, axis=0) + for type_i in range(self.ntypes): + davg = sumv[type_i] / sumn[type_i] dstd = self._compute_std(sumv2[type_i], sumv[type_i], sumn[type_i]) - for ii in range (len(dstd)) : - if (np.abs(dstd[ii]) < 1e-2) : - dstd[ii] = 1e-2 + for ii in range(len(dstd)): + if np.abs(dstd[ii]) < 1e-2: + dstd[ii] = 1e-2 all_davg.append(davg) all_dstd.append(dstd) self.davg = np.array(all_davg) - self.dstd = np.array(all_dstd) + self.dstd = np.array(all_dstd) - - def build (self, - coord_ : tf.Tensor, - atype_ : tf.Tensor, - natoms : tf.Tensor, - box_ : tf.Tensor, - mesh : tf.Tensor, - input_dict : dict, - reuse : bool = None, - suffix : str = '' + def build( + self, + coord_: tf.Tensor, + atype_: tf.Tensor, + natoms: tf.Tensor, + box_: tf.Tensor, + mesh: tf.Tensor, + input_dict: dict, + reuse: bool = None, + suffix: str = "", ) -> tf.Tensor: """ Build the computational graph for the descriptor @@ -203,8 +236,8 @@ def build (self, natoms[i]: 2 <= i < Ntypes+2, number of type i atoms mesh For historical reasons, only the length of the Tensor matters. 
- if size of mesh == 6, pbc is assumed. - if size of mesh == 0, no-pbc is assumed. + if size of mesh == 6, pbc is assumed. + if size of mesh == 0, no-pbc is assumed. input_dict Dictionary for additional inputs reuse @@ -219,49 +252,61 @@ def build (self, """ davg = self.davg dstd = self.dstd - with tf.variable_scope('descrpt_attr' + suffix, reuse = reuse) : + with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse): if davg is None: - davg = np.zeros([self.ntypes, self.ndescrpt]) + davg = np.zeros([self.ntypes, self.ndescrpt]) if dstd is None: - dstd = np.ones ([self.ntypes, self.ndescrpt]) - t_rcut = tf.constant(np.max([self.rcut_r, self.rcut_a]), - name = 'rcut', - dtype = GLOBAL_TF_FLOAT_PRECISION) - t_ntypes = tf.constant(self.ntypes, - name = 'ntypes', - dtype = tf.int32) - self.t_avg = tf.get_variable('t_avg', - davg.shape, - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(davg)) - self.t_std = tf.get_variable('t_std', - dstd.shape, - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(dstd)) + dstd = np.ones([self.ntypes, self.ndescrpt]) + t_rcut = tf.constant( + np.max([self.rcut_r, self.rcut_a]), + name="rcut", + dtype=GLOBAL_TF_FLOAT_PRECISION, + ) + t_ntypes = tf.constant(self.ntypes, name="ntypes", dtype=tf.int32) + self.t_avg = tf.get_variable( + "t_avg", + davg.shape, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(davg), + ) + self.t_std = tf.get_variable( + "t_std", + dstd.shape, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(dstd), + ) - coord = tf.reshape (coord_, [-1, natoms[1] * 3]) - box = tf.reshape (box_, [-1, 9]) - atype = tf.reshape (atype_, [-1, natoms[1]]) + coord = tf.reshape(coord_, [-1, natoms[1] * 3]) + box = tf.reshape(box_, [-1, 9]) + atype = tf.reshape(atype_, [-1, natoms[1]]) - self.descrpt, self.descrpt_deriv, self.rij, self.nlist, self.axis, 
self.rot_mat \ - = op_module.descrpt (coord, - atype, - natoms, - box, - mesh, - self.t_avg, - self.t_std, - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - sel_a = self.sel_a, - sel_r = self.sel_r, - axis_rule = self.axis_rule) + ( + self.descrpt, + self.descrpt_deriv, + self.rij, + self.nlist, + self.axis, + self.rot_mat, + ) = op_module.descrpt( + coord, + atype, + natoms, + box, + mesh, + self.t_avg, + self.t_std, + rcut_a=self.rcut_a, + rcut_r=self.rcut_r, + sel_a=self.sel_a, + sel_r=self.sel_r, + axis_rule=self.axis_rule, + ) self.descrpt = tf.reshape(self.descrpt, [-1, self.ndescrpt]) - tf.summary.histogram('descrpt', self.descrpt) - tf.summary.histogram('rij', self.rij) - tf.summary.histogram('nlist', self.nlist) + tf.summary.histogram("descrpt", self.descrpt) + tf.summary.histogram("rij", self.rij) + tf.summary.histogram("nlist", self.nlist) return self.descrpt @@ -271,9 +316,8 @@ def get_rot_mat(self) -> tf.Tensor: """ return self.rot_mat - def prod_force_virial(self, - atom_ener : tf.Tensor, - natoms : tf.Tensor + def prod_force_virial( + self, atom_ener: tf.Tensor, natoms: tf.Tensor ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """ Compute force and virial @@ -297,47 +341,51 @@ def prod_force_virial(self, atom_virial The atomic virial """ - [net_deriv] = tf.gradients (atom_ener, self.descrpt) - tf.summary.histogram('net_derivative', net_deriv) - net_deriv_reshape = tf.reshape (net_deriv, [np.cast['int64'](-1), natoms[0] * np.cast['int64'](self.ndescrpt)]) - force = op_module.prod_force (net_deriv_reshape, - self.descrpt_deriv, - self.nlist, - self.axis, - natoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) - virial, atom_virial \ - = op_module.prod_virial (net_deriv_reshape, - self.descrpt_deriv, - self.rij, - self.nlist, - self.axis, - natoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) - tf.summary.histogram('force', force) - tf.summary.histogram('virial', virial) - tf.summary.histogram('atom_virial', atom_virial) + [net_deriv] = 
tf.gradients(atom_ener, self.descrpt) + tf.summary.histogram("net_derivative", net_deriv) + net_deriv_reshape = tf.reshape( + net_deriv, + [np.cast["int64"](-1), natoms[0] * np.cast["int64"](self.ndescrpt)], + ) + force = op_module.prod_force( + net_deriv_reshape, + self.descrpt_deriv, + self.nlist, + self.axis, + natoms, + n_a_sel=self.nnei_a, + n_r_sel=self.nnei_r, + ) + virial, atom_virial = op_module.prod_virial( + net_deriv_reshape, + self.descrpt_deriv, + self.rij, + self.nlist, + self.axis, + natoms, + n_a_sel=self.nnei_a, + n_r_sel=self.nnei_r, + ) + tf.summary.histogram("force", force) + tf.summary.histogram("virial", virial) + tf.summary.histogram("atom_virial", atom_virial) return force, virial, atom_virial - - def _compute_dstats_sys_nonsmth (self, - data_coord, - data_box, - data_atype, - natoms_vec, - mesh) : - dd_all \ - = run_sess(self.sub_sess, self.stat_descrpt, - feed_dict = { - self.place_holders['coord']: data_coord, - self.place_holders['type']: data_atype, - self.place_holders['natoms_vec']: natoms_vec, - self.place_holders['box']: data_box, - self.place_holders['default_mesh']: mesh, - }) + def _compute_dstats_sys_nonsmth( + self, data_coord, data_box, data_atype, natoms_vec, mesh + ): + dd_all = run_sess( + self.sub_sess, + self.stat_descrpt, + feed_dict={ + self.place_holders["coord"]: data_coord, + self.place_holders["type"]: data_atype, + self.place_holders["natoms_vec"]: natoms_vec, + self.place_holders["box"]: data_box, + self.place_holders["default_mesh"]: mesh, + }, + ) natoms = natoms_vec dd_all = np.reshape(dd_all, [-1, self.ndescrpt * natoms[0]]) start_index = 0 @@ -345,27 +393,27 @@ def _compute_dstats_sys_nonsmth (self, sysn = [] sysv2 = [] for type_i in range(self.ntypes): - end_index = start_index + self.ndescrpt * natoms[2+type_i] + end_index = start_index + self.ndescrpt * natoms[2 + type_i] dd = dd_all[:, start_index:end_index] dd = np.reshape(dd, [-1, self.ndescrpt]) - start_index = end_index + start_index = end_index # 
compute - sumv = np.sum(dd, axis = 0) + sumv = np.sum(dd, axis=0) sumn = dd.shape[0] - sumv2 = np.sum(np.multiply(dd,dd), axis = 0) + sumv2 = np.sum(np.multiply(dd, dd), axis=0) sysv.append(sumv) sysn.append(sumn) sysv2.append(sumv2) return sysv, sysv2, sysn + def _compute_std(self, sumv2, sumv, sumn): + return np.sqrt(sumv2 / sumn - np.multiply(sumv / sumn, sumv / sumn)) - def _compute_std (self,sumv2, sumv, sumn) : - return np.sqrt(sumv2/sumn - np.multiply(sumv/sumn, sumv/sumn)) - - def init_variables(self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix : str = "", + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix: str = "", ) -> None: """ Init the embedding net variables with the given dict @@ -379,5 +427,9 @@ def init_variables(self, suffix : str, optional The suffix of the scope """ - self.davg = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_avg' % suffix) - self.dstd = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_std' % suffix) + self.davg = get_tensor_by_name_from_graph( + graph, "descrpt_attr%s/t_avg" % suffix + ) + self.dstd = get_tensor_by_name_from_graph( + graph, "descrpt_attr%s/t_std" % suffix + ) diff --git a/deepmd/descriptor/se.py b/deepmd/descriptor/se.py index 059924071f..a5d52991f6 100644 --- a/deepmd/descriptor/se.py +++ b/deepmd/descriptor/se.py @@ -1,13 +1,24 @@ -from typing import Tuple, List +from typing import ( + List, + Tuple, +) -from deepmd.env import tf -from deepmd.utils.graph import get_embedding_net_variables_from_graph_def, get_tensor_by_name_from_graph -from .descriptor import Descriptor +from deepmd.env import ( + tf, +) +from deepmd.utils.graph import ( + get_embedding_net_variables_from_graph_def, + get_tensor_by_name_from_graph, +) +from .descriptor import ( + Descriptor, +) -class DescrptSe (Descriptor): + +class DescrptSe(Descriptor): """A base class for smooth version of descriptors. 
- + Notes ----- All of these descriptors have an environmental matrix and an @@ -26,11 +37,12 @@ class DescrptSe (Descriptor): distances between two atoms nlist : tf.Tensor the neighbor list - + """ - def _identity_tensors(self, suffix : str = "") -> None: + + def _identity_tensors(self, suffix: str = "") -> None: """Identify tensors which are expected to be stored and restored. - + Notes ----- These tensors will be indentitied: @@ -46,14 +58,16 @@ def _identity_tensors(self, suffix : str = "") -> None: suffix : str The suffix of the scope """ - self.descrpt_reshape = tf.identity(self.descrpt_reshape, name = 'o_rmat' + suffix) - self.descrpt_deriv = tf.identity(self.descrpt_deriv, name = 'o_rmat_deriv' + suffix) - self.rij = tf.identity(self.rij, name = 'o_rij' + suffix) - self.nlist = tf.identity(self.nlist, name = 'o_nlist' + suffix) + self.descrpt_reshape = tf.identity(self.descrpt_reshape, name="o_rmat" + suffix) + self.descrpt_deriv = tf.identity( + self.descrpt_deriv, name="o_rmat_deriv" + suffix + ) + self.rij = tf.identity(self.rij, name="o_rij" + suffix) + self.nlist = tf.identity(self.nlist, name="o_nlist" + suffix) - def get_tensor_names(self, suffix : str = "") -> Tuple[str]: + def get_tensor_names(self, suffix: str = "") -> Tuple[str]: """Get names of tensors. 
- + Parameters ---------- suffix : str @@ -64,13 +78,19 @@ def get_tensor_names(self, suffix : str = "") -> Tuple[str]: Tuple[str] Names of tensors """ - return (f'o_rmat{suffix}:0', f'o_rmat_deriv{suffix}:0', f'o_rij{suffix}:0', f'o_nlist{suffix}:0') + return ( + f"o_rmat{suffix}:0", + f"o_rmat_deriv{suffix}:0", + f"o_rij{suffix}:0", + f"o_nlist{suffix}:0", + ) - def pass_tensors_from_frz_model(self, - descrpt_reshape : tf.Tensor, - descrpt_deriv : tf.Tensor, - rij : tf.Tensor, - nlist : tf.Tensor + def pass_tensors_from_frz_model( + self, + descrpt_reshape: tf.Tensor, + descrpt_deriv: tf.Tensor, + rij: tf.Tensor, + nlist: tf.Tensor, ): """ Pass the descrpt_reshape tensor as well as descrpt_deriv tensor from the frz graph_def @@ -91,10 +111,11 @@ def pass_tensors_from_frz_model(self, self.descrpt_deriv = descrpt_deriv self.descrpt_reshape = descrpt_reshape - def init_variables(self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix : str = "", + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix: str = "", ) -> None: """ Init the embedding net variables with the given dict @@ -108,9 +129,15 @@ def init_variables(self, suffix : str, optional The suffix of the scope """ - self.embedding_net_variables = get_embedding_net_variables_from_graph_def(graph_def, suffix = suffix) - self.davg = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_avg' % suffix) - self.dstd = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_std' % suffix) + self.embedding_net_variables = get_embedding_net_variables_from_graph_def( + graph_def, suffix=suffix + ) + self.davg = get_tensor_by_name_from_graph( + graph, "descrpt_attr%s/t_avg" % suffix + ) + self.dstd = get_tensor_by_name_from_graph( + graph, "descrpt_attr%s/t_std" % suffix + ) @property def precision(self) -> tf.DType: diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py index 11754584f8..49d38d2e76 100644 --- a/deepmd/descriptor/se_a.py +++ b/deepmd/descriptor/se_a.py @@ 
-1,28 +1,70 @@ import math +from typing import ( + Any, + Dict, + List, + Optional, + Tuple, +) + import numpy as np -from typing import Optional, Tuple, List, Dict, Any - -from deepmd.env import tf -from deepmd.common import get_activation_func, get_precision, cast_precision -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION -from deepmd.env import op_module -from deepmd.env import default_tf_session_config -from deepmd.utils.network import embedding_net, embedding_net_rand_seed_shift -from deepmd.utils.tabulate import DPTabulate -from deepmd.utils.type_embed import embed_atom_type -from deepmd.utils.sess import run_sess -from deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph, get_tensor_by_name -from deepmd.utils.errors import GraphWithoutTensorError -from .descriptor import Descriptor -from .se import DescrptSe - -from deepmd.nvnmd.descriptor.se_a import descrpt2r4, build_davg_dstd, check_switch_range, build_op_descriptor, filter_lower_R42GR, filter_GR2D -from deepmd.nvnmd.utils.config import nvnmd_cfg + +from deepmd.common import ( + cast_precision, + get_activation_func, + get_precision, +) +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + default_tf_session_config, + op_module, + tf, +) +from deepmd.nvnmd.descriptor.se_a import ( + build_davg_dstd, + build_op_descriptor, + check_switch_range, + descrpt2r4, + filter_GR2D, + filter_lower_R42GR, +) +from deepmd.nvnmd.utils.config import ( + nvnmd_cfg, +) +from deepmd.utils.errors import ( + GraphWithoutTensorError, +) +from deepmd.utils.graph import ( + get_tensor_by_name, + get_tensor_by_name_from_graph, + load_graph_def, +) +from deepmd.utils.network import ( + embedding_net, + embedding_net_rand_seed_shift, +) +from deepmd.utils.sess import ( + run_sess, +) +from deepmd.utils.tabulate import ( + DPTabulate, +) +from deepmd.utils.type_embed import ( + embed_atom_type, +) + +from .descriptor import ( + 
Descriptor, +) +from .se import ( + DescrptSe, +) + @Descriptor.register("se_e2_a") @Descriptor.register("se_a") -class DescrptSeA (DescrptSe): +class DescrptSeA(DescrptSe): r"""DeepPot-SE constructed from all information (both angular and radial) of atomic configurations. The embedding takes the distance between atoms as input. @@ -41,7 +83,7 @@ class DescrptSeA (DescrptSe): \end{array} ] - where :math:`\mathbf{R}_{ji}=\mathbf{R}_j-\mathbf{R}_i = (x_{ji}, y_{ji}, z_{ji})` is + where :math:`\mathbf{R}_{ji}=\mathbf{R}_j-\mathbf{R}_i = (x_{ji}, y_{ji}, z_{ji})` is the relative coordinate and :math:`r_{ji}=\lVert \mathbf{R}_{ji} \lVert` is its norm. The switching function :math:`s(r)` is defined as: @@ -97,7 +139,7 @@ class DescrptSeA (DescrptSe): Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed multi_task If the model has multi fitting nets to train. - + References ---------- .. [1] Linfeng Zhang, Jiequn Han, Han Wang, Wissam A. Saidi, Roberto Car, and E. Weinan. 2018. @@ -105,28 +147,32 @@ class DescrptSeA (DescrptSe): systems. In Proceedings of the 32nd International Conference on Neural Information Processing Systems (NIPS'18). Curran Associates Inc., Red Hook, NY, USA, 4441–4451. 
""" - def __init__ (self, - rcut: float, - rcut_smth: float, - sel: List[str], - neuron: List[int] = [24,48,96], - axis_neuron: int = 8, - resnet_dt: bool = False, - trainable: bool = True, - seed: Optional[int] = None, - type_one_side: bool = True, - exclude_types: List[List[int]] = [], - set_davg_zero: bool = False, - activation_function: str = 'tanh', - precision: str = 'default', - uniform_seed: bool = False, - multi_task: bool = False + + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: List[str], + neuron: List[int] = [24, 48, 96], + axis_neuron: int = 8, + resnet_dt: bool = False, + trainable: bool = True, + seed: Optional[int] = None, + type_one_side: bool = True, + exclude_types: List[List[int]] = [], + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = "default", + uniform_seed: bool = False, + multi_task: bool = False, ) -> None: """ Constructor """ if rcut < rcut_smth: - raise RuntimeError("rcut_smth (%f) should be no more than rcut (%f)!" % (rcut_smth, rcut)) + raise RuntimeError( + "rcut_smth (%f) should be no more than rcut (%f)!" 
% (rcut_smth, rcut) + ) self.sel_a = sel self.rcut_r = rcut self.rcut_r_smth = rcut_smth @@ -142,16 +188,16 @@ def __init__ (self, self.filter_precision = get_precision(precision) self.exclude_types = set() for tt in exclude_types: - assert(len(tt) == 2) + assert len(tt) == 2 self.exclude_types.add((tt[0], tt[1])) self.exclude_types.add((tt[1], tt[0])) self.set_davg_zero = set_davg_zero self.type_one_side = type_one_side # descrpt config - self.sel_r = [ 0 for ii in range(len(self.sel_a)) ] + self.sel_r = [0 for ii in range(len(self.sel_a))] self.ntypes = len(self.sel_a) - assert(self.ntypes == len(self.sel_r)) + assert self.ntypes == len(self.sel_r) self.rcut_a = -1 # numb of neighbors and numb of descrptors self.nnei_a = np.cumsum(self.sel_a)[-1] @@ -169,60 +215,79 @@ def __init__ (self, self.place_holders = {} self.nei_type = np.repeat(np.arange(self.ntypes), self.sel_a) # like a mask - avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) - std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) + avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) + std_ones = np.ones([self.ntypes, self.ndescrpt]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) sub_graph = tf.Graph() with sub_graph.as_default(): - name_pfx = 'd_sea_' - for ii in ['coord', 'box']: - self.place_holders[ii] = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None], name = name_pfx+'t_'+ii) - self.place_holders['type'] = tf.placeholder(tf.int32, [None, None], name=name_pfx+'t_type') - self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes+2], name=name_pfx+'t_natoms') - self.place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name=name_pfx+'t_mesh') - self.stat_descrpt, descrpt_deriv, rij, nlist \ - = op_module.prod_env_mat_a(self.place_holders['coord'], - self.place_holders['type'], - self.place_holders['natoms_vec'], - self.place_holders['box'], - 
self.place_holders['default_mesh'], - tf.constant(avg_zero), - tf.constant(std_ones), - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - rcut_r_smth = self.rcut_r_smth, - sel_a = self.sel_a, - sel_r = self.sel_r) - self.sub_sess = tf.Session(graph = sub_graph, config=default_tf_session_config) + name_pfx = "d_sea_" + for ii in ["coord", "box"]: + self.place_holders[ii] = tf.placeholder( + GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_" + ii + ) + self.place_holders["type"] = tf.placeholder( + tf.int32, [None, None], name=name_pfx + "t_type" + ) + self.place_holders["natoms_vec"] = tf.placeholder( + tf.int32, [self.ntypes + 2], name=name_pfx + "t_natoms" + ) + self.place_holders["default_mesh"] = tf.placeholder( + tf.int32, [None], name=name_pfx + "t_mesh" + ) + self.stat_descrpt, descrpt_deriv, rij, nlist = op_module.prod_env_mat_a( + self.place_holders["coord"], + self.place_holders["type"], + self.place_holders["natoms_vec"], + self.place_holders["box"], + self.place_holders["default_mesh"], + tf.constant(avg_zero), + tf.constant(std_ones), + rcut_a=self.rcut_a, + rcut_r=self.rcut_r, + rcut_r_smth=self.rcut_r_smth, + sel_a=self.sel_a, + sel_r=self.sel_r, + ) + self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) self.original_sel = None self.multi_task = multi_task if multi_task: - self.stat_dict = {'sumr': [], 'suma': [], 'sumn': [], 'sumr2': [], 'suma2': []} + self.stat_dict = { + "sumr": [], + "suma": [], + "sumn": [], + "sumr2": [], + "suma2": [], + } - def get_rcut (self) -> float: + def get_rcut(self) -> float: """ Returns the cut-off radius """ return self.rcut_r - def get_ntypes (self) -> int: + def get_ntypes(self) -> int: """ Returns the number of atom types """ return self.ntypes - def get_dim_out (self) -> int: + def get_dim_out(self) -> int: """ Returns the output dimension of this descriptor """ return self.filter_neuron[-1] * self.n_axis_neuron - def get_dim_rot_mat_1 (self) -> int: + def 
get_dim_rot_mat_1(self) -> int: """ Returns the first dimension of the rotation matrix. The rotation is of shape dim_1 x 3 """ return self.filter_neuron[-1] - def get_nlist (self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: """ Returns ------- @@ -237,17 +302,18 @@ def get_nlist (self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: """ return self.nlist, self.rij, self.sel_a, self.sel_r - def compute_input_stats (self, - data_coord : list, - data_box : list, - data_atype : list, - natoms_vec : list, - mesh : list, - input_dict : dict - ) -> None : + def compute_input_stats( + self, + data_coord: list, + data_box: list, + data_atype: list, + natoms_vec: list, + mesh: list, + input_dict: dict, + ) -> None: """ Compute the statisitcs (avg and std) of the training data. The input will be normalized by the statistics. - + Parameters ---------- data_coord @@ -269,23 +335,32 @@ def compute_input_stats (self, sumn = [] sumr2 = [] suma2 = [] - for cc,bb,tt,nn,mm in zip(data_coord,data_box,data_atype,natoms_vec,mesh) : - sysr,sysr2,sysa,sysa2,sysn \ - = self._compute_dstats_sys_smth(cc,bb,tt,nn,mm) + for cc, bb, tt, nn, mm in zip( + data_coord, data_box, data_atype, natoms_vec, mesh + ): + sysr, sysr2, sysa, sysa2, sysn = self._compute_dstats_sys_smth( + cc, bb, tt, nn, mm + ) sumr.append(sysr) suma.append(sysa) sumn.append(sysn) sumr2.append(sysr2) suma2.append(sysa2) if not self.multi_task: - stat_dict = {'sumr': sumr, 'suma': suma, 'sumn': sumn, 'sumr2': sumr2, 'suma2': suma2} + stat_dict = { + "sumr": sumr, + "suma": suma, + "sumn": sumn, + "sumr2": sumr2, + "suma2": suma2, + } self.merge_input_stats(stat_dict) else: - self.stat_dict['sumr'] += sumr - self.stat_dict['suma'] += suma - self.stat_dict['sumn'] += sumn - self.stat_dict['sumr2'] += sumr2 - self.stat_dict['suma2'] += suma2 + self.stat_dict["sumr"] += sumr + self.stat_dict["suma"] += suma + self.stat_dict["sumn"] += sumn 
+ self.stat_dict["sumr2"] += sumr2 + self.stat_dict["suma2"] += suma2 def merge_input_stats(self, stat_dict): """ @@ -308,18 +383,19 @@ def merge_input_stats(self, stat_dict): """ all_davg = [] all_dstd = [] - sumr = np.sum(stat_dict['sumr'], axis = 0) - suma = np.sum(stat_dict['suma'], axis = 0) - sumn = np.sum(stat_dict['sumn'], axis = 0) - sumr2 = np.sum(stat_dict['sumr2'], axis = 0) - suma2 = np.sum(stat_dict['suma2'], axis = 0) - for type_i in range(self.ntypes) : - davgunit = [sumr[type_i]/(sumn[type_i]+1e-15), 0, 0, 0] - dstdunit = [self._compute_std(sumr2[type_i], sumr[type_i], sumn[type_i]), - self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), - self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), - self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]) - ] + sumr = np.sum(stat_dict["sumr"], axis=0) + suma = np.sum(stat_dict["suma"], axis=0) + sumn = np.sum(stat_dict["sumn"], axis=0) + sumr2 = np.sum(stat_dict["sumr2"], axis=0) + suma2 = np.sum(stat_dict["suma2"], axis=0) + for type_i in range(self.ntypes): + davgunit = [sumr[type_i] / (sumn[type_i] + 1e-15), 0, 0, 0] + dstdunit = [ + self._compute_std(sumr2[type_i], sumr[type_i], sumn[type_i]), + self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), + self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), + self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), + ] davg = np.tile(davgunit, self.ndescrpt // 4) dstd = np.tile(dstdunit, self.ndescrpt // 4) all_davg.append(davg) @@ -328,19 +404,20 @@ def merge_input_stats(self, stat_dict): self.davg = np.array(all_davg) self.dstd = np.array(all_dstd) - def enable_compression(self, - min_nbor_dist : float, - graph: tf.Graph, - graph_def: tf.GraphDef, - table_extrapolate : float = 5, - table_stride_1 : float = 0.01, - table_stride_2 : float = 0.1, - check_frequency : int = -1, - suffix : str = "", + def enable_compression( + self, + min_nbor_dist: float, + graph: tf.Graph, + graph_def: tf.GraphDef, + 
table_extrapolate: float = 5, + table_stride_1: float = 0.01, + table_stride_2: float = 0.1, + check_frequency: int = -1, + suffix: str = "", ) -> None: """ Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the training data. - + Parameters ---------- min_nbor_dist @@ -366,33 +443,55 @@ def enable_compression(self, ), "Model compression error: descriptor resnet_dt must be false!" for tt in self.exclude_types: if (tt[0] not in range(self.ntypes)) or (tt[1] not in range(self.ntypes)): - raise RuntimeError("exclude types" + str(tt) + " must within the number of atomic types " + str(self.ntypes) + "!") - if (self.ntypes * self.ntypes - len(self.exclude_types) == 0): - raise RuntimeError("empty embedding-net are not supported in model compression!") + raise RuntimeError( + "exclude types" + + str(tt) + + " must within the number of atomic types " + + str(self.ntypes) + + "!" + ) + if self.ntypes * self.ntypes - len(self.exclude_types) == 0: + raise RuntimeError( + "empty embedding-net are not supported in model compression!" + ) for ii in range(len(self.filter_neuron) - 1): if self.filter_neuron[ii] * 2 != self.filter_neuron[ii + 1]: raise NotImplementedError( "Model Compression error: descriptor neuron [%s] is not supported by model compression! " - "The size of the next layer of the neural network must be twice the size of the previous layer." - % ','.join([str(item) for item in self.filter_neuron]) + "The size of the next layer of the neural network must be twice the size of the previous layer." 
+ % ",".join([str(item) for item in self.filter_neuron]) ) self.compress = True self.table = DPTabulate( - self, self.filter_neuron, graph, graph_def, self.type_one_side, self.exclude_types, self.compress_activation_fn, suffix=suffix) - self.table_config = [table_extrapolate, table_stride_1, table_stride_2, check_frequency] - self.lower, self.upper \ - = self.table.build(min_nbor_dist, - table_extrapolate, - table_stride_1, - table_stride_2) - - self.davg = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_avg' % suffix) - self.dstd = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_std' % suffix) - - - def enable_mixed_precision(self, mixed_prec : dict = None) -> None: + self, + self.filter_neuron, + graph, + graph_def, + self.type_one_side, + self.exclude_types, + self.compress_activation_fn, + suffix=suffix, + ) + self.table_config = [ + table_extrapolate, + table_stride_1, + table_stride_2, + check_frequency, + ] + self.lower, self.upper = self.table.build( + min_nbor_dist, table_extrapolate, table_stride_1, table_stride_2 + ) + + self.davg = get_tensor_by_name_from_graph( + graph, "descrpt_attr%s/t_avg" % suffix + ) + self.dstd = get_tensor_by_name_from_graph( + graph, "descrpt_attr%s/t_std" % suffix + ) + + def enable_mixed_precision(self, mixed_prec: dict = None) -> None: """ Reveive the mixed precision setting. 
@@ -402,18 +501,18 @@ def enable_mixed_precision(self, mixed_prec : dict = None) -> None: The mixed precision setting used in the embedding net """ self.mixed_prec = mixed_prec - self.filter_precision = get_precision(mixed_prec['output_prec']) - - - def build (self, - coord_ : tf.Tensor, - atype_ : tf.Tensor, - natoms : tf.Tensor, - box_ : tf.Tensor, - mesh : tf.Tensor, - input_dict : dict, - reuse : bool = None, - suffix : str = '' + self.filter_precision = get_precision(mixed_prec["output_prec"]) + + def build( + self, + coord_: tf.Tensor, + atype_: tf.Tensor, + natoms: tf.Tensor, + box_: tf.Tensor, + mesh: tf.Tensor, + input_dict: dict, + reuse: bool = None, + suffix: str = "", ) -> tf.Tensor: """ Build the computational graph for the descriptor @@ -431,8 +530,8 @@ def build (self, natoms[i]: 2 <= i < Ntypes+2, number of type i atoms mesh For historical reasons, only the length of the Tensor matters. - if size of mesh == 6, pbc is assumed. - if size of mesh == 0, no-pbc is assumed. + if size of mesh == 6, pbc is assumed. + if size of mesh == 0, no-pbc is assumed. 
input_dict Dictionary for additional inputs reuse @@ -451,85 +550,91 @@ def build (self, if nvnmd_cfg.restore_descriptor: davg, dstd = build_davg_dstd() check_switch_range(davg, dstd) - with tf.variable_scope('descrpt_attr' + suffix, reuse = reuse) : + with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse): if davg is None: - davg = np.zeros([self.ntypes, self.ndescrpt]) + davg = np.zeros([self.ntypes, self.ndescrpt]) if dstd is None: - dstd = np.ones ([self.ntypes, self.ndescrpt]) - t_rcut = tf.constant(np.max([self.rcut_r, self.rcut_a]), - name = 'rcut', - dtype = GLOBAL_TF_FLOAT_PRECISION) - t_ntypes = tf.constant(self.ntypes, - name = 'ntypes', - dtype = tf.int32) - t_ndescrpt = tf.constant(self.ndescrpt, - name = 'ndescrpt', - dtype = tf.int32) - t_sel = tf.constant(self.sel_a, - name = 'sel', - dtype = tf.int32) - t_original_sel = tf.constant(self.original_sel if self.original_sel is not None else self.sel_a, - name = 'original_sel', - dtype = tf.int32) - self.t_avg = tf.get_variable('t_avg', - davg.shape, - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(davg)) - self.t_std = tf.get_variable('t_std', - dstd.shape, - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(dstd)) + dstd = np.ones([self.ntypes, self.ndescrpt]) + t_rcut = tf.constant( + np.max([self.rcut_r, self.rcut_a]), + name="rcut", + dtype=GLOBAL_TF_FLOAT_PRECISION, + ) + t_ntypes = tf.constant(self.ntypes, name="ntypes", dtype=tf.int32) + t_ndescrpt = tf.constant(self.ndescrpt, name="ndescrpt", dtype=tf.int32) + t_sel = tf.constant(self.sel_a, name="sel", dtype=tf.int32) + t_original_sel = tf.constant( + self.original_sel if self.original_sel is not None else self.sel_a, + name="original_sel", + dtype=tf.int32, + ) + self.t_avg = tf.get_variable( + "t_avg", + davg.shape, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(davg), + ) + self.t_std = 
tf.get_variable( + "t_std", + dstd.shape, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(dstd), + ) with tf.control_dependencies([t_sel, t_original_sel]): - coord = tf.reshape (coord_, [-1, natoms[1] * 3]) - box = tf.reshape (box_, [-1, 9]) - atype = tf.reshape (atype_, [-1, natoms[1]]) - - op_descriptor = build_op_descriptor() if nvnmd_cfg.enable else op_module.prod_env_mat_a - self.descrpt, self.descrpt_deriv, self.rij, self.nlist \ - = op_descriptor (coord, - atype, - natoms, - box, - mesh, - self.t_avg, - self.t_std, - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - rcut_r_smth = self.rcut_r_smth, - sel_a = self.sel_a, - sel_r = self.sel_r) + coord = tf.reshape(coord_, [-1, natoms[1] * 3]) + box = tf.reshape(box_, [-1, 9]) + atype = tf.reshape(atype_, [-1, natoms[1]]) + + op_descriptor = ( + build_op_descriptor() if nvnmd_cfg.enable else op_module.prod_env_mat_a + ) + self.descrpt, self.descrpt_deriv, self.rij, self.nlist = op_descriptor( + coord, + atype, + natoms, + box, + mesh, + self.t_avg, + self.t_std, + rcut_a=self.rcut_a, + rcut_r=self.rcut_r, + rcut_r_smth=self.rcut_r_smth, + sel_a=self.sel_a, + sel_r=self.sel_r, + ) # only used when tensorboard was set as true - tf.summary.histogram('descrpt', self.descrpt) - tf.summary.histogram('rij', self.rij) - tf.summary.histogram('nlist', self.nlist) + tf.summary.histogram("descrpt", self.descrpt) + tf.summary.histogram("rij", self.rij) + tf.summary.histogram("nlist", self.nlist) self.descrpt_reshape = tf.reshape(self.descrpt, [-1, self.ndescrpt]) self._identity_tensors(suffix=suffix) - self.dout, self.qmat = self._pass_filter(self.descrpt_reshape, - atype, - natoms, - input_dict, - suffix = suffix, - reuse = reuse, - trainable = self.trainable) + self.dout, self.qmat = self._pass_filter( + self.descrpt_reshape, + atype, + natoms, + input_dict, + suffix=suffix, + reuse=reuse, + trainable=self.trainable, + ) # only used when tensorboard was set as true - 
tf.summary.histogram('embedding_net_output', self.dout) + tf.summary.histogram("embedding_net_output", self.dout) return self.dout - + def get_rot_mat(self) -> tf.Tensor: """ Get rotational matrix """ return self.qmat - def prod_force_virial(self, - atom_ener : tf.Tensor, - natoms : tf.Tensor + def prod_force_virial( + self, atom_ener: tf.Tensor, natoms: tf.Tensor ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """ Compute force and virial @@ -553,41 +658,40 @@ def prod_force_virial(self, atom_virial The atomic virial """ - [net_deriv] = tf.gradients (atom_ener, self.descrpt_reshape) - tf.summary.histogram('net_derivative', net_deriv) - net_deriv_reshape = tf.reshape (net_deriv, [np.cast['int64'](-1), natoms[0] * np.cast['int64'](self.ndescrpt)]) - force \ - = op_module.prod_force_se_a (net_deriv_reshape, - self.descrpt_deriv, - self.nlist, - natoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) - virial, atom_virial \ - = op_module.prod_virial_se_a (net_deriv_reshape, - self.descrpt_deriv, - self.rij, - self.nlist, - natoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) - tf.summary.histogram('force', force) - tf.summary.histogram('virial', virial) - tf.summary.histogram('atom_virial', atom_virial) - + [net_deriv] = tf.gradients(atom_ener, self.descrpt_reshape) + tf.summary.histogram("net_derivative", net_deriv) + net_deriv_reshape = tf.reshape( + net_deriv, + [np.cast["int64"](-1), natoms[0] * np.cast["int64"](self.ndescrpt)], + ) + force = op_module.prod_force_se_a( + net_deriv_reshape, + self.descrpt_deriv, + self.nlist, + natoms, + n_a_sel=self.nnei_a, + n_r_sel=self.nnei_r, + ) + virial, atom_virial = op_module.prod_virial_se_a( + net_deriv_reshape, + self.descrpt_deriv, + self.rij, + self.nlist, + natoms, + n_a_sel=self.nnei_a, + n_r_sel=self.nnei_r, + ) + tf.summary.histogram("force", force) + tf.summary.histogram("virial", virial) + tf.summary.histogram("atom_virial", atom_virial) + return force, virial, atom_virial - - - def _pass_filter(self, - 
inputs, - atype, - natoms, - input_dict, - reuse = None, - suffix = '', - trainable = True) : + + def _pass_filter( + self, inputs, atype, natoms, input_dict, reuse=None, suffix="", trainable=True + ): if input_dict is not None: - type_embedding = input_dict.get('type_embedding', None) + type_embedding = input_dict.get("type_embedding", None) else: type_embedding = None start_index = 0 @@ -596,22 +700,39 @@ def _pass_filter(self, output_qmat = [] if not self.type_one_side and type_embedding is None: for type_i in range(self.ntypes): - inputs_i = tf.slice (inputs, - [ 0, start_index, 0], - [-1, natoms[2+type_i], -1] ) + inputs_i = tf.slice( + inputs, [0, start_index, 0], [-1, natoms[2 + type_i], -1] + ) inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt]) - filter_name = 'filter_type_'+str(type_i)+suffix - layer, qmat = self._filter(inputs_i, type_i, name=filter_name, natoms=natoms, reuse=reuse, trainable = trainable, activation_fn = self.filter_activation_fn) - layer = tf.reshape(layer, [tf.shape(inputs)[0], natoms[2+type_i], self.get_dim_out()]) - qmat = tf.reshape(qmat, [tf.shape(inputs)[0], natoms[2+type_i], self.get_dim_rot_mat_1() * 3]) + filter_name = "filter_type_" + str(type_i) + suffix + layer, qmat = self._filter( + inputs_i, + type_i, + name=filter_name, + natoms=natoms, + reuse=reuse, + trainable=trainable, + activation_fn=self.filter_activation_fn, + ) + layer = tf.reshape( + layer, [tf.shape(inputs)[0], natoms[2 + type_i], self.get_dim_out()] + ) + qmat = tf.reshape( + qmat, + [ + tf.shape(inputs)[0], + natoms[2 + type_i], + self.get_dim_rot_mat_1() * 3, + ], + ) output.append(layer) output_qmat.append(qmat) - start_index += natoms[2+type_i] - else : + start_index += natoms[2 + type_i] + else: inputs_i = inputs inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt]) type_i = -1 - if nvnmd_cfg.enable and nvnmd_cfg.quantize_descriptor: + if nvnmd_cfg.enable and nvnmd_cfg.quantize_descriptor: inputs_i = descrpt2r4(inputs_i, natoms) if 
len(self.exclude_types): mask = self.build_type_exclude_mask( @@ -624,31 +745,42 @@ def _pass_filter(self, ) inputs_i *= mask - layer, qmat = self._filter(inputs_i, type_i, name='filter_type_all'+suffix, natoms=natoms, reuse=reuse, trainable = trainable, activation_fn = self.filter_activation_fn, type_embedding=type_embedding) - layer = tf.reshape(layer, [tf.shape(inputs)[0], natoms[0], self.get_dim_out()]) - qmat = tf.reshape(qmat, [tf.shape(inputs)[0], natoms[0], self.get_dim_rot_mat_1() * 3]) + layer, qmat = self._filter( + inputs_i, + type_i, + name="filter_type_all" + suffix, + natoms=natoms, + reuse=reuse, + trainable=trainable, + activation_fn=self.filter_activation_fn, + type_embedding=type_embedding, + ) + layer = tf.reshape( + layer, [tf.shape(inputs)[0], natoms[0], self.get_dim_out()] + ) + qmat = tf.reshape( + qmat, [tf.shape(inputs)[0], natoms[0], self.get_dim_rot_mat_1() * 3] + ) output.append(layer) output_qmat.append(qmat) - output = tf.concat(output, axis = 1) - output_qmat = tf.concat(output_qmat, axis = 1) + output = tf.concat(output, axis=1) + output_qmat = tf.concat(output_qmat, axis=1) return output, output_qmat - - def _compute_dstats_sys_smth (self, - data_coord, - data_box, - data_atype, - natoms_vec, - mesh) : - dd_all \ - = run_sess(self.sub_sess, self.stat_descrpt, - feed_dict = { - self.place_holders['coord']: data_coord, - self.place_holders['type']: data_atype, - self.place_holders['natoms_vec']: natoms_vec, - self.place_holders['box']: data_box, - self.place_holders['default_mesh']: mesh, - }) + def _compute_dstats_sys_smth( + self, data_coord, data_box, data_atype, natoms_vec, mesh + ): + dd_all = run_sess( + self.sub_sess, + self.stat_descrpt, + feed_dict={ + self.place_holders["coord"]: data_coord, + self.place_holders["type"]: data_atype, + self.place_holders["natoms_vec"]: natoms_vec, + self.place_holders["box"]: data_box, + self.place_holders["default_mesh"]: mesh, + }, + ) natoms = natoms_vec dd_all = np.reshape(dd_all, [-1, 
self.ndescrpt * natoms[0]]) start_index = 0 @@ -658,19 +790,19 @@ def _compute_dstats_sys_smth (self, sysr2 = [] sysa2 = [] for type_i in range(self.ntypes): - end_index = start_index + self.ndescrpt * natoms[2+type_i] + end_index = start_index + self.ndescrpt * natoms[2 + type_i] dd = dd_all[:, start_index:end_index] dd = np.reshape(dd, [-1, self.ndescrpt]) - start_index = end_index + start_index = end_index # compute - dd = np.reshape (dd, [-1, 4]) - ddr = dd[:,:1] - dda = dd[:,1:] + dd = np.reshape(dd, [-1, 4]) + ddr = dd[:, :1] + dda = dd[:, 1:] sumr = np.sum(ddr) - suma = np.sum(dda) / 3. + suma = np.sum(dda) / 3.0 sumn = dd.shape[0] sumr2 = np.sum(np.multiply(ddr, ddr)) - suma2 = np.sum(np.multiply(dda, dda)) / 3. + suma2 = np.sum(np.multiply(dda, dda)) / 3.0 sysr.append(sumr) sysa.append(suma) sysn.append(sumn) @@ -678,24 +810,22 @@ def _compute_dstats_sys_smth (self, sysa2.append(suma2) return sysr, sysr2, sysa, sysa2, sysn - - def _compute_std (self,sumv2, sumv, sumn) : + def _compute_std(self, sumv2, sumv, sumn): if sumn == 0: - return 1. / self.rcut_r - val = np.sqrt(sumv2/sumn - np.multiply(sumv/sumn, sumv/sumn)) + return 1.0 / self.rcut_r + val = np.sqrt(sumv2 / sumn - np.multiply(sumv / sumn, sumv / sumn)) if np.abs(val) < 1e-2: val = 1e-2 return val - def _concat_type_embedding( - self, - xyz_scatter, - nframes, - natoms, - type_embedding, + self, + xyz_scatter, + nframes, + natoms, + type_embedding, ): - '''Concatenate `type_embedding` of neighbors and `xyz_scatter`. + """Concatenate `type_embedding` of neighbors and `xyz_scatter`. If not self.type_one_side, concatenate `type_embedding` of center atoms as well. Parameters @@ -713,125 +843,178 @@ def _concat_type_embedding( ------- embedding: environment of each atom represented by embedding. 
- ''' + """ te_out_dim = type_embedding.get_shape().as_list()[-1] self.t_nei_type = tf.constant(self.nei_type, dtype=tf.int32) - nei_embed = tf.nn.embedding_lookup(type_embedding,tf.cast(self.t_nei_type,dtype=tf.int32)) # shape is [self.nnei, 1+te_out_dim] - nei_embed = tf.tile(nei_embed,(nframes*natoms[0],1)) # shape is [nframes*natoms[0]*self.nnei, te_out_dim] - nei_embed = tf.reshape(nei_embed,[-1,te_out_dim]) - embedding_input = tf.concat([xyz_scatter,nei_embed],1) # shape is [nframes*natoms[0]*self.nnei, 1+te_out_dim] + nei_embed = tf.nn.embedding_lookup( + type_embedding, tf.cast(self.t_nei_type, dtype=tf.int32) + ) # shape is [self.nnei, 1+te_out_dim] + nei_embed = tf.tile( + nei_embed, (nframes * natoms[0], 1) + ) # shape is [nframes*natoms[0]*self.nnei, te_out_dim] + nei_embed = tf.reshape(nei_embed, [-1, te_out_dim]) + embedding_input = tf.concat( + [xyz_scatter, nei_embed], 1 + ) # shape is [nframes*natoms[0]*self.nnei, 1+te_out_dim] if not self.type_one_side: - atm_embed = embed_atom_type(self.ntypes, natoms, type_embedding) # shape is [natoms[0], te_out_dim] - atm_embed = tf.tile(atm_embed,(nframes,self.nnei)) # shape is [nframes*natoms[0], self.nnei*te_out_dim] - atm_embed = tf.reshape(atm_embed,[-1,te_out_dim]) # shape is [nframes*natoms[0]*self.nnei, te_out_dim] - embedding_input = tf.concat([embedding_input,atm_embed],1) # shape is [nframes*natoms[0]*self.nnei, 1+te_out_dim+te_out_dim] + atm_embed = embed_atom_type( + self.ntypes, natoms, type_embedding + ) # shape is [natoms[0], te_out_dim] + atm_embed = tf.tile( + atm_embed, (nframes, self.nnei) + ) # shape is [nframes*natoms[0], self.nnei*te_out_dim] + atm_embed = tf.reshape( + atm_embed, [-1, te_out_dim] + ) # shape is [nframes*natoms[0]*self.nnei, te_out_dim] + embedding_input = tf.concat( + [embedding_input, atm_embed], 1 + ) # shape is [nframes*natoms[0]*self.nnei, 1+te_out_dim+te_out_dim] return embedding_input - def _filter_lower( - self, - type_i, - type_input, - start_index, - 
incrs_index, - inputs, - nframes, - natoms, - type_embedding=None, - is_exclude = False, - activation_fn = None, - bavg = 0.0, - stddev = 1.0, - trainable = True, - suffix = '', + self, + type_i, + type_input, + start_index, + incrs_index, + inputs, + nframes, + natoms, + type_embedding=None, + is_exclude=False, + activation_fn=None, + bavg=0.0, + stddev=1.0, + trainable=True, + suffix="", ): """ input env matrix, returns R.G """ outputs_size = [1] + self.filter_neuron # cut-out inputs - # with natom x (nei_type_i x 4) - inputs_i = tf.slice (inputs, - [ 0, start_index* 4], - [-1, incrs_index* 4] ) + # with natom x (nei_type_i x 4) + inputs_i = tf.slice(inputs, [0, start_index * 4], [-1, incrs_index * 4]) shape_i = inputs_i.get_shape().as_list() natom = tf.shape(inputs_i)[0] # with (natom x nei_type_i) x 4 inputs_reshape = tf.reshape(inputs_i, [-1, 4]) # with (natom x nei_type_i) x 1 - xyz_scatter = tf.reshape(tf.slice(inputs_reshape, [0,0],[-1,1]),[-1,1]) + xyz_scatter = tf.reshape(tf.slice(inputs_reshape, [0, 0], [-1, 1]), [-1, 1]) if type_embedding is not None: xyz_scatter = self._concat_type_embedding( - xyz_scatter, nframes, natoms, type_embedding) + xyz_scatter, nframes, natoms, type_embedding + ) if self.compress: - raise RuntimeError('compression of type embedded descriptor is not supported at the moment') + raise RuntimeError( + "compression of type embedded descriptor is not supported at the moment" + ) # natom x 4 x outputs_size if nvnmd_cfg.enable: - return filter_lower_R42GR( - type_i, type_input, inputs_i, is_exclude, - activation_fn, bavg, stddev, trainable, - suffix, self.seed, self.seed_shift, self.uniform_seed, - self.filter_neuron, self.filter_precision, self.filter_resnet_dt, - self.embedding_net_variables - ) + return filter_lower_R42GR( + type_i, + type_input, + inputs_i, + is_exclude, + activation_fn, + bavg, + stddev, + trainable, + suffix, + self.seed, + self.seed_shift, + self.uniform_seed, + self.filter_neuron, + self.filter_precision, + 
self.filter_resnet_dt, + self.embedding_net_variables, + ) if self.compress and (not is_exclude): if self.type_one_side: - net = 'filter_-1_net_' + str(type_i) + net = "filter_-1_net_" + str(type_i) else: - net = 'filter_' + str(type_input) + '_net_' + str(type_i) - info = [self.lower[net], self.upper[net], self.upper[net] * self.table_config[0], self.table_config[1], self.table_config[2], self.table_config[3]] - return op_module.tabulate_fusion_se_a(tf.cast(self.table.data[net], self.filter_precision), info, xyz_scatter, tf.reshape(inputs_i, [natom, shape_i[1]//4, 4]), last_layer_size = outputs_size[-1]) + net = "filter_" + str(type_input) + "_net_" + str(type_i) + info = [ + self.lower[net], + self.upper[net], + self.upper[net] * self.table_config[0], + self.table_config[1], + self.table_config[2], + self.table_config[3], + ] + return op_module.tabulate_fusion_se_a( + tf.cast(self.table.data[net], self.filter_precision), + info, + xyz_scatter, + tf.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), + last_layer_size=outputs_size[-1], + ) else: - if (not is_exclude): - # with (natom x nei_type_i) x out_size - xyz_scatter = embedding_net( - xyz_scatter, - self.filter_neuron, - self.filter_precision, - activation_fn = activation_fn, - resnet_dt = self.filter_resnet_dt, - name_suffix = suffix, - stddev = stddev, - bavg = bavg, - seed = self.seed, - trainable = trainable, - uniform_seed = self.uniform_seed, - initial_variables = self.embedding_net_variables, - mixed_prec = self.mixed_prec) - if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift - else: - # we can safely return the final xyz_scatter filled with zero directly - return tf.cast(tf.fill((natom, 4, outputs_size[-1]), 0.), self.filter_precision) - # natom x nei_type_i x out_size - xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1]//4, outputs_size[-1])) - # When using tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]) below - # [588 24] -> [588 6 4] correct - # but if sel is zero - # 
[588 0] -> [147 0 4] incorrect; the correct one is [588 0 4] - # So we need to explicitly assign the shape to tf.shape(inputs_i)[0] instead of -1 - # natom x 4 x outputs_size - return tf.matmul(tf.reshape(inputs_i, [natom, shape_i[1]//4, 4]), xyz_scatter, transpose_a = True) - + if not is_exclude: + # with (natom x nei_type_i) x out_size + xyz_scatter = embedding_net( + xyz_scatter, + self.filter_neuron, + self.filter_precision, + activation_fn=activation_fn, + resnet_dt=self.filter_resnet_dt, + name_suffix=suffix, + stddev=stddev, + bavg=bavg, + seed=self.seed, + trainable=trainable, + uniform_seed=self.uniform_seed, + initial_variables=self.embedding_net_variables, + mixed_prec=self.mixed_prec, + ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift + else: + # we can safely return the final xyz_scatter filled with zero directly + return tf.cast( + tf.fill((natom, 4, outputs_size[-1]), 0.0), self.filter_precision + ) + # natom x nei_type_i x out_size + xyz_scatter = tf.reshape( + xyz_scatter, (-1, shape_i[1] // 4, outputs_size[-1]) + ) + # When using tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]) below + # [588 24] -> [588 6 4] correct + # but if sel is zero + # [588 0] -> [147 0 4] incorrect; the correct one is [588 0 4] + # So we need to explicitly assign the shape to tf.shape(inputs_i)[0] instead of -1 + # natom x 4 x outputs_size + return tf.matmul( + tf.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), + xyz_scatter, + transpose_a=True, + ) @cast_precision def _filter( - self, - inputs, - type_input, - natoms, - type_embedding = None, - activation_fn=tf.nn.tanh, - stddev=1.0, - bavg=0.0, - name='linear', - reuse=None, - trainable = True): + self, + inputs, + type_input, + natoms, + type_embedding=None, + activation_fn=tf.nn.tanh, + stddev=1.0, + bavg=0.0, + name="linear", + reuse=None, + trainable=True, + ): nframes = tf.shape(tf.reshape(inputs, [-1, natoms[0], self.ndescrpt]))[0] # natom x (nei x 4) shape = 
inputs.get_shape().as_list() outputs_size = [1] + self.filter_neuron outputs_size_2 = self.n_axis_neuron - all_excluded = all([(type_input, type_i) in self.exclude_types for type_i in range(self.ntypes)]) + all_excluded = all( + [ + (type_input, type_i) in self.exclude_types + for type_i in range(self.ntypes) + ] + ) if all_excluded: # all types are excluded so result and qmat should be zeros # we can safaly return a zero matrix... @@ -839,81 +1022,102 @@ def _filter( # result: natom x outputs_size x outputs_size_2 # qmat: natom x outputs_size x 3 natom = tf.shape(inputs)[0] - result = tf.cast(tf.fill((natom, outputs_size_2, outputs_size[-1]), 0.), GLOBAL_TF_FLOAT_PRECISION) - qmat = tf.cast(tf.fill((natom, outputs_size[-1], 3), 0.), GLOBAL_TF_FLOAT_PRECISION) + result = tf.cast( + tf.fill((natom, outputs_size_2, outputs_size[-1]), 0.0), + GLOBAL_TF_FLOAT_PRECISION, + ) + qmat = tf.cast( + tf.fill((natom, outputs_size[-1], 3), 0.0), GLOBAL_TF_FLOAT_PRECISION + ) return result, qmat - + with tf.variable_scope(name, reuse=reuse): - start_index = 0 - type_i = 0 - # natom x 4 x outputs_size - if type_embedding is None: - rets = [] - for type_i in range(self.ntypes): - ret = self._filter_lower( - type_i, type_input, - start_index, self.sel_a[type_i], - inputs, - nframes, - natoms, - type_embedding = type_embedding, - is_exclude = (type_input, type_i) in self.exclude_types, - activation_fn = activation_fn, - stddev = stddev, - bavg = bavg, - trainable = trainable, - suffix = "_"+str(type_i)) - if (type_input, type_i) not in self.exclude_types: - # add zero is meaningless; skip - rets.append(ret) - start_index += self.sel_a[type_i] - # faster to use accumulate_n than multiple add - xyz_scatter_1 = tf.accumulate_n(rets) - else : - xyz_scatter_1 = self._filter_lower( - type_i, type_input, - start_index, np.cumsum(self.sel_a)[-1], - inputs, - nframes, - natoms, - type_embedding = type_embedding, - is_exclude = False, - activation_fn = activation_fn, - stddev = stddev, - bavg 
= bavg, - trainable = trainable) - if nvnmd_cfg.enable: return filter_GR2D(xyz_scatter_1) - # natom x nei x outputs_size - # xyz_scatter = tf.concat(xyz_scatter_total, axis=1) - # natom x nei x 4 - # inputs_reshape = tf.reshape(inputs, [-1, shape[1]//4, 4]) - # natom x 4 x outputs_size - # xyz_scatter_1 = tf.matmul(inputs_reshape, xyz_scatter, transpose_a = True) - if self.original_sel is None: - # shape[1] = nnei * 4 - nnei = shape[1] / 4 - else: - nnei = tf.cast(tf.Variable(np.sum(self.original_sel), dtype=tf.int32, trainable=False, name="nnei"), self.filter_precision) - xyz_scatter_1 = xyz_scatter_1 / nnei - # natom x 4 x outputs_size_2 - xyz_scatter_2 = tf.slice(xyz_scatter_1, [0,0,0],[-1,-1,outputs_size_2]) - # # natom x 3 x outputs_size_2 - # qmat = tf.slice(xyz_scatter_2, [0,1,0], [-1, 3, -1]) - # natom x 3 x outputs_size_1 - qmat = tf.slice(xyz_scatter_1, [0,1,0], [-1, 3, -1]) - # natom x outputs_size_1 x 3 - qmat = tf.transpose(qmat, perm = [0, 2, 1]) - # natom x outputs_size x outputs_size_2 - result = tf.matmul(xyz_scatter_1, xyz_scatter_2, transpose_a = True) - # natom x (outputs_size x outputs_size_2) - result = tf.reshape(result, [-1, outputs_size_2 * outputs_size[-1]]) + start_index = 0 + type_i = 0 + # natom x 4 x outputs_size + if type_embedding is None: + rets = [] + for type_i in range(self.ntypes): + ret = self._filter_lower( + type_i, + type_input, + start_index, + self.sel_a[type_i], + inputs, + nframes, + natoms, + type_embedding=type_embedding, + is_exclude=(type_input, type_i) in self.exclude_types, + activation_fn=activation_fn, + stddev=stddev, + bavg=bavg, + trainable=trainable, + suffix="_" + str(type_i), + ) + if (type_input, type_i) not in self.exclude_types: + # add zero is meaningless; skip + rets.append(ret) + start_index += self.sel_a[type_i] + # faster to use accumulate_n than multiple add + xyz_scatter_1 = tf.accumulate_n(rets) + else: + xyz_scatter_1 = self._filter_lower( + type_i, + type_input, + start_index, + 
np.cumsum(self.sel_a)[-1], + inputs, + nframes, + natoms, + type_embedding=type_embedding, + is_exclude=False, + activation_fn=activation_fn, + stddev=stddev, + bavg=bavg, + trainable=trainable, + ) + if nvnmd_cfg.enable: + return filter_GR2D(xyz_scatter_1) + # natom x nei x outputs_size + # xyz_scatter = tf.concat(xyz_scatter_total, axis=1) + # natom x nei x 4 + # inputs_reshape = tf.reshape(inputs, [-1, shape[1]//4, 4]) + # natom x 4 x outputs_size + # xyz_scatter_1 = tf.matmul(inputs_reshape, xyz_scatter, transpose_a = True) + if self.original_sel is None: + # shape[1] = nnei * 4 + nnei = shape[1] / 4 + else: + nnei = tf.cast( + tf.Variable( + np.sum(self.original_sel), + dtype=tf.int32, + trainable=False, + name="nnei", + ), + self.filter_precision, + ) + xyz_scatter_1 = xyz_scatter_1 / nnei + # natom x 4 x outputs_size_2 + xyz_scatter_2 = tf.slice(xyz_scatter_1, [0, 0, 0], [-1, -1, outputs_size_2]) + # # natom x 3 x outputs_size_2 + # qmat = tf.slice(xyz_scatter_2, [0,1,0], [-1, 3, -1]) + # natom x 3 x outputs_size_1 + qmat = tf.slice(xyz_scatter_1, [0, 1, 0], [-1, 3, -1]) + # natom x outputs_size_1 x 3 + qmat = tf.transpose(qmat, perm=[0, 2, 1]) + # natom x outputs_size x outputs_size_2 + result = tf.matmul(xyz_scatter_1, xyz_scatter_2, transpose_a=True) + # natom x (outputs_size x outputs_size_2) + result = tf.reshape(result, [-1, outputs_size_2 * outputs_size[-1]]) return result, qmat - def init_variables(self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix : str = "", + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix: str = "", ) -> None: """ Init the embedding net variables with the given dict @@ -929,23 +1133,31 @@ def init_variables(self, """ super().init_variables(graph=graph, graph_def=graph_def, suffix=suffix) try: - self.original_sel = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/original_sel' % suffix) + self.original_sel = get_tensor_by_name_from_graph( + graph, "descrpt_attr%s/original_sel" % 
suffix + ) except GraphWithoutTensorError: # original_sel is not restored in old graphs, assume sel never changed before pass # check sel == original sel? try: - sel = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/sel' % suffix) + sel = get_tensor_by_name_from_graph(graph, "descrpt_attr%s/sel" % suffix) except GraphWithoutTensorError: # sel is not restored in old graphs pass else: if not np.array_equal(np.array(self.sel_a), sel): if not self.set_davg_zero: - raise RuntimeError("Adjusting sel is only supported when `set_davg_zero` is true!") + raise RuntimeError( + "Adjusting sel is only supported when `set_davg_zero` is true!" + ) # as set_davg_zero, self.davg is safely zero - self.davg = np.zeros([self.ntypes, self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) - new_dstd = np.ones([self.ntypes, self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) + self.davg = np.zeros([self.ntypes, self.ndescrpt]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) + new_dstd = np.ones([self.ntypes, self.ndescrpt]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) # shape of davg and dstd is (ntypes, ndescrpt), ndescrpt = 4*sel n_descpt = np.array(self.sel_a) * 4 n_descpt_old = np.array(sel) * 4 @@ -956,13 +1168,15 @@ def init_variables(self, start_index_old = np.roll(end_index_old, 1) start_index_old[0] = 0 - for nn, oo, ii, jj in zip(n_descpt, n_descpt_old, start_index, start_index_old): + for nn, oo, ii, jj in zip( + n_descpt, n_descpt_old, start_index, start_index_old + ): if nn < oo: # new size is smaller, copy part of std - new_dstd[:, ii:ii+nn] = self.dstd[:, jj:jj+nn] + new_dstd[:, ii : ii + nn] = self.dstd[:, jj : jj + nn] else: # new size is larger, copy all, the rest remains 1 - new_dstd[:, ii:ii+oo] = self.dstd[:, jj:jj+oo] + new_dstd[:, ii : ii + oo] = self.dstd[:, jj : jj + oo] self.dstd = new_dstd if self.original_sel is None: self.original_sel = sel diff --git a/deepmd/descriptor/se_a_ebd.py b/deepmd/descriptor/se_a_ebd.py index b1c7c2a81d..600cdf52eb 100644 --- 
a/deepmd/descriptor/se_a_ebd.py +++ b/deepmd/descriptor/se_a_ebd.py @@ -1,20 +1,39 @@ +from typing import ( + List, + Optional, + Tuple, +) + import numpy as np -from typing import Optional, Tuple, List -from deepmd.env import tf -from deepmd.common import get_activation_func, get_precision, add_data_requirement -from deepmd.utils.network import one_layer -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION -from deepmd.env import op_module -from deepmd.env import default_tf_session_config -from deepmd.utils.network import embedding_net -from .se_a import DescrptSeA -from .descriptor import Descriptor +from deepmd.common import ( + add_data_requirement, + get_activation_func, + get_precision, +) +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + default_tf_session_config, + op_module, + tf, +) +from deepmd.utils.network import ( + embedding_net, + one_layer, +) + +from .descriptor import ( + Descriptor, +) +from .se_a import ( + DescrptSeA, +) + @Descriptor.register("se_a_tpe") @Descriptor.register("se_a_ebd") -class DescrptSeAEbd (DescrptSeA): +class DescrptSeAEbd(DescrptSeA): """DeepPot-SE descriptor with type embedding approach. Parameters @@ -54,58 +73,61 @@ class DescrptSeAEbd (DescrptSeA): The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. 
""" - def __init__ (self, - rcut: float, - rcut_smth: float, - sel: List[str], - neuron: List[int] = [24,48,96], - axis_neuron: int = 8, - resnet_dt: bool = False, - trainable: bool = True, - seed: Optional[int] = None, - type_one_side: bool = True, - type_nchanl : int = 2, - type_nlayer : int = 1, - numb_aparam : int = 0, - set_davg_zero: bool = False, - activation_function: str = 'tanh', - precision: str = 'default', - exclude_types: List[List[int]] = [], + + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: List[str], + neuron: List[int] = [24, 48, 96], + axis_neuron: int = 8, + resnet_dt: bool = False, + trainable: bool = True, + seed: Optional[int] = None, + type_one_side: bool = True, + type_nchanl: int = 2, + type_nlayer: int = 1, + numb_aparam: int = 0, + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = "default", + exclude_types: List[List[int]] = [], ) -> None: """ Constructor """ - DescrptSeA.__init__(self, - rcut, - rcut_smth, - sel, - neuron = neuron, - axis_neuron = axis_neuron, - resnet_dt = resnet_dt, - trainable = trainable, - seed = seed, - type_one_side = type_one_side, - set_davg_zero = set_davg_zero, - activation_function = activation_function, - precision = precision + DescrptSeA.__init__( + self, + rcut, + rcut_smth, + sel, + neuron=neuron, + axis_neuron=axis_neuron, + resnet_dt=resnet_dt, + trainable=trainable, + seed=seed, + type_one_side=type_one_side, + set_davg_zero=set_davg_zero, + activation_function=activation_function, + precision=precision, ) self.type_nchanl = type_nchanl self.type_nlayer = type_nlayer self.type_one_side = type_one_side self.numb_aparam = numb_aparam if self.numb_aparam > 0: - add_data_requirement('aparam', 3, atomic=True, must=True, high_prec=False) + add_data_requirement("aparam", 3, atomic=True, must=True, high_prec=False) - - def build (self, - coord_ : tf.Tensor, - atype_ : tf.Tensor, - natoms : tf.Tensor, - box_ : tf.Tensor, - mesh : tf.Tensor, - input_dict : 
dict, - reuse : bool = None, - suffix : str = '' + def build( + self, + coord_: tf.Tensor, + atype_: tf.Tensor, + natoms: tf.Tensor, + box_: tf.Tensor, + mesh: tf.Tensor, + input_dict: dict, + reuse: bool = None, + suffix: str = "", ) -> tf.Tensor: """ Build the computational graph for the descriptor @@ -123,8 +145,8 @@ def build (self, natoms[i]: 2 <= i < Ntypes+2, number of type i atoms mesh For historical reasons, only the length of the Tensor matters. - if size of mesh == 6, pbc is assumed. - if size of mesh == 0, no-pbc is assumed. + if size of mesh == 6, pbc is assumed. + if size of mesh == 0, no-pbc is assumed. input_dict Dictionary for additional inputs reuse @@ -141,379 +163,445 @@ def build (self, for ii in range(self.ntypes): nei_type.append(ii * np.ones(self.sel_a[ii], dtype=int)) nei_type = np.concatenate(nei_type) - self.nei_type = tf.get_variable('t_nei_type', - [self.nnei], - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(nei_type)) - self.dout = DescrptSeA.build(self, coord_, atype_, natoms, box_, mesh, input_dict, suffix = suffix, reuse = reuse) - tf.summary.histogram('embedding_net_output', self.dout) + self.nei_type = tf.get_variable( + "t_nei_type", + [self.nnei], + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(nei_type), + ) + self.dout = DescrptSeA.build( + self, + coord_, + atype_, + natoms, + box_, + mesh, + input_dict, + suffix=suffix, + reuse=reuse, + ) + tf.summary.histogram("embedding_net_output", self.dout) return self.dout - - def _type_embed(self, - atype, - ndim = 1, - reuse = None, - suffix = '', - trainable = True): + def _type_embed(self, atype, ndim=1, reuse=None, suffix="", trainable=True): ebd_type = tf.cast(atype, self.filter_precision) ebd_type = ebd_type / float(self.ntypes) ebd_type = tf.reshape(ebd_type, [-1, ndim]) for ii in range(self.type_nlayer): - name = 'type_embed_layer_' + str(ii) - ebd_type = one_layer(ebd_type, - 
self.type_nchanl, - activation_fn = self.filter_activation_fn, - precision = self.filter_precision, - name = name, - reuse = reuse, - seed = self.seed + ii, - trainable = trainable) - name = 'type_embed_layer_' + str(self.type_nlayer) - ebd_type = one_layer(ebd_type, - self.type_nchanl, - activation_fn = None, - precision = self.filter_precision, - name = name, - reuse = reuse, - seed = self.seed + ii, - trainable = trainable) + name = "type_embed_layer_" + str(ii) + ebd_type = one_layer( + ebd_type, + self.type_nchanl, + activation_fn=self.filter_activation_fn, + precision=self.filter_precision, + name=name, + reuse=reuse, + seed=self.seed + ii, + trainable=trainable, + ) + name = "type_embed_layer_" + str(self.type_nlayer) + ebd_type = one_layer( + ebd_type, + self.type_nchanl, + activation_fn=None, + precision=self.filter_precision, + name=name, + reuse=reuse, + seed=self.seed + ii, + trainable=trainable, + ) ebd_type = tf.reshape(ebd_type, [tf.shape(atype)[0], self.type_nchanl]) - return ebd_type + return ebd_type - - def _embedding_net(self, - inputs, - natoms, - filter_neuron, - activation_fn=tf.nn.tanh, - stddev=1.0, - bavg=0.0, - name='linear', - reuse=None, - seed=None, - trainable = True): - ''' + def _embedding_net( + self, + inputs, + natoms, + filter_neuron, + activation_fn=tf.nn.tanh, + stddev=1.0, + bavg=0.0, + name="linear", + reuse=None, + seed=None, + trainable=True, + ): + """ inputs: nf x na x (nei x 4) outputs: nf x na x nei x output_size - ''' + """ # natom x (nei x 4) inputs = tf.reshape(inputs, [-1, self.ndescrpt]) shape = inputs.get_shape().as_list() outputs_size = [1] + filter_neuron with tf.variable_scope(name, reuse=reuse): xyz_scatter_total = [] - # with natom x (nei x 4) + # with natom x (nei x 4) inputs_i = inputs shape_i = inputs_i.get_shape().as_list() - # with (natom x nei) x 4 + # with (natom x nei) x 4 inputs_reshape = tf.reshape(inputs_i, [-1, 4]) # with (natom x nei) x 1 - xyz_scatter = tf.reshape(tf.slice(inputs_reshape, 
[0,0],[-1,1]),[-1,1]) + xyz_scatter = tf.reshape(tf.slice(inputs_reshape, [0, 0], [-1, 1]), [-1, 1]) # with (natom x nei) x out_size - xyz_scatter = embedding_net(xyz_scatter, - self.filter_neuron, - self.filter_precision, - activation_fn = activation_fn, - resnet_dt = self.filter_resnet_dt, - stddev = stddev, - bavg = bavg, - seed = seed, - trainable = trainable) + xyz_scatter = embedding_net( + xyz_scatter, + self.filter_neuron, + self.filter_precision, + activation_fn=activation_fn, + resnet_dt=self.filter_resnet_dt, + stddev=stddev, + bavg=bavg, + seed=seed, + trainable=trainable, + ) # natom x nei x out_size - xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1]//4, outputs_size[-1])) + xyz_scatter = tf.reshape( + xyz_scatter, (-1, shape_i[1] // 4, outputs_size[-1]) + ) xyz_scatter_total.append(xyz_scatter) # natom x nei x outputs_size xyz_scatter = tf.concat(xyz_scatter_total, axis=1) # nf x natom x nei x outputs_size - xyz_scatter = tf.reshape(xyz_scatter, [tf.shape(inputs)[0], natoms[0], self.nnei, outputs_size[-1]]) + xyz_scatter = tf.reshape( + xyz_scatter, [tf.shape(inputs)[0], natoms[0], self.nnei, outputs_size[-1]] + ) return xyz_scatter - - def _type_embedding_net_two_sides(self, - mat_g, - atype, - natoms, - name = '', - reuse = None, - seed = None, - trainable = True): + def _type_embedding_net_two_sides( + self, mat_g, atype, natoms, name="", reuse=None, seed=None, trainable=True + ): outputs_size = self.filter_neuron[-1] nframes = tf.shape(mat_g)[0] # (nf x natom x nei) x (outputs_size x chnl x chnl) mat_g = tf.reshape(mat_g, [nframes * natoms[0] * self.nnei, outputs_size]) - mat_g = one_layer(mat_g, - outputs_size * self.type_nchanl * self.type_nchanl, - activation_fn = None, - precision = self.filter_precision, - name = name+'_amplify', - reuse = reuse, - seed = self.seed, - trainable = trainable) + mat_g = one_layer( + mat_g, + outputs_size * self.type_nchanl * self.type_nchanl, + activation_fn=None, + precision=self.filter_precision, + 
name=name + "_amplify", + reuse=reuse, + seed=self.seed, + trainable=trainable, + ) # nf x natom x nei x outputs_size x chnl x chnl - mat_g = tf.reshape(mat_g, [nframes, natoms[0], self.nnei, outputs_size, self.type_nchanl, self.type_nchanl]) + mat_g = tf.reshape( + mat_g, + [ + nframes, + natoms[0], + self.nnei, + outputs_size, + self.type_nchanl, + self.type_nchanl, + ], + ) # nf x natom x outputs_size x chnl x nei x chnl - mat_g = tf.transpose(mat_g, perm = [0, 1, 3, 4, 2, 5]) + mat_g = tf.transpose(mat_g, perm=[0, 1, 3, 4, 2, 5]) # nf x natom x outputs_size x chnl x (nei x chnl) - mat_g = tf.reshape(mat_g, [nframes, natoms[0], outputs_size, self.type_nchanl, self.nnei * self.type_nchanl]) - + mat_g = tf.reshape( + mat_g, + [ + nframes, + natoms[0], + outputs_size, + self.type_nchanl, + self.nnei * self.type_nchanl, + ], + ) + # nei x nchnl - ebd_nei_type = self._type_embed(self.nei_type, - reuse = reuse, - trainable = True, - suffix = '') + ebd_nei_type = self._type_embed( + self.nei_type, reuse=reuse, trainable=True, suffix="" + ) # (nei x nchnl) ebd_nei_type = tf.reshape(ebd_nei_type, [self.nnei * self.type_nchanl]) # (nframes x natom) x nchnl - ebd_atm_type = self._type_embed(atype, - reuse = True, - trainable = True, - suffix = '') + ebd_atm_type = self._type_embed(atype, reuse=True, trainable=True, suffix="") # (nframes x natom x nchnl) - ebd_atm_type = tf.reshape(ebd_atm_type, [nframes * natoms[0] * self.type_nchanl]) + ebd_atm_type = tf.reshape( + ebd_atm_type, [nframes * natoms[0] * self.type_nchanl] + ) # nf x natom x outputs_size x chnl x (nei x chnl) mat_g = tf.multiply(mat_g, ebd_nei_type) # nf x natom x outputs_size x chnl x nei x chnl - mat_g = tf.reshape(mat_g, [nframes, natoms[0], outputs_size, self.type_nchanl, self.nnei, self.type_nchanl]) - # nf x natom x outputs_size x chnl x nei - mat_g = tf.reduce_mean(mat_g, axis = 5) + mat_g = tf.reshape( + mat_g, + [ + nframes, + natoms[0], + outputs_size, + self.type_nchanl, + self.nnei, + 
self.type_nchanl, + ], + ) + # nf x natom x outputs_size x chnl x nei + mat_g = tf.reduce_mean(mat_g, axis=5) # outputs_size x nei x nf x natom x chnl - mat_g = tf.transpose(mat_g, perm = [2, 4, 0, 1, 3]) + mat_g = tf.transpose(mat_g, perm=[2, 4, 0, 1, 3]) # outputs_size x nei x (nf x natom x chnl) - mat_g = tf.reshape(mat_g, [outputs_size, self.nnei, nframes * natoms[0] * self.type_nchanl]) + mat_g = tf.reshape( + mat_g, [outputs_size, self.nnei, nframes * natoms[0] * self.type_nchanl] + ) # outputs_size x nei x (nf x natom x chnl) mat_g = tf.multiply(mat_g, ebd_atm_type) # outputs_size x nei x nf x natom x chnl - mat_g = tf.reshape(mat_g, [outputs_size, self.nnei, nframes, natoms[0], self.type_nchanl]) + mat_g = tf.reshape( + mat_g, [outputs_size, self.nnei, nframes, natoms[0], self.type_nchanl] + ) # outputs_size x nei x nf x natom - mat_g = tf.reduce_mean(mat_g, axis = 4) + mat_g = tf.reduce_mean(mat_g, axis=4) # nf x natom x nei x outputs_size - mat_g = tf.transpose(mat_g, perm = [2, 3, 1, 0]) + mat_g = tf.transpose(mat_g, perm=[2, 3, 1, 0]) # (nf x natom) x nei x outputs_size mat_g = tf.reshape(mat_g, [nframes * natoms[0], self.nnei, outputs_size]) return mat_g - - def _type_embedding_net_one_side(self, - mat_g, - atype, - natoms, - name = '', - reuse = None, - seed = None, - trainable = True): + def _type_embedding_net_one_side( + self, mat_g, atype, natoms, name="", reuse=None, seed=None, trainable=True + ): outputs_size = self.filter_neuron[-1] nframes = tf.shape(mat_g)[0] # (nf x natom x nei) x (outputs_size x chnl x chnl) mat_g = tf.reshape(mat_g, [nframes * natoms[0] * self.nnei, outputs_size]) - mat_g = one_layer(mat_g, - outputs_size * self.type_nchanl, - activation_fn = None, - precision = self.filter_precision, - name = name+'_amplify', - reuse = reuse, - seed = self.seed, - trainable = trainable) + mat_g = one_layer( + mat_g, + outputs_size * self.type_nchanl, + activation_fn=None, + precision=self.filter_precision, + name=name + "_amplify", + 
reuse=reuse, + seed=self.seed, + trainable=trainable, + ) # nf x natom x nei x outputs_size x chnl - mat_g = tf.reshape(mat_g, [nframes, natoms[0], self.nnei, outputs_size, self.type_nchanl]) + mat_g = tf.reshape( + mat_g, [nframes, natoms[0], self.nnei, outputs_size, self.type_nchanl] + ) # nf x natom x outputs_size x nei x chnl - mat_g = tf.transpose(mat_g, perm = [0, 1, 3, 2, 4]) + mat_g = tf.transpose(mat_g, perm=[0, 1, 3, 2, 4]) # nf x natom x outputs_size x (nei x chnl) - mat_g = tf.reshape(mat_g, [nframes, natoms[0], outputs_size, self.nnei * self.type_nchanl]) + mat_g = tf.reshape( + mat_g, [nframes, natoms[0], outputs_size, self.nnei * self.type_nchanl] + ) # nei x nchnl - ebd_nei_type = self._type_embed(self.nei_type, - reuse = reuse, - trainable = True, - suffix = '') + ebd_nei_type = self._type_embed( + self.nei_type, reuse=reuse, trainable=True, suffix="" + ) # (nei x nchnl) ebd_nei_type = tf.reshape(ebd_nei_type, [self.nnei * self.type_nchanl]) # nf x natom x outputs_size x (nei x chnl) mat_g = tf.multiply(mat_g, ebd_nei_type) # nf x natom x outputs_size x nei x chnl - mat_g = tf.reshape(mat_g, [nframes, natoms[0], outputs_size, self.nnei, self.type_nchanl]) - # nf x natom x outputs_size x nei - mat_g = tf.reduce_mean(mat_g, axis = 4) + mat_g = tf.reshape( + mat_g, [nframes, natoms[0], outputs_size, self.nnei, self.type_nchanl] + ) + # nf x natom x outputs_size x nei + mat_g = tf.reduce_mean(mat_g, axis=4) # nf x natom x nei x outputs_size - mat_g = tf.transpose(mat_g, perm = [0, 1, 3, 2]) + mat_g = tf.transpose(mat_g, perm=[0, 1, 3, 2]) # (nf x natom) x nei x outputs_size mat_g = tf.reshape(mat_g, [nframes * natoms[0], self.nnei, outputs_size]) return mat_g - - def _type_embedding_net_one_side_aparam(self, - mat_g, - atype, - natoms, - aparam, - name = '', - reuse = None, - seed = None, - trainable = True): + def _type_embedding_net_one_side_aparam( + self, + mat_g, + atype, + natoms, + aparam, + name="", + reuse=None, + seed=None, + trainable=True, 
+ ): outputs_size = self.filter_neuron[-1] nframes = tf.shape(mat_g)[0] # (nf x natom x nei) x (outputs_size x chnl x chnl) mat_g = tf.reshape(mat_g, [nframes * natoms[0] * self.nnei, outputs_size]) - mat_g = one_layer(mat_g, - outputs_size * self.type_nchanl, - activation_fn = None, - precision = self.filter_precision, - name = name+'_amplify', - reuse = reuse, - seed = self.seed, - trainable = trainable) + mat_g = one_layer( + mat_g, + outputs_size * self.type_nchanl, + activation_fn=None, + precision=self.filter_precision, + name=name + "_amplify", + reuse=reuse, + seed=self.seed, + trainable=trainable, + ) # nf x natom x nei x outputs_size x chnl - mat_g = tf.reshape(mat_g, [nframes, natoms[0], self.nnei, outputs_size, self.type_nchanl]) + mat_g = tf.reshape( + mat_g, [nframes, natoms[0], self.nnei, outputs_size, self.type_nchanl] + ) # outputs_size x nf x natom x nei x chnl - mat_g = tf.transpose(mat_g, perm = [3, 0, 1, 2, 4]) + mat_g = tf.transpose(mat_g, perm=[3, 0, 1, 2, 4]) # outputs_size x (nf x natom x nei x chnl) - mat_g = tf.reshape(mat_g, [outputs_size, nframes * natoms[0] * self.nnei * self.type_nchanl]) - # nf x natom x nnei - embed_type = tf.tile(tf.reshape(self.nei_type, [1, self.nnei]), - [nframes * natoms[0], 1]) + mat_g = tf.reshape( + mat_g, [outputs_size, nframes * natoms[0] * self.nnei * self.type_nchanl] + ) + # nf x natom x nnei + embed_type = tf.tile( + tf.reshape(self.nei_type, [1, self.nnei]), [nframes * natoms[0], 1] + ) # (nf x natom x nnei) x 1 - embed_type = tf.reshape(embed_type, [nframes * natoms[0] * self.nnei, 1]) + embed_type = tf.reshape(embed_type, [nframes * natoms[0] * self.nnei, 1]) # nf x (natom x naparam) aparam = tf.reshape(aparam, [nframes, -1]) - # nf x natom x nnei x naparam - embed_aparam = op_module.map_aparam(aparam, self.nlist, natoms, n_a_sel = self.nnei_a, n_r_sel = self.nnei_r) + # nf x natom x nnei x naparam + embed_aparam = op_module.map_aparam( + aparam, self.nlist, natoms, n_a_sel=self.nnei_a, 
n_r_sel=self.nnei_r + ) # (nf x natom x nnei) x naparam - embed_aparam = tf.reshape(embed_aparam, [nframes * natoms[0] * self.nnei, self.numb_aparam]) + embed_aparam = tf.reshape( + embed_aparam, [nframes * natoms[0] * self.nnei, self.numb_aparam] + ) # (nf x natom x nnei) x (naparam+1) - embed_input = tf.concat((embed_type, embed_aparam), axis = 1) - + embed_input = tf.concat((embed_type, embed_aparam), axis=1) + # (nf x natom x nnei) x nchnl - ebd_nei_type = self._type_embed(embed_input, - ndim = self.numb_aparam + 1, - reuse = reuse, - trainable = True, - suffix = '') + ebd_nei_type = self._type_embed( + embed_input, + ndim=self.numb_aparam + 1, + reuse=reuse, + trainable=True, + suffix="", + ) # (nf x natom x nei x nchnl) - ebd_nei_type = tf.reshape(ebd_nei_type, [nframes * natoms[0] * self.nnei * self.type_nchanl]) + ebd_nei_type = tf.reshape( + ebd_nei_type, [nframes * natoms[0] * self.nnei * self.type_nchanl] + ) # outputs_size x (nf x natom x nei x chnl) mat_g = tf.multiply(mat_g, ebd_nei_type) # outputs_size x nf x natom x nei x chnl - mat_g = tf.reshape(mat_g, [outputs_size, nframes, natoms[0], self.nnei, self.type_nchanl]) - # outputs_size x nf x natom x nei - mat_g = tf.reduce_mean(mat_g, axis = 4) + mat_g = tf.reshape( + mat_g, [outputs_size, nframes, natoms[0], self.nnei, self.type_nchanl] + ) + # outputs_size x nf x natom x nei + mat_g = tf.reduce_mean(mat_g, axis=4) # nf x natom x nei x outputs_size - mat_g = tf.transpose(mat_g, perm = [1, 2, 3, 0]) + mat_g = tf.transpose(mat_g, perm=[1, 2, 3, 0]) # (nf x natom) x nei x outputs_size mat_g = tf.reshape(mat_g, [nframes * natoms[0], self.nnei, outputs_size]) return mat_g - - def _pass_filter(self, - inputs, - atype, - natoms, - input_dict, - reuse = None, - suffix = '', - trainable = True) : + def _pass_filter( + self, inputs, atype, natoms, input_dict, reuse=None, suffix="", trainable=True + ): # nf x na x ndescrpt # nf x na x (nnei x 4) inputs = tf.reshape(inputs, [-1, natoms[0], self.ndescrpt]) - 
layer, qmat = self._ebd_filter(tf.cast(inputs, self.filter_precision), - atype, - natoms, - input_dict, - name='filter_type_all'+suffix, - reuse=reuse, - seed = self.seed, - trainable = trainable, - activation_fn = self.filter_activation_fn) - output = tf.reshape(layer, [tf.shape(inputs)[0], natoms[0], self.get_dim_out()]) - output_qmat = tf.reshape(qmat, [tf.shape(inputs)[0], natoms[0], self.get_dim_rot_mat_1() * 3]) + layer, qmat = self._ebd_filter( + tf.cast(inputs, self.filter_precision), + atype, + natoms, + input_dict, + name="filter_type_all" + suffix, + reuse=reuse, + seed=self.seed, + trainable=trainable, + activation_fn=self.filter_activation_fn, + ) + output = tf.reshape(layer, [tf.shape(inputs)[0], natoms[0], self.get_dim_out()]) + output_qmat = tf.reshape( + qmat, [tf.shape(inputs)[0], natoms[0], self.get_dim_rot_mat_1() * 3] + ) return output, output_qmat - - def _ebd_filter(self, - inputs, - atype, - natoms, - input_dict, - activation_fn=tf.nn.tanh, - stddev=1.0, - bavg=0.0, - name='linear', - reuse=None, - seed=None, - trainable = True): + def _ebd_filter( + self, + inputs, + atype, + natoms, + input_dict, + activation_fn=tf.nn.tanh, + stddev=1.0, + bavg=0.0, + name="linear", + reuse=None, + seed=None, + trainable=True, + ): outputs_size = self.filter_neuron[-1] outputs_size_2 = self.n_axis_neuron # nf x natom x (nei x 4) nframes = tf.shape(inputs)[0] shape = tf.reshape(inputs, [-1, self.ndescrpt]).get_shape().as_list() - - # nf x natom x nei x outputs_size - mat_g = self._embedding_net(inputs, - natoms, - self.filter_neuron, - activation_fn = activation_fn, - stddev = stddev, - bavg = bavg, - name = name, - reuse = reuse, - seed = seed, - trainable = trainable) - # nf x natom x nei x outputs_size + + # nf x natom x nei x outputs_size + mat_g = self._embedding_net( + inputs, + natoms, + self.filter_neuron, + activation_fn=activation_fn, + stddev=stddev, + bavg=bavg, + name=name, + reuse=reuse, + seed=seed, + trainable=trainable, + ) + # nf x natom x 
nei x outputs_size mat_g = tf.reshape(mat_g, [nframes, natoms[0], self.nnei, outputs_size]) - + # (nf x natom) x nei x outputs_size if self.type_one_side: if self.numb_aparam > 0: - aparam = input_dict['aparam'] - xyz_scatter \ - = self._type_embedding_net_one_side_aparam(mat_g, - atype, - natoms, - aparam, - name = name, - reuse = reuse, - seed = seed, - trainable = trainable) + aparam = input_dict["aparam"] + xyz_scatter = self._type_embedding_net_one_side_aparam( + mat_g, + atype, + natoms, + aparam, + name=name, + reuse=reuse, + seed=seed, + trainable=trainable, + ) else: - xyz_scatter \ - = self._type_embedding_net_one_side(mat_g, - atype, - natoms, - name = name, - reuse = reuse, - seed = seed, - trainable = trainable) + xyz_scatter = self._type_embedding_net_one_side( + mat_g, + atype, + natoms, + name=name, + reuse=reuse, + seed=seed, + trainable=trainable, + ) else: - xyz_scatter \ - = self._type_embedding_net_two_sides(mat_g, - atype, - natoms, - name = name, - reuse = reuse, - seed = seed, - trainable = trainable) - + xyz_scatter = self._type_embedding_net_two_sides( + mat_g, + atype, + natoms, + name=name, + reuse=reuse, + seed=seed, + trainable=trainable, + ) + # natom x nei x 4 - inputs_reshape = tf.reshape(inputs, [-1, shape[1]//4, 4]) + inputs_reshape = tf.reshape(inputs, [-1, shape[1] // 4, 4]) # natom x 4 x outputs_size - xyz_scatter_1 = tf.matmul(inputs_reshape, xyz_scatter, transpose_a = True) + xyz_scatter_1 = tf.matmul(inputs_reshape, xyz_scatter, transpose_a=True) xyz_scatter_1 = xyz_scatter_1 * (4.0 / shape[1]) # natom x 4 x outputs_size_2 - xyz_scatter_2 = tf.slice(xyz_scatter_1, [0,0,0],[-1,-1,outputs_size_2]) + xyz_scatter_2 = tf.slice(xyz_scatter_1, [0, 0, 0], [-1, -1, outputs_size_2]) # # natom x 3 x outputs_size_2 # qmat = tf.slice(xyz_scatter_2, [0,1,0], [-1, 3, -1]) # natom x 3 x outputs_size_1 - qmat = tf.slice(xyz_scatter_1, [0,1,0], [-1, 3, -1]) + qmat = tf.slice(xyz_scatter_1, [0, 1, 0], [-1, 3, -1]) # natom x outputs_size_2 x 3 
- qmat = tf.transpose(qmat, perm = [0, 2, 1]) + qmat = tf.transpose(qmat, perm=[0, 2, 1]) # natom x outputs_size x outputs_size_2 - result = tf.matmul(xyz_scatter_1, xyz_scatter_2, transpose_a = True) + result = tf.matmul(xyz_scatter_1, xyz_scatter_2, transpose_a=True) # natom x (outputs_size x outputs_size_2) result = tf.reshape(result, [-1, outputs_size_2 * outputs_size]) return result, qmat - diff --git a/deepmd/descriptor/se_a_ef.py b/deepmd/descriptor/se_a_ef.py index c272ed7491..d269c9fcd8 100644 --- a/deepmd/descriptor/se_a_ef.py +++ b/deepmd/descriptor/se_a_ef.py @@ -1,18 +1,35 @@ +from typing import ( + List, + Optional, + Tuple, +) + import numpy as np -from typing import Optional, Tuple, List - -from deepmd.env import tf -from deepmd.common import add_data_requirement -from deepmd.utils.sess import run_sess -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION -from deepmd.env import op_module -from deepmd.env import default_tf_session_config -from .se_a import DescrptSeA -from .descriptor import Descriptor + +from deepmd.common import ( + add_data_requirement, +) +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + default_tf_session_config, + op_module, + tf, +) +from deepmd.utils.sess import ( + run_sess, +) + +from .descriptor import ( + Descriptor, +) +from .se_a import ( + DescrptSeA, +) + @Descriptor.register("se_a_ef") -class DescrptSeAEf (Descriptor): +class DescrptSeAEf(Descriptor): """ Parameters @@ -48,27 +65,29 @@ class DescrptSeAEf (Descriptor): uniform_seed Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed """ - def __init__(self, - rcut: float, - rcut_smth: float, - sel: List[str], - neuron: List[int] = [24,48,96], - axis_neuron: int = 8, - resnet_dt: bool = False, - trainable: bool = True, - seed: Optional[int] = None, - type_one_side: bool = True, - exclude_types: List[List[int]] = [], - set_davg_zero: bool = 
False, - activation_function: str = 'tanh', - precision: str = 'default', - uniform_seed = False - ) -> None: + + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: List[str], + neuron: List[int] = [24, 48, 96], + axis_neuron: int = 8, + resnet_dt: bool = False, + trainable: bool = True, + seed: Optional[int] = None, + type_one_side: bool = True, + exclude_types: List[List[int]] = [], + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = "default", + uniform_seed=False, + ) -> None: """ Constructor """ self.descrpt_para = DescrptSeAEfLower( - op_module.descrpt_se_a_ef_para, + op_module.descrpt_se_a_ef_para, rcut, rcut_smth, sel, @@ -102,25 +121,25 @@ def __init__(self, uniform_seed, ) - def get_rcut (self) -> float: + def get_rcut(self) -> float: """ Returns the cut-off radius """ return self.descrpt_vert.rcut_r - def get_ntypes (self) -> int: + def get_ntypes(self) -> int: """ Returns the number of atom types """ return self.descrpt_vert.ntypes - def get_dim_out (self) -> int: + def get_dim_out(self) -> int: """ Returns the output dimension of this descriptor """ return self.descrpt_vert.get_dim_out() + self.descrpt_para.get_dim_out() - def get_dim_rot_mat_1 (self) -> int: + def get_dim_rot_mat_1(self) -> int: """ Returns the first dimension of the rotation matrix. 
The rotation is of shape dim_1 x 3 """ @@ -132,8 +151,7 @@ def get_rot_mat(self) -> tf.Tensor: """ return self.qmat - - def get_nlist (self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: """ Returns ------- @@ -146,23 +164,25 @@ def get_nlist (self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: sel_r The number of neighbors with only radial information """ - return \ - self.descrpt_vert.nlist, \ - self.descrpt_vert.rij, \ - self.descrpt_vert.sel_a, \ - self.descrpt_vert.sel_r - - def compute_input_stats (self, - data_coord : list, - data_box : list, - data_atype : list, - natoms_vec : list, - mesh : list, - input_dict : dict - ) -> None : + return ( + self.descrpt_vert.nlist, + self.descrpt_vert.rij, + self.descrpt_vert.sel_a, + self.descrpt_vert.sel_r, + ) + + def compute_input_stats( + self, + data_coord: list, + data_box: list, + data_atype: list, + natoms_vec: list, + mesh: list, + input_dict: dict, + ) -> None: """ Compute the statisitcs (avg and std) of the training data. The input will be normalized by the statistics. 
- + Parameters ---------- data_coord @@ -178,18 +198,23 @@ def compute_input_stats (self, input_dict Dictionary for additional input """ - self.descrpt_vert.compute_input_stats(data_coord, data_box, data_atype, natoms_vec, mesh, input_dict) - self.descrpt_para.compute_input_stats(data_coord, data_box, data_atype, natoms_vec, mesh, input_dict) - - def build (self, - coord_ : tf.Tensor, - atype_ : tf.Tensor, - natoms : tf.Tensor, - box_ : tf.Tensor, - mesh : tf.Tensor, - input_dict : dict, - reuse : bool = None, - suffix : str = '' + self.descrpt_vert.compute_input_stats( + data_coord, data_box, data_atype, natoms_vec, mesh, input_dict + ) + self.descrpt_para.compute_input_stats( + data_coord, data_box, data_atype, natoms_vec, mesh, input_dict + ) + + def build( + self, + coord_: tf.Tensor, + atype_: tf.Tensor, + natoms: tf.Tensor, + box_: tf.Tensor, + mesh: tf.Tensor, + input_dict: dict, + reuse: bool = None, + suffix: str = "", ) -> tf.Tensor: """ Build the computational graph for the descriptor @@ -207,8 +232,8 @@ def build (self, natoms[i]: 2 <= i < Ntypes+2, number of type i atoms mesh For historical reasons, only the length of the Tensor matters. - if size of mesh == 6, pbc is assumed. - if size of mesh == 0, no-pbc is assumed. + if size of mesh == 6, pbc is assumed. + if size of mesh == 0, no-pbc is assumed. input_dict Dictionary for additional inputs. Should have 'efield'. 
reuse @@ -221,23 +246,30 @@ def build (self, descriptor The output descriptor """ - self.dout_vert = self.descrpt_vert.build(coord_, atype_, natoms, box_, mesh, input_dict) - self.dout_para = self.descrpt_para.build(coord_, atype_, natoms, box_, mesh, input_dict, reuse = True) + self.dout_vert = self.descrpt_vert.build( + coord_, atype_, natoms, box_, mesh, input_dict + ) + self.dout_para = self.descrpt_para.build( + coord_, atype_, natoms, box_, mesh, input_dict, reuse=True + ) coord = tf.reshape(coord_, [-1, natoms[1] * 3]) nframes = tf.shape(coord)[0] - self.dout_vert = tf.reshape(self.dout_vert, [nframes * natoms[0], self.descrpt_vert.get_dim_out()]) - self.dout_para = tf.reshape(self.dout_para, [nframes * natoms[0], self.descrpt_para.get_dim_out()]) - self.dout = tf.concat([self.dout_vert, self.dout_para], axis = 1) + self.dout_vert = tf.reshape( + self.dout_vert, [nframes * natoms[0], self.descrpt_vert.get_dim_out()] + ) + self.dout_para = tf.reshape( + self.dout_para, [nframes * natoms[0], self.descrpt_para.get_dim_out()] + ) + self.dout = tf.concat([self.dout_vert, self.dout_para], axis=1) self.dout = tf.reshape(self.dout, [nframes, natoms[0], self.get_dim_out()]) self.qmat = self.descrpt_vert.qmat + self.descrpt_para.qmat - tf.summary.histogram('embedding_net_output', self.dout) + tf.summary.histogram("embedding_net_output", self.dout) return self.dout - def prod_force_virial(self, - atom_ener : tf.Tensor, - natoms : tf.Tensor + def prod_force_virial( + self, atom_ener: tf.Tensor, natoms: tf.Tensor ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """ Compute force and virial @@ -261,39 +293,39 @@ def prod_force_virial(self, atom_virial The atomic virial """ - f_vert, v_vert, av_vert \ - = self.descrpt_vert.prod_force_virial(atom_ener, natoms) - f_para, v_para, av_para \ - = self.descrpt_para.prod_force_virial(atom_ener, natoms) + f_vert, v_vert, av_vert = self.descrpt_vert.prod_force_virial(atom_ener, natoms) + f_para, v_para, av_para = 
self.descrpt_para.prod_force_virial(atom_ener, natoms) force = f_vert + f_para virial = v_vert + v_para atom_vir = av_vert + av_para return force, virial, atom_vir -class DescrptSeAEfLower (DescrptSeA): +class DescrptSeAEfLower(DescrptSeA): """ Helper class for implementing DescrptSeAEf """ - def __init__ (self, - op, - rcut: float, - rcut_smth: float, - sel: List[str], - neuron: List[int] = [24,48,96], - axis_neuron: int = 8, - resnet_dt: bool = False, - trainable: bool = True, - seed: Optional[int] = None, - type_one_side: bool = True, - exclude_types: List[List[int]] = [], - set_davg_zero: bool = False, - activation_function: str = 'tanh', - precision: str = 'default', - uniform_seed : bool = False, + + def __init__( + self, + op, + rcut: float, + rcut_smth: float, + sel: List[str], + neuron: List[int] = [24, 48, 96], + axis_neuron: int = 8, + resnet_dt: bool = False, + trainable: bool = True, + seed: Optional[int] = None, + type_one_side: bool = True, + exclude_types: List[List[int]] = [], + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = "default", + uniform_seed: bool = False, ) -> None: DescrptSeA.__init__( - self, + self, rcut, rcut_smth, sel, @@ -307,7 +339,7 @@ def __init__ (self, set_davg_zero, activation_function, precision, - uniform_seed + uniform_seed, ) self.sel_a = sel self.rcut_r = rcut @@ -320,9 +352,9 @@ def __init__ (self, self.op = op # descrpt config - self.sel_r = [ 0 for ii in range(len(self.sel_a)) ] + self.sel_r = [0 for ii in range(len(self.sel_a))] self.ntypes = len(self.sel_a) - assert(self.ntypes == len(self.sel_r)) + assert self.ntypes == len(self.sel_r) self.rcut_a = -1 # numb of neighbors and numb of descrptors self.nnei_a = np.cumsum(self.sel_a)[-1] @@ -335,46 +367,55 @@ def __init__ (self, self.dstd = None self.davg = None - add_data_requirement('efield', 3, atomic=True, must=True, high_prec=False) + add_data_requirement("efield", 3, atomic=True, must=True, high_prec=False) self.place_holders 
= {} - avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) - std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) + avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) + std_ones = np.ones([self.ntypes, self.ndescrpt]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) sub_graph = tf.Graph() with sub_graph.as_default(): - name_pfx = 'd_sea_ef_' - for ii in ['coord', 'box']: - self.place_holders[ii] = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None], name = name_pfx+'t_'+ii) - self.place_holders['type'] = tf.placeholder(tf.int32, [None, None], name=name_pfx+'t_type') - self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes+2], name=name_pfx+'t_natoms') - self.place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name=name_pfx+'t_mesh') - self.place_holders['efield'] = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx+'t_efield') - self.stat_descrpt, descrpt_deriv, rij, nlist \ - = self.op(self.place_holders['coord'], - self.place_holders['type'], - self.place_holders['natoms_vec'], - self.place_holders['box'], - self.place_holders['default_mesh'], - self.place_holders['efield'], - tf.constant(avg_zero), - tf.constant(std_ones), - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - rcut_r_smth = self.rcut_r_smth, - sel_a = self.sel_a, - sel_r = self.sel_r) - self.sub_sess = tf.Session(graph = sub_graph, config=default_tf_session_config) - - - - def compute_input_stats (self, - data_coord, - data_box, - data_atype, - natoms_vec, - mesh, - input_dict) : - data_efield = input_dict['efield'] + name_pfx = "d_sea_ef_" + for ii in ["coord", "box"]: + self.place_holders[ii] = tf.placeholder( + GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_" + ii + ) + self.place_holders["type"] = tf.placeholder( + tf.int32, [None, None], name=name_pfx + "t_type" + ) + self.place_holders["natoms_vec"] = tf.placeholder( + tf.int32, 
[self.ntypes + 2], name=name_pfx + "t_natoms" + ) + self.place_holders["default_mesh"] = tf.placeholder( + tf.int32, [None], name=name_pfx + "t_mesh" + ) + self.place_holders["efield"] = tf.placeholder( + GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_efield" + ) + self.stat_descrpt, descrpt_deriv, rij, nlist = self.op( + self.place_holders["coord"], + self.place_holders["type"], + self.place_holders["natoms_vec"], + self.place_holders["box"], + self.place_holders["default_mesh"], + self.place_holders["efield"], + tf.constant(avg_zero), + tf.constant(std_ones), + rcut_a=self.rcut_a, + rcut_r=self.rcut_r, + rcut_r_smth=self.rcut_r_smth, + sel_a=self.sel_a, + sel_r=self.sel_r, + ) + self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) + + def compute_input_stats( + self, data_coord, data_box, data_atype, natoms_vec, mesh, input_dict + ): + data_efield = input_dict["efield"] all_davg = [] all_dstd = [] if True: @@ -383,26 +424,30 @@ def compute_input_stats (self, sumn = [] sumr2 = [] suma2 = [] - for cc,bb,tt,nn,mm,ee in zip(data_coord,data_box,data_atype,natoms_vec,mesh,data_efield) : - sysr,sysr2,sysa,sysa2,sysn \ - = self._compute_dstats_sys_smth(cc,bb,tt,nn,mm,ee) + for cc, bb, tt, nn, mm, ee in zip( + data_coord, data_box, data_atype, natoms_vec, mesh, data_efield + ): + sysr, sysr2, sysa, sysa2, sysn = self._compute_dstats_sys_smth( + cc, bb, tt, nn, mm, ee + ) sumr.append(sysr) suma.append(sysa) sumn.append(sysn) sumr2.append(sysr2) suma2.append(sysa2) - sumr = np.sum(sumr, axis = 0) - suma = np.sum(suma, axis = 0) - sumn = np.sum(sumn, axis = 0) - sumr2 = np.sum(sumr2, axis = 0) - suma2 = np.sum(suma2, axis = 0) - for type_i in range(self.ntypes) : - davgunit = [sumr[type_i]/sumn[type_i], 0, 0, 0] - dstdunit = [self._compute_std(sumr2[type_i], sumr[type_i], sumn[type_i]), - self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), - self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), - 
self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]) - ] + sumr = np.sum(sumr, axis=0) + suma = np.sum(suma, axis=0) + sumn = np.sum(sumn, axis=0) + sumr2 = np.sum(sumr2, axis=0) + suma2 = np.sum(suma2, axis=0) + for type_i in range(self.ntypes): + davgunit = [sumr[type_i] / sumn[type_i], 0, 0, 0] + dstdunit = [ + self._compute_std(sumr2[type_i], sumr[type_i], sumn[type_i]), + self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), + self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), + self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), + ] davg = np.tile(davgunit, self.ndescrpt // 4) dstd = np.tile(dstdunit, self.ndescrpt // 4) all_davg.append(davg) @@ -412,107 +457,106 @@ def compute_input_stats (self, self.dstd = np.array(all_dstd) def _normalize_3d(self, a): - na = tf.norm(a, axis = 1) - na = tf.tile(tf.reshape(na, [-1,1]), tf.constant([1, 3])) + na = tf.norm(a, axis=1) + na = tf.tile(tf.reshape(na, [-1, 1]), tf.constant([1, 3])) return tf.divide(a, na) - def build (self, - coord_, - atype_, - natoms, - box_, - mesh, - input_dict, - suffix = '', - reuse = None): - efield = input_dict['efield'] + def build( + self, coord_, atype_, natoms, box_, mesh, input_dict, suffix="", reuse=None + ): + efield = input_dict["efield"] davg = self.davg dstd = self.dstd - with tf.variable_scope('descrpt_attr' + suffix, reuse = reuse) : + with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse): if davg is None: - davg = np.zeros([self.ntypes, self.ndescrpt]) + davg = np.zeros([self.ntypes, self.ndescrpt]) if dstd is None: - dstd = np.ones ([self.ntypes, self.ndescrpt]) - t_rcut = tf.constant(np.max([self.rcut_r, self.rcut_a]), - name = 'rcut', - dtype = GLOBAL_TF_FLOAT_PRECISION) - t_ntypes = tf.constant(self.ntypes, - name = 'ntypes', - dtype = tf.int32) - t_ndescrpt = tf.constant(self.ndescrpt, - name = 'ndescrpt', - dtype = tf.int32) - t_sel = tf.constant(self.sel_a, - name = 'sel', - dtype = tf.int32) - self.t_avg = 
tf.get_variable('t_avg', - davg.shape, - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(davg)) - self.t_std = tf.get_variable('t_std', - dstd.shape, - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(dstd)) - - coord = tf.reshape (coord_, [-1, natoms[1] * 3]) - box = tf.reshape (box_, [-1, 9]) - atype = tf.reshape (atype_, [-1, natoms[1]]) + dstd = np.ones([self.ntypes, self.ndescrpt]) + t_rcut = tf.constant( + np.max([self.rcut_r, self.rcut_a]), + name="rcut", + dtype=GLOBAL_TF_FLOAT_PRECISION, + ) + t_ntypes = tf.constant(self.ntypes, name="ntypes", dtype=tf.int32) + t_ndescrpt = tf.constant(self.ndescrpt, name="ndescrpt", dtype=tf.int32) + t_sel = tf.constant(self.sel_a, name="sel", dtype=tf.int32) + self.t_avg = tf.get_variable( + "t_avg", + davg.shape, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(davg), + ) + self.t_std = tf.get_variable( + "t_std", + dstd.shape, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(dstd), + ) + + coord = tf.reshape(coord_, [-1, natoms[1] * 3]) + box = tf.reshape(box_, [-1, 9]) + atype = tf.reshape(atype_, [-1, natoms[1]]) efield = tf.reshape(efield, [-1, 3]) efield = self._normalize_3d(efield) efield = tf.reshape(efield, [-1, natoms[0] * 3]) - self.descrpt, self.descrpt_deriv, self.rij, self.nlist \ - = self.op (coord, - atype, - natoms, - box, - mesh, - efield, - self.t_avg, - self.t_std, - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - rcut_r_smth = self.rcut_r_smth, - sel_a = self.sel_a, - sel_r = self.sel_r) + self.descrpt, self.descrpt_deriv, self.rij, self.nlist = self.op( + coord, + atype, + natoms, + box, + mesh, + efield, + self.t_avg, + self.t_std, + rcut_a=self.rcut_a, + rcut_r=self.rcut_r, + rcut_r_smth=self.rcut_r_smth, + sel_a=self.sel_a, + sel_r=self.sel_r, + ) self.descrpt_reshape = tf.reshape(self.descrpt, [-1, self.ndescrpt]) - 
self.descrpt_reshape = tf.identity(self.descrpt_reshape, name = 'o_rmat') - self.descrpt_deriv = tf.identity(self.descrpt_deriv, name = 'o_rmat_deriv') - self.rij = tf.identity(self.rij, name = 'o_rij') - self.nlist = tf.identity(self.nlist, name = 'o_nlist') + self.descrpt_reshape = tf.identity(self.descrpt_reshape, name="o_rmat") + self.descrpt_deriv = tf.identity(self.descrpt_deriv, name="o_rmat_deriv") + self.rij = tf.identity(self.rij, name="o_rij") + self.nlist = tf.identity(self.nlist, name="o_nlist") # only used when tensorboard was set as true - tf.summary.histogram('descrpt', self.descrpt) - tf.summary.histogram('rij', self.rij) - tf.summary.histogram('nlist', self.nlist) - - self.dout, self.qmat = self._pass_filter(self.descrpt_reshape, atype, natoms, input_dict, suffix = suffix, reuse = reuse, trainable = self.trainable) - tf.summary.histogram('embedding_net_output', self.dout) + tf.summary.histogram("descrpt", self.descrpt) + tf.summary.histogram("rij", self.rij) + tf.summary.histogram("nlist", self.nlist) + + self.dout, self.qmat = self._pass_filter( + self.descrpt_reshape, + atype, + natoms, + input_dict, + suffix=suffix, + reuse=reuse, + trainable=self.trainable, + ) + tf.summary.histogram("embedding_net_output", self.dout) return self.dout - - - def _compute_dstats_sys_smth (self, - data_coord, - data_box, - data_atype, - natoms_vec, - mesh, - data_efield) : - dd_all \ - = run_sess(self.sub_sess, self.stat_descrpt, - feed_dict = { - self.place_holders['coord']: data_coord, - self.place_holders['type']: data_atype, - self.place_holders['natoms_vec']: natoms_vec, - self.place_holders['box']: data_box, - self.place_holders['default_mesh']: mesh, - self.place_holders['efield']: data_efield, - }) + def _compute_dstats_sys_smth( + self, data_coord, data_box, data_atype, natoms_vec, mesh, data_efield + ): + dd_all = run_sess( + self.sub_sess, + self.stat_descrpt, + feed_dict={ + self.place_holders["coord"]: data_coord, + self.place_holders["type"]: 
data_atype, + self.place_holders["natoms_vec"]: natoms_vec, + self.place_holders["box"]: data_box, + self.place_holders["default_mesh"]: mesh, + self.place_holders["efield"]: data_efield, + }, + ) natoms = natoms_vec dd_all = np.reshape(dd_all, [-1, self.ndescrpt * natoms[0]]) start_index = 0 @@ -522,24 +566,22 @@ def _compute_dstats_sys_smth (self, sysr2 = [] sysa2 = [] for type_i in range(self.ntypes): - end_index = start_index + self.ndescrpt * natoms[2+type_i] + end_index = start_index + self.ndescrpt * natoms[2 + type_i] dd = dd_all[:, start_index:end_index] dd = np.reshape(dd, [-1, self.ndescrpt]) - start_index = end_index + start_index = end_index # compute - dd = np.reshape (dd, [-1, 4]) - ddr = dd[:,:1] - dda = dd[:,1:] + dd = np.reshape(dd, [-1, 4]) + ddr = dd[:, :1] + dda = dd[:, 1:] sumr = np.sum(ddr) - suma = np.sum(dda) / 3. + suma = np.sum(dda) / 3.0 sumn = dd.shape[0] sumr2 = np.sum(np.multiply(ddr, ddr)) - suma2 = np.sum(np.multiply(dda, dda)) / 3. + suma2 = np.sum(np.multiply(dda, dda)) / 3.0 sysr.append(sumr) sysa.append(suma) sysn.append(sumn) sysr2.append(sumr2) sysa2.append(suma2) return sysr, sysr2, sysa, sysa2, sysn - - diff --git a/deepmd/descriptor/se_atten.py b/deepmd/descriptor/se_atten.py index 00190672e4..2c02521640 100644 --- a/deepmd/descriptor/se_atten.py +++ b/deepmd/descriptor/se_atten.py @@ -1,24 +1,60 @@ import math +from typing import ( + Any, + Dict, + List, + Optional, + Tuple, +) + import numpy as np -from typing import Optional, Tuple, List, Dict, Any -from packaging.version import Version - -from deepmd.env import tf -from deepmd.common import get_activation_func, get_precision, cast_precision -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import TF_VERSION -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION -from deepmd.env import op_module -from deepmd.env import default_tf_session_config -from deepmd.utils.network import one_layer, embedding_net, embedding_net_rand_seed_shift -from 
deepmd.utils.tabulate import DPTabulate -from deepmd.utils.type_embed import embed_atom_type -from deepmd.utils.sess import run_sess -from deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph, get_tensor_by_name -from deepmd.utils.graph import get_attention_layer_variables_from_graph_def -from deepmd.utils.errors import GraphWithoutTensorError -from .descriptor import Descriptor -from .se_a import DescrptSeA +from packaging.version import ( + Version, +) + +from deepmd.common import ( + cast_precision, + get_activation_func, + get_precision, +) +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + TF_VERSION, + default_tf_session_config, + op_module, + tf, +) +from deepmd.utils.errors import ( + GraphWithoutTensorError, +) +from deepmd.utils.graph import ( + get_attention_layer_variables_from_graph_def, + get_tensor_by_name, + get_tensor_by_name_from_graph, + load_graph_def, +) +from deepmd.utils.network import ( + embedding_net, + embedding_net_rand_seed_shift, + one_layer, +) +from deepmd.utils.sess import ( + run_sess, +) +from deepmd.utils.tabulate import ( + DPTabulate, +) +from deepmd.utils.type_embed import ( + embed_atom_type, +) + +from .descriptor import ( + Descriptor, +) +from .se_a import ( + DescrptSeA, +) @Descriptor.register("se_atten") @@ -68,49 +104,53 @@ class DescrptSeAtten(DescrptSeA): If the model has multi fitting nets to train. 
""" - def __init__(self, - rcut: float, - rcut_smth: float, - sel: int, - ntypes: int, - neuron: List[int] = [24, 48, 96], - axis_neuron: int = 8, - resnet_dt: bool = False, - trainable: bool = True, - seed: Optional[int] = None, - type_one_side: bool = True, - exclude_types: List[List[int]] = [], - set_davg_zero: bool = False, - activation_function: str = 'tanh', - precision: str = 'default', - uniform_seed: bool = False, - attn: int = 128, - attn_layer: int = 2, - attn_dotr: bool = True, - attn_mask: bool = False, - multi_task: bool = False - ) -> None: - DescrptSeA.__init__(self, - rcut, - rcut_smth, - [sel], - neuron=neuron, - axis_neuron=axis_neuron, - resnet_dt=resnet_dt, - trainable=trainable, - seed=seed, - type_one_side=type_one_side, - exclude_types=exclude_types, - set_davg_zero=set_davg_zero, - activation_function=activation_function, - precision=precision, - uniform_seed=uniform_seed, - multi_task=multi_task - ) + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: int, + ntypes: int, + neuron: List[int] = [24, 48, 96], + axis_neuron: int = 8, + resnet_dt: bool = False, + trainable: bool = True, + seed: Optional[int] = None, + type_one_side: bool = True, + exclude_types: List[List[int]] = [], + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = "default", + uniform_seed: bool = False, + attn: int = 128, + attn_layer: int = 2, + attn_dotr: bool = True, + attn_mask: bool = False, + multi_task: bool = False, + ) -> None: + DescrptSeA.__init__( + self, + rcut, + rcut_smth, + [sel], + neuron=neuron, + axis_neuron=axis_neuron, + resnet_dt=resnet_dt, + trainable=trainable, + seed=seed, + type_one_side=type_one_side, + exclude_types=exclude_types, + set_davg_zero=set_davg_zero, + activation_function=activation_function, + precision=precision, + uniform_seed=uniform_seed, + multi_task=multi_task, + ) """ Constructor """ - assert (Version(TF_VERSION) > Version('2')), "se_atten only support tensorflow version 2.0 or 
higher." + assert Version(TF_VERSION) > Version( + "2" + ), "se_atten only support tensorflow version 2.0 or higher." self.ntypes = ntypes self.att_n = attn self.attn_layer = attn_layer @@ -120,48 +160,72 @@ def __init__(self, # descrpt config self.sel_all_a = [sel] self.sel_all_r = [0] - avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) - std_ones = np.ones([self.ntypes, self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) - self.beta = np.zeros([self.attn_layer, self.filter_neuron[-1]]).astype(GLOBAL_NP_FLOAT_PRECISION) - self.gamma = np.ones([self.attn_layer, self.filter_neuron[-1]]).astype(GLOBAL_NP_FLOAT_PRECISION) + avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) + std_ones = np.ones([self.ntypes, self.ndescrpt]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) + self.beta = np.zeros([self.attn_layer, self.filter_neuron[-1]]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) + self.gamma = np.ones([self.attn_layer, self.filter_neuron[-1]]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) self.attention_layer_variables = None sub_graph = tf.Graph() with sub_graph.as_default(): - name_pfx = 'd_sea_' - for ii in ['coord', 'box']: - self.place_holders[ii] = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None], - name=name_pfx + 't_' + ii) - self.place_holders['type'] = tf.placeholder(tf.int32, [None, None], name=name_pfx + 't_type') - self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes + 2], name=name_pfx + 't_natoms') - self.place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name=name_pfx + 't_mesh') - self.stat_descrpt, self.descrpt_deriv_t, self.rij_t, self.nlist_t, self.nei_type_vec_t, self.nmask_t \ - = op_module.prod_env_mat_a_mix(self.place_holders['coord'], - self.place_holders['type'], - self.place_holders['natoms_vec'], - self.place_holders['box'], - self.place_holders['default_mesh'], - tf.constant(avg_zero), - tf.constant(std_ones), - rcut_a=self.rcut_a, - 
rcut_r=self.rcut_r, - rcut_r_smth=self.rcut_r_smth, - sel_a=self.sel_all_a, - sel_r=self.sel_all_r) + name_pfx = "d_sea_" + for ii in ["coord", "box"]: + self.place_holders[ii] = tf.placeholder( + GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_" + ii + ) + self.place_holders["type"] = tf.placeholder( + tf.int32, [None, None], name=name_pfx + "t_type" + ) + self.place_holders["natoms_vec"] = tf.placeholder( + tf.int32, [self.ntypes + 2], name=name_pfx + "t_natoms" + ) + self.place_holders["default_mesh"] = tf.placeholder( + tf.int32, [None], name=name_pfx + "t_mesh" + ) + ( + self.stat_descrpt, + self.descrpt_deriv_t, + self.rij_t, + self.nlist_t, + self.nei_type_vec_t, + self.nmask_t, + ) = op_module.prod_env_mat_a_mix( + self.place_holders["coord"], + self.place_holders["type"], + self.place_holders["natoms_vec"], + self.place_holders["box"], + self.place_holders["default_mesh"], + tf.constant(avg_zero), + tf.constant(std_ones), + rcut_a=self.rcut_a, + rcut_r=self.rcut_r, + rcut_r_smth=self.rcut_r_smth, + sel_a=self.sel_all_a, + sel_r=self.sel_all_r, + ) self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) - def compute_input_stats(self, - data_coord: list, - data_box: list, - data_atype: list, - natoms_vec: list, - mesh: list, - input_dict: dict, - mixed_type: bool = False, - real_natoms_vec: Optional[list] = None - ) -> None: + def compute_input_stats( + self, + data_coord: list, + data_box: list, + data_atype: list, + natoms_vec: list, + mesh: list, + input_dict: dict, + mixed_type: bool = False, + real_natoms_vec: Optional[list] = None, + ) -> None: """ Compute the statisitcs (avg and std) of the training data. The input will be normalized by the statistics. 
- + Parameters ---------- data_coord @@ -192,9 +256,12 @@ def compute_input_stats(self, suma2 = [] if mixed_type: sys_num = 0 - for cc, bb, tt, nn, mm, r_n in zip(data_coord, data_box, data_atype, natoms_vec, mesh, real_natoms_vec): - sysr, sysr2, sysa, sysa2, sysn \ - = self._compute_dstats_sys_smth(cc, bb, tt, nn, mm, mixed_type, r_n) + for cc, bb, tt, nn, mm, r_n in zip( + data_coord, data_box, data_atype, natoms_vec, mesh, real_natoms_vec + ): + sysr, sysr2, sysa, sysa2, sysn = self._compute_dstats_sys_smth( + cc, bb, tt, nn, mm, mixed_type, r_n + ) sys_num += 1 sumr.append(sysr) suma.append(sysa) @@ -202,34 +269,44 @@ def compute_input_stats(self, sumr2.append(sysr2) suma2.append(sysa2) else: - for cc, bb, tt, nn, mm in zip(data_coord, data_box, data_atype, natoms_vec, mesh): - sysr, sysr2, sysa, sysa2, sysn \ - = self._compute_dstats_sys_smth(cc, bb, tt, nn, mm) + for cc, bb, tt, nn, mm in zip( + data_coord, data_box, data_atype, natoms_vec, mesh + ): + sysr, sysr2, sysa, sysa2, sysn = self._compute_dstats_sys_smth( + cc, bb, tt, nn, mm + ) sumr.append(sysr) suma.append(sysa) sumn.append(sysn) sumr2.append(sysr2) suma2.append(sysa2) if not self.multi_task: - stat_dict = {'sumr': sumr, 'suma': suma, 'sumn': sumn, 'sumr2': sumr2, 'suma2': suma2} + stat_dict = { + "sumr": sumr, + "suma": suma, + "sumn": sumn, + "sumr2": sumr2, + "suma2": suma2, + } self.merge_input_stats(stat_dict) else: - self.stat_dict['sumr'] += sumr - self.stat_dict['suma'] += suma - self.stat_dict['sumn'] += sumn - self.stat_dict['sumr2'] += sumr2 - self.stat_dict['suma2'] += suma2 - - def build(self, - coord_: tf.Tensor, - atype_: tf.Tensor, - natoms: tf.Tensor, - box_: tf.Tensor, - mesh: tf.Tensor, - input_dict: dict, - reuse: Optional[bool] = None, - suffix: str = '' - ) -> tf.Tensor: + self.stat_dict["sumr"] += sumr + self.stat_dict["suma"] += suma + self.stat_dict["sumn"] += sumn + self.stat_dict["sumr2"] += sumr2 + self.stat_dict["suma2"] += suma2 + + def build( + self, + coord_: 
tf.Tensor, + atype_: tf.Tensor, + natoms: tf.Tensor, + box_: tf.Tensor, + mesh: tf.Tensor, + input_dict: dict, + reuse: Optional[bool] = None, + suffix: str = "", + ) -> tf.Tensor: """ Build the computational graph for the descriptor @@ -246,8 +323,8 @@ def build(self, natoms[i]: 2 <= i < Ntypes+2, number of type i atoms mesh For historical reasons, only the length of the Tensor matters. - if size of mesh == 6, pbc is assumed. - if size of mesh == 0, no-pbc is assumed. + if size of mesh == 6, pbc is assumed. + if size of mesh == 0, no-pbc is assumed. input_dict Dictionary for additional inputs reuse @@ -262,36 +339,38 @@ def build(self, """ davg = self.davg dstd = self.dstd - with tf.variable_scope('descrpt_attr' + suffix, reuse=reuse): + with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse): if davg is None: davg = np.zeros([self.ntypes, self.ndescrpt]) if dstd is None: dstd = np.ones([self.ntypes, self.ndescrpt]) - t_rcut = tf.constant(np.max([self.rcut_r, self.rcut_a]), - name='rcut', - dtype=GLOBAL_TF_FLOAT_PRECISION) - t_ntypes = tf.constant(self.ntypes, - name='ntypes', - dtype=tf.int32) - t_ndescrpt = tf.constant(self.ndescrpt, - name='ndescrpt', - dtype=tf.int32) - t_sel = tf.constant(self.sel_a, - name='sel', - dtype=tf.int32) - t_original_sel = tf.constant(self.original_sel if self.original_sel is not None else self.sel_a, - name='original_sel', - dtype=tf.int32) - self.t_avg = tf.get_variable('t_avg', - davg.shape, - dtype=GLOBAL_TF_FLOAT_PRECISION, - trainable=False, - initializer=tf.constant_initializer(davg)) - self.t_std = tf.get_variable('t_std', - dstd.shape, - dtype=GLOBAL_TF_FLOAT_PRECISION, - trainable=False, - initializer=tf.constant_initializer(dstd)) + t_rcut = tf.constant( + np.max([self.rcut_r, self.rcut_a]), + name="rcut", + dtype=GLOBAL_TF_FLOAT_PRECISION, + ) + t_ntypes = tf.constant(self.ntypes, name="ntypes", dtype=tf.int32) + t_ndescrpt = tf.constant(self.ndescrpt, name="ndescrpt", dtype=tf.int32) + t_sel = 
tf.constant(self.sel_a, name="sel", dtype=tf.int32) + t_original_sel = tf.constant( + self.original_sel if self.original_sel is not None else self.sel_a, + name="original_sel", + dtype=tf.int32, + ) + self.t_avg = tf.get_variable( + "t_avg", + davg.shape, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(davg), + ) + self.t_std = tf.get_variable( + "t_std", + dstd.shape, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(dstd), + ) with tf.control_dependencies([t_sel, t_original_sel]): coord = tf.reshape(coord_, [-1, natoms[1] * 3]) @@ -301,114 +380,151 @@ def build(self, self.angular_weight = [None for i in range(self.attn_layer)] self.attn_weight_final = [None for i in range(self.attn_layer)] - self.descrpt, self.descrpt_deriv, self.rij, self.nlist, self.nei_type_vec, self.nmask \ - = op_module.prod_env_mat_a_mix(coord, - atype, - natoms, - box, - mesh, - self.t_avg, - self.t_std, - rcut_a=self.rcut_a, - rcut_r=self.rcut_r, - rcut_r_smth=self.rcut_r_smth, - sel_a=self.sel_all_a, - sel_r=self.sel_all_r) + ( + self.descrpt, + self.descrpt_deriv, + self.rij, + self.nlist, + self.nei_type_vec, + self.nmask, + ) = op_module.prod_env_mat_a_mix( + coord, + atype, + natoms, + box, + mesh, + self.t_avg, + self.t_std, + rcut_a=self.rcut_a, + rcut_r=self.rcut_r, + rcut_r_smth=self.rcut_r_smth, + sel_a=self.sel_all_a, + sel_r=self.sel_all_r, + ) self.nei_type_vec = tf.reshape(self.nei_type_vec, [-1]) - self.nmask = tf.cast(tf.reshape(self.nmask, [-1, 1, self.sel_all_a[0]]), GLOBAL_TF_FLOAT_PRECISION) + self.nmask = tf.cast( + tf.reshape(self.nmask, [-1, 1, self.sel_all_a[0]]), + GLOBAL_TF_FLOAT_PRECISION, + ) self.negative_mask = -(2 << 32) * (1.0 - self.nmask) # only used when tensorboard was set as true - tf.summary.histogram('descrpt', self.descrpt) - tf.summary.histogram('rij', self.rij) - tf.summary.histogram('nlist', self.nlist) + tf.summary.histogram("descrpt", self.descrpt) + 
tf.summary.histogram("rij", self.rij) + tf.summary.histogram("nlist", self.nlist) self.descrpt_reshape = tf.reshape(self.descrpt, [-1, self.ndescrpt]) - self.atype_nloc = tf.reshape(tf.slice(atype, [0, 0], [-1, natoms[0]]), - [-1]) ## lammps will have error without this + self.atype_nloc = tf.reshape( + tf.slice(atype, [0, 0], [-1, natoms[0]]), [-1] + ) ## lammps will have error without this self._identity_tensors(suffix=suffix) - self.dout, self.qmat = self._pass_filter(self.descrpt_reshape, - self.atype_nloc, - natoms, - input_dict, - suffix=suffix, - reuse=reuse, - trainable=self.trainable) + self.dout, self.qmat = self._pass_filter( + self.descrpt_reshape, + self.atype_nloc, + natoms, + input_dict, + suffix=suffix, + reuse=reuse, + trainable=self.trainable, + ) # only used when tensorboard was set as true - tf.summary.histogram('embedding_net_output', self.dout) + tf.summary.histogram("embedding_net_output", self.dout) return self.dout - def _pass_filter(self, - inputs, - atype, - natoms, - input_dict, - reuse=None, - suffix='', - trainable=True): - assert (input_dict is not None and input_dict.get('type_embedding', None) is not None), \ - 'se_atten desctiptor must use type_embedding' - type_embedding = input_dict.get('type_embedding', None) + def _pass_filter( + self, inputs, atype, natoms, input_dict, reuse=None, suffix="", trainable=True + ): + assert ( + input_dict is not None + and input_dict.get("type_embedding", None) is not None + ), "se_atten desctiptor must use type_embedding" + type_embedding = input_dict.get("type_embedding", None) inputs = tf.reshape(inputs, [-1, natoms[0], self.ndescrpt]) output = [] output_qmat = [] inputs_i = inputs inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt]) type_i = -1 - layer, qmat = self._filter(inputs_i, type_i, natoms, name='filter_type_all' + suffix, suffix=suffix, - reuse=reuse, trainable=trainable, activation_fn=self.filter_activation_fn, - type_embedding=type_embedding, atype=atype) + layer, qmat = 
self._filter( + inputs_i, + type_i, + natoms, + name="filter_type_all" + suffix, + suffix=suffix, + reuse=reuse, + trainable=trainable, + activation_fn=self.filter_activation_fn, + type_embedding=type_embedding, + atype=atype, + ) layer = tf.reshape(layer, [tf.shape(inputs)[0], natoms[0], self.get_dim_out()]) - qmat = tf.reshape(qmat, [tf.shape(inputs)[0], natoms[0], self.get_dim_rot_mat_1() * 3]) + qmat = tf.reshape( + qmat, [tf.shape(inputs)[0], natoms[0], self.get_dim_rot_mat_1() * 3] + ) output.append(layer) output_qmat.append(qmat) output = tf.concat(output, axis=1) output_qmat = tf.concat(output_qmat, axis=1) return output, output_qmat - def _compute_dstats_sys_smth(self, - data_coord, - data_box, - data_atype, - natoms_vec, - mesh, - mixed_type=False, - real_natoms_vec=None): - dd_all, descrpt_deriv_t, rij_t, nlist_t, nei_type_vec_t, nmask_t \ - = run_sess(self.sub_sess, [self.stat_descrpt, self.descrpt_deriv_t, self.rij_t, self.nlist_t, self.nei_type_vec_t, self.nmask_t], - feed_dict={ - self.place_holders['coord']: data_coord, - self.place_holders['type']: data_atype, - self.place_holders['natoms_vec']: natoms_vec, - self.place_holders['box']: data_box, - self.place_holders['default_mesh']: mesh, - }) + def _compute_dstats_sys_smth( + self, + data_coord, + data_box, + data_atype, + natoms_vec, + mesh, + mixed_type=False, + real_natoms_vec=None, + ): + dd_all, descrpt_deriv_t, rij_t, nlist_t, nei_type_vec_t, nmask_t = run_sess( + self.sub_sess, + [ + self.stat_descrpt, + self.descrpt_deriv_t, + self.rij_t, + self.nlist_t, + self.nei_type_vec_t, + self.nmask_t, + ], + feed_dict={ + self.place_holders["coord"]: data_coord, + self.place_holders["type"]: data_atype, + self.place_holders["natoms_vec"]: natoms_vec, + self.place_holders["box"]: data_box, + self.place_holders["default_mesh"]: mesh, + }, + ) if mixed_type: nframes = dd_all.shape[0] - sysr = [0. for i in range(self.ntypes)] - sysa = [0. 
for i in range(self.ntypes)] + sysr = [0.0 for i in range(self.ntypes)] + sysa = [0.0 for i in range(self.ntypes)] sysn = [0 for i in range(self.ntypes)] - sysr2 = [0. for i in range(self.ntypes)] - sysa2 = [0. for i in range(self.ntypes)] + sysr2 = [0.0 for i in range(self.ntypes)] + sysa2 = [0.0 for i in range(self.ntypes)] for ff in range(nframes): natoms = real_natoms_vec[ff] dd_ff = np.reshape(dd_all[ff], [-1, self.ndescrpt * natoms[0]]) start_index = 0 for type_i in range(self.ntypes): - end_index = start_index + self.ndescrpt * natoms[2 + type_i] # center atom split + end_index = ( + start_index + self.ndescrpt * natoms[2 + type_i] + ) # center atom split dd = dd_ff[:, start_index:end_index] - dd = np.reshape(dd, [-1, self.ndescrpt]) # nframes * typen_atoms , nnei * 4 + dd = np.reshape( + dd, [-1, self.ndescrpt] + ) # nframes * typen_atoms , nnei * 4 start_index = end_index # compute dd = np.reshape(dd, [-1, 4]) # nframes * typen_atoms * nnei, 4 ddr = dd[:, :1] dda = dd[:, 1:] sumr = np.sum(ddr) - suma = np.sum(dda) / 3. + suma = np.sum(dda) / 3.0 sumn = dd.shape[0] sumr2 = np.sum(np.multiply(ddr, ddr)) - suma2 = np.sum(np.multiply(dda, dda)) / 3. + suma2 = np.sum(np.multiply(dda, dda)) / 3.0 sysr[type_i] += sumr sysa[type_i] += suma sysn[type_i] += sumn @@ -424,19 +540,23 @@ def _compute_dstats_sys_smth(self, sysr2 = [] sysa2 = [] for type_i in range(self.ntypes): - end_index = start_index + self.ndescrpt * natoms[2 + type_i] # center atom split + end_index = ( + start_index + self.ndescrpt * natoms[2 + type_i] + ) # center atom split dd = dd_all[:, start_index:end_index] - dd = np.reshape(dd, [-1, self.ndescrpt]) # nframes * typen_atoms , nnei * 4 + dd = np.reshape( + dd, [-1, self.ndescrpt] + ) # nframes * typen_atoms , nnei * 4 start_index = end_index # compute dd = np.reshape(dd, [-1, 4]) # nframes * typen_atoms * nnei, 4 ddr = dd[:, :1] dda = dd[:, 1:] sumr = np.sum(ddr) - suma = np.sum(dda) / 3. 
+ suma = np.sum(dda) / 3.0 sumn = dd.shape[0] sumr2 = np.sum(np.multiply(ddr, ddr)) - suma2 = np.sum(np.multiply(dda, dda)) / 3. + suma2 = np.sum(np.multiply(dda, dda)) / 3.0 sysr.append(sumr) sysa.append(suma) sysn.append(sumn) @@ -445,12 +565,12 @@ def _compute_dstats_sys_smth(self, return sysr, sysr2, sysa, sysa2, sysn def _lookup_type_embedding( - self, - xyz_scatter, - natype, - type_embedding, + self, + xyz_scatter, + natype, + type_embedding, ): - '''Concatenate `type_embedding` of neighbors and `xyz_scatter`. + """Concatenate `type_embedding` of neighbors and `xyz_scatter`. If not self.type_one_side, concatenate `type_embedding` of center atoms as well. Parameters @@ -468,55 +588,77 @@ def _lookup_type_embedding( ------- embedding: environment of each atom represented by embedding. - ''' + """ te_out_dim = type_embedding.get_shape().as_list()[-1] self.test_type_embedding = type_embedding - self.test_nei_embed = tf.nn.embedding_lookup(type_embedding, - self.nei_type_vec) # shape is [self.nnei, 1+te_out_dim] + self.test_nei_embed = tf.nn.embedding_lookup( + type_embedding, self.nei_type_vec + ) # shape is [self.nnei, 1+te_out_dim] # nei_embed = tf.tile(nei_embed, (nframes * natoms[0], 1)) # shape is [nframes*natoms[0]*self.nnei, te_out_dim] nei_embed = tf.reshape(self.test_nei_embed, [-1, te_out_dim]) - self.embedding_input = tf.concat([xyz_scatter, nei_embed], - 1) # shape is [nframes*natoms[0]*self.nnei, 1+te_out_dim] + self.embedding_input = tf.concat( + [xyz_scatter, nei_embed], 1 + ) # shape is [nframes*natoms[0]*self.nnei, 1+te_out_dim] if not self.type_one_side: - self.atm_embed = tf.nn.embedding_lookup(type_embedding, natype) # shape is [nframes*natoms[0], te_out_dim] - self.atm_embed = tf.tile(self.atm_embed, - [1, self.nnei]) # shape is [nframes*natoms[0], self.nnei*te_out_dim] - self.atm_embed = tf.reshape(self.atm_embed, - [-1, te_out_dim]) # shape is [nframes*natoms[0]*self.nnei, te_out_dim] - self.embedding_input_2 = 
tf.concat([self.embedding_input, self.atm_embed], - 1) # shape is [nframes*natoms[0]*self.nnei, 1+te_out_dim+te_out_dim] + self.atm_embed = tf.nn.embedding_lookup( + type_embedding, natype + ) # shape is [nframes*natoms[0], te_out_dim] + self.atm_embed = tf.tile( + self.atm_embed, [1, self.nnei] + ) # shape is [nframes*natoms[0], self.nnei*te_out_dim] + self.atm_embed = tf.reshape( + self.atm_embed, [-1, te_out_dim] + ) # shape is [nframes*natoms[0]*self.nnei, te_out_dim] + self.embedding_input_2 = tf.concat( + [self.embedding_input, self.atm_embed], 1 + ) # shape is [nframes*natoms[0]*self.nnei, 1+te_out_dim+te_out_dim] return self.embedding_input_2 return self.embedding_input def _feedforward(self, input_xyz, d_in, d_mid): residual = input_xyz - input_xyz = tf.nn.relu(one_layer( - input_xyz, - d_mid, - name='c_ffn1', - reuse=tf.AUTO_REUSE, - seed=self.seed, - activation_fn=None, - precision=self.filter_precision, - trainable=True, - uniform_seed=self.uniform_seed, - initial_variables=self.attention_layer_variables)) + input_xyz = tf.nn.relu( + one_layer( + input_xyz, + d_mid, + name="c_ffn1", + reuse=tf.AUTO_REUSE, + seed=self.seed, + activation_fn=None, + precision=self.filter_precision, + trainable=True, + uniform_seed=self.uniform_seed, + initial_variables=self.attention_layer_variables, + ) + ) input_xyz = one_layer( input_xyz, d_in, - name='c_ffn2', + name="c_ffn2", reuse=tf.AUTO_REUSE, seed=self.seed, activation_fn=None, precision=self.filter_precision, trainable=True, uniform_seed=self.uniform_seed, - initial_variables=self.attention_layer_variables) + initial_variables=self.attention_layer_variables, + ) input_xyz += residual input_xyz = tf.keras.layers.LayerNormalization()(input_xyz) return input_xyz - def _scaled_dot_attn(self, Q, K, V, temperature, input_r, dotr=False, do_mask=False, layer=0, save_weights=True): + def _scaled_dot_attn( + self, + Q, + K, + V, + temperature, + input_r, + dotr=False, + do_mask=False, + layer=0, + save_weights=True, + ): 
attn = tf.matmul(Q / temperature, K, transpose_b=True) attn *= self.nmask attn += self.negative_mask @@ -538,127 +680,140 @@ def _scaled_dot_attn(self, Q, K, V, temperature, input_r, dotr=False, do_mask=Fa return output def _attention_layers( - self, - input_xyz, - layer_num, - shape_i, - outputs_size, - input_r, - dotr=False, - do_mask=False, - trainable=True, - suffix='' + self, + input_xyz, + layer_num, + shape_i, + outputs_size, + input_r, + dotr=False, + do_mask=False, + trainable=True, + suffix="", ): - sd_k = tf.sqrt(tf.cast(1., dtype=self.filter_precision)) + sd_k = tf.sqrt(tf.cast(1.0, dtype=self.filter_precision)) for i in range(layer_num): - name = 'attention_layer_{}{}'.format(i, suffix) + name = "attention_layer_{}{}".format(i, suffix) with tf.variable_scope(name, reuse=tf.AUTO_REUSE): # input_xyz_in = tf.nn.l2_normalize(input_xyz, -1) Q_c = one_layer( input_xyz, self.att_n, - name='c_query', - scope=name+'/', + name="c_query", + scope=name + "/", reuse=tf.AUTO_REUSE, seed=self.seed, activation_fn=None, precision=self.filter_precision, trainable=trainable, uniform_seed=self.uniform_seed, - initial_variables=self.attention_layer_variables) + initial_variables=self.attention_layer_variables, + ) K_c = one_layer( input_xyz, self.att_n, - name='c_key', - scope=name+'/', + name="c_key", + scope=name + "/", reuse=tf.AUTO_REUSE, seed=self.seed, activation_fn=None, precision=self.filter_precision, trainable=trainable, uniform_seed=self.uniform_seed, - initial_variables=self.attention_layer_variables) + initial_variables=self.attention_layer_variables, + ) V_c = one_layer( input_xyz, self.att_n, - name='c_value', - scope=name+'/', + name="c_value", + scope=name + "/", reuse=tf.AUTO_REUSE, seed=self.seed, activation_fn=None, precision=self.filter_precision, trainable=trainable, uniform_seed=self.uniform_seed, - initial_variables=self.attention_layer_variables) + initial_variables=self.attention_layer_variables, + ) # # natom x nei_type_i x out_size # xyz_scatter 
= tf.reshape(xyz_scatter, (-1, shape_i[1] // 4, outputs_size[-1])) # natom x nei_type_i x att_n - Q_c = tf.nn.l2_normalize(tf.reshape(Q_c, (-1, shape_i[1] // 4, self.att_n)), -1) - K_c = tf.nn.l2_normalize(tf.reshape(K_c, (-1, shape_i[1] // 4, self.att_n)), -1) - V_c = tf.nn.l2_normalize(tf.reshape(V_c, (-1, shape_i[1] // 4, self.att_n)), -1) + Q_c = tf.nn.l2_normalize( + tf.reshape(Q_c, (-1, shape_i[1] // 4, self.att_n)), -1 + ) + K_c = tf.nn.l2_normalize( + tf.reshape(K_c, (-1, shape_i[1] // 4, self.att_n)), -1 + ) + V_c = tf.nn.l2_normalize( + tf.reshape(V_c, (-1, shape_i[1] // 4, self.att_n)), -1 + ) - input_att = self._scaled_dot_attn(Q_c, K_c, V_c, sd_k, input_r, dotr=dotr, do_mask=do_mask, layer=i) + input_att = self._scaled_dot_attn( + Q_c, K_c, V_c, sd_k, input_r, dotr=dotr, do_mask=do_mask, layer=i + ) input_att = tf.reshape(input_att, (-1, self.att_n)) # (natom x nei_type_i) x out_size input_xyz += one_layer( input_att, outputs_size[-1], - name='c_out', - scope=name+'/', + name="c_out", + scope=name + "/", reuse=tf.AUTO_REUSE, seed=self.seed, activation_fn=None, precision=self.filter_precision, trainable=trainable, uniform_seed=self.uniform_seed, - initial_variables=self.attention_layer_variables) - input_xyz = tf.keras.layers.LayerNormalization(beta_initializer=tf.constant_initializer(self.beta[i]), - gamma_initializer=tf.constant_initializer(self.gamma[i]))(input_xyz) + initial_variables=self.attention_layer_variables, + ) + input_xyz = tf.keras.layers.LayerNormalization( + beta_initializer=tf.constant_initializer(self.beta[i]), + gamma_initializer=tf.constant_initializer(self.gamma[i]), + )(input_xyz) # input_xyz = self._feedforward(input_xyz, outputs_size[-1], self.att_n) return input_xyz def _filter_lower( - self, - type_i, - type_input, - start_index, - incrs_index, - inputs, - type_embedding=None, - atype=None, - is_exclude=False, - activation_fn=None, - bavg=0.0, - stddev=1.0, - trainable=True, - suffix='', - name='filter_', - reuse=None + self, 
+ type_i, + type_input, + start_index, + incrs_index, + inputs, + type_embedding=None, + atype=None, + is_exclude=False, + activation_fn=None, + bavg=0.0, + stddev=1.0, + trainable=True, + suffix="", + name="filter_", + reuse=None, ): """ input env matrix, returns R.G """ outputs_size = [1] + self.filter_neuron # cut-out inputs - # with natom x (nei_type_i x 4) - inputs_i = tf.slice(inputs, - [0, start_index * 4], - [-1, incrs_index * 4]) + # with natom x (nei_type_i x 4) + inputs_i = tf.slice(inputs, [0, start_index * 4], [-1, incrs_index * 4]) shape_i = inputs_i.get_shape().as_list() natom = tf.shape(inputs_i)[0] # with (natom x nei_type_i) x 4 inputs_reshape = tf.reshape(inputs_i, [-1, 4]) # with (natom x nei_type_i) x 1 xyz_scatter = tf.reshape(tf.slice(inputs_reshape, [0, 0], [-1, 1]), [-1, 1]) - assert atype is not None, 'atype must exist!!' + assert atype is not None, "atype must exist!!" type_embedding = tf.cast(type_embedding, self.filter_precision) - xyz_scatter = self._lookup_type_embedding( - xyz_scatter, atype, type_embedding) + xyz_scatter = self._lookup_type_embedding(xyz_scatter, atype, type_embedding) if self.compress: - raise RuntimeError('compression of attention descriptor is not supported at the moment') + raise RuntimeError( + "compression of attention descriptor is not supported at the moment" + ) # natom x 4 x outputs_size - if (not is_exclude): + if not is_exclude: with tf.variable_scope(name, reuse=reuse): # with (natom x nei_type_i) x out_size xyz_scatter = embedding_net( @@ -674,47 +829,73 @@ def _filter_lower( trainable=trainable, uniform_seed=self.uniform_seed, initial_variables=self.embedding_net_variables, - mixed_prec=self.mixed_prec) - if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift - input_r = tf.slice(tf.reshape(inputs_i, (-1, shape_i[1] // 4, 4)), [0, 0, 1], [-1, -1, 3]) + mixed_prec=self.mixed_prec, + ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift 
+ input_r = tf.slice( + tf.reshape(inputs_i, (-1, shape_i[1] // 4, 4)), [0, 0, 1], [-1, -1, 3] + ) input_r = tf.nn.l2_normalize(input_r, -1) # natom x nei_type_i x out_size xyz_scatter_att = tf.reshape( - self._attention_layers(xyz_scatter, self.attn_layer, shape_i, outputs_size, input_r, - dotr=self.attn_dotr, do_mask=self.attn_mask, trainable=trainable, suffix=suffix), - (-1, shape_i[1] // 4, outputs_size[-1])) + self._attention_layers( + xyz_scatter, + self.attn_layer, + shape_i, + outputs_size, + input_r, + dotr=self.attn_dotr, + do_mask=self.attn_mask, + trainable=trainable, + suffix=suffix, + ), + (-1, shape_i[1] // 4, outputs_size[-1]), + ) # xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1] // 4, outputs_size[-1])) else: # we can safely return the final xyz_scatter filled with zero directly - return tf.cast(tf.fill((natom, 4, outputs_size[-1]), 0.), self.filter_precision) + return tf.cast( + tf.fill((natom, 4, outputs_size[-1]), 0.0), self.filter_precision + ) # When using tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]) below # [588 24] -> [588 6 4] correct # but if sel is zero # [588 0] -> [147 0 4] incorrect; the correct one is [588 0 4] # So we need to explicitly assign the shape to tf.shape(inputs_i)[0] instead of -1 - return tf.matmul(tf.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), xyz_scatter_att, transpose_a=True) + return tf.matmul( + tf.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), + xyz_scatter_att, + transpose_a=True, + ) @cast_precision def _filter( - self, - inputs, - type_input, - natoms, - type_embedding=None, - atype=None, - activation_fn=tf.nn.tanh, - stddev=1.0, - bavg=0.0, - suffix='', - name='linear', - reuse=None, - trainable=True): + self, + inputs, + type_input, + natoms, + type_embedding=None, + atype=None, + activation_fn=tf.nn.tanh, + stddev=1.0, + bavg=0.0, + suffix="", + name="linear", + reuse=None, + trainable=True, + ): nframes = tf.shape(tf.reshape(inputs, [-1, natoms[0], self.ndescrpt]))[0] # natom x (nei x 4) shape = 
inputs.get_shape().as_list() outputs_size = [1] + self.filter_neuron outputs_size_2 = self.n_axis_neuron - all_excluded = all([(type_input, type_i) in self.exclude_types for type_i in range(self.ntypes)]) + all_excluded = all( + [ + (type_input, type_i) in self.exclude_types + for type_i in range(self.ntypes) + ] + ) if all_excluded: # all types are excluded so result and qmat should be zeros # we can safaly return a zero matrix... @@ -722,16 +903,23 @@ def _filter( # result: natom x outputs_size x outputs_size_2 # qmat: natom x outputs_size x 3 natom = tf.shape(inputs)[0] - result = tf.cast(tf.fill((natom, outputs_size_2, outputs_size[-1]), 0.), GLOBAL_TF_FLOAT_PRECISION) - qmat = tf.cast(tf.fill((natom, outputs_size[-1], 3), 0.), GLOBAL_TF_FLOAT_PRECISION) + result = tf.cast( + tf.fill((natom, outputs_size_2, outputs_size[-1]), 0.0), + GLOBAL_TF_FLOAT_PRECISION, + ) + qmat = tf.cast( + tf.fill((natom, outputs_size[-1], 3), 0.0), GLOBAL_TF_FLOAT_PRECISION + ) return result, qmat start_index = 0 type_i = 0 # natom x 4 x outputs_size xyz_scatter_1 = self._filter_lower( - type_i, type_input, - start_index, np.cumsum(self.sel_a)[-1], + type_i, + type_input, + start_index, + np.cumsum(self.sel_a)[-1], inputs, type_embedding=type_embedding, is_exclude=False, @@ -742,7 +930,8 @@ def _filter( suffix=suffix, name=name, reuse=reuse, - atype=atype) + atype=atype, + ) # natom x nei x outputs_size # xyz_scatter = tf.concat(xyz_scatter_total, axis=1) # natom x nei x 4 @@ -753,8 +942,15 @@ def _filter( # shape[1] = nnei * 4 nnei = shape[1] / 4 else: - nnei = tf.cast(tf.Variable(np.sum(self.original_sel), dtype=tf.int32, trainable=False, name="nnei"), - self.filter_precision) + nnei = tf.cast( + tf.Variable( + np.sum(self.original_sel), + dtype=tf.int32, + trainable=False, + name="nnei", + ), + self.filter_precision, + ) xyz_scatter_1 = xyz_scatter_1 / nnei # natom x 4 x outputs_size_2 xyz_scatter_2 = tf.slice(xyz_scatter_1, [0, 0, 0], [-1, -1, outputs_size_2]) @@ -771,11 +967,12 
@@ def _filter( return result, qmat - def init_variables(self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix: str = "", - ) -> None: + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix: str = "", + ) -> None: """ Init the embedding net variables with the given dict @@ -789,12 +986,24 @@ def init_variables(self, The suffix of the scope """ super().init_variables(graph=graph, graph_def=graph_def, suffix=suffix) - self.attention_layer_variables = get_attention_layer_variables_from_graph_def(graph_def, suffix=suffix) + self.attention_layer_variables = get_attention_layer_variables_from_graph_def( + graph_def, suffix=suffix + ) if self.attn_layer > 0: - self.beta[0] = self.attention_layer_variables['attention_layer_0{}/layer_normalization/beta'.format(suffix)] - self.gamma[0] = self.attention_layer_variables['attention_layer_0{}/layer_normalization/gamma'.format(suffix)] + self.beta[0] = self.attention_layer_variables[ + "attention_layer_0{}/layer_normalization/beta".format(suffix) + ] + self.gamma[0] = self.attention_layer_variables[ + "attention_layer_0{}/layer_normalization/gamma".format(suffix) + ] for i in range(1, self.attn_layer): self.beta[i] = self.attention_layer_variables[ - 'attention_layer_{}{}/layer_normalization_{}/beta'.format(i, suffix, i)] + "attention_layer_{}{}/layer_normalization_{}/beta".format( + i, suffix, i + ) + ] self.gamma[i] = self.attention_layer_variables[ - 'attention_layer_{}{}/layer_normalization_{}/gamma'.format(i, suffix, i)] + "attention_layer_{}{}/layer_normalization_{}/gamma".format( + i, suffix, i + ) + ] diff --git a/deepmd/descriptor/se_r.py b/deepmd/descriptor/se_r.py index 5773c47202..d072d87e8c 100644 --- a/deepmd/descriptor/se_r.py +++ b/deepmd/descriptor/se_r.py @@ -1,25 +1,51 @@ +from typing import ( + List, + Optional, + Tuple, +) + import numpy as np -from typing import Optional, Tuple, List - -from deepmd.env import tf -from deepmd.common import get_activation_func, get_precision, 
cast_precision -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION -from deepmd.env import op_module -from deepmd.env import default_tf_session_config -from deepmd.utils.tabulate import DPTabulate -from deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph -from deepmd.utils.network import embedding_net, embedding_net_rand_seed_shift -from deepmd.utils.sess import run_sess -from .descriptor import Descriptor -from .se import DescrptSe + +from deepmd.common import ( + cast_precision, + get_activation_func, + get_precision, +) +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + default_tf_session_config, + op_module, + tf, +) +from deepmd.utils.graph import ( + get_tensor_by_name_from_graph, + load_graph_def, +) +from deepmd.utils.network import ( + embedding_net, + embedding_net_rand_seed_shift, +) +from deepmd.utils.sess import ( + run_sess, +) +from deepmd.utils.tabulate import ( + DPTabulate, +) + +from .descriptor import ( + Descriptor, +) +from .se import ( + DescrptSe, +) @Descriptor.register("se_e2_r") @Descriptor.register("se_r") -class DescrptSeR (DescrptSe): +class DescrptSeR(DescrptSe): """DeepPot-SE constructed from radial information of atomic configurations. - + The embedding takes the distance between atoms as input. 
Parameters @@ -51,49 +77,53 @@ class DescrptSeR (DescrptSe): uniform_seed Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed """ - def __init__ (self, - rcut: float, - rcut_smth: float, - sel: List[str], - neuron: List[int] = [24,48,96], - resnet_dt: bool = False, - trainable: bool = True, - seed: Optional[int] = None, - type_one_side: bool = True, - exclude_types: List[List[int]] = [], - set_davg_zero: bool = False, - activation_function: str = 'tanh', - precision: str = 'default', - uniform_seed: bool = False, - multi_task: bool = False + + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: List[str], + neuron: List[int] = [24, 48, 96], + resnet_dt: bool = False, + trainable: bool = True, + seed: Optional[int] = None, + type_one_side: bool = True, + exclude_types: List[List[int]] = [], + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = "default", + uniform_seed: bool = False, + multi_task: bool = False, ) -> None: """ Constructor """ if rcut < rcut_smth: - raise RuntimeError("rcut_smth (%f) should be no more than rcut (%f)!" % (rcut_smth, rcut)) + raise RuntimeError( + "rcut_smth (%f) should be no more than rcut (%f)!" 
% (rcut_smth, rcut) + ) self.sel_r = sel self.rcut = rcut self.rcut_smth = rcut_smth self.filter_neuron = neuron self.filter_resnet_dt = resnet_dt - self.seed = seed + self.seed = seed self.uniform_seed = uniform_seed self.seed_shift = embedding_net_rand_seed_shift(self.filter_neuron) self.trainable = trainable - self.filter_activation_fn = get_activation_func(activation_function) - self.filter_precision = get_precision(precision) + self.filter_activation_fn = get_activation_func(activation_function) + self.filter_precision = get_precision(precision) exclude_types = exclude_types self.exclude_types = set() for tt in exclude_types: - assert(len(tt) == 2) + assert len(tt) == 2 self.exclude_types.add((tt[0], tt[1])) self.exclude_types.add((tt[1], tt[0])) self.set_davg_zero = set_davg_zero self.type_one_side = type_one_side # descrpt config - self.sel_a = [ 0 for ii in range(len(self.sel_r)) ] + self.sel_a = [0 for ii in range(len(self.sel_r))] self.ntypes = len(self.sel_r) # numb of neighbors and numb of descrptors self.nnei_a = np.cumsum(self.sel_a)[-1] @@ -105,55 +135,70 @@ def __init__ (self, self.useBN = False self.davg = None self.dstd = None - self.compress=False + self.compress = False self.embedding_net_variables = None self.place_holders = {} - avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) - std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) + avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) + std_ones = np.ones([self.ntypes, self.ndescrpt]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) sub_graph = tf.Graph() with sub_graph.as_default(): - name_pfx = 'd_ser_' - for ii in ['coord', 'box']: - self.place_holders[ii] = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None], name = name_pfx+'t_'+ii) - self.place_holders['type'] = tf.placeholder(tf.int32, [None, None], name=name_pfx+'t_type') - self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, 
[self.ntypes+2], name=name_pfx+'t_natoms') - self.place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name=name_pfx+'t_mesh') - self.stat_descrpt, descrpt_deriv, rij, nlist \ - = op_module.prod_env_mat_r(self.place_holders['coord'], - self.place_holders['type'], - self.place_holders['natoms_vec'], - self.place_holders['box'], - self.place_holders['default_mesh'], - tf.constant(avg_zero), - tf.constant(std_ones), - rcut = self.rcut, - rcut_smth = self.rcut_smth, - sel = self.sel_r) - self.sub_sess = tf.Session(graph = sub_graph, config=default_tf_session_config) + name_pfx = "d_ser_" + for ii in ["coord", "box"]: + self.place_holders[ii] = tf.placeholder( + GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_" + ii + ) + self.place_holders["type"] = tf.placeholder( + tf.int32, [None, None], name=name_pfx + "t_type" + ) + self.place_holders["natoms_vec"] = tf.placeholder( + tf.int32, [self.ntypes + 2], name=name_pfx + "t_natoms" + ) + self.place_holders["default_mesh"] = tf.placeholder( + tf.int32, [None], name=name_pfx + "t_mesh" + ) + self.stat_descrpt, descrpt_deriv, rij, nlist = op_module.prod_env_mat_r( + self.place_holders["coord"], + self.place_holders["type"], + self.place_holders["natoms_vec"], + self.place_holders["box"], + self.place_holders["default_mesh"], + tf.constant(avg_zero), + tf.constant(std_ones), + rcut=self.rcut, + rcut_smth=self.rcut_smth, + sel=self.sel_r, + ) + self.sub_sess = tf.Session( + graph=sub_graph, config=default_tf_session_config + ) self.multi_task = multi_task if multi_task: - self.stat_dict = {'sumr': [], 'sumn': [], 'sumr2': []} + self.stat_dict = {"sumr": [], "sumn": [], "sumr2": []} - def get_rcut (self) : + def get_rcut(self): """ Returns the cut-off radius """ return self.rcut - def get_ntypes (self) : + def get_ntypes(self): """ Returns the number of atom types """ return self.ntypes - def get_dim_out (self) : + def get_dim_out(self): """ Returns the output dimension of this descriptor """ return 
self.filter_neuron[-1] - def get_nlist (self) : + def get_nlist(self): """ Returns ------- @@ -168,16 +213,12 @@ def get_nlist (self) : """ return self.nlist, self.rij, self.sel_a, self.sel_r - def compute_input_stats (self, - data_coord, - data_box, - data_atype, - natoms_vec, - mesh, - input_dict) : + def compute_input_stats( + self, data_coord, data_box, data_atype, natoms_vec, mesh, input_dict + ): """ Compute the statisitcs (avg and std) of the training data. The input will be normalized by the statistics. - + Parameters ---------- data_coord @@ -196,19 +237,20 @@ def compute_input_stats (self, sumr = [] sumn = [] sumr2 = [] - for cc,bb,tt,nn,mm in zip(data_coord,data_box,data_atype,natoms_vec,mesh) : - sysr,sysr2,sysn \ - = self._compute_dstats_sys_se_r(cc,bb,tt,nn,mm) + for cc, bb, tt, nn, mm in zip( + data_coord, data_box, data_atype, natoms_vec, mesh + ): + sysr, sysr2, sysn = self._compute_dstats_sys_se_r(cc, bb, tt, nn, mm) sumr.append(sysr) sumn.append(sysn) sumr2.append(sysr2) if not self.multi_task: - stat_dict = {'sumr': sumr, 'sumn': sumn, 'sumr2': sumr2} + stat_dict = {"sumr": sumr, "sumn": sumn, "sumr2": sumr2} self.merge_input_stats(stat_dict) else: - self.stat_dict['sumr'] += sumr - self.stat_dict['sumn'] += sumn - self.stat_dict['sumr2'] += sumr2 + self.stat_dict["sumr"] += sumr + self.stat_dict["sumn"] += sumn + self.stat_dict["sumr2"] += sumr2 def merge_input_stats(self, stat_dict): """ @@ -227,9 +269,9 @@ def merge_input_stats(self, stat_dict): """ all_davg = [] all_dstd = [] - sumr = np.sum(stat_dict['sumr'], axis=0) - sumn = np.sum(stat_dict['sumn'], axis=0) - sumr2 = np.sum(stat_dict['sumr2'], axis=0) + sumr = np.sum(stat_dict["sumr"], axis=0) + sumn = np.sum(stat_dict["sumn"], axis=0) + sumr2 = np.sum(stat_dict["sumr2"], axis=0) for type_i in range(self.ntypes): davgunit = [sumr[type_i] / sumn[type_i]] dstdunit = [self._compute_std(sumr2[type_i], sumr[type_i], sumn[type_i])] @@ -242,15 +284,16 @@ def merge_input_stats(self, stat_dict): 
self.davg = np.array(all_davg) self.dstd = np.array(all_dstd) - def enable_compression(self, - min_nbor_dist : float, - graph: tf.Graph, - graph_def: tf.GraphDef, - table_extrapolate : float = 5, - table_stride_1 : float = 0.01, - table_stride_2 : float = 0.1, - check_frequency : int = -1, - suffix : str = "", + def enable_compression( + self, + min_nbor_dist: float, + graph: tf.Graph, + graph_def: tf.GraphDef, + table_extrapolate: float = 5, + table_stride_1: float = 0.01, + table_stride_2: float = 0.1, + check_frequency: int = -1, + suffix: str = "", ) -> None: """ Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the training data. @@ -282,32 +325,46 @@ def enable_compression(self, if self.filter_neuron[ii] * 2 != self.filter_neuron[ii + 1]: raise NotImplementedError( "Model Compression error: descriptor neuron [%s] is not supported by model compression! " - "The size of the next layer of the neural network must be twice the size of the previous layer." - % ','.join([str(item) for item in self.filter_neuron]) + "The size of the next layer of the neural network must be twice the size of the previous layer." 
+ % ",".join([str(item) for item in self.filter_neuron]) ) self.compress = True self.table = DPTabulate( - self, self.filter_neuron, graph, graph_def, activation_fn = self.filter_activation_fn, suffix=suffix) - self.table_config = [table_extrapolate, table_stride_1, table_stride_2, check_frequency] - self.lower, self.upper \ - = self.table.build(min_nbor_dist, - table_extrapolate, - table_stride_1, - table_stride_2) - - self.davg = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_avg' % suffix) - self.dstd = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_std' % suffix) - - def build (self, - coord_ : tf.Tensor, - atype_ : tf.Tensor, - natoms : tf.Tensor, - box_ : tf.Tensor, - mesh : tf.Tensor, - input_dict : dict, - reuse : bool = None, - suffix : str = '' + self, + self.filter_neuron, + graph, + graph_def, + activation_fn=self.filter_activation_fn, + suffix=suffix, + ) + self.table_config = [ + table_extrapolate, + table_stride_1, + table_stride_2, + check_frequency, + ] + self.lower, self.upper = self.table.build( + min_nbor_dist, table_extrapolate, table_stride_1, table_stride_2 + ) + + self.davg = get_tensor_by_name_from_graph( + graph, "descrpt_attr%s/t_avg" % suffix + ) + self.dstd = get_tensor_by_name_from_graph( + graph, "descrpt_attr%s/t_std" % suffix + ) + + def build( + self, + coord_: tf.Tensor, + atype_: tf.Tensor, + natoms: tf.Tensor, + box_: tf.Tensor, + mesh: tf.Tensor, + input_dict: dict, + reuse: bool = None, + suffix: str = "", ) -> tf.Tensor: """ Build the computational graph for the descriptor @@ -325,8 +382,8 @@ def build (self, natoms[i]: 2 <= i < Ntypes+2, number of type i atoms mesh For historical reasons, only the length of the Tensor matters. - if size of mesh == 6, pbc is assumed. - if size of mesh == 0, no-pbc is assumed. + if size of mesh == 6, pbc is assumed. + if size of mesh == 0, no-pbc is assumed. 
input_dict Dictionary for additional inputs reuse @@ -341,67 +398,76 @@ def build (self, """ davg = self.davg dstd = self.dstd - with tf.variable_scope('descrpt_attr' + suffix, reuse = reuse) : + with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse): if davg is None: - davg = np.zeros([self.ntypes, self.ndescrpt]) + davg = np.zeros([self.ntypes, self.ndescrpt]) if dstd is None: - dstd = np.ones ([self.ntypes, self.ndescrpt]) - t_rcut = tf.constant(self.rcut, - name = 'rcut', - dtype = GLOBAL_TF_FLOAT_PRECISION) - t_ntypes = tf.constant(self.ntypes, - name = 'ntypes', - dtype = tf.int32) - t_ndescrpt = tf.constant(self.ndescrpt, - name = 'ndescrpt', - dtype = tf.int32) - t_sel = tf.constant(self.sel_a, - name = 'sel', - dtype = tf.int32) - self.t_avg = tf.get_variable('t_avg', - davg.shape, - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(davg)) - self.t_std = tf.get_variable('t_std', - dstd.shape, - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(dstd)) - - coord = tf.reshape (coord_, [-1, natoms[1] * 3]) - box = tf.reshape (box_, [-1, 9]) - atype = tf.reshape (atype_, [-1, natoms[1]]) - - self.descrpt, self.descrpt_deriv, self.rij, self.nlist \ - = op_module.prod_env_mat_r(coord, - atype, - natoms, - box, - mesh, - self.t_avg, - self.t_std, - rcut = self.rcut, - rcut_smth = self.rcut_smth, - sel = self.sel_r) + dstd = np.ones([self.ntypes, self.ndescrpt]) + t_rcut = tf.constant( + self.rcut, name="rcut", dtype=GLOBAL_TF_FLOAT_PRECISION + ) + t_ntypes = tf.constant(self.ntypes, name="ntypes", dtype=tf.int32) + t_ndescrpt = tf.constant(self.ndescrpt, name="ndescrpt", dtype=tf.int32) + t_sel = tf.constant(self.sel_a, name="sel", dtype=tf.int32) + self.t_avg = tf.get_variable( + "t_avg", + davg.shape, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(davg), + ) + self.t_std = tf.get_variable( + "t_std", + dstd.shape, + 
dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(dstd), + ) + + coord = tf.reshape(coord_, [-1, natoms[1] * 3]) + box = tf.reshape(box_, [-1, 9]) + atype = tf.reshape(atype_, [-1, natoms[1]]) + + ( + self.descrpt, + self.descrpt_deriv, + self.rij, + self.nlist, + ) = op_module.prod_env_mat_r( + coord, + atype, + natoms, + box, + mesh, + self.t_avg, + self.t_std, + rcut=self.rcut, + rcut_smth=self.rcut_smth, + sel=self.sel_r, + ) self.descrpt_reshape = tf.reshape(self.descrpt, [-1, self.ndescrpt]) self._identity_tensors(suffix=suffix) # only used when tensorboard was set as true - tf.summary.histogram('descrpt', self.descrpt) - tf.summary.histogram('rij', self.rij) - tf.summary.histogram('nlist', self.nlist) - - self.dout = self._pass_filter(self.descrpt_reshape, atype, natoms, suffix = suffix, reuse = reuse, trainable = self.trainable) - tf.summary.histogram('embedding_net_output', self.dout) + tf.summary.histogram("descrpt", self.descrpt) + tf.summary.histogram("rij", self.rij) + tf.summary.histogram("nlist", self.nlist) + + self.dout = self._pass_filter( + self.descrpt_reshape, + atype, + natoms, + suffix=suffix, + reuse=reuse, + trainable=self.trainable, + ) + tf.summary.histogram("embedding_net_output", self.dout) return self.dout - - def prod_force_virial(self, - atom_ener : tf.Tensor, - natoms : tf.Tensor + def prod_force_virial( + self, atom_ener: tf.Tensor, natoms: tf.Tensor ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """ Compute force and virial @@ -425,49 +491,52 @@ def prod_force_virial(self, atom_virial The atomic virial """ - [net_deriv] = tf.gradients (atom_ener, self.descrpt_reshape) - tf.summary.histogram('net_derivative', net_deriv) - net_deriv_reshape = tf.reshape (net_deriv, [np.cast['int64'](-1), natoms[0] * np.cast['int64'](self.ndescrpt)]) - force \ - = op_module.prod_force_se_r (net_deriv_reshape, - self.descrpt_deriv, - self.nlist, - natoms) - virial, atom_virial \ - = op_module.prod_virial_se_r 
(net_deriv_reshape, - self.descrpt_deriv, - self.rij, - self.nlist, - natoms) - tf.summary.histogram('force', force) - tf.summary.histogram('virial', virial) - tf.summary.histogram('atom_virial', atom_virial) + [net_deriv] = tf.gradients(atom_ener, self.descrpt_reshape) + tf.summary.histogram("net_derivative", net_deriv) + net_deriv_reshape = tf.reshape( + net_deriv, + [np.cast["int64"](-1), natoms[0] * np.cast["int64"](self.ndescrpt)], + ) + force = op_module.prod_force_se_r( + net_deriv_reshape, self.descrpt_deriv, self.nlist, natoms + ) + virial, atom_virial = op_module.prod_virial_se_r( + net_deriv_reshape, self.descrpt_deriv, self.rij, self.nlist, natoms + ) + tf.summary.histogram("force", force) + tf.summary.histogram("virial", virial) + tf.summary.histogram("atom_virial", atom_virial) return force, virial, atom_virial - - - def _pass_filter(self, - inputs, - atype, - natoms, - reuse = None, - suffix = '', - trainable = True) : + + def _pass_filter( + self, inputs, atype, natoms, reuse=None, suffix="", trainable=True + ): start_index = 0 inputs = tf.reshape(inputs, [-1, natoms[0], self.ndescrpt]) output = [] if not self.type_one_side: for type_i in range(self.ntypes): - inputs_i = tf.slice (inputs, - [ 0, start_index, 0], - [-1, natoms[2+type_i], -1] ) + inputs_i = tf.slice( + inputs, [0, start_index, 0], [-1, natoms[2 + type_i], -1] + ) inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt]) - filter_name = 'filter_type_'+str(type_i)+suffix - layer = self._filter_r(inputs_i, type_i, name=filter_name, natoms=natoms, reuse=reuse, trainable = trainable, activation_fn = self.filter_activation_fn) - layer = tf.reshape(layer, [tf.shape(inputs)[0], natoms[2+type_i], self.get_dim_out()]) + filter_name = "filter_type_" + str(type_i) + suffix + layer = self._filter_r( + inputs_i, + type_i, + name=filter_name, + natoms=natoms, + reuse=reuse, + trainable=trainable, + activation_fn=self.filter_activation_fn, + ) + layer = tf.reshape( + layer, [tf.shape(inputs)[0], natoms[2 
+ type_i], self.get_dim_out()] + ) output.append(layer) - start_index += natoms[2+type_i] - else : + start_index += natoms[2 + type_i] + else: inputs_i = inputs inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt]) type_i = -1 @@ -481,27 +550,36 @@ def _pass_filter(self, tf.shape(inputs_i)[0], ) inputs_i *= mask - layer = self._filter_r(inputs_i, type_i, name='filter_type_all'+suffix, natoms=natoms, reuse=reuse, trainable = trainable, activation_fn = self.filter_activation_fn) - layer = tf.reshape(layer, [tf.shape(inputs)[0], natoms[0], self.get_dim_out()]) + layer = self._filter_r( + inputs_i, + type_i, + name="filter_type_all" + suffix, + natoms=natoms, + reuse=reuse, + trainable=trainable, + activation_fn=self.filter_activation_fn, + ) + layer = tf.reshape( + layer, [tf.shape(inputs)[0], natoms[0], self.get_dim_out()] + ) output.append(layer) - output = tf.concat(output, axis = 1) + output = tf.concat(output, axis=1) return output - def _compute_dstats_sys_se_r (self, - data_coord, - data_box, - data_atype, - natoms_vec, - mesh) : - dd_all \ - = run_sess(self.sub_sess, self.stat_descrpt, - feed_dict = { - self.place_holders['coord']: data_coord, - self.place_holders['type']: data_atype, - self.place_holders['natoms_vec']: natoms_vec, - self.place_holders['box']: data_box, - self.place_holders['default_mesh']: mesh, - }) + def _compute_dstats_sys_se_r( + self, data_coord, data_box, data_atype, natoms_vec, mesh + ): + dd_all = run_sess( + self.sub_sess, + self.stat_descrpt, + feed_dict={ + self.place_holders["coord"]: data_coord, + self.place_holders["type"]: data_atype, + self.place_holders["natoms_vec"]: natoms_vec, + self.place_holders["box"]: data_box, + self.place_holders["default_mesh"]: mesh, + }, + ) natoms = natoms_vec dd_all = np.reshape(dd_all, [-1, self.ndescrpt * natoms[0]]) start_index = 0 @@ -509,13 +587,13 @@ def _compute_dstats_sys_se_r (self, sysn = [] sysr2 = [] for type_i in range(self.ntypes): - end_index = start_index + self.ndescrpt * 
natoms[2+type_i] + end_index = start_index + self.ndescrpt * natoms[2 + type_i] dd = dd_all[:, start_index:end_index] dd = np.reshape(dd, [-1, self.ndescrpt]) - start_index = end_index + start_index = end_index # compute - dd = np.reshape (dd, [-1, 1]) - ddr = dd[:,:1] + dd = np.reshape(dd, [-1, 1]) + ddr = dd[:, :1] sumr = np.sum(ddr) sumn = dd.shape[0] sumr2 = np.sum(np.multiply(ddr, ddr)) @@ -524,24 +602,25 @@ def _compute_dstats_sys_se_r (self, sysr2.append(sumr2) return sysr, sysr2, sysn - - def _compute_std (self,sumv2, sumv, sumn) : - val = np.sqrt(sumv2/sumn - np.multiply(sumv/sumn, sumv/sumn)) + def _compute_std(self, sumv2, sumv, sumn): + val = np.sqrt(sumv2 / sumn - np.multiply(sumv / sumn, sumv / sumn)) if np.abs(val) < 1e-2: val = 1e-2 return val @cast_precision - def _filter_r(self, - inputs, - type_input, - natoms, - activation_fn=tf.nn.tanh, - stddev=1.0, - bavg=0.0, - name='linear', - reuse=None, - trainable = True): + def _filter_r( + self, + inputs, + type_input, + natoms, + activation_fn=tf.nn.tanh, + stddev=1.0, + bavg=0.0, + name="linear", + reuse=None, + trainable=True, + ): # natom x nei outputs_size = [1] + self.filter_neuron with tf.variable_scope(name, reuse=reuse): @@ -550,44 +629,61 @@ def _filter_r(self, for type_i in range(self.ntypes): # cut-out inputs # with natom x nei_type_i - inputs_i = tf.slice (inputs, - [ 0, start_index ], - [-1, self.sel_r[type_i]] ) + inputs_i = tf.slice(inputs, [0, start_index], [-1, self.sel_r[type_i]]) start_index += self.sel_r[type_i] shape_i = inputs_i.get_shape().as_list() # with (natom x nei_type_i) x 1 xyz_scatter = tf.reshape(inputs_i, [-1, 1]) if self.compress and ((type_input, type_i) not in self.exclude_types): - net = 'filter_' + str(type_input) + '_net_' + str(type_i) - info = [self.lower[net], self.upper[net], self.upper[net] * self.table_config[0], self.table_config[1], self.table_config[2], self.table_config[3]] - xyz_scatter = op_module.tabulate_fusion_se_r(tf.cast(self.table.data[net], 
self.filter_precision), info, inputs_i, last_layer_size = outputs_size[-1]) + net = "filter_" + str(type_input) + "_net_" + str(type_i) + info = [ + self.lower[net], + self.upper[net], + self.upper[net] * self.table_config[0], + self.table_config[1], + self.table_config[2], + self.table_config[3], + ] + xyz_scatter = op_module.tabulate_fusion_se_r( + tf.cast(self.table.data[net], self.filter_precision), + info, + inputs_i, + last_layer_size=outputs_size[-1], + ) elif (type_input, type_i) not in self.exclude_types: - xyz_scatter = embedding_net(xyz_scatter, - self.filter_neuron, - self.filter_precision, - activation_fn = activation_fn, - resnet_dt = self.filter_resnet_dt, - name_suffix = "_"+str(type_i), - stddev = stddev, - bavg = bavg, - seed = self.seed, - trainable = trainable, - uniform_seed = self.uniform_seed, - initial_variables = self.embedding_net_variables, - ) - if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift + xyz_scatter = embedding_net( + xyz_scatter, + self.filter_neuron, + self.filter_precision, + activation_fn=activation_fn, + resnet_dt=self.filter_resnet_dt, + name_suffix="_" + str(type_i), + stddev=stddev, + bavg=bavg, + seed=self.seed, + trainable=trainable, + uniform_seed=self.uniform_seed, + initial_variables=self.embedding_net_variables, + ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift # natom x nei_type_i x out_size - xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1], outputs_size[-1])) + xyz_scatter = tf.reshape( + xyz_scatter, (-1, shape_i[1], outputs_size[-1]) + ) else: natom = tf.shape(inputs)[0] - xyz_scatter = tf.cast(tf.fill((natom, shape_i[1], outputs_size[-1]), 0.), self.filter_precision) + xyz_scatter = tf.cast( + tf.fill((natom, shape_i[1], outputs_size[-1]), 0.0), + self.filter_precision, + ) xyz_scatter_total.append(xyz_scatter) # natom x nei x outputs_size xyz_scatter = tf.concat(xyz_scatter_total, axis=1) # natom x outputs_size - # - 
res_rescale = 1./5. - result = tf.reduce_mean(xyz_scatter, axis = 1) * res_rescale + # + res_rescale = 1.0 / 5.0 + result = tf.reduce_mean(xyz_scatter, axis=1) * res_rescale return result diff --git a/deepmd/descriptor/se_t.py b/deepmd/descriptor/se_t.py index 2ecfa00693..623a3b0e83 100644 --- a/deepmd/descriptor/se_t.py +++ b/deepmd/descriptor/se_t.py @@ -1,26 +1,53 @@ +from typing import ( + List, + Optional, + Tuple, +) + import numpy as np -from typing import Optional, Tuple, List - -from deepmd.env import tf -from deepmd.common import get_activation_func, get_precision, cast_precision -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION -from deepmd.env import op_module -from deepmd.env import default_tf_session_config -from deepmd.utils.network import embedding_net, embedding_net_rand_seed_shift -from deepmd.utils.sess import run_sess -from deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph -from deepmd.utils.tabulate import DPTabulate -from .descriptor import Descriptor -from .se import DescrptSe + +from deepmd.common import ( + cast_precision, + get_activation_func, + get_precision, +) +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + default_tf_session_config, + op_module, + tf, +) +from deepmd.utils.graph import ( + get_tensor_by_name_from_graph, + load_graph_def, +) +from deepmd.utils.network import ( + embedding_net, + embedding_net_rand_seed_shift, +) +from deepmd.utils.sess import ( + run_sess, +) +from deepmd.utils.tabulate import ( + DPTabulate, +) + +from .descriptor import ( + Descriptor, +) +from .se import ( + DescrptSe, +) + @Descriptor.register("se_e3") @Descriptor.register("se_at") @Descriptor.register("se_a_3be") -class DescrptSeT (DescrptSe): +class DescrptSeT(DescrptSe): """DeepPot-SE constructed from all information (both angular and radial) of atomic configurations. - + The embedding takes angles between two neighboring atoms as input. 
Parameters @@ -49,25 +76,29 @@ class DescrptSeT (DescrptSe): uniform_seed Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed """ - def __init__ (self, - rcut: float, - rcut_smth: float, - sel: List[str], - neuron: List[int] = [24,48,96], - resnet_dt: bool = False, - trainable: bool = True, - seed: Optional[int] = None, - set_davg_zero: bool = False, - activation_function: str = 'tanh', - precision: str = 'default', - uniform_seed: bool = False, - multi_task: bool = False + + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: List[str], + neuron: List[int] = [24, 48, 96], + resnet_dt: bool = False, + trainable: bool = True, + seed: Optional[int] = None, + set_davg_zero: bool = False, + activation_function: str = "tanh", + precision: str = "default", + uniform_seed: bool = False, + multi_task: bool = False, ) -> None: """ Constructor """ if rcut < rcut_smth: - raise RuntimeError("rcut_smth (%f) should be no more than rcut (%f)!" % (rcut_smth, rcut)) + raise RuntimeError( + "rcut_smth (%f) should be no more than rcut (%f)!" 
% (rcut_smth, rcut) + ) self.sel_a = sel self.rcut_r = rcut self.rcut_r_smth = rcut_smth @@ -87,9 +118,9 @@ def __init__ (self, self.set_davg_zero = set_davg_zero # descrpt config - self.sel_r = [ 0 for ii in range(len(self.sel_a)) ] + self.sel_r = [0 for ii in range(len(self.sel_a))] self.ntypes = len(self.sel_a) - assert(self.ntypes == len(self.sel_r)) + assert self.ntypes == len(self.sel_r) self.rcut_a = -1 # numb of neighbors and numb of descrptors self.nnei_a = np.cumsum(self.sel_a)[-1] @@ -105,53 +136,72 @@ def __init__ (self, self.embedding_net_variables = None self.place_holders = {} - avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) - std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) + avg_zero = np.zeros([self.ntypes, self.ndescrpt]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) + std_ones = np.ones([self.ntypes, self.ndescrpt]).astype( + GLOBAL_NP_FLOAT_PRECISION + ) sub_graph = tf.Graph() with sub_graph.as_default(): - name_pfx = 'd_sea_' - for ii in ['coord', 'box']: - self.place_holders[ii] = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None], name = name_pfx+'t_'+ii) - self.place_holders['type'] = tf.placeholder(tf.int32, [None, None], name=name_pfx+'t_type') - self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes+2], name=name_pfx+'t_natoms') - self.place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name=name_pfx+'t_mesh') - self.stat_descrpt, descrpt_deriv, rij, nlist \ - = op_module.prod_env_mat_a(self.place_holders['coord'], - self.place_holders['type'], - self.place_holders['natoms_vec'], - self.place_holders['box'], - self.place_holders['default_mesh'], - tf.constant(avg_zero), - tf.constant(std_ones), - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - rcut_r_smth = self.rcut_r_smth, - sel_a = self.sel_a, - sel_r = self.sel_r) - self.sub_sess = tf.Session(graph = sub_graph, config=default_tf_session_config) + name_pfx = "d_sea_" + for ii in 
["coord", "box"]: + self.place_holders[ii] = tf.placeholder( + GLOBAL_NP_FLOAT_PRECISION, [None, None], name=name_pfx + "t_" + ii + ) + self.place_holders["type"] = tf.placeholder( + tf.int32, [None, None], name=name_pfx + "t_type" + ) + self.place_holders["natoms_vec"] = tf.placeholder( + tf.int32, [self.ntypes + 2], name=name_pfx + "t_natoms" + ) + self.place_holders["default_mesh"] = tf.placeholder( + tf.int32, [None], name=name_pfx + "t_mesh" + ) + self.stat_descrpt, descrpt_deriv, rij, nlist = op_module.prod_env_mat_a( + self.place_holders["coord"], + self.place_holders["type"], + self.place_holders["natoms_vec"], + self.place_holders["box"], + self.place_holders["default_mesh"], + tf.constant(avg_zero), + tf.constant(std_ones), + rcut_a=self.rcut_a, + rcut_r=self.rcut_r, + rcut_r_smth=self.rcut_r_smth, + sel_a=self.sel_a, + sel_r=self.sel_r, + ) + self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) self.multi_task = multi_task if multi_task: - self.stat_dict = {'sumr': [], 'suma': [], 'sumn': [], 'sumr2': [], 'suma2': []} - - def get_rcut (self) -> float: + self.stat_dict = { + "sumr": [], + "suma": [], + "sumn": [], + "sumr2": [], + "suma2": [], + } + + def get_rcut(self) -> float: """ Returns the cut-off radius """ return self.rcut_r - def get_ntypes (self) -> int: + def get_ntypes(self) -> int: """ Returns the number of atom types """ return self.ntypes - def get_dim_out (self) -> int: + def get_dim_out(self) -> int: """ Returns the output dimension of this descriptor """ return self.filter_neuron[-1] - def get_nlist (self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: """ Returns ------- @@ -166,17 +216,18 @@ def get_nlist (self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: """ return self.nlist, self.rij, self.sel_a, self.sel_r - def compute_input_stats (self, - data_coord : list, - data_box : list, - data_atype : list, - natoms_vec : 
list, - mesh : list, - input_dict : dict - ) -> None : + def compute_input_stats( + self, + data_coord: list, + data_box: list, + data_atype: list, + natoms_vec: list, + mesh: list, + input_dict: dict, + ) -> None: """ Compute the statisitcs (avg and std) of the training data. The input will be normalized by the statistics. - + Parameters ---------- data_coord @@ -198,23 +249,32 @@ def compute_input_stats (self, sumn = [] sumr2 = [] suma2 = [] - for cc,bb,tt,nn,mm in zip(data_coord,data_box,data_atype,natoms_vec,mesh) : - sysr,sysr2,sysa,sysa2,sysn \ - = self._compute_dstats_sys_smth(cc,bb,tt,nn,mm) + for cc, bb, tt, nn, mm in zip( + data_coord, data_box, data_atype, natoms_vec, mesh + ): + sysr, sysr2, sysa, sysa2, sysn = self._compute_dstats_sys_smth( + cc, bb, tt, nn, mm + ) sumr.append(sysr) suma.append(sysa) sumn.append(sysn) sumr2.append(sysr2) suma2.append(sysa2) if not self.multi_task: - stat_dict = {'sumr': sumr, 'suma': suma, 'sumn': sumn, 'sumr2': sumr2, 'suma2': suma2} + stat_dict = { + "sumr": sumr, + "suma": suma, + "sumn": sumn, + "sumr2": sumr2, + "suma2": suma2, + } self.merge_input_stats(stat_dict) else: - self.stat_dict['sumr'] += sumr - self.stat_dict['suma'] += suma - self.stat_dict['sumn'] += sumn - self.stat_dict['sumr2'] += sumr2 - self.stat_dict['suma2'] += suma2 + self.stat_dict["sumr"] += sumr + self.stat_dict["suma"] += suma + self.stat_dict["sumn"] += sumn + self.stat_dict["sumr2"] += sumr2 + self.stat_dict["suma2"] += suma2 def merge_input_stats(self, stat_dict): """ @@ -237,18 +297,19 @@ def merge_input_stats(self, stat_dict): """ all_davg = [] all_dstd = [] - sumr = np.sum(stat_dict['sumr'], axis = 0) - suma = np.sum(stat_dict['suma'], axis = 0) - sumn = np.sum(stat_dict['sumn'], axis = 0) - sumr2 = np.sum(stat_dict['sumr2'], axis = 0) - suma2 = np.sum(stat_dict['suma2'], axis = 0) - for type_i in range(self.ntypes) : - davgunit = [sumr[type_i]/(sumn[type_i]+1e-15), 0, 0, 0] - dstdunit = [self._compute_std(sumr2[type_i], 
sumr[type_i], sumn[type_i]), - self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), - self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), - self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]) - ] + sumr = np.sum(stat_dict["sumr"], axis=0) + suma = np.sum(stat_dict["suma"], axis=0) + sumn = np.sum(stat_dict["sumn"], axis=0) + sumr2 = np.sum(stat_dict["sumr2"], axis=0) + suma2 = np.sum(stat_dict["suma2"], axis=0) + for type_i in range(self.ntypes): + davgunit = [sumr[type_i] / (sumn[type_i] + 1e-15), 0, 0, 0] + dstdunit = [ + self._compute_std(sumr2[type_i], sumr[type_i], sumn[type_i]), + self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), + self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), + self._compute_std(suma2[type_i], suma[type_i], sumn[type_i]), + ] davg = np.tile(davgunit, self.ndescrpt // 4) dstd = np.tile(dstdunit, self.ndescrpt // 4) all_davg.append(davg) @@ -257,16 +318,16 @@ def merge_input_stats(self, stat_dict): self.davg = np.array(all_davg) self.dstd = np.array(all_dstd) - - def enable_compression(self, - min_nbor_dist : float, - graph: tf.Graph, - graph_def: tf.GraphDef, - table_extrapolate : float = 5, - table_stride_1 : float = 0.01, - table_stride_2 : float = 0.1, - check_frequency : int = -1, - suffix : str = "", + def enable_compression( + self, + min_nbor_dist: float, + graph: tf.Graph, + graph_def: tf.GraphDef, + table_extrapolate: float = 5, + table_stride_1: float = 0.01, + table_stride_2: float = 0.1, + check_frequency: int = -1, + suffix: str = "", ) -> None: """ Reveive the statisitcs (distance, max_nbor_size and env_mat_range) of the training data. @@ -298,33 +359,46 @@ def enable_compression(self, if self.filter_neuron[ii] * 2 != self.filter_neuron[ii + 1]: raise NotImplementedError( "Model Compression error: descriptor neuron [%s] is not supported by model compression! " - "The size of the next layer of the neural network must be twice the size of the previous layer." 
- % ','.join([str(item) for item in self.filter_neuron]) + "The size of the next layer of the neural network must be twice the size of the previous layer." + % ",".join([str(item) for item in self.filter_neuron]) ) self.compress = True self.table = DPTabulate( - self, self.filter_neuron, graph, graph_def, activation_fn = self.filter_activation_fn, suffix=suffix) - self.table_config = [table_extrapolate, table_stride_1 * 10, table_stride_2 * 10, check_frequency] - self.lower, self.upper \ - = self.table.build(min_nbor_dist, - table_extrapolate, - table_stride_1 * 10, - table_stride_2 * 10) - - self.davg = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_avg' % suffix) - self.dstd = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_std' % suffix) - - - def build (self, - coord_ : tf.Tensor, - atype_ : tf.Tensor, - natoms : tf.Tensor, - box_ : tf.Tensor, - mesh : tf.Tensor, - input_dict : dict, - reuse : bool = None, - suffix : str = '' + self, + self.filter_neuron, + graph, + graph_def, + activation_fn=self.filter_activation_fn, + suffix=suffix, + ) + self.table_config = [ + table_extrapolate, + table_stride_1 * 10, + table_stride_2 * 10, + check_frequency, + ] + self.lower, self.upper = self.table.build( + min_nbor_dist, table_extrapolate, table_stride_1 * 10, table_stride_2 * 10 + ) + + self.davg = get_tensor_by_name_from_graph( + graph, "descrpt_attr%s/t_avg" % suffix + ) + self.dstd = get_tensor_by_name_from_graph( + graph, "descrpt_attr%s/t_std" % suffix + ) + + def build( + self, + coord_: tf.Tensor, + atype_: tf.Tensor, + natoms: tf.Tensor, + box_: tf.Tensor, + mesh: tf.Tensor, + input_dict: dict, + reuse: bool = None, + suffix: str = "", ) -> tf.Tensor: """ Build the computational graph for the descriptor @@ -342,8 +416,8 @@ def build (self, natoms[i]: 2 <= i < Ntypes+2, number of type i atoms mesh For historical reasons, only the length of the Tensor matters. - if size of mesh == 6, pbc is assumed. - if size of mesh == 0, no-pbc is assumed. 
+ if size of mesh == 6, pbc is assumed. + if size of mesh == 0, no-pbc is assumed. input_dict Dictionary for additional inputs reuse @@ -358,69 +432,75 @@ def build (self, """ davg = self.davg dstd = self.dstd - with tf.variable_scope('descrpt_attr' + suffix, reuse = reuse) : + with tf.variable_scope("descrpt_attr" + suffix, reuse=reuse): if davg is None: - davg = np.zeros([self.ntypes, self.ndescrpt]) + davg = np.zeros([self.ntypes, self.ndescrpt]) if dstd is None: - dstd = np.ones ([self.ntypes, self.ndescrpt]) - t_rcut = tf.constant(np.max([self.rcut_r, self.rcut_a]), - name = 'rcut', - dtype = GLOBAL_TF_FLOAT_PRECISION) - t_ntypes = tf.constant(self.ntypes, - name = 'ntypes', - dtype = tf.int32) - t_ndescrpt = tf.constant(self.ndescrpt, - name = 'ndescrpt', - dtype = tf.int32) - t_sel = tf.constant(self.sel_a, - name = 'sel', - dtype = tf.int32) - self.t_avg = tf.get_variable('t_avg', - davg.shape, - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(davg)) - self.t_std = tf.get_variable('t_std', - dstd.shape, - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(dstd)) - - coord = tf.reshape (coord_, [-1, natoms[1] * 3]) - box = tf.reshape (box_, [-1, 9]) - atype = tf.reshape (atype_, [-1, natoms[1]]) - - self.descrpt, self.descrpt_deriv, self.rij, self.nlist \ - = op_module.prod_env_mat_a (coord, - atype, - natoms, - box, - mesh, - self.t_avg, - self.t_std, - rcut_a = self.rcut_a, - rcut_r = self.rcut_r, - rcut_r_smth = self.rcut_r_smth, - sel_a = self.sel_a, - sel_r = self.sel_r) + dstd = np.ones([self.ntypes, self.ndescrpt]) + t_rcut = tf.constant( + np.max([self.rcut_r, self.rcut_a]), + name="rcut", + dtype=GLOBAL_TF_FLOAT_PRECISION, + ) + t_ntypes = tf.constant(self.ntypes, name="ntypes", dtype=tf.int32) + t_ndescrpt = tf.constant(self.ndescrpt, name="ndescrpt", dtype=tf.int32) + t_sel = tf.constant(self.sel_a, name="sel", dtype=tf.int32) + self.t_avg = 
tf.get_variable( + "t_avg", + davg.shape, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(davg), + ) + self.t_std = tf.get_variable( + "t_std", + dstd.shape, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(dstd), + ) + + coord = tf.reshape(coord_, [-1, natoms[1] * 3]) + box = tf.reshape(box_, [-1, 9]) + atype = tf.reshape(atype_, [-1, natoms[1]]) + + ( + self.descrpt, + self.descrpt_deriv, + self.rij, + self.nlist, + ) = op_module.prod_env_mat_a( + coord, + atype, + natoms, + box, + mesh, + self.t_avg, + self.t_std, + rcut_a=self.rcut_a, + rcut_r=self.rcut_r, + rcut_r_smth=self.rcut_r_smth, + sel_a=self.sel_a, + sel_r=self.sel_r, + ) self.descrpt_reshape = tf.reshape(self.descrpt, [-1, self.ndescrpt]) self._identity_tensors(suffix=suffix) - self.dout, self.qmat = self._pass_filter(self.descrpt_reshape, - atype, - natoms, - input_dict, - suffix = suffix, - reuse = reuse, - trainable = self.trainable) + self.dout, self.qmat = self._pass_filter( + self.descrpt_reshape, + atype, + natoms, + input_dict, + suffix=suffix, + reuse=reuse, + trainable=self.trainable, + ) return self.dout - - def prod_force_virial(self, - atom_ener : tf.Tensor, - natoms : tf.Tensor + def prod_force_virial( + self, atom_ener: tf.Tensor, natoms: tf.Tensor ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """ Compute force and virial @@ -444,34 +524,33 @@ def prod_force_virial(self, atom_virial The atomic virial """ - [net_deriv] = tf.gradients (atom_ener, self.descrpt_reshape) - net_deriv_reshape = tf.reshape (net_deriv, [np.cast['int64'](-1), natoms[0] * np.cast['int64'](self.ndescrpt)]) - force \ - = op_module.prod_force_se_a (net_deriv_reshape, - self.descrpt_deriv, - self.nlist, - natoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) - virial, atom_virial \ - = op_module.prod_virial_se_a (net_deriv_reshape, - self.descrpt_deriv, - self.rij, - self.nlist, - natoms, - n_a_sel = self.nnei_a, - n_r_sel = 
self.nnei_r) + [net_deriv] = tf.gradients(atom_ener, self.descrpt_reshape) + net_deriv_reshape = tf.reshape( + net_deriv, + [np.cast["int64"](-1), natoms[0] * np.cast["int64"](self.ndescrpt)], + ) + force = op_module.prod_force_se_a( + net_deriv_reshape, + self.descrpt_deriv, + self.nlist, + natoms, + n_a_sel=self.nnei_a, + n_r_sel=self.nnei_r, + ) + virial, atom_virial = op_module.prod_virial_se_a( + net_deriv_reshape, + self.descrpt_deriv, + self.rij, + self.nlist, + natoms, + n_a_sel=self.nnei_a, + n_r_sel=self.nnei_r, + ) return force, virial, atom_virial - - - def _pass_filter(self, - inputs, - atype, - natoms, - input_dict, - reuse = None, - suffix = '', - trainable = True) : + + def _pass_filter( + self, inputs, atype, natoms, input_dict, reuse=None, suffix="", trainable=True + ): start_index = 0 inputs = tf.reshape(inputs, [-1, natoms[0], self.ndescrpt]) output = [] @@ -479,31 +558,37 @@ def _pass_filter(self, inputs_i = inputs inputs_i = tf.reshape(inputs_i, [-1, self.ndescrpt]) type_i = -1 - layer, qmat = self._filter(inputs_i, type_i, name='filter_type_all'+suffix, natoms=natoms, reuse=reuse, trainable = trainable, activation_fn = self.filter_activation_fn) + layer, qmat = self._filter( + inputs_i, + type_i, + name="filter_type_all" + suffix, + natoms=natoms, + reuse=reuse, + trainable=trainable, + activation_fn=self.filter_activation_fn, + ) layer = tf.reshape(layer, [tf.shape(inputs)[0], natoms[0], self.get_dim_out()]) # qmat = tf.reshape(qmat, [tf.shape(inputs)[0], natoms[0] * self.get_dim_rot_mat_1() * 3]) output.append(layer) # output_qmat.append(qmat) - output = tf.concat(output, axis = 1) + output = tf.concat(output, axis=1) # output_qmat = tf.concat(output_qmat, axis = 1) return output, None - - def _compute_dstats_sys_smth (self, - data_coord, - data_box, - data_atype, - natoms_vec, - mesh) : - dd_all \ - = run_sess(self.sub_sess, self.stat_descrpt, - feed_dict = { - self.place_holders['coord']: data_coord, - self.place_holders['type']: 
data_atype, - self.place_holders['natoms_vec']: natoms_vec, - self.place_holders['box']: data_box, - self.place_holders['default_mesh']: mesh, - }) + def _compute_dstats_sys_smth( + self, data_coord, data_box, data_atype, natoms_vec, mesh + ): + dd_all = run_sess( + self.sub_sess, + self.stat_descrpt, + feed_dict={ + self.place_holders["coord"]: data_coord, + self.place_holders["type"]: data_atype, + self.place_holders["natoms_vec"]: natoms_vec, + self.place_holders["box"]: data_box, + self.place_holders["default_mesh"]: mesh, + }, + ) natoms = natoms_vec dd_all = np.reshape(dd_all, [-1, self.ndescrpt * natoms[0]]) start_index = 0 @@ -513,19 +598,19 @@ def _compute_dstats_sys_smth (self, sysr2 = [] sysa2 = [] for type_i in range(self.ntypes): - end_index = start_index + self.ndescrpt * natoms[2+type_i] + end_index = start_index + self.ndescrpt * natoms[2 + type_i] dd = dd_all[:, start_index:end_index] dd = np.reshape(dd, [-1, self.ndescrpt]) - start_index = end_index + start_index = end_index # compute - dd = np.reshape (dd, [-1, 4]) - ddr = dd[:,:1] - dda = dd[:,1:] + dd = np.reshape(dd, [-1, 4]) + ddr = dd[:, :1] + dda = dd[:, 1:] sumr = np.sum(ddr) - suma = np.sum(dda) / 3. + suma = np.sum(dda) / 3.0 sumn = dd.shape[0] sumr2 = np.sum(np.multiply(ddr, ddr)) - suma2 = np.sum(np.multiply(dda, dda)) / 3. 
+ suma2 = np.sum(np.multiply(dda, dda)) / 3.0 sysr.append(sumr) sysa.append(suma) sysn.append(sumn) @@ -533,24 +618,25 @@ def _compute_dstats_sys_smth (self, sysa2.append(suma2) return sysr, sysr2, sysa, sysa2, sysn - - def _compute_std (self,sumv2, sumv, sumn) : - val = np.sqrt(sumv2/sumn - np.multiply(sumv/sumn, sumv/sumn)) + def _compute_std(self, sumv2, sumv, sumn): + val = np.sqrt(sumv2 / sumn - np.multiply(sumv / sumn, sumv / sumn)) if np.abs(val) < 1e-2: val = 1e-2 return val @cast_precision - def _filter(self, - inputs, - type_input, - natoms, - activation_fn=tf.nn.tanh, - stddev=1.0, - bavg=0.0, - name='linear', - reuse=None, - trainable = True): + def _filter( + self, + inputs, + type_input, + natoms, + activation_fn=tf.nn.tanh, + stddev=1.0, + bavg=0.0, + name="linear", + reuse=None, + trainable=True, + ): # natom x (nei x 4) shape = inputs.get_shape().as_list() outputs_size = [1] + self.filter_neuron @@ -559,64 +645,80 @@ def _filter(self, result = None for type_i in range(self.ntypes): # cut-out inputs - # with natom x (nei_type_i x 4) - inputs_i = tf.slice (inputs, - [ 0, start_index_i *4], - [-1, self.sel_a[type_i] *4] ) + # with natom x (nei_type_i x 4) + inputs_i = tf.slice( + inputs, [0, start_index_i * 4], [-1, self.sel_a[type_i] * 4] + ) start_index_j = start_index_i start_index_i += self.sel_a[type_i] nei_type_i = self.sel_a[type_i] shape_i = inputs_i.get_shape().as_list() - assert(shape_i[1] == nei_type_i * 4) + assert shape_i[1] == nei_type_i * 4 # with natom x nei_type_i x 4 env_i = tf.reshape(inputs_i, [-1, nei_type_i, 4]) # with natom x nei_type_i x 3 env_i = tf.slice(env_i, [0, 0, 1], [-1, -1, -1]) for type_j in range(type_i, self.ntypes): - # with natom x (nei_type_j x 4) - inputs_j = tf.slice (inputs, - [ 0, start_index_j *4], - [-1, self.sel_a[type_j] *4] ) + # with natom x (nei_type_j x 4) + inputs_j = tf.slice( + inputs, [0, start_index_j * 4], [-1, self.sel_a[type_j] * 4] + ) start_index_j += self.sel_a[type_j] nei_type_j = 
self.sel_a[type_j] shape_j = inputs_j.get_shape().as_list() - assert(shape_j[1] == nei_type_j * 4) + assert shape_j[1] == nei_type_j * 4 # with natom x nei_type_j x 4 env_j = tf.reshape(inputs_j, [-1, nei_type_j, 4]) # with natom x nei_type_i x 3 env_j = tf.slice(env_j, [0, 0, 1], [-1, -1, -1]) # with natom x nei_type_i x nei_type_j - env_ij = tf.einsum('ijm,ikm->ijk', env_i, env_j) + env_ij = tf.einsum("ijm,ikm->ijk", env_i, env_j) # with (natom x nei_type_i x nei_type_j) ebd_env_ij = tf.reshape(env_ij, [-1, 1]) if self.compress: - net = 'filter_' + str(type_i) + '_net_' + str(type_j) - info = [self.lower[net], self.upper[net], self.upper[net] * self.table_config[0], self.table_config[1], self.table_config[2], self.table_config[3]] - res_ij = op_module.tabulate_fusion_se_t(tf.cast(self.table.data[net], self.filter_precision), info, ebd_env_ij, env_ij, last_layer_size = outputs_size[-1]) + net = "filter_" + str(type_i) + "_net_" + str(type_j) + info = [ + self.lower[net], + self.upper[net], + self.upper[net] * self.table_config[0], + self.table_config[1], + self.table_config[2], + self.table_config[3], + ] + res_ij = op_module.tabulate_fusion_se_t( + tf.cast(self.table.data[net], self.filter_precision), + info, + ebd_env_ij, + env_ij, + last_layer_size=outputs_size[-1], + ) else: # with (natom x nei_type_i x nei_type_j) x out_size - ebd_env_ij = embedding_net(ebd_env_ij, - self.filter_neuron, - self.filter_precision, - activation_fn = activation_fn, - resnet_dt = self.filter_resnet_dt, - name_suffix = f"_{type_i}_{type_j}", - stddev = stddev, - bavg = bavg, - seed = self.seed, - trainable = trainable, - uniform_seed = self.uniform_seed, - initial_variables = self.embedding_net_variables, - ) - if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift + ebd_env_ij = embedding_net( + ebd_env_ij, + self.filter_neuron, + self.filter_precision, + activation_fn=activation_fn, + resnet_dt=self.filter_resnet_dt, + 
name_suffix=f"_{type_i}_{type_j}", + stddev=stddev, + bavg=bavg, + seed=self.seed, + trainable=trainable, + uniform_seed=self.uniform_seed, + initial_variables=self.embedding_net_variables, + ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift # with natom x nei_type_i x nei_type_j x out_size - ebd_env_ij = tf.reshape(ebd_env_ij, [-1, nei_type_i, nei_type_j, outputs_size[-1]]) + ebd_env_ij = tf.reshape( + ebd_env_ij, [-1, nei_type_i, nei_type_j, outputs_size[-1]] + ) # with natom x out_size - res_ij = tf.einsum('ijk,ijkm->im', env_ij, ebd_env_ij) + res_ij = tf.einsum("ijk,ijkm->im", env_ij, ebd_env_ij) res_ij = res_ij * (1.0 / float(nei_type_i) / float(nei_type_j)) if result is None: result = res_ij else: result += res_ij return result, None - diff --git a/deepmd/entrypoints/__init__.py b/deepmd/entrypoints/__init__.py index d7559ad74b..2ff77d086c 100644 --- a/deepmd/entrypoints/__init__.py +++ b/deepmd/entrypoints/__init__.py @@ -1,17 +1,36 @@ """Submodule that contains all the DeePMD-Kit entry point scripts.""" -from .compress import compress -from .config import config -from .doc import doc_train_input -from .freeze import freeze -from .test import test +from ..infer.model_devi import ( + make_model_devi, +) +from .compress import ( + compress, +) +from .config import ( + config, +) +from .convert import ( + convert, +) +from .doc import ( + doc_train_input, +) +from .freeze import ( + freeze, +) +from .neighbor_stat import ( + neighbor_stat, +) +from .test import ( + test, +) + # import `train` as `train_dp` to avoid the conflict of the # module name `train` and the function name `train` from .train import train as train_dp -from .transfer import transfer -from ..infer.model_devi import make_model_devi -from .convert import convert -from .neighbor_stat import neighbor_stat +from .transfer import ( + transfer, +) __all__ = [ "config", diff --git a/deepmd/entrypoints/compress.py b/deepmd/entrypoints/compress.py index 
701571398c..891b7e7289 100644 --- a/deepmd/entrypoints/compress.py +++ b/deepmd/entrypoints/compress.py @@ -1,20 +1,45 @@ """Compress a model, which including tabulating the embedding-net.""" -import os import json import logging -from typing import Optional - -from deepmd.common import j_loader -from deepmd.env import tf, GLOBAL_ENER_FLOAT_PRECISION -from deepmd.utils.argcheck import normalize -from deepmd.utils.compat import update_deepmd_input -from deepmd.utils.errors import GraphTooLargeError, GraphWithoutTensorError -from deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph - -from .freeze import freeze -from .train import train, get_rcut, get_min_nbor_dist -from .transfer import transfer +import os +from typing import ( + Optional, +) + +from deepmd.common import ( + j_loader, +) +from deepmd.env import ( + GLOBAL_ENER_FLOAT_PRECISION, + tf, +) +from deepmd.utils.argcheck import ( + normalize, +) +from deepmd.utils.compat import ( + update_deepmd_input, +) +from deepmd.utils.errors import ( + GraphTooLargeError, + GraphWithoutTensorError, +) +from deepmd.utils.graph import ( + get_tensor_by_name_from_graph, + load_graph_def, +) + +from .freeze import ( + freeze, +) +from .train import ( + get_min_nbor_dist, + get_rcut, + train, +) +from .transfer import ( + transfer, +) __all__ = ["compress"] @@ -68,8 +93,10 @@ def compress( """ graph, _ = load_graph_def(input) try: - t_jdata = get_tensor_by_name_from_graph(graph, 'train_attr/training_script') - t_min_nbor_dist = get_tensor_by_name_from_graph(graph, 'train_attr/min_nbor_dist') + t_jdata = get_tensor_by_name_from_graph(graph, "train_attr/training_script") + t_min_nbor_dist = get_tensor_by_name_from_graph( + graph, "train_attr/min_nbor_dist" + ) jdata = json.loads(t_jdata) except GraphWithoutTensorError as e: if training_script == None: @@ -77,11 +104,13 @@ def compress( "The input frozen model: %s has no training script or min_nbor_dist information, " "which is not supported by the model 
compression interface. " "Please consider using the --training-script command within the model compression interface to provide the training script of the input frozen model. " - "Note that the input training script must contain the correct path to the training data." % input + "Note that the input training script must contain the correct path to the training data." + % input ) from e elif not os.path.exists(training_script): raise RuntimeError( - "The input training script %s (%s) does not exist! Please check the path of the training script. " % (input, os.path.abspath(input)) + "The input training script %s (%s) does not exist! Please check the path of the training script. " + % (input, os.path.abspath(input)) ) from e else: log.info("stage 0: compute the min_nbor_dist") @@ -91,9 +120,11 @@ def compress( _check_compress_type(graph) - tf.constant(t_min_nbor_dist, - name = 'train_attr/min_nbor_dist', - dtype = GLOBAL_ENER_FLOAT_PRECISION) + tf.constant( + t_min_nbor_dist, + name="train_attr/min_nbor_dist", + dtype=GLOBAL_ENER_FLOAT_PRECISION, + ) jdata["model"]["compress"] = {} jdata["model"]["compress"]["model_file"] = input jdata["model"]["compress"]["min_nbor_dist"] = t_min_nbor_dist @@ -130,7 +161,7 @@ def compress( ) except GraphTooLargeError as e: raise RuntimeError( - "The uniform step size of the tabulation's first table is %f, " + "The uniform step size of the tabulation's first table is %f, " "which is too small. This leads to a very large graph size, " "exceeding protobuf's limitation (2 GB). You should try to " "increase the step size." % step @@ -146,18 +177,22 @@ def compress( freeze(checkpoint_folder=checkpoint_folder, output=output, node_names=None) except GraphTooLargeError as e: raise RuntimeError( - "The uniform step size of the tabulation's first table is %f, " + "The uniform step size of the tabulation's first table is %f, " "which is too small. This leads to a very large graph size, " "exceeding protobuf's limitation (2 GB). 
You should try to " "increase the step size." % step ) from e + def _check_compress_type(graph: tf.Graph): try: - t_model_type = bytes.decode(get_tensor_by_name_from_graph(graph, 'model_type')) + t_model_type = bytes.decode(get_tensor_by_name_from_graph(graph, "model_type")) except GraphWithoutTensorError as e: # Compatible with the upgraded model, which has no 'model_type' info t_model_type = None - + if t_model_type == "compressed_model": - raise RuntimeError("The input frozen model %s has already been compressed! Please do not compress the model repeatedly. " % model_file) + raise RuntimeError( + "The input frozen model %s has already been compressed! Please do not compress the model repeatedly. " + % model_file + ) diff --git a/deepmd/entrypoints/config.py b/deepmd/entrypoints/config.py index 0c98bb5f82..fcdea2ad89 100644 --- a/deepmd/entrypoints/config.py +++ b/deepmd/entrypoints/config.py @@ -2,11 +2,18 @@ """Quickly create a configuration file for smooth model.""" import json -import yaml -from pathlib import Path -from typing import Any, Dict, List, Tuple +from pathlib import ( + Path, +) +from typing import ( + Any, + Dict, + List, + Tuple, +) import numpy as np +import yaml __all__ = ["config"] @@ -262,7 +269,7 @@ def suggest_sel( [description] """ max_den = get_max_density(all_type, all_box) - return [int(ii) for ii in max_den * 4.0 / 3.0 * np.pi * rcut ** 3 * ratio] + return [int(ii) for ii in max_den * 4.0 / 3.0 * np.pi * rcut**3 * ratio] def suggest_batch_size(all_type: List[np.ndarray], min_atom: int) -> List[int]: diff --git a/deepmd/entrypoints/convert.py b/deepmd/entrypoints/convert.py index 586d7f2ee2..08f3f67095 100644 --- a/deepmd/entrypoints/convert.py +++ b/deepmd/entrypoints/convert.py @@ -1,12 +1,13 @@ from deepmd.utils.convert import ( - convert_012_to_21, convert_10_to_21, - convert_20_to_21, - convert_13_to_21, + convert_012_to_21, convert_12_to_21, + convert_13_to_21, + convert_20_to_21, convert_pbtxt_to_pb, ) + def convert( *, FROM: 
str, @@ -14,18 +15,18 @@ def convert( output_model: str, **kwargs, ): - if FROM == '0.12': + if FROM == "0.12": convert_012_to_21(input_model, output_model) - elif FROM == '1.0': + elif FROM == "1.0": convert_10_to_21(input_model, output_model) - elif FROM in ['1.1', '1.2']: + elif FROM in ["1.1", "1.2"]: # no difference between 1.1 and 1.2 convert_12_to_21(input_model, output_model) - elif FROM == '1.3': + elif FROM == "1.3": convert_13_to_21(input_model, output_model) - elif FROM == '2.0': + elif FROM == "2.0": convert_20_to_21(input_model, output_model) - elif FROM == 'pbtxt': + elif FROM == "pbtxt": convert_pbtxt_to_pb(input_model, output_model) else: - raise RuntimeError('unsupported model version ' + FROM) + raise RuntimeError("unsupported model version " + FROM) diff --git a/deepmd/entrypoints/doc.py b/deepmd/entrypoints/doc.py index 0cb555e4d1..b439c17409 100644 --- a/deepmd/entrypoints/doc.py +++ b/deepmd/entrypoints/doc.py @@ -1,6 +1,9 @@ """Module that prints train input arguments docstrings.""" -from deepmd.utils.argcheck import gen_doc, gen_json +from deepmd.utils.argcheck import ( + gen_doc, + gen_json, +) __all__ = ["doc_train_input"] diff --git a/deepmd/entrypoints/freeze.py b/deepmd/entrypoints/freeze.py index 41af0fa747..8cba0f20a9 100755 --- a/deepmd/entrypoints/freeze.py +++ b/deepmd/entrypoints/freeze.py @@ -6,54 +6,69 @@ https://blog.metaflow.fr/tensorflow-how-to-freeze-a-model-and-serve-it-with-a-python-api-d4f3596b3adc """ +import json import logging +from os.path import ( + abspath, +) +from typing import ( + List, + Optional, + Union, +) + import google.protobuf.message -from deepmd.env import tf, FITTING_NET_PATTERN, REMOVE_SUFFIX_DICT -from deepmd.utils.errors import GraphTooLargeError -from deepmd.utils.sess import run_sess -from deepmd.utils.graph import get_pattern_nodes_from_graph_def -from os.path import abspath -import json # load grad of force module import deepmd.op - -from typing import List, Optional, Union - -from 
deepmd.nvnmd.entrypoints.freeze import save_weight +from deepmd.env import ( + FITTING_NET_PATTERN, + REMOVE_SUFFIX_DICT, + tf, +) +from deepmd.nvnmd.entrypoints.freeze import ( + save_weight, +) +from deepmd.utils.errors import ( + GraphTooLargeError, +) +from deepmd.utils.graph import ( + get_pattern_nodes_from_graph_def, +) +from deepmd.utils.sess import ( + run_sess, +) __all__ = ["freeze"] log = logging.getLogger(__name__) + def _transfer_fitting_net_trainable_variables(sess, old_graph_def, raw_graph_def): old_pattern = FITTING_NET_PATTERN - raw_pattern = FITTING_NET_PATTERN\ - .replace('idt', r'idt+_\d+')\ - .replace('bias', r'bias+_\d+')\ - .replace('matrix', r'matrix+_\d+') - old_graph_nodes = get_pattern_nodes_from_graph_def( - old_graph_def, - old_pattern + raw_pattern = ( + FITTING_NET_PATTERN.replace("idt", r"idt+_\d+") + .replace("bias", r"bias+_\d+") + .replace("matrix", r"matrix+_\d+") ) - try : + old_graph_nodes = get_pattern_nodes_from_graph_def(old_graph_def, old_pattern) + try: raw_graph_def = tf.graph_util.convert_variables_to_constants( sess, # The session is used to retrieve the weights raw_graph_def, # The graph_def is used to retrieve the nodes - [n + '_1' for n in old_graph_nodes], # The output node names are used to select the usefull nodes + [ + n + "_1" for n in old_graph_nodes + ], # The output node names are used to select the usefull nodes ) except AssertionError: # if there's no additional nodes return old_graph_def - raw_graph_nodes = get_pattern_nodes_from_graph_def( - raw_graph_def, - raw_pattern - ) + raw_graph_nodes = get_pattern_nodes_from_graph_def(raw_graph_def, raw_pattern) for node in old_graph_def.node: if node.name not in old_graph_nodes.keys(): continue - tensor = tf.make_ndarray(raw_graph_nodes[node.name + '_1']) + tensor = tf.make_ndarray(raw_graph_nodes[node.name + "_1"]) node.attr["value"].tensor.tensor_content = tensor.tostring() return old_graph_def @@ -75,7 +90,7 @@ def change_name(name, suffix): if 
item.format(suffix) in name: name = name.replace(item.format(suffix), REMOVE_SUFFIX_DICT[item]) break - assert suffix not in name, 'fitting net name illegal!' + assert suffix not in name, "fitting net name illegal!" return name for node in output_graph_def.node: @@ -84,10 +99,13 @@ def change_name(name, suffix): for idx in range(len(node.input)): if out_suffix in node.input[idx]: node.input[idx] = change_name(node.input[idx], out_suffix) - attr_list = node.attr['_class'].list.s + attr_list = node.attr["_class"].list.s for idx in range(len(attr_list)): if out_suffix in bytes.decode(attr_list[idx]): - attr_list[idx] = bytes(change_name(bytes.decode(attr_list[idx]), out_suffix), encoding='utf8') + attr_list[idx] = bytes( + change_name(bytes.decode(attr_list[idx]), out_suffix), + encoding="utf8", + ) return output_graph_def @@ -103,42 +121,58 @@ def _modify_model_suffix(output_graph_def, out_suffix, freeze_type): freeze_type : str The model type to freeze. """ - output_graph_def = _remove_fitting_net_suffix( - output_graph_def, - out_suffix - ) + output_graph_def = _remove_fitting_net_suffix(output_graph_def, out_suffix) for node in output_graph_def.node: - if 'model_attr/model_type' in node.name: - node.attr['value'].tensor.string_val[0] = bytes(freeze_type, encoding='utf8') + if "model_attr/model_type" in node.name: + node.attr["value"].tensor.string_val[0] = bytes( + freeze_type, encoding="utf8" + ) # change the input script for frozen model - elif 'train_attr/training_script' in node.name: - jdata = json.loads(node.attr['value'].tensor.string_val[0]) + elif "train_attr/training_script" in node.name: + jdata = json.loads(node.attr["value"].tensor.string_val[0]) # fitting net - assert out_suffix in jdata['model']['fitting_net_dict'] - jdata['model']['fitting_net'] = jdata['model'].pop('fitting_net_dict')[out_suffix] + assert out_suffix in jdata["model"]["fitting_net_dict"] + jdata["model"]["fitting_net"] = jdata["model"].pop("fitting_net_dict")[ + out_suffix + ] # 
data systems - systems = jdata['training'].pop('data_dict') + systems = jdata["training"].pop("data_dict") if out_suffix in systems: - jdata['training']['training_data'] = systems[out_suffix]['training_data'] - if 'validation_data' in systems[out_suffix]: - jdata['training']['validation_data'] = systems[out_suffix]['validation_data'] + jdata["training"]["training_data"] = systems[out_suffix][ + "training_data" + ] + if "validation_data" in systems[out_suffix]: + jdata["training"]["validation_data"] = systems[out_suffix][ + "validation_data" + ] else: - jdata['training']['training_data'] = {} - log.warning('The fitting net {} has no training data in input script, resulting in ' - 'untrained frozen model, and cannot be compressed directly! '.format(out_suffix)) + jdata["training"]["training_data"] = {} + log.warning( + "The fitting net {} has no training data in input script, resulting in " + "untrained frozen model, and cannot be compressed directly! ".format( + out_suffix + ) + ) # loss - if 'loss_dict' in jdata: - loss_dict = jdata.pop('loss_dict') + if "loss_dict" in jdata: + loss_dict = jdata.pop("loss_dict") if out_suffix in loss_dict: - jdata['loss'] = loss_dict[out_suffix] + jdata["loss"] = loss_dict[out_suffix] # fitting weight - if 'fitting_weight' in jdata['training']: - jdata['training'].pop('fitting_weight') - node.attr['value'].tensor.string_val[0] = bytes(json.dumps(jdata), encoding='utf8') + if "fitting_weight" in jdata["training"]: + jdata["training"].pop("fitting_weight") + node.attr["value"].tensor.string_val[0] = bytes( + json.dumps(jdata), encoding="utf8" + ) return output_graph_def -def _make_node_names(model_type: str, modifier_type: Optional[str] = None, out_suffix: str = '', node_names: Optional[Union[str, list]] = None) -> List[str]: +def _make_node_names( + model_type: str, + modifier_type: Optional[str] = None, + out_suffix: str = "", + node_names: Optional[Union[str, list]] = None, +) -> List[str]: """Get node names based on model type. 
Parameters @@ -222,7 +256,9 @@ def _make_node_names(model_type: str, modifier_type: Optional[str] = None, out_s "model_attr/output_dim", ] elif model_type == "multi_task": - assert node_names is not None, "node_names must be defined in multi-task united model! " + assert ( + node_names is not None + ), "node_names must be defined in multi-task united model! " else: raise RuntimeError(f"unknown model type {model_type}") if modifier_type == "dipole_charge": @@ -253,19 +289,33 @@ def _make_node_names(model_type: str, modifier_type: Optional[str] = None, out_s nodes = node_names else: raise RuntimeError(f"unknown node names type {type(node_names)}") - if out_suffix != '': + if out_suffix != "": for ind in range(len(nodes)): - if (nodes[ind][:2] == 'o_' and nodes[ind] not in ["o_rmat", "o_rmat_deriv", "o_nlist", "o_rij"]) \ - or nodes[ind] == "model_attr/sel_type" \ - or nodes[ind] == "model_attr/output_dim": - nodes[ind] += '_{}'.format(out_suffix) - elif 'fitting_attr' in nodes[ind]: - content = nodes[ind].split('/')[1] - nodes[ind] = 'fitting_attr_{}/{}'.format(out_suffix, content) + if ( + ( + nodes[ind][:2] == "o_" + and nodes[ind] not in ["o_rmat", "o_rmat_deriv", "o_nlist", "o_rij"] + ) + or nodes[ind] == "model_attr/sel_type" + or nodes[ind] == "model_attr/output_dim" + ): + nodes[ind] += "_{}".format(out_suffix) + elif "fitting_attr" in nodes[ind]: + content = nodes[ind].split("/")[1] + nodes[ind] = "fitting_attr_{}/{}".format(out_suffix, content) return nodes -def freeze_graph(sess, input_graph, input_node, freeze_type, modifier, out_graph_name, node_names=None, out_suffix=''): +def freeze_graph( + sess, + input_graph, + input_node, + freeze_type, + modifier, + out_graph_name, + node_names=None, + out_suffix="", +): """Freeze the single graph with chosen out_suffix. Parameters @@ -287,7 +337,9 @@ def freeze_graph(sess, input_graph, input_node, freeze_type, modifier, out_graph out_suffix : str The chosen suffix to freeze in the input_graph. 
""" - output_node = _make_node_names(freeze_type, modifier, out_suffix=out_suffix, node_names=node_names) + output_node = _make_node_names( + freeze_type, modifier, out_suffix=out_suffix, node_names=node_names + ) different_set = set(output_node) - set(input_node) if different_set: log.warning( @@ -305,14 +357,14 @@ def freeze_graph(sess, input_graph, input_node, freeze_type, modifier, out_graph output_node, # The output node names are used to select the usefull nodes ) # if multi-task, change fitting_net suffix and model_type - if out_suffix != '': - output_graph_def = _modify_model_suffix(output_graph_def, out_suffix, freeze_type) + if out_suffix != "": + output_graph_def = _modify_model_suffix( + output_graph_def, out_suffix, freeze_type + ) # If we need to transfer the fitting net variables output_graph_def = _transfer_fitting_net_trainable_variables( - sess, - output_graph_def, - input_graph + sess, output_graph_def, input_graph ) # Finally we serialize and dump the output graph to the filesystem @@ -321,7 +373,15 @@ def freeze_graph(sess, input_graph, input_node, freeze_type, modifier, out_graph log.info(f"{len(output_graph_def.node):d} ops in the final graph.") -def freeze_graph_multi(sess, input_graph, input_node, modifier, out_graph_name, node_names, united_model: bool=False): +def freeze_graph_multi( + sess, + input_graph, + input_node, + modifier, + out_graph_name, + node_names, + united_model: bool = False, +): """Freeze multiple graphs for multi-task model. 
Parameters @@ -341,30 +401,62 @@ def freeze_graph_multi(sess, input_graph, input_node, modifier, out_graph_name, united_model : bool If freeze all nodes into one unit model """ - input_script = json.loads(run_sess(sess, "train_attr/training_script:0", feed_dict={})) - assert 'model' in input_script.keys() and 'fitting_net_dict' in input_script['model'] + input_script = json.loads( + run_sess(sess, "train_attr/training_script:0", feed_dict={}) + ) + assert ( + "model" in input_script.keys() and "fitting_net_dict" in input_script["model"] + ) if not united_model: - for fitting_key in input_script['model']['fitting_net_dict']: - fitting_type = input_script['model']['fitting_net_dict'][fitting_key]['type'] - if out_graph_name[-3:] == '.pb': - output_graph_item = out_graph_name[:-3] + '_{}.pb'.format(fitting_key) + for fitting_key in input_script["model"]["fitting_net_dict"]: + fitting_type = input_script["model"]["fitting_net_dict"][fitting_key][ + "type" + ] + if out_graph_name[-3:] == ".pb": + output_graph_item = out_graph_name[:-3] + "_{}.pb".format(fitting_key) else: - output_graph_item = out_graph_name + '_{}'.format(fitting_key) - freeze_graph(sess, input_graph, input_node, fitting_type, modifier, output_graph_item, node_names, - out_suffix=fitting_key) + output_graph_item = out_graph_name + "_{}".format(fitting_key) + freeze_graph( + sess, + input_graph, + input_node, + fitting_type, + modifier, + output_graph_item, + node_names, + out_suffix=fitting_key, + ) else: node_multi = [] - for fitting_key in input_script['model']['fitting_net_dict']: - fitting_type = input_script['model']['fitting_net_dict'][fitting_key]['type'] - node_multi += _make_node_names(fitting_type, modifier, out_suffix=fitting_key) + for fitting_key in input_script["model"]["fitting_net_dict"]: + fitting_type = input_script["model"]["fitting_net_dict"][fitting_key][ + "type" + ] + node_multi += _make_node_names( + fitting_type, modifier, out_suffix=fitting_key + ) node_multi = 
list(set(node_multi)) if node_names is not None: node_multi = node_names - freeze_graph(sess, input_graph, input_node, 'multi_task', modifier, out_graph_name, node_multi) + freeze_graph( + sess, + input_graph, + input_node, + "multi_task", + modifier, + out_graph_name, + node_multi, + ) def freeze( - *, checkpoint_folder: str, output: str, node_names: Optional[str] = None, nvnmd_weight: Optional[str] = None, united_model: bool = False, **kwargs + *, + checkpoint_folder: str, + output: str, + node_names: Optional[str] = None, + nvnmd_weight: Optional[str] = None, + united_model: bool = False, + **kwargs, ): """Freeze the graph in supplied folder. @@ -421,7 +513,9 @@ def freeze( # We start a session and restore the graph weights with tf.Session() as sess: saver.restore(sess, input_checkpoint) - model_type = run_sess(sess, "model_attr/model_type:0", feed_dict={}).decode("utf-8") + model_type = run_sess(sess, "model_attr/model_type:0", feed_dict={}).decode( + "utf-8" + ) if "modifier_attr/type" in nodes: modifier_type = run_sess(sess, "modifier_attr/type:0", feed_dict={}).decode( "utf-8" @@ -429,8 +523,24 @@ def freeze( else: modifier_type = None if nvnmd_weight is not None: - save_weight(sess, nvnmd_weight) # nvnmd - if model_type != 'multi_task': - freeze_graph(sess, input_graph_def, nodes, model_type, modifier_type, output_graph, node_names) + save_weight(sess, nvnmd_weight) # nvnmd + if model_type != "multi_task": + freeze_graph( + sess, + input_graph_def, + nodes, + model_type, + modifier_type, + output_graph, + node_names, + ) else: - freeze_graph_multi(sess, input_graph_def, nodes, modifier_type, output_graph, node_names, united_model=united_model) + freeze_graph_multi( + sess, + input_graph_def, + nodes, + modifier_type, + output_graph, + node_names, + united_model=united_model, + ) diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py index 8932dd0ed7..8b2d3d30b3 100644 --- a/deepmd/entrypoints/main.py +++ b/deepmd/entrypoints/main.py @@ -3,26 
+3,39 @@ import argparse import logging import textwrap -from pathlib import Path -from typing import Dict, List, Optional +from pathlib import ( + Path, +) +from typing import ( + Dict, + List, + Optional, +) -from deepmd import __version__ -from deepmd.common import clear_session +from deepmd import ( + __version__, +) +from deepmd.common import ( + clear_session, +) from deepmd.entrypoints import ( compress, config, + convert, doc_train_input, freeze, + make_model_devi, + neighbor_stat, test, train_dp, transfer, - make_model_devi, - convert, - neighbor_stat, ) -from deepmd.loggers import set_log_handles - -from deepmd.nvnmd.entrypoints.train import train_nvnmd +from deepmd.loggers import ( + set_log_handles, +) +from deepmd.nvnmd.entrypoints.train import ( + train_nvnmd, +) __all__ = ["main", "parse_args", "get_ll", "main_parser"] @@ -47,6 +60,7 @@ def get_ll(log_level: str) -> int: return int_level + class RawTextArgumentDefaultsHelpFormatter( argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter ): @@ -147,12 +161,14 @@ def main_parser() -> argparse.ArgumentParser: parents=[parser_log, parser_mpi_log], help="train a model", formatter_class=RawTextArgumentDefaultsHelpFormatter, - epilog=textwrap.dedent("""\ + epilog=textwrap.dedent( + """\ examples: dp train input.json dp train input.json --restart model.ckpt dp train input.json --init-model model.ckpt - """), + """ + ), ) parser_train.add_argument( "INPUT", help="the input parameter file in json or yaml format" @@ -205,11 +221,13 @@ def main_parser() -> argparse.ArgumentParser: parents=[parser_log], help="freeze the model", formatter_class=RawTextArgumentDefaultsHelpFormatter, - epilog=textwrap.dedent("""\ + epilog=textwrap.dedent( + """\ examples: dp freeze dp freeze -o graph.pb - """), + """ + ), ) parser_frz.add_argument( "-c", @@ -252,10 +270,12 @@ def main_parser() -> argparse.ArgumentParser: parents=[parser_log], help="test the model", formatter_class=RawTextArgumentDefaultsHelpFormatter, 
- epilog=textwrap.dedent("""\ + epilog=textwrap.dedent( + """\ examples: dp test -m graph.pb -s /path/to/system -n 30 - """), + """ + ), ) parser_tst.add_argument( "-m", @@ -319,11 +339,13 @@ def main_parser() -> argparse.ArgumentParser: parents=[parser_log, parser_mpi_log], help="compress a model", formatter_class=RawTextArgumentDefaultsHelpFormatter, - epilog=textwrap.dedent("""\ + epilog=textwrap.dedent( + """\ examples: dp compress dp compress -i graph.pb -o compressed.pb - """), + """ + ), ) parser_compress.add_argument( "-i", @@ -344,13 +366,13 @@ def main_parser() -> argparse.ArgumentParser: "--step", default=0.01, type=float, - help="Model compression uses fifth-order polynomials to interpolate the embedding-net. " + help="Model compression uses fifth-order polynomials to interpolate the embedding-net. " "It introduces two tables with different step size to store the parameters of the polynomials. " "The first table covers the range of the training data, while the second table is an extrapolation of the training data. " "The domain of each table is uniformly divided by a given step size. " "And the step(parameter) denotes the step size of the first table and the second table will " "use 10 * step as it's step size to save the memory. " - "Usually the value ranges from 0.1 to 0.001. " + "Usually the value ranges from 0.1 to 0.001. 
" "Smaller step means higher accuracy and bigger model size", ) parser_compress.add_argument( @@ -393,10 +415,7 @@ def main_parser() -> argparse.ArgumentParser: formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parsers_doc.add_argument( - "--out-type", - default="rst", - type=str, - help="The output type" + "--out-type", default="rst", type=str, help="The output type" ) # * make model deviation *********************************************************** @@ -405,10 +424,12 @@ def main_parser() -> argparse.ArgumentParser: parents=[parser_log], help="calculate model deviation", formatter_class=RawTextArgumentDefaultsHelpFormatter, - epilog=textwrap.dedent("""\ + epilog=textwrap.dedent( + """\ examples: dp model-devi -m graph.000.pb graph.001.pb graph.002.pb graph.003.pb -s ./data -o model_devi.out - """), + """ + ), ) parser_model_devi.add_argument( "-m", @@ -430,61 +451,65 @@ def main_parser() -> argparse.ArgumentParser: ) parser_model_devi.add_argument( "-o", - "--output", - default="model_devi.out", - type=str, - help="The output file for results of model deviation" + "--output", + default="model_devi.out", + type=str, + help="The output file for results of model deviation", ) parser_model_devi.add_argument( "-f", "--frequency", default=1, type=int, - help="The trajectory frequency of the system" + help="The trajectory frequency of the system", ) # * convert models parser_transform = subparsers.add_parser( - 'convert-from', + "convert-from", parents=[parser_log], - help='convert lower model version to supported version', + help="convert lower model version to supported version", formatter_class=RawTextArgumentDefaultsHelpFormatter, - epilog=textwrap.dedent("""\ + epilog=textwrap.dedent( + """\ examples: dp convert-from 1.0 -i graph.pb -o graph_new.pb - """), + """ + ), ) parser_transform.add_argument( - 'FROM', - type = str, - choices = ['0.12', '1.0', '1.1', '1.2', '1.3', '2.0', 'pbtxt'], + "FROM", + type=str, + choices=["0.12", "1.0", "1.1", "1.2", "1.3", 
"2.0", "pbtxt"], help="The original model compatibility", ) parser_transform.add_argument( - '-i', + "-i", "--input-model", - default = "frozen_model.pb", - type=str, - help = "the input model", + default="frozen_model.pb", + type=str, + help="the input model", ) parser_transform.add_argument( "-o", "--output-model", - default = "convert_out.pb", - type=str, - help='the output model', + default="convert_out.pb", + type=str, + help="the output model", ) # neighbor_stat parser_neighbor_stat = subparsers.add_parser( - 'neighbor-stat', + "neighbor-stat", parents=[parser_log], - help='Calculate neighbor statistics', + help="Calculate neighbor statistics", formatter_class=RawTextArgumentDefaultsHelpFormatter, - epilog=textwrap.dedent("""\ + epilog=textwrap.dedent( + """\ examples: dp neighbor-stat -s data -r 6.0 -t O H - """), + """ + ), ) parser_neighbor_stat.add_argument( "-s", @@ -504,7 +529,7 @@ def main_parser() -> argparse.ArgumentParser: "-t", "--type-map", type=str, - nargs='+', + nargs="+", required=True, help="type map", ) @@ -516,7 +541,9 @@ def main_parser() -> argparse.ArgumentParser: ) # --version - parser.add_argument('--version', action='version', version='DeePMD-kit v%s' % __version__) + parser.add_argument( + "--version", action="version", version="DeePMD-kit v%s" % __version__ + ) # * train nvnmd script ****************************************************************** parser_train_nvnmd = subparsers.add_parser( @@ -540,8 +567,8 @@ def main_parser() -> argparse.ArgumentParser: "--step", default="s1", type=str, - choices=['s1', 's2'], - help="steps to train model of NVNMD: s1 (train CNN), s2 (train QNN)" + choices=["s1", "s2"], + help="steps to train model of NVNMD: s1 (train CNN), s2 (train QNN)", ) return parser diff --git a/deepmd/entrypoints/neighbor_stat.py b/deepmd/entrypoints/neighbor_stat.py index bc3c6430ff..4b6f78acde 100644 --- a/deepmd/entrypoints/neighbor_stat.py +++ b/deepmd/entrypoints/neighbor_stat.py @@ -1,12 +1,21 @@ import logging 
-from typing import List +from typing import ( + List, +) -from deepmd.common import expand_sys_str -from deepmd.utils.data_system import DeepmdDataSystem -from deepmd.utils.neighbor_stat import NeighborStat +from deepmd.common import ( + expand_sys_str, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.neighbor_stat import ( + NeighborStat, +) log = logging.getLogger(__name__) + def neighbor_stat( *, system: str, @@ -33,7 +42,7 @@ def neighbor_stat( >>> neighbor_stat(system='.', rcut=6., type_map=["C", "H", "O", "N", "P", "S", "Mg", "Na", "HW", "OW", "mNa", "mCl", "mC", "mH", "mMg", "mN", "mO", "mP"]) min_nbor_dist: 0.6599510670195264 max_nbor_size: [23, 26, 19, 16, 2, 2, 1, 1, 72, 37, 5, 0, 31, 29, 1, 21, 20, 5] - """ + """ all_sys = expand_sys_str(system) if not len(all_sys): raise RuntimeError("Did not find valid system") diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py index a4feaa88f6..c9236ee131 100644 --- a/deepmd/entrypoints/test.py +++ b/deepmd/entrypoints/test.py @@ -1,18 +1,42 @@ """Test trained DeePMD model.""" import logging -from pathlib import Path -from typing import TYPE_CHECKING, List, Dict, Optional, Tuple +from pathlib import ( + Path, +) +from typing import ( + TYPE_CHECKING, + Dict, + List, + Optional, + Tuple, +) import numpy as np -from deepmd import DeepPotential -from deepmd.common import expand_sys_str + +from deepmd import ( + DeepPotential, +) +from deepmd.common import ( + expand_sys_str, +) from deepmd.utils import random as dp_random -from deepmd.utils.data import DeepmdData -from deepmd.utils.weight_avg import weighted_average +from deepmd.utils.data import ( + DeepmdData, +) +from deepmd.utils.weight_avg import ( + weighted_average, +) if TYPE_CHECKING: - from deepmd.infer import DeepDipole, DeepPolar, DeepPot, DeepWFC - from deepmd.infer.deep_tensor import DeepTensor + from deepmd.infer import ( + DeepDipole, + DeepPolar, + DeepPot, + DeepWFC, + ) + from 
deepmd.infer.deep_tensor import ( + DeepTensor, + ) __all__ = ["test"] @@ -61,7 +85,7 @@ def test( if no valid system was found """ if datafile is not None: - datalist = open(datafile, 'r') + datalist = open(datafile, "r") all_sys = datalist.read().splitlines() datalist.close() else: @@ -74,7 +98,7 @@ def test( # init random seed if rand_seed is not None: - dp_random.seed(rand_seed % (2 ** 32)) + dp_random.seed(rand_seed % (2**32)) # init model dp = DeepPotential(model) @@ -101,9 +125,13 @@ def test( err = test_dipole(dp, data, numb_test, detail_file, atomic) elif dp.model_type == "polar": err = test_polar(dp, data, numb_test, detail_file, atomic=atomic) - elif dp.model_type == "global_polar": # should not appear in this new version - log.warning("Global polar model is not currently supported. Please directly use the polar mode and change loss parameters.") - err = test_polar(dp, data, numb_test, detail_file, atomic=False) # YWolfeee: downward compatibility + elif dp.model_type == "global_polar": # should not appear in this new version + log.warning( + "Global polar model is not currently supported. Please directly use the polar mode and change loss parameters." 
+ ) + err = test_polar( + dp, data, numb_test, detail_file, atomic=False + ) # YWolfeee: downward compatibility log.info("# ----------------------------------------------- ") err_coll.append(err) @@ -247,7 +275,7 @@ def test_ener( aparam=aparam, atomic=has_atom_ener, efield=efield, - mixed_type=mixed_type + mixed_type=mixed_type, ) energy = ret[0] force = ret[1] @@ -302,7 +330,7 @@ def test_ener( save_txt_file( detail_path.with_suffix(".e_peratom.out"), pe_atom, - header = "%s: data_e pred_e" % system, + header="%s: data_e pred_e" % system, append=append_detail, ) pf = np.concatenate( @@ -341,15 +369,15 @@ def test_ener( "data_vyz data_vzx data_vzy data_vzz pred_vxx pred_vxy pred_vxz pred_vyx " "pred_vyy pred_vyz pred_vzx pred_vzy pred_vzz", append=append_detail, - ) + ) return { - "rmse_ea" : (rmse_ea, energy.size), - "rmse_f" : (rmse_f, force.size), - "rmse_va" : (rmse_va, virial.size), + "rmse_ea": (rmse_ea, energy.size), + "rmse_f": (rmse_f, force.size), + "rmse_va": (rmse_va, virial.size), } -def print_ener_sys_avg(avg: Dict[str,float]): +def print_ener_sys_avg(avg: Dict[str, float]): """Print errors summary for energy type potential. 
Parameters @@ -438,9 +466,7 @@ def test_wfc( pe, header="ref_wfc(12 dofs) predicted_wfc(12 dofs)", ) - return { - 'rmse' : (rmse_f, wfc.size) - } + return {"rmse": (rmse_f, wfc.size)} def print_wfc_sys_avg(avg): @@ -490,7 +516,7 @@ def test_polar( high_prec=False, type_sel=dp.get_sel_type(), ) - + test_data = data.get_test() polar, numb_test, atype = run_test(dp, test_data, numb_test) @@ -501,13 +527,13 @@ def test_polar( # YWolfeee: do summation in global polar mode if not atomic: - polar = np.sum(polar.reshape((polar.shape[0],-1,9)),axis=1) + polar = np.sum(polar.reshape((polar.shape[0], -1, 9)), axis=1) rmse_f = rmse(polar - test_data["polarizability"][:numb_test]) rmse_fs = rmse_f / np.sqrt(sel_natoms) rmse_fa = rmse_f / sel_natoms else: rmse_f = rmse(polar - test_data["atomic_polarizability"][:numb_test]) - + log.info(f"# number of test data : {numb_test:d} ") log.info(f"Polarizability RMSE : {rmse_f:e}") if not atomic: @@ -532,9 +558,7 @@ def test_polar( "data_pzy data_pzz pred_pxx pred_pxy pred_pxz pred_pyx pred_pyy pred_pyz " "pred_pzx pred_pzy pred_pzz", ) - return { - "rmse" : (rmse_f, polar.size) - } + return {"rmse": (rmse_f, polar.size)} def print_polar_sys_avg(avg): @@ -577,11 +601,11 @@ def test_dipole( """ data.add( "dipole" if not atomic else "atomic_dipole", - 3, - atomic=atomic, - must=True, - high_prec=False, - type_sel=dp.get_sel_type() + 3, + atomic=atomic, + must=True, + high_prec=False, + type_sel=dp.get_sel_type(), ) test_data = data.get_test() dipole, numb_test, atype = run_test(dp, test_data, numb_test) @@ -590,16 +614,16 @@ def test_dipole( sel_natoms = 0 for ii in sel_type: sel_natoms += sum(atype == ii) - + # do summation in atom dimension if not atomic: - dipole = np.sum(dipole.reshape((dipole.shape[0], -1, 3)),axis=1) + dipole = np.sum(dipole.reshape((dipole.shape[0], -1, 3)), axis=1) rmse_f = rmse(dipole - test_data["dipole"][:numb_test]) rmse_fs = rmse_f / np.sqrt(sel_natoms) rmse_fa = rmse_f / sel_natoms else: rmse_f = rmse(dipole 
- test_data["atomic_dipole"][:numb_test]) - + log.info(f"# number of test data : {numb_test:d}") log.info(f"Dipole RMSE : {rmse_f:e}") if not atomic: @@ -622,9 +646,7 @@ def test_dipole( pe, header="data_x data_y data_z pred_x pred_y pred_z", ) - return { - 'rmse' : (rmse_f, dipole.size) - } + return {"rmse": (rmse_f, dipole.size)} def print_dipole_sys_avg(avg): diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py index 5703324ac0..9afc255522 100755 --- a/deepmd/entrypoints/train.py +++ b/deepmd/entrypoints/train.py @@ -5,25 +5,65 @@ import json import logging -import time import os -from typing import Dict, List, Optional, Any +import time +from typing import ( + Any, + Dict, + List, + Optional, +) + import numpy as np -from deepmd.common import data_requirement, expand_sys_str, j_loader, j_must_have -from deepmd.env import tf, reset_default_tf_session_config, GLOBAL_ENER_FLOAT_PRECISION -from deepmd.infer.data_modifier import DipoleChargeModifier -from deepmd.train.run_options import BUILD, CITATION, WELCOME, RunOptions -from deepmd.train.trainer import DPTrainer +from deepmd.common import ( + data_requirement, + expand_sys_str, + j_loader, + j_must_have, +) +from deepmd.env import ( + GLOBAL_ENER_FLOAT_PRECISION, + reset_default_tf_session_config, + tf, +) +from deepmd.infer.data_modifier import ( + DipoleChargeModifier, +) +from deepmd.train.run_options import ( + BUILD, + CITATION, + WELCOME, + RunOptions, +) +from deepmd.train.trainer import ( + DPTrainer, +) from deepmd.utils import random as dp_random -from deepmd.utils.argcheck import normalize -from deepmd.utils.compat import update_deepmd_input -from deepmd.utils.data_system import DeepmdDataSystem -from deepmd.utils.sess import run_sess -from deepmd.utils.neighbor_stat import NeighborStat -from deepmd.utils.path import DPPath -from deepmd.utils.finetune import replace_model_params_with_pretrained_model -from deepmd.utils.multi_init import replace_model_params_with_frz_multi_model +from 
deepmd.utils.argcheck import ( + normalize, +) +from deepmd.utils.compat import ( + update_deepmd_input, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.finetune import ( + replace_model_params_with_pretrained_model, +) +from deepmd.utils.multi_init import ( + replace_model_params_with_frz_multi_model, +) +from deepmd.utils.neighbor_stat import ( + NeighborStat, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.sess import ( + run_sess, +) __all__ = ["train"] @@ -84,7 +124,7 @@ def train( finetune=finetune, log_path=log_path, log_level=log_level, - mpi_log=mpi_log + mpi_log=mpi_log, ) if run_opt.is_distrib and len(run_opt.gpus or []) > 1: # avoid conflict of visible gpus among multipe tf sessions in one process @@ -95,7 +135,9 @@ def train( origin_type_map = None if run_opt.finetune is not None: - jdata, origin_type_map = replace_model_params_with_pretrained_model(jdata, run_opt.finetune) + jdata, origin_type_map = replace_model_params_with_pretrained_model( + jdata, run_opt.finetune + ) if "fitting_net_dict" in jdata["model"] and run_opt.init_frz_model is not None: jdata = replace_model_params_with_frz_multi_model(jdata, run_opt.init_frz_model) @@ -112,14 +154,18 @@ def train( # save the training script into the graph # remove white spaces as it is not compressed - tf.constant(json.dumps(jdata, separators=(',', ':')), name='train_attr/training_script', dtype=tf.string) + tf.constant( + json.dumps(jdata, separators=(",", ":")), + name="train_attr/training_script", + dtype=tf.string, + ) for message in WELCOME + CITATION + BUILD: log.info(message) run_opt.print_resource_summary() if origin_type_map is not None: - jdata['model']['origin_type_map'] = origin_type_map + jdata["model"]["origin_type_map"] = origin_type_map _do_work(jdata, run_opt, is_compress) @@ -144,7 +190,7 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: bool = Fal assert "training" in jdata # init the model - model = 
DPTrainer(jdata, run_opt=run_opt, is_compress = is_compress) + model = DPTrainer(jdata, run_opt=run_opt, is_compress=is_compress) rcut = model.model.get_rcut() type_map = model.model.get_type_map() if len(type_map) == 0: @@ -157,7 +203,7 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: bool = Fal if seed is not None: # avoid the same batch sequence among workers seed += run_opt.my_rank - seed = seed % (2 ** 32) + seed = seed % (2**32) dp_random.seed(seed) # setup data modifier @@ -172,31 +218,63 @@ def _do_work(jdata: Dict[str, Any], run_opt: RunOptions, is_compress: bool = Fal if not is_compress: # init data if not multi_task_mode: - train_data = get_data(jdata["training"]["training_data"], rcut, ipt_type_map, modifier) + train_data = get_data( + jdata["training"]["training_data"], rcut, ipt_type_map, modifier + ) train_data.print_summary("training") if jdata["training"].get("validation_data", None) is not None: - valid_data = get_data(jdata["training"]["validation_data"], rcut, train_data.type_map, modifier) + valid_data = get_data( + jdata["training"]["validation_data"], + rcut, + train_data.type_map, + modifier, + ) valid_data.print_summary("validation") else: train_data = {} valid_data = {} for data_systems in jdata["training"]["data_dict"]: - if jdata["training"]["fitting_weight"][data_systems] > 0.: # check only the available pair + if ( + jdata["training"]["fitting_weight"][data_systems] > 0.0 + ): # check only the available pair train_data[data_systems] = get_data( - jdata["training"]["data_dict"][data_systems]["training_data"], rcut, - ipt_type_map, modifier, multi_task_mode) - train_data[data_systems].print_summary("training in {}".format(data_systems)) - if jdata["training"]["data_dict"][data_systems].get("validation_data", None) is not None: + jdata["training"]["data_dict"][data_systems]["training_data"], + rcut, + ipt_type_map, + modifier, + multi_task_mode, + ) + train_data[data_systems].print_summary( + "training in 
{}".format(data_systems) + ) + if ( + jdata["training"]["data_dict"][data_systems].get( + "validation_data", None + ) + is not None + ): valid_data[data_systems] = get_data( - jdata["training"]["data_dict"][data_systems]["validation_data"], rcut, - train_data[data_systems].type_map, modifier, multi_task_mode) - valid_data[data_systems].print_summary("validation in {}".format(data_systems)) + jdata["training"]["data_dict"][data_systems][ + "validation_data" + ], + rcut, + train_data[data_systems].type_map, + modifier, + multi_task_mode, + ) + valid_data[data_systems].print_summary( + "validation in {}".format(data_systems) + ) # get training info stop_batch = j_must_have(jdata["training"], "numb_steps") origin_type_map = jdata["model"].get("origin_type_map", None) - if origin_type_map is not None and not origin_type_map: # get the type_map from data if not provided - origin_type_map = get_data(jdata["training"]["training_data"], rcut, None, modifier).get_type_map() + if ( + origin_type_map is not None and not origin_type_map + ): # get the type_map from data if not provided + origin_type_map = get_data( + jdata["training"]["training_data"], rcut, None, modifier + ).get_type_map() model.build(train_data, stop_batch, origin_type_map=origin_type_map) if not is_compress: @@ -217,21 +295,21 @@ def get_data(jdata: Dict[str, Any], rcut, type_map, modifier, multi_task_mode=Fa systems = expand_sys_str(systems) elif isinstance(systems, list): systems = systems.copy() - help_msg = 'Please check your setting for data systems' + help_msg = "Please check your setting for data systems" # check length of systems if len(systems) == 0: - msg = 'cannot find valid a data system' + msg = "cannot find valid a data system" log.fatal(msg) raise IOError(msg, help_msg) # rougly check all items in systems are valid for ii in systems: ii = DPPath(ii) - if (not ii.is_dir()): - msg = f'dir {ii} is not a valid dir' + if not ii.is_dir(): + msg = f"dir {ii} is not a valid dir" log.fatal(msg) raise 
IOError(msg, help_msg) - if (not (ii / 'type.raw').is_file()): - msg = f'dir {ii} is not a valid data system dir' + if not (ii / "type.raw").is_file(): + msg = f"dir {ii} is not a valid data system dir" log.fatal(msg) raise IOError(msg, help_msg) @@ -243,15 +321,15 @@ def get_data(jdata: Dict[str, Any], rcut, type_map, modifier, multi_task_mode=Fa data = DeepmdDataSystem( systems=systems, batch_size=batch_size, - test_size=1, # to satisfy the old api + test_size=1, # to satisfy the old api shuffle_test=True, # to satisfy the old api rcut=rcut, type_map=type_map, optional_type_map=optional_type_map, modifier=modifier, - trn_all_set=True, # sample from all sets + trn_all_set=True, # sample from all sets sys_probs=sys_probs, - auto_prob_style=auto_prob + auto_prob_style=auto_prob, ) data.add_dict(data_requirement) @@ -277,18 +355,18 @@ def get_modifier(modi_data=None): def get_rcut(jdata): - descrpt_data = jdata['model']['descriptor'] + descrpt_data = jdata["model"]["descriptor"] rcut_list = [] - if descrpt_data['type'] == 'hybrid': - for ii in descrpt_data['list']: - rcut_list.append(ii['rcut']) + if descrpt_data["type"] == "hybrid": + for ii in descrpt_data["list"]: + rcut_list.append(ii["rcut"]) else: - rcut_list.append(descrpt_data['rcut']) + rcut_list.append(descrpt_data["rcut"]) return max(rcut_list) def get_type_map(jdata): - return jdata['model'].get('type_map', None) + return jdata["model"].get("type_map", None) def get_nbor_stat(jdata, rcut, one_type: bool = False): @@ -299,16 +377,28 @@ def get_nbor_stat(jdata, rcut, one_type: bool = False): type_map = None multi_task_mode = "data_dict" in jdata["training"] if not multi_task_mode: - train_data = get_data(jdata["training"]["training_data"], max_rcut, type_map, None) + train_data = get_data( + jdata["training"]["training_data"], max_rcut, type_map, None + ) train_data.get_batch() else: - assert type_map is not None, 'Data stat in multi-task mode must have available type_map! 
' + assert ( + type_map is not None + ), "Data stat in multi-task mode must have available type_map! " train_data = None for systems in jdata["training"]["data_dict"]: - tmp_data = get_data(jdata["training"]["data_dict"][systems]["training_data"], max_rcut, type_map, None) + tmp_data = get_data( + jdata["training"]["data_dict"][systems]["training_data"], + max_rcut, + type_map, + None, + ) tmp_data.get_batch() - assert tmp_data.get_type_map(), \ - "In multi-task mode, 'type_map.raw' must be defined in data systems {}! ".format(systems) + assert ( + tmp_data.get_type_map() + ), "In multi-task mode, 'type_map.raw' must be defined in data systems {}! ".format( + systems + ) if train_data is None: train_data = tmp_data else: @@ -331,43 +421,46 @@ def get_nbor_stat(jdata, rcut, one_type: bool = False): # moved from traier.py as duplicated # TODO: this is a simple fix but we should have a clear # architecture to call neighbor stat - tf.constant(min_nbor_dist, - name = 'train_attr/min_nbor_dist', - dtype = GLOBAL_ENER_FLOAT_PRECISION) - tf.constant(max_nbor_size, - name = 'train_attr/max_nbor_size', - dtype = tf.int32) + tf.constant( + min_nbor_dist, + name="train_attr/min_nbor_dist", + dtype=GLOBAL_ENER_FLOAT_PRECISION, + ) + tf.constant(max_nbor_size, name="train_attr/max_nbor_size", dtype=tf.int32) return min_nbor_dist, max_nbor_size + def get_sel(jdata, rcut, one_type: bool = False): _, max_nbor_size = get_nbor_stat(jdata, rcut, one_type=one_type) return max_nbor_size + def get_min_nbor_dist(jdata, rcut): min_nbor_dist, _ = get_nbor_stat(jdata, rcut) return min_nbor_dist + def parse_auto_sel(sel): if type(sel) is not str: return False - words = sel.split(':') - if words[0] == 'auto': + words = sel.split(":") + if words[0] == "auto": return True else: return False - + def parse_auto_sel_ratio(sel): if not parse_auto_sel(sel): - raise RuntimeError(f'invalid auto sel format {sel}') + raise RuntimeError(f"invalid auto sel format {sel}") else: - words = sel.split(':') + 
words = sel.split(":") if len(words) == 1: ratio = 1.1 elif len(words) == 2: ratio = float(words[1]) else: - raise RuntimeError(f'invalid auto sel format {sel}') + raise RuntimeError(f"invalid auto sel format {sel}") return ratio @@ -376,17 +469,17 @@ def wrap_up_4(xx): def update_one_sel(jdata, descriptor): - if descriptor['type'] == 'loc_frame': + if descriptor["type"] == "loc_frame": return descriptor - rcut = descriptor['rcut'] - tmp_sel = get_sel(jdata, rcut, one_type=descriptor['type'] in ('se_atten',)) - sel = descriptor['sel'] + rcut = descriptor["rcut"] + tmp_sel = get_sel(jdata, rcut, one_type=descriptor["type"] in ("se_atten",)) + sel = descriptor["sel"] if isinstance(sel, int): # convert to list and finnally convert back to int sel = [sel] - if parse_auto_sel(descriptor['sel']) : - ratio = parse_auto_sel_ratio(descriptor['sel']) - descriptor['sel'] = sel = [int(wrap_up_4(ii * ratio)) for ii in tmp_sel] + if parse_auto_sel(descriptor["sel"]): + ratio = parse_auto_sel_ratio(descriptor["sel"]) + descriptor["sel"] = sel = [int(wrap_up_4(ii * ratio)) for ii in tmp_sel] else: # sel is set by user for ii, (tt, dd) in enumerate(zip(tmp_sel, sel)): @@ -396,20 +489,22 @@ def update_one_sel(jdata, descriptor): log.warning( "sel of type %d is not enough! The expected value is " "not less than %d, but you set it to %d. The accuracy" - " of your model may get worse." %(ii, tt, dd) + " of your model may get worse." % (ii, tt, dd) ) - if descriptor['type'] in ('se_atten',): - descriptor['sel'] = sel = sum(sel) + if descriptor["type"] in ("se_atten",): + descriptor["sel"] = sel = sum(sel) return descriptor -def update_sel(jdata): - log.info("Calculate neighbor statistics... 
(add --skip-neighbor-stat to skip this step)") - descrpt_data = jdata['model']['descriptor'] - if descrpt_data['type'] == 'hybrid': - for ii in range(len(descrpt_data['list'])): - descrpt_data['list'][ii] = update_one_sel(jdata, descrpt_data['list'][ii]) +def update_sel(jdata): + log.info( + "Calculate neighbor statistics... (add --skip-neighbor-stat to skip this step)" + ) + descrpt_data = jdata["model"]["descriptor"] + if descrpt_data["type"] == "hybrid": + for ii in range(len(descrpt_data["list"])): + descrpt_data["list"][ii] = update_one_sel(jdata, descrpt_data["list"][ii]) else: descrpt_data = update_one_sel(jdata, descrpt_data) - jdata['model']['descriptor'] = descrpt_data + jdata["model"]["descriptor"] = descrpt_data return jdata diff --git a/deepmd/entrypoints/transfer.py b/deepmd/entrypoints/transfer.py index 85664ac83c..d30401cf18 100644 --- a/deepmd/entrypoints/transfer.py +++ b/deepmd/entrypoints/transfer.py @@ -1,10 +1,20 @@ """Module used for transfering parameters between models.""" -from typing import Dict, Optional, Sequence, Tuple -from deepmd.env import tf, TRANSFER_PATTERN +import logging import re +from typing import ( + Dict, + Optional, + Sequence, + Tuple, +) + import numpy as np -import logging + +from deepmd.env import ( + TRANSFER_PATTERN, + tf, +) __all__ = ["transfer"] @@ -16,7 +26,7 @@ def convert_number(number: int) -> float: binary = bin(number).replace("0b", "").zfill(16) sign = int(binary[0]) * -2 + 1 exp = int(binary[1:6], 2) - frac = (int(binary[6:], 2) + 2 ** 10) * (2 ** -10) + frac = (int(binary[6:], 2) + 2**10) * (2**-10) return sign * (2 ** (exp - 15)) * frac @@ -130,7 +140,9 @@ def transform_graph(raw_graph: tf.Graph, old_graph: tf.Graph) -> tf.Graph: if raw_graph_dtype == np.float16: if old_graph_dtype == np.float64 or old_graph_dtype == np.float32: if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor = np.frombuffer(old_node.tensor_content, dtype = old_graph_dtype) + tensor = np.frombuffer( + 
old_node.tensor_content, dtype=old_graph_dtype + ) tensor = tensor.astype(raw_graph_dtype) cp_attr.from_str(tensor) else: @@ -144,7 +156,9 @@ def transform_graph(raw_graph: tf.Graph, old_graph: tf.Graph) -> tf.Graph: elif raw_graph_dtype == np.float64 or raw_graph_dtype == np.float32: if old_graph_dtype == np.float64 or old_graph_dtype == np.float32: if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor = np.frombuffer(old_node.tensor_content, dtype = old_graph_dtype) + tensor = np.frombuffer( + old_node.tensor_content, dtype=old_graph_dtype + ) tensor = tensor.astype(raw_graph_dtype) cp_attr.from_str(tensor) else: @@ -153,10 +167,14 @@ def transform_graph(raw_graph: tf.Graph, old_graph: tf.Graph) -> tf.Graph: elif old_graph_dtype == np.float16: if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor = convertMatrix(np.array(old_node.half_val), tensor_shape).astype(raw_graph_dtype) + tensor = convertMatrix( + np.array(old_node.half_val), tensor_shape + ).astype(raw_graph_dtype) cp_attr.from_str(tensor) else: - tensor = convertMatrix(np.array(old_node.half_val), tensor_shape).astype(raw_graph_dtype) + tensor = convertMatrix( + np.array(old_node.half_val), tensor_shape + ).astype(raw_graph_dtype) cp_attr.from_array(tensor, raw_graph_dtype) return raw_graph_def diff --git a/deepmd/env.py b/deepmd/env.py index 95f29c244b..ddca6e1c4e 100644 --- a/deepmd/env.py +++ b/deepmd/env.py @@ -1,25 +1,43 @@ """Module that sets tensorflow working environment and exports inportant constants.""" +import ctypes import logging import os -import re import platform -import ctypes -from configparser import ConfigParser -from importlib import reload, import_module -from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple -from packaging.version import Version +import re +from configparser import ( + ConfigParser, +) +from importlib import ( + import_module, + reload, +) +from pathlib import ( + Path, +) +from typing import ( + 
TYPE_CHECKING, + Any, + Dict, + List, + Optional, + Tuple, +) import numpy as np +from packaging.version import ( + Version, +) if TYPE_CHECKING: - from types import ModuleType + from types import ( + ModuleType, + ) def dlopen_library(module: str, filename: str): """Dlopen a library from a module. - + Parameters ---------- module : str @@ -37,6 +55,7 @@ def dlopen_library(module: str, filename: str): if len(libs): ctypes.CDLL(str(libs[0].absolute())) + # dlopen pip cuda library before tensorflow if platform.system() == "Linux": dlopen_library("nvidia.cuda_runtime.lib", "libcudart.so*") @@ -81,7 +100,7 @@ def dlopen_library(module: str, filename: str): "TYPE_EMBEDDING_PATTERN", "ATTENTION_LAYER_PATTERN", "REMOVE_SUFFIX_DICT", - "TF_VERSION" + "TF_VERSION", ] SHARED_LIB_MODULE = "op" @@ -117,7 +136,7 @@ def dlopen_library(module: str, filename: str): r"final_layer_type_\d+/matrix|" r"final_layer/bias|" r"final_layer_type_\d+/bias|" - # layer_name + # layer_name r"share_.+_type_\d/matrix|" r"share_.+_type_\d/bias|" r"share_.+_type_\d/idt|" @@ -147,11 +166,11 @@ def dlopen_library(module: str, filename: str): r"attention_layer_\d+/layer_normalization_\d+/gamma|" ) -TRANSFER_PATTERN = \ - EMBEDDING_NET_PATTERN + \ - FITTING_NET_PATTERN + \ - TYPE_EMBEDDING_PATTERN + \ - str( +TRANSFER_PATTERN = ( + EMBEDDING_NET_PATTERN + + FITTING_NET_PATTERN + + TYPE_EMBEDDING_PATTERN + + str( r"descrpt_attr/t_avg|" r"descrpt_attr/t_std|" r"fitting_attr/t_fparam_avg|" @@ -160,6 +179,7 @@ def dlopen_library(module: str, filename: str): r"fitting_attr/t_aparam_istd|" r"model_attr/t_tab_info|" r"model_attr/t_tab_data|" + ) ) REMOVE_SUFFIX_DICT = { @@ -225,8 +245,7 @@ def set_mkl(): """ if "mkl_rt" in np.__config__.get_info("blas_mkl_info").get("libraries", []): set_env_if_empty("KMP_BLOCKTIME", "0") - set_env_if_empty( - "KMP_AFFINITY", "granularity=fine,verbose,compact,1,0") + set_env_if_empty("KMP_AFFINITY", "granularity=fine,verbose,compact,1,0") reload(np) @@ -238,14 +257,17 @@ def 
set_tf_default_nthreads(): `TF_INTRA_OP_PARALLELISM_THREADS` and `TF_INTER_OP_PARALLELISM_THREADS` control TF configuration of multithreading. """ - if "OMP_NUM_THREADS" not in os.environ or \ - "TF_INTRA_OP_PARALLELISM_THREADS" not in os.environ or \ - "TF_INTER_OP_PARALLELISM_THREADS" not in os.environ: + if ( + "OMP_NUM_THREADS" not in os.environ + or "TF_INTRA_OP_PARALLELISM_THREADS" not in os.environ + or "TF_INTER_OP_PARALLELISM_THREADS" not in os.environ + ): logging.warning( "To get the best performance, it is recommended to adjust " "the number of threads by setting the environment variables " "OMP_NUM_THREADS, TF_INTRA_OP_PARALLELISM_THREADS, and " - "TF_INTER_OP_PARALLELISM_THREADS.") + "TF_INTER_OP_PARALLELISM_THREADS." + ) set_env_if_empty("TF_INTRA_OP_PARALLELISM_THREADS", "0", verbose=False) set_env_if_empty("TF_INTER_OP_PARALLELISM_THREADS", "0", verbose=False) @@ -278,9 +300,12 @@ def get_tf_session_config() -> Any: set_env_if_empty("TF_XLA_FLAGS", "--tf_xla_auto_jit=2") config = tf.ConfigProto( gpu_options=tf.GPUOptions(allow_growth=True), - intra_op_parallelism_threads=intra, inter_op_parallelism_threads=inter + intra_op_parallelism_threads=intra, + inter_op_parallelism_threads=inter, ) - if Version(tf_py_version) >= Version('1.15') and int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)): + if Version(tf_py_version) >= Version("1.15") and int( + os.environ.get("DP_AUTO_PARALLELIZATION", 0) + ): config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel" return config @@ -298,10 +323,10 @@ def reset_default_tf_session_config(cpu_only: bool): """ global default_tf_session_config if cpu_only: - default_tf_session_config.device_count['GPU'] = 0 + default_tf_session_config.device_count["GPU"] = 0 else: - if 'GPU' in default_tf_session_config.device_count: - del default_tf_session_config.device_count['GPU'] + if "GPU" in default_tf_session_config.device_count: + del default_tf_session_config.device_count["GPU"] def 
get_module(module_name: str) -> "ModuleType": @@ -320,7 +345,7 @@ def get_module(module_name: str) -> "ModuleType": if platform.system() == "Windows": ext = ".dll" prefix = "" - #elif platform.system() == "Darwin": + # elif platform.system() == "Darwin": # ext = ".dylib" else: ext = ".so" @@ -341,7 +366,7 @@ def get_module(module_name: str) -> "ModuleType": # check CXX11_ABI_FLAG is compatiblity # see https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html # ABI should be the same - if 'CXX11_ABI_FLAG' in tf.__dict__: + if "CXX11_ABI_FLAG" in tf.__dict__: tf_cxx11_abi_flag = tf.CXX11_ABI_FLAG else: tf_cxx11_abi_flag = tf.sysconfig.CXX11_ABI_FLAG @@ -352,11 +377,13 @@ def get_module(module_name: str) -> "ModuleType": "with CXX11_ABI_FLAG=%d. These two library ABIs are " "incompatible and thus an error is raised when loading %s. " "You need to rebuild deepmd-kit against this TensorFlow " - "runtime." % ( + "runtime." + % ( TF_CXX11_ABI_FLAG, tf_cxx11_abi_flag, module_name, - )) from e + ) + ) from e # different versions may cause incompatibility # see #406, #447, #557, #774, and #796 for example @@ -371,20 +398,20 @@ def get_module(module_name: str) -> "ModuleType": "against TensorFlow %s.\nIf you are using a wheel from " "pypi, you may consider to install deepmd-kit execuating " "`pip install deepmd-kit --no-binary deepmd-kit` " - "instead." % ( + "instead." + % ( TF_VERSION, tf_py_version, module_name, TF_VERSION, tf_py_version, - )) from e + ) + ) from e error_message = ( "This deepmd-kit package is inconsitent with TensorFlow " "Runtime, thus an error is raised when loading %s. " "You need to rebuild deepmd-kit against this TensorFlow " - "runtime." % ( - module_name, - ) + "runtime." 
% (module_name,) ) if TF_CXX11_ABI_FLAG == 1: # #1791 diff --git a/deepmd/fit/__init__.py b/deepmd/fit/__init__.py index 3f94c00a43..485319cd62 100644 --- a/deepmd/fit/__init__.py +++ b/deepmd/fit/__init__.py @@ -1,4 +1,10 @@ -from .ener import EnerFitting -from .dipole import DipoleFittingSeA -from .polar import PolarFittingSeA -from .polar import GlobalPolarFittingSeA +from .dipole import ( + DipoleFittingSeA, +) +from .ener import ( + EnerFitting, +) +from .polar import ( + GlobalPolarFittingSeA, + PolarFittingSeA, +) diff --git a/deepmd/fit/dipole.py b/deepmd/fit/dipole.py index 932c834577..79f7818714 100644 --- a/deepmd/fit/dipole.py +++ b/deepmd/fit/dipole.py @@ -1,21 +1,42 @@ import warnings +from typing import ( + List, + Optional, + Tuple, +) + import numpy as np -from typing import Optional, Tuple, List -from deepmd.env import tf -from deepmd.common import add_data_requirement, get_activation_func, get_precision, cast_precision -from deepmd.utils.network import one_layer, one_layer_rand_seed_shift -from deepmd.utils.graph import get_fitting_net_variables_from_graph_def -from deepmd.descriptor import DescrptSeA -from deepmd.fit.fitting import Fitting +from deepmd.common import ( + add_data_requirement, + cast_precision, + get_activation_func, + get_precision, +) +from deepmd.descriptor import ( + DescrptSeA, +) +from deepmd.env import ( + GLOBAL_TF_FLOAT_PRECISION, + global_cvt_2_tf_float, + tf, +) +from deepmd.fit.fitting import ( + Fitting, +) +from deepmd.utils.graph import ( + get_fitting_net_variables_from_graph_def, +) +from deepmd.utils.network import ( + one_layer, + one_layer_rand_seed_shift, +) -from deepmd.env import global_cvt_2_tf_float -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -class DipoleFittingSeA (Fitting) : +class DipoleFittingSeA(Fitting): """ Fit the atomic dipole with descriptor se_a - + Parameters ---------- descrpt : tf.Tensor @@ -36,15 +57,17 @@ class DipoleFittingSeA (Fitting) : uniform_seed Only for the purpose of 
backward compatibility, retrieves the old behavior of using the random seed """ - def __init__ (self, - descrpt : tf.Tensor, - neuron : List[int] = [120,120,120], - resnet_dt : bool = True, - sel_type : List[int] = None, - seed : int = None, - activation_function : str = 'tanh', - precision : str = 'default', - uniform_seed: bool = False + + def __init__( + self, + descrpt: tf.Tensor, + neuron: List[int] = [120, 120, 120], + resnet_dt: bool = True, + sel_type: List[int] = None, + seed: int = None, + activation_function: str = "tanh", + precision: str = "default", + uniform_seed: bool = False, ) -> None: """ Constructor @@ -56,7 +79,9 @@ def __init__ (self, self.sel_type = sel_type if self.sel_type is None: self.sel_type = [ii for ii in range(self.ntypes)] - self.sel_mask = np.array([ii in self.sel_type for ii in range(self.ntypes)], dtype=bool) + self.sel_mask = np.array( + [ii in self.sel_type for ii in range(self.ntypes)], dtype=bool + ) self.seed = seed self.uniform_seed = uniform_seed self.seed_shift = one_layer_rand_seed_shift() @@ -80,46 +105,63 @@ def get_out_size(self) -> int: """ return 3 - def _build_lower(self, - start_index, - natoms, - inputs, - rot_mat, - suffix='', - reuse=None - ): + def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=None): # cut-out inputs - inputs_i = tf.slice(inputs, - [0, start_index, 0], - [-1, natoms, -1]) + inputs_i = tf.slice(inputs, [0, start_index, 0], [-1, natoms, -1]) inputs_i = tf.reshape(inputs_i, [-1, self.dim_descrpt]) - rot_mat_i = tf.slice(rot_mat, - [0, start_index, 0], - [-1, natoms, -1]) + rot_mat_i = tf.slice(rot_mat, [0, start_index, 0], [-1, natoms, -1]) rot_mat_i = tf.reshape(rot_mat_i, [-1, self.dim_rot_mat_1, 3]) layer = inputs_i for ii in range(0, len(self.n_neuron)): if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii - 1]: - layer += one_layer(layer, self.n_neuron[ii], name='layer_' + str(ii) + suffix, - reuse=reuse, seed=self.seed, use_timestep=self.resnet_dt, - 
activation_fn=self.fitting_activation_fn, precision=self.fitting_precision, - uniform_seed=self.uniform_seed, initial_variables=self.fitting_net_variables, - mixed_prec=self.mixed_prec) + layer += one_layer( + layer, + self.n_neuron[ii], + name="layer_" + str(ii) + suffix, + reuse=reuse, + seed=self.seed, + use_timestep=self.resnet_dt, + activation_fn=self.fitting_activation_fn, + precision=self.fitting_precision, + uniform_seed=self.uniform_seed, + initial_variables=self.fitting_net_variables, + mixed_prec=self.mixed_prec, + ) else: - layer = one_layer(layer, self.n_neuron[ii], name='layer_' + str(ii) + suffix, - reuse=reuse, seed=self.seed, activation_fn=self.fitting_activation_fn, - precision=self.fitting_precision, uniform_seed=self.uniform_seed, - initial_variables=self.fitting_net_variables, mixed_prec=self.mixed_prec) - if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift + layer = one_layer( + layer, + self.n_neuron[ii], + name="layer_" + str(ii) + suffix, + reuse=reuse, + seed=self.seed, + activation_fn=self.fitting_activation_fn, + precision=self.fitting_precision, + uniform_seed=self.uniform_seed, + initial_variables=self.fitting_net_variables, + mixed_prec=self.mixed_prec, + ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift # (nframes x natoms) x naxis - final_layer = one_layer(layer, self.dim_rot_mat_1, activation_fn=None, - name='final_layer' + suffix, reuse=reuse, seed=self.seed, - precision=self.fitting_precision, uniform_seed=self.uniform_seed, - initial_variables=self.fitting_net_variables, mixed_prec=self.mixed_prec, - final_layer=True) - if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift + final_layer = one_layer( + layer, + self.dim_rot_mat_1, + activation_fn=None, + name="final_layer" + suffix, + reuse=reuse, + seed=self.seed, + precision=self.fitting_precision, + uniform_seed=self.uniform_seed, + 
initial_variables=self.fitting_net_variables, + mixed_prec=self.mixed_prec, + final_layer=True, + ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift # (nframes x natoms) x 1 * naxis - final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0] * natoms, 1, self.dim_rot_mat_1]) + final_layer = tf.reshape( + final_layer, [tf.shape(inputs)[0] * natoms, 1, self.dim_rot_mat_1] + ) # (nframes x natoms) x 1 x 3(coord) final_layer = tf.matmul(final_layer, rot_mat_i) # nframes x natoms x 3 @@ -127,16 +169,18 @@ def _build_lower(self, return final_layer @cast_precision - def build (self, - input_d : tf.Tensor, - rot_mat : tf.Tensor, - natoms : tf.Tensor, - input_dict: Optional[dict] = None, - reuse : bool = None, - suffix : str = '') -> tf.Tensor: + def build( + self, + input_d: tf.Tensor, + rot_mat: tf.Tensor, + natoms: tf.Tensor, + input_dict: Optional[dict] = None, + reuse: bool = None, + suffix: str = "", + ) -> tf.Tensor: """ Build the computational graph for fitting net - + Parameters ---------- input_d @@ -162,19 +206,25 @@ def build (self, """ if input_dict is None: input_dict = {} - type_embedding = input_dict.get('type_embedding', None) - atype = input_dict.get('atype', None) - nframes = input_dict.get('nframes') + type_embedding = input_dict.get("type_embedding", None) + atype = input_dict.get("atype", None) + nframes = input_dict.get("nframes") start_index = 0 inputs = tf.reshape(input_d, [-1, natoms[0], self.dim_descrpt]) rot_mat = tf.reshape(rot_mat, [-1, natoms[0], self.dim_rot_mat]) if type_embedding is not None: - nloc_mask = tf.reshape(tf.tile(tf.repeat(self.sel_mask, natoms[2:]), [nframes]), [nframes, -1]) + nloc_mask = tf.reshape( + tf.tile(tf.repeat(self.sel_mask, natoms[2:]), [nframes]), [nframes, -1] + ) atype_nall = tf.reshape(atype, [-1, natoms[1]]) # (nframes x nloc_masked) - self.atype_nloc_masked = tf.reshape(tf.slice(atype_nall, [0, 0], [-1, natoms[0]])[nloc_mask], [-1]) ## lammps will make error - 
self.nloc_masked = tf.shape(tf.reshape(self.atype_nloc_masked, [nframes, -1]))[1] + self.atype_nloc_masked = tf.reshape( + tf.slice(atype_nall, [0, 0], [-1, natoms[0]])[nloc_mask], [-1] + ) ## lammps will make error + self.nloc_masked = tf.shape( + tf.reshape(self.atype_nloc_masked, [nframes, -1]) + )[1] atype_embed = tf.nn.embedding_lookup(type_embedding, self.atype_nloc_masked) else: atype_embed = None @@ -186,41 +236,55 @@ def build (self, outs_list = [] for type_i in range(self.ntypes): if type_i not in self.sel_type: - start_index += natoms[2+type_i] + start_index += natoms[2 + type_i] continue final_layer = self._build_lower( - start_index, natoms[2+type_i], - inputs, rot_mat, suffix='_type_'+str(type_i)+suffix, reuse=reuse) + start_index, + natoms[2 + type_i], + inputs, + rot_mat, + suffix="_type_" + str(type_i) + suffix, + reuse=reuse, + ) start_index += natoms[2 + type_i] # concat the results outs_list.append(final_layer) count += 1 - outs = tf.concat(outs_list, axis = 1) + outs = tf.concat(outs_list, axis=1) else: - inputs = tf.reshape(tf.reshape(inputs, [nframes, natoms[0], self.dim_descrpt])[nloc_mask], - [-1, self.dim_descrpt]) - rot_mat = tf.reshape(tf.reshape(rot_mat, [nframes, natoms[0], self.dim_rot_mat_1 * 3])[nloc_mask], - [-1, self.dim_rot_mat_1, 3]) + inputs = tf.reshape( + tf.reshape(inputs, [nframes, natoms[0], self.dim_descrpt])[nloc_mask], + [-1, self.dim_descrpt], + ) + rot_mat = tf.reshape( + tf.reshape(rot_mat, [nframes, natoms[0], self.dim_rot_mat_1 * 3])[ + nloc_mask + ], + [-1, self.dim_rot_mat_1, 3], + ) atype_embed = tf.cast(atype_embed, self.fitting_precision) type_shape = atype_embed.get_shape().as_list() inputs = tf.concat([inputs, atype_embed], axis=1) self.dim_descrpt = self.dim_descrpt + type_shape[1] inputs = tf.reshape(inputs, [nframes, self.nloc_masked, self.dim_descrpt]) - rot_mat = tf.reshape(rot_mat, [nframes, self.nloc_masked, self.dim_rot_mat_1 * 3]) + rot_mat = tf.reshape( + rot_mat, [nframes, self.nloc_masked, 
self.dim_rot_mat_1 * 3] + ) final_layer = self._build_lower( - 0, self.nloc_masked, - inputs, rot_mat, suffix=suffix, reuse=reuse) + 0, self.nloc_masked, inputs, rot_mat, suffix=suffix, reuse=reuse + ) # nframes x natoms x 3 outs = tf.reshape(final_layer, [nframes, self.nloc_masked, 3]) - tf.summary.histogram('fitting_net_output', outs) + tf.summary.histogram("fitting_net_output", outs) return tf.reshape(outs, [-1]) # return tf.reshape(outs, [tf.shape(inputs)[0] * natoms[0] * 3 // 3]) - def init_variables(self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix : str = "", + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix: str = "", ) -> None: """ Init the fitting net variables with the given dict @@ -234,10 +298,11 @@ def init_variables(self, suffix : str suffix to name scope """ - self.fitting_net_variables = get_fitting_net_variables_from_graph_def(graph_def, suffix=suffix) - + self.fitting_net_variables = get_fitting_net_variables_from_graph_def( + graph_def, suffix=suffix + ) - def enable_mixed_precision(self, mixed_prec : dict = None) -> None: + def enable_mixed_precision(self, mixed_prec: dict = None) -> None: """ Reveive the mixed precision setting. 
@@ -247,4 +312,4 @@ def enable_mixed_precision(self, mixed_prec : dict = None) -> None: The mixed precision setting used in the embedding net """ self.mixed_prec = mixed_prec - self.fitting_precision = get_precision(mixed_prec['output_prec']) + self.fitting_precision = get_precision(mixed_prec["output_prec"]) diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py index 4956c2e173..afa12948fa 100644 --- a/deepmd/fit/ener.py +++ b/deepmd/fit/ener.py @@ -1,28 +1,60 @@ import logging import warnings -import numpy as np -from typing import Optional, Tuple, List -from packaging.version import Version +from typing import ( + List, + Optional, + Tuple, +) -from deepmd.env import tf -from deepmd.infer import DeepPotential -from deepmd.common import add_data_requirement, get_activation_func, get_precision, cast_precision -from deepmd.utils.network import one_layer_rand_seed_shift +import numpy as np +from packaging.version import ( + Version, +) + +from deepmd.common import ( + add_data_requirement, + cast_precision, + get_activation_func, + get_precision, +) +from deepmd.env import ( + GLOBAL_TF_FLOAT_PRECISION, + TF_VERSION, + global_cvt_2_tf_float, + tf, +) +from deepmd.fit.fitting import ( + Fitting, +) +from deepmd.infer import ( + DeepPotential, +) +from deepmd.nvnmd.fit.ener import ( + one_layer_nvnmd, +) +from deepmd.nvnmd.utils.config import ( + nvnmd_cfg, +) +from deepmd.utils.errors import ( + GraphWithoutTensorError, +) +from deepmd.utils.graph import ( + get_fitting_net_variables_from_graph_def, + get_tensor_by_name_from_graph, + load_graph_def, +) from deepmd.utils.network import one_layer as one_layer_deepmd -from deepmd.utils.type_embed import embed_atom_type -from deepmd.utils.graph import get_fitting_net_variables_from_graph_def, load_graph_def, get_tensor_by_name_from_graph -from deepmd.utils.errors import GraphWithoutTensorError -from deepmd.fit.fitting import Fitting - -from deepmd.env import global_cvt_2_tf_float -from deepmd.env import 
GLOBAL_TF_FLOAT_PRECISION, TF_VERSION - -from deepmd.nvnmd.utils.config import nvnmd_cfg -from deepmd.nvnmd.fit.ener import one_layer_nvnmd +from deepmd.utils.network import ( + one_layer_rand_seed_shift, +) +from deepmd.utils.type_embed import ( + embed_atom_type, +) log = logging.getLogger(__name__) -class EnerFitting (Fitting): + +class EnerFitting(Fitting): r"""Fitting the energy of the system. The force and the virial can also be trained. The potential energy :math:`E` is a fitting network function of the descriptor :math:`\mathcal{D}`: @@ -72,8 +104,8 @@ class EnerFitting (Fitting): tot_ener_zero Force the total energy to zero. Useful for the charge fitting. trainable - If the weights of fitting net are trainable. - Suppose that we have :math:`N_l` hidden layers in the fitting net, + If the weights of fitting net are trainable. + Suppose that we have :math:`N_l` hidden layers in the fitting net, this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable. seed Random seed for initializing the network parameters. @@ -89,21 +121,23 @@ class EnerFitting (Fitting): The name of the each layer. If two layers, either in the same fitting or different fittings, have the same name, they will share the same neural network parameters. 
""" - def __init__ (self, - descrpt : tf.Tensor, - neuron : List[int] = [120,120,120], - resnet_dt : bool = True, - numb_fparam : int = 0, - numb_aparam : int = 0, - rcond : float = 1e-3, - tot_ener_zero : bool = False, - trainable : List[bool] = None, - seed : int = None, - atom_ener : List[float] = [], - activation_function : str = 'tanh', - precision : str = 'default', - uniform_seed: bool = False, - layer_name: Optional[List[Optional[str]]] = None, + + def __init__( + self, + descrpt: tf.Tensor, + neuron: List[int] = [120, 120, 120], + resnet_dt: bool = True, + numb_fparam: int = 0, + numb_aparam: int = 0, + rcond: float = 1e-3, + tot_ener_zero: bool = False, + trainable: List[bool] = None, + seed: int = None, + atom_ener: List[float] = [], + activation_function: str = "tanh", + precision: str = "default", + uniform_seed: bool = False, + layer_name: Optional[List[Optional[str]]] = None, ) -> None: """ Constructor @@ -138,25 +172,33 @@ def __init__ (self, if self.trainable is None: self.trainable = [True for ii in range(len(self.n_neuron) + 1)] if isinstance(self.trainable, bool): - self.trainable = [self.trainable] * (len(self.n_neuron)+1) - assert(len(self.trainable) == len(self.n_neuron) + 1), 'length of trainable should be that of n_neuron + 1' + self.trainable = [self.trainable] * (len(self.n_neuron) + 1) + assert ( + len(self.trainable) == len(self.n_neuron) + 1 + ), "length of trainable should be that of n_neuron + 1" self.atom_ener = [] self.atom_ener_v = atom_ener for at, ae in enumerate(atom_ener): if ae is not None: - self.atom_ener.append(tf.constant(ae, GLOBAL_TF_FLOAT_PRECISION, name = "atom_%d_ener" % at)) + self.atom_ener.append( + tf.constant(ae, GLOBAL_TF_FLOAT_PRECISION, name="atom_%d_ener" % at) + ) else: self.atom_ener.append(None) self.useBN = False self.bias_atom_e = np.zeros(self.ntypes, dtype=np.float64) # data requirement - if self.numb_fparam > 0 : - add_data_requirement('fparam', self.numb_fparam, atomic=False, must=True, 
high_prec=False) + if self.numb_fparam > 0: + add_data_requirement( + "fparam", self.numb_fparam, atomic=False, must=True, high_prec=False + ) self.fparam_avg = None self.fparam_std = None self.fparam_inv_std = None if self.numb_aparam > 0: - add_data_requirement('aparam', self.numb_aparam, atomic=True, must=True, high_prec=False) + add_data_requirement( + "aparam", self.numb_aparam, atomic=True, must=True, high_prec=False + ) self.aparam_avg = None self.aparam_std = None self.aparam_inv_std = None @@ -165,8 +207,10 @@ def __init__ (self, self.mixed_prec = None self.layer_name = layer_name if self.layer_name is not None: - assert isinstance(self.layer_name, list), 'layer_name should be a list' - assert len(self.layer_name) == len(self.n_neuron) + 1, 'length of layer_name should be that of n_neuron + 1' + assert isinstance(self.layer_name, list), "layer_name should be a list" + assert ( + len(self.layer_name) == len(self.n_neuron) + 1 + ), "length of layer_name should be that of n_neuron + 1" def get_numb_fparam(self) -> int: """ @@ -180,10 +224,7 @@ def get_numb_aparam(self) -> int: """ return self.numb_fparam - def compute_output_stats(self, - all_stat: dict, - mixed_type: bool = False - ) -> None: + def compute_output_stats(self, all_stat: dict, mixed_type: bool = False) -> None: """ Compute the ouput statistics @@ -198,10 +239,12 @@ def compute_output_stats(self, If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), in which frames in a system may have different natoms_vec(s), with the same nloc. 
""" - self.bias_atom_e = self._compute_output_stats(all_stat, rcond=self.rcond, mixed_type=mixed_type) + self.bias_atom_e = self._compute_output_stats( + all_stat, rcond=self.rcond, mixed_type=mixed_type + ) def _compute_output_stats(self, all_stat, rcond=1e-3, mixed_type=False): - data = all_stat['energy'] + data = all_stat["energy"] # data[sys_idx][batch_idx][frame_idx] sys_ener = [] for ss in range(len(data)): @@ -214,7 +257,7 @@ def _compute_output_stats(self, all_stat, rcond=1e-3, mixed_type=False): sys_ener = np.array(sys_ener) sys_tynatom = [] if mixed_type: - data = all_stat['real_natoms_vec'] + data = all_stat["real_natoms_vec"] nsys = len(data) for ss in range(len(data)): tmp_tynatom = [] @@ -224,32 +267,35 @@ def _compute_output_stats(self, all_stat, rcond=1e-3, mixed_type=False): tmp_tynatom = np.average(np.array(tmp_tynatom), axis=0) sys_tynatom.append(tmp_tynatom) else: - data = all_stat['natoms_vec'] + data = all_stat["natoms_vec"] nsys = len(data) for ss in range(len(data)): sys_tynatom.append(data[ss][0].astype(np.float64)) sys_tynatom = np.array(sys_tynatom) - sys_tynatom = np.reshape(sys_tynatom, [nsys,-1]) - sys_tynatom = sys_tynatom[:,2:] + sys_tynatom = np.reshape(sys_tynatom, [nsys, -1]) + sys_tynatom = sys_tynatom[:, 2:] if len(self.atom_ener) > 0: # Atomic energies stats are incorrect if atomic energies are assigned. # In this situation, we directly use these assigned energies instead of computing stats. 
# This will make the loss decrease quickly - assigned_atom_ener = np.array(list((ee for ee in self.atom_ener_v if ee is not None))) - assigned_ener_idx = list((ii for ii, ee in enumerate(self.atom_ener_v) if ee is not None)) + assigned_atom_ener = np.array( + list((ee for ee in self.atom_ener_v if ee is not None)) + ) + assigned_ener_idx = list( + (ii for ii, ee in enumerate(self.atom_ener_v) if ee is not None) + ) # np.dot out size: nframe sys_ener -= np.dot(sys_tynatom[:, assigned_ener_idx], assigned_atom_ener) - sys_tynatom[:, assigned_ener_idx] = 0. - energy_shift,resd,rank,s_value \ - = np.linalg.lstsq(sys_tynatom, sys_ener, rcond = rcond) + sys_tynatom[:, assigned_ener_idx] = 0.0 + energy_shift, resd, rank, s_value = np.linalg.lstsq( + sys_tynatom, sys_ener, rcond=rcond + ) if len(self.atom_ener) > 0: for ii in assigned_ener_idx: energy_shift[ii] = self.atom_ener_v[ii] - return energy_shift + return energy_shift - def compute_input_stats(self, - all_stat : dict, - protection : float = 1e-2) -> None: + def compute_input_stats(self, all_stat: dict, protection: float = 1e-2) -> None: """ Compute the input statistics @@ -264,140 +310,144 @@ def compute_input_stats(self, """ # stat fparam if self.numb_fparam > 0: - cat_data = np.concatenate(all_stat['fparam'], axis = 0) + cat_data = np.concatenate(all_stat["fparam"], axis=0) cat_data = np.reshape(cat_data, [-1, self.numb_fparam]) - self.fparam_avg = np.average(cat_data, axis = 0) - self.fparam_std = np.std(cat_data, axis = 0) + self.fparam_avg = np.average(cat_data, axis=0) + self.fparam_std = np.std(cat_data, axis=0) for ii in range(self.fparam_std.size): if self.fparam_std[ii] < protection: self.fparam_std[ii] = protection - self.fparam_inv_std = 1./self.fparam_std + self.fparam_inv_std = 1.0 / self.fparam_std # stat aparam if self.numb_aparam > 0: sys_sumv = [] sys_sumv2 = [] sys_sumn = [] - for ss_ in all_stat['aparam'] : + for ss_ in all_stat["aparam"]: ss = np.reshape(ss_, [-1, self.numb_aparam]) - 
sys_sumv.append(np.sum(ss, axis = 0)) - sys_sumv2.append(np.sum(np.multiply(ss, ss), axis = 0)) + sys_sumv.append(np.sum(ss, axis=0)) + sys_sumv2.append(np.sum(np.multiply(ss, ss), axis=0)) sys_sumn.append(ss.shape[0]) - sumv = np.sum(sys_sumv, axis = 0) - sumv2 = np.sum(sys_sumv2, axis = 0) + sumv = np.sum(sys_sumv, axis=0) + sumv2 = np.sum(sys_sumv2, axis=0) sumn = np.sum(sys_sumn) - self.aparam_avg = (sumv)/sumn + self.aparam_avg = (sumv) / sumn self.aparam_std = self._compute_std(sumv2, sumv, sumn) for ii in range(self.aparam_std.size): if self.aparam_std[ii] < protection: self.aparam_std[ii] = protection - self.aparam_inv_std = 1./self.aparam_std - + self.aparam_inv_std = 1.0 / self.aparam_std - def _compute_std (self, sumv2, sumv, sumn) : - return np.sqrt(sumv2/sumn - np.multiply(sumv/sumn, sumv/sumn)) + def _compute_std(self, sumv2, sumv, sumn): + return np.sqrt(sumv2 / sumn - np.multiply(sumv / sumn, sumv / sumn)) @cast_precision def _build_lower( - self, - start_index, - natoms, - inputs, - fparam = None, - aparam = None, - bias_atom_e = 0.0, - type_suffix = '', - suffix = '', - reuse = None + self, + start_index, + natoms, + inputs, + fparam=None, + aparam=None, + bias_atom_e=0.0, + type_suffix="", + suffix="", + reuse=None, ): # cut-out inputs - inputs_i = tf.slice (inputs, - [ 0, start_index, 0], - [-1, natoms, -1] ) + inputs_i = tf.slice(inputs, [0, start_index, 0], [-1, natoms, -1]) inputs_i = tf.reshape(inputs_i, [-1, self.dim_descrpt]) layer = inputs_i if fparam is not None: ext_fparam = tf.tile(fparam, [1, natoms]) ext_fparam = tf.reshape(ext_fparam, [-1, self.numb_fparam]) - ext_fparam = tf.cast(ext_fparam,self.fitting_precision) - layer = tf.concat([layer, ext_fparam], axis = 1) + ext_fparam = tf.cast(ext_fparam, self.fitting_precision) + layer = tf.concat([layer, ext_fparam], axis=1) if aparam is not None: - ext_aparam = tf.slice(aparam, - [ 0, start_index * self.numb_aparam], - [-1, natoms * self.numb_aparam]) + ext_aparam = tf.slice( + aparam, 
+ [0, start_index * self.numb_aparam], + [-1, natoms * self.numb_aparam], + ) ext_aparam = tf.reshape(ext_aparam, [-1, self.numb_aparam]) - ext_aparam = tf.cast(ext_aparam,self.fitting_precision) - layer = tf.concat([layer, ext_aparam], axis = 1) + ext_aparam = tf.cast(ext_aparam, self.fitting_precision) + layer = tf.concat([layer, ext_aparam], axis=1) - if nvnmd_cfg.enable: + if nvnmd_cfg.enable: one_layer = one_layer_nvnmd else: one_layer = one_layer_deepmd - for ii in range(0,len(self.n_neuron)) : + for ii in range(0, len(self.n_neuron)): if self.layer_name is not None and self.layer_name[ii] is not None: - layer_suffix = 'share_' + self.layer_name[ii] + type_suffix + layer_suffix = "share_" + self.layer_name[ii] + type_suffix layer_reuse = tf.AUTO_REUSE else: - layer_suffix = 'layer_' + str(ii) + type_suffix + suffix + layer_suffix = "layer_" + str(ii) + type_suffix + suffix layer_reuse = reuse - if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii-1]: - layer+= one_layer( + if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii - 1]: + layer += one_layer( layer, self.n_neuron[ii], name=layer_suffix, reuse=layer_reuse, - seed = self.seed, - use_timestep = self.resnet_dt, - activation_fn = self.fitting_activation_fn, - precision = self.fitting_precision, - trainable = self.trainable[ii], - uniform_seed = self.uniform_seed, - initial_variables = self.fitting_net_variables, - mixed_prec = self.mixed_prec) - else : + seed=self.seed, + use_timestep=self.resnet_dt, + activation_fn=self.fitting_activation_fn, + precision=self.fitting_precision, + trainable=self.trainable[ii], + uniform_seed=self.uniform_seed, + initial_variables=self.fitting_net_variables, + mixed_prec=self.mixed_prec, + ) + else: layer = one_layer( layer, self.n_neuron[ii], name=layer_suffix, reuse=layer_reuse, - seed = self.seed, - activation_fn = self.fitting_activation_fn, - precision = self.fitting_precision, - trainable = self.trainable[ii], - uniform_seed = self.uniform_seed, - initial_variables 
= self.fitting_net_variables, - mixed_prec = self.mixed_prec) - if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift + seed=self.seed, + activation_fn=self.fitting_activation_fn, + precision=self.fitting_precision, + trainable=self.trainable[ii], + uniform_seed=self.uniform_seed, + initial_variables=self.fitting_net_variables, + mixed_prec=self.mixed_prec, + ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift if self.layer_name is not None and self.layer_name[-1] is not None: - layer_suffix = 'share_' + self.layer_name[-1] + type_suffix + layer_suffix = "share_" + self.layer_name[-1] + type_suffix layer_reuse = tf.AUTO_REUSE else: - layer_suffix = 'final_layer' + type_suffix + suffix + layer_suffix = "final_layer" + type_suffix + suffix layer_reuse = reuse final_layer = one_layer( - layer, - 1, - activation_fn = None, - bavg = bias_atom_e, + layer, + 1, + activation_fn=None, + bavg=bias_atom_e, name=layer_suffix, reuse=layer_reuse, - seed = self.seed, - precision = self.fitting_precision, - trainable = self.trainable[-1], - uniform_seed = self.uniform_seed, - initial_variables = self.fitting_net_variables, - mixed_prec = self.mixed_prec, - final_layer = True) - if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift + seed=self.seed, + precision=self.fitting_precision, + trainable=self.trainable[-1], + uniform_seed=self.uniform_seed, + initial_variables=self.fitting_net_variables, + mixed_prec=self.mixed_prec, + final_layer=True, + ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift return final_layer - - - def build (self, - inputs : tf.Tensor, - natoms : tf.Tensor, - input_dict : dict = None, - reuse : bool = None, - suffix : str = '', + + def build( + self, + inputs: tf.Tensor, + natoms: tf.Tensor, + input_dict: dict = None, + reuse: bool = None, + suffix: str = "", ) -> tf.Tensor: """ Build the computational graph for 
fitting net @@ -407,7 +457,7 @@ def build (self, inputs The input descriptor input_dict - Additional dict for inputs. + Additional dict for inputs. if numb_fparam > 0, should have input_dict['fparam'] if numb_aparam > 0, should have input_dict['aparam'] natoms @@ -428,82 +478,92 @@ def build (self, if input_dict is None: input_dict = {} bias_atom_e = self.bias_atom_e - type_embedding = input_dict.get('type_embedding', None) - atype = input_dict.get('atype', None) + type_embedding = input_dict.get("type_embedding", None) + atype = input_dict.get("atype", None) if self.numb_fparam > 0: if self.fparam_avg is None: - self.fparam_avg = 0. + self.fparam_avg = 0.0 if self.fparam_inv_std is None: - self.fparam_inv_std = 1. + self.fparam_inv_std = 1.0 if self.numb_aparam > 0: if self.aparam_avg is None: - self.aparam_avg = 0. + self.aparam_avg = 0.0 if self.aparam_inv_std is None: - self.aparam_inv_std = 1. - - with tf.variable_scope('fitting_attr' + suffix, reuse = reuse) : - t_dfparam = tf.constant(self.numb_fparam, - name = 'dfparam', - dtype = tf.int32) - t_daparam = tf.constant(self.numb_aparam, - name = 'daparam', - dtype = tf.int32) - self.t_bias_atom_e = tf.get_variable('t_bias_atom_e', - self.bias_atom_e.shape, - dtype=GLOBAL_TF_FLOAT_PRECISION, - trainable=False, - initializer=tf.constant_initializer(self.bias_atom_e)) - if self.numb_fparam > 0: - t_fparam_avg = tf.get_variable('t_fparam_avg', - self.numb_fparam, - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(self.fparam_avg)) - t_fparam_istd = tf.get_variable('t_fparam_istd', - self.numb_fparam, - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(self.fparam_inv_std)) - if self.numb_aparam > 0: - t_aparam_avg = tf.get_variable('t_aparam_avg', - self.numb_aparam, - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(self.aparam_avg)) - t_aparam_istd = 
tf.get_variable('t_aparam_istd', - self.numb_aparam, - dtype = GLOBAL_TF_FLOAT_PRECISION, - trainable = False, - initializer = tf.constant_initializer(self.aparam_inv_std)) - + self.aparam_inv_std = 1.0 + + with tf.variable_scope("fitting_attr" + suffix, reuse=reuse): + t_dfparam = tf.constant(self.numb_fparam, name="dfparam", dtype=tf.int32) + t_daparam = tf.constant(self.numb_aparam, name="daparam", dtype=tf.int32) + self.t_bias_atom_e = tf.get_variable( + "t_bias_atom_e", + self.bias_atom_e.shape, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(self.bias_atom_e), + ) + if self.numb_fparam > 0: + t_fparam_avg = tf.get_variable( + "t_fparam_avg", + self.numb_fparam, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(self.fparam_avg), + ) + t_fparam_istd = tf.get_variable( + "t_fparam_istd", + self.numb_fparam, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(self.fparam_inv_std), + ) + if self.numb_aparam > 0: + t_aparam_avg = tf.get_variable( + "t_aparam_avg", + self.numb_aparam, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(self.aparam_avg), + ) + t_aparam_istd = tf.get_variable( + "t_aparam_istd", + self.numb_aparam, + dtype=GLOBAL_TF_FLOAT_PRECISION, + trainable=False, + initializer=tf.constant_initializer(self.aparam_inv_std), + ) + inputs = tf.reshape(inputs, [-1, natoms[0], self.dim_descrpt]) if len(self.atom_ener): # only for atom_ener - nframes = input_dict.get('nframes') + nframes = input_dict.get("nframes") if nframes is not None: # like inputs, but we don't want to add a dependency on inputs - inputs_zero = tf.zeros((nframes, natoms[0], self.dim_descrpt), dtype=GLOBAL_TF_FLOAT_PRECISION) + inputs_zero = tf.zeros( + (nframes, natoms[0], self.dim_descrpt), + dtype=GLOBAL_TF_FLOAT_PRECISION, + ) else: inputs_zero = tf.zeros_like(inputs, dtype=GLOBAL_TF_FLOAT_PRECISION) - - if 
bias_atom_e is not None : - assert(len(bias_atom_e) == self.ntypes) + if bias_atom_e is not None: + assert len(bias_atom_e) == self.ntypes fparam = None aparam = None - if self.numb_fparam > 0 : - fparam = input_dict['fparam'] + if self.numb_fparam > 0: + fparam = input_dict["fparam"] fparam = tf.reshape(fparam, [-1, self.numb_fparam]) - fparam = (fparam - t_fparam_avg) * t_fparam_istd - if self.numb_aparam > 0 : - aparam = input_dict['aparam'] + fparam = (fparam - t_fparam_avg) * t_fparam_istd + if self.numb_aparam > 0: + aparam = input_dict["aparam"] aparam = tf.reshape(aparam, [-1, self.numb_aparam]) aparam = (aparam - t_aparam_avg) * t_aparam_istd aparam = tf.reshape(aparam, [-1, self.numb_aparam * natoms[0]]) atype_nall = tf.reshape(atype, [-1, natoms[1]]) - self.atype_nloc = tf.reshape(tf.slice(atype_nall, [0, 0], [-1, natoms[0]]), [-1]) ## lammps will make error + self.atype_nloc = tf.reshape( + tf.slice(atype_nall, [0, 0], [-1, natoms[0]]), [-1] + ) ## lammps will make error if type_embedding is not None: atype_embed = tf.nn.embedding_lookup(type_embedding, self.atype_nloc) else: @@ -516,83 +576,105 @@ def build (self, outs_list = [] for type_i in range(self.ntypes): final_layer = self._build_lower( - start_index, natoms[2+type_i], - inputs, fparam, aparam, - bias_atom_e=0., - type_suffix='_type_' + str(type_i), + start_index, + natoms[2 + type_i], + inputs, + fparam, + aparam, + bias_atom_e=0.0, + type_suffix="_type_" + str(type_i), suffix=suffix, reuse=reuse, ) # concat the results - if type_i < len(self.atom_ener) and self.atom_ener[type_i] is not None: + if type_i < len(self.atom_ener) and self.atom_ener[type_i] is not None: zero_layer = self._build_lower( - start_index, natoms[2+type_i], - inputs_zero, fparam, aparam, - bias_atom_e=0., - type_suffix='_type_' + str(type_i), + start_index, + natoms[2 + type_i], + inputs_zero, + fparam, + aparam, + bias_atom_e=0.0, + type_suffix="_type_" + str(type_i), suffix=suffix, reuse=True, ) final_layer -= 
zero_layer - final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0], natoms[2+type_i]]) + final_layer = tf.reshape( + final_layer, [tf.shape(inputs)[0], natoms[2 + type_i]] + ) outs_list.append(final_layer) - start_index += natoms[2+type_i] + start_index += natoms[2 + type_i] # concat the results # concat once may be faster than multiple concat - outs = tf.concat(outs_list, axis = 1) + outs = tf.concat(outs_list, axis=1) # with type embedding else: atype_embed = tf.cast(atype_embed, GLOBAL_TF_FLOAT_PRECISION) type_shape = atype_embed.get_shape().as_list() inputs = tf.concat( - [tf.reshape(inputs,[-1,self.dim_descrpt]),atype_embed], - axis=1 + [tf.reshape(inputs, [-1, self.dim_descrpt]), atype_embed], axis=1 ) original_dim_descrpt = self.dim_descrpt self.dim_descrpt = self.dim_descrpt + type_shape[1] inputs = tf.reshape(inputs, [-1, natoms[0], self.dim_descrpt]) final_layer = self._build_lower( - 0, natoms[0], - inputs, fparam, aparam, - bias_atom_e=0.0, suffix=suffix, reuse=reuse + 0, + natoms[0], + inputs, + fparam, + aparam, + bias_atom_e=0.0, + suffix=suffix, + reuse=reuse, ) if len(self.atom_ener): # remove contribution in vacuum inputs_zero = tf.concat( [tf.reshape(inputs_zero, [-1, original_dim_descrpt]), atype_embed], - axis=1 + axis=1, ) inputs_zero = tf.reshape(inputs_zero, [-1, natoms[0], self.dim_descrpt]) zero_layer = self._build_lower( - 0, natoms[0], - inputs_zero, fparam, aparam, - bias_atom_e=0.0, suffix=suffix, reuse=True, + 0, + natoms[0], + inputs_zero, + fparam, + aparam, + bias_atom_e=0.0, + suffix=suffix, + reuse=True, ) # atomic energy will be stored in `self.t_bias_atom_e` which is not trainable final_layer -= zero_layer outs = tf.reshape(final_layer, [tf.shape(inputs)[0], natoms[0]]) # add bias self.atom_ener_before = outs - self.add_type = tf.reshape(tf.nn.embedding_lookup(self.t_bias_atom_e, self.atype_nloc), [tf.shape(inputs)[0], natoms[0]]) + self.add_type = tf.reshape( + tf.nn.embedding_lookup(self.t_bias_atom_e, self.atype_nloc), 
+ [tf.shape(inputs)[0], natoms[0]], + ) outs = outs + self.add_type self.atom_ener_after = outs if self.tot_ener_zero: force_tot_ener = 0.0 outs = tf.reshape(outs, [-1, natoms[0]]) - outs_mean = tf.reshape(tf.reduce_mean(outs, axis = 1), [-1, 1]) - outs_mean = outs_mean - tf.ones_like(outs_mean, dtype = GLOBAL_TF_FLOAT_PRECISION) * (force_tot_ener/global_cvt_2_tf_float(natoms[0])) + outs_mean = tf.reshape(tf.reduce_mean(outs, axis=1), [-1, 1]) + outs_mean = outs_mean - tf.ones_like( + outs_mean, dtype=GLOBAL_TF_FLOAT_PRECISION + ) * (force_tot_ener / global_cvt_2_tf_float(natoms[0])) outs = outs - outs_mean outs = tf.reshape(outs, [-1]) - tf.summary.histogram('fitting_net_output', outs) + tf.summary.histogram("fitting_net_output", outs) return tf.reshape(outs, [-1]) - - def init_variables(self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix : str = "", + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix: str = "", ) -> None: """ Init the fitting net variables with the given dict @@ -606,30 +688,45 @@ def init_variables(self, suffix : str suffix to name scope """ - self.fitting_net_variables = get_fitting_net_variables_from_graph_def(graph_def, suffix=suffix) + self.fitting_net_variables = get_fitting_net_variables_from_graph_def( + graph_def, suffix=suffix + ) if self.layer_name is not None: # shared variables have no suffix - shared_variables = get_fitting_net_variables_from_graph_def(graph_def, suffix="") + shared_variables = get_fitting_net_variables_from_graph_def( + graph_def, suffix="" + ) self.fitting_net_variables.update(shared_variables) if self.numb_fparam > 0: - self.fparam_avg = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_fparam_avg' % suffix) - self.fparam_inv_std = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_fparam_istd' % suffix) + self.fparam_avg = get_tensor_by_name_from_graph( + graph, "fitting_attr%s/t_fparam_avg" % suffix + ) + self.fparam_inv_std = get_tensor_by_name_from_graph( + 
graph, "fitting_attr%s/t_fparam_istd" % suffix + ) if self.numb_aparam > 0: - self.aparam_avg = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_aparam_avg' % suffix) - self.aparam_inv_std = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_aparam_istd' % suffix) + self.aparam_avg = get_tensor_by_name_from_graph( + graph, "fitting_attr%s/t_aparam_avg" % suffix + ) + self.aparam_inv_std = get_tensor_by_name_from_graph( + graph, "fitting_attr%s/t_aparam_istd" % suffix + ) try: - self.bias_atom_e = get_tensor_by_name_from_graph(graph, 'fitting_attr%s/t_bias_atom_e' % suffix) + self.bias_atom_e = get_tensor_by_name_from_graph( + graph, "fitting_attr%s/t_bias_atom_e" % suffix + ) except GraphWithoutTensorError: # for compatibility, old models has no t_bias_atom_e pass - def change_energy_bias(self, - data, - frozen_model, - origin_type_map, - full_type_map, - bias_shift='delta', - ntest=10 + def change_energy_bias( + self, + data, + frozen_model, + origin_type_map, + full_type_map, + bias_shift="delta", + ntest=10, ) -> None: """ Change the energy bias according to the input data and the pretrained model. @@ -656,11 +753,13 @@ def change_energy_bias(self, energy_ground_truth = [] energy_predict = [] sorter = np.argsort(full_type_map) - idx_type_map = sorter[np.searchsorted(full_type_map, origin_type_map, sorter=sorter)] + idx_type_map = sorter[ + np.searchsorted(full_type_map, origin_type_map, sorter=sorter) + ] mixed_type = data.mixed_type numb_type = len(full_type_map) dp = None - if bias_shift == 'delta': + if bias_shift == "delta": # init model dp = DeepPotential(frozen_model) for sys in data.data_systems: @@ -671,14 +770,27 @@ def change_energy_bias(self, atype = test_data["type"][:numb_test].reshape([numb_test, -1]) else: atype = test_data["type"][0] - assert np.array([i in idx_type_map for i in list(set(atype.reshape(-1)))]).all(), \ - "Some types are not in 'type_map'!" 
- energy_ground_truth.append(test_data["energy"][:numb_test].reshape([numb_test, 1])) + assert np.array( + [i in idx_type_map for i in list(set(atype.reshape(-1)))] + ).all(), "Some types are not in 'type_map'!" + energy_ground_truth.append( + test_data["energy"][:numb_test].reshape([numb_test, 1]) + ) if mixed_type: - type_numbs.append(np.array([(atype == i).sum(axis=-1) for i in idx_type_map], dtype=np.int32).T) + type_numbs.append( + np.array( + [(atype == i).sum(axis=-1) for i in idx_type_map], + dtype=np.int32, + ).T + ) else: - type_numbs.append(np.tile(np.bincount(atype, minlength=numb_type)[idx_type_map], (numb_test, 1))) - if bias_shift == 'delta': + type_numbs.append( + np.tile( + np.bincount(atype, minlength=numb_type)[idx_type_map], + (numb_test, 1), + ) + ) + if bias_shift == "delta": coord = test_data["coord"][:numb_test].reshape([numb_test, -1]) box = test_data["box"][:numb_test] ret = dp.eval(coord, box, atype, mixed_type=mixed_type) @@ -686,23 +798,34 @@ def change_energy_bias(self, type_numbs = np.concatenate(type_numbs) energy_ground_truth = np.concatenate(energy_ground_truth) old_bias = self.bias_atom_e[idx_type_map] - if bias_shift == 'delta': + if bias_shift == "delta": energy_predict = np.concatenate(energy_predict) bias_diff = energy_ground_truth - energy_predict delta_bias = np.linalg.lstsq(type_numbs, bias_diff, rcond=None)[0] unbias_e = energy_predict + type_numbs @ delta_bias atom_numbs = type_numbs.sum(-1) - rmse_ae = (np.sqrt(np.square(unbias_e - energy_ground_truth)) / atom_numbs).mean() + rmse_ae = ( + np.sqrt(np.square(unbias_e - energy_ground_truth)) / atom_numbs + ).mean() self.bias_atom_e[idx_type_map] += delta_bias.reshape(-1) - log.info("RMSE of atomic energy after linear regression is: {} eV/atom.".format(rmse_ae)) - elif bias_shift == 'statistic': - statistic_bias = np.linalg.lstsq(type_numbs, energy_ground_truth, rcond=None)[0] + log.info( + "RMSE of atomic energy after linear regression is: {} eV/atom.".format( + rmse_ae + 
) + ) + elif bias_shift == "statistic": + statistic_bias = np.linalg.lstsq( + type_numbs, energy_ground_truth, rcond=None + )[0] self.bias_atom_e[idx_type_map] = statistic_bias.reshape(-1) else: - raise RuntimeError('Unknown bias_shift mode: ' + bias_shift) - log.info("Change energy bias of {} from {} to {}.".format(str(origin_type_map), str(old_bias), - str(self.bias_atom_e[idx_type_map]))) - + raise RuntimeError("Unknown bias_shift mode: " + bias_shift) + log.info( + "Change energy bias of {} from {} to {}.".format( + str(origin_type_map), str(old_bias), str(self.bias_atom_e[idx_type_map]) + ) + ) + def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None: """ Reveive the mixed precision setting. @@ -713,4 +836,4 @@ def enable_mixed_precision(self, mixed_prec: Optional[dict] = None) -> None: The mixed precision setting used in the embedding net """ self.mixed_prec = mixed_prec - self.fitting_precision = get_precision(mixed_prec['output_prec']) + self.fitting_precision = get_precision(mixed_prec["output_prec"]) diff --git a/deepmd/fit/fitting.py b/deepmd/fit/fitting.py index 034def72d4..15bf89f3d4 100644 --- a/deepmd/fit/fitting.py +++ b/deepmd/fit/fitting.py @@ -1,5 +1,11 @@ -from deepmd.env import tf -from deepmd.utils import Plugin, PluginVariant +from deepmd.env import ( + tf, +) +from deepmd.utils import ( + Plugin, + PluginVariant, +) + class Fitting: @property @@ -7,10 +13,11 @@ def precision(self) -> tf.DType: """Precision of fitting network.""" return self.fitting_precision - def init_variables(self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix : str = "", + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix: str = "", ) -> None: """ Init the fitting net variables with the given dict @@ -23,10 +30,12 @@ def init_variables(self, The input frozen model graph_def suffix : str suffix to name scope - + Notes ----- This method is called by others when the fitting supported initialization from the given 
variables. """ raise NotImplementedError( - "Fitting %s doesn't support initialization from the given variables!" % type(self).__name__) + "Fitting %s doesn't support initialization from the given variables!" + % type(self).__name__ + ) diff --git a/deepmd/fit/polar.py b/deepmd/fit/polar.py index 2bd07b847b..37189224fd 100644 --- a/deepmd/fit/polar.py +++ b/deepmd/fit/polar.py @@ -1,20 +1,40 @@ import warnings -import numpy as np -from typing import Optional, Tuple, List - -from deepmd.env import tf -from deepmd.common import add_data_requirement, cast_precision, get_activation_func, get_precision -from deepmd.utils.network import one_layer, one_layer_rand_seed_shift -from deepmd.utils.graph import get_fitting_net_variables_from_graph_def -from deepmd.descriptor import DescrptLocFrame -from deepmd.descriptor import DescrptSeA -from deepmd.fit.fitting import Fitting - -from deepmd.env import global_cvt_2_tf_float -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION +from typing import ( + List, + Optional, + Tuple, +) +import numpy as np -class PolarFittingSeA (Fitting) : +from deepmd.common import ( + add_data_requirement, + cast_precision, + get_activation_func, + get_precision, +) +from deepmd.descriptor import ( + DescrptLocFrame, + DescrptSeA, +) +from deepmd.env import ( + GLOBAL_TF_FLOAT_PRECISION, + global_cvt_2_tf_float, + tf, +) +from deepmd.fit.fitting import ( + Fitting, +) +from deepmd.utils.graph import ( + get_fitting_net_variables_from_graph_def, +) +from deepmd.utils.network import ( + one_layer, + one_layer_rand_seed_shift, +) + + +class PolarFittingSeA(Fitting): """ Fit the atomic polarizability with descriptor se_a @@ -34,7 +54,7 @@ class PolarFittingSeA (Fitting) : scale : List[float] The output of the fitting net (polarizability matrix) for type i atom will be scaled by scale[i] diag_shift : List[float] - The diagonal part of the polarizability matrix of type i will be shifted by diag_shift[i]. The shift operation is carried out after scale. 
+ The diagonal part of the polarizability matrix of type i will be shifted by diag_shift[i]. The shift operation is carried out after scale. seed : int Random seed for initializing the network parameters. activation_function : str @@ -44,19 +64,21 @@ class PolarFittingSeA (Fitting) : uniform_seed Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed """ - def __init__ (self, - descrpt : tf.Tensor, - neuron : List[int] = [120,120,120], - resnet_dt : bool = True, - sel_type : List[int] = None, - fit_diag : bool = True, - scale : List[float] = None, - shift_diag : bool = True, # YWolfeee: will support the user to decide whether to use this function - #diag_shift : List[float] = None, YWolfeee: will not support the user to assign a shift - seed : int = None, - activation_function : str = 'tanh', - precision : str = 'default', - uniform_seed: bool = False + + def __init__( + self, + descrpt: tf.Tensor, + neuron: List[int] = [120, 120, 120], + resnet_dt: bool = True, + sel_type: List[int] = None, + fit_diag: bool = True, + scale: List[float] = None, + shift_diag: bool = True, # YWolfeee: will support the user to decide whether to use this function + # diag_shift : List[float] = None, YWolfeee: will not support the user to assign a shift + seed: int = None, + activation_function: str = "tanh", + precision: str = "default", + uniform_seed: bool = False, ) -> None: """ Constructor @@ -70,23 +92,27 @@ def __init__ (self, self.seed = seed self.uniform_seed = uniform_seed self.seed_shift = one_layer_rand_seed_shift() - #self.diag_shift = diag_shift + # self.diag_shift = diag_shift self.shift_diag = shift_diag self.scale = scale self.fitting_activation_fn = get_activation_func(activation_function) self.fitting_precision = get_precision(precision) if self.sel_type is None: self.sel_type = [ii for ii in range(self.ntypes)] - self.sel_mask = np.array([ii in self.sel_type for ii in range(self.ntypes)], dtype=bool) + self.sel_mask = 
np.array( + [ii in self.sel_type for ii in range(self.ntypes)], dtype=bool + ) if self.scale is None: self.scale = [1.0 for ii in range(self.ntypes)] - #if self.diag_shift is None: + # if self.diag_shift is None: # self.diag_shift = [0.0 for ii in range(self.ntypes)] if type(self.sel_type) is not list: self.sel_type = [self.sel_type] self.sel_type = sorted(self.sel_type) - self.constant_matrix = np.zeros(self.ntypes) # self.ntypes x 1, store the average diagonal value - #if type(self.diag_shift) is not list: + self.constant_matrix = np.zeros( + self.ntypes + ) # self.ntypes x 1, store the average diagonal value + # if type(self.diag_shift) is not list: # self.diag_shift = [self.diag_shift] if type(self.scale) is not list: self.scale = [self.scale for ii in range(self.ntypes)] @@ -109,116 +135,166 @@ def get_out_size(self) -> int: """ return 9 - def compute_input_stats(self, - all_stat, - protection = 1e-2): + def compute_input_stats(self, all_stat, protection=1e-2): """ Compute the input statistics Parameters ---------- all_stat - Dictionary of inputs. + Dictionary of inputs. can be prepared by model.make_stat_input protection Divided-by-zero protection """ - if not ('polarizability' in all_stat.keys()): + if not ("polarizability" in all_stat.keys()): self.avgeig = np.zeros([9]) - warnings.warn('no polarizability data, cannot do data stat. use zeros as guess') + warnings.warn( + "no polarizability data, cannot do data stat. 
use zeros as guess" + ) return - data = all_stat['polarizability'] + data = all_stat["polarizability"] all_tmp = [] for ss in range(len(data)): - tmp = np.concatenate(data[ss], axis = 0) + tmp = np.concatenate(data[ss], axis=0) tmp = np.reshape(tmp, [-1, 3, 3]) - tmp,_ = np.linalg.eig(tmp) + tmp, _ = np.linalg.eig(tmp) tmp = np.absolute(tmp) - tmp = np.sort(tmp, axis = 1) + tmp = np.sort(tmp, axis=1) all_tmp.append(tmp) - all_tmp = np.concatenate(all_tmp, axis = 1) - self.avgeig = np.average(all_tmp, axis = 0) + all_tmp = np.concatenate(all_tmp, axis=1) + self.avgeig = np.average(all_tmp, axis=0) - # YWolfeee: support polar normalization, initialize to a more appropriate point + # YWolfeee: support polar normalization, initialize to a more appropriate point if self.shift_diag: mean_polar = np.zeros([len(self.sel_type), 9]) sys_matrix, polar_bias = [], [] - for ss in range(len(all_stat['type'])): - atom_has_polar = [w for w in all_stat['type'][ss][0] if (w in self.sel_type)] # select atom with polar - if all_stat['find_atomic_polarizability'][ss] > 0.0: - for itype in range(len(self.sel_type)): # Atomic polar mode, should specify the atoms - index_lis = [index for index, w in enumerate(atom_has_polar) \ - if atom_has_polar[index] == self.sel_type[itype]] # select index in this type - - sys_matrix.append(np.zeros((1,len(self.sel_type)))) - sys_matrix[-1][0,itype] = len(index_lis) - - polar_bias.append(np.sum( - all_stat['atomic_polarizability'][ss].reshape((-1,9))[index_lis],axis=0).reshape((1,9))) - else: # No atomic polar in this system, so it should have global polar - if not all_stat['find_polarizability'][ss] > 0.0: # This system is jsut a joke? 
+ for ss in range(len(all_stat["type"])): + atom_has_polar = [ + w for w in all_stat["type"][ss][0] if (w in self.sel_type) + ] # select atom with polar + if all_stat["find_atomic_polarizability"][ss] > 0.0: + for itype in range( + len(self.sel_type) + ): # Atomic polar mode, should specify the atoms + index_lis = [ + index + for index, w in enumerate(atom_has_polar) + if atom_has_polar[index] == self.sel_type[itype] + ] # select index in this type + + sys_matrix.append(np.zeros((1, len(self.sel_type)))) + sys_matrix[-1][0, itype] = len(index_lis) + + polar_bias.append( + np.sum( + all_stat["atomic_polarizability"][ss].reshape((-1, 9))[ + index_lis + ], + axis=0, + ).reshape((1, 9)) + ) + else: # No atomic polar in this system, so it should have global polar + if ( + not all_stat["find_polarizability"][ss] > 0.0 + ): # This system is jsut a joke? continue # Till here, we have global polar - sys_matrix.append(np.zeros((1,len(self.sel_type)))) # add a line in the equations - for itype in range(len(self.sel_type)): # Atomic polar mode, should specify the atoms - index_lis = [index for index, w in enumerate(atom_has_polar) \ - if atom_has_polar[index] == self.sel_type[itype]] # select index in this type + sys_matrix.append( + np.zeros((1, len(self.sel_type))) + ) # add a line in the equations + for itype in range( + len(self.sel_type) + ): # Atomic polar mode, should specify the atoms + index_lis = [ + index + for index, w in enumerate(atom_has_polar) + if atom_has_polar[index] == self.sel_type[itype] + ] # select index in this type + + sys_matrix[-1][0, itype] = len(index_lis) - sys_matrix[-1][0,itype] = len(index_lis) - # add polar_bias - polar_bias.append(all_stat['polarizability'][ss].reshape((1,9))) + polar_bias.append(all_stat["polarizability"][ss].reshape((1, 9))) - matrix, bias = np.concatenate(sys_matrix,axis=0), np.concatenate(polar_bias,axis=0) - atom_polar,_,_,_ \ - = np.linalg.lstsq(matrix, bias, rcond = 1e-3) + matrix, bias = np.concatenate(sys_matrix, 
axis=0), np.concatenate( + polar_bias, axis=0 + ) + atom_polar, _, _, _ = np.linalg.lstsq(matrix, bias, rcond=1e-3) for itype in range(len(self.sel_type)): - self.constant_matrix[self.sel_type[itype]] = np.mean(np.diagonal(atom_polar[itype].reshape((3,3)))) - - def _build_lower(self, - start_index, - natoms, - inputs, - rot_mat, - suffix='', - reuse=None): + self.constant_matrix[self.sel_type[itype]] = np.mean( + np.diagonal(atom_polar[itype].reshape((3, 3))) + ) + + def _build_lower(self, start_index, natoms, inputs, rot_mat, suffix="", reuse=None): # cut-out inputs - inputs_i = tf.slice(inputs, - [0, start_index * self.dim_descrpt], - [-1, natoms * self.dim_descrpt]) + inputs_i = tf.slice( + inputs, [0, start_index * self.dim_descrpt], [-1, natoms * self.dim_descrpt] + ) inputs_i = tf.reshape(inputs_i, [-1, self.dim_descrpt]) - rot_mat_i = tf.slice(rot_mat, - [0, start_index * self.dim_rot_mat], - [-1, natoms * self.dim_rot_mat]) + rot_mat_i = tf.slice( + rot_mat, + [0, start_index * self.dim_rot_mat], + [-1, natoms * self.dim_rot_mat], + ) rot_mat_i = tf.reshape(rot_mat_i, [-1, self.dim_rot_mat_1, 3]) layer = inputs_i for ii in range(0, len(self.n_neuron)): if ii >= 1 and self.n_neuron[ii] == self.n_neuron[ii - 1]: - layer += one_layer(layer, self.n_neuron[ii], name='layer_' + str(ii) + suffix, - reuse=reuse, seed=self.seed, use_timestep=self.resnet_dt, - activation_fn=self.fitting_activation_fn, precision=self.fitting_precision, - uniform_seed=self.uniform_seed, initial_variables=self.fitting_net_variables, - mixed_prec=self.mixed_prec) + layer += one_layer( + layer, + self.n_neuron[ii], + name="layer_" + str(ii) + suffix, + reuse=reuse, + seed=self.seed, + use_timestep=self.resnet_dt, + activation_fn=self.fitting_activation_fn, + precision=self.fitting_precision, + uniform_seed=self.uniform_seed, + initial_variables=self.fitting_net_variables, + mixed_prec=self.mixed_prec, + ) else: - layer = one_layer(layer, self.n_neuron[ii], name='layer_' + str(ii) + 
suffix, - reuse=reuse, seed=self.seed, activation_fn=self.fitting_activation_fn, - precision=self.fitting_precision, uniform_seed=self.uniform_seed, - initial_variables=self.fitting_net_variables, mixed_prec=self.mixed_prec) - if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift + layer = one_layer( + layer, + self.n_neuron[ii], + name="layer_" + str(ii) + suffix, + reuse=reuse, + seed=self.seed, + activation_fn=self.fitting_activation_fn, + precision=self.fitting_precision, + uniform_seed=self.uniform_seed, + initial_variables=self.fitting_net_variables, + mixed_prec=self.mixed_prec, + ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift if self.fit_diag: bavg = np.zeros(self.dim_rot_mat_1) # bavg[0] = self.avgeig[0] # bavg[1] = self.avgeig[1] # bavg[2] = self.avgeig[2] # (nframes x natoms) x naxis - final_layer = one_layer(layer, self.dim_rot_mat_1, activation_fn=None, - name='final_layer' + suffix, reuse=reuse, seed=self.seed, - bavg=bavg, precision=self.fitting_precision, uniform_seed=self.uniform_seed, - initial_variables=self.fitting_net_variables, mixed_prec=self.mixed_prec, - final_layer=True) - if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift + final_layer = one_layer( + layer, + self.dim_rot_mat_1, + activation_fn=None, + name="final_layer" + suffix, + reuse=reuse, + seed=self.seed, + bavg=bavg, + precision=self.fitting_precision, + uniform_seed=self.uniform_seed, + initial_variables=self.fitting_net_variables, + mixed_prec=self.mixed_prec, + final_layer=True, + ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift # (nframes x natoms) x naxis - final_layer = tf.reshape(final_layer, [tf.shape(inputs)[0] * natoms, self.dim_rot_mat_1]) + final_layer = tf.reshape( + final_layer, [tf.shape(inputs)[0] * natoms, self.dim_rot_mat_1] + ) # (nframes x natoms) x naxis x naxis final_layer = tf.matrix_diag(final_layer) else: 
@@ -227,15 +303,27 @@ def _build_lower(self, # bavg[1*self.dim_rot_mat_1+1] = self.avgeig[1] # bavg[2*self.dim_rot_mat_1+2] = self.avgeig[2] # (nframes x natoms) x (naxis x naxis) - final_layer = one_layer(layer, self.dim_rot_mat_1 * self.dim_rot_mat_1, activation_fn=None, - name='final_layer' + suffix, reuse=reuse, seed=self.seed, - bavg=bavg, precision=self.fitting_precision, uniform_seed=self.uniform_seed, - initial_variables=self.fitting_net_variables, mixed_prec=self.mixed_prec, - final_layer=True) - if (not self.uniform_seed) and (self.seed is not None): self.seed += self.seed_shift + final_layer = one_layer( + layer, + self.dim_rot_mat_1 * self.dim_rot_mat_1, + activation_fn=None, + name="final_layer" + suffix, + reuse=reuse, + seed=self.seed, + bavg=bavg, + precision=self.fitting_precision, + uniform_seed=self.uniform_seed, + initial_variables=self.fitting_net_variables, + mixed_prec=self.mixed_prec, + final_layer=True, + ) + if (not self.uniform_seed) and (self.seed is not None): + self.seed += self.seed_shift # (nframes x natoms) x naxis x naxis - final_layer = tf.reshape(final_layer, - [tf.shape(inputs)[0] * natoms, self.dim_rot_mat_1, self.dim_rot_mat_1]) + final_layer = tf.reshape( + final_layer, + [tf.shape(inputs)[0] * natoms, self.dim_rot_mat_1, self.dim_rot_mat_1], + ) # (nframes x natoms) x naxis x naxis final_layer = final_layer + tf.transpose(final_layer, perm=[0, 2, 1]) # (nframes x natoms) x naxis x 3(coord) @@ -247,16 +335,18 @@ def _build_lower(self, return final_layer @cast_precision - def build (self, - input_d : tf.Tensor, - rot_mat : tf.Tensor, - natoms : tf.Tensor, - input_dict: Optional[dict] = None, - reuse : bool = None, - suffix : str = '') : + def build( + self, + input_d: tf.Tensor, + rot_mat: tf.Tensor, + natoms: tf.Tensor, + input_dict: Optional[dict] = None, + reuse: bool = None, + suffix: str = "", + ): """ Build the computational graph for fitting net - + Parameters ---------- input_d @@ -278,32 +368,46 @@ def build (self, 
Returns ------- atomic_polar - The atomic polarizability + The atomic polarizability """ if input_dict is None: input_dict = {} - type_embedding = input_dict.get('type_embedding', None) - atype = input_dict.get('atype', None) - nframes = input_dict.get('nframes') + type_embedding = input_dict.get("type_embedding", None) + atype = input_dict.get("atype", None) + nframes = input_dict.get("nframes") start_index = 0 inputs = tf.reshape(input_d, [-1, self.dim_descrpt * natoms[0]]) rot_mat = tf.reshape(rot_mat, [-1, self.dim_rot_mat * natoms[0]]) if type_embedding is not None: # nframes x nloc - nloc_mask = tf.reshape(tf.tile(tf.repeat(self.sel_mask, natoms[2:]), [nframes]), [nframes, -1]) + nloc_mask = tf.reshape( + tf.tile(tf.repeat(self.sel_mask, natoms[2:]), [nframes]), [nframes, -1] + ) # nframes x nloc_masked scale = tf.reshape( - tf.reshape(tf.tile(tf.repeat(self.scale, natoms[2:]), [nframes]), [nframes, -1])[nloc_mask], - [nframes, -1]) + tf.reshape( + tf.tile(tf.repeat(self.scale, natoms[2:]), [nframes]), [nframes, -1] + )[nloc_mask], + [nframes, -1], + ) if self.shift_diag: # nframes x nloc_masked - constant_matrix = tf.reshape(tf.reshape(tf.tile(tf.repeat( - self.constant_matrix, natoms[2:]), [nframes]), [nframes, -1])[nloc_mask], [nframes, -1]) + constant_matrix = tf.reshape( + tf.reshape( + tf.tile(tf.repeat(self.constant_matrix, natoms[2:]), [nframes]), + [nframes, -1], + )[nloc_mask], + [nframes, -1], + ) atype_nall = tf.reshape(atype, [-1, natoms[1]]) # (nframes x nloc_masked) - self.atype_nloc_masked = tf.reshape(tf.slice(atype_nall, [0, 0], [-1, natoms[0]])[nloc_mask], [-1]) ## lammps will make error - self.nloc_masked = tf.shape(tf.reshape(self.atype_nloc_masked, [nframes, -1]))[1] + self.atype_nloc_masked = tf.reshape( + tf.slice(atype_nall, [0, 0], [-1, natoms[0]])[nloc_mask], [-1] + ) ## lammps will make error + self.nloc_masked = tf.shape( + tf.reshape(self.atype_nloc_masked, [nframes, -1]) + )[1] atype_embed = 
tf.nn.embedding_lookup(type_embedding, self.atype_nloc_masked) else: atype_embed = None @@ -315,48 +419,63 @@ def build (self, outs_list = [] for type_i in range(self.ntypes): if type_i not in self.sel_type: - start_index += natoms[2+type_i] + start_index += natoms[2 + type_i] continue final_layer = self._build_lower( - start_index, natoms[2+type_i], - inputs, rot_mat, suffix='_type_'+str(type_i)+suffix, reuse=reuse) + start_index, + natoms[2 + type_i], + inputs, + rot_mat, + suffix="_type_" + str(type_i) + suffix, + reuse=reuse, + ) # shift and scale sel_type_idx = self.sel_type.index(type_i) final_layer = final_layer * self.scale[sel_type_idx] - final_layer = final_layer + self.constant_matrix[sel_type_idx] * tf.eye(3, batch_shape=[tf.shape(inputs)[0], natoms[2+type_i]], dtype = self.fitting_precision) + final_layer = final_layer + self.constant_matrix[sel_type_idx] * tf.eye( + 3, + batch_shape=[tf.shape(inputs)[0], natoms[2 + type_i]], + dtype=self.fitting_precision, + ) start_index += natoms[2 + type_i] # concat the results outs_list.append(final_layer) count += 1 - outs = tf.concat(outs_list, axis = 1) + outs = tf.concat(outs_list, axis=1) else: - inputs = tf.reshape(tf.reshape(inputs, [nframes, natoms[0], self.dim_descrpt])[nloc_mask], - [-1, self.dim_descrpt]) - rot_mat = tf.reshape(tf.reshape(rot_mat, [nframes, natoms[0], self.dim_rot_mat])[nloc_mask], - [-1, self.dim_rot_mat * self.nloc_masked]) + inputs = tf.reshape( + tf.reshape(inputs, [nframes, natoms[0], self.dim_descrpt])[nloc_mask], + [-1, self.dim_descrpt], + ) + rot_mat = tf.reshape( + tf.reshape(rot_mat, [nframes, natoms[0], self.dim_rot_mat])[nloc_mask], + [-1, self.dim_rot_mat * self.nloc_masked], + ) atype_embed = tf.cast(atype_embed, self.fitting_precision) type_shape = atype_embed.get_shape().as_list() inputs = tf.concat([inputs, atype_embed], axis=1) self.dim_descrpt = self.dim_descrpt + type_shape[1] inputs = tf.reshape(inputs, [-1, self.dim_descrpt * self.nloc_masked]) final_layer = 
self._build_lower( - 0, self.nloc_masked, - inputs, rot_mat, suffix=suffix, reuse=reuse) + 0, self.nloc_masked, inputs, rot_mat, suffix=suffix, reuse=reuse + ) # shift and scale final_layer *= tf.expand_dims(tf.expand_dims(scale, -1), -1) if self.shift_diag: - final_layer += tf.expand_dims(tf.expand_dims(constant_matrix, -1), -1) * \ - tf.eye(3, batch_shape=[1, 1], dtype=self.fitting_precision) + final_layer += tf.expand_dims( + tf.expand_dims(constant_matrix, -1), -1 + ) * tf.eye(3, batch_shape=[1, 1], dtype=self.fitting_precision) outs = final_layer - tf.summary.histogram('fitting_net_output', outs) + tf.summary.histogram("fitting_net_output", outs) return tf.reshape(outs, [-1]) - def init_variables(self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix : str = "", + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix: str = "", ) -> None: """ Init the fitting net variables with the given dict @@ -370,10 +489,11 @@ def init_variables(self, suffix : str suffix to name scope """ - self.fitting_net_variables = get_fitting_net_variables_from_graph_def(graph_def, suffix=suffix) + self.fitting_net_variables = get_fitting_net_variables_from_graph_def( + graph_def, suffix=suffix + ) - - def enable_mixed_precision(self, mixed_prec : dict = None) -> None: + def enable_mixed_precision(self, mixed_prec: dict = None) -> None: """ Reveive the mixed precision setting. 
@@ -383,10 +503,10 @@ def enable_mixed_precision(self, mixed_prec : dict = None) -> None: The mixed precision setting used in the embedding net """ self.mixed_prec = mixed_prec - self.fitting_precision = get_precision(mixed_prec['output_prec']) + self.fitting_precision = get_precision(mixed_prec["output_prec"]) -class GlobalPolarFittingSeA () : +class GlobalPolarFittingSeA: """ Fit the system polarizability with descriptor se_a @@ -406,7 +526,7 @@ class GlobalPolarFittingSeA () : scale : List[float] The output of the fitting net (polarizability matrix) for type i atom will be scaled by scale[i] diag_shift : List[float] - The diagonal part of the polarizability matrix of type i will be shifted by diag_shift[i]. The shift operation is carried out after scale. + The diagonal part of the polarizability matrix of type i will be shifted by diag_shift[i]. The shift operation is carried out after scale. seed : int Random seed for initializing the network parameters. activation_function : str @@ -414,35 +534,39 @@ class GlobalPolarFittingSeA () : precision : str The precision of the embedding net parameters. 
Supported options are |PRECISION| """ - def __init__ (self, - descrpt : tf.Tensor, - neuron : List[int] = [120,120,120], - resnet_dt : bool = True, - sel_type : List[int] = None, - fit_diag : bool = True, - scale : List[float] = None, - diag_shift : List[float] = None, - seed : int = None, - activation_function : str = 'tanh', - precision : str = 'default' + + def __init__( + self, + descrpt: tf.Tensor, + neuron: List[int] = [120, 120, 120], + resnet_dt: bool = True, + sel_type: List[int] = None, + fit_diag: bool = True, + scale: List[float] = None, + diag_shift: List[float] = None, + seed: int = None, + activation_function: str = "tanh", + precision: str = "default", ) -> None: """ - Constructor + Constructor """ - if not isinstance(descrpt, DescrptSeA) : - raise RuntimeError('GlobalPolarFittingSeA only supports DescrptSeA') + if not isinstance(descrpt, DescrptSeA): + raise RuntimeError("GlobalPolarFittingSeA only supports DescrptSeA") self.ntypes = descrpt.get_ntypes() self.dim_descrpt = descrpt.get_dim_out() - self.polar_fitting = PolarFittingSeA(descrpt, - neuron, - resnet_dt, - sel_type, - fit_diag, - scale, - diag_shift, - seed, - activation_function, - precision) + self.polar_fitting = PolarFittingSeA( + descrpt, + neuron, + resnet_dt, + sel_type, + fit_diag, + scale, + diag_shift, + seed, + activation_function, + precision, + ) def get_sel_type(self) -> int: """ @@ -456,16 +580,18 @@ def get_out_size(self) -> int: """ return self.polar_fitting.get_out_size() - def build (self, - input_d, - rot_mat, - natoms, - input_dict: Optional[dict] = None, - reuse = None, - suffix = '') -> tf.Tensor: + def build( + self, + input_d, + rot_mat, + natoms, + input_dict: Optional[dict] = None, + reuse=None, + suffix="", + ) -> tf.Tensor: """ Build the computational graph for fitting net - + Parameters ---------- input_d @@ -487,20 +613,23 @@ def build (self, Returns ------- polar - The system polarizability + The system polarizability """ inputs = tf.reshape(input_d, [-1, 
self.dim_descrpt * natoms[0]]) - outs = self.polar_fitting.build(input_d, rot_mat, natoms, input_dict, reuse, suffix) + outs = self.polar_fitting.build( + input_d, rot_mat, natoms, input_dict, reuse, suffix + ) # nframes x natoms x 9 outs = tf.reshape(outs, [tf.shape(inputs)[0], -1, 9]) - outs = tf.reduce_sum(outs, axis = 1) - tf.summary.histogram('fitting_net_output', outs) + outs = tf.reduce_sum(outs, axis=1) + tf.summary.histogram("fitting_net_output", outs) return tf.reshape(outs, [-1]) - - def init_variables(self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix : str = "", + + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix: str = "", ) -> None: """ Init the fitting net variables with the given dict @@ -514,10 +643,11 @@ def init_variables(self, suffix : str suffix to name scope """ - self.polar_fitting.init_variables(graph=graph, graph_def=graph_def, suffix=suffix) - + self.polar_fitting.init_variables( + graph=graph, graph_def=graph_def, suffix=suffix + ) - def enable_mixed_precision(self, mixed_prec : dict = None) -> None: + def enable_mixed_precision(self, mixed_prec: dict = None) -> None: """ Reveive the mixed precision setting. 
diff --git a/deepmd/infer/__init__.py b/deepmd/infer/__init__.py index 2440b8fb0c..d10d9cdec6 100644 --- a/deepmd/infer/__init__.py +++ b/deepmd/infer/__init__.py @@ -1,16 +1,37 @@ """Submodule containing all the implemented potentials.""" -from pathlib import Path -from typing import Union +from pathlib import ( + Path, +) +from typing import ( + Union, +) -from .data_modifier import DipoleChargeModifier -from .deep_dipole import DeepDipole -from .deep_eval import DeepEval -from .deep_polar import DeepGlobalPolar, DeepPolar -from .deep_pot import DeepPot -from .deep_wfc import DeepWFC -from .ewald_recp import EwaldRecp -from .model_devi import calc_model_devi +from .data_modifier import ( + DipoleChargeModifier, +) +from .deep_dipole import ( + DeepDipole, +) +from .deep_eval import ( + DeepEval, +) +from .deep_polar import ( + DeepGlobalPolar, + DeepPolar, +) +from .deep_pot import ( + DeepPot, +) +from .deep_wfc import ( + DeepWFC, +) +from .ewald_recp import ( + EwaldRecp, +) +from .model_devi import ( + calc_model_devi, +) __all__ = [ "DeepPotential", @@ -22,7 +43,7 @@ "DeepWFC", "DipoleChargeModifier", "EwaldRecp", - "calc_model_devi" + "calc_model_devi", ] diff --git a/deepmd/infer/data_modifier.py b/deepmd/infer/data_modifier.py index 0aa8d9fcc1..34c0d9d8f1 100644 --- a/deepmd/infer/data_modifier.py +++ b/deepmd/infer/data_modifier.py @@ -1,23 +1,38 @@ import os +from typing import ( + List, + Tuple, +) + import numpy as np -from typing import Tuple, List -from deepmd.infer.deep_dipole import DeepDipole -from deepmd.infer.ewald_recp import EwaldRecp -from deepmd.env import tf -from deepmd.common import select_idx_map, make_default_mesh -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION -from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION -from deepmd.env import global_cvt_2_tf_float -from deepmd.env import global_cvt_2_ener_float -from deepmd.env import op_module -from deepmd.utils.sess import run_sess +from 
deepmd.common import ( + make_default_mesh, + select_idx_map, +) +from deepmd.env import ( + GLOBAL_ENER_FLOAT_PRECISION, + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + global_cvt_2_ener_float, + global_cvt_2_tf_float, + op_module, + tf, +) +from deepmd.infer.deep_dipole import ( + DeepDipole, +) +from deepmd.infer.ewald_recp import ( + EwaldRecp, +) +from deepmd.utils.sess import ( + run_sess, +) class DipoleChargeModifier(DeepDipole): """ - + Parameters ---------- model_name @@ -31,23 +46,24 @@ class DipoleChargeModifier(DeepDipole): ewald_beta Splitting parameter of the Ewald sum. Unit: A^{-1} """ - def __init__(self, - model_name : str, - model_charge_map : List[float], - sys_charge_map : List[float], - ewald_h : float = 1, - ewald_beta : float = 1 + + def __init__( + self, + model_name: str, + model_charge_map: List[float], + sys_charge_map: List[float], + ewald_h: float = 1, + ewald_beta: float = 1, ) -> None: """ - Constructor + Constructor """ # the dipole model is loaded with prefix 'dipole_charge' - self.modifier_prefix = 'dipole_charge' + self.modifier_prefix = "dipole_charge" # init dipole model - DeepDipole.__init__(self, - model_name, - load_prefix = self.modifier_prefix, - default_tf_graph = True) + DeepDipole.__init__( + self, model_name, load_prefix=self.modifier_prefix, default_tf_graph=True + ) self.model_name = model_name self.model_charge_map = model_charge_map self.sys_charge_map = sys_charge_map @@ -58,52 +74,54 @@ def __init__(self, self.er = EwaldRecp(self.ewald_h, self.ewald_beta) # dimension of dipole self.ext_dim = 3 - self.t_ndesc = self.graph.get_tensor_by_name(os.path.join(self.modifier_prefix, 'descrpt_attr/ndescrpt:0')) - self.t_sela = self.graph.get_tensor_by_name(os.path.join(self.modifier_prefix, 'descrpt_attr/sel:0')) + self.t_ndesc = self.graph.get_tensor_by_name( + os.path.join(self.modifier_prefix, "descrpt_attr/ndescrpt:0") + ) + self.t_sela = self.graph.get_tensor_by_name( + os.path.join(self.modifier_prefix, 
"descrpt_attr/sel:0") + ) [self.ndescrpt, self.sel_a] = run_sess(self.sess, [self.t_ndesc, self.t_sela]) - self.sel_r = [ 0 for ii in range(len(self.sel_a)) ] + self.sel_r = [0 for ii in range(len(self.sel_a))] self.nnei_a = np.cumsum(self.sel_a)[-1] self.nnei_r = np.cumsum(self.sel_r)[-1] self.nnei = self.nnei_a + self.nnei_r self.ndescrpt_a = self.nnei_a * 4 self.ndescrpt_r = self.nnei_r * 1 - assert(self.ndescrpt == self.ndescrpt_a + self.ndescrpt_r) + assert self.ndescrpt == self.ndescrpt_a + self.ndescrpt_r self.force = None self.ntypes = len(self.sel_a) - def build_fv_graph(self) -> tf.Tensor: """ Build the computational graph for the force and virial inference. """ - with tf.variable_scope('modifier_attr') : - t_mdl_name = tf.constant(self.model_name, - name = 'mdl_name', - dtype = tf.string) - t_modi_type = tf.constant(self.modifier_prefix, - name = 'type', - dtype = tf.string) - t_mdl_charge_map = tf.constant(' '.join([str(ii) for ii in self.model_charge_map]), - name = 'mdl_charge_map', - dtype = tf.string) - t_sys_charge_map = tf.constant(' '.join([str(ii) for ii in self.sys_charge_map]), - name = 'sys_charge_map', - dtype = tf.string) - t_ewald_h = tf.constant(self.ewald_h, - name = 'ewald_h', - dtype = tf.float64) - t_ewald_b = tf.constant(self.ewald_beta, - name = 'ewald_beta', - dtype = tf.float64) + with tf.variable_scope("modifier_attr"): + t_mdl_name = tf.constant(self.model_name, name="mdl_name", dtype=tf.string) + t_modi_type = tf.constant( + self.modifier_prefix, name="type", dtype=tf.string + ) + t_mdl_charge_map = tf.constant( + " ".join([str(ii) for ii in self.model_charge_map]), + name="mdl_charge_map", + dtype=tf.string, + ) + t_sys_charge_map = tf.constant( + " ".join([str(ii) for ii in self.sys_charge_map]), + name="sys_charge_map", + dtype=tf.string, + ) + t_ewald_h = tf.constant(self.ewald_h, name="ewald_h", dtype=tf.float64) + t_ewald_b = tf.constant( + self.ewald_beta, name="ewald_beta", dtype=tf.float64 + ) with 
self.graph.as_default(): - return self._build_fv_graph_inner() - + return self._build_fv_graph_inner() def _build_fv_graph_inner(self): - self.t_ef = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name = 't_ef') + self.t_ef = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_ef") nf = 10 - nfxnas = 64*nf - nfxna = 192*nf + nfxnas = 64 * nf + nfxna = 192 * nf nf = -1 nfxnas = -1 nfxna = -1 @@ -111,89 +129,107 @@ def _build_fv_graph_inner(self): t_nframes = tf.shape(self.t_box_reshape)[0] # (nframes x natoms) x ndescrpt - self.descrpt = self.graph.get_tensor_by_name(os.path.join(self.modifier_prefix, 'o_rmat:0')) - self.descrpt_deriv = self.graph.get_tensor_by_name(os.path.join(self.modifier_prefix, 'o_rmat_deriv:0')) - self.nlist = self.graph.get_tensor_by_name(os.path.join(self.modifier_prefix, 'o_nlist:0')) - self.rij = self.graph.get_tensor_by_name(os.path.join(self.modifier_prefix, 'o_rij:0')) + self.descrpt = self.graph.get_tensor_by_name( + os.path.join(self.modifier_prefix, "o_rmat:0") + ) + self.descrpt_deriv = self.graph.get_tensor_by_name( + os.path.join(self.modifier_prefix, "o_rmat_deriv:0") + ) + self.nlist = self.graph.get_tensor_by_name( + os.path.join(self.modifier_prefix, "o_nlist:0") + ) + self.rij = self.graph.get_tensor_by_name( + os.path.join(self.modifier_prefix, "o_rij:0") + ) # self.descrpt_reshape = tf.reshape(self.descrpt, [nf, 192 * self.ndescrpt]) # self.descrpt_deriv = tf.reshape(self.descrpt_deriv, [nf, 192 * self.ndescrpt * 3]) # nframes x (natoms_sel x 3) - self.t_ef_reshape = tf.reshape(self.t_ef, [t_nframes, -1]) + self.t_ef_reshape = tf.reshape(self.t_ef, [t_nframes, -1]) # nframes x (natoms x 3) - self.t_ef_reshape = self._enrich(self.t_ef_reshape, dof = 3) + self.t_ef_reshape = self._enrich(self.t_ef_reshape, dof=3) # (nframes x natoms) x 3 self.t_ef_reshape = tf.reshape(self.t_ef_reshape, [nfxna, 3]) # nframes x (natoms_sel x 3) self.t_tensor_reshape = tf.reshape(self.t_tensor, [t_nframes, -1]) # nframes x (natoms x 
3) - self.t_tensor_reshape = self._enrich(self.t_tensor_reshape, dof = 3) + self.t_tensor_reshape = self._enrich(self.t_tensor_reshape, dof=3) # (nframes x natoms) x 3 self.t_tensor_reshape = tf.reshape(self.t_tensor_reshape, [nfxna, 3]) # (nframes x natoms) x ndescrpt - [self.t_ef_d] = tf.gradients(self.t_tensor_reshape, self.descrpt, self.t_ef_reshape) + [self.t_ef_d] = tf.gradients( + self.t_tensor_reshape, self.descrpt, self.t_ef_reshape + ) # nframes x (natoms x ndescrpt) self.t_ef_d = tf.reshape(self.t_ef_d, [nf, self.t_natoms[0] * self.ndescrpt]) # t_ef_d is force (with -1), prod_forc takes deriv, so we need the opposite self.t_ef_d_oppo = -self.t_ef_d - - force = op_module.prod_force_se_a(self.t_ef_d_oppo, - self.descrpt_deriv, - self.nlist, - self.t_natoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) - virial, atom_virial \ - = op_module.prod_virial_se_a (self.t_ef_d_oppo, - self.descrpt_deriv, - self.rij, - self.nlist, - self.t_natoms, - n_a_sel = self.nnei_a, - n_r_sel = self.nnei_r) - force = tf.identity(force, name='o_dm_force') - virial = tf.identity(virial, name='o_dm_virial') - atom_virial = tf.identity(atom_virial, name='o_dm_av') - return force, virial, atom_virial + force = op_module.prod_force_se_a( + self.t_ef_d_oppo, + self.descrpt_deriv, + self.nlist, + self.t_natoms, + n_a_sel=self.nnei_a, + n_r_sel=self.nnei_r, + ) + virial, atom_virial = op_module.prod_virial_se_a( + self.t_ef_d_oppo, + self.descrpt_deriv, + self.rij, + self.nlist, + self.t_natoms, + n_a_sel=self.nnei_a, + n_r_sel=self.nnei_r, + ) + force = tf.identity(force, name="o_dm_force") + virial = tf.identity(virial, name="o_dm_virial") + atom_virial = tf.identity(atom_virial, name="o_dm_av") + return force, virial, atom_virial - def _enrich(self, dipole, dof = 3): - coll = [] + def _enrich(self, dipole, dof=3): + coll = [] sel_start_idx = 0 for type_i in range(self.ntypes): if type_i in self.sel_type: - di = tf.slice(dipole, - [ 0, sel_start_idx * dof], - [-1, 
self.t_natoms[2+type_i] * dof]) - sel_start_idx += self.t_natoms[2+type_i] + di = tf.slice( + dipole, + [0, sel_start_idx * dof], + [-1, self.t_natoms[2 + type_i] * dof], + ) + sel_start_idx += self.t_natoms[2 + type_i] else: - di = tf.zeros([tf.shape(dipole)[0], self.t_natoms[2+type_i] * dof], - dtype = GLOBAL_TF_FLOAT_PRECISION) + di = tf.zeros( + [tf.shape(dipole)[0], self.t_natoms[2 + type_i] * dof], + dtype=GLOBAL_TF_FLOAT_PRECISION, + ) coll.append(di) - return tf.concat(coll, axis = 1) + return tf.concat(coll, axis=1) def _slice_descrpt_deriv(self, deriv): coll = [] start_idx = 0 for type_i in range(self.ntypes): if type_i in self.sel_type: - di = tf.slice(deriv, - [ 0, start_idx * self.ndescrpt], - [-1, self.t_natoms[2+type_i] * self.ndescrpt]) + di = tf.slice( + deriv, + [0, start_idx * self.ndescrpt], + [-1, self.t_natoms[2 + type_i] * self.ndescrpt], + ) coll.append(di) - start_idx += self.t_natoms[2+type_i] - return tf.concat(coll, axis = 1) - - - def eval(self, - coord : np.ndarray, - box : np.ndarray, - atype : np.ndarray, - eval_fv : bool = True + start_idx += self.t_natoms[2 + type_i] + return tf.concat(coll, axis=1) + + def eval( + self, + coord: np.ndarray, + box: np.ndarray, + atype: np.ndarray, + eval_fv: bool = True, ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """ Evaluate the modification - + Parameters ---------- coord @@ -231,23 +267,27 @@ def eval(self, # add wfcc all_coord, all_charge, dipole = self._extend_system(coord, box, atype, charge) - + # print('compute er') batch_size = 5 tot_e = [] all_f = [] all_v = [] - for ii in range(0,nframes,batch_size): - e,f,v = self.er.eval(all_coord[ii:ii+batch_size], all_charge[ii:ii+batch_size], box[ii:ii+batch_size]) + for ii in range(0, nframes, batch_size): + e, f, v = self.er.eval( + all_coord[ii : ii + batch_size], + all_charge[ii : ii + batch_size], + box[ii : ii + batch_size], + ) tot_e.append(e) all_f.append(f) all_v.append(v) - tot_e = np.concatenate(tot_e, axis = 0) - all_f = 
np.concatenate(all_f, axis = 0) - all_v = np.concatenate(all_v, axis = 0) + tot_e = np.concatenate(tot_e, axis=0) + all_f = np.concatenate(all_f, axis=0) + all_v = np.concatenate(all_v, axis=0) # print('finish er') # reshape - tot_e.reshape([nframes,1]) + tot_e.reshape([nframes, 1]) tot_f = None tot_v = None @@ -255,25 +295,32 @@ def eval(self, self.force, self.virial, self.av = self.build_fv_graph() if eval_fv: # compute f - ext_f = all_f[:,natoms*3:] + ext_f = all_f[:, natoms * 3 :] corr_f = [] corr_v = [] corr_av = [] - for ii in range(0,nframes,batch_size): - f, v, av = self._eval_fv(coord[ii:ii+batch_size], box[ii:ii+batch_size], atype, ext_f[ii:ii+batch_size]) + for ii in range(0, nframes, batch_size): + f, v, av = self._eval_fv( + coord[ii : ii + batch_size], + box[ii : ii + batch_size], + atype, + ext_f[ii : ii + batch_size], + ) corr_f.append(f) corr_v.append(v) corr_av.append(av) - corr_f = np.concatenate(corr_f, axis = 0) - corr_v = np.concatenate(corr_v, axis = 0) - corr_av = np.concatenate(corr_av, axis = 0) - tot_f = all_f[:,:natoms*3] + corr_f - for ii in range(nsel): - orig_idx = sel_idx_map[ii] - tot_f[:,orig_idx*3:orig_idx*3+3] += ext_f[:,ii*3:ii*3+3] - tot_f = self.reverse_map(np.reshape(tot_f, [nframes,-1,3]), imap) + corr_f = np.concatenate(corr_f, axis=0) + corr_v = np.concatenate(corr_v, axis=0) + corr_av = np.concatenate(corr_av, axis=0) + tot_f = all_f[:, : natoms * 3] + corr_f + for ii in range(nsel): + orig_idx = sel_idx_map[ii] + tot_f[:, orig_idx * 3 : orig_idx * 3 + 3] += ext_f[ + :, ii * 3 : ii * 3 + 3 + ] + tot_f = self.reverse_map(np.reshape(tot_f, [nframes, -1, 3]), imap) # reshape - tot_f = tot_f.reshape([nframes,natoms,3]) + tot_f = tot_f.reshape([nframes, natoms, 3]) # compute v dipole3 = np.reshape(dipole, [nframes, nsel, 3]) ext_f3 = np.reshape(ext_f, [nframes, nsel, 3]) @@ -285,45 +332,45 @@ def eval(self, # print(all_v, '\n', corr_v, '\n', fd_corr_v) tot_v = all_v + corr_v + fd_corr_v # reshape - tot_v = 
tot_v.reshape([nframes,9]) + tot_v = tot_v.reshape([nframes, 9]) return tot_e, tot_f, tot_v - - def _eval_fv(self, coords, cells, atom_types, ext_f) : - # reshape the inputs + def _eval_fv(self, coords, cells, atom_types, ext_f): + # reshape the inputs cells = np.reshape(cells, [-1, 9]) nframes = cells.shape[0] coords = np.reshape(coords, [nframes, -1]) natoms = coords.shape[1] // 3 # sort inputs - coords, atom_types, imap, sel_at, sel_imap = self.sort_input(coords, atom_types, sel_atoms = self.get_sel_type()) + coords, atom_types, imap, sel_at, sel_imap = self.sort_input( + coords, atom_types, sel_atoms=self.get_sel_type() + ) # make natoms_vec and default_mesh natoms_vec = self.make_natoms_vec(atom_types) - assert(natoms_vec[0] == natoms) + assert natoms_vec[0] == natoms default_mesh = make_default_mesh(cells) # evaluate tensor = [] feed_dict_test = {} feed_dict_test[self.t_natoms] = natoms_vec - feed_dict_test[self.t_type ] = np.tile(atom_types, [nframes, 1]).reshape([-1]) - feed_dict_test[self.t_coord ] = coords.reshape([-1]) - feed_dict_test[self.t_box ] = cells.reshape([-1]) - feed_dict_test[self.t_mesh ] = default_mesh.reshape([-1]) - feed_dict_test[self.t_ef ] = ext_f.reshape([-1]) + feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape([-1]) + feed_dict_test[self.t_coord] = coords.reshape([-1]) + feed_dict_test[self.t_box] = cells.reshape([-1]) + feed_dict_test[self.t_mesh] = default_mesh.reshape([-1]) + feed_dict_test[self.t_ef] = ext_f.reshape([-1]) # print(run_sess(self.sess, tf.shape(self.t_tensor), feed_dict = feed_dict_test)) - fout, vout, avout \ - = run_sess(self.sess, [self.force, self.virial, self.av], - feed_dict = feed_dict_test) + fout, vout, avout = run_sess( + self.sess, [self.force, self.virial, self.av], feed_dict=feed_dict_test + ) # print('fout: ', fout.shape, fout) - fout = self.reverse_map(np.reshape(fout, [nframes,-1,3]), imap) + fout = self.reverse_map(np.reshape(fout, [nframes, -1, 3]), imap) fout = 
np.reshape(fout, [nframes, -1]) return fout, vout, avout - def _extend_system(self, coord, box, atype, charge): natoms = coord.shape[1] // 3 nframes = coord.shape[0] @@ -331,21 +378,20 @@ def _extend_system(self, coord, box, atype, charge): sel_idx_map = select_idx_map(atype, self.sel_type) nsel = len(sel_idx_map) coord3 = coord.reshape([nframes, natoms, 3]) - ref_coord = coord3[:,sel_idx_map,:] + ref_coord = coord3[:, sel_idx_map, :] ref_coord = np.reshape(ref_coord, [nframes, nsel * 3]) - + batch_size = 8 all_dipole = [] - for ii in range(0,nframes,batch_size): - dipole = DeepDipole.eval(self, - coord[ii:ii+batch_size], - box[ii:ii+batch_size], - atype) + for ii in range(0, nframes, batch_size): + dipole = DeepDipole.eval( + self, coord[ii : ii + batch_size], box[ii : ii + batch_size], atype + ) all_dipole.append(dipole) - dipole = np.concatenate(all_dipole, axis = 0) - assert(dipole.shape[0] == nframes) + dipole = np.concatenate(all_dipole, axis=0) + assert dipole.shape[0] == nframes dipole = np.reshape(dipole, [nframes, nsel * 3]) - + wfcc_coord = ref_coord + dipole # wfcc_coord = dipole wfcc_charge = np.zeros([nsel]) @@ -357,14 +403,12 @@ def _extend_system(self, coord, box, atype, charge): wfcc_coord = np.reshape(wfcc_coord, [nframes, nsel * 3]) wfcc_charge = np.reshape(wfcc_charge, [nframes, nsel]) - all_coord = np.concatenate((coord, wfcc_coord), axis = 1) - all_charge = np.concatenate((charge, wfcc_charge), axis = 1) + all_coord = np.concatenate((coord, wfcc_coord), axis=1) + all_charge = np.concatenate((charge, wfcc_charge), axis=1) return all_coord, all_charge, dipole - - def modify_data(self, - data : dict) -> None: + def modify_data(self, data: dict) -> None: """ Modify data. 
@@ -383,26 +427,27 @@ def modify_data(self, - force force - virial virial """ - if 'find_energy' not in data and 'find_force' not in data and 'find_virial' not in data: + if ( + "find_energy" not in data + and "find_force" not in data + and "find_virial" not in data + ): return - get_nframes=None - coord = data['coord'][:get_nframes,:] - box = data['box'][:get_nframes,:] - atype = data['type'][:get_nframes,:] + get_nframes = None + coord = data["coord"][:get_nframes, :] + box = data["box"][:get_nframes, :] + atype = data["type"][:get_nframes, :] atype = atype[0] nframes = coord.shape[0] tot_e, tot_f, tot_v = self.eval(coord, box, atype) # print(tot_f[:,0]) - - if 'find_energy' in data and data['find_energy'] == 1.0 : - data['energy'] -= tot_e.reshape(data['energy'].shape) - if 'find_force' in data and data['find_force'] == 1.0 : - data['force'] -= tot_f.reshape(data['force'].shape) - if 'find_virial' in data and data['find_virial'] == 1.0 : - data['virial'] -= tot_v.reshape(data['virial'].shape) - - + if "find_energy" in data and data["find_energy"] == 1.0: + data["energy"] -= tot_e.reshape(data["energy"].shape) + if "find_force" in data and data["find_force"] == 1.0: + data["force"] -= tot_f.reshape(data["force"].shape) + if "find_virial" in data and data["find_virial"] == 1.0: + data["virial"] -= tot_v.reshape(data["virial"].shape) diff --git a/deepmd/infer/deep_dipole.py b/deepmd/infer/deep_dipole.py index d158cbafac..8988083a5e 100644 --- a/deepmd/infer/deep_dipole.py +++ b/deepmd/infer/deep_dipole.py @@ -1,9 +1,15 @@ -from deepmd.infer.deep_tensor import DeepTensor +from typing import ( + TYPE_CHECKING, +) -from typing import TYPE_CHECKING +from deepmd.infer.deep_tensor import ( + DeepTensor, +) if TYPE_CHECKING: - from pathlib import Path + from pathlib import ( + Path, + ) class DeepDipole(DeepTensor): @@ -26,7 +32,10 @@ class DeepDipole(DeepTensor): """ def __init__( - self, model_file: "Path", load_prefix: str = "load", default_tf_graph: bool = False + 
self, + model_file: "Path", + load_prefix: str = "load", + default_tf_graph: bool = False, ) -> None: # use this in favor of dict update to move attribute from class to diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py index 3ea27cf1c5..fa44633f8c 100644 --- a/deepmd/infer/deep_eval.py +++ b/deepmd/infer/deep_eval.py @@ -1,19 +1,39 @@ -from typing import List, Optional, TYPE_CHECKING, Union -from functools import lru_cache +from functools import ( + lru_cache, +) +from typing import ( + TYPE_CHECKING, + List, + Optional, + Union, +) import numpy as np -from deepmd.common import make_default_mesh -from deepmd.env import default_tf_session_config, tf, MODEL_VERSION -from deepmd.utils.sess import run_sess -from deepmd.utils.batch_size import AutoBatchSize + +from deepmd.common import ( + make_default_mesh, +) +from deepmd.env import ( + MODEL_VERSION, + default_tf_session_config, + tf, +) +from deepmd.utils.batch_size import ( + AutoBatchSize, +) +from deepmd.utils.sess import ( + run_sess, +) if TYPE_CHECKING: - from pathlib import Path + from pathlib import ( + Path, + ) class DeepEval: """Common methods for DeepPot, DeepWFC, DeepPolar, ... - + Parameters ---------- model_file : Path @@ -47,7 +67,7 @@ def __init__( f"model in graph (version {self.model_version}) is incompatible" f"with the model (version {MODEL_VERSION}) supported by the current code." 
) - + # set default to False, as subclasses may not support if isinstance(auto_batch_size, bool): if auto_batch_size: @@ -98,22 +118,21 @@ def sess(self) -> tf.Session: # start a tf session associated to the graph return tf.Session(graph=self.graph, config=default_tf_session_config) - def _graph_compatable( - self - ) -> bool : - """ Check the model compatability - + def _graph_compatable(self) -> bool: + """Check the model compatability + Returns ------- bool If the model stored in the graph file is compatable with the current code """ - model_version_major = int(self.model_version.split('.')[0]) - model_version_minor = int(self.model_version.split('.')[1]) - MODEL_VERSION_MAJOR = int(MODEL_VERSION.split('.')[0]) - MODEL_VERSION_MINOR = int(MODEL_VERSION.split('.')[1]) - if (model_version_major != MODEL_VERSION_MAJOR) or \ - (model_version_minor > MODEL_VERSION_MINOR) : + model_version_major = int(self.model_version.split(".")[0]) + model_version_minor = int(self.model_version.split(".")[1]) + MODEL_VERSION_MAJOR = int(MODEL_VERSION.split(".")[0]) + MODEL_VERSION_MINOR = int(MODEL_VERSION.split(".")[1]) + if (model_version_major != MODEL_VERSION_MAJOR) or ( + model_version_minor > MODEL_VERSION_MINOR + ): return False else: return True @@ -147,7 +166,9 @@ def _get_tensor( @staticmethod def _load_graph( - frozen_graph_filename: "Path", prefix: str = "load", default_tf_graph: bool = False + frozen_graph_filename: "Path", + prefix: str = "load", + default_tf_graph: bool = False, ): # We load the protobuf file from the disk and parse it to retrieve the # unserialized graph_def @@ -157,14 +178,14 @@ def _load_graph( if default_tf_graph: tf.import_graph_def( - graph_def, - input_map=None, - return_elements=None, - name=prefix, - producer_op_list=None + graph_def, + input_map=None, + return_elements=None, + name=prefix, + producer_op_list=None, ) graph = tf.get_default_graph() - else : + else: # Then, we can use again a convenient built-in function to import # a 
graph_def into the current default Graph with tf.Graph().as_default() as graph: @@ -173,18 +194,21 @@ def _load_graph( input_map=None, return_elements=None, name=prefix, - producer_op_list=None + producer_op_list=None, ) return graph @staticmethod def sort_input( - coord : np.ndarray, atom_type : np.ndarray, sel_atoms : List[int] = None, mixed_type : bool = False + coord: np.ndarray, + atom_type: np.ndarray, + sel_atoms: List[int] = None, + mixed_type: bool = False, ): """ Sort atoms in the system according their types. - + Parameters ---------- coord @@ -199,7 +223,7 @@ def sort_input( Whether to perform the mixed_type mode. If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), in which frames in a system may have different natoms_vec(s), with the same nloc. - + Returns ------- coord_out @@ -207,7 +231,7 @@ def sort_input( atom_type_out The atom types after sorting idx_map - The index mapping from the input to the output. + The index mapping from the input to the output. 
For example coord_out = coord[:,idx_map,:] sel_atom_type Only output if sel_atoms is not None @@ -224,14 +248,14 @@ def sort_input( if sel_atoms is not None: selection = [False] * np.size(atom_type) for ii in sel_atoms: - selection += (atom_type == ii) + selection += atom_type == ii sel_atom_type = atom_type[selection] natoms = atom_type.size - idx = np.arange (natoms) - idx_map = np.lexsort ((idx, atom_type)) + idx = np.arange(natoms) + idx_map = np.lexsort((idx, atom_type)) nframes = coord.shape[0] coord = coord.reshape([nframes, -1, 3]) - coord = np.reshape(coord[:,idx_map,:], [nframes, -1]) + coord = np.reshape(coord[:, idx_map, :], [nframes, -1]) atom_type = atom_type[idx_map] if sel_atoms is not None: sel_natoms = np.size(sel_atom_type) @@ -243,7 +267,7 @@ def sort_input( return coord, atom_type, idx_map @staticmethod - def reverse_map(vec : np.ndarray, imap : List[int]) -> np.ndarray: + def reverse_map(vec: np.ndarray, imap: List[int]) -> np.ndarray: """Reverse mapping of a vector according to the index map Parameters @@ -252,20 +276,21 @@ def reverse_map(vec : np.ndarray, imap : List[int]) -> np.ndarray: Input vector. Be of shape [nframes, natoms, -1] imap Index map. Be of shape [natoms] - + Returns ------- vec_out Reverse mapped vector. """ - ret = np.zeros(vec.shape) + ret = np.zeros(vec.shape) # for idx,ii in enumerate(imap) : # ret[:,ii,:] = vec[:,idx,:] ret[:, imap, :] = vec return ret - - def make_natoms_vec(self, atom_types : np.ndarray, mixed_type : bool = False) -> np.ndarray : + def make_natoms_vec( + self, atom_types: np.ndarray, mixed_type: bool = False + ) -> np.ndarray: """Make the natom vector used by deepmd-kit. Parameters @@ -276,7 +301,7 @@ def make_natoms_vec(self, atom_types : np.ndarray, mixed_type : bool = False) -> Whether to perform the mixed_type mode. If True, the input data has the mixed_type format (see doc/model/train_se_atten.md), in which frames in a system may have different natoms_vec(s), with the same nloc. 
- + Returns ------- natoms @@ -284,9 +309,9 @@ def make_natoms_vec(self, atom_types : np.ndarray, mixed_type : bool = False) -> natoms[0]: number of local atoms natoms[1]: total number of atoms held by this processor natoms[i]: 2 <= i < Ntypes+2, number of type i atoms - + """ - natoms_vec = np.zeros (self.ntypes+2).astype(int) + natoms_vec = np.zeros(self.ntypes + 2).astype(int) if mixed_type: natoms = atom_types[0].size else: @@ -296,8 +321,8 @@ def make_natoms_vec(self, atom_types : np.ndarray, mixed_type : bool = False) -> if mixed_type: natoms_vec[2] = natoms return natoms_vec - for ii in range (self.ntypes) : - natoms_vec[ii+2] = np.count_nonzero(atom_types == ii) + for ii in range(self.ntypes): + natoms_vec[ii + 2] = np.count_nonzero(atom_types == ii) return natoms_vec def eval_typeebd(self) -> np.ndarray: @@ -309,7 +334,7 @@ def eval_typeebd(self) -> np.ndarray: The output of type embedding network. The shape is [ntypes, o_size], where ntypes is the number of types, and o_size is the number of nodes in the output layer. 
- + Raises ------ KeyError diff --git a/deepmd/infer/deep_polar.py b/deepmd/infer/deep_polar.py index d594be32f8..d76ae5ccad 100644 --- a/deepmd/infer/deep_polar.py +++ b/deepmd/infer/deep_polar.py @@ -1,10 +1,19 @@ -from deepmd.infer.deep_tensor import DeepTensor +from typing import ( + TYPE_CHECKING, + List, + Optional, +) + import numpy as np -from typing import TYPE_CHECKING, List, Optional +from deepmd.infer.deep_tensor import ( + DeepTensor, +) if TYPE_CHECKING: - from pathlib import Path + from pathlib import ( + Path, + ) class DeepPolar(DeepTensor): @@ -27,7 +36,10 @@ class DeepPolar(DeepTensor): """ def __init__( - self, model_file: "Path", load_prefix: str = "load", default_tf_graph: bool = False + self, + model_file: "Path", + load_prefix: str = "load", + default_tf_graph: bool = False, ) -> None: # use this in favor of dict update to move attribute from class to diff --git a/deepmd/infer/deep_pot.py b/deepmd/infer/deep_pot.py index 14ff152a2b..1a7a777b70 100644 --- a/deepmd/infer/deep_pot.py +++ b/deepmd/infer/deep_pot.py @@ -1,16 +1,39 @@ import logging -from typing import TYPE_CHECKING, List, Optional, Tuple, Union, Callable +from typing import ( + TYPE_CHECKING, + Callable, + List, + Optional, + Tuple, + Union, +) import numpy as np -from deepmd.common import make_default_mesh -from deepmd.env import default_tf_session_config, tf -from deepmd.infer.data_modifier import DipoleChargeModifier -from deepmd.infer.deep_eval import DeepEval -from deepmd.utils.sess import run_sess -from deepmd.utils.batch_size import AutoBatchSize + +from deepmd.common import ( + make_default_mesh, +) +from deepmd.env import ( + default_tf_session_config, + tf, +) +from deepmd.infer.data_modifier import ( + DipoleChargeModifier, +) +from deepmd.infer.deep_eval import ( + DeepEval, +) +from deepmd.utils.batch_size import ( + AutoBatchSize, +) +from deepmd.utils.sess import ( + run_sess, +) if TYPE_CHECKING: - from pathlib import Path + from pathlib import ( + Path, + ) log = 
logging.getLogger(__name__) @@ -39,7 +62,7 @@ class DeepPot(DeepEval): >>> cell = np.diag(10 * np.ones(3)).reshape([1, -1]) >>> atype = [1,0,1] >>> e, f, v = dp.eval(coord, cell, atype) - + where `e`, `f` and `v` are predicted energy, force and virial of the system, respectively. Warnings @@ -97,7 +120,7 @@ def __init__( operations = [op.name for op in self.graph.get_operations()] # check if the graph has these operations: # if yes add them - if 't_efield' in operations: + if "t_efield" in operations: self._get_tensor("t_efield:0", "t_efield") self.has_efield = True else: @@ -105,7 +128,7 @@ def __init__( self.t_efield = None self.has_efield = False - if 'load/t_fparam' in operations: + if "load/t_fparam" in operations: self.tensors.update({"t_fparam": "t_fparam:0"}) self.has_fparam = True else: @@ -113,7 +136,7 @@ def __init__( self.t_fparam = None self.has_fparam = False - if 'load/t_aparam' in operations: + if "load/t_aparam" in operations: self.tensors.update({"t_aparam": "t_aparam:0"}) self.has_aparam = True else: @@ -130,7 +153,7 @@ def __init__( raise self._run_default_sess() - self.tmap = self.tmap.decode('UTF-8').split() + self.tmap = self.tmap.decode("UTF-8").split() # setup modifier try: @@ -145,15 +168,31 @@ def __init__( t_sys_charge_map = self._get_tensor("modifier_attr/sys_charge_map:0") t_ewald_h = self._get_tensor("modifier_attr/ewald_h:0") t_ewald_beta = self._get_tensor("modifier_attr/ewald_beta:0") - [mdl_name, mdl_charge_map, sys_charge_map, ewald_h, ewald_beta] = run_sess(self.sess, [t_mdl_name, t_mdl_charge_map, t_sys_charge_map, t_ewald_h, t_ewald_beta]) + [mdl_name, mdl_charge_map, sys_charge_map, ewald_h, ewald_beta] = run_sess( + self.sess, + [ + t_mdl_name, + t_mdl_charge_map, + t_sys_charge_map, + t_ewald_h, + t_ewald_beta, + ], + ) mdl_name = mdl_name.decode("UTF-8") mdl_charge_map = [int(ii) for ii in mdl_charge_map.decode("UTF-8").split()] sys_charge_map = [int(ii) for ii in sys_charge_map.decode("UTF-8").split()] - self.dm = 
DipoleChargeModifier(mdl_name, mdl_charge_map, sys_charge_map, ewald_h = ewald_h, ewald_beta = ewald_beta) + self.dm = DipoleChargeModifier( + mdl_name, + mdl_charge_map, + sys_charge_map, + ewald_h=ewald_h, + ewald_beta=ewald_beta, + ) def _run_default_sess(self): - [self.ntypes, self.rcut, self.dfparam, self.daparam, self.tmap] = run_sess(self.sess, - [self.t_ntypes, self.t_rcut, self.t_dfparam, self.t_daparam, self.t_tmap] + [self.ntypes, self.rcut, self.dfparam, self.daparam, self.tmap] = run_sess( + self.sess, + [self.t_ntypes, self.t_rcut, self.t_dfparam, self.t_daparam, self.t_tmap], ) def get_ntypes(self) -> int: @@ -171,6 +210,7 @@ def get_type_map(self) -> List[str]: def get_sel_type(self) -> List[int]: """Unsupported in this model.""" raise NotImplementedError("This model type does not support this attribute") + def get_dim_fparam(self) -> int: """Get the number (dimension) of frame parameters of this DP.""" return self.dfparam @@ -178,10 +218,10 @@ def get_dim_fparam(self) -> int: def get_dim_aparam(self) -> int: """Get the number (dimension) of atomic parameters of this DP.""" return self.daparam - + def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Callable: """Wrapper method with auto batch size. 
- + Parameters ---------- inner_func : Callable @@ -190,20 +230,29 @@ def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Calla number of tests natoms : int number of atoms - + Returns ------- Callable the wrapper """ if self.auto_batch_size is not None: + def eval_func(*args, **kwargs): - return self.auto_batch_size.execute_all(inner_func, numb_test, natoms, *args, **kwargs) + return self.auto_batch_size.execute_all( + inner_func, numb_test, natoms, *args, **kwargs + ) + else: eval_func = inner_func return eval_func - def _get_natoms_and_nframes(self, coords: np.ndarray, atom_types: Union[List[int], np.ndarray], mixed_type: bool = False) -> Tuple[int, int]: + def _get_natoms_and_nframes( + self, + coords: np.ndarray, + atom_types: Union[List[int], np.ndarray], + mixed_type: bool = False, + ) -> Tuple[int, int]: if mixed_type: natoms = len(atom_types[0]) else: @@ -271,14 +320,25 @@ def eval( The atomic virial. Only returned when atomic == True """ # reshape coords before getting shape - natoms, numb_test = self._get_natoms_and_nframes(coords, atom_types, mixed_type=mixed_type) - output = self._eval_func(self._eval_inner, numb_test, natoms)(coords, cells, atom_types, fparam = fparam, aparam = aparam, atomic = atomic, efield = efield, mixed_type=mixed_type) + natoms, numb_test = self._get_natoms_and_nframes( + coords, atom_types, mixed_type=mixed_type + ) + output = self._eval_func(self._eval_inner, numb_test, natoms)( + coords, + cells, + atom_types, + fparam=fparam, + aparam=aparam, + atomic=atomic, + efield=efield, + mixed_type=mixed_type, + ) if self.modifier_type is not None: if atomic: - raise RuntimeError('modifier does not support atomic modification') + raise RuntimeError("modifier does not support atomic modification") me, mf, mv = self.dm.eval(coords, cells, atom_types) - output = list(output) # tuple to list + output = list(output) # tuple to list e, f, v = output[:3] output[0] += me.reshape(e.shape) output[1] += mf.reshape(f.shape) 
@@ -295,14 +355,16 @@ def _prepare_feed_dict( aparam=None, atomic=False, efield=None, - mixed_type=False + mixed_type=False, ): # standarize the shape of inputs - natoms, nframes = self._get_natoms_and_nframes(coords, atom_types, mixed_type=mixed_type) + natoms, nframes = self._get_natoms_and_nframes( + coords, atom_types, mixed_type=mixed_type + ) if mixed_type: atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms]) else: - atom_types = np.array(atom_types, dtype = int).reshape([-1]) + atom_types = np.array(atom_types, dtype=int).reshape([-1]) coords = np.reshape(np.array(coords), [-1, natoms * 3]) if cells is None: pbc = False @@ -311,47 +373,57 @@ def _prepare_feed_dict( else: pbc = True cells = np.array(cells).reshape([nframes, 9]) - - if self.has_fparam : - assert(fparam is not None) + + if self.has_fparam: + assert fparam is not None fparam = np.array(fparam) - if self.has_aparam : - assert(aparam is not None) + if self.has_aparam: + assert aparam is not None aparam = np.array(aparam) - if self.has_efield : - assert(efield is not None), "you are using a model with external field, parameter efield should be provided" + if self.has_efield: + assert ( + efield is not None + ), "you are using a model with external field, parameter efield should be provided" efield = np.array(efield) - # reshape the inputs - if self.has_fparam : + # reshape the inputs + if self.has_fparam: fdim = self.get_dim_fparam() - if fparam.size == nframes * fdim : + if fparam.size == nframes * fdim: fparam = np.reshape(fparam, [nframes, fdim]) - elif fparam.size == fdim : + elif fparam.size == fdim: fparam = np.tile(fparam.reshape([-1]), [nframes, 1]) - else : - raise RuntimeError('got wrong size of frame param, should be either %d x %d or %d' % (nframes, fdim, fdim)) - if self.has_aparam : + else: + raise RuntimeError( + "got wrong size of frame param, should be either %d x %d or %d" + % (nframes, fdim, fdim) + ) + if self.has_aparam: fdim = self.get_dim_aparam() if aparam.size 
== nframes * natoms * fdim: aparam = np.reshape(aparam, [nframes, natoms * fdim]) - elif aparam.size == natoms * fdim : + elif aparam.size == natoms * fdim: aparam = np.tile(aparam.reshape([-1]), [nframes, 1]) - elif aparam.size == fdim : + elif aparam.size == fdim: aparam = np.tile(aparam.reshape([-1]), [nframes, natoms]) - else : - raise RuntimeError('got wrong size of frame param, should be either %d x %d x %d or %d x %d or %d' % (nframes, natoms, fdim, natoms, fdim, fdim)) + else: + raise RuntimeError( + "got wrong size of frame param, should be either %d x %d x %d or %d x %d or %d" + % (nframes, natoms, fdim, natoms, fdim, fdim) + ) # sort inputs - coords, atom_types, imap = self.sort_input(coords, atom_types, mixed_type=mixed_type) + coords, atom_types, imap = self.sort_input( + coords, atom_types, mixed_type=mixed_type + ) if self.has_efield: efield = np.reshape(efield, [nframes, natoms, 3]) - efield = efield[:,imap,:] - efield = np.reshape(efield, [nframes, natoms*3]) + efield = efield[:, imap, :] + efield = np.reshape(efield, [nframes, natoms * 3]) # make natoms_vec and default_mesh natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type) - assert(natoms_vec[0] == natoms) + assert natoms_vec[0] == natoms # evaluate feed_dict_test = {} @@ -359,21 +431,23 @@ def _prepare_feed_dict( if mixed_type: feed_dict_test[self.t_type] = atom_types.reshape([-1]) else: - feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape([-1]) + feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape( + [-1] + ) feed_dict_test[self.t_coord] = np.reshape(coords, [-1]) - + if len(self.t_box.shape) == 1: - feed_dict_test[self.t_box ] = np.reshape(cells , [-1]) + feed_dict_test[self.t_box] = np.reshape(cells, [-1]) elif len(self.t_box.shape) == 2: - feed_dict_test[self.t_box ] = cells + feed_dict_test[self.t_box] = cells else: raise RuntimeError if self.has_efield: - feed_dict_test[self.t_efield]= np.reshape(efield, [-1]) + 
feed_dict_test[self.t_efield] = np.reshape(efield, [-1]) if pbc: - feed_dict_test[self.t_mesh ] = make_default_mesh(cells) + feed_dict_test[self.t_mesh] = make_default_mesh(cells) else: - feed_dict_test[self.t_mesh ] = np.array([], dtype = np.int32) + feed_dict_test[self.t_mesh] = np.array([], dtype=np.int32) if self.has_fparam: feed_dict_test[self.t_fparam] = np.reshape(fparam, [-1]) if self.has_aparam: @@ -389,18 +463,19 @@ def _eval_inner( aparam=None, atomic=False, efield=None, - mixed_type=False + mixed_type=False, ): - natoms, nframes = self._get_natoms_and_nframes(coords, atom_types, mixed_type=mixed_type) - feed_dict_test, imap = self._prepare_feed_dict(coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type) - t_out = [self.t_energy, - self.t_force, - self.t_virial] - if atomic : - t_out += [self.t_ae, - self.t_av] - - v_out = run_sess(self.sess, t_out, feed_dict = feed_dict_test) + natoms, nframes = self._get_natoms_and_nframes( + coords, atom_types, mixed_type=mixed_type + ) + feed_dict_test, imap = self._prepare_feed_dict( + coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type + ) + t_out = [self.t_energy, self.t_force, self.t_virial] + if atomic: + t_out += [self.t_ae, self.t_av] + + v_out = run_sess(self.sess, t_out, feed_dict=feed_dict_test) energy = v_out[0] force = v_out[1] virial = v_out[2] @@ -409,10 +484,10 @@ def _eval_inner( av = v_out[4] # reverse map of the outputs - force = self.reverse_map(np.reshape(force, [nframes,-1,3]), imap) - if atomic : - ae = self.reverse_map(np.reshape(ae, [nframes,-1,1]), imap) - av = self.reverse_map(np.reshape(av, [nframes,-1,9]), imap) + force = self.reverse_map(np.reshape(force, [nframes, -1, 3]), imap) + if atomic: + ae = self.reverse_map(np.reshape(ae, [nframes, -1, 1]), imap) + av = self.reverse_map(np.reshape(av, [nframes, -1, 9]), imap) energy = np.reshape(energy, [nframes, 1]) force = np.reshape(force, [nframes, natoms, 3]) @@ -421,18 +496,19 @@ def _eval_inner( ae = 
np.reshape(ae, [nframes, natoms, 1]) av = np.reshape(av, [nframes, natoms, 9]) return energy, force, virial, ae, av - else : + else: return energy, force, virial - def eval_descriptor(self, - coords: np.ndarray, - cells: np.ndarray, - atom_types: List[int], - fparam: Optional[np.ndarray] = None, - aparam: Optional[np.ndarray] = None, - efield: Optional[np.ndarray] = None, - mixed_type: bool = False, - ) -> np.array: + def eval_descriptor( + self, + coords: np.ndarray, + cells: np.ndarray, + atom_types: List[int], + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + efield: Optional[np.ndarray] = None, + mixed_type: bool = False, + ) -> np.array: """Evaluate descriptors by using this DP. Parameters @@ -471,20 +547,37 @@ def eval_descriptor(self, descriptor Descriptors. """ - natoms, numb_test = self._get_natoms_and_nframes(coords, atom_types, mixed_type=mixed_type) - descriptor = self._eval_func(self._eval_descriptor_inner, numb_test, natoms)(coords, cells, atom_types, fparam = fparam, aparam = aparam, efield = efield, mixed_type=mixed_type) + natoms, numb_test = self._get_natoms_and_nframes( + coords, atom_types, mixed_type=mixed_type + ) + descriptor = self._eval_func(self._eval_descriptor_inner, numb_test, natoms)( + coords, + cells, + atom_types, + fparam=fparam, + aparam=aparam, + efield=efield, + mixed_type=mixed_type, + ) return descriptor - - def _eval_descriptor_inner(self, - coords: np.ndarray, - cells: np.ndarray, - atom_types: List[int], - fparam: Optional[np.ndarray] = None, - aparam: Optional[np.ndarray] = None, - efield: Optional[np.ndarray] = None, - mixed_type: bool = False, - ) -> np.array: - natoms, nframes = self._get_natoms_and_nframes(coords, atom_types, mixed_type=mixed_type) - feed_dict_test, imap = self._prepare_feed_dict(coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type) - descriptor, = run_sess(self.sess, [self.t_descriptor], feed_dict = feed_dict_test) + + def _eval_descriptor_inner( + 
self, + coords: np.ndarray, + cells: np.ndarray, + atom_types: List[int], + fparam: Optional[np.ndarray] = None, + aparam: Optional[np.ndarray] = None, + efield: Optional[np.ndarray] = None, + mixed_type: bool = False, + ) -> np.array: + natoms, nframes = self._get_natoms_and_nframes( + coords, atom_types, mixed_type=mixed_type + ) + feed_dict_test, imap = self._prepare_feed_dict( + coords, cells, atom_types, fparam, aparam, efield, mixed_type=mixed_type + ) + (descriptor,) = run_sess( + self.sess, [self.t_descriptor], feed_dict=feed_dict_test + ) return self.reverse_map(np.reshape(descriptor, [nframes, natoms, -1]), imap) diff --git a/deepmd/infer/deep_tensor.py b/deepmd/infer/deep_tensor.py index b4f6b69427..74e17c6092 100644 --- a/deepmd/infer/deep_tensor.py +++ b/deepmd/infer/deep_tensor.py @@ -1,14 +1,32 @@ import os -from typing import List, Optional, TYPE_CHECKING, Tuple +from typing import ( + TYPE_CHECKING, + List, + Optional, + Tuple, +) import numpy as np -from deepmd.common import make_default_mesh -from deepmd.env import default_tf_session_config, tf -from deepmd.infer.deep_eval import DeepEval -from deepmd.utils.sess import run_sess + +from deepmd.common import ( + make_default_mesh, +) +from deepmd.env import ( + default_tf_session_config, + tf, +) +from deepmd.infer.deep_eval import ( + DeepEval, +) +from deepmd.utils.sess import ( + run_sess, +) if TYPE_CHECKING: - from pathlib import Path + from pathlib import ( + Path, + ) + class DeepTensor(DeepEval): """Evaluates a tensor model. 
@@ -42,31 +60,29 @@ class DeepTensor(DeepEval): def __init__( self, model_file: "Path", - load_prefix: str = 'load', - default_tf_graph: bool = False + load_prefix: str = "load", + default_tf_graph: bool = False, ) -> None: """Constructor""" DeepEval.__init__( - self, - model_file, - load_prefix=load_prefix, - default_tf_graph=default_tf_graph + self, model_file, load_prefix=load_prefix, default_tf_graph=default_tf_graph ) # check model type model_type = self.tensors["t_tensor"][2:-2] - assert self.model_type == model_type, \ - f"expect {model_type} model but got {self.model_type}" + assert ( + self.model_type == model_type + ), f"expect {model_type} model but got {self.model_type}" # now load tensors to object attributes for attr_name, tensor_name in self.tensors.items(): self._get_tensor(tensor_name, attr_name) - + # load optional tensors if possible optional_tensors = { "t_global_tensor": f"o_global_{model_type}:0", "t_force": "o_force:0", "t_virial": "o_virial:0", - "t_atom_virial": "o_atom_virial:0" + "t_atom_virial": "o_atom_virial:0", } try: # first make sure these tensor all exists (but do not modify self attr) @@ -80,15 +96,21 @@ def __init__( else: self.tensors.update(optional_tensors) self._support_gfv = True - + self._run_default_sess() - self.tmap = self.tmap.decode('UTF-8').split() + self.tmap = self.tmap.decode("UTF-8").split() def _run_default_sess(self): - [self.ntypes, self.rcut, self.tmap, self.tselt, self.output_dim] \ - = run_sess(self.sess, - [self.t_ntypes, self.t_rcut, self.t_tmap, self.t_sel_type, self.t_ouput_dim] - ) + [self.ntypes, self.rcut, self.tmap, self.tselt, self.output_dim] = run_sess( + self.sess, + [ + self.t_ntypes, + self.t_rcut, + self.t_tmap, + self.t_sel_type, + self.t_ouput_dim, + ], + ) def get_ntypes(self) -> int: """Get the number of atom types of this model.""" @@ -103,7 +125,7 @@ def get_type_map(self) -> List[str]: return self.tmap def get_sel_type(self) -> List[int]: - """Get the selected atom types of this 
model.""" + """Get the selected atom types of this model.""" return self.tselt def get_dim_fparam(self) -> int: @@ -130,11 +152,11 @@ def eval( Parameters ---------- coords - The coordinates of atoms. + The coordinates of atoms. The array should be of size nframes x natoms x 3 cells - The cell of the region. - If None then non-PBC is assumed, otherwise using PBC. + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. The array should be of size nframes x 9 atom_types The atom types @@ -165,7 +187,7 @@ def eval( natoms = atom_types[0].size atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms]) else: - atom_types = np.array(atom_types, dtype = int).reshape([-1]) + atom_types = np.array(atom_types, dtype=int).reshape([-1]) natoms = atom_types.size coords = np.reshape(np.array(coords), [-1, natoms * 3]) nframes = coords.shape[0] @@ -177,12 +199,13 @@ def eval( cells = np.array(cells).reshape([nframes, 9]) # sort inputs - coords, atom_types, imap, sel_at, sel_imap = \ - self.sort_input(coords, atom_types, sel_atoms=self.get_sel_type(), mixed_type=mixed_type) + coords, atom_types, imap, sel_at, sel_imap = self.sort_input( + coords, atom_types, sel_atoms=self.get_sel_type(), mixed_type=mixed_type + ) # make natoms_vec and default_mesh natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type) - assert(natoms_vec[0] == natoms) + assert natoms_vec[0] == natoms # evaluate feed_dict_test = {} @@ -190,33 +213,39 @@ def eval( if mixed_type: feed_dict_test[self.t_type] = atom_types.reshape([-1]) else: - feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape([-1]) + feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape( + [-1] + ) feed_dict_test[self.t_coord] = np.reshape(coords, [-1]) - feed_dict_test[self.t_box ] = np.reshape(cells , [-1]) + feed_dict_test[self.t_box] = np.reshape(cells, [-1]) if pbc: - feed_dict_test[self.t_mesh ] = make_default_mesh(cells) + feed_dict_test[self.t_mesh] = 
make_default_mesh(cells) else: - feed_dict_test[self.t_mesh ] = np.array([], dtype = np.int32) + feed_dict_test[self.t_mesh] = np.array([], dtype=np.int32) if atomic: - assert "global" not in self.model_type, \ - f"cannot do atomic evaluation with model type {self.model_type}" + assert ( + "global" not in self.model_type + ), f"cannot do atomic evaluation with model type {self.model_type}" t_out = [self.t_tensor] else: - assert self._support_gfv or "global" in self.model_type, \ - f"do not support global tensor evaluation with old {self.model_type} model" + assert ( + self._support_gfv or "global" in self.model_type + ), f"do not support global tensor evaluation with old {self.model_type} model" t_out = [self.t_global_tensor if self._support_gfv else self.t_tensor] - v_out = self.sess.run (t_out, feed_dict = feed_dict_test) + v_out = self.sess.run(t_out, feed_dict=feed_dict_test) tensor = v_out[0] # reverse map of the outputs if atomic: tensor = np.array(tensor) - tensor = self.reverse_map(np.reshape(tensor, [nframes,-1,self.output_dim]), sel_imap) + tensor = self.reverse_map( + np.reshape(tensor, [nframes, -1, self.output_dim]), sel_imap + ) tensor = np.reshape(tensor, [nframes, len(sel_at), self.output_dim]) else: tensor = np.reshape(tensor, [nframes, self.output_dim]) - + return tensor def eval_full( @@ -237,11 +266,11 @@ def eval_full( Parameters ---------- coords - The coordinates of atoms. + The coordinates of atoms. The array should be of size nframes x natoms x 3 cells - The cell of the region. - If None then non-PBC is assumed, otherwise using PBC. + The cell of the region. + If None then non-PBC is assumed, otherwise using PBC. The array should be of size nframes x 9 atom_types The atom types @@ -262,7 +291,7 @@ def eval_full( Returns ------- tensor - The global tensor. + The global tensor. shape: [nframes x nout] force The component-wise force (negative derivative) on each atom. @@ -277,13 +306,12 @@ def eval_full( The atomic virial. 
Only returned when atomic == True shape: [nframes x nout x natoms x 9] """ - assert self._support_gfv, \ - f"do not support eval_full with old tensor model" + assert self._support_gfv, f"do not support eval_full with old tensor model" # standarize the shape of inputs if mixed_type: natoms = atom_types[0].size - atom_types = np.array(atom_types, dtype =int).reshape([-1, natoms]) + atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms]) else: atom_types = np.array(atom_types, dtype=int).reshape([-1]) natoms = atom_types.size @@ -299,11 +327,12 @@ def eval_full( # sort inputs coords, atom_types, imap, sel_at, sel_imap = self.sort_input( - coords, atom_types, sel_atoms=self.get_sel_type(), mixed_type=mixed_type) + coords, atom_types, sel_atoms=self.get_sel_type(), mixed_type=mixed_type + ) # make natoms_vec and default_mesh natoms_vec = self.make_natoms_vec(atom_types, mixed_type=mixed_type) - assert(natoms_vec[0] == natoms) + assert natoms_vec[0] == natoms # evaluate feed_dict_test = {} @@ -311,42 +340,43 @@ def eval_full( if mixed_type: feed_dict_test[self.t_type] = atom_types.reshape([-1]) else: - feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape([-1]) + feed_dict_test[self.t_type] = np.tile(atom_types, [nframes, 1]).reshape( + [-1] + ) feed_dict_test[self.t_coord] = np.reshape(coords, [-1]) - feed_dict_test[self.t_box ] = np.reshape(cells , [-1]) + feed_dict_test[self.t_box] = np.reshape(cells, [-1]) if pbc: - feed_dict_test[self.t_mesh ] = make_default_mesh(cells) + feed_dict_test[self.t_mesh] = make_default_mesh(cells) else: - feed_dict_test[self.t_mesh ] = np.array([], dtype = np.int32) - - t_out = [self.t_global_tensor, - self.t_force, - self.t_virial] - if atomic : - t_out += [self.t_tensor, - self.t_atom_virial] - - v_out = self.sess.run (t_out, feed_dict = feed_dict_test) - gt = v_out[0] # global tensor - force = v_out[1] + feed_dict_test[self.t_mesh] = np.array([], dtype=np.int32) + + t_out = [self.t_global_tensor, 
self.t_force, self.t_virial] + if atomic: + t_out += [self.t_tensor, self.t_atom_virial] + + v_out = self.sess.run(t_out, feed_dict=feed_dict_test) + gt = v_out[0] # global tensor + force = v_out[1] virial = v_out[2] if atomic: - at = v_out[3] # atom tensor - av = v_out[4] # atom virial + at = v_out[3] # atom tensor + av = v_out[4] # atom virial # please note here the shape are wrong! - force = self.reverse_map(np.reshape(force, [nframes*nout, natoms ,3]), imap) + force = self.reverse_map(np.reshape(force, [nframes * nout, natoms, 3]), imap) if atomic: - at = self.reverse_map(np.reshape(at, [nframes, len(sel_at), nout]), sel_imap) - av = self.reverse_map(np.reshape(av, [nframes*nout, natoms, 9]), imap) - + at = self.reverse_map( + np.reshape(at, [nframes, len(sel_at), nout]), sel_imap + ) + av = self.reverse_map(np.reshape(av, [nframes * nout, natoms, 9]), imap) + # make sure the shapes are correct here - gt = np.reshape(gt, [nframes, nout]) - force = np.reshape(force, [nframes, nout, natoms, 3]) + gt = np.reshape(gt, [nframes, nout]) + force = np.reshape(force, [nframes, nout, natoms, 3]) virial = np.reshape(virial, [nframes, nout, 9]) if atomic: at = np.reshape(at, [nframes, len(sel_at), self.output_dim]) av = np.reshape(av, [nframes, nout, natoms, 9]) return gt, force, virial, at, av else: - return gt, force, virial \ No newline at end of file + return gt, force, virial diff --git a/deepmd/infer/deep_wfc.py b/deepmd/infer/deep_wfc.py index 40d3cd6a5c..ef7866f04d 100644 --- a/deepmd/infer/deep_wfc.py +++ b/deepmd/infer/deep_wfc.py @@ -1,9 +1,15 @@ -from deepmd.infer.deep_tensor import DeepTensor +from typing import ( + TYPE_CHECKING, +) -from typing import TYPE_CHECKING +from deepmd.infer.deep_tensor import ( + DeepTensor, +) if TYPE_CHECKING: - from pathlib import Path + from pathlib import ( + Path, + ) class DeepWFC(DeepTensor): @@ -26,7 +32,10 @@ class DeepWFC(DeepTensor): """ def __init__( - self, model_file: "Path", load_prefix: str = "load", 
default_tf_graph: bool = False + self, + model_file: "Path", + load_prefix: str = "load", + default_tf_graph: bool = False, ) -> None: # use this in favor of dict update to move attribute from class to diff --git a/deepmd/infer/ewald_recp.py b/deepmd/infer/ewald_recp.py index 05293e91bd..7f53dcf2a5 100644 --- a/deepmd/infer/ewald_recp.py +++ b/deepmd/infer/ewald_recp.py @@ -1,25 +1,33 @@ +from typing import ( + List, + Tuple, +) + import numpy as np -from typing import Tuple, List -from deepmd.env import tf -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION -from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION -from deepmd.env import global_cvt_2_tf_float -from deepmd.env import global_cvt_2_ener_float -from deepmd.env import op_module -from deepmd.env import default_tf_session_config -from deepmd.utils.sess import run_sess +from deepmd.env import ( + GLOBAL_ENER_FLOAT_PRECISION, + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + default_tf_session_config, + global_cvt_2_ener_float, + global_cvt_2_tf_float, + op_module, + tf, +) +from deepmd.utils.sess import ( + run_sess, +) + -class EwaldRecp () : +class EwaldRecp: """ Evaluate the reciprocal part of the Ewald sum """ - def __init__(self, - hh, - beta): + + def __init__(self, hh, beta): """ - Constructor + Constructor Parameters ---------- @@ -32,25 +40,31 @@ def __init__(self, self.beta = beta with tf.Graph().as_default() as graph: # place holders - self.t_nloc = tf.placeholder(tf.int32, [1], name = "t_nloc") - self.t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_coord') - self.t_charge = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_charge') - self.t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_box') - # output - self.t_energy, self.t_force, self.t_virial \ - = op_module.ewald_recp(self.t_coord, self.t_charge, self.t_nloc, self.t_box, - ewald_h = self.hh, - ewald_beta = self.beta) + self.t_nloc = 
tf.placeholder(tf.int32, [1], name="t_nloc") + self.t_coord = tf.placeholder( + GLOBAL_TF_FLOAT_PRECISION, [None], name="t_coord" + ) + self.t_charge = tf.placeholder( + GLOBAL_TF_FLOAT_PRECISION, [None], name="t_charge" + ) + self.t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_box") + # output + self.t_energy, self.t_force, self.t_virial = op_module.ewald_recp( + self.t_coord, + self.t_charge, + self.t_nloc, + self.t_box, + ewald_h=self.hh, + ewald_beta=self.beta, + ) self.sess = tf.Session(graph=graph, config=default_tf_session_config) - def eval(self, - coord : np.ndarray, - charge : np.ndarray, - box : np.ndarray - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray] : + def eval( + self, coord: np.ndarray, charge: np.ndarray, box: np.ndarray + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """ - Evaluate - + Evaluate + Parameters ---------- coord @@ -63,11 +77,11 @@ def eval(self, Returns ------- e - The energy + The energy f - The force + The force v - The virial + The virial """ coord = np.array(coord) charge = np.array(charge) @@ -78,15 +92,15 @@ def eval(self, charge = np.reshape(charge, [nframes * natoms]) box = np.reshape(box, [nframes * 9]) - [energy, force, virial] \ - = run_sess(self.sess, [self.t_energy, self.t_force, self.t_virial], - feed_dict = { - self.t_coord: coord, - self.t_charge: charge, - self.t_box: box, - self.t_nloc: [natoms], - }) + [energy, force, virial] = run_sess( + self.sess, + [self.t_energy, self.t_force, self.t_virial], + feed_dict={ + self.t_coord: coord, + self.t_charge: charge, + self.t_box: box, + self.t_nloc: [natoms], + }, + ) return energy, force, virial - - diff --git a/deepmd/infer/model_devi.py b/deepmd/infer/model_devi.py index 5bbcd66ee2..fc73a97f03 100644 --- a/deepmd/infer/model_devi.py +++ b/deepmd/infer/model_devi.py @@ -1,51 +1,64 @@ import numpy as np -from .deep_pot import DeepPot -from ..utils.data import DeepmdData -from ..utils.batch_size import AutoBatchSize -from deepmd.common import 
expand_sys_str - + +from deepmd.common import ( + expand_sys_str, +) + +from ..utils.batch_size import ( + AutoBatchSize, +) +from ..utils.data import ( + DeepmdData, +) +from .deep_pot import ( + DeepPot, +) + def calc_model_devi_f(fs: np.ndarray): - ''' + """ Parameters ---------- fs : numpy.ndarray size of `n_models x n_frames x n_atoms x 3` - ''' + """ fs_devi = np.linalg.norm(np.std(fs, axis=0), axis=-1) max_devi_f = np.max(fs_devi, axis=-1) min_devi_f = np.min(fs_devi, axis=-1) avg_devi_f = np.mean(fs_devi, axis=-1) return max_devi_f, min_devi_f, avg_devi_f + def calc_model_devi_e(es: np.ndarray): - ''' + """ Parameters ---------- es : numpy.ndarray size of `n_models x n_frames x n_atoms - ''' + """ es_devi = np.std(es, axis=0) max_devi_e = np.max(es_devi, axis=1) min_devi_e = np.min(es_devi, axis=1) avg_devi_e = np.mean(es_devi, axis=1) return max_devi_e, min_devi_e, avg_devi_e + def calc_model_devi_v(vs: np.ndarray): - ''' + """ Parameters ---------- vs : numpy.ndarray size of `n_models x n_frames x 9` - ''' + """ vs_devi = np.std(vs, axis=0) max_devi_v = np.max(vs_devi, axis=-1) min_devi_v = np.min(vs_devi, axis=-1) avg_devi_v = np.linalg.norm(vs_devi, axis=-1) / 3 return max_devi_v, min_devi_v, avg_devi_v -def write_model_devi_out(devi: np.ndarray, fname: str, header: str=""): - ''' + +def write_model_devi_out(devi: np.ndarray, fname: str, header: str = ""): + """ Parameters ---------- devi : numpy.ndarray @@ -54,23 +67,30 @@ def write_model_devi_out(devi: np.ndarray, fname: str, header: str=""): the file name to dump header : str, default="" the header to dump - ''' + """ assert devi.shape[1] == 7 header = "%s\n%10s" % (header, "step") - for item in 'vf': - header += "%19s%19s%19s" % (f"max_devi_{item}", f"min_devi_{item}", f"avg_devi_{item}") + for item in "vf": + header += "%19s%19s%19s" % ( + f"max_devi_{item}", + f"min_devi_{item}", + f"avg_devi_{item}", + ) with open(fname, "ab") as fp: - np.savetxt(fp, - devi, - fmt=['%12d'] + ['%19.6e' for _ in 
range(6)], - delimiter='', - header=header) + np.savetxt( + fp, + devi, + fmt=["%12d"] + ["%19.6e" for _ in range(6)], + delimiter="", + header=header, + ) return devi + def _check_tmaps(tmaps, ref_tmap=None): - ''' + """ Check whether type maps are identical - ''' + """ assert isinstance(tmaps, list) if ref_tmap is None: ref_tmap = tmaps[0] @@ -83,14 +103,16 @@ def _check_tmaps(tmaps, ref_tmap=None): break return flag -def calc_model_devi(coord, - box, - atype, - models, - fname=None, - frequency=1, - ): - ''' + +def calc_model_devi( + coord, + box, + atype, + models, + fname=None, + frequency=1, +): + """ Python interface to calculate model deviation Parameters @@ -107,13 +129,13 @@ def calc_model_devi(coord, File to dump results, default None frequency : int Steps between frames (if the system is given by molecular dynamics engine), default 1 - + Returns ------- model_devi : numpy.ndarray, `n_frames x 7` Model deviation results. The first column is index of steps, the other 6 columns are max_devi_v, min_devi_v, avg_devi_v, max_devi_f, min_devi_f, avg_devi_f. 
- + Examples -------- >>> from deepmd.infer import calc_model_devi @@ -124,7 +146,7 @@ def calc_model_devi(coord, >>> atype = [1,0,1] >>> graphs = [DP("graph.000.pb"), DP("graph.001.pb")] >>> model_devi = calc_model_devi(coord, cell, atype, graphs) - ''' + """ if box is not None: nopbc = True else: @@ -140,10 +162,10 @@ def calc_model_devi(coord, ) forces.append(ret[1]) virials.append(ret[2] / len(atype)) - + forces = np.array(forces) virials = np.array(virials) - + devi = [np.arange(coord.shape[0]) * frequency] devi += list(calc_model_devi_v(virials)) devi += list(calc_model_devi_f(forces)) @@ -151,17 +173,12 @@ def calc_model_devi(coord, if fname: write_model_devi_out(devi, fname) return devi - + + def make_model_devi( - *, - models: list, - system: str, - set_prefix: str, - output: str, - frequency: int, - **kwargs + *, models: list, system: str, set_prefix: str, output: str, frequency: int, **kwargs ): - ''' + """ Make model deviation calculation Parameters @@ -175,10 +192,10 @@ def make_model_devi( output: str The output file for model deviation results frequency: int - The number of steps that elapse between writing coordinates + The number of steps that elapse between writing coordinates in a trajectory by a MD engine (such as Gromacs / Lammps). This paramter is used to determine the index in the output file. 
- ''' + """ auto_batch_size = AutoBatchSize() # init models dp_models = [DeepPot(model, auto_batch_size=auto_batch_size) for model in models] @@ -204,7 +221,7 @@ def make_model_devi( for data in data_sets: coord = data["coord"] box = data["box"] - atype = data["type"][0] + atype = data["type"][0] if not dp_data.pbc: box = None devi = calc_model_devi(coord, box, atype, dp_models) diff --git a/deepmd/lmp.py b/deepmd/lmp.py index 1fa2db9e44..2d4c235220 100644 --- a/deepmd/lmp.py +++ b/deepmd/lmp.py @@ -1,12 +1,21 @@ """Register entry points for lammps-wheel.""" import os import platform -from pathlib import Path -from typing import List, Optional +from pathlib import ( + Path, +) +from typing import ( + List, + Optional, +) -from find_libpython import find_libpython +from find_libpython import ( + find_libpython, +) -from deepmd.env import tf +from deepmd.env import ( + tf, +) def get_env(paths: List[Optional[str]]) -> str: @@ -24,12 +33,14 @@ def get_env(paths: List[Optional[str]]) -> str: tf_dir = tf.sysconfig.get_lib() op_dir = str((Path(__file__).parent / "op").absolute()) # set LD_LIBRARY_PATH -os.environ[lib_env] = get_env([ - os.environ.get(lib_env), - tf_dir, - os.path.join(tf_dir, "python"), - op_dir, -]) +os.environ[lib_env] = get_env( + [ + os.environ.get(lib_env), + tf_dir, + os.path.join(tf_dir, "python"), + op_dir, + ] +) # preload python library libpython = find_libpython() @@ -39,10 +50,13 @@ def get_env(paths: List[Optional[str]]) -> str: preload_env = "DYLD_INSERT_LIBRARIES" else: raise RuntimeError("Unsupported platform") -os.environ[preload_env] = get_env([ - os.environ.get(preload_env), - libpython, -]) +os.environ[preload_env] = get_env( + [ + os.environ.get(preload_env), + libpython, + ] +) + def get_op_dir() -> str: """Get the directory of the deepmd-kit OP library""" diff --git a/deepmd/loggers/__init__.py b/deepmd/loggers/__init__.py index 8eb1868ae3..f9cf3087e6 100644 --- a/deepmd/loggers/__init__.py +++ b/deepmd/loggers/__init__.py @@ -1,5 
+1,7 @@ """Module taking care of logging duties.""" -from .loggers import set_log_handles +from .loggers import ( + set_log_handles, +) __all__ = ["set_log_handles"] diff --git a/deepmd/loggers/loggers.py b/deepmd/loggers/loggers.py index 04321486ad..f5457dd5b0 100644 --- a/deepmd/loggers/loggers.py +++ b/deepmd/loggers/loggers.py @@ -2,12 +2,19 @@ import logging import os -from typing import TYPE_CHECKING, Optional +from typing import ( + TYPE_CHECKING, + Optional, +) if TYPE_CHECKING: - from pathlib import Path + from pathlib import ( + Path, + ) - from mpi4py import MPI + from mpi4py import ( + MPI, + ) _MPI_APPEND_MODE = MPI.MODE_CREATE | MPI.MODE_APPEND @@ -20,14 +27,14 @@ "[%(asctime)s] %(app_name)s %(levelname)-7s %(name)-45s %(message)s" ) CFORMATTER = logging.Formatter( -# "%(app_name)s %(levelname)-7s |-> %(name)-45s %(message)s" + # "%(app_name)s %(levelname)-7s |-> %(name)-45s %(message)s" "%(app_name)s %(levelname)-7s %(message)s" ) FFORMATTER_MPI = logging.Formatter( "[%(asctime)s] %(app_name)s rank:%(rank)-2s %(levelname)-7s %(name)-45s %(message)s" ) CFORMATTER_MPI = logging.Formatter( -# "%(app_name)s rank:%(rank)-2s %(levelname)-7s |-> %(name)-45s %(message)s" + # "%(app_name)s rank:%(rank)-2s %(levelname)-7s |-> %(name)-45s %(message)s" "%(app_name)s rank:%(rank)-2s %(levelname)-7s %(message)s" ) @@ -135,9 +142,7 @@ def setStream(self, stream): def set_log_handles( - level: int, - log_path: Optional["Path"] = None, - mpi_log: Optional[str] = None + level: int, log_path: Optional["Path"] = None, mpi_log: Optional[str] = None ): """Set desired level for package loggers and add file handlers. 
@@ -189,7 +194,7 @@ def set_log_handles( os.environ["KMP_WARNINGS"] = "FALSE" # set TF cpp internal logging level - os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(int((level / 10) - 1)) + os.environ["TF_CPP_MIN_LOG_LEVEL"] = str(int((level / 10) - 1)) # get root logger root_log = logging.getLogger("deepmd") @@ -201,9 +206,13 @@ def set_log_handles( MPI = None if mpi_log: try: - from mpi4py import MPI + from mpi4py import ( + MPI, + ) except ImportError as e: - raise RuntimeError("You cannot specify 'mpi_log' when mpi4py not installed") from e + raise RuntimeError( + "You cannot specify 'mpi_log' when mpi4py not installed" + ) from e # * add console handler ************************************************************ ch = logging.StreamHandler() diff --git a/deepmd/loss/__init__.py b/deepmd/loss/__init__.py index 94655dd734..93c448442c 100644 --- a/deepmd/loss/__init__.py +++ b/deepmd/loss/__init__.py @@ -1,4 +1,7 @@ -from .ener import EnerStdLoss -from .ener import EnerDipoleLoss -from .tensor import TensorLoss - +from .ener import ( + EnerDipoleLoss, + EnerStdLoss, +) +from .tensor import ( + TensorLoss, +) diff --git a/deepmd/loss/ener.py b/deepmd/loss/ener.py index c48fa3d508..7c8d997c6a 100644 --- a/deepmd/loss/ener.py +++ b/deepmd/loss/ener.py @@ -1,14 +1,23 @@ import numpy as np -from deepmd.env import tf -from deepmd.common import add_data_requirement -from deepmd.env import global_cvt_2_tf_float -from deepmd.env import global_cvt_2_ener_float -from deepmd.utils.sess import run_sess -from .loss import Loss +from deepmd.common import ( + add_data_requirement, +) +from deepmd.env import ( + global_cvt_2_ener_float, + global_cvt_2_tf_float, + tf, +) +from deepmd.utils.sess import ( + run_sess, +) +from .loss import ( + Loss, +) -class EnerStdLoss (Loss) : + +class EnerStdLoss(Loss): """ Standard loss function for DP models @@ -17,20 +26,22 @@ class EnerStdLoss (Loss) : enable_atom_ener_coeff : bool if true, the energy will be computed as \\sum_i c_i E_i """ - def 
__init__ (self, - starter_learning_rate : float, - start_pref_e : float = 0.02, - limit_pref_e : float = 1.00, - start_pref_f : float = 1000, - limit_pref_f : float = 1.00, - start_pref_v : float = 0.0, - limit_pref_v : float = 0.0, - start_pref_ae : float = 0.0, - limit_pref_ae : float = 0.0, - start_pref_pf : float = 0.0, - limit_pref_pf : float = 0.0, - relative_f : float = None, - enable_atom_ener_coeff: bool=False, + + def __init__( + self, + starter_learning_rate: float, + start_pref_e: float = 0.02, + limit_pref_e: float = 1.00, + start_pref_f: float = 1000, + limit_pref_f: float = 1.00, + start_pref_v: float = 0.0, + limit_pref_v: float = 0.0, + start_pref_ae: float = 0.0, + limit_pref_ae: float = 0.0, + start_pref_pf: float = 0.0, + limit_pref_pf: float = 0.0, + relative_f: float = None, + enable_atom_ener_coeff: bool = False, ) -> None: self.starter_learning_rate = starter_learning_rate self.start_pref_e = start_pref_e @@ -45,43 +56,47 @@ def __init__ (self, self.limit_pref_pf = limit_pref_pf self.relative_f = relative_f self.enable_atom_ener_coeff = enable_atom_ener_coeff - self.has_e = (self.start_pref_e != 0.0 or self.limit_pref_e != 0.0) - self.has_f = (self.start_pref_f != 0.0 or self.limit_pref_f != 0.0) - self.has_v = (self.start_pref_v != 0.0 or self.limit_pref_v != 0.0) - self.has_ae = (self.start_pref_ae != 0.0 or self.limit_pref_ae != 0.0) - self.has_pf = (self.start_pref_pf != 0.0 or self.limit_pref_pf != 0.0) + self.has_e = self.start_pref_e != 0.0 or self.limit_pref_e != 0.0 + self.has_f = self.start_pref_f != 0.0 or self.limit_pref_f != 0.0 + self.has_v = self.start_pref_v != 0.0 or self.limit_pref_v != 0.0 + self.has_ae = self.start_pref_ae != 0.0 or self.limit_pref_ae != 0.0 + self.has_pf = self.start_pref_pf != 0.0 or self.limit_pref_pf != 0.0 # data required - add_data_requirement('energy', 1, atomic=False, must=False, high_prec=True) - add_data_requirement('force', 3, atomic=True, must=False, high_prec=False) - 
add_data_requirement('virial', 9, atomic=False, must=False, high_prec=False) - add_data_requirement('atom_ener', 1, atomic=True, must=False, high_prec=False) - add_data_requirement('atom_pref', 1, atomic=True, must=False, high_prec=False, repeat=3) + add_data_requirement("energy", 1, atomic=False, must=False, high_prec=True) + add_data_requirement("force", 3, atomic=True, must=False, high_prec=False) + add_data_requirement("virial", 9, atomic=False, must=False, high_prec=False) + add_data_requirement("atom_ener", 1, atomic=True, must=False, high_prec=False) + add_data_requirement( + "atom_pref", 1, atomic=True, must=False, high_prec=False, repeat=3 + ) if self.enable_atom_ener_coeff: - add_data_requirement('atom_ener_coeff', 1, atomic=True, must=False, high_prec=False, default=1.) + add_data_requirement( + "atom_ener_coeff", + 1, + atomic=True, + must=False, + high_prec=False, + default=1.0, + ) - def build (self, - learning_rate, - natoms, - model_dict, - label_dict, - suffix): - energy = model_dict['energy'] - force = model_dict['force'] - virial = model_dict['virial'] - atom_ener = model_dict['atom_ener'] - energy_hat = label_dict['energy'] - force_hat = label_dict['force'] - virial_hat = label_dict['virial'] - atom_ener_hat = label_dict['atom_ener'] - atom_pref = label_dict['atom_pref'] - find_energy = label_dict['find_energy'] - find_force = label_dict['find_force'] - find_virial = label_dict['find_virial'] - find_atom_ener = label_dict['find_atom_ener'] - find_atom_pref = label_dict['find_atom_pref'] + def build(self, learning_rate, natoms, model_dict, label_dict, suffix): + energy = model_dict["energy"] + force = model_dict["force"] + virial = model_dict["virial"] + atom_ener = model_dict["atom_ener"] + energy_hat = label_dict["energy"] + force_hat = label_dict["force"] + virial_hat = label_dict["virial"] + atom_ener_hat = label_dict["atom_ener"] + atom_pref = label_dict["atom_pref"] + find_energy = label_dict["find_energy"] + find_force = 
label_dict["find_force"] + find_virial = label_dict["find_virial"] + find_atom_ener = label_dict["find_atom_ener"] + find_atom_pref = label_dict["find_atom_pref"] if self.enable_atom_ener_coeff: - # when ener_coeff (\nu) is defined, the energy is defined as + # when ener_coeff (\nu) is defined, the energy is defined as # E = \sum_i \nu_i E_i # instead of the sum of atomic energies. # @@ -89,63 +104,122 @@ def build (self, # A + B -> C + D # E = - E(A) - E(B) + E(C) + E(D) # A, B, C, D could be put far away from each other - atom_ener_coeff = label_dict['atom_ener_coeff'] + atom_ener_coeff = label_dict["atom_ener_coeff"] atom_ener_coeff = tf.reshape(atom_ener_coeff, tf.shape(atom_ener)) energy = tf.reduce_sum(atom_ener_coeff * atom_ener, 1) - l2_ener_loss = tf.reduce_mean( tf.square(energy - energy_hat), name='l2_'+suffix) + l2_ener_loss = tf.reduce_mean( + tf.square(energy - energy_hat), name="l2_" + suffix + ) - force_reshape = tf.reshape (force, [-1]) - force_hat_reshape = tf.reshape (force_hat, [-1]) - atom_pref_reshape = tf.reshape (atom_pref, [-1]) + force_reshape = tf.reshape(force, [-1]) + force_hat_reshape = tf.reshape(force_hat, [-1]) + atom_pref_reshape = tf.reshape(atom_pref, [-1]) diff_f = force_hat_reshape - force_reshape - if self.relative_f is not None: + if self.relative_f is not None: force_hat_3 = tf.reshape(force_hat, [-1, 3]) - norm_f = tf.reshape(tf.norm(force_hat_3, axis = 1), [-1, 1]) + self.relative_f + norm_f = tf.reshape(tf.norm(force_hat_3, axis=1), [-1, 1]) + self.relative_f diff_f_3 = tf.reshape(diff_f, [-1, 3]) diff_f_3 = diff_f_3 / norm_f diff_f = tf.reshape(diff_f_3, [-1]) - l2_force_loss = tf.reduce_mean(tf.square(diff_f), name = "l2_force_" + suffix) - l2_pref_force_loss = tf.reduce_mean(tf.multiply(tf.square(diff_f), atom_pref_reshape), name = "l2_pref_force_" + suffix) + l2_force_loss = tf.reduce_mean(tf.square(diff_f), name="l2_force_" + suffix) + l2_pref_force_loss = tf.reduce_mean( + tf.multiply(tf.square(diff_f), 
atom_pref_reshape), + name="l2_pref_force_" + suffix, + ) - virial_reshape = tf.reshape (virial, [-1]) - virial_hat_reshape = tf.reshape (virial_hat, [-1]) - l2_virial_loss = tf.reduce_mean (tf.square(virial_hat_reshape - virial_reshape), name = "l2_virial_" + suffix) + virial_reshape = tf.reshape(virial, [-1]) + virial_hat_reshape = tf.reshape(virial_hat, [-1]) + l2_virial_loss = tf.reduce_mean( + tf.square(virial_hat_reshape - virial_reshape), name="l2_virial_" + suffix + ) - atom_ener_reshape = tf.reshape (atom_ener, [-1]) - atom_ener_hat_reshape = tf.reshape (atom_ener_hat, [-1]) - l2_atom_ener_loss = tf.reduce_mean (tf.square(atom_ener_hat_reshape - atom_ener_reshape), name = "l2_atom_ener_" + suffix) + atom_ener_reshape = tf.reshape(atom_ener, [-1]) + atom_ener_hat_reshape = tf.reshape(atom_ener_hat, [-1]) + l2_atom_ener_loss = tf.reduce_mean( + tf.square(atom_ener_hat_reshape - atom_ener_reshape), + name="l2_atom_ener_" + suffix, + ) - atom_norm = 1./ global_cvt_2_tf_float(natoms[0]) - atom_norm_ener = 1./ global_cvt_2_ener_float(natoms[0]) - pref_e = global_cvt_2_ener_float(find_energy * (self.limit_pref_e + (self.start_pref_e - self.limit_pref_e) * learning_rate / self.starter_learning_rate) ) - pref_f = global_cvt_2_tf_float(find_force * (self.limit_pref_f + (self.start_pref_f - self.limit_pref_f) * learning_rate / self.starter_learning_rate) ) - pref_v = global_cvt_2_tf_float(find_virial * (self.limit_pref_v + (self.start_pref_v - self.limit_pref_v) * learning_rate / self.starter_learning_rate) ) - pref_ae= global_cvt_2_tf_float(find_atom_ener * (self.limit_pref_ae+ (self.start_pref_ae-self.limit_pref_ae) * learning_rate / self.starter_learning_rate) ) - pref_pf= global_cvt_2_tf_float(find_atom_pref * (self.limit_pref_pf+ (self.start_pref_pf-self.limit_pref_pf) * learning_rate / self.starter_learning_rate) ) + atom_norm = 1.0 / global_cvt_2_tf_float(natoms[0]) + atom_norm_ener = 1.0 / global_cvt_2_ener_float(natoms[0]) + pref_e = global_cvt_2_ener_float( 
+ find_energy + * ( + self.limit_pref_e + + (self.start_pref_e - self.limit_pref_e) + * learning_rate + / self.starter_learning_rate + ) + ) + pref_f = global_cvt_2_tf_float( + find_force + * ( + self.limit_pref_f + + (self.start_pref_f - self.limit_pref_f) + * learning_rate + / self.starter_learning_rate + ) + ) + pref_v = global_cvt_2_tf_float( + find_virial + * ( + self.limit_pref_v + + (self.start_pref_v - self.limit_pref_v) + * learning_rate + / self.starter_learning_rate + ) + ) + pref_ae = global_cvt_2_tf_float( + find_atom_ener + * ( + self.limit_pref_ae + + (self.start_pref_ae - self.limit_pref_ae) + * learning_rate + / self.starter_learning_rate + ) + ) + pref_pf = global_cvt_2_tf_float( + find_atom_pref + * ( + self.limit_pref_pf + + (self.start_pref_pf - self.limit_pref_pf) + * learning_rate + / self.starter_learning_rate + ) + ) l2_loss = 0 more_loss = {} - if self.has_e : + if self.has_e: l2_loss += atom_norm_ener * (pref_e * l2_ener_loss) - more_loss['l2_ener_loss'] = l2_ener_loss - if self.has_f : + more_loss["l2_ener_loss"] = l2_ener_loss + if self.has_f: l2_loss += global_cvt_2_ener_float(pref_f * l2_force_loss) - more_loss['l2_force_loss'] = l2_force_loss - if self.has_v : + more_loss["l2_force_loss"] = l2_force_loss + if self.has_v: l2_loss += global_cvt_2_ener_float(atom_norm * (pref_v * l2_virial_loss)) - more_loss['l2_virial_loss'] = l2_virial_loss - if self.has_ae : + more_loss["l2_virial_loss"] = l2_virial_loss + if self.has_ae: l2_loss += global_cvt_2_ener_float(pref_ae * l2_atom_ener_loss) - more_loss['l2_atom_ener_loss'] = l2_atom_ener_loss - if self.has_pf : + more_loss["l2_atom_ener_loss"] = l2_atom_ener_loss + if self.has_pf: l2_loss += global_cvt_2_ener_float(pref_pf * l2_pref_force_loss) - more_loss['l2_pref_force_loss'] = l2_pref_force_loss + more_loss["l2_pref_force_loss"] = l2_pref_force_loss # only used when tensorboard was set as true - self.l2_loss_summary = tf.summary.scalar('l2_loss_' + suffix, tf.sqrt(l2_loss)) - 
self.l2_loss_ener_summary = tf.summary.scalar('l2_ener_loss_' + suffix, global_cvt_2_tf_float(tf.sqrt(l2_ener_loss)) / global_cvt_2_tf_float(natoms[0])) - self.l2_loss_force_summary = tf.summary.scalar('l2_force_loss_' + suffix, tf.sqrt(l2_force_loss)) - self.l2_loss_virial_summary = tf.summary.scalar('l2_virial_loss_' + suffix, tf.sqrt(l2_virial_loss) / global_cvt_2_tf_float(natoms[0])) + self.l2_loss_summary = tf.summary.scalar("l2_loss_" + suffix, tf.sqrt(l2_loss)) + self.l2_loss_ener_summary = tf.summary.scalar( + "l2_ener_loss_" + suffix, + global_cvt_2_tf_float(tf.sqrt(l2_ener_loss)) + / global_cvt_2_tf_float(natoms[0]), + ) + self.l2_loss_force_summary = tf.summary.scalar( + "l2_force_loss_" + suffix, tf.sqrt(l2_force_loss) + ) + self.l2_loss_virial_summary = tf.summary.scalar( + "l2_virial_loss_" + suffix, + tf.sqrt(l2_virial_loss) / global_cvt_2_tf_float(natoms[0]), + ) self.l2_l = l2_loss self.l2_more = more_loss @@ -155,13 +229,15 @@ def eval(self, sess, feed_dict, natoms): placeholder = self.l2_l run_data = [ self.l2_l, - self.l2_more['l2_ener_loss'] if self.has_e else placeholder, - self.l2_more['l2_force_loss'] if self.has_f else placeholder, - self.l2_more['l2_virial_loss'] if self.has_v else placeholder, - self.l2_more['l2_atom_ener_loss'] if self.has_ae else placeholder, - self.l2_more['l2_pref_force_loss'] if self.has_pf else placeholder, + self.l2_more["l2_ener_loss"] if self.has_e else placeholder, + self.l2_more["l2_force_loss"] if self.has_f else placeholder, + self.l2_more["l2_virial_loss"] if self.has_v else placeholder, + self.l2_more["l2_atom_ener_loss"] if self.has_ae else placeholder, + self.l2_more["l2_pref_force_loss"] if self.has_pf else placeholder, ] - error, error_e, error_f, error_v, error_ae, error_pf = run_sess(sess, run_data, feed_dict=feed_dict) + error, error_e, error_f, error_v, error_ae, error_pf = run_sess( + sess, run_data, feed_dict=feed_dict + ) results = {"natoms": natoms[0], "rmse": np.sqrt(error)} if self.has_e: 
results["rmse_e"] = np.sqrt(error_e) / natoms[0] @@ -173,70 +249,100 @@ def eval(self, sess, feed_dict, natoms): results["rmse_v"] = np.sqrt(error_v) / natoms[0] if self.has_pf: results["rmse_pf"] = np.sqrt(error_pf) - return results + return results -class EnerDipoleLoss (Loss) : - def __init__ (self, - starter_learning_rate : float, - start_pref_e : float = 0.1, - limit_pref_e : float = 1.0, - start_pref_ed : float = 1.0, - limit_pref_ed : float = 1.0 - ) -> None : +class EnerDipoleLoss(Loss): + def __init__( + self, + starter_learning_rate: float, + start_pref_e: float = 0.1, + limit_pref_e: float = 1.0, + start_pref_ed: float = 1.0, + limit_pref_ed: float = 1.0, + ) -> None: self.starter_learning_rate = starter_learning_rate self.start_pref_e = start_pref_e self.limit_pref_e = limit_pref_e self.start_pref_ed = start_pref_ed self.limit_pref_ed = limit_pref_ed # data required - add_data_requirement('energy', 1, atomic=False, must=True, high_prec=True) - add_data_requirement('energy_dipole', 3, atomic=False, must=True, high_prec=False) + add_data_requirement("energy", 1, atomic=False, must=True, high_prec=True) + add_data_requirement( + "energy_dipole", 3, atomic=False, must=True, high_prec=False + ) - def build (self, - learning_rate, - natoms, - model_dict, - label_dict, - suffix): - coord = model_dict['coord'] - energy = model_dict['energy'] - atom_ener = model_dict['atom_ener'] + def build(self, learning_rate, natoms, model_dict, label_dict, suffix): + coord = model_dict["coord"] + energy = model_dict["energy"] + atom_ener = model_dict["atom_ener"] nframes = tf.shape(atom_ener)[0] natoms = tf.shape(atom_ener)[1] # build energy dipole - atom_ener0 = atom_ener - tf.reshape(tf.tile(tf.reshape(energy/global_cvt_2_ener_float(natoms), [-1, 1]), [1, natoms]), [nframes, natoms]) + atom_ener0 = atom_ener - tf.reshape( + tf.tile( + tf.reshape(energy / global_cvt_2_ener_float(natoms), [-1, 1]), + [1, natoms], + ), + [nframes, natoms], + ) coord = tf.reshape(coord, 
[nframes, natoms, 3]) atom_ener0 = tf.reshape(atom_ener0, [nframes, 1, natoms]) ener_dipole = tf.matmul(atom_ener0, coord) ener_dipole = tf.reshape(ener_dipole, [nframes, 3]) - - energy_hat = label_dict['energy'] - ener_dipole_hat = label_dict['energy_dipole'] - find_energy = label_dict['find_energy'] - find_ener_dipole = label_dict['find_energy_dipole'] - l2_ener_loss = tf.reduce_mean( tf.square(energy - energy_hat), name='l2_'+suffix) + energy_hat = label_dict["energy"] + ener_dipole_hat = label_dict["energy_dipole"] + find_energy = label_dict["find_energy"] + find_ener_dipole = label_dict["find_energy_dipole"] + + l2_ener_loss = tf.reduce_mean( + tf.square(energy - energy_hat), name="l2_" + suffix + ) ener_dipole_reshape = tf.reshape(ener_dipole, [-1]) ener_dipole_hat_reshape = tf.reshape(ener_dipole_hat, [-1]) - l2_ener_dipole_loss = tf.reduce_mean( tf.square(ener_dipole_reshape - ener_dipole_hat_reshape), name='l2_'+suffix) + l2_ener_dipole_loss = tf.reduce_mean( + tf.square(ener_dipole_reshape - ener_dipole_hat_reshape), + name="l2_" + suffix, + ) - # atom_norm_ener = 1./ global_cvt_2_ener_float(natoms[0]) - atom_norm_ener = 1./ global_cvt_2_ener_float(natoms) - pref_e = global_cvt_2_ener_float(find_energy * (self.limit_pref_e + (self.start_pref_e - self.limit_pref_e) * learning_rate / self.starter_learning_rate) ) - pref_ed = global_cvt_2_tf_float(find_ener_dipole * (self.limit_pref_ed + (self.start_pref_ed - self.limit_pref_ed) * learning_rate / self.starter_learning_rate) ) + # atom_norm_ener = 1./ global_cvt_2_ener_float(natoms[0]) + atom_norm_ener = 1.0 / global_cvt_2_ener_float(natoms) + pref_e = global_cvt_2_ener_float( + find_energy + * ( + self.limit_pref_e + + (self.start_pref_e - self.limit_pref_e) + * learning_rate + / self.starter_learning_rate + ) + ) + pref_ed = global_cvt_2_tf_float( + find_ener_dipole + * ( + self.limit_pref_ed + + (self.start_pref_ed - self.limit_pref_ed) + * learning_rate + / self.starter_learning_rate + ) + ) l2_loss = 0 
more_loss = {} l2_loss += atom_norm_ener * (pref_e * l2_ener_loss) l2_loss += global_cvt_2_ener_float(pref_ed * l2_ener_dipole_loss) - more_loss['l2_ener_loss'] = l2_ener_loss - more_loss['l2_ener_dipole_loss'] = l2_ener_dipole_loss + more_loss["l2_ener_loss"] = l2_ener_loss + more_loss["l2_ener_dipole_loss"] = l2_ener_dipole_loss - self.l2_loss_summary = tf.summary.scalar('l2_loss_' + suffix, tf.sqrt(l2_loss)) - self.l2_loss_ener_summary = tf.summary.scalar('l2_ener_loss_' + suffix, tf.sqrt(l2_ener_loss) / global_cvt_2_tf_float(natoms[0])) - self.l2_ener_dipole_loss_summary = tf.summary.scalar('l2_ener_dipole_loss_' + suffix, tf.sqrt(l2_ener_dipole_loss)) + self.l2_loss_summary = tf.summary.scalar("l2_loss_" + suffix, tf.sqrt(l2_loss)) + self.l2_loss_ener_summary = tf.summary.scalar( + "l2_ener_loss_" + suffix, + tf.sqrt(l2_ener_loss) / global_cvt_2_tf_float(natoms[0]), + ) + self.l2_ener_dipole_loss_summary = tf.summary.scalar( + "l2_ener_dipole_loss_" + suffix, tf.sqrt(l2_ener_dipole_loss) + ) self.l2_l = l2_loss self.l2_more = more_loss @@ -245,14 +351,14 @@ def build (self, def eval(self, sess, feed_dict, natoms): run_data = [ self.l2_l, - self.l2_more['l2_ener_loss'], - self.l2_more['l2_ener_dipole_loss'] + self.l2_more["l2_ener_loss"], + self.l2_more["l2_ener_dipole_loss"], ] error, error_e, error_ed = run_sess(sess, run_data, feed_dict=feed_dict) results = { - 'natoms': natoms[0], - 'rmse': np.sqrt(error), - 'rmse_e': np.sqrt(error_e) / natoms[0], - 'rmse_ed': np.sqrt(error_ed) + "natoms": natoms[0], + "rmse": np.sqrt(error), + "rmse_e": np.sqrt(error_e) / natoms[0], + "rmse_ed": np.sqrt(error_ed), } return results diff --git a/deepmd/loss/loss.py b/deepmd/loss/loss.py index 6ae9dc7399..f666445e6e 100644 --- a/deepmd/loss/loss.py +++ b/deepmd/loss/loss.py @@ -1,19 +1,31 @@ -from abc import ABCMeta, abstractmethod -from typing import Tuple, Dict -from deepmd.env import tf +from abc import ( + ABCMeta, + abstractmethod, +) +from typing import ( + Dict, + 
Tuple, +) + +from deepmd.env import ( + tf, +) class Loss(metaclass=ABCMeta): """The abstract class for the loss function.""" + @abstractmethod - def build(self, - learning_rate: tf.Tensor, - natoms: tf.Tensor, - model_dict: Dict[str, tf.Tensor], - label_dict: Dict[str, tf.Tensor], - suffix: str) -> Tuple[tf.Tensor, Dict[str, tf.Tensor]]: + def build( + self, + learning_rate: tf.Tensor, + natoms: tf.Tensor, + model_dict: Dict[str, tf.Tensor], + label_dict: Dict[str, tf.Tensor], + suffix: str, + ) -> Tuple[tf.Tensor, Dict[str, tf.Tensor]]: """Build the loss function graph. - + Parameters ---------- learning_rate : tf.Tensor @@ -36,10 +48,12 @@ def build(self, """ @abstractmethod - def eval(self, - sess: tf.Session, - feed_dict: Dict[tf.placeholder, tf.Tensor], - natoms: tf.Tensor) -> dict: + def eval( + self, + sess: tf.Session, + feed_dict: Dict[tf.placeholder, tf.Tensor], + natoms: tf.Tensor, + ) -> dict: """Eval the loss function. Parameters diff --git a/deepmd/loss/tensor.py b/deepmd/loss/tensor.py index e7cbde6ebd..c0a9ee575f 100644 --- a/deepmd/loss/tensor.py +++ b/deepmd/loss/tensor.py @@ -1,131 +1,153 @@ import numpy as np -from deepmd.env import tf -from deepmd.common import add_data_requirement -from deepmd.env import global_cvt_2_tf_float -from deepmd.env import global_cvt_2_ener_float -from deepmd.utils.sess import run_sess -from .loss import Loss +from deepmd.common import ( + add_data_requirement, +) +from deepmd.env import ( + global_cvt_2_ener_float, + global_cvt_2_tf_float, + tf, +) +from deepmd.utils.sess import ( + run_sess, +) +from .loss import ( + Loss, +) -class TensorLoss(Loss) : + +class TensorLoss(Loss): """ Loss function for tensorial properties. 
""" - def __init__ (self, jdata, **kwarg) : - model = kwarg.get('model', None) + + def __init__(self, jdata, **kwarg): + model = kwarg.get("model", None) if model is not None: self.type_sel = model.get_sel_type() else: self.type_sel = None - self.tensor_name = kwarg['tensor_name'] - self.tensor_size = kwarg['tensor_size'] - self.label_name = kwarg['label_name'] + self.tensor_name = kwarg["tensor_name"] + self.tensor_size = kwarg["tensor_size"] + self.label_name = kwarg["label_name"] if jdata is not None: - self.scale = jdata.get('scale', 1.0) + self.scale = jdata.get("scale", 1.0) else: self.scale = 1.0 # YHT: added for global / local dipole combination assert jdata is not None, "Please provide loss parameters!" # YWolfeee: modify, use pref / pref_atomic, instead of pref_weight / pref_atomic_weight - self.local_weight = jdata.get('pref_atomic', None) - self.global_weight = jdata.get('pref', None) - - assert (self.local_weight is not None and self.global_weight is not None), "Both `pref` and `pref_atomic` should be provided." - assert self.local_weight >= 0.0 and self.global_weight >= 0.0, "Can not assign negative weight to `pref` and `pref_atomic`" - assert (self.local_weight >0.0) or (self.global_weight>0.0), AssertionError('Can not assian zero weight both to `pref` and `pref_atomic`') + self.local_weight = jdata.get("pref_atomic", None) + self.global_weight = jdata.get("pref", None) + + assert ( + self.local_weight is not None and self.global_weight is not None + ), "Both `pref` and `pref_atomic` should be provided." 
+ assert ( + self.local_weight >= 0.0 and self.global_weight >= 0.0 + ), "Can not assign negative weight to `pref` and `pref_atomic`" + assert (self.local_weight > 0.0) or (self.global_weight > 0.0), AssertionError( + "Can not assian zero weight both to `pref` and `pref_atomic`" + ) # data required - add_data_requirement("atomic_" + self.label_name, - self.tensor_size, - atomic=True, - must=False, - high_prec=False, - type_sel = self.type_sel) - add_data_requirement(self.label_name, - self.tensor_size, - atomic=False, - must=False, - high_prec=False, - type_sel = self.type_sel) - - def build (self, - learning_rate, - natoms, - model_dict, - label_dict, - suffix): + add_data_requirement( + "atomic_" + self.label_name, + self.tensor_size, + atomic=True, + must=False, + high_prec=False, + type_sel=self.type_sel, + ) + add_data_requirement( + self.label_name, + self.tensor_size, + atomic=False, + must=False, + high_prec=False, + type_sel=self.type_sel, + ) + + def build(self, learning_rate, natoms, model_dict, label_dict, suffix): polar_hat = label_dict[self.label_name] atomic_polar_hat = label_dict["atomic_" + self.label_name] polar = tf.reshape(model_dict[self.tensor_name], [-1]) - find_global = label_dict['find_' + self.label_name] - find_atomic = label_dict['find_atomic_' + self.label_name] - - + find_global = label_dict["find_" + self.label_name] + find_atomic = label_dict["find_atomic_" + self.label_name] # YHT: added for global / local dipole combination l2_loss = global_cvt_2_tf_float(0.0) more_loss = { - "local_loss":global_cvt_2_tf_float(0.0), - "global_loss":global_cvt_2_tf_float(0.0) + "local_loss": global_cvt_2_tf_float(0.0), + "global_loss": global_cvt_2_tf_float(0.0), } - if self.local_weight > 0.0: - local_loss = global_cvt_2_tf_float(find_atomic) * tf.reduce_mean( tf.square(self.scale*(polar - atomic_polar_hat)), name='l2_'+suffix) - more_loss['local_loss'] = local_loss + local_loss = global_cvt_2_tf_float(find_atomic) * tf.reduce_mean( + 
tf.square(self.scale * (polar - atomic_polar_hat)), name="l2_" + suffix + ) + more_loss["local_loss"] = local_loss l2_loss += self.local_weight * local_loss - self.l2_loss_local_summary = tf.summary.scalar('l2_local_loss_' + suffix, - tf.sqrt(more_loss['local_loss'])) - + self.l2_loss_local_summary = tf.summary.scalar( + "l2_local_loss_" + suffix, tf.sqrt(more_loss["local_loss"]) + ) - if self.global_weight > 0.0: # Need global loss + if self.global_weight > 0.0: # Need global loss atoms = 0 if self.type_sel is not None: for w in self.type_sel: - atoms += natoms[2+w] + atoms += natoms[2 + w] else: - atoms = natoms[0] + atoms = natoms[0] nframes = tf.shape(polar)[0] // self.tensor_size // atoms # get global results - global_polar = tf.reshape(tf.reduce_sum(tf.reshape( - polar, [nframes, -1, self.tensor_size]), axis=1),[-1]) - #if self.atomic: # If label is local, however + global_polar = tf.reshape( + tf.reduce_sum( + tf.reshape(polar, [nframes, -1, self.tensor_size]), axis=1 + ), + [-1], + ) + # if self.atomic: # If label is local, however # global_polar_hat = tf.reshape(tf.reduce_sum(tf.reshape( # polar_hat, [nframes, -1, self.tensor_size]), axis=1),[-1]) - #else: + # else: # global_polar_hat = polar_hat - - global_loss = global_cvt_2_tf_float(find_global) * tf.reduce_mean( tf.square(self.scale*(global_polar - polar_hat)), name='l2_'+suffix) - more_loss['global_loss'] = global_loss - self.l2_loss_global_summary = tf.summary.scalar('l2_global_loss_' + suffix, - tf.sqrt(more_loss['global_loss']) / global_cvt_2_tf_float(atoms)) + global_loss = global_cvt_2_tf_float(find_global) * tf.reduce_mean( + tf.square(self.scale * (global_polar - polar_hat)), name="l2_" + suffix + ) + + more_loss["global_loss"] = global_loss + self.l2_loss_global_summary = tf.summary.scalar( + "l2_global_loss_" + suffix, + tf.sqrt(more_loss["global_loss"]) / global_cvt_2_tf_float(atoms), + ) # YWolfeee: should only consider atoms with dipole, i.e. 
atoms - # atom_norm = 1./ global_cvt_2_tf_float(natoms[0]) - atom_norm = 1./ global_cvt_2_tf_float(atoms) - global_loss *= atom_norm + # atom_norm = 1./ global_cvt_2_tf_float(natoms[0]) + atom_norm = 1.0 / global_cvt_2_tf_float(atoms) + global_loss *= atom_norm l2_loss += self.global_weight * global_loss - + self.l2_more = more_loss self.l2_l = l2_loss - self.l2_loss_summary = tf.summary.scalar('l2_loss_' + suffix, tf.sqrt(l2_loss)) + self.l2_loss_summary = tf.summary.scalar("l2_loss_" + suffix, tf.sqrt(l2_loss)) return l2_loss, more_loss def eval(self, sess, feed_dict, natoms): atoms = 0 if self.type_sel is not None: for w in self.type_sel: - atoms += natoms[2+w] + atoms += natoms[2 + w] else: atoms = natoms[0] - run_data = [self.l2_l, self.l2_more['local_loss'], self.l2_more['global_loss']] + run_data = [self.l2_l, self.l2_more["local_loss"], self.l2_more["global_loss"]] error, error_lc, error_gl = run_sess(sess, run_data, feed_dict=feed_dict) results = {"natoms": atoms, "rmse": np.sqrt(error)} diff --git a/deepmd/model/__init__.py b/deepmd/model/__init__.py index 742836bb14..90a0fde7b7 100644 --- a/deepmd/model/__init__.py +++ b/deepmd/model/__init__.py @@ -1,6 +1,12 @@ -from .ener import EnerModel -from .tensor import WFCModel -from .tensor import DipoleModel -from .tensor import PolarModel -from .tensor import GlobalPolarModel -from .multi import MultiModel +from .ener import ( + EnerModel, +) +from .multi import ( + MultiModel, +) +from .tensor import ( + DipoleModel, + GlobalPolarModel, + PolarModel, + WFCModel, +) diff --git a/deepmd/model/ener.py b/deepmd/model/ener.py index 2e27329bc1..dfb5c4c7c8 100644 --- a/deepmd/model/ener.py +++ b/deepmd/model/ener.py @@ -1,16 +1,34 @@ +from typing import ( + List, + Optional, + Tuple, +) + import numpy as np -from typing import Optional, Tuple, List -from deepmd.env import tf -from deepmd.utils.pair_tab import PairTab -from deepmd.env import global_cvt_2_ener_float, MODEL_VERSION, GLOBAL_TF_FLOAT_PRECISION -from 
deepmd.env import op_module -from .model import Model -from .model_stat import make_stat_input, merge_sys_stat +from deepmd.env import ( + GLOBAL_TF_FLOAT_PRECISION, + MODEL_VERSION, + global_cvt_2_ener_float, + op_module, + tf, +) +from deepmd.utils.pair_tab import ( + PairTab, +) + +from .model import ( + Model, +) +from .model_stat import ( + make_stat_input, + merge_sys_stat, +) + -class EnerModel(Model) : +class EnerModel(Model): """Energy model. - + Parameters ---------- descrpt @@ -33,20 +51,21 @@ class EnerModel(Model) : sw_rmin The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided. """ - model_type = 'ener' - - def __init__ ( - self, - descrpt, - fitting, - typeebd = None, - type_map : List[str] = None, - data_stat_nbatch : int = 10, - data_stat_protect : float = 1e-2, - use_srtab : str = None, - smin_alpha : float = None, - sw_rmin : float = None, - sw_rmax : float = None + + model_type = "ener" + + def __init__( + self, + descrpt, + fitting, + typeebd=None, + type_map: List[str] = None, + data_stat_nbatch: int = 10, + data_stat_protect: float = 1e-2, + use_srtab: str = None, + smin_alpha: float = None, + sw_rmin: float = None, + sw_rmax: float = None, ) -> None: """ Constructor @@ -68,210 +87,228 @@ def __init__ ( self.data_stat_nbatch = data_stat_nbatch self.data_stat_protect = data_stat_protect self.srtab_name = use_srtab - if self.srtab_name is not None : + if self.srtab_name is not None: self.srtab = PairTab(self.srtab_name) self.smin_alpha = smin_alpha self.sw_rmin = sw_rmin self.sw_rmax = sw_rmax - else : + else: self.srtab = None - - def get_rcut (self) : + def get_rcut(self): return self.rcut - def get_ntypes (self) : + def get_ntypes(self): return self.ntypes - def get_type_map (self) : + def get_type_map(self): return self.type_map def data_stat(self, data): - all_stat = make_stat_input(data, self.data_stat_nbatch, merge_sys = False) + all_stat = 
make_stat_input(data, self.data_stat_nbatch, merge_sys=False) m_all_stat = merge_sys_stat(all_stat) - self._compute_input_stat(m_all_stat, protection=self.data_stat_protect, mixed_type=data.mixed_type) + self._compute_input_stat( + m_all_stat, protection=self.data_stat_protect, mixed_type=data.mixed_type + ) self._compute_output_stat(all_stat, mixed_type=data.mixed_type) # self.bias_atom_e = data.compute_energy_shift(self.rcond) - def _compute_input_stat (self, all_stat, protection=1e-2, mixed_type=False): + def _compute_input_stat(self, all_stat, protection=1e-2, mixed_type=False): if mixed_type: - self.descrpt.compute_input_stats(all_stat['coord'], - all_stat['box'], - all_stat['type'], - all_stat['natoms_vec'], - all_stat['default_mesh'], - all_stat, - mixed_type, - all_stat['real_natoms_vec']) + self.descrpt.compute_input_stats( + all_stat["coord"], + all_stat["box"], + all_stat["type"], + all_stat["natoms_vec"], + all_stat["default_mesh"], + all_stat, + mixed_type, + all_stat["real_natoms_vec"], + ) else: - self.descrpt.compute_input_stats(all_stat['coord'], - all_stat['box'], - all_stat['type'], - all_stat['natoms_vec'], - all_stat['default_mesh'], - all_stat) + self.descrpt.compute_input_stats( + all_stat["coord"], + all_stat["box"], + all_stat["type"], + all_stat["natoms_vec"], + all_stat["default_mesh"], + all_stat, + ) self.fitting.compute_input_stats(all_stat, protection=protection) - def _compute_output_stat (self, all_stat, mixed_type=False): + def _compute_output_stat(self, all_stat, mixed_type=False): if mixed_type: self.fitting.compute_output_stats(all_stat, mixed_type=mixed_type) else: self.fitting.compute_output_stats(all_stat) + def build( + self, + coord_, + atype_, + natoms, + box, + mesh, + input_dict, + frz_model=None, + ckpt_meta: Optional[str] = None, + suffix="", + reuse=None, + ): - def build (self, - coord_, - atype_, - natoms, - box, - mesh, - input_dict, - frz_model = None, - ckpt_meta: Optional[str] = None, - suffix = '', - reuse = 
None): - if input_dict is None: input_dict = {} - with tf.variable_scope('model_attr' + suffix, reuse = reuse) : - t_tmap = tf.constant(' '.join(self.type_map), - name = 'tmap', - dtype = tf.string) - t_mt = tf.constant(self.model_type, - name = 'model_type', - dtype = tf.string) - t_ver = tf.constant(MODEL_VERSION, - name = 'model_version', - dtype = tf.string) - - if self.srtab is not None : + with tf.variable_scope("model_attr" + suffix, reuse=reuse): + t_tmap = tf.constant(" ".join(self.type_map), name="tmap", dtype=tf.string) + t_mt = tf.constant(self.model_type, name="model_type", dtype=tf.string) + t_ver = tf.constant(MODEL_VERSION, name="model_version", dtype=tf.string) + + if self.srtab is not None: tab_info, tab_data = self.srtab.get() - self.tab_info = tf.get_variable('t_tab_info', - tab_info.shape, - dtype = tf.float64, - trainable = False, - initializer = tf.constant_initializer(tab_info, dtype = tf.float64)) - self.tab_data = tf.get_variable('t_tab_data', - tab_data.shape, - dtype = tf.float64, - trainable = False, - initializer = tf.constant_initializer(tab_data, dtype = tf.float64)) - - coord = tf.reshape (coord_, [-1, natoms[1] * 3]) - atype = tf.reshape (atype_, [-1, natoms[1]]) - input_dict['nframes'] = tf.shape(coord)[0] + self.tab_info = tf.get_variable( + "t_tab_info", + tab_info.shape, + dtype=tf.float64, + trainable=False, + initializer=tf.constant_initializer(tab_info, dtype=tf.float64), + ) + self.tab_data = tf.get_variable( + "t_tab_data", + tab_data.shape, + dtype=tf.float64, + trainable=False, + initializer=tf.constant_initializer(tab_data, dtype=tf.float64), + ) + + coord = tf.reshape(coord_, [-1, natoms[1] * 3]) + atype = tf.reshape(atype_, [-1, natoms[1]]) + input_dict["nframes"] = tf.shape(coord)[0] # type embedding if any if self.typeebd is not None: type_embedding = self.typeebd.build( self.ntypes, - reuse = reuse, - suffix = suffix, + reuse=reuse, + suffix=suffix, ) - input_dict['type_embedding'] = type_embedding - 
input_dict['atype'] = atype_ + input_dict["type_embedding"] = type_embedding + input_dict["atype"] = atype_ dout = self.build_descrpt( - coord, atype, natoms, box, mesh, input_dict, + coord, + atype, + natoms, + box, + mesh, + input_dict, frz_model=frz_model, ckpt_meta=ckpt_meta, suffix=suffix, - reuse=reuse) + reuse=reuse, + ) - if self.srtab is not None : + if self.srtab is not None: nlist, rij, sel_a, sel_r = self.descrpt.get_nlist() nnei_a = np.cumsum(sel_a)[-1] nnei_r = np.cumsum(sel_r)[-1] - atom_ener = self.fitting.build (dout, - natoms, - input_dict, - reuse = reuse, - suffix = suffix) + atom_ener = self.fitting.build( + dout, natoms, input_dict, reuse=reuse, suffix=suffix + ) self.atom_ener = atom_ener - if self.srtab is not None : - sw_lambda, sw_deriv \ - = op_module.soft_min_switch(atype, - rij, - nlist, - natoms, - sel_a = sel_a, - sel_r = sel_r, - alpha = self.smin_alpha, - rmin = self.sw_rmin, - rmax = self.sw_rmax) + if self.srtab is not None: + sw_lambda, sw_deriv = op_module.soft_min_switch( + atype, + rij, + nlist, + natoms, + sel_a=sel_a, + sel_r=sel_r, + alpha=self.smin_alpha, + rmin=self.sw_rmin, + rmax=self.sw_rmax, + ) inv_sw_lambda = 1.0 - sw_lambda # NOTICE: - # atom energy is not scaled, + # atom energy is not scaled, # force and virial are scaled - tab_atom_ener, tab_force, tab_atom_virial \ - = op_module.pair_tab(self.tab_info, - self.tab_data, - atype, - rij, - nlist, - natoms, - sw_lambda, - sel_a = sel_a, - sel_r = sel_r) + tab_atom_ener, tab_force, tab_atom_virial = op_module.pair_tab( + self.tab_info, + self.tab_data, + atype, + rij, + nlist, + natoms, + sw_lambda, + sel_a=sel_a, + sel_r=sel_r, + ) energy_diff = tab_atom_ener - tf.reshape(atom_ener, [-1, natoms[0]]) - tab_atom_ener = tf.reshape(sw_lambda, [-1]) * tf.reshape(tab_atom_ener, [-1]) + tab_atom_ener = tf.reshape(sw_lambda, [-1]) * tf.reshape( + tab_atom_ener, [-1] + ) atom_ener = tf.reshape(inv_sw_lambda, [-1]) * atom_ener energy_raw = tab_atom_ener + atom_ener - else : 
+ else: energy_raw = atom_ener - energy_raw = tf.reshape(energy_raw, [-1, natoms[0]], name = 'o_atom_energy'+suffix) - energy = tf.reduce_sum(global_cvt_2_ener_float(energy_raw), axis=1, name='o_energy'+suffix) + energy_raw = tf.reshape( + energy_raw, [-1, natoms[0]], name="o_atom_energy" + suffix + ) + energy = tf.reduce_sum( + global_cvt_2_ener_float(energy_raw), axis=1, name="o_energy" + suffix + ) - force, virial, atom_virial \ - = self.descrpt.prod_force_virial (atom_ener, natoms) + force, virial, atom_virial = self.descrpt.prod_force_virial(atom_ener, natoms) - if self.srtab is not None : - sw_force \ - = op_module.soft_min_force(energy_diff, - sw_deriv, - nlist, - natoms, - n_a_sel = nnei_a, - n_r_sel = nnei_r) + if self.srtab is not None: + sw_force = op_module.soft_min_force( + energy_diff, sw_deriv, nlist, natoms, n_a_sel=nnei_a, n_r_sel=nnei_r + ) force = force + sw_force + tab_force - force = tf.reshape (force, [-1, 3 * natoms[1]], name = "o_force"+suffix) - - if self.srtab is not None : - sw_virial, sw_atom_virial \ - = op_module.soft_min_virial (energy_diff, - sw_deriv, - rij, - nlist, - natoms, - n_a_sel = nnei_a, - n_r_sel = nnei_r) + force = tf.reshape(force, [-1, 3 * natoms[1]], name="o_force" + suffix) + + if self.srtab is not None: + sw_virial, sw_atom_virial = op_module.soft_min_virial( + energy_diff, + sw_deriv, + rij, + nlist, + natoms, + n_a_sel=nnei_a, + n_r_sel=nnei_r, + ) atom_virial = atom_virial + sw_atom_virial + tab_atom_virial - virial = virial + sw_virial \ - + tf.reduce_sum(tf.reshape(tab_atom_virial, [-1, natoms[1], 9]), axis = 1) + virial = ( + virial + + sw_virial + + tf.reduce_sum(tf.reshape(tab_atom_virial, [-1, natoms[1], 9]), axis=1) + ) - virial = tf.reshape (virial, [-1, 9], name = "o_virial"+suffix) - atom_virial = tf.reshape (atom_virial, [-1, 9 * natoms[1]], name = "o_atom_virial"+suffix) + virial = tf.reshape(virial, [-1, 9], name="o_virial" + suffix) + atom_virial = tf.reshape( + atom_virial, [-1, 9 * natoms[1]], 
name="o_atom_virial" + suffix + ) model_dict = {} - model_dict['energy'] = energy - model_dict['force'] = force - model_dict['virial'] = virial - model_dict['atom_ener'] = energy_raw - model_dict['atom_virial'] = atom_virial - model_dict['coord'] = coord - model_dict['atype'] = atype - + model_dict["energy"] = energy + model_dict["force"] = force + model_dict["virial"] = virial + model_dict["atom_ener"] = energy_raw + model_dict["atom_virial"] = atom_virial + model_dict["coord"] = coord + model_dict["atype"] = atype + return model_dict - def init_variables(self, - graph : tf.Graph, - graph_def : tf.GraphDef, - model_type : str = "original_model", - suffix : str = "", + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + model_type: str = "original_model", + suffix: str = "", ) -> None: """ Init the embedding net variables with the given frozen model @@ -289,13 +326,13 @@ def init_variables(self, """ # self.frz_model will control the self.model to import the descriptor from the given frozen model instead of building from scratch... 
# initialize fitting net with the given compressed frozen model - if model_type == 'original_model': + if model_type == "original_model": self.descrpt.init_variables(graph, graph_def, suffix=suffix) self.fitting.init_variables(graph, graph_def, suffix=suffix) - tf.constant("original_model", name = 'model_type', dtype = tf.string) - elif model_type == 'compressed_model': + tf.constant("original_model", name="model_type", dtype=tf.string) + elif model_type == "compressed_model": self.fitting.init_variables(graph, graph_def, suffix=suffix) - tf.constant("compressed_model", name = 'model_type', dtype = tf.string) + tf.constant("compressed_model", name="model_type", dtype=tf.string) else: raise RuntimeError("Unknown model type %s" % model_type) if self.typeebd is not None: diff --git a/deepmd/model/model.py b/deepmd/model/model.py index be8d8da4e7..14c4bca1c9 100644 --- a/deepmd/model/model.py +++ b/deepmd/model/model.py @@ -1,13 +1,28 @@ -from typing import Optional, Union, List -from abc import ABC, abstractmethod -from enum import Enum +from abc import ( + ABC, + abstractmethod, +) +from enum import ( + Enum, +) +from typing import ( + List, + Optional, + Union, +) + +from deepmd.env import ( + GLOBAL_TF_FLOAT_PRECISION, + tf, +) +from deepmd.utils.graph import ( + load_graph_def, +) -from deepmd.env import tf, GLOBAL_TF_FLOAT_PRECISION -from deepmd.utils.graph import load_graph_def class Model(ABC): @abstractmethod - def build ( + def build( self, coord_: tf.Tensor, atype_: tf.Tensor, @@ -17,8 +32,8 @@ def build ( input_dict: dict, frz_model: Optional[str] = None, ckpt_meta: Optional[str] = None, - suffix: str = '', - reuse: Optional[Union[bool, Enum]] = None + suffix: str = "", + reuse: Optional[Union[bool, Enum]] = None, ): """Build the model. 
@@ -51,11 +66,12 @@ def build ( The output dict """ - def init_variables(self, - graph : tf.Graph, - graph_def : tf.GraphDef, - model_type : str = "original_model", - suffix : str = "", + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + model_type: str = "original_model", + suffix: str = "", ) -> None: """ Init the embedding net variables with the given frozen model @@ -71,21 +87,23 @@ def init_variables(self, suffix : str suffix to name scope """ - raise RuntimeError("The 'dp train init-frz-model' command do not support this model!") + raise RuntimeError( + "The 'dp train init-frz-model' command do not support this model!" + ) def build_descrpt( - self, - coord_: tf.Tensor, - atype_: tf.Tensor, - natoms: tf.Tensor, - box: tf.Tensor, - mesh: tf.Tensor, - input_dict: dict, - frz_model: Optional[str] = None, - ckpt_meta: Optional[str] = None, - suffix: str = '', - reuse: Optional[Union[bool, Enum]] = None - ): + self, + coord_: tf.Tensor, + atype_: tf.Tensor, + natoms: tf.Tensor, + box: tf.Tensor, + mesh: tf.Tensor, + input_dict: dict, + frz_model: Optional[str] = None, + ckpt_meta: Optional[str] = None, + suffix: str = "", + reuse: Optional[Union[bool, Enum]] = None, + ): """Build the descriptor part of the model. 
Parameters @@ -117,47 +135,56 @@ def build_descrpt( The descriptor tensor """ if frz_model is None and ckpt_meta is None: - dout \ - = self.descrpt.build(coord_, - atype_, - natoms, - box, - mesh, - input_dict, - suffix = suffix, - reuse = reuse) - dout = tf.identity(dout, name='o_descriptor') + dout = self.descrpt.build( + coord_, + atype_, + natoms, + box, + mesh, + input_dict, + suffix=suffix, + reuse=reuse, + ) + dout = tf.identity(dout, name="o_descriptor") else: - tf.constant(self.rcut, - name = 'descrpt_attr/rcut', - dtype = GLOBAL_TF_FLOAT_PRECISION) - tf.constant(self.ntypes, - name = 'descrpt_attr/ntypes', - dtype = tf.int32) + tf.constant( + self.rcut, name="descrpt_attr/rcut", dtype=GLOBAL_TF_FLOAT_PRECISION + ) + tf.constant(self.ntypes, name="descrpt_attr/ntypes", dtype=tf.int32) feed_dict = self.descrpt.get_feed_dict(coord_, atype_, natoms, box, mesh) - return_elements = [*self.descrpt.get_tensor_names(), 'o_descriptor:0'] + return_elements = [*self.descrpt.get_tensor_names(), "o_descriptor:0"] if frz_model is not None: - imported_tensors \ - = self._import_graph_def_from_frz_model(frz_model, feed_dict, return_elements) + imported_tensors = self._import_graph_def_from_frz_model( + frz_model, feed_dict, return_elements + ) elif ckpt_meta is not None: - imported_tensors \ - = self._import_graph_def_from_ckpt_meta(ckpt_meta, feed_dict, return_elements) + imported_tensors = self._import_graph_def_from_ckpt_meta( + ckpt_meta, feed_dict, return_elements + ) else: raise RuntimeError("should not reach here") # pragma: no cover dout = imported_tensors[-1] self.descrpt.pass_tensors_from_frz_model(*imported_tensors[:-1]) return dout - def _import_graph_def_from_frz_model(self, frz_model: str, feed_dict: dict, return_elements: List[str]): + def _import_graph_def_from_frz_model( + self, frz_model: str, feed_dict: dict, return_elements: List[str] + ): return_nodes = [x[:-2] for x in return_elements] graph, graph_def = load_graph_def(frz_model) sub_graph_def = 
tf.graph_util.extract_sub_graph(graph_def, return_nodes) - return tf.import_graph_def(sub_graph_def, input_map = feed_dict, return_elements = return_elements, name = "") + return tf.import_graph_def( + sub_graph_def, input_map=feed_dict, return_elements=return_elements, name="" + ) - def _import_graph_def_from_ckpt_meta(self, ckpt_meta: str, feed_dict: dict, return_elements: List[str]): + def _import_graph_def_from_ckpt_meta( + self, ckpt_meta: str, feed_dict: dict, return_elements: List[str] + ): return_nodes = [x[:-2] for x in return_elements] with tf.Graph().as_default() as graph: tf.train.import_meta_graph(f"{ckpt_meta}.meta", clear_devices=True) graph_def = graph.as_graph_def() sub_graph_def = tf.graph_util.extract_sub_graph(graph_def, return_nodes) - return tf.import_graph_def(sub_graph_def, input_map = feed_dict, return_elements = return_elements, name = "") + return tf.import_graph_def( + sub_graph_def, input_map=feed_dict, return_elements=return_elements, name="" + ) diff --git a/deepmd/model/model_stat.py b/deepmd/model/model_stat.py index b7aa66397c..26c9fb3910 100644 --- a/deepmd/model/model_stat.py +++ b/deepmd/model/model_stat.py @@ -1,19 +1,23 @@ +from collections import ( + defaultdict, +) + import numpy as np -from collections import defaultdict + def _make_all_stat_ref(data, nbatches): all_stat = defaultdict(list) - for ii in range(data.get_nsystems()) : - for jj in range(nbatches) : - stat_data = data.get_batch (sys_idx = ii) + for ii in range(data.get_nsystems()): + for jj in range(nbatches): + stat_data = data.get_batch(sys_idx=ii) for dd in stat_data: if dd == "natoms_vec": - stat_data[dd] = stat_data[dd].astype(np.int32) - all_stat[dd].append(stat_data[dd]) + stat_data[dd] = stat_data[dd].astype(np.int32) + all_stat[dd].append(stat_data[dd]) return all_stat -def make_stat_input(data, nbatches, merge_sys = True): +def make_stat_input(data, nbatches, merge_sys=True): """ pack data for statistics @@ -27,29 +31,30 @@ def make_stat_input(data, 
nbatches, merge_sys = True): Returns ------- all_stat: - A dictionary of list of list storing data for stat. - if merge_sys == False data can be accessed by + A dictionary of list of list storing data for stat. + if merge_sys == False data can be accessed by all_stat[key][sys_idx][batch_idx][frame_idx] - else merge_sys == True can be accessed by + else merge_sys == True can be accessed by all_stat[key][batch_idx][frame_idx] """ all_stat = defaultdict(list) - for ii in range(data.get_nsystems()) : - sys_stat = defaultdict(list) - for jj in range(nbatches) : - stat_data = data.get_batch (sys_idx = ii) + for ii in range(data.get_nsystems()): + sys_stat = defaultdict(list) + for jj in range(nbatches): + stat_data = data.get_batch(sys_idx=ii) for dd in stat_data: if dd == "natoms_vec": - stat_data[dd] = stat_data[dd].astype(np.int32) + stat_data[dd] = stat_data[dd].astype(np.int32) sys_stat[dd].append(stat_data[dd]) for dd in sys_stat: if merge_sys: for bb in sys_stat[dd]: all_stat[dd].append(bb) - else: + else: all_stat[dd].append(sys_stat[dd]) return all_stat + def merge_sys_stat(all_stat): first_key = list(all_stat.keys())[0] nsys = len(all_stat[first_key]) @@ -59,4 +64,3 @@ def merge_sys_stat(all_stat): for bb in all_stat[dd][ii]: ret[dd].append(bb) return ret - diff --git a/deepmd/model/multi.py b/deepmd/model/multi.py index d74acfeb48..c8f2a998c3 100644 --- a/deepmd/model/multi.py +++ b/deepmd/model/multi.py @@ -1,20 +1,39 @@ -import numpy as np -from typing import Optional, Tuple, List - -from deepmd.env import tf -from deepmd.utils.pair_tab import PairTab -from deepmd.utils.graph import load_graph_def -from deepmd.env import global_cvt_2_ener_float, MODEL_VERSION, GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import op_module -from .model import Model -from .model_stat import make_stat_input, merge_sys_stat -from deepmd.utils.graph import get_tensor_by_name_from_graph import json +from typing import ( + List, + Optional, + Tuple, +) + +import numpy as np + +from 
deepmd.env import ( + GLOBAL_TF_FLOAT_PRECISION, + MODEL_VERSION, + global_cvt_2_ener_float, + op_module, + tf, +) +from deepmd.utils.graph import ( + get_tensor_by_name_from_graph, + load_graph_def, +) +from deepmd.utils.pair_tab import ( + PairTab, +) + +from .model import ( + Model, +) +from .model_stat import ( + make_stat_input, + merge_sys_stat, +) class MultiModel(Model): """Multi-task model. - + Parameters ---------- descrpt @@ -41,21 +60,22 @@ class MultiModel(Model): sw_rmin The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided. """ - model_type = 'multi_task' + + model_type = "multi_task" def __init__( - self, - descrpt, - fitting_dict, - fitting_type_dict, - typeebd=None, - type_map: Optional[List[str]] = None, - data_stat_nbatch: int = 10, - data_stat_protect: float = 1e-2, - use_srtab: Optional[str] = None, # all the ener fitting will do this - smin_alpha: Optional[float] = None, - sw_rmin: Optional[float] = None, - sw_rmax: Optional[float] = None + self, + descrpt, + fitting_dict, + fitting_type_dict, + typeebd=None, + type_map: Optional[List[str]] = None, + data_stat_nbatch: int = 10, + data_stat_protect: float = 1e-2, + use_srtab: Optional[str] = None, # all the ener fitting will do this + smin_alpha: Optional[float] = None, + sw_rmin: Optional[float] = None, + sw_rmax: Optional[float] = None, ) -> None: """ Constructor @@ -67,8 +87,11 @@ def __init__( # fitting self.fitting_dict = fitting_dict self.fitting_type_dict = fitting_type_dict - self.numb_fparam_dict = {item: self.fitting_dict[item].get_numb_fparam() - for item in self.fitting_dict if self.fitting_type_dict[item] == 'ener'} + self.numb_fparam_dict = { + item: self.fitting_dict[item].get_numb_fparam() + for item in self.fitting_dict + if self.fitting_type_dict[item] == "ener" + } # type embedding self.typeebd = typeebd # other inputs @@ -98,97 +121,129 @@ def get_type_map(self): def data_stat(self, data): 
for fitting_key in data: - all_stat = make_stat_input(data[fitting_key], self.data_stat_nbatch, merge_sys=False) + all_stat = make_stat_input( + data[fitting_key], self.data_stat_nbatch, merge_sys=False + ) m_all_stat = merge_sys_stat(all_stat) - self._compute_input_stat(m_all_stat, protection=self.data_stat_protect, - mixed_type=data[fitting_key].mixed_type, fitting_key=fitting_key) - self._compute_output_stat(all_stat, mixed_type=data[fitting_key].mixed_type, fitting_key=fitting_key) + self._compute_input_stat( + m_all_stat, + protection=self.data_stat_protect, + mixed_type=data[fitting_key].mixed_type, + fitting_key=fitting_key, + ) + self._compute_output_stat( + all_stat, + mixed_type=data[fitting_key].mixed_type, + fitting_key=fitting_key, + ) self.descrpt.merge_input_stats(self.descrpt.stat_dict) - def _compute_input_stat(self, all_stat, protection=1e-2, mixed_type=False, fitting_key=''): + def _compute_input_stat( + self, all_stat, protection=1e-2, mixed_type=False, fitting_key="" + ): if mixed_type: - self.descrpt.compute_input_stats(all_stat['coord'], - all_stat['box'], - all_stat['type'], - all_stat['natoms_vec'], - all_stat['default_mesh'], - all_stat, - mixed_type, - all_stat['real_natoms_vec']) + self.descrpt.compute_input_stats( + all_stat["coord"], + all_stat["box"], + all_stat["type"], + all_stat["natoms_vec"], + all_stat["default_mesh"], + all_stat, + mixed_type, + all_stat["real_natoms_vec"], + ) else: - self.descrpt.compute_input_stats(all_stat['coord'], - all_stat['box'], - all_stat['type'], - all_stat['natoms_vec'], - all_stat['default_mesh'], - all_stat) - if hasattr(self.fitting_dict[fitting_key], 'compute_input_stats'): - self.fitting_dict[fitting_key].compute_input_stats(all_stat, protection=protection) - - def _compute_output_stat(self, all_stat, mixed_type=False, fitting_key=''): - if hasattr(self.fitting_dict[fitting_key], 'compute_output_stats'): + self.descrpt.compute_input_stats( + all_stat["coord"], + all_stat["box"], + 
all_stat["type"], + all_stat["natoms_vec"], + all_stat["default_mesh"], + all_stat, + ) + if hasattr(self.fitting_dict[fitting_key], "compute_input_stats"): + self.fitting_dict[fitting_key].compute_input_stats( + all_stat, protection=protection + ) + + def _compute_output_stat(self, all_stat, mixed_type=False, fitting_key=""): + if hasattr(self.fitting_dict[fitting_key], "compute_output_stats"): if mixed_type: - self.fitting_dict[fitting_key].compute_output_stats(all_stat, mixed_type=mixed_type) + self.fitting_dict[fitting_key].compute_output_stats( + all_stat, mixed_type=mixed_type + ) else: self.fitting_dict[fitting_key].compute_output_stats(all_stat) - def build(self, - coord_, - atype_, - natoms, - box, - mesh, - input_dict, - frz_model=None, - ckpt_meta: Optional[str] = None, - suffix='', - reuse=None): + def build( + self, + coord_, + atype_, + natoms, + box, + mesh, + input_dict, + frz_model=None, + ckpt_meta: Optional[str] = None, + suffix="", + reuse=None, + ): if input_dict is None: input_dict = {} - with tf.variable_scope('model_attr' + suffix, reuse=reuse): - t_tmap = tf.constant(' '.join(self.type_map), - name='tmap', - dtype=tf.string) - t_mt = tf.constant(self.model_type, - name='model_type', - dtype=tf.string) - t_ver = tf.constant(MODEL_VERSION, - name='model_version', - dtype=tf.string) + with tf.variable_scope("model_attr" + suffix, reuse=reuse): + t_tmap = tf.constant(" ".join(self.type_map), name="tmap", dtype=tf.string) + t_mt = tf.constant(self.model_type, name="model_type", dtype=tf.string) + t_ver = tf.constant(MODEL_VERSION, name="model_version", dtype=tf.string) t_st = {} t_od = {} sel_type = {} natomsel = {} nout = {} for fitting_key in self.fitting_dict: - if self.fitting_type_dict[fitting_key] in ['dipole', 'polar', 'global_polar']: - sel_type[fitting_key] = self.fitting_dict[fitting_key].get_sel_type() - natomsel[fitting_key] = sum(natoms[2 + type_i] for type_i in sel_type[fitting_key]) + if self.fitting_type_dict[fitting_key] in [ + 
"dipole", + "polar", + "global_polar", + ]: + sel_type[fitting_key] = self.fitting_dict[ + fitting_key + ].get_sel_type() + natomsel[fitting_key] = sum( + natoms[2 + type_i] for type_i in sel_type[fitting_key] + ) nout[fitting_key] = self.fitting_dict[fitting_key].get_out_size() - t_st[fitting_key] = tf.constant(sel_type[fitting_key], - name='sel_type_{}'.format(fitting_key), - dtype=tf.int32) - t_od[fitting_key] = tf.constant(nout[fitting_key], - name='output_dim_{}'.format(fitting_key), - dtype=tf.int32) + t_st[fitting_key] = tf.constant( + sel_type[fitting_key], + name="sel_type_{}".format(fitting_key), + dtype=tf.int32, + ) + t_od[fitting_key] = tf.constant( + nout[fitting_key], + name="output_dim_{}".format(fitting_key), + dtype=tf.int32, + ) if self.srtab is not None: tab_info, tab_data = self.srtab.get() - self.tab_info = tf.get_variable('t_tab_info', - tab_info.shape, - dtype=tf.float64, - trainable=False, - initializer=tf.constant_initializer(tab_info, dtype=tf.float64)) - self.tab_data = tf.get_variable('t_tab_data', - tab_data.shape, - dtype=tf.float64, - trainable=False, - initializer=tf.constant_initializer(tab_data, dtype=tf.float64)) + self.tab_info = tf.get_variable( + "t_tab_info", + tab_info.shape, + dtype=tf.float64, + trainable=False, + initializer=tf.constant_initializer(tab_info, dtype=tf.float64), + ) + self.tab_data = tf.get_variable( + "t_tab_data", + tab_data.shape, + dtype=tf.float64, + trainable=False, + initializer=tf.constant_initializer(tab_data, dtype=tf.float64), + ) coord = tf.reshape(coord_, [-1, natoms[1] * 3]) atype = tf.reshape(atype_, [-1, natoms[1]]) - input_dict['nframes'] = tf.shape(coord)[0] + input_dict["nframes"] = tf.shape(coord)[0] # type embedding if any if self.typeebd is not None: @@ -197,130 +252,183 @@ def build(self, reuse=reuse, suffix=suffix, ) - input_dict['type_embedding'] = type_embedding - input_dict['atype'] = atype_ + input_dict["type_embedding"] = type_embedding + input_dict["atype"] = atype_ dout = 
self.build_descrpt( - coord, atype, natoms, box, mesh, input_dict, + coord, + atype, + natoms, + box, + mesh, + input_dict, frz_model=frz_model, ckpt_meta=ckpt_meta, suffix=suffix, - reuse=reuse) - dout = tf.identity(dout, name='o_descriptor') + reuse=reuse, + ) + dout = tf.identity(dout, name="o_descriptor") if self.srtab is not None: nlist, rij, sel_a, sel_r = self.descrpt.get_nlist() nnei_a = np.cumsum(sel_a)[-1] nnei_r = np.cumsum(sel_r)[-1] - sw_lambda, sw_deriv \ - = op_module.soft_min_switch(atype, - rij, - nlist, - natoms, - sel_a=sel_a, - sel_r=sel_r, - alpha=self.smin_alpha, - rmin=self.sw_rmin, - rmax=self.sw_rmax) + sw_lambda, sw_deriv = op_module.soft_min_switch( + atype, + rij, + nlist, + natoms, + sel_a=sel_a, + sel_r=sel_r, + alpha=self.smin_alpha, + rmin=self.sw_rmin, + rmax=self.sw_rmax, + ) inv_sw_lambda = 1.0 - sw_lambda # NOTICE: # atom energy is not scaled, # force and virial are scaled - tab_atom_ener, tab_force, tab_atom_virial \ - = op_module.pair_tab(self.tab_info, - self.tab_data, - atype, - rij, - nlist, - natoms, - sw_lambda, - sel_a=sel_a, - sel_r=sel_r) + tab_atom_ener, tab_force, tab_atom_virial = op_module.pair_tab( + self.tab_info, + self.tab_data, + atype, + rij, + nlist, + natoms, + sw_lambda, + sel_a=sel_a, + sel_r=sel_r, + ) rot_mat = self.descrpt.get_rot_mat() - rot_mat = tf.identity(rot_mat, name='o_rot_mat' + suffix) + rot_mat = tf.identity(rot_mat, name="o_rot_mat" + suffix) self.atom_ener = {} model_dict = {} for fitting_key in self.fitting_dict: - if self.fitting_type_dict[fitting_key] == 'ener': - atom_ener = self.fitting_dict[fitting_key].build(dout, - natoms, - input_dict, - reuse=reuse, - suffix='_{}'.format(fitting_key) + suffix) + if self.fitting_type_dict[fitting_key] == "ener": + atom_ener = self.fitting_dict[fitting_key].build( + dout, + natoms, + input_dict, + reuse=reuse, + suffix="_{}".format(fitting_key) + suffix, + ) self.atom_ener[fitting_key] = atom_ener if self.srtab is not None: energy_diff = 
tab_atom_ener - tf.reshape(atom_ener, [-1, natoms[0]]) - tab_atom_ener = tf.reshape(sw_lambda, [-1]) * tf.reshape(tab_atom_ener, [-1]) + tab_atom_ener = tf.reshape(sw_lambda, [-1]) * tf.reshape( + tab_atom_ener, [-1] + ) atom_ener = tf.reshape(inv_sw_lambda, [-1]) * atom_ener energy_raw = tab_atom_ener + atom_ener else: energy_raw = atom_ener - energy_raw = tf.reshape(energy_raw, [-1, natoms[0]], - name='o_atom_energy_{}'.format(fitting_key) + suffix) - energy = tf.reduce_sum(global_cvt_2_ener_float(energy_raw), axis=1, - name='o_energy_{}'.format(fitting_key) + suffix) - force, virial, atom_virial \ - = self.descrpt.prod_force_virial(atom_ener, natoms) + energy_raw = tf.reshape( + energy_raw, + [-1, natoms[0]], + name="o_atom_energy_{}".format(fitting_key) + suffix, + ) + energy = tf.reduce_sum( + global_cvt_2_ener_float(energy_raw), + axis=1, + name="o_energy_{}".format(fitting_key) + suffix, + ) + force, virial, atom_virial = self.descrpt.prod_force_virial( + atom_ener, natoms + ) if self.srtab is not None: - sw_force \ - = op_module.soft_min_force(energy_diff, - sw_deriv, - nlist, - natoms, - n_a_sel=nnei_a, - n_r_sel=nnei_r) + sw_force = op_module.soft_min_force( + energy_diff, + sw_deriv, + nlist, + natoms, + n_a_sel=nnei_a, + n_r_sel=nnei_r, + ) force = force + sw_force + tab_force - force = tf.reshape(force, [-1, 3 * natoms[1]], name="o_force_{}".format(fitting_key) + suffix) + force = tf.reshape( + force, + [-1, 3 * natoms[1]], + name="o_force_{}".format(fitting_key) + suffix, + ) if self.srtab is not None: - sw_virial, sw_atom_virial \ - = op_module.soft_min_virial(energy_diff, - sw_deriv, - rij, - nlist, - natoms, - n_a_sel=nnei_a, - n_r_sel=nnei_r) + sw_virial, sw_atom_virial = op_module.soft_min_virial( + energy_diff, + sw_deriv, + rij, + nlist, + natoms, + n_a_sel=nnei_a, + n_r_sel=nnei_r, + ) atom_virial = atom_virial + sw_atom_virial + tab_atom_virial - virial = virial + sw_virial + tf.reduce_sum(tf.reshape(tab_atom_virial, [-1, natoms[1], 9]), 
axis=1) - - virial = tf.reshape(virial, [-1, 9], name="o_virial_{}".format(fitting_key) + suffix) - atom_virial = tf.reshape(atom_virial, [-1, 9 * natoms[1]], - name="o_atom_virial_{}".format(fitting_key) + suffix) + virial = ( + virial + + sw_virial + + tf.reduce_sum( + tf.reshape(tab_atom_virial, [-1, natoms[1], 9]), axis=1 + ) + ) + + virial = tf.reshape( + virial, [-1, 9], name="o_virial_{}".format(fitting_key) + suffix + ) + atom_virial = tf.reshape( + atom_virial, + [-1, 9 * natoms[1]], + name="o_atom_virial_{}".format(fitting_key) + suffix, + ) model_dict[fitting_key] = {} - model_dict[fitting_key]['energy'] = energy - model_dict[fitting_key]['force'] = force - model_dict[fitting_key]['virial'] = virial - model_dict[fitting_key]['atom_ener'] = energy_raw - model_dict[fitting_key]['atom_virial'] = atom_virial - model_dict[fitting_key]['coord'] = coord - model_dict[fitting_key]['atype'] = atype - elif self.fitting_type_dict[fitting_key] in ['dipole', 'polar', 'global_polar']: + model_dict[fitting_key]["energy"] = energy + model_dict[fitting_key]["force"] = force + model_dict[fitting_key]["virial"] = virial + model_dict[fitting_key]["atom_ener"] = energy_raw + model_dict[fitting_key]["atom_virial"] = atom_virial + model_dict[fitting_key]["coord"] = coord + model_dict[fitting_key]["atype"] = atype + elif self.fitting_type_dict[fitting_key] in [ + "dipole", + "polar", + "global_polar", + ]: tensor_name = self.fitting_type_dict[fitting_key] - output = self.fitting_dict[fitting_key].build(dout, - rot_mat, - natoms, - input_dict, - reuse=reuse, - suffix='_{}'.format(fitting_key) + suffix) - framesize = nout if "global" in tensor_name else \ - natomsel[fitting_key] * nout[fitting_key] - output = tf.reshape(output, [-1, framesize], - name='o_{}_{}'.format(tensor_name, fitting_key) + suffix) + output = self.fitting_dict[fitting_key].build( + dout, + rot_mat, + natoms, + input_dict, + reuse=reuse, + suffix="_{}".format(fitting_key) + suffix, + ) + framesize = ( + nout + 
if "global" in tensor_name + else natomsel[fitting_key] * nout[fitting_key] + ) + output = tf.reshape( + output, + [-1, framesize], + name="o_{}_{}".format(tensor_name, fitting_key) + suffix, + ) model_dict[fitting_key] = {} model_dict[fitting_key][tensor_name] = output if "global" not in tensor_name: gname = "global_" + tensor_name - atom_out = tf.reshape(output, [-1, natomsel[fitting_key], nout[fitting_key]]) + atom_out = tf.reshape( + output, [-1, natomsel[fitting_key], nout[fitting_key]] + ) global_out = tf.reduce_sum(atom_out, axis=1) - global_out = tf.reshape(global_out, [-1, nout[fitting_key]], - name="o_{}_{}".format(gname, fitting_key) + suffix) + global_out = tf.reshape( + global_out, + [-1, nout[fitting_key]], + name="o_{}_{}".format(gname, fitting_key) + suffix, + ) out_cpnts = tf.split(atom_out, nout[fitting_key], axis=-1) force_cpnts = [] @@ -328,21 +436,35 @@ def build(self, atom_virial_cpnts = [] for out_i in out_cpnts: - force_i, virial_i, atom_virial_i \ - = self.descrpt.prod_force_virial(out_i, natoms) + ( + force_i, + virial_i, + atom_virial_i, + ) = self.descrpt.prod_force_virial(out_i, natoms) force_cpnts.append(tf.reshape(force_i, [-1, 3 * natoms[1]])) virial_cpnts.append(tf.reshape(virial_i, [-1, 9])) - atom_virial_cpnts.append(tf.reshape(atom_virial_i, [-1, 9 * natoms[1]])) + atom_virial_cpnts.append( + tf.reshape(atom_virial_i, [-1, 9 * natoms[1]]) + ) # [nframe x nout x (natom x 3)] - force = tf.concat(force_cpnts, axis=1, - name="o_force_{}".format(fitting_key) + suffix) + force = tf.concat( + force_cpnts, + axis=1, + name="o_force_{}".format(fitting_key) + suffix, + ) # [nframe x nout x 9] - virial = tf.concat(virial_cpnts, axis=1, - name="o_virial_{}".format(fitting_key) + suffix) + virial = tf.concat( + virial_cpnts, + axis=1, + name="o_virial_{}".format(fitting_key) + suffix, + ) # [nframe x nout x (natom x 9)] - atom_virial = tf.concat(atom_virial_cpnts, axis=1, - name="o_atom_virial_{}".format(fitting_key) + suffix) + atom_virial = 
tf.concat( + atom_virial_cpnts, + axis=1, + name="o_atom_virial_{}".format(fitting_key) + suffix, + ) model_dict[fitting_key][gname] = global_out model_dict[fitting_key]["force"] = force @@ -351,12 +473,13 @@ def build(self, return model_dict - def init_variables(self, - graph: tf.Graph, - graph_def: tf.GraphDef, - model_type: str = "original_model", - suffix: str = "", - ) -> None: + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + model_type: str = "original_model", + suffix: str = "", + ) -> None: """ Init the embedding net variables with the given frozen model @@ -373,14 +496,20 @@ def init_variables(self, """ # self.frz_model will control the self.model to import the descriptor from the given frozen model instead of building from scratch... # initialize fitting net with the given compressed frozen model - assert model_type == 'original_model', 'Initialization in multi-task mode does not support compressed model!' + assert ( + model_type == "original_model" + ), "Initialization in multi-task mode does not support compressed model!" 
self.descrpt.init_variables(graph, graph_def, suffix=suffix) - old_jdata = json.loads(get_tensor_by_name_from_graph(graph, 'train_attr/training_script')) - old_fitting_keys = list(old_jdata['model']['fitting_net_dict'].keys()) + old_jdata = json.loads( + get_tensor_by_name_from_graph(graph, "train_attr/training_script") + ) + old_fitting_keys = list(old_jdata["model"]["fitting_net_dict"].keys()) newly_added_fittings = set(self.fitting_dict.keys()) - set(old_fitting_keys) reused_fittings = set(self.fitting_dict.keys()) - newly_added_fittings for fitting_key in reused_fittings: - self.fitting_dict[fitting_key].init_variables(graph, graph_def, suffix=f'_{fitting_key}' + suffix) - tf.constant("original_model", name='model_type', dtype=tf.string) + self.fitting_dict[fitting_key].init_variables( + graph, graph_def, suffix=f"_{fitting_key}" + suffix + ) + tf.constant("original_model", name="model_type", dtype=tf.string) if self.typeebd is not None: self.typeebd.init_variables(graph, graph_def, suffix=suffix) diff --git a/deepmd/model/tensor.py b/deepmd/model/tensor.py index 77e5492462..045150b0a1 100644 --- a/deepmd/model/tensor.py +++ b/deepmd/model/tensor.py @@ -1,12 +1,28 @@ +from typing import ( + List, + Optional, + Tuple, +) + import numpy as np -from typing import Optional, Tuple, List -from deepmd.env import tf -from deepmd.env import global_cvt_2_ener_float, MODEL_VERSION, GLOBAL_TF_FLOAT_PRECISION -from .model import Model -from .model_stat import make_stat_input, merge_sys_stat +from deepmd.env import ( + GLOBAL_TF_FLOAT_PRECISION, + MODEL_VERSION, + global_cvt_2_ener_float, + tf, +) + +from .model import ( + Model, +) +from .model_stat import ( + make_stat_input, + merge_sys_stat, +) + -class TensorModel(Model) : +class TensorModel(Model): """Tensor model. 
Parameters @@ -27,16 +43,17 @@ class TensorModel(Model) : data_stat_protect Protect parameter for atomic energy regression """ - def __init__ ( - self, - tensor_name : str, - descrpt, - fitting, - typeebd=None, - type_map : List[str] = None, - data_stat_nbatch : int = 10, - data_stat_protect : float = 1e-2, - )->None: + + def __init__( + self, + tensor_name: str, + descrpt, + fitting, + typeebd=None, + type_map: List[str] = None, + data_stat_nbatch: int = 10, + data_stat_protect: float = 1e-2, + ) -> None: """ Constructor """ @@ -56,79 +73,73 @@ def __init__ ( self.type_map = type_map self.data_stat_nbatch = data_stat_nbatch self.data_stat_protect = data_stat_protect - - def get_rcut (self) : + + def get_rcut(self): return self.rcut - def get_ntypes (self) : + def get_ntypes(self): return self.ntypes - def get_type_map (self) : + def get_type_map(self): return self.type_map def get_sel_type(self): return self.fitting.get_sel_type() - def get_out_size (self) : + def get_out_size(self): return self.fitting.get_out_size() def data_stat(self, data): - all_stat = make_stat_input(data, self.data_stat_nbatch, merge_sys = False) - m_all_stat = merge_sys_stat(all_stat) - self._compute_input_stat (m_all_stat, protection = self.data_stat_protect) + all_stat = make_stat_input(data, self.data_stat_nbatch, merge_sys=False) + m_all_stat = merge_sys_stat(all_stat) + self._compute_input_stat(m_all_stat, protection=self.data_stat_protect) self._compute_output_stat(all_stat) - def _compute_input_stat(self, all_stat, protection = 1e-2) : - self.descrpt.compute_input_stats(all_stat['coord'], - all_stat['box'], - all_stat['type'], - all_stat['natoms_vec'], - all_stat['default_mesh'], - all_stat) - if hasattr(self.fitting, 'compute_input_stats'): - self.fitting.compute_input_stats(all_stat, protection = protection) - - def _compute_output_stat (self, all_stat) : - if hasattr(self.fitting, 'compute_output_stats'): + def _compute_input_stat(self, all_stat, protection=1e-2): + 
self.descrpt.compute_input_stats( + all_stat["coord"], + all_stat["box"], + all_stat["type"], + all_stat["natoms_vec"], + all_stat["default_mesh"], + all_stat, + ) + if hasattr(self.fitting, "compute_input_stats"): + self.fitting.compute_input_stats(all_stat, protection=protection) + + def _compute_output_stat(self, all_stat): + if hasattr(self.fitting, "compute_output_stats"): self.fitting.compute_output_stats(all_stat) - def build (self, - coord_, - atype_, - natoms, - box, - mesh, - input_dict, - frz_model = None, - ckpt_meta: Optional[str] = None, - suffix = '', - reuse = None): + def build( + self, + coord_, + atype_, + natoms, + box, + mesh, + input_dict, + frz_model=None, + ckpt_meta: Optional[str] = None, + suffix="", + reuse=None, + ): if input_dict is None: input_dict = {} - with tf.variable_scope('model_attr' + suffix, reuse = reuse) : - t_tmap = tf.constant(' '.join(self.type_map), - name = 'tmap', - dtype = tf.string) - t_st = tf.constant(self.get_sel_type(), - name = 'sel_type', - dtype = tf.int32) - t_mt = tf.constant(self.model_type, - name = 'model_type', - dtype = tf.string) - t_ver = tf.constant(MODEL_VERSION, - name = 'model_version', - dtype = tf.string) - t_od = tf.constant(self.get_out_size(), - name = 'output_dim', - dtype = tf.int32) - - natomsel = sum(natoms[2+type_i] for type_i in self.get_sel_type()) + with tf.variable_scope("model_attr" + suffix, reuse=reuse): + t_tmap = tf.constant(" ".join(self.type_map), name="tmap", dtype=tf.string) + t_st = tf.constant(self.get_sel_type(), name="sel_type", dtype=tf.int32) + t_mt = tf.constant(self.model_type, name="model_type", dtype=tf.string) + t_ver = tf.constant(MODEL_VERSION, name="model_version", dtype=tf.string) + t_od = tf.constant(self.get_out_size(), name="output_dim", dtype=tf.int32) + + natomsel = sum(natoms[2 + type_i] for type_i in self.get_sel_type()) nout = self.get_out_size() coord = tf.reshape(coord_, [-1, natoms[1] * 3]) atype = tf.reshape(atype_, [-1, natoms[1]]) - 
input_dict['nframes'] = tf.shape(coord)[0] + input_dict["nframes"] = tf.shape(coord)[0] # type embedding if any if self.typeebd is not None: @@ -137,54 +148,62 @@ def build (self, reuse=reuse, suffix=suffix, ) - input_dict['type_embedding'] = type_embedding - input_dict['atype'] = atype_ + input_dict["type_embedding"] = type_embedding + input_dict["atype"] = atype_ dout = self.build_descrpt( - coord, atype, natoms, box, mesh, input_dict, + coord, + atype, + natoms, + box, + mesh, + input_dict, frz_model=frz_model, ckpt_meta=ckpt_meta, suffix=suffix, - reuse=reuse) + reuse=reuse, + ) rot_mat = self.descrpt.get_rot_mat() - rot_mat = tf.identity(rot_mat, name = 'o_rot_mat'+suffix) - - output = self.fitting.build (dout, - rot_mat, - natoms, - input_dict, - reuse = reuse, - suffix = suffix) + rot_mat = tf.identity(rot_mat, name="o_rot_mat" + suffix) + + output = self.fitting.build( + dout, rot_mat, natoms, input_dict, reuse=reuse, suffix=suffix + ) framesize = nout if "global" in self.model_type else natomsel * nout - output = tf.reshape(output, [-1, framesize], name = 'o_' + self.model_type + suffix) + output = tf.reshape( + output, [-1, framesize], name="o_" + self.model_type + suffix + ) model_dict = {self.model_type: output} if "global" not in self.model_type: - gname = "global_"+self.model_type + gname = "global_" + self.model_type atom_out = tf.reshape(output, [-1, natomsel, nout]) global_out = tf.reduce_sum(atom_out, axis=1) global_out = tf.reshape(global_out, [-1, nout], name="o_" + gname + suffix) - + out_cpnts = tf.split(atom_out, nout, axis=-1) force_cpnts = [] virial_cpnts = [] atom_virial_cpnts = [] for out_i in out_cpnts: - force_i, virial_i, atom_virial_i \ - = self.descrpt.prod_force_virial(out_i, natoms) - force_cpnts.append (tf.reshape(force_i, [-1, 3*natoms[1]])) - virial_cpnts.append (tf.reshape(virial_i, [-1, 9])) - atom_virial_cpnts.append(tf.reshape(atom_virial_i, [-1, 9*natoms[1]])) + force_i, virial_i, atom_virial_i = 
self.descrpt.prod_force_virial( + out_i, natoms + ) + force_cpnts.append(tf.reshape(force_i, [-1, 3 * natoms[1]])) + virial_cpnts.append(tf.reshape(virial_i, [-1, 9])) + atom_virial_cpnts.append(tf.reshape(atom_virial_i, [-1, 9 * natoms[1]])) # [nframe x nout x (natom x 3)] force = tf.concat(force_cpnts, axis=1, name="o_force" + suffix) # [nframe x nout x 9] virial = tf.concat(virial_cpnts, axis=1, name="o_virial" + suffix) # [nframe x nout x (natom x 9)] - atom_virial = tf.concat(atom_virial_cpnts, axis=1, name="o_atom_virial" + suffix) + atom_virial = tf.concat( + atom_virial_cpnts, axis=1, name="o_atom_virial" + suffix + ) model_dict[gname] = global_out model_dict["force"] = force @@ -193,11 +212,12 @@ def build (self, return model_dict - def init_variables(self, - graph : tf.Graph, - graph_def : tf.GraphDef, - model_type : str = "original_model", - suffix : str = "", + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + model_type: str = "original_model", + suffix: str = "", ) -> None: """ Init the embedding net variables with the given frozen model @@ -213,32 +233,32 @@ def init_variables(self, suffix : str suffix to name scope """ - if model_type == 'original_model': + if model_type == "original_model": self.descrpt.init_variables(graph, graph_def, suffix=suffix) self.fitting.init_variables(graph, graph_def, suffix=suffix) - tf.constant("original_model", name = 'model_type', dtype = tf.string) - elif model_type == 'compressed_model': + tf.constant("original_model", name="model_type", dtype=tf.string) + elif model_type == "compressed_model": self.fitting.init_variables(graph, graph_def, suffix=suffix) - tf.constant("compressed_model", name = 'model_type', dtype = tf.string) + tf.constant("compressed_model", name="model_type", dtype=tf.string) else: raise RuntimeError("Unknown model type %s" % model_type) class WFCModel(TensorModel): def __init__(self, *args, **kwargs) -> None: - TensorModel.__init__(self, 'wfc', *args, **kwargs) + 
TensorModel.__init__(self, "wfc", *args, **kwargs) class DipoleModel(TensorModel): def __init__(self, *args, **kwargs) -> None: - TensorModel.__init__(self, 'dipole', *args, **kwargs) + TensorModel.__init__(self, "dipole", *args, **kwargs) class PolarModel(TensorModel): def __init__(self, *args, **kwargs) -> None: - TensorModel.__init__(self, 'polar', *args, **kwargs) + TensorModel.__init__(self, "polar", *args, **kwargs) class GlobalPolarModel(TensorModel): def __init__(self, *args, **kwargs) -> None: - TensorModel.__init__(self, 'global_polar', *args, **kwargs) + TensorModel.__init__(self, "global_polar", *args, **kwargs) diff --git a/deepmd/nvnmd/__init__.py b/deepmd/nvnmd/__init__.py index f3cdaf13e5..f598ff416a 100644 --- a/deepmd/nvnmd/__init__.py +++ b/deepmd/nvnmd/__init__.py @@ -1,5 +1,10 @@ - -from . import data, descriptor, entrypoints, fit, utils +from . import ( + data, + descriptor, + entrypoints, + fit, + utils, +) __all__ = [ "data", diff --git a/deepmd/nvnmd/data/__init__.py b/deepmd/nvnmd/data/__init__.py index 21c208e404..8324b46b4f 100644 --- a/deepmd/nvnmd/data/__init__.py +++ b/deepmd/nvnmd/data/__init__.py @@ -10,35 +10,35 @@ Data ---- -jdata_sys +jdata_sys action configuration -jdata_config +jdata_config hardware configuration - dscp + dscp descriptor configuration - fitn + fitn fitting network configuration - size + size ram capacity - ctrl + ctrl control flag, such as Time Division Multiplexing (TDM) - nbit + nbit number of bits of fixed-point number -jdata_config_16 (disable) +jdata_config_16 (disable) difference with configure fitting size as 16 -jdata_config_32 (disable) +jdata_config_32 (disable) difference with configure fitting size as 32 -jdata_config_64 (disable) +jdata_config_64 (disable) difference with configure fitting size as 64 -jdata_config_128 (default) +jdata_config_128 (default) difference with configure fitting size as 128 -jdata_configs +jdata_configs all configure of jdata_config{nfit_node} -jdata_deepmd_input 
+jdata_deepmd_input default input script for nvnmd training -NVNMD_WELCOME +NVNMD_WELCOME nvnmd title when logging -NVNMD_CITATION +NVNMD_CITATION citation of nvnmd """ diff --git a/deepmd/nvnmd/data/data.py b/deepmd/nvnmd/data/data.py index daed97e970..d9f61d3b8b 100644 --- a/deepmd/nvnmd/data/data.py +++ b/deepmd/nvnmd/data/data.py @@ -1,7 +1,4 @@ - -jdata_sys = { - "debug": False -} +jdata_sys = {"debug": False} jdata_config = { "dscp": { @@ -35,24 +32,18 @@ # mapping table "dmin": 0, "smin": -2, - "smax": 14 + "smax": 14, }, - "fitn": { # basic config from deepmd model "neuron": [128, 128, 128], "resnet_dt": False, - "NNODE_FITS": "(M1*M2, neuron, 1)", "nlayer_fit": "len(neuron)+1", - "NLAYER": "nlayer_fit" + "NLAYER": "nlayer_fit", }, - # other input for generate input file - "dpin": { - "type_map" : [] - }, - + "dpin": {"type_map": []}, "size": { # atom system size for simulation "Na": 4096, @@ -64,9 +55,8 @@ # model size "NH_DATA": [0, 0, 0, 0, 0, 0, 0], "NW_DATA": [0, 0, 0, 0, 0, 0, 0], - "NH_SIM": [0, 0, 0, 0, 0, 0, 0, 0, 0] + "NH_SIM": [0, 0, 0, 0, 0, 0, 0, 0, 0], }, - "ctrl": { # NSTDM "NSTDM": 64, @@ -76,7 +66,6 @@ "NSEL": "NSTDM*NTYPE_MAX", "NSADV": "NSTDM+1", }, - "nbit": { # general "NBIT_FLTD": 29, @@ -112,104 +101,48 @@ # communication "NBIT_SPE_MAX": 8, "NBIT_LST_MAX": 16, - "NBIT_ADDR": 32, "NBIT_SYS": 32, - "NBIT_BYPASS_DATA": 32, "NBIT_CFG": 64, "NBIT_NET": 72, - "NBIT_MODEL_HEAD": 32, # nbit for mapt-version - "NBIT_IDX_S2G" : 9, - "NBIT_NEIB" : 8 + "NBIT_IDX_S2G": 9, + "NBIT_NEIB": 8, }, - - "end": "" + "end": "", } jdata_config_16 = { - "dscp": { - "neuron": [8, 16, 32], - "axis_neuron": 4, - "NI": 128 - }, - - "fitn": { - "neuron": [16, 16, 16] - }, - - "ctrl": { - "NSTDM": 16, - "NSTDM_M1": 16, - "NSTDM_M2": 1, - "NSTDM_M1X": 4 - } + "dscp": {"neuron": [8, 16, 32], "axis_neuron": 4, "NI": 128}, + "fitn": {"neuron": [16, 16, 16]}, + "ctrl": {"NSTDM": 16, "NSTDM_M1": 16, "NSTDM_M2": 1, "NSTDM_M1X": 4}, } jdata_config_32 = { - "dscp": { - 
"neuron": [8, 16, 32], - "axis_neuron": 4, - "NI": 128 - }, - - "fitn": { - "neuron": [32, 32, 32] - }, - - "ctrl": { - "NSTDM": 16, - "NSTDM_M1": 16, - "NSTDM_M2": 1, - "NSTDM_M1X": 4 - } + "dscp": {"neuron": [8, 16, 32], "axis_neuron": 4, "NI": 128}, + "fitn": {"neuron": [32, 32, 32]}, + "ctrl": {"NSTDM": 16, "NSTDM_M1": 16, "NSTDM_M2": 1, "NSTDM_M1X": 4}, } jdata_config_64 = { - "dscp": { - "neuron": [8, 16, 32], - "axis_neuron": 4, - "NI": 128 - }, - - "fitn": { - "neuron": [64, 64, 64] - }, - - "ctrl": { - "NSTDM": 32, - "NSTDM_M1": 32, - "NSTDM_M2": 1, - "NSTDM_M1X": 4 - } + "dscp": {"neuron": [8, 16, 32], "axis_neuron": 4, "NI": 128}, + "fitn": {"neuron": [64, 64, 64]}, + "ctrl": {"NSTDM": 32, "NSTDM_M1": 32, "NSTDM_M2": 1, "NSTDM_M1X": 4}, } jdata_config_128 = { - "dscp": { - "neuron": [8, 16, 32], - "axis_neuron": 4, - "NI": 128 - }, - - "fitn": { - "neuron": [128, 128, 128] - }, - - "ctrl": { - "NSTDM": 64, - "NSTDM_M1": 32, - "NSTDM_M2": 2, - "NSTDM_M1X": 8 - } + "dscp": {"neuron": [8, 16, 32], "axis_neuron": 4, "NI": 128}, + "fitn": {"neuron": [128, 128, 128]}, + "ctrl": {"NSTDM": 64, "NSTDM_M1": 32, "NSTDM_M2": 2, "NSTDM_M1X": 8}, } jdata_configs = { "_16": jdata_config_16, "_32": jdata_config_32, "_64": jdata_config_64, - "128": jdata_config_128 + "128": jdata_config_128, } jdata_deepmd_input = { @@ -217,30 +150,15 @@ "descriptor": { "seed": 1, "type": "se_a", - "sel": [ - 60, - 60 - ], + "sel": [60, 60], "rcut": 7.0, "rcut_smth": 0.5, - "neuron": [ - 8, - 16, - 32 - ], + "neuron": [8, 16, 32], "type_one_side": False, "axis_neuron": 4, - "resnet_dt": False + "resnet_dt": False, }, - "fitting_net": { - "seed": 1, - "neuron": [ - 128, - 128, - 128 - ], - "resnet_dt": False - } + "fitting_net": {"seed": 1, "neuron": [128, 128, 128], "resnet_dt": False}, }, "nvnmd": { "net_size": 128, @@ -251,13 +169,13 @@ "restore_descriptor": False, "restore_fitting_net": False, "quantize_descriptor": False, - "quantize_fitting_net": False + "quantize_fitting_net": 
False, }, "learning_rate": { "type": "exp", "decay_steps": 5000, "start_lr": 0.005, - "stop_lr": 8.257687192506788e-05 + "stop_lr": 8.257687192506788e-05, }, "loss": { "start_pref_e": 0.02, @@ -265,7 +183,7 @@ "start_pref_f": 1000, "limit_pref_f": 1, "start_pref_v": 0, - "limit_pref_v": 0 + "limit_pref_v": 0, }, "training": { "seed": 1, @@ -278,12 +196,8 @@ "disp_training": True, "time_training": True, "profiling": False, - "training_data": { - "systems": "dataset", - "set_prefix": "set", - "batch_size": 1 - } - } + "training_data": {"systems": "dataset", "set_prefix": "set", "batch_size": 1}, + }, } NVNMD_WELCOME = ( r" _ _ __ __ _ _ __ __ ____ ", diff --git a/deepmd/nvnmd/descriptor/se_a.py b/deepmd/nvnmd/descriptor/se_a.py index 5d3660bb9e..e04c2e4d3b 100644 --- a/deepmd/nvnmd/descriptor/se_a.py +++ b/deepmd/nvnmd/descriptor/se_a.py @@ -1,20 +1,34 @@ -import numpy as np import logging -from deepmd.env import tf -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION -from deepmd.env import op_module -from deepmd.utils.network import embedding_net -from deepmd.utils.graph import get_tensor_by_name_from_graph +import numpy as np + +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + op_module, + tf, +) # -from deepmd.nvnmd.data.data import jdata_sys -from deepmd.nvnmd.utils.config import nvnmd_cfg -from deepmd.nvnmd.utils.weight import get_normalize +from deepmd.nvnmd.data.data import ( + jdata_sys, +) +from deepmd.nvnmd.utils.config import ( + nvnmd_cfg, +) +from deepmd.nvnmd.utils.weight import ( + get_normalize, +) +from deepmd.utils.graph import ( + get_tensor_by_name_from_graph, +) +from deepmd.utils.network import ( + embedding_net, +) log = logging.getLogger(__name__) + def build_davg_dstd(): r"""Get the davg and dstd from the dictionary nvnmd_cfg. 
The davg and dstd have been obtained by training CNN @@ -22,32 +36,33 @@ def build_davg_dstd(): davg, dstd = get_normalize(nvnmd_cfg.weight) return davg, dstd + def check_switch_range(davg, dstd): - r"""Check the range of switch, let it in range [-2, 14] - """ - rmin = nvnmd_cfg.dscp['rcut_smth'] + r"""Check the range of switch, let it in range [-2, 14]""" + rmin = nvnmd_cfg.dscp["rcut_smth"] # namelist = [n.name for n in tf.get_default_graph().as_graph_def().node] - if 'train_attr/min_nbor_dist' in namelist: - min_dist = get_tensor_by_name_from_graph(tf.get_default_graph(), 'train_attr/min_nbor_dist') - elif 'train_attr.min_nbor_dist' in nvnmd_cfg.weight.keys(): - if nvnmd_cfg.weight['train_attr.min_nbor_dist'] < 1e-6: + if "train_attr/min_nbor_dist" in namelist: + min_dist = get_tensor_by_name_from_graph( + tf.get_default_graph(), "train_attr/min_nbor_dist" + ) + elif "train_attr.min_nbor_dist" in nvnmd_cfg.weight.keys(): + if nvnmd_cfg.weight["train_attr.min_nbor_dist"] < 1e-6: min_dist = rmin else: - min_dist = nvnmd_cfg.weight['train_attr.min_nbor_dist'] + min_dist = nvnmd_cfg.weight["train_attr.min_nbor_dist"] else: min_dist = rmin - - # if davg and dstd is None, the model initial mode is in + + # if davg and dstd is None, the model initial mode is in # 'init_from_model', 'restart', 'init_from_frz_model', 'finetune' if (davg is not None) and (dstd is not None): - nvnmd_cfg.dscp['dmin'] = min_dist + nvnmd_cfg.dscp["dmin"] = min_dist nvnmd_cfg.get_s_range(davg, dstd) def build_op_descriptor(): - r"""Replace se_a.py/DescrptSeA/build - """ + r"""Replace se_a.py/DescrptSeA/build""" if nvnmd_cfg.quantize_descriptor: return op_module.prod_env_mat_a_nvnmd_quantize else: @@ -59,44 +74,50 @@ def descrpt2r4(inputs, natoms): where :math:`r_{ji} = (x_{ji}, y_{ji}, z_{ji})` and :math:`r'_{ji} = (s_{ji}, \frac{s_{ji} x_{ji}}{r_{ji}}, \frac{s_{ji} y_{ji}}{r_{ji}}, \frac{s_{ji} z_{ji}}{r_{ji}})` """ - ntypes = nvnmd_cfg.dscp['ntype'] - NIDP = nvnmd_cfg.dscp['NIDP'] + ntypes = 
nvnmd_cfg.dscp["ntype"] + NIDP = nvnmd_cfg.dscp["NIDP"] ndescrpt = NIDP * 4 start_index = 0 # (nf*na*ni, 4) inputs_reshape = tf.reshape(inputs, [-1, 4]) - with tf.variable_scope('filter_type_all_x', reuse=True): + with tf.variable_scope("filter_type_all_x", reuse=True): # u (i.e., r^2) u = tf.reshape(tf.slice(inputs_reshape, [0, 0], [-1, 1]), [-1, 1]) - with tf.variable_scope('u', reuse=True): + with tf.variable_scope("u", reuse=True): u = op_module.flt_nvnmd(u) - log.debug('#u: %s', u) + log.debug("#u: %s", u) u = tf.ensure_shape(u, [None, 1]) u = tf.reshape(u, [-1, natoms[0] * NIDP]) sh0 = tf.shape(u)[0] # rij rij = tf.reshape(tf.slice(inputs_reshape, [0, 1], [-1, 3]), [-1, 3]) - with tf.variable_scope('rij', reuse=True): + with tf.variable_scope("rij", reuse=True): rij = op_module.flt_nvnmd(rij) rij = tf.ensure_shape(rij, [None, 3]) - log.debug('#rij: %s', rij) + log.debug("#rij: %s", rij) s = [] h = [] for type_i in range(ntypes): type_input = 0 - u_i = tf.slice( - u, - [0, start_index * NIDP], - [-1, natoms[2 + type_i] * NIDP]) + u_i = tf.slice(u, [0, start_index * NIDP], [-1, natoms[2 + type_i] * NIDP]) u_i = tf.reshape(u_i, [-1, 1]) # s - table = GLOBAL_NP_FLOAT_PRECISION(np.concatenate([nvnmd_cfg.map['s'][type_i], nvnmd_cfg.map['h'][type_i]], axis=1)) - table_grad = GLOBAL_NP_FLOAT_PRECISION(np.concatenate([nvnmd_cfg.map['s_grad'][type_i], nvnmd_cfg.map['h_grad'][type_i]], axis=1)) - table_info = nvnmd_cfg.map['cfg_u2s'] + table = GLOBAL_NP_FLOAT_PRECISION( + np.concatenate( + [nvnmd_cfg.map["s"][type_i], nvnmd_cfg.map["h"][type_i]], axis=1 + ) + ) + table_grad = GLOBAL_NP_FLOAT_PRECISION( + np.concatenate( + [nvnmd_cfg.map["s_grad"][type_i], nvnmd_cfg.map["h_grad"][type_i]], + axis=1, + ) + ) + table_info = nvnmd_cfg.map["cfg_u2s"] table_info = np.array([np.float64(v) for vs in table_info for v in vs]) - table_info = GLOBAL_NP_FLOAT_PRECISION(table_info) + table_info = GLOBAL_NP_FLOAT_PRECISION(table_info) s_h_i = op_module.map_flt_nvnmd(u_i, table, 
table_grad, table_info) s_h_i = tf.ensure_shape(s_h_i, [None, 1, 2]) @@ -116,25 +137,24 @@ def descrpt2r4(inputs, natoms): s = tf.reshape(s, [-1, 1]) h = tf.reshape(h, [-1, 1]) - with tf.variable_scope('s', reuse=True): + with tf.variable_scope("s", reuse=True): s = op_module.flt_nvnmd(s) - log.debug('#s: %s', s) + log.debug("#s: %s", s) s = tf.ensure_shape(s, [None, 1]) - with tf.variable_scope('h', reuse=True): + with tf.variable_scope("h", reuse=True): h = op_module.flt_nvnmd(h) - log.debug('#h: %s', h) + log.debug("#h: %s", h) h = tf.ensure_shape(h, [None, 1]) - # R2R4 Rs = s # Rxyz = h * rij Rxyz = op_module.mul_flt_nvnmd(h, rij) Rxyz = tf.ensure_shape(Rxyz, [None, 3]) - with tf.variable_scope('Rxyz', reuse=True): + with tf.variable_scope("Rxyz", reuse=True): Rxyz = op_module.flt_nvnmd(Rxyz) - log.debug('#Rxyz: %s', Rxyz) + log.debug("#Rxyz: %s", Rxyz) Rxyz = tf.ensure_shape(Rxyz, [None, 3]) R4 = tf.concat([Rs, Rxyz], axis=1) R4 = tf.reshape(R4, [-1, NIDP, 4]) @@ -144,32 +164,32 @@ def descrpt2r4(inputs, natoms): def filter_lower_R42GR( - type_i, - type_input, - inputs_i, - is_exclude, - activation_fn, - bavg, - stddev, - trainable, - suffix, - seed, - seed_shift, - uniform_seed, - filter_neuron, - filter_precision, - filter_resnet_dt, - embedding_net_variables): - r"""Replace se_a.py/DescrptSeA/_filter_lower - """ + type_i, + type_input, + inputs_i, + is_exclude, + activation_fn, + bavg, + stddev, + trainable, + suffix, + seed, + seed_shift, + uniform_seed, + filter_neuron, + filter_precision, + filter_resnet_dt, + embedding_net_variables, +): + r"""Replace se_a.py/DescrptSeA/_filter_lower""" shape_i = inputs_i.get_shape().as_list() inputs_reshape = tf.reshape(inputs_i, [-1, 4]) natom = tf.shape(inputs_i)[0] - M1 = nvnmd_cfg.dscp['M1'] + M1 = nvnmd_cfg.dscp["M1"] type_input = 0 if (type_input < 0) else type_input - if (nvnmd_cfg.quantize_descriptor): + if nvnmd_cfg.quantize_descriptor: # copy inputs_reshape = op_module.flt_nvnmd(inputs_reshape) inputs_reshape 
= tf.ensure_shape(inputs_reshape, [None, 4]) @@ -180,23 +200,25 @@ def filter_lower_R42GR( # s s = tf.reshape(tf.slice(inputs_reshape, [0, 0], [-1, 1]), [-1, 1]) # G - table = GLOBAL_NP_FLOAT_PRECISION(nvnmd_cfg.map['g'][type_i]) - table_grad = GLOBAL_NP_FLOAT_PRECISION(nvnmd_cfg.map['g_grad'][type_i]) - table_info = nvnmd_cfg.map['cfg_s2g'] + table = GLOBAL_NP_FLOAT_PRECISION(nvnmd_cfg.map["g"][type_i]) + table_grad = GLOBAL_NP_FLOAT_PRECISION(nvnmd_cfg.map["g_grad"][type_i]) + table_info = nvnmd_cfg.map["cfg_s2g"] table_info = np.array([np.float64(v) for vs in table_info for v in vs]) table_info = GLOBAL_NP_FLOAT_PRECISION(table_info) - with tf.variable_scope('g', reuse=True): + with tf.variable_scope("g", reuse=True): G = op_module.map_flt_nvnmd(s, table, table_grad, table_info) G = tf.ensure_shape(G, [None, 1, M1]) G = op_module.flt_nvnmd(G) G = tf.ensure_shape(G, [None, 1, M1]) - log.debug('#g: %s', G) + log.debug("#g: %s", G) # G xyz_scatter = G xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1] // 4, M1)) # GR inputs_reshape2 = tf.reshape(inputs_reshape2, [-1, shape_i[1] // 4, 4]) - GR = op_module.matmul_flt2fix_nvnmd(tf.transpose(inputs_reshape2, [0, 2, 1]), xyz_scatter, 23) + GR = op_module.matmul_flt2fix_nvnmd( + tf.transpose(inputs_reshape2, [0, 2, 1]), xyz_scatter, 23 + ) GR = tf.ensure_shape(GR, [None, 4, M1]) return GR @@ -206,11 +228,11 @@ def filter_lower_R42GR( trainable = False embedding_net_variables = {} for key in nvnmd_cfg.weight.keys(): - if 'filter_type' in key: - key2 = key.replace('.', '/') + if "filter_type" in key: + key2 = key.replace(".", "/") embedding_net_variables[key2] = nvnmd_cfg.weight[key] - if (not is_exclude): + if not is_exclude: xyz_scatter = embedding_net( xyz_scatter, filter_neuron, @@ -223,12 +245,13 @@ def filter_lower_R42GR( seed=seed, trainable=trainable, uniform_seed=uniform_seed, - initial_variables=embedding_net_variables) + initial_variables=embedding_net_variables, + ) if (not uniform_seed) and (seed is not 
None): seed += seed_shift else: # we can safely return the final xyz_scatter filled with zero directly - return tf.cast(tf.fill((natom, 4, M1), 0.), GLOBAL_TF_FLOAT_PRECISION) + return tf.cast(tf.fill((natom, 4, M1), 0.0), GLOBAL_TF_FLOAT_PRECISION) # natom x nei_type_i x out_size xyz_scatter = tf.reshape(xyz_scatter, (-1, shape_i[1] // 4, M1)) # When using tf.reshape(inputs_i, [-1, shape_i[1]//4, 4]) below @@ -236,25 +259,28 @@ def filter_lower_R42GR( # but if sel is zero # [588 0] -> [147 0 4] incorrect; the correct one is [588 0 4] # So we need to explicitly assign the shape to tf.shape(inputs_i)[0] instead of -1 - return tf.matmul(tf.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), xyz_scatter, transpose_a=True) + return tf.matmul( + tf.reshape(inputs_i, [natom, shape_i[1] // 4, 4]), + xyz_scatter, + transpose_a=True, + ) def filter_GR2D(xyz_scatter_1): - r"""Replace se_a.py/_filter - """ - NIX = nvnmd_cfg.dscp['NIX'] - M1 = nvnmd_cfg.dscp['M1'] - M2 = nvnmd_cfg.dscp['M2'] - NBIT_DATA_FL = nvnmd_cfg.nbit['NBIT_FIXD_FL'] + r"""Replace se_a.py/_filter""" + NIX = nvnmd_cfg.dscp["NIX"] + M1 = nvnmd_cfg.dscp["M1"] + M2 = nvnmd_cfg.dscp["M2"] + NBIT_DATA_FL = nvnmd_cfg.nbit["NBIT_FIXD_FL"] - if (nvnmd_cfg.quantize_descriptor): + if nvnmd_cfg.quantize_descriptor: xyz_scatter_1 = tf.reshape(xyz_scatter_1, [-1, 4 * M1]) # fix the number of bits of gradient xyz_scatter_1 = xyz_scatter_1 * (1.0 / NIX) - - with tf.variable_scope('gr', reuse=True): + + with tf.variable_scope("gr", reuse=True): xyz_scatter_1 = op_module.flt_nvnmd(xyz_scatter_1) - log.debug('#gr: %s', xyz_scatter_1) + log.debug("#gr: %s", xyz_scatter_1) xyz_scatter_1 = tf.ensure_shape(xyz_scatter_1, [None, 4 * M1]) xyz_scatter_1 = tf.reshape(xyz_scatter_1, [-1, 4, M1]) @@ -266,7 +292,9 @@ def filter_GR2D(xyz_scatter_1): qmat = tf.transpose(qmat, perm=[0, 2, 1]) # D': natom x outputs_size x outputs_size_2 xyz_scatter_1_T = tf.transpose(xyz_scatter_1, [0, 2, 1]) - result = 
op_module.matmul_flt_nvnmd(xyz_scatter_1_T, xyz_scatter_2, 1*16+0, 1*16+0) + result = op_module.matmul_flt_nvnmd( + xyz_scatter_1_T, xyz_scatter_2, 1 * 16 + 0, 1 * 16 + 0 + ) result = tf.ensure_shape(result, [None, M1, M1]) # D': natom x (outputs_size x outputs_size_2) result = tf.reshape(result, [-1, M1 * M1]) @@ -278,13 +306,13 @@ def filter_GR2D(xyz_scatter_1): index_subset = tf.constant(np.int32(np.array(index_subset))) result = tf.gather(result, index_subset, axis=1) - with tf.variable_scope('d', reuse=True): + with tf.variable_scope("d", reuse=True): result = op_module.flt_nvnmd(result) - log.debug('#d: %s', result) - result = tf.ensure_shape(result, [None, M1*M2]) + log.debug("#d: %s", result) + result = tf.ensure_shape(result, [None, M1 * M2]) result = op_module.quantize_nvnmd(result, 0, NBIT_DATA_FL, NBIT_DATA_FL, -1) - result = tf.ensure_shape(result, [None, M1*M2]) + result = tf.ensure_shape(result, [None, M1 * M2]) else: # natom x 4 x outputs_size xyz_scatter_1 = xyz_scatter_1 * (1.0 / NIX) diff --git a/deepmd/nvnmd/entrypoints/__init__.py b/deepmd/nvnmd/entrypoints/__init__.py index 037c74d76a..6f737c710b 100644 --- a/deepmd/nvnmd/entrypoints/__init__.py +++ b/deepmd/nvnmd/entrypoints/__init__.py @@ -1,9 +1,11 @@ -from .freeze import save_weight -from .mapt import MapTable -from .wrap import Wrap +from .freeze import ( + save_weight, +) +from .mapt import ( + MapTable, +) +from .wrap import ( + Wrap, +) -__all__ = [ - "save_weight", - "MapTable", - "Wrap" -] +__all__ = ["save_weight", "MapTable", "Wrap"] diff --git a/deepmd/nvnmd/entrypoints/freeze.py b/deepmd/nvnmd/entrypoints/freeze.py index bd23959064..ff3e9617f2 100644 --- a/deepmd/nvnmd/entrypoints/freeze.py +++ b/deepmd/nvnmd/entrypoints/freeze.py @@ -1,9 +1,15 @@ - #!/usr/bin/env python3 -from deepmd.env import tf -from deepmd.nvnmd.utils.fio import FioDic -from deepmd.utils.graph import get_tensor_by_name_from_graph +from deepmd.env import ( + tf, +) +from deepmd.nvnmd.utils.fio import ( + 
FioDic, +) +from deepmd.utils.graph import ( + get_tensor_by_name_from_graph, +) + def filter_tensorVariableList(tensorVariableList) -> dict: r"""Get the name of variable for NVNMD @@ -19,27 +25,26 @@ def filter_tensorVariableList(tensorVariableList) -> dict: | :code:`final_layer_type_{atom i}/bias:0` """ nameList = [tv.name for tv in tensorVariableList] - nameList = [name.replace(':0', '') for name in nameList] - nameList = [name.replace('/', '.') for name in nameList] + nameList = [name.replace(":0", "") for name in nameList] + nameList = [name.replace("/", ".") for name in nameList] dic_name_tv = {} for ii in range(len(nameList)): name = nameList[ii] tv = tensorVariableList[ii] - p1 = name.startswith('descrpt_attr') - p1 = p1 or name.startswith('filter_type_') - p1 = p1 or name.startswith('layer_') - p1 = p1 or name.startswith('final_layer_type_') - p2 = 'Adam' not in name - p3 = 'XXX' not in name + p1 = name.startswith("descrpt_attr") + p1 = p1 or name.startswith("filter_type_") + p1 = p1 or name.startswith("layer_") + p1 = p1 or name.startswith("final_layer_type_") + p2 = "Adam" not in name + p3 = "XXX" not in name if p1 and p2 and p3: dic_name_tv[name] = tv return dic_name_tv -def save_weight(sess, file_name: str = 'nvnmd/weight.npy'): - r"""Save the dictionary of weight to a npy file - """ +def save_weight(sess, file_name: str = "nvnmd/weight.npy"): + r"""Save the dictionary of weight to a npy file""" tvs = tf.global_variables() dic_key_tv = filter_tensorVariableList(tvs) dic_key_value = {} @@ -47,9 +52,11 @@ def save_weight(sess, file_name: str = 'nvnmd/weight.npy'): value = sess.run(dic_key_tv[key]) dic_key_value[key] = value namelist = [n.name for n in tf.get_default_graph().as_graph_def().node] - if 'train_attr/min_nbor_dist' in namelist: - min_dist = get_tensor_by_name_from_graph(tf.get_default_graph(), 'train_attr/min_nbor_dist') + if "train_attr/min_nbor_dist" in namelist: + min_dist = get_tensor_by_name_from_graph( + tf.get_default_graph(), 
"train_attr/min_nbor_dist" + ) else: min_dist = 0.0 - dic_key_value['train_attr.min_nbor_dist'] = min_dist + dic_key_value["train_attr.min_nbor_dist"] = min_dist FioDic().save(file_name, dic_key_value) diff --git a/deepmd/nvnmd/entrypoints/mapt.py b/deepmd/nvnmd/entrypoints/mapt.py index a4ac8f5203..bca8b334ad 100644 --- a/deepmd/nvnmd/entrypoints/mapt.py +++ b/deepmd/nvnmd/entrypoints/mapt.py @@ -1,19 +1,35 @@ - -import numpy as np import logging +from typing import ( + List, + Optional, +) -from deepmd.env import tf -from deepmd.env import op_module -from deepmd.utils.sess import run_sess - -from deepmd.nvnmd.utils.fio import FioDic -from deepmd.nvnmd.utils.config import nvnmd_cfg -from deepmd.nvnmd.utils.weight import get_normalize, get_filter_weight -from deepmd.nvnmd.utils.network import get_sess - -from deepmd.nvnmd.data.data import jdata_sys, jdata_deepmd_input +import numpy as np -from typing import List, Optional +from deepmd.env import ( + op_module, + tf, +) +from deepmd.nvnmd.data.data import ( + jdata_deepmd_input, + jdata_sys, +) +from deepmd.nvnmd.utils.config import ( + nvnmd_cfg, +) +from deepmd.nvnmd.utils.fio import ( + FioDic, +) +from deepmd.nvnmd.utils.network import ( + get_sess, +) +from deepmd.nvnmd.utils.weight import ( + get_filter_weight, + get_normalize, +) +from deepmd.utils.sess import ( + run_sess, +) log = logging.getLogger(__name__) @@ -59,26 +75,21 @@ class MapTable: DOI: 10.1038/s41524-022-00773-z """ - def __init__( - self, - config_file: str, - weight_file: str, - map_file: str - ): + def __init__(self, config_file: str, weight_file: str, map_file: str): self.config_file = config_file self.weight_file = weight_file self.map_file = map_file - jdata = jdata_deepmd_input['nvnmd'] - jdata['config_file'] = config_file - jdata['weight_file'] = weight_file - jdata['enable'] = True + jdata = jdata_deepmd_input["nvnmd"] + jdata["config_file"] = config_file + jdata["weight_file"] = weight_file + jdata["enable"] = True 
nvnmd_cfg.init_from_jdata(jdata) def build_map(self): - ntypex = nvnmd_cfg.dscp['ntypex'] - ntype = nvnmd_cfg.dscp['ntype'] + ntypex = nvnmd_cfg.dscp["ntypex"] + ntype = nvnmd_cfg.dscp["ntype"] # calculate grid point dic_u2s, dic_u2s_ref = self.run_u2s() dic_s2g, dic_s2g_ref = self.run_s2g() @@ -86,31 +97,56 @@ def build_map(self): rank = 4 dic_map = {} - u = dic_u2s['u'] - cfg_u2s = [ - [u[0], u[512], u[1] - u[0], 0, 512] - ] - dic_map['s'], dic_map['s_grad'] = self.build_map_coef(cfg_u2s, u, dic_u2s['s'], dic_u2s['s_grad'], dic_u2s['s_grad_grad'], ntype, 1, rank) - dic_map['h'], dic_map['h_grad'] = self.build_map_coef(cfg_u2s, u, dic_u2s['h'], dic_u2s['h_grad'], dic_u2s['h_grad_grad'], ntype, 1, rank) + u = dic_u2s["u"] + cfg_u2s = [[u[0], u[512], u[1] - u[0], 0, 512]] + dic_map["s"], dic_map["s_grad"] = self.build_map_coef( + cfg_u2s, + u, + dic_u2s["s"], + dic_u2s["s_grad"], + dic_u2s["s_grad_grad"], + ntype, + 1, + rank, + ) + dic_map["h"], dic_map["h_grad"] = self.build_map_coef( + cfg_u2s, + u, + dic_u2s["h"], + dic_u2s["h_grad"], + dic_u2s["h_grad_grad"], + ntype, + 1, + rank, + ) dic_map2 = {} - s = dic_s2g['s'] + s = dic_s2g["s"] cfg_s2g = [ - [s[0], s[256], s[1]-s[0], 0, 256], - [s[0], s[4096], s[16]-s[0], 256, 512] + [s[0], s[256], s[1] - s[0], 0, 256], + [s[0], s[4096], s[16] - s[0], 256, 512], ] - dic_map2['g'], dic_map2['g_grad'] = self.build_map_coef(cfg_s2g, s, dic_s2g['g'], dic_s2g['g_grad'], dic_s2g['g_grad_grad'], ntype, 32, rank) + dic_map2["g"], dic_map2["g_grad"] = self.build_map_coef( + cfg_s2g, + s, + dic_s2g["g"], + dic_s2g["g_grad"], + dic_s2g["g_grad_grad"], + ntype, + 32, + rank, + ) # run mapping to test - if jdata_sys['debug']: - dic_u2s_prd = self.mapping2(dic_u2s_ref['u'], dic_map, cfg_u2s, rank) - dic_s2g_prd = self.mapping2(dic_s2g_ref['s'], dic_map2, cfg_s2g, rank) + if jdata_sys["debug"]: + dic_u2s_prd = self.mapping2(dic_u2s_ref["u"], dic_map, cfg_u2s, rank) + dic_s2g_prd = self.mapping2(dic_s2g_ref["s"], dic_map2, cfg_s2g, 
rank) - self.plot_lines(dic_u2s_ref['u'], dic_u2s_prd, dic_u2s_ref) - self.plot_lines(dic_s2g_ref['s'], dic_s2g_prd, dic_s2g_ref) + self.plot_lines(dic_u2s_ref["u"], dic_u2s_prd, dic_u2s_ref) + self.plot_lines(dic_s2g_ref["s"], dic_s2g_prd, dic_s2g_ref) # save self.map = {} - self.map['cfg_u2s'] = cfg_u2s - self.map['cfg_s2g'] = cfg_s2g + self.map["cfg_u2s"] = cfg_u2s + self.map["cfg_s2g"] = cfg_s2g self.map.update(dic_map) self.map.update(dic_map2) @@ -119,8 +155,7 @@ def build_map(self): return self.map def mapping(self, x, dic_map, cfgs, rank=4): - r""" Evaluate value by mapping table operation of tensorflow - """ + r"""Evaluate value by mapping table operation of tensorflow""" n = len(x) dic_val = {} for key in dic_map.keys(): @@ -148,24 +183,23 @@ def mapping(self, x, dic_map, cfgs, rank=4): coef = val_i[idx_k] if rank == 4: coef = np.reshape(coef, [nc, 4]) - a, b, c, d = coef[:,0], coef[:,1], coef[:, 2], coef[:, 3] - dat_i[kk, :] = d + (c + (b + a * dxx_k) * dxx_k ) * dxx_k + a, b, c, d = coef[:, 0], coef[:, 1], coef[:, 2], coef[:, 3] + dat_i[kk, :] = d + (c + (b + a * dxx_k) * dxx_k) * dxx_k elif rank == 2: coef = np.reshape(coef, [nc, 2]) - a, b = coef[:,0], coef[:,1] + a, b = coef[:, 0], coef[:, 1] dat_i[kk, :] = b + a * dxx_k dats.append(dat_i) dic_val[key] = dats return dic_val def mapping2(self, x, dic_map, cfgs, rank=4): - r""" Evaluate value by mapping table of numpy - """ + r"""Evaluate value by mapping table of numpy""" tf.reset_default_graph() - t_x = tf.placeholder(tf.float64, [None, 1], 't_x') - t_table = tf.placeholder(tf.float64, [None, None], 't_table') - t_table_grad = tf.placeholder(tf.float64, [None, None], 't_table_grad') - t_table_info = tf.placeholder(tf.float64, [None], 't_table_info') + t_x = tf.placeholder(tf.float64, [None, 1], "t_x") + t_table = tf.placeholder(tf.float64, [None, None], "t_table") + t_table_grad = tf.placeholder(tf.float64, [None, None], "t_table_grad") + t_table_info = tf.placeholder(tf.float64, [None], 
"t_table_info") t_y = op_module.map_flt_nvnmd(t_x, t_table, t_table_grad, t_table_info) sess = get_sess() # @@ -178,10 +212,10 @@ def mapping2(self, x, dic_map, cfgs, rank=4): for ii in range(len(val)): val_i = val[ii] feed_dict = { - t_x : x, - t_table : val_i, - t_table_grad : val_i * 0.0, - t_table_info : np.reshape(np.array(cfgs), [-1]) + t_x: x, + t_table: val_i, + t_table_grad: val_i * 0.0, + t_table_info: np.reshape(np.array(cfgs), [-1]), } dat_i = run_sess(sess, t_y, feed_dict=feed_dict) dat_i = np.reshape(dat_i, [n, -1]) @@ -190,8 +224,7 @@ def mapping2(self, x, dic_map, cfgs, rank=4): return dic_val def plot_lines(self, x, dic1, dic2=None): - r""" Plot lines to see accuracy - """ + r"""Plot lines to see accuracy""" for key in dic1.keys(): val1 = dic1[key] if dic2 is None: @@ -206,7 +239,7 @@ def plot_lines(self, x, dic1, dic2=None): nc = np.shape(val1_i)[1] def build_map_coef(self, cfgs, x, ys, grads, grad_grads, Nr, Nc, rank=4): - r""" Build mapping table coefficient + r"""Build mapping table coefficient cfgs: cfg list cfg = x0, x1, dx @@ -222,13 +255,18 @@ def build_map_coef(self, cfgs, x, ys, grads, grad_grads, Nr, Nc, rank=4): | b = (3 y1 - dx dy' - 2dx y0' - 3y0) / dx^2 \ a = (dx y1' - 2 y1 + dx y0' + 2 y0) / dx^3 """ + def cal_coef2(cfg, x, y, dy): x = np.reshape(x, [-1]) coefs = [] for cfg in cfgs: x0, x1, dx, N0, N1 = cfg Nd = N1 - N0 - idx = np.logical_and(x >= x0, x <= x1, np.abs((x-x0)-np.floor((x-x0)/dx)*dx) < 1e-4) + idx = np.logical_and( + x >= x0, + x <= x1, + np.abs((x - x0) - np.floor((x - x0) / dx) * dx) < 1e-4, + ) y0 = y[idx][:-1] y1 = y[idx][1:] y0 = y0[:Nd] @@ -247,8 +285,8 @@ def cal_coef4(cfg, x, y, dy): for cfg in cfgs: x0, x1, dx, N0, N1 = cfg Nd = N1 - N0 - diff_x = np.abs((x-x0)-np.round((x-x0)/dx)*dx) - idx = np.logical_and(np.logical_and(x >= x0, x <= x1) , diff_x < 1.0e-4) + diff_x = np.abs((x - x0) - np.round((x - x0) / dx) * dx) + idx = np.logical_and(np.logical_and(x >= x0, x <= x1), diff_x < 1.0e-4) y0 = y[idx][:-1] y1 = 
y[idx][1:] dy0 = dy[idx][:-1] @@ -258,8 +296,8 @@ def cal_coef4(cfg, x, y, dy): dy0 = dy0[:Nd] dy1 = dy1[:Nd] # - a = (dx*dy1 - 2*y1 + dx*dy0 + 2*y0) / dx**3 - b = (3*y1 - dx*dy1 - 2*dx*dy0 - 3*y0) / dx**2 + a = (dx * dy1 - 2 * y1 + dx * dy0 + 2 * y0) / dx**3 + b = (3 * y1 - dx * dy1 - 2 * dx * dy0 - 3 * y0) / dx**2 c = dy0 d = y0 coef = np.concatenate([a, b, c, d]) @@ -267,6 +305,7 @@ def cal_coef4(cfg, x, y, dy): coefs.append(coef) coefs = np.concatenate(coefs) return coefs + # cal_coef = cal_coef4 if (rank == 4) else cal_coef2 coefs = [] @@ -279,9 +318,9 @@ def cal_coef4(cfg, x, y, dy): coef_i = [] coef_grad_i = [] for jj in range(Nc): - y_ij = y_i[:,jj] - grad_ij = grad_i[:,jj] - grad_grad_ij = grad_grad_i[:,jj] + y_ij = y_i[:, jj] + grad_ij = grad_i[:, jj] + grad_grad_ij = grad_grad_i[:, jj] coef_ij = cal_coef(cfgs, x, y_ij, grad_ij) coef_grad_ij = cal_coef(cfgs, x, grad_ij, grad_grad_ij) coef_i.append(coef_ij) @@ -290,11 +329,10 @@ def cal_coef4(cfg, x, y, dy): coef_grad_i = np.concatenate(coef_grad_i, axis=1) coefs.append(coef_i) coef_grads.append(coef_grad_i) - return coefs, coef_grads + return coefs, coef_grads def build_grad(self, x, y, Nr, Nc): - r""": Build gradient of tensor y of x - """ + r""": Build gradient of tensor y of x""" grads = [] grad_grads = [] for ii in range(Nr): @@ -302,7 +340,7 @@ def build_grad(self, x, y, Nr, Nc): grad_i = [] grad_grad_i = [] for jj in range(Nc): - y_ij = y_i[:,jj] + y_ij = y_i[:, jj] grad_ij = tf.gradients(y_ij, x)[0] grad_grad_ij = tf.gradients(grad_ij, x)[0] grad_i.append(grad_ij) @@ -314,14 +352,13 @@ def build_grad(self, x, y, Nr, Nc): return grads, grad_grads def build_u2s(self, r2): - r""" Build tensor s, s=s(r2) - """ - rmin = nvnmd_cfg.dscp['rcut_smth'] - rmax = nvnmd_cfg.dscp['rcut'] - ntype = nvnmd_cfg.dscp['ntype'] + r"""Build tensor s, s=s(r2)""" + rmin = nvnmd_cfg.dscp["rcut_smth"] + rmax = nvnmd_cfg.dscp["rcut"] + ntype = nvnmd_cfg.dscp["ntype"] - if 'train_attr.min_nbor_dist' in 
nvnmd_cfg.weight.keys(): - min_dist = nvnmd_cfg.weight['train_attr.min_nbor_dist'] + if "train_attr.min_nbor_dist" in nvnmd_cfg.weight.keys(): + min_dist = nvnmd_cfg.weight["train_attr.min_nbor_dist"] else: min_dist = rmin min_dist = 0.5 if (min_dist > 0.5) else (min_dist - 0.1) @@ -349,74 +386,76 @@ def build_u2s(self, r2): return sl, hl def build_u2s_grad(self): - r""" Build gradient of s with respect to u (r^2) - """ - ntype = nvnmd_cfg.dscp['ntype'] + r"""Build gradient of s with respect to u (r^2)""" + ntype = nvnmd_cfg.dscp["ntype"] # dic_ph = {} - dic_ph['u'] = tf.placeholder(tf.float64, [None, 1], 't_u') - dic_ph['s'], dic_ph['h'] = self.build_u2s(dic_ph['u']) - dic_ph['s_grad'], dic_ph['s_grad_grad'] = self.build_grad(dic_ph['u'], dic_ph['s'], ntype, 1) - dic_ph['h_grad'], dic_ph['h_grad_grad'] = self.build_grad(dic_ph['u'], dic_ph['h'], ntype, 1) + dic_ph["u"] = tf.placeholder(tf.float64, [None, 1], "t_u") + dic_ph["s"], dic_ph["h"] = self.build_u2s(dic_ph["u"]) + dic_ph["s_grad"], dic_ph["s_grad_grad"] = self.build_grad( + dic_ph["u"], dic_ph["s"], ntype, 1 + ) + dic_ph["h_grad"], dic_ph["h_grad_grad"] = self.build_grad( + dic_ph["u"], dic_ph["h"], ntype, 1 + ) return dic_ph def run_u2s(self): - r""" Build u->s graph and run it to get value of mapping table - """ + r"""Build u->s graph and run it to get value of mapping table""" # ntypex = nvnmd_cfg.dscp['ntypex'] - ntype = nvnmd_cfg.dscp['ntype'] + ntype = nvnmd_cfg.dscp["ntype"] avg, std = get_normalize(nvnmd_cfg.weight) avg, std = np.float64(avg), np.float64(std) - rc_max = nvnmd_cfg.dscp['rc_max'] + rc_max = nvnmd_cfg.dscp["rc_max"] tf.reset_default_graph() dic_ph = self.build_u2s_grad() sess = get_sess() - # N = NUM_MAPT + # N = NUM_MAPT N = 512 - N2 = int(rc_max ** 2) + N2 = int(rc_max**2) # N+1 ranther than N for calculating defference keys = list(dic_ph.keys()) vals = list(dic_ph.values()) u = N2 * np.reshape(np.arange(0, N + 1) / N, [-1, 1]) - res_lst = run_sess(sess, vals, 
feed_dict={dic_ph['u']: u}) + res_lst = run_sess(sess, vals, feed_dict={dic_ph["u"]: u}) res_dic = dict(zip(keys, res_lst)) - u2 = N2 * np.reshape(np.arange(0, N*16 + 1) / (N*16), [-1, 1]) - res_lst2 = run_sess(sess, vals, feed_dict={dic_ph['u']: u2}) - res_dic2 = dict(zip(keys, res_lst2)) # reference for commpare + u2 = N2 * np.reshape(np.arange(0, N * 16 + 1) / (N * 16), [-1, 1]) + res_lst2 = run_sess(sess, vals, feed_dict={dic_ph["u"]: u2}) + res_dic2 = dict(zip(keys, res_lst2)) # reference for commpare # change value for tt in range(ntype): - res_dic['s'][tt][0] = -avg[tt, 0] / std[tt, 0] - res_dic['s_grad'][tt][0] = 0 - res_dic['s_grad_grad'][tt][0] = 0 - res_dic['h'][tt][0] = 0 - res_dic['h_grad'][tt][0] = 0 - res_dic['h_grad_grad'][tt][0] = 0 + res_dic["s"][tt][0] = -avg[tt, 0] / std[tt, 0] + res_dic["s_grad"][tt][0] = 0 + res_dic["s_grad_grad"][tt][0] = 0 + res_dic["h"][tt][0] = 0 + res_dic["h_grad"][tt][0] = 0 + res_dic["h_grad_grad"][tt][0] = 0 # - res_dic2['s'][tt][0] = -avg[tt, 0] / std[tt, 0] - res_dic2['s_grad'][tt][0] = 0 - res_dic2['s_grad_grad'][tt][0] = 0 - res_dic2['h'][tt][0] = 0 - res_dic2['h_grad'][tt][0] = 0 - res_dic2['h_grad_grad'][tt][0] = 0 + res_dic2["s"][tt][0] = -avg[tt, 0] / std[tt, 0] + res_dic2["s_grad"][tt][0] = 0 + res_dic2["s_grad_grad"][tt][0] = 0 + res_dic2["h"][tt][0] = 0 + res_dic2["h_grad"][tt][0] = 0 + res_dic2["h_grad_grad"][tt][0] = 0 sess.close() return res_dic, res_dic2 def build_s2g(self, s): - r""" Build s->G + r"""Build s->G s is switch function G is embedding net output """ - ntypex = nvnmd_cfg.dscp['ntypex'] - ntype = nvnmd_cfg.dscp['ntype'] + ntypex = nvnmd_cfg.dscp["ntypex"] + ntype = nvnmd_cfg.dscp["ntype"] activation_fn = tf.tanh - outputs_size = nvnmd_cfg.dscp['NNODE_FEAS'] + outputs_size = nvnmd_cfg.dscp["NNODE_FEAS"] xyz_scatters = [] for tt in range(ntypex): @@ -428,30 +467,32 @@ def build_s2g(self, s): if outputs_size[ll] == outputs_size[ll - 1]: xyz_scatter += activation_fn(tf.matmul(xyz_scatter, w) + b) 
elif outputs_size[ll] == outputs_size[ll - 1] * 2: - xyz_scatter = tf.concat([xyz_scatter, xyz_scatter], 1) + activation_fn(tf.matmul(xyz_scatter, w) + b) + xyz_scatter = tf.concat( + [xyz_scatter, xyz_scatter], 1 + ) + activation_fn(tf.matmul(xyz_scatter, w) + b) else: xyz_scatter = activation_fn(tf.matmul(xyz_scatter, w) + b) xyz_scatters.append(xyz_scatter) return xyz_scatters def build_s2g_grad(self): - r""" Build gradient of G with respect to s - """ - ntypex = nvnmd_cfg.dscp['ntypex'] - ntype = nvnmd_cfg.dscp['ntype'] - M1 = nvnmd_cfg.dscp['M1'] + r"""Build gradient of G with respect to s""" + ntypex = nvnmd_cfg.dscp["ntypex"] + ntype = nvnmd_cfg.dscp["ntype"] + M1 = nvnmd_cfg.dscp["M1"] # dic_ph = {} - dic_ph['s'] = tf.placeholder(tf.float64, [None, 1], 't_s') - dic_ph['g'] = self.build_s2g(dic_ph['s']) - dic_ph['g_grad'], dic_ph['g_grad_grad'] = self.build_grad(dic_ph['s'], dic_ph['g'], ntypex*ntype, M1) + dic_ph["s"] = tf.placeholder(tf.float64, [None, 1], "t_s") + dic_ph["g"] = self.build_s2g(dic_ph["s"]) + dic_ph["g_grad"], dic_ph["g_grad_grad"] = self.build_grad( + dic_ph["s"], dic_ph["g"], ntypex * ntype, M1 + ) return dic_ph def run_s2g(self): - r""" Build s-> graph and run it to get value of mapping table - """ - smin = nvnmd_cfg.dscp['smin'] - smax = nvnmd_cfg.dscp['smax'] + r"""Build s-> graph and run it to get value of mapping table""" + smin = nvnmd_cfg.dscp["smin"] + smax = nvnmd_cfg.dscp["smax"] tf.reset_default_graph() dic_ph = self.build_s2g_grad() @@ -470,11 +511,11 @@ def run_s2g(self): vals = list(dic_ph.values()) s = N2 * np.reshape(np.arange(0, N + 1) / N, [-1, 1]) + smin_ - res_lst = run_sess(sess, vals, feed_dict={dic_ph['s']: s}) + res_lst = run_sess(sess, vals, feed_dict={dic_ph["s"]: s}) res_dic = dict(zip(keys, res_lst)) - s2 = N2 * np.reshape(np.arange(0, N*16 + 1) / (N*16), [-1, 1]) + smin_ - res_lst2 = run_sess(sess, vals, feed_dict={dic_ph['s']: s2}) + s2 = N2 * np.reshape(np.arange(0, N * 16 + 1) / (N * 16), [-1, 1]) + smin_ + 
res_lst2 = run_sess(sess, vals, feed_dict={dic_ph["s"]: s2}) res_dic2 = dict(zip(keys, res_lst2)) sess.close() @@ -483,10 +524,10 @@ def run_s2g(self): def mapt( *, - nvnmd_config: Optional[str] = 'nvnmd/config.npy', - nvnmd_weight: Optional[str] = 'nvnmd/weight.npy', - nvnmd_map: Optional[str] = 'nvnmd/map.npy', - **kwargs + nvnmd_config: Optional[str] = "nvnmd/config.npy", + nvnmd_weight: Optional[str] = "nvnmd/weight.npy", + nvnmd_map: Optional[str] = "nvnmd/map.npy", + **kwargs, ): # build mapping table mapObj = MapTable(nvnmd_config, nvnmd_weight, nvnmd_map) diff --git a/deepmd/nvnmd/entrypoints/train.py b/deepmd/nvnmd/entrypoints/train.py index f4cd67068f..bef92de602 100644 --- a/deepmd/nvnmd/entrypoints/train.py +++ b/deepmd/nvnmd/entrypoints/train.py @@ -1,17 +1,36 @@ - -import os -from typing import Dict, List, Optional, Any import logging +import os +from typing import ( + Any, + Dict, + List, + Optional, +) -from deepmd.env import tf -from deepmd.entrypoints.train import train -from deepmd.entrypoints.freeze import freeze -from deepmd.nvnmd.entrypoints.mapt import mapt -from deepmd.nvnmd.entrypoints.wrap import wrap - -from deepmd.nvnmd.utils.fio import FioDic -from deepmd.nvnmd.utils.config import nvnmd_cfg -from deepmd.nvnmd.data.data import jdata_deepmd_input +from deepmd.entrypoints.freeze import ( + freeze, +) +from deepmd.entrypoints.train import ( + train, +) +from deepmd.env import ( + tf, +) +from deepmd.nvnmd.data.data import ( + jdata_deepmd_input, +) +from deepmd.nvnmd.entrypoints.mapt import ( + mapt, +) +from deepmd.nvnmd.entrypoints.wrap import ( + wrap, +) +from deepmd.nvnmd.utils.config import ( + nvnmd_cfg, +) +from deepmd.nvnmd.utils.fio import ( + FioDic, +) log = logging.getLogger(__name__) @@ -24,75 +43,75 @@ "mpi_log": "master", "log_level": 2, "log_path": "train.log", - "is_compress": False + "is_compress": False, } jdata_cmd_freeze = { - "checkpoint_folder": '.', - "output": 'frozen_model.pb', + "checkpoint_folder": ".", + 
"output": "frozen_model.pb", "node_names": None, - "nvnmd_weight": "nvnmd/weight.npy" + "nvnmd_weight": "nvnmd/weight.npy", } def normalized_input(fn, PATH_CNN, CONFIG_CNN): - r"""Normalize a input script file for continuous neural network - """ + r"""Normalize a input script file for continuous neural network""" f = FioDic() jdata = f.load(fn, jdata_deepmd_input) # nvnmd - jdata_nvnmd = jdata_deepmd_input['nvnmd'] - jdata_nvnmd['enable'] = True - jdata_nvnmd['config_file'] = CONFIG_CNN - jdata_nvnmd_ = f.get(jdata, 'nvnmd', jdata_nvnmd) + jdata_nvnmd = jdata_deepmd_input["nvnmd"] + jdata_nvnmd["enable"] = True + jdata_nvnmd["config_file"] = CONFIG_CNN + jdata_nvnmd_ = f.get(jdata, "nvnmd", jdata_nvnmd) jdata_nvnmd = f.update(jdata_nvnmd_, jdata_nvnmd) # model jdata_model = { "descriptor": { "seed": 1, "sel": jdata_nvnmd_["sel"], - "rcut": jdata_nvnmd_['rcut'], - "rcut_smth": jdata_nvnmd_['rcut_smth'] + "rcut": jdata_nvnmd_["rcut"], + "rcut_smth": jdata_nvnmd_["rcut_smth"], }, - "fitting_net": { - "seed": 1 - }, - "type_map": [] - } - jdata_model['type_map'] = f.get(jdata_nvnmd_, 'type_map', []) + "fitting_net": {"seed": 1}, + "type_map": [], + } + jdata_model["type_map"] = f.get(jdata_nvnmd_, "type_map", []) nvnmd_cfg.init_from_jdata(jdata_nvnmd) nvnmd_cfg.init_from_deepmd_input(jdata_model) - nvnmd_cfg.init_train_mode('cnn') + nvnmd_cfg.init_train_mode("cnn") # training - jdata_train = f.get(jdata, 'training', {}) - jdata_train['disp_training'] = True - jdata_train['time_training'] = True - jdata_train['profiling'] = False - jdata_train['disp_file'] = os.path.join(PATH_CNN, os.path.split(jdata_train['disp_file'])[1]) - jdata_train['save_ckpt'] = os.path.join(PATH_CNN, os.path.split(jdata_train['save_ckpt'])[1]) + jdata_train = f.get(jdata, "training", {}) + jdata_train["disp_training"] = True + jdata_train["time_training"] = True + jdata_train["profiling"] = False + jdata_train["disp_file"] = os.path.join( + PATH_CNN, os.path.split(jdata_train["disp_file"])[1] + 
) + jdata_train["save_ckpt"] = os.path.join( + PATH_CNN, os.path.split(jdata_train["save_ckpt"])[1] + ) # - jdata['model'] = nvnmd_cfg.get_model_jdata() - jdata['nvnmd'] = nvnmd_cfg.get_nvnmd_jdata() + jdata["model"] = nvnmd_cfg.get_model_jdata() + jdata["nvnmd"] = nvnmd_cfg.get_nvnmd_jdata() return jdata def normalized_input_qnn(jdata, PATH_QNN, CONFIG_CNN, WEIGHT_CNN, MAP_CNN): - r"""Normalize a input script file for quantize neural network - """ + r"""Normalize a input script file for quantize neural network""" # - jdata_nvnmd = jdata_deepmd_input['nvnmd'] - jdata_nvnmd['enable'] = True - jdata_nvnmd['config_file'] = CONFIG_CNN - jdata_nvnmd['weight_file'] = WEIGHT_CNN - jdata_nvnmd['map_file'] = MAP_CNN + jdata_nvnmd = jdata_deepmd_input["nvnmd"] + jdata_nvnmd["enable"] = True + jdata_nvnmd["config_file"] = CONFIG_CNN + jdata_nvnmd["weight_file"] = WEIGHT_CNN + jdata_nvnmd["map_file"] = MAP_CNN nvnmd_cfg.init_from_jdata(jdata_nvnmd) - nvnmd_cfg.init_train_mode('qnn') - jdata['nvnmd'] = nvnmd_cfg.get_nvnmd_jdata() + nvnmd_cfg.init_train_mode("qnn") + jdata["nvnmd"] = nvnmd_cfg.get_nvnmd_jdata() # training - jdata2 = jdata['training'] - jdata2['disp_file'] = os.path.join(PATH_QNN, os.path.split(jdata2['disp_file'])[1]) - jdata2['save_ckpt'] = os.path.join(PATH_QNN, os.path.split(jdata2['save_ckpt'])[1]) - jdata['training'] = jdata2 + jdata2 = jdata["training"] + jdata2["disp_file"] = os.path.join(PATH_QNN, os.path.split(jdata2["disp_file"])[1]) + jdata2["save_ckpt"] = os.path.join(PATH_QNN, os.path.split(jdata2["save_ckpt"])[1]) + jdata["training"] = jdata2 return jdata @@ -105,15 +124,15 @@ def train_nvnmd( ): # test input if not os.path.exists(INPUT): - log.warning("The input script %s does not exist"%(INPUT)) + log.warning("The input script %s does not exist" % (INPUT)) # STEP1 - PATH_CNN = 'nvnmd_cnn' - CONFIG_CNN = os.path.join(PATH_CNN, 'config.npy') - INPUT_CNN = os.path.join(PATH_CNN, 'train.json') - WEIGHT_CNN = os.path.join(PATH_CNN, 'weight.npy') - 
FRZ_MODEL_CNN = os.path.join(PATH_CNN, 'frozen_model.pb') - MAP_CNN = os.path.join(PATH_CNN, 'map.npy') - LOG_CNN = os.path.join(PATH_CNN, 'train.log') + PATH_CNN = "nvnmd_cnn" + CONFIG_CNN = os.path.join(PATH_CNN, "config.npy") + INPUT_CNN = os.path.join(PATH_CNN, "train.json") + WEIGHT_CNN = os.path.join(PATH_CNN, "weight.npy") + FRZ_MODEL_CNN = os.path.join(PATH_CNN, "frozen_model.pb") + MAP_CNN = os.path.join(PATH_CNN, "map.npy") + LOG_CNN = os.path.join(PATH_CNN, "train.log") if step == "s1": # normailize input file jdata = normalized_input(INPUT, PATH_CNN, CONFIG_CNN) @@ -121,34 +140,34 @@ def train_nvnmd( nvnmd_cfg.save(CONFIG_CNN) # train cnn jdata = jdata_cmd_train.copy() - jdata['INPUT'] = INPUT_CNN - jdata['log_path'] = LOG_CNN - jdata['restart'] = restart + jdata["INPUT"] = INPUT_CNN + jdata["log_path"] = LOG_CNN + jdata["restart"] = restart train(**jdata) tf.reset_default_graph() # freeze jdata = jdata_cmd_freeze.copy() - jdata['checkpoint_folder'] = PATH_CNN - jdata['output'] = FRZ_MODEL_CNN - jdata['nvnmd_weight'] = WEIGHT_CNN + jdata["checkpoint_folder"] = PATH_CNN + jdata["output"] = FRZ_MODEL_CNN + jdata["nvnmd_weight"] = WEIGHT_CNN freeze(**jdata) tf.reset_default_graph() # map table jdata = { "nvnmd_config": CONFIG_CNN, "nvnmd_weight": WEIGHT_CNN, - "nvnmd_map": MAP_CNN + "nvnmd_map": MAP_CNN, } mapt(**jdata) tf.reset_default_graph() # STEP2 - PATH_QNN = 'nvnmd_qnn' - CONFIG_QNN = os.path.join(PATH_QNN, 'config.npy') - INPUT_QNN = os.path.join(PATH_QNN, 'train.json') - WEIGHT_QNN = os.path.join(PATH_QNN, 'weight.npy') - FRZ_MODEL_QNN = os.path.join(PATH_QNN, 'frozen_model.pb') - MODEL_QNN = os.path.join(PATH_QNN, 'model.pb') - LOG_QNN = os.path.join(PATH_QNN, 'train.log') + PATH_QNN = "nvnmd_qnn" + CONFIG_QNN = os.path.join(PATH_QNN, "config.npy") + INPUT_QNN = os.path.join(PATH_QNN, "train.json") + WEIGHT_QNN = os.path.join(PATH_QNN, "weight.npy") + FRZ_MODEL_QNN = os.path.join(PATH_QNN, "frozen_model.pb") + MODEL_QNN = os.path.join(PATH_QNN, 
"model.pb") + LOG_QNN = os.path.join(PATH_QNN, "train.log") if step == "s2": # normailize input file @@ -158,15 +177,15 @@ def train_nvnmd( nvnmd_cfg.save(CONFIG_QNN) # train qnn jdata = jdata_cmd_train.copy() - jdata['INPUT'] = INPUT_QNN - jdata['log_path'] = LOG_QNN + jdata["INPUT"] = INPUT_QNN + jdata["log_path"] = LOG_QNN train(**jdata) tf.reset_default_graph() # freeze jdata = jdata_cmd_freeze.copy() - jdata['checkpoint_folder'] = PATH_QNN - jdata['output'] = FRZ_MODEL_QNN - jdata['nvnmd_weight'] = WEIGHT_QNN + jdata["checkpoint_folder"] = PATH_QNN + jdata["output"] = FRZ_MODEL_QNN + jdata["nvnmd_weight"] = WEIGHT_QNN freeze(**jdata) tf.reset_default_graph() # wrap @@ -174,7 +193,7 @@ def train_nvnmd( "nvnmd_config": CONFIG_QNN, "nvnmd_weight": WEIGHT_QNN, "nvnmd_map": MAP_CNN, - "nvnmd_model": MODEL_QNN + "nvnmd_model": MODEL_QNN, } wrap(**jdata) tf.reset_default_graph() diff --git a/deepmd/nvnmd/entrypoints/wrap.py b/deepmd/nvnmd/entrypoints/wrap.py index 625f596232..86dd98e2de 100644 --- a/deepmd/nvnmd/entrypoints/wrap.py +++ b/deepmd/nvnmd/entrypoints/wrap.py @@ -1,25 +1,46 @@ - -import numpy as np import logging +from typing import ( + List, + Optional, +) -from deepmd.env import tf -from deepmd.env import op_module -from deepmd.utils.sess import run_sess - -from deepmd.nvnmd.utils.fio import FioBin, FioTxt -from deepmd.nvnmd.utils.config import nvnmd_cfg -from deepmd.nvnmd.utils.weight import get_fitnet_weight -from deepmd.nvnmd.utils.encode import Encode -from deepmd.nvnmd.utils.op import map_nvnmd -from deepmd.nvnmd.utils.network import get_sess +import numpy as np -from deepmd.nvnmd.data.data import jdata_deepmd_input, jdata_sys -from typing import List, Optional +from deepmd.env import ( + op_module, + tf, +) +from deepmd.nvnmd.data.data import ( + jdata_deepmd_input, + jdata_sys, +) +from deepmd.nvnmd.utils.config import ( + nvnmd_cfg, +) +from deepmd.nvnmd.utils.encode import ( + Encode, +) +from deepmd.nvnmd.utils.fio import ( + FioBin, + FioTxt, 
+) +from deepmd.nvnmd.utils.network import ( + get_sess, +) +from deepmd.nvnmd.utils.op import ( + map_nvnmd, +) +from deepmd.nvnmd.utils.weight import ( + get_fitnet_weight, +) +from deepmd.utils.sess import ( + run_sess, +) log = logging.getLogger(__name__) -class Wrap(): +class Wrap: r"""Generate the binary model file (model.pb) the model file can be use to run the NVNMD with lammps the pair style need set as: @@ -50,22 +71,18 @@ class Wrap(): """ def __init__( - self, - config_file: str, - weight_file: str, - map_file: str, - model_file: str + self, config_file: str, weight_file: str, map_file: str, model_file: str ): self.config_file = config_file self.weight_file = weight_file self.map_file = map_file self.model_file = model_file - jdata = jdata_deepmd_input['nvnmd'] - jdata['config_file'] = config_file - jdata['weight_file'] = weight_file - jdata['map_file'] = map_file - jdata['enable'] = True + jdata = jdata_deepmd_input["nvnmd"] + jdata["config_file"] = config_file + jdata["weight_file"] = weight_file + jdata["map_file"] = map_file + jdata["enable"] = True nvnmd_cfg.init_from_jdata(jdata) @@ -73,10 +90,10 @@ def wrap(self): dscp = nvnmd_cfg.dscp ctrl = nvnmd_cfg.ctrl - M1 = dscp['M1'] - ntype = dscp['ntype'] - ntype_max = dscp['ntype_max'] - NSTDM_M1X = ctrl['NSTDM_M1X'] + M1 = dscp["M1"] + ntype = dscp["ntype"] + ntype_max = dscp["ntype_max"] + NSTDM_M1X = ctrl["NSTDM_M1X"] e = Encode() bcfg = self.wrap_dscp() @@ -100,7 +117,7 @@ def wrap(self): # extend data according to the number of bits per row of BRAM nhex = 32 datas = [hcfg, hfps, hbps, hswt, hdsw, hfea, hgra] - keys = 'cfg fps bps swt dsw fea gra'.split() + keys = "cfg fps bps swt dsw fea gra".split() nhs = [] nws = [] for ii in range(len(datas)): @@ -109,18 +126,18 @@ def wrap(self): h = len(d) w = len(d[0]) nhs.append(h) - nws.append(w) # nhex * 4 // 8 = nbyte + nws.append(w) # nhex * 4 // 8 = nbyte # - w_full = np.ceil(w*4/nhex) * nhex # 32 bit per data + w_full = np.ceil(w * 4 / nhex) * nhex # 
32 bit per data d = e.extend_hex(d, w_full) # DEVELOP_DEBUG - if jdata_sys['debug']: - log.info("%s: %d x % d bit" % (k, h, w*4)) - FioTxt().save('nvnmd/wrap/h%s.txt'%(k), d) - datas[ii] = d + if jdata_sys["debug"]: + log.info("%s: %d x % d bit" % (k, h, w * 4)) + FioTxt().save("nvnmd/wrap/h%s.txt" % (k), d) + datas[ii] = d # - nvnmd_cfg.size['NH_DATA'] = nhs - nvnmd_cfg.size['NW_DATA'] = nws + nvnmd_cfg.size["NH_DATA"] = nhs + nvnmd_cfg.size["NW_DATA"] = nws nvnmd_cfg.save(nvnmd_cfg.config_file) head = self.wrap_head(nhs, nws) # @@ -133,11 +150,11 @@ def wrap(self): def wrap_head(self, nhs, nws): nbit = nvnmd_cfg.nbit - NBIT_MODEL_HEAD = nbit['NBIT_MODEL_HEAD'] - NBIT_FIXD_FL = nbit['NBIT_FIXD_FL'] - rcut = nvnmd_cfg.dscp['rcut'] + NBIT_MODEL_HEAD = nbit["NBIT_MODEL_HEAD"] + NBIT_FIXD_FL = nbit["NBIT_FIXD_FL"] + rcut = nvnmd_cfg.dscp["rcut"] - bs = '' + bs = "" e = Encode() # height for n in nhs: @@ -150,7 +167,7 @@ def wrap_head(self, nhs, nws): bs = e.dec2bin(RCUT, NBIT_MODEL_HEAD)[0] + bs # extend hs = e.bin2hex(bs) - hs = e.extend_hex(hs, NBIT_MODEL_HEAD*32) + hs = e.extend_hex(hs, NBIT_MODEL_HEAD * 32) return hs def wrap_dscp(self): @@ -165,33 +182,33 @@ def wrap_dscp(self): dscp = nvnmd_cfg.dscp nbit = nvnmd_cfg.nbit mapt = nvnmd_cfg.map - NBIT_IDX_S2G = nbit['NBIT_IDX_S2G'] - NBIT_NEIB = nbit['NBIT_NEIB'] - NBIT_FLTE = nbit['NBIT_FLTE'] - NBIT_FIXD = nbit['NBIT_FIXD'] - NBIT_FIXD_FL = nbit['NBIT_FIXD_FL'] - M1 = dscp['M1'] - ntype = dscp['ntype'] - ntype_max = dscp['ntype_max'] - - bs = '' + NBIT_IDX_S2G = nbit["NBIT_IDX_S2G"] + NBIT_NEIB = nbit["NBIT_NEIB"] + NBIT_FLTE = nbit["NBIT_FLTE"] + NBIT_FIXD = nbit["NBIT_FIXD"] + NBIT_FIXD_FL = nbit["NBIT_FIXD_FL"] + M1 = dscp["M1"] + ntype = dscp["ntype"] + ntype_max = dscp["ntype_max"] + + bs = "" e = Encode() # shift_idx_s2g - x_st, x_ed, x_dt, N0, N1 = mapt['cfg_s2g'][0] - shift_idx_s2g = int(np.round(- x_st / x_dt)) + x_st, x_ed, x_dt, N0, N1 = mapt["cfg_s2g"][0] + shift_idx_s2g = int(np.round(-x_st / x_dt)) 
bs = e.dec2bin(shift_idx_s2g, NBIT_IDX_S2G)[0] + bs # sel - SEL = dscp['SEL'] + SEL = dscp["SEL"] bs = e.dec2bin(SEL[0], NBIT_NEIB)[0] + bs bs = e.dec2bin(SEL[1], NBIT_NEIB)[0] + bs bs = e.dec2bin(SEL[2], NBIT_NEIB)[0] + bs bs = e.dec2bin(SEL[3], NBIT_NEIB)[0] + bs # GS tf.reset_default_graph() - t_x = tf.placeholder(tf.float64, [None, 1], 't_x') - t_table = tf.placeholder(tf.float64, [None, None], 't_table') - t_table_grad = tf.placeholder(tf.float64, [None, None], 't_table_grad') - t_table_info = tf.placeholder(tf.float64, [None], 't_table_info') + t_x = tf.placeholder(tf.float64, [None, 1], "t_x") + t_table = tf.placeholder(tf.float64, [None, None], "t_table") + t_table_grad = tf.placeholder(tf.float64, [None, None], "t_table_grad") + t_table_info = tf.placeholder(tf.float64, [None], "t_table_info") t_y = op_module.map_flt_nvnmd(t_x, t_table, t_table_grad, t_table_info) sess = get_sess() # @@ -200,26 +217,26 @@ def wrap_dscp(self): for tt2 in range(ntype_max): if (tt < ntype) and (tt2 < ntype): # s - mi = mapt['s'][tt] - cfgs = mapt['cfg_u2s'] + mi = mapt["s"][tt] + cfgs = mapt["cfg_u2s"] cfgs = np.array([np.float64(v) for vs in cfgs for v in vs]) feed_dict = { - t_x : np.ones([1, 1]) * 0.0, - t_table : mi, - t_table_grad : mi * 0.0, - t_table_info : cfgs + t_x: np.ones([1, 1]) * 0.0, + t_table: mi, + t_table_grad: mi * 0.0, + t_table_info: cfgs, } si = run_sess(sess, t_y, feed_dict=feed_dict) si = np.reshape(si, [-1])[0] # G - mi = mapt['g'][tt2] - cfgs = mapt['cfg_s2g'] + mi = mapt["g"][tt2] + cfgs = mapt["cfg_s2g"] cfgs = np.array([np.float64(v) for vs in cfgs for v in vs]) feed_dict = { - t_x : np.ones([1, 1]) * si, - t_table : mi, - t_table_grad : mi * 0.0, - t_table_info : cfgs + t_x: np.ones([1, 1]) * si, + t_table: mi, + t_table_grad: mi * 0.0, + t_table_info: cfgs, } gi = run_sess(sess, t_y, feed_dict=feed_dict) gsi = np.reshape(si, [-1]) * np.reshape(gi, [-1]) @@ -227,35 +244,34 @@ def wrap_dscp(self): gsi = np.zeros(M1) for ii in range(M1): 
GSs.extend(e.dec2bin(e.qr(gsi[ii], NBIT_FIXD_FL), NBIT_FIXD, True)) - sGSs = ''.join(GSs[::-1]) + sGSs = "".join(GSs[::-1]) bs = sGSs + bs # - NIX = dscp['NIX'] + NIX = dscp["NIX"] ln2_NIX = -int(np.log2(NIX)) bs = e.dec2bin(ln2_NIX, NBIT_FLTE, signed=True)[0] + bs return bs def wrap_fitn(self): - r"""Wrap the weights of fitting net - """ + r"""Wrap the weights of fitting net""" dscp = nvnmd_cfg.dscp fitn = nvnmd_cfg.fitn weight = nvnmd_cfg.weight nbit = nvnmd_cfg.nbit ctrl = nvnmd_cfg.ctrl - ntype = dscp['ntype'] - ntype_max = dscp['ntype_max'] - nlayer_fit = fitn['nlayer_fit'] - NNODE_FITS = fitn['NNODE_FITS'] + ntype = dscp["ntype"] + ntype_max = dscp["ntype_max"] + nlayer_fit = fitn["nlayer_fit"] + NNODE_FITS = fitn["NNODE_FITS"] - NBIT_FIT_DATA = nbit['NBIT_FIT_DATA'] - NBIT_FIT_DATA_FL = nbit['NBIT_FIT_DATA_FL'] - NBIT_FIT_WEIGHT = nbit['NBIT_FIT_WEIGHT'] - NBIT_FIT_DISP = nbit['NBIT_FIT_DISP'] - NBIT_FIT_WXDB = nbit['NBIT_FIT_WXDB'] - NSTDM = ctrl['NSTDM'] - NSEL = ctrl['NSEL'] + NBIT_FIT_DATA = nbit["NBIT_FIT_DATA"] + NBIT_FIT_DATA_FL = nbit["NBIT_FIT_DATA_FL"] + NBIT_FIT_WEIGHT = nbit["NBIT_FIT_WEIGHT"] + NBIT_FIT_DISP = nbit["NBIT_FIT_DISP"] + NBIT_FIT_WXDB = nbit["NBIT_FIT_WXDB"] + NSTDM = ctrl["NSTDM"] + NSEL = ctrl["NSEL"] # encode all parameters bb, bdr, bdc, bwr, bwc = [], [], [], [], [] @@ -263,17 +279,19 @@ def wrap_fitn(self): bbt, bdrt, bdct, bwrt, bwct = [], [], [], [], [] for tt in range(ntype_max): # get parameters: weight and bias - if (tt < ntype): + if tt < ntype: w, b = get_fitnet_weight(weight, tt, ll, nlayer_fit) else: w, b = get_fitnet_weight(weight, 0, ll, nlayer_fit) w = w * 0 b = b * 0 # restrict the shift value of energy - if (ll == (nlayer_fit - 1)): + if ll == (nlayer_fit - 1): b = b * 0 bbi = self.wrap_bias(b, NBIT_FIT_WXDB, NBIT_FIT_DATA_FL) - bdri, bdci, bwri, bwci = self.wrap_weight(w, NBIT_FIT_DISP, NBIT_FIT_WEIGHT) + bdri, bdci, bwri, bwci = self.wrap_weight( + w, NBIT_FIT_DISP, NBIT_FIT_WEIGHT + ) bbt.append(bbi) 
bdrt.append(bdri) bdct.append(bdci) @@ -296,26 +314,42 @@ def wrap_fitn(self): nc = NNODE_FITS[ll + 1] nrs = int(np.ceil(nr / NSTDM)) ncs = int(np.ceil(nc / NSTDM)) - if (nc == 1): + if nc == 1: # fp - bfp += [bwc[ll][tt][sr * nrs + rr][cc] for rr in range(nrs) for cc in range(nc)] + bfp += [ + bwc[ll][tt][sr * nrs + rr][cc] + for rr in range(nrs) + for cc in range(nc) + ] bfp += [bdc[ll][tt][sc * ncs * 0 + cc] for cc in range(ncs)] bfp += [bb[ll][tt][sc * ncs * 0 + cc] for cc in range(ncs)] # bp - bbp += [bwc[ll][tt][sr * nrs + rr][cc] for rr in range(nrs) for cc in range(nc)] + bbp += [ + bwc[ll][tt][sr * nrs + rr][cc] + for rr in range(nrs) + for cc in range(nc) + ] bbp += [bdc[ll][tt][sc * ncs * 0 + cc] for cc in range(ncs)] bbp += [bb[ll][tt][sc * ncs * 0 + cc] for cc in range(ncs)] else: # fp - bfp += [bwc[ll][tt][rr][sc * ncs + cc] for cc in range(ncs) for rr in range(nr)] + bfp += [ + bwc[ll][tt][rr][sc * ncs + cc] + for cc in range(ncs) + for rr in range(nr) + ] bfp += [bdc[ll][tt][sc * ncs + cc] for cc in range(ncs)] bfp += [bb[ll][tt][sc * ncs + cc] for cc in range(ncs)] # bp - bbp += [bwr[ll][tt][sr * nrs + rr][cc] for rr in range(nrs) for cc in range(nc)] + bbp += [ + bwr[ll][tt][sr * nrs + rr][cc] + for rr in range(nrs) + for cc in range(nc) + ] bbp += [bdr[ll][tt][sc * ncs + cc] for cc in range(ncs)] bbp += [bb[ll][tt][sc * ncs + cc] for cc in range(ncs)] - bfps.append(''.join(bfp[::-1])) - bbps.append(''.join(bbp[::-1])) + bfps.append("".join(bfp[::-1])) + bbps.append("".join(bbp[::-1])) return bfps, bbps def wrap_bias(self, bias, NBIT_DATA, NBIT_DATA_FL): @@ -325,7 +359,7 @@ def wrap_bias(self, bias, NBIT_DATA, NBIT_DATA_FL): return Bs def wrap_weight(self, weight, NBIT_DISP, NBIT_WEIGHT): - r""" weight: weights of fittingNet + r"""weight: weights of fittingNet NBIT_DISP: nbits of exponent of weight max value NBIT_WEIGHT: nbits of mantissa of weights """ @@ -342,7 +376,7 @@ def wrap_weight(self, weight, NBIT_DISP, NBIT_WEIGHT): wi = weight[ii, :] 
wi, expo_max = e.norm_expo(wi, NBIT_WEIGHT_FL, 0) nrs[ii] = expo_max - wrs[ii,:] = wi + wrs[ii, :] = wi # column for ii in range(nc): wi = weight[:, ii] @@ -360,19 +394,18 @@ def wrap_weight(self, weight, NBIT_DISP, NBIT_WEIGHT): return NRs, NCs, WRs, WCs def wrap_map(self): - r"""Wrap the mapping table of embedding network - """ + r"""Wrap the mapping table of embedding network""" dscp = nvnmd_cfg.dscp maps = nvnmd_cfg.map nbit = nvnmd_cfg.nbit - M1 = dscp['M1'] - ntype = dscp['ntype'] - ntype_max = dscp['ntype_max'] + M1 = dscp["M1"] + ntype = dscp["ntype"] + ntype_max = dscp["ntype_max"] - NBIT_FLTD = nbit['NBIT_FLTD'] - NBIT_FLTE = nbit['NBIT_FLTE'] - NBIT_FLTF = nbit['NBIT_FLTF'] + NBIT_FLTD = nbit["NBIT_FLTD"] + NBIT_FLTE = nbit["NBIT_FLTE"] + NBIT_FLTF = nbit["NBIT_FLTF"] e = Encode() # get mapt @@ -381,16 +414,16 @@ def wrap_map(self): feas = [] gras = [] for tt in range(ntype_max): - if (tt < ntype): - swt = np.concatenate([maps['s'][tt], maps['h'][tt]], axis=1) - dsw = np.concatenate([maps['s_grad'][tt], maps['h_grad'][tt]], axis=1) - fea = maps['g'][tt] - gra = maps['g_grad'][tt] + if tt < ntype: + swt = np.concatenate([maps["s"][tt], maps["h"][tt]], axis=1) + dsw = np.concatenate([maps["s_grad"][tt], maps["h_grad"][tt]], axis=1) + fea = maps["g"][tt] + gra = maps["g_grad"][tt] else: - swt = np.concatenate([maps['s'][0], maps['h'][0]], axis=1) - dsw = np.concatenate([maps['s_grad'][0], maps['h_grad'][0]], axis=1) - fea = maps['g'][0] - gra = maps['g_grad'][0] + swt = np.concatenate([maps["s"][0], maps["h"][0]], axis=1) + dsw = np.concatenate([maps["s_grad"][0], maps["h_grad"][0]], axis=1) + fea = maps["g"][0] + gra = maps["g_grad"][0] swt *= 0 dsw *= 0 fea *= 0 @@ -403,16 +436,16 @@ def wrap_map(self): # reshape opt_uram = True # for reduce uram resource version if opt_uram: - nmerges = [2*2, 2*2, 4*2, 4*2] # n*(4/2) + nmerges = [2 * 2, 2 * 2, 4 * 2, 4 * 2] # n*(4/2) else: - nmerges = [2*4, 2*4, 4*4, 4*4] # n*(4) + nmerges = [2 * 4, 2 * 4, 4 * 4, 4 * 4] 
# n*(4) bss = [] for ii in range(4): d = mapts[ii] d = np.reshape(d, [ntype_max, -1, 4]) if opt_uram: - d1 = d[:,:,0:2] - d2 = d[:,:,2:4] + d1 = d[:, :, 0:2] + d2 = d[:, :, 2:4] d = np.concatenate([d1, d2]) # bs = e.flt2bin(d, NBIT_FLTE, NBIT_FLTF) @@ -425,10 +458,10 @@ def wrap_map(self): def wrap( *, - nvnmd_config: Optional[str] = 'nvnmd/config.npy', - nvnmd_weight: Optional[str] = 'nvnmd/weight.npy', - nvnmd_map: Optional[str] = 'nvnmd/map.npy', - nvnmd_model: Optional[str] = 'nvnmd/model.pb', + nvnmd_config: Optional[str] = "nvnmd/config.npy", + nvnmd_weight: Optional[str] = "nvnmd/weight.npy", + nvnmd_map: Optional[str] = "nvnmd/map.npy", + nvnmd_model: Optional[str] = "nvnmd/model.pb", **kwargs ): wrapObj = Wrap(nvnmd_config, nvnmd_weight, nvnmd_map, nvnmd_model) diff --git a/deepmd/nvnmd/fit/__init__.py b/deepmd/nvnmd/fit/__init__.py index 4d7e88e30d..ce85804acb 100644 --- a/deepmd/nvnmd/fit/__init__.py +++ b/deepmd/nvnmd/fit/__init__.py @@ -6,4 +6,4 @@ 1. continuous fitting network 2. 
quantized fitting network -""" \ No newline at end of file +""" diff --git a/deepmd/nvnmd/fit/ener.py b/deepmd/nvnmd/fit/ener.py index 31bcab7588..f98cedf936 100644 --- a/deepmd/nvnmd/fit/ener.py +++ b/deepmd/nvnmd/fit/ener.py @@ -1,5 +1,8 @@ - -from deepmd.env import tf -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.nvnmd.utils.config import nvnmd_cfg +from deepmd.env import ( + GLOBAL_TF_FLOAT_PRECISION, + tf, +) +from deepmd.nvnmd.utils.config import ( + nvnmd_cfg, +) from deepmd.nvnmd.utils.network import one_layer as one_layer_nvnmd diff --git a/deepmd/nvnmd/utils/__init__.py b/deepmd/nvnmd/utils/__init__.py index f888413ad1..4e201d917e 100644 --- a/deepmd/nvnmd/utils/__init__.py +++ b/deepmd/nvnmd/utils/__init__.py @@ -1,11 +1,27 @@ - -from .argcheck import nvnmd_args -from .config import nvnmd_cfg -from .encode import Encode -from .fio import FioBin, FioDic, FioTxt -from .network import one_layer -from .op import map_nvnmd -from .weight import get_filter_weight, get_fitnet_weight +from .argcheck import ( + nvnmd_args, +) +from .config import ( + nvnmd_cfg, +) +from .encode import ( + Encode, +) +from .fio import ( + FioBin, + FioDic, + FioTxt, +) +from .network import ( + one_layer, +) +from .op import ( + map_nvnmd, +) +from .weight import ( + get_filter_weight, + get_fitnet_weight, +) __all__ = [ "nvnmd_args", diff --git a/deepmd/nvnmd/utils/argcheck.py b/deepmd/nvnmd/utils/argcheck.py index 7903ffd361..a53dae0d5a 100644 --- a/deepmd/nvnmd/utils/argcheck.py +++ b/deepmd/nvnmd/utils/argcheck.py @@ -1,29 +1,63 @@ - - -from dargs import Argument +from dargs import ( + Argument, +) def nvnmd_args(): - doc_net_size_file = "configuration the number of nodes of fitting_net, just can be set as 128" + doc_net_size_file = ( + "configuration the number of nodes of fitting_net, just can be set as 128" + ) doc_map_file = "A file containing the mapping tables to replace the calculation of embedding nets" doc_config_file = "A file containing the 
parameters about how to implement the model in certain hardware" doc_weight_file = "a *.npy file containing the weights of the model" doc_enable = "enable the nvnmd training" - doc_restore_descriptor = "enable to restore the parameter of embedding_net from weight.npy" - doc_restore_fitting_net = "enable to restore the parameter of fitting_net from weight.npy" + doc_restore_descriptor = ( + "enable to restore the parameter of embedding_net from weight.npy" + ) + doc_restore_fitting_net = ( + "enable to restore the parameter of fitting_net from weight.npy" + ) doc_quantize_descriptor = "enable the quantizatioin of descriptor" doc_quantize_fitting_net = "enable the quantizatioin of fitting_net" args = [ Argument("net_size", int, optional=False, default=128, doc=doc_net_size_file), - Argument("map_file", str, optional=False, default='none', doc=doc_map_file), - Argument("config_file", str, optional=False, default='none', doc=doc_config_file), - Argument("weight_file", str, optional=False, default='none', doc=doc_weight_file), + Argument("map_file", str, optional=False, default="none", doc=doc_map_file), + Argument( + "config_file", str, optional=False, default="none", doc=doc_config_file + ), + Argument( + "weight_file", str, optional=False, default="none", doc=doc_weight_file + ), Argument("enable", bool, optional=False, default=False, doc=doc_enable), - Argument("restore_descriptor", bool, optional=False, default=False, doc=doc_restore_descriptor), - Argument("restore_fitting_net", bool, optional=False, default=False, doc=doc_restore_fitting_net), - Argument("quantize_descriptor", bool, optional=False, default=False, doc=doc_quantize_descriptor), - Argument("quantize_fitting_net", bool, optional=False, default=False, doc=doc_quantize_fitting_net), + Argument( + "restore_descriptor", + bool, + optional=False, + default=False, + doc=doc_restore_descriptor, + ), + Argument( + "restore_fitting_net", + bool, + optional=False, + default=False, + 
doc=doc_restore_fitting_net, + ), + Argument( + "quantize_descriptor", + bool, + optional=False, + default=False, + doc=doc_quantize_descriptor, + ), + Argument( + "quantize_fitting_net", + bool, + optional=False, + default=False, + doc=doc_quantize_fitting_net, + ), ] - doc_nvnmd = 'The nvnmd options.' - return Argument("nvnmd", dict, args, [], optional=True, doc = doc_nvnmd) \ No newline at end of file + doc_nvnmd = "The nvnmd options." + return Argument("nvnmd", dict, args, [], optional=True, doc=doc_nvnmd) diff --git a/deepmd/nvnmd/utils/config.py b/deepmd/nvnmd/utils/config.py index 9e187ee337..6b3d4de713 100644 --- a/deepmd/nvnmd/utils/config.py +++ b/deepmd/nvnmd/utils/config.py @@ -1,17 +1,25 @@ - -import numpy as np import logging -from deepmd.nvnmd.data.data import jdata_config, jdata_configs, jdata_deepmd_input -from deepmd.nvnmd.data.data import NVNMD_WELCOME, NVNMD_CITATION -from deepmd.nvnmd.utils.fio import FioDic +import numpy as np -from deepmd.nvnmd.utils.op import r2s +from deepmd.nvnmd.data.data import ( + NVNMD_CITATION, + NVNMD_WELCOME, + jdata_config, + jdata_configs, + jdata_deepmd_input, +) +from deepmd.nvnmd.utils.fio import ( + FioDic, +) +from deepmd.nvnmd.utils.op import ( + r2s, +) log = logging.getLogger(__name__) -class NvnmdConfig(): +class NvnmdConfig: r"""Configuration for NVNMD record the message of model such as size, using nvnmd or not @@ -25,31 +33,27 @@ class NvnmdConfig(): DOI: 10.1038/s41524-022-00773-z """ - def __init__( - self, - jdata: dict - ): + def __init__(self, jdata: dict): self.map = {} self.config = jdata_config - self.save_path = 'nvnmd/config.npy' + self.save_path = "nvnmd/config.npy" self.weight = {} self.init_from_jdata(jdata) def init_from_jdata(self, jdata: dict = {}): - r"""Initial this class with `jdata` loaded from input script - """ + r"""Initial this class with `jdata` loaded from input script""" if jdata == {}: return None - self.net_size = jdata['net_size'] - self.map_file = jdata['map_file'] - 
self.config_file = jdata['config_file'] - self.enable = jdata['enable'] - self.weight_file = jdata['weight_file'] - self.restore_descriptor = jdata['restore_descriptor'] - self.restore_fitting_net = jdata['restore_fitting_net'] - self.quantize_descriptor = jdata['quantize_descriptor'] - self.quantize_fitting_net = jdata['quantize_fitting_net'] + self.net_size = jdata["net_size"] + self.map_file = jdata["map_file"] + self.config_file = jdata["config_file"] + self.enable = jdata["enable"] + self.weight_file = jdata["weight_file"] + self.restore_descriptor = jdata["restore_descriptor"] + self.restore_fitting_net = jdata["restore_fitting_net"] + self.quantize_descriptor = jdata["quantize_descriptor"] + self.quantize_fitting_net = jdata["quantize_fitting_net"] # load data if self.enable: @@ -57,63 +61,58 @@ def init_from_jdata(self, jdata: dict = {}): self.weight = FioDic().load(self.weight_file, {}) jdata_config_ = jdata_config.copy() - jdata_config_['fitn']['neuron'][0] = self.net_size + jdata_config_["fitn"]["neuron"][0] = self.net_size load_config = FioDic().load(self.config_file, jdata_config_) self.init_from_config(load_config) # if load the file, set net_size self.init_net_size() def init_value(self): - r"""Initial member with dict - """ - self.dscp = self.config['dscp'] - self.fitn = self.config['fitn'] - self.dpin = self.config['dpin'] - self.size = self.config['size'] - self.ctrl = self.config['ctrl'] - self.nbit = self.config['nbit'] - + r"""Initial member with dict""" + self.dscp = self.config["dscp"] + self.fitn = self.config["fitn"] + self.dpin = self.config["dpin"] + self.size = self.config["size"] + self.ctrl = self.config["ctrl"] + self.nbit = self.config["nbit"] + def update_config(self): - r"""Update config from dict - """ - self.config['dscp'] = self.dscp - self.config['fitn'] = self.fitn - self.config['dpin'] = self.dpin - self.config['size'] = self.size - self.config['ctrl'] = self.ctrl - self.config['nbit'] = self.nbit - - def 
init_train_mode(self, mod='cnn'): - r"""Configure for taining cnn or qnn - """ - if mod == 'cnn': + r"""Update config from dict""" + self.config["dscp"] = self.dscp + self.config["fitn"] = self.fitn + self.config["dpin"] = self.dpin + self.config["size"] = self.size + self.config["ctrl"] = self.ctrl + self.config["nbit"] = self.nbit + + def init_train_mode(self, mod="cnn"): + r"""Configure for taining cnn or qnn""" + if mod == "cnn": self.restore_descriptor = False self.restore_fitting_net = False self.quantize_descriptor = False self.quantize_fitting_net = False - elif mod == 'qnn': + elif mod == "qnn": self.restore_descriptor = True self.restore_fitting_net = True self.quantize_descriptor = True self.quantize_fitting_net = True def init_from_config(self, jdata): - r"""Initial member element one by one - """ + r"""Initial member element one by one""" self.config = FioDic().update(jdata, self.config) - self.config['dscp'] = self.init_dscp(self.config['dscp'], self.config) - self.config['fitn'] = self.init_fitn(self.config['fitn'], self.config) - self.config['dpin'] = self.init_dpin(self.config['dpin'], self.config) - self.config['size'] = self.init_size(self.config['size'], self.config) - self.config['ctrl'] = self.init_ctrl(self.config['ctrl'], self.config) - self.config['nbit'] = self.init_nbit(self.config['nbit'], self.config) + self.config["dscp"] = self.init_dscp(self.config["dscp"], self.config) + self.config["fitn"] = self.init_fitn(self.config["fitn"], self.config) + self.config["dpin"] = self.init_dpin(self.config["dpin"], self.config) + self.config["size"] = self.init_size(self.config["size"], self.config) + self.config["ctrl"] = self.init_ctrl(self.config["ctrl"], self.config) + self.config["nbit"] = self.init_nbit(self.config["nbit"], self.config) self.init_value() def init_net_size(self): - r"""Initial net_size - """ + r"""Initial net_size""" # self.net_size = self.fitn['neuron'][0] - self.net_size = self.config['fitn']['neuron'][0] + self.net_size = 
self.config["fitn"]["neuron"][0] if self.enable: key = str(self.net_size) if key in jdata_configs.keys(): @@ -123,201 +122,185 @@ def init_net_size(self): log.error("NVNMD: don't have the configure of net_size") def init_from_deepmd_input(self, jdata): - r"""Initial members with input script of deepmd - """ + r"""Initial members with input script of deepmd""" fioObj = FioDic() - self.config['dscp'] = fioObj.update(jdata['descriptor'], self.config['dscp']) - self.config['fitn'] = fioObj.update(jdata['fitting_net'], self.config['fitn']) - self.config['dscp'] = self.init_dscp(self.config['dscp'], self.config) - self.config['fitn'] = self.init_fitn(self.config['fitn'], self.config) - dp_in = { - "type_map": fioObj.get(jdata, 'type_map', []) - } - self.config['dpin'] = fioObj.update(dp_in, self.config['dpin']) + self.config["dscp"] = fioObj.update(jdata["descriptor"], self.config["dscp"]) + self.config["fitn"] = fioObj.update(jdata["fitting_net"], self.config["fitn"]) + self.config["dscp"] = self.init_dscp(self.config["dscp"], self.config) + self.config["fitn"] = self.init_fitn(self.config["fitn"], self.config) + dp_in = {"type_map": fioObj.get(jdata, "type_map", [])} + self.config["dpin"] = fioObj.update(dp_in, self.config["dpin"]) # self.init_net_size() self.init_value() def init_dscp(self, jdata: dict, jdata_parent: dict = {}) -> dict: - r"""Initial members about descriptor - """ + r"""Initial members about descriptor""" # embedding - jdata['M1'] = jdata['neuron'][-1] - jdata['M2'] = jdata['axis_neuron'] - jdata['SEL'] = (jdata['sel'] + [0, 0, 0, 0])[0:4] - jdata['NNODE_FEAS'] = [1] + jdata['neuron'] - jdata['nlayer_fea'] = len(jdata['neuron']) - jdata['same_net'] = int(1) if jdata['type_one_side'] else int(0) + jdata["M1"] = jdata["neuron"][-1] + jdata["M2"] = jdata["axis_neuron"] + jdata["SEL"] = (jdata["sel"] + [0, 0, 0, 0])[0:4] + jdata["NNODE_FEAS"] = [1] + jdata["neuron"] + jdata["nlayer_fea"] = len(jdata["neuron"]) + jdata["same_net"] = int(1) if 
jdata["type_one_side"] else int(0) # neighbor - jdata['NIDP'] = int(np.sum(jdata['sel'])) - jdata['NIX'] = 2 ** int(np.ceil(np.log2(jdata['NIDP'] / 1.5))) + jdata["NIDP"] = int(np.sum(jdata["sel"])) + jdata["NIX"] = 2 ** int(np.ceil(np.log2(jdata["NIDP"] / 1.5))) # type - jdata['ntype'] = len(jdata['sel']) - jdata['ntypex'] = 1 if(jdata['same_net']) else jdata['ntype'] + jdata["ntype"] = len(jdata["sel"]) + jdata["ntypex"] = 1 if (jdata["same_net"]) else jdata["ntype"] return jdata def init_fitn(self, jdata: dict, jdata_parent: dict = {}) -> dict: - r"""Initial members about fitting network - """ - M1 = jdata_parent['dscp']['M1'] - M2 = jdata_parent['dscp']['M2'] + r"""Initial members about fitting network""" + M1 = jdata_parent["dscp"]["M1"] + M2 = jdata_parent["dscp"]["M2"] - jdata['NNODE_FITS'] = [int(M1 * M2)] + jdata['neuron'] + [1] - jdata['nlayer_fit'] = len(jdata['neuron']) + 1 - jdata['NLAYER'] = jdata['nlayer_fit'] + jdata["NNODE_FITS"] = [int(M1 * M2)] + jdata["neuron"] + [1] + jdata["nlayer_fit"] = len(jdata["neuron"]) + 1 + jdata["NLAYER"] = jdata["nlayer_fit"] return jdata def init_dpin(self, jdata: dict, jdata_parent: dict = {}) -> dict: - r""" initial members about other deepmd input - """ + r"""initial members about other deepmd input""" return jdata def init_size(self, jdata: dict, jdata_parent: dict = {}) -> dict: - r"""Initial members about ram capacity - """ - jdata['NAEXT'] = jdata['Na'] - jdata['NTYPE'] = jdata_parent['dscp']['ntype_max'] - jdata['NTYPEX'] = jdata_parent['dscp']['ntypex_max'] + r"""Initial members about ram capacity""" + jdata["NAEXT"] = jdata["Na"] + jdata["NTYPE"] = jdata_parent["dscp"]["ntype_max"] + jdata["NTYPEX"] = jdata_parent["dscp"]["ntypex_max"] return jdata def init_ctrl(self, jdata: dict, jdata_parent: dict = {}) -> dict: - r"""Initial members about control signal - """ - ntype_max = jdata_parent['dscp']['ntype_max'] - jdata['NSADV'] = jdata['NSTDM'] + 1 - jdata['NSEL'] = jdata['NSTDM'] * ntype_max - if (32 % 
jdata['NSTDM_M1X'] > 0): - log.warning("NVNMD: NSTDM_M1X must be divisor of 32 for the right runing in data_merge module") + r"""Initial members about control signal""" + ntype_max = jdata_parent["dscp"]["ntype_max"] + jdata["NSADV"] = jdata["NSTDM"] + 1 + jdata["NSEL"] = jdata["NSTDM"] * ntype_max + if 32 % jdata["NSTDM_M1X"] > 0: + log.warning( + "NVNMD: NSTDM_M1X must be divisor of 32 for the right runing in data_merge module" + ) return jdata def init_nbit(self, jdata: dict, jdata_parent: dict = {}) -> dict: - r"""Initial members about quantification precision - """ - Na = jdata_parent['size']['Na'] - NaX = jdata_parent['size']['NaX'] - ntype_max = jdata_parent['dscp']['ntype_max'] - NSEL = jdata_parent['ctrl']['NSEL'] + r"""Initial members about quantification precision""" + Na = jdata_parent["size"]["Na"] + NaX = jdata_parent["size"]["NaX"] + ntype_max = jdata_parent["dscp"]["ntype_max"] + NSEL = jdata_parent["ctrl"]["NSEL"] # general - jdata['NBIT_FLTM'] = 1+jdata['NBIT_FLTF'] - jdata['NBIT_FLTH'] = 1+jdata['NBIT_FLTM'] + jdata["NBIT_FLTM"] = 1 + jdata["NBIT_FLTF"] + jdata["NBIT_FLTH"] = 1 + jdata["NBIT_FLTM"] # atom - jdata['NBIT_ENE_FL'] = jdata['NBIT_FIT_DATA_FL'] - jdata['NBIT_SPE'] = int(np.ceil(np.log2(ntype_max))) - jdata['NBIT_LST'] = int(np.ceil(np.log2(NaX))) - jdata['NBIT_CRD3'] = jdata['NBIT_CRD'] * 3 - jdata['NBIT_ATOM'] = jdata['NBIT_SPE'] + jdata['NBIT_CRD3'] + jdata["NBIT_ENE_FL"] = jdata["NBIT_FIT_DATA_FL"] + jdata["NBIT_SPE"] = int(np.ceil(np.log2(ntype_max))) + jdata["NBIT_LST"] = int(np.ceil(np.log2(NaX))) + jdata["NBIT_CRD3"] = jdata["NBIT_CRD"] * 3 + jdata["NBIT_ATOM"] = jdata["NBIT_SPE"] + jdata["NBIT_CRD3"] # middle result - jdata['NBIT_SEL'] = int(np.ceil(np.log2(NSEL))) + jdata["NBIT_SEL"] = int(np.ceil(np.log2(NSEL))) return jdata def save(self, file_name=None): - r"""Save all configuration to file - """ + r"""Save all configuration to file""" if file_name is None: file_name = self.save_path else: self.save_path = file_name 
self.update_config() FioDic().save(file_name, self.config) - + def get_s_range(self, davg, dstd): - rmin = nvnmd_cfg.dscp['rcut_smth'] - rmax = nvnmd_cfg.dscp['rcut'] - ntype = self.dscp['ntype'] - dmin = self.dscp['dmin'] + rmin = nvnmd_cfg.dscp["rcut_smth"] + rmax = nvnmd_cfg.dscp["rcut"] + ntype = self.dscp["ntype"] + dmin = self.dscp["dmin"] # s0 = r2s(dmin, rmin, rmax) smin_ = -davg[:ntype, 0] / dstd[:ntype, 0] smax_ = (s0 - davg[:ntype, 0]) / dstd[:ntype, 0] smin = np.min(smin_) smax = np.max(smax_) - self.dscp['smin'] = smin - self.dscp['smax'] = smax + self.dscp["smin"] = smin + self.dscp["smax"] = smax nvnmd_cfg.save() # check log.info(f"the range of s is [{smin}, {smax}]") - if (smax - smin > 16.0): + if smax - smin > 16.0: log.warning(f"the range of s is over the limit (smax - smin) > 16.0") - log.warning(f"Please reset the rcut_smth as a bigger value to fix this warning") - + log.warning( + f"Please reset the rcut_smth as a bigger value to fix this warning" + ) def get_dscp_jdata(self): - r"""Generate `model/descriptor` in input script - """ + r"""Generate `model/descriptor` in input script""" dscp = self.dscp - jdata = jdata_deepmd_input['model']['descriptor'] - jdata['sel'] = dscp['sel'] - jdata['rcut'] = dscp['rcut'] - jdata['rcut_smth'] = dscp['rcut_smth'] - jdata['neuron'] = dscp['neuron'] - jdata['type_one_side'] = dscp['type_one_side'] - jdata['axis_neuron'] = dscp['axis_neuron'] + jdata = jdata_deepmd_input["model"]["descriptor"] + jdata["sel"] = dscp["sel"] + jdata["rcut"] = dscp["rcut"] + jdata["rcut_smth"] = dscp["rcut_smth"] + jdata["neuron"] = dscp["neuron"] + jdata["type_one_side"] = dscp["type_one_side"] + jdata["axis_neuron"] = dscp["axis_neuron"] return jdata def get_fitn_jdata(self): - r"""Generate `model/fitting_net` in input script - """ + r"""Generate `model/fitting_net` in input script""" fitn = self.fitn - jdata = jdata_deepmd_input['model']['fitting_net'] - jdata['neuron'] = fitn['neuron'] + jdata = 
jdata_deepmd_input["model"]["fitting_net"] + jdata["neuron"] = fitn["neuron"] return jdata def get_model_jdata(self): - r"""Generate `model` in input script - """ - jdata = jdata_deepmd_input['model'] - jdata['descriptor'] = self.get_dscp_jdata() - jdata['fitting_net'] = self.get_fitn_jdata() - if len(self.dpin['type_map']) > 0: - jdata['type_map'] = self.dpin['type_map'] + r"""Generate `model` in input script""" + jdata = jdata_deepmd_input["model"] + jdata["descriptor"] = self.get_dscp_jdata() + jdata["fitting_net"] = self.get_fitn_jdata() + if len(self.dpin["type_map"]) > 0: + jdata["type_map"] = self.dpin["type_map"] return jdata def get_nvnmd_jdata(self): - r"""Generate `nvnmd` in input script - """ - jdata = jdata_deepmd_input['nvnmd'] - jdata['net_size'] = self.net_size - jdata['config_file'] = self.config_file - jdata['weight_file'] = self.weight_file - jdata['map_file'] = self.map_file - jdata['enable'] = self.enable - jdata['restore_descriptor'] = self.restore_descriptor - jdata['restore_fitting_net'] = self.restore_fitting_net - jdata['quantize_descriptor'] = self.quantize_descriptor - jdata['quantize_fitting_net'] = self.quantize_fitting_net + r"""Generate `nvnmd` in input script""" + jdata = jdata_deepmd_input["nvnmd"] + jdata["net_size"] = self.net_size + jdata["config_file"] = self.config_file + jdata["weight_file"] = self.weight_file + jdata["map_file"] = self.map_file + jdata["enable"] = self.enable + jdata["restore_descriptor"] = self.restore_descriptor + jdata["restore_fitting_net"] = self.restore_fitting_net + jdata["quantize_descriptor"] = self.quantize_descriptor + jdata["quantize_fitting_net"] = self.quantize_fitting_net return jdata def get_learning_rate_jdata(self): - r"""Generate `learning_rate` in input script - """ - return jdata_deepmd_input['learning_rate'] + r"""Generate `learning_rate` in input script""" + return jdata_deepmd_input["learning_rate"] def get_loss_jdata(self): - r"""Generate `loss` in input script - """ - return 
jdata_deepmd_input['loss'] + r"""Generate `loss` in input script""" + return jdata_deepmd_input["loss"] def get_training_jdata(self): - r"""Generate `training` in input script - """ - return jdata_deepmd_input['training'] + r"""Generate `training` in input script""" + return jdata_deepmd_input["training"] def get_deepmd_jdata(self): - r"""Generate input script with member element one by one - """ + r"""Generate input script with member element one by one""" jdata = jdata_deepmd_input.copy() - jdata['model'] = self.get_model_jdata() - jdata['nvnmd'] = self.get_nvnmd_jdata() - jdata['learning_rate'] = self.get_learning_rate_jdata() - jdata['loss'] = self.get_loss_jdata() - jdata['training'] = self.get_training_jdata() + jdata["model"] = self.get_model_jdata() + jdata["nvnmd"] = self.get_nvnmd_jdata() + jdata["learning_rate"] = self.get_learning_rate_jdata() + jdata["loss"] = self.get_loss_jdata() + jdata["training"] = self.get_training_jdata() return jdata def disp_message(self): - r"""Display the log of NVNMD - """ + r"""Display the log of NVNMD""" NVNMD_CONFIG = ( f"enable: {self.enable}", f"net_size: {self.net_size}", @@ -334,4 +317,4 @@ def disp_message(self): # global configuration for nvnmd -nvnmd_cfg = NvnmdConfig(jdata_deepmd_input['nvnmd']) +nvnmd_cfg = NvnmdConfig(jdata_deepmd_input["nvnmd"]) diff --git a/deepmd/nvnmd/utils/encode.py b/deepmd/nvnmd/utils/encode.py index 42eecfd5f8..7a1dcbe4cc 100644 --- a/deepmd/nvnmd/utils/encode.py +++ b/deepmd/nvnmd/utils/encode.py @@ -1,34 +1,32 @@ +import logging import numpy as np -import logging -from deepmd.nvnmd.data.data import jdata_sys +from deepmd.nvnmd.data.data import ( + jdata_sys, +) log = logging.getLogger(__name__) -class Encode(): - r"""Encoding value as hex, bin, and dec format - """ +class Encode: + r"""Encoding value as hex, bin, and dec format""" def __init__(self): pass def qr(self, v, nbit: int = 14): - r"""Quantize value using round - """ + r"""Quantize value using round""" return np.round(v * 
(2**nbit)) def qf(self, v, nbit: int = 14): - r"""Quantize value using floor - """ + r"""Quantize value using floor""" return np.floor(v * (2**nbit)) def qc(self, v, nbit: int = 14): - r"""Quantize value using ceil - """ + r"""Quantize value using ceil""" return np.ceil(v * (2**nbit)) - + def split_expo_mant(self, v, min=-1000): vabs = np.abs(v) expo = np.log2(vabs) @@ -36,15 +34,15 @@ def split_expo_mant(self, v, min=-1000): prec = 1.0 / 2.0**expo mant = v * prec return expo, mant - + def find_max_expo(self, v, expo_min=-1000): vabs = np.abs(v) vmax = np.max(vabs) - expo_max = np.log2(vmax+1e-50) + expo_max = np.log2(vmax + 1e-50) expo_max = np.maximum(expo_max, expo_min) expo_max = np.floor(expo_max) return expo_max - + def norm_expo(self, v, nbit_frac=20, expo_min=-1000): expo_max = self.find_max_expo(v, expo_min) prec_expo = 2 ** (nbit_frac - expo_max) @@ -55,49 +53,48 @@ def norm_expo(self, v, nbit_frac=20, expo_min=-1000): return sign * vabs, expo_max def flt2bin_one(self, v, nbit_expo, nbit_frac): - v = float(v) # 64-bit float + v = float(v) # 64-bit float h = v.hex() n = len(h) st = n for ii in range(n): - if h[ii] == 'x': + if h[ii] == "x": st = ii + 1 - if h[ii] == 'p': - ed = ii+1 - is_zero = h[st] == '0' + if h[ii] == "p": + ed = ii + 1 + is_zero = h[st] == "0" # if is_zero: - return '0' * (1 + nbit_expo + nbit_frac) + return "0" * (1 + nbit_expo + nbit_frac) else: - s = '1' if h[0] == '-' else '0' - e = int(h[ed:]) + int(2**(nbit_expo-1) + 2**nbit_expo) - e = bin(e)[3:] # 0b1xxxxxxx - fh = h[st+2:ed-1] + s = "1" if h[0] == "-" else "0" + e = int(h[ed:]) + int(2 ** (nbit_expo - 1) + 2**nbit_expo) + e = bin(e)[3:] # 0b1xxxxxxx + fh = h[st + 2 : ed - 1] fb = self.hex2bin_str(fh) f = fb[0:nbit_frac] - return s+e+f - + return s + e + f + def flt2bin(self, data, nbit_expo, nbit_frac): - r"""Convert float into binary string list - """ + r"""Convert float into binary string list""" data = np.reshape(np.array(data), [-1]) return [self.flt2bin_one(d, nbit_expo, 
nbit_frac) for d in data] def byte2hex(self, bs, nbyte): - r"""Convert byte into hex + r"""Convert byte into hex bs: low byte in the first hex: low byte in the right """ nl = len(bs) // nbyte hs = [] for ii in range(nl): - b = bs[nbyte*ii:nbyte*(ii+1)] + b = bs[nbyte * ii : nbyte * (ii + 1)] b = b[::-1] h = b.hex() hs.append(h) return hs - def check_dec(self, idec, nbit, signed=False, name=''): + def check_dec(self, idec, nbit, signed=False, name=""): r"""Check whether the data (idec) is in the range range is :math:`[0, 2^nbit-1]` for unsigned range is :math:`[-2^{nbit-1}, 2^{nbit-1}-1]` for signed @@ -112,11 +109,15 @@ def check_dec(self, idec, nbit, signed=False, name=''): I1 = idec < pmin I2 = idec > pmax - if jdata_sys['debug']: + if jdata_sys["debug"]: if np.sum(I1) > 0: - log.warning(f"NVNMD: there are data {name} smaller than the lower limit {pmin}") + log.warning( + f"NVNMD: there are data {name} smaller than the lower limit {pmin}" + ) if np.sum(I2) > 0: - log.warning(f"NVNMD: there are data {name} bigger than the upper limit {pmax}") + log.warning( + f"NVNMD: there are data {name} bigger than the upper limit {pmax}" + ) def extend_list(self, slbin, nfull): r"""Extend the list (slbin) to the length (nfull) @@ -134,14 +135,14 @@ def extend_list(self, slbin, nfull): nfull = int(nfull) n = len(slbin) dn = nfull - n - ds = '0' * len(slbin[0]) + ds = "0" * len(slbin[0]) return slbin + [ds for ii in range(dn)] def extend_bin(self, slbin, nfull): r"""Extend the element of list (slbin) to the length (nfull) such as, when - + | slbin = ['10010','10100'], | nfull = 6 @@ -152,21 +153,19 @@ def extend_bin(self, slbin, nfull): nfull = int(nfull) n = len(slbin[0]) dn = nfull - n - ds = '0' * int(dn) + ds = "0" * int(dn) return [ds + s for s in slbin] def extend_hex(self, slhex, nfull): - r"""Extend the element of list (slhex) to the length (nfull) - """ + r"""Extend the element of list (slhex) to the length (nfull)""" nfull = int(nfull) n = len(slhex[0]) dn = (nfull // 
4) - n - ds = '0' * int(dn) + ds = "0" * int(dn) return [ds + s for s in slhex] def split_bin(self, sbin, nbit: int): - r"""Split sbin into many segment with the length nbit - """ + r"""Split sbin into many segment with the length nbit""" if isinstance(sbin, list): sl = [] for s in sbin: @@ -175,34 +174,35 @@ def split_bin(self, sbin, nbit: int): else: n = len(sbin) nseg = int(np.ceil(n / nbit)) - s = '0' * int(nseg * nbit - n) + s = "0" * int(nseg * nbit - n) sbin = s + sbin - sl = [sbin[ii * nbit:(ii + 1) * nbit] for ii in range(nseg)] + sl = [sbin[ii * nbit : (ii + 1) * nbit] for ii in range(nseg)] sl = sl[::-1] return sl def reverse_bin(self, slbin, nreverse): - r"""Reverse binary string list per `nreverse` value - """ + r"""Reverse binary string list per `nreverse` value""" nreverse = int(nreverse) # consider that {len(slbin)} can not be divided by {nreverse} without remainder n = int(np.ceil(len(slbin) / nreverse)) slbin = self.extend_list(slbin, n * nreverse) - return [slbin[ii * nreverse + nreverse - 1 - jj] for ii in range(n) for jj in range(nreverse)] + return [ + slbin[ii * nreverse + nreverse - 1 - jj] + for ii in range(n) + for jj in range(nreverse) + ] def merge_bin(self, slbin, nmerge): - r"""Merge binary string list per `nmerge` value - """ + r"""Merge binary string list per `nmerge` value""" nmerge = int(nmerge) # consider that {len(slbin)} can not be divided by {nmerge} without remainder n = int(np.ceil(len(slbin) / nmerge)) slbin = self.extend_list(slbin, n * nmerge) - return [''.join(slbin[nmerge * ii: nmerge * (ii + 1)]) for ii in range(n)] + return ["".join(slbin[nmerge * ii : nmerge * (ii + 1)]) for ii in range(n)] - def dec2bin(self, idec, nbit=10, signed=False, name=''): - r"""Convert dec array to binary string list - """ + def dec2bin(self, idec, nbit=10, signed=False, name=""): + r"""Convert dec array to binary string list""" idec = np.int64(np.reshape(np.array(idec), [-1])) self.check_dec(idec, nbit, signed, name) @@ -226,35 +226,31 @@ 
def dec2bin(self, idec, nbit=10, signed=False, name=''): return sl def hex2bin_str(self, shex): - r"""Convert hex string to binary string - """ + r"""Convert hex string to binary string""" n = len(shex) sl = [] for ii in range(n): si = bin(int(shex[ii], 16) + 16) sl.append(si[-4:]) - return ''.join(sl) + return "".join(sl) def hex2bin(self, data): - r"""Convert hex string list to binary string list - """ + r"""Convert hex string list to binary string list""" data = np.reshape(np.array(data), [-1]) return [self.hex2bin_str(d) for d in data] def bin2hex_str(self, sbin): - r"""Convert binary string to hex string - """ + r"""Convert binary string to hex string""" n = len(sbin) nx = int(np.ceil(n / 4)) - sbin = ('0' * (nx * 4 - n)) + sbin + sbin = ("0" * (nx * 4 - n)) + sbin sl = [] for ii in range(nx): - si = hex(int(sbin[4 * ii: 4 * (ii + 1)], 2) + 16) + si = hex(int(sbin[4 * ii : 4 * (ii + 1)], 2) + 16) sl.append(si[-1]) - return ''.join(sl) + return "".join(sl) def bin2hex(self, data): - r"""Convert binary string list to hex string list - """ + r"""Convert binary string list to hex string list""" data = np.reshape(np.array(data), [-1]) return [self.bin2hex_str(d) for d in data] diff --git a/deepmd/nvnmd/utils/fio.py b/deepmd/nvnmd/utils/fio.py index 4c5e95795f..c1f4e299bf 100644 --- a/deepmd/nvnmd/utils/fio.py +++ b/deepmd/nvnmd/utils/fio.py @@ -1,33 +1,35 @@ - -import os -import numpy as np import json +import logging +import os import struct -from typing import List +from typing import ( + List, +) + +import numpy as np -import logging log = logging.getLogger(__name__) class Fio: - r"""Basic class for FIO - """ + r"""Basic class for FIO""" + def __init__(self): pass - def exits(self, file_name=''): - if file_name == '': + def exits(self, file_name=""): + if file_name == "": return True return os.path.exists(file_name) - def mkdir(self, path_name=''): + def mkdir(self, path_name=""): if not self.exits(path_name): os.makedirs(path_name) - def create_file_path(self, 
file_name=''): - pars = file_name.split('/') + def create_file_path(self, file_name=""): + pars = file_name.split("/") if len(pars) > 0: - path_name = '/'.join(pars[:-1]) + path_name = "/".join(pars[:-1]) self.mkdir(path_name) def is_path(self, path): @@ -56,21 +58,22 @@ class FioDic: r"""Input and output for dict class data the file can be .json or .npy file containing a dictionary """ + def __init__(self) -> None: pass - def load(self, file_name='', default_value={}): - if file_name.endswith('.json'): + def load(self, file_name="", default_value={}): + if file_name.endswith(".json"): return FioJsonDic().load(file_name, default_value) - elif file_name.endswith('.npy'): + elif file_name.endswith(".npy"): return FioNpyDic().load(file_name, default_value) else: return FioNpyDic().load(file_name, default_value) - def save(self, file_name='', dic={}): - if file_name.endswith('.json'): + def save(self, file_name="", dic={}): + if file_name.endswith(".json"): FioJsonDic().save(file_name, dic) - elif file_name.endswith('.npy'): + elif file_name.endswith(".npy"): FioNpyDic().save(file_name, dic) else: FioNpyDic().save(file_name, dic) @@ -101,12 +104,12 @@ def update(self, jdata, jdata_o): class FioNpyDic: - r"""Input and output for .npy file containing dictionary - """ + r"""Input and output for .npy file containing dictionary""" + def __init__(self): pass - def load(self, file_name='', default_value={}): + def load(self, file_name="", default_value={}): if Fio().exits(file_name): log.info(f"load {file_name}") dat = np.load(file_name, allow_pickle=True)[0] @@ -115,23 +118,22 @@ def load(self, file_name='', default_value={}): log.warning(f"can not find {file_name}") return default_value - def save(self, file_name='', dic={}): + def save(self, file_name="", dic={}): Fio().create_file_path(file_name) np.save(file_name, [dic]) class FioJsonDic: - r"""Input and output for .json file containing dictionary - """ + r"""Input and output for .json file containing dictionary""" + def 
__init__(self): pass - def load(self, file_name='', default_value={}): - r"""Load .json file into dict - """ + def load(self, file_name="", default_value={}): + r"""Load .json file into dict""" if Fio().exits(file_name): log.info(f"load {file_name}") - with open(file_name, 'r') as fr: + with open(file_name, "r") as fr: jdata = fr.read() dat = json.loads(jdata) return dat @@ -139,28 +141,26 @@ def load(self, file_name='', default_value={}): log.warning(f"can not find {file_name}") return default_value - def save(self, file_name='', dic={}): - r"""Save dict into .json file - """ + def save(self, file_name="", dic={}): + r"""Save dict into .json file""" log.info(f"write jdata to {file_name}") Fio().create_file_path(file_name) - with open(file_name, 'w') as fw: + with open(file_name, "w") as fw: json.dump(dic, fw, indent=4) -class FioBin(): - r"""Input and output for binary file - """ +class FioBin: + r"""Input and output for binary file""" + def __init__(self): pass - def load(self, file_name='', default_value=''): - r"""Load binary file into bytes value - """ + def load(self, file_name="", default_value=""): + r"""Load binary file into bytes value""" if Fio().exits(file_name): log.info(f"load {file_name}") dat = "" - with open(file_name, 'rb') as fr: + with open(file_name, "rb") as fr: dat = fr.read() return dat else: @@ -168,8 +168,7 @@ def load(self, file_name='', default_value=''): return default_value def save(self, file_name: str, data: List[str]): - r"""Save hex string into binary file - """ + r"""Save hex string into binary file""" log.info(f"write binary to {file_name}") Fio().create_file_path(file_name) # si is H->L @@ -179,36 +178,35 @@ def save(self, file_name: str, data: List[str]): for si in data: buff.extend(list(bytearray.fromhex(si))[::-1]) # - with open(file_name, 'wb') as fp: - fp.write(struct.pack('%sB' % len(buff), *buff)) + with open(file_name, "wb") as fp: + fp.write(struct.pack("%sB" % len(buff), *buff)) + + +class FioTxt: + r"""Input and 
output for .txt file with string""" -class FioTxt(): - r"""Input and output for .txt file with string - """ def __init__(self): pass - def load(self, file_name='', default_value=[]): - r"""Load .txt file into string list - """ + def load(self, file_name="", default_value=[]): + r"""Load .txt file into string list""" if Fio().exits(file_name): log.info(f"load {file_name}") - with open(file_name, 'r', encoding='utf-8') as fr: + with open(file_name, "r", encoding="utf-8") as fr: dat = fr.readlines() - dat = [d.replace('\n', '') for d in dat] + dat = [d.replace("\n", "") for d in dat] return dat else: log.info(f"can not find {file_name}") return default_value - def save(self, file_name: str = '', data: list = []): - r"""Save string list into .txt file - """ + def save(self, file_name: str = "", data: list = []): + r"""Save string list into .txt file""" log.info(f"write string to txt file {file_name}") Fio().create_file_path(file_name) if isinstance(data, str): data = [data] - data = [d + '\n' for d in data] - with open(file_name, 'w') as fw: + data = [d + "\n" for d in data] + with open(file_name, "w") as fw: fw.writelines(data) diff --git a/deepmd/nvnmd/utils/network.py b/deepmd/nvnmd/utils/network.py index a70f93d640..70237e3985 100644 --- a/deepmd/nvnmd/utils/network.py +++ b/deepmd/nvnmd/utils/network.py @@ -1,18 +1,28 @@ - -import numpy as np import logging -from deepmd.env import tf -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import op_module +import numpy as np -from deepmd.nvnmd.data.data import jdata_sys -from deepmd.nvnmd.utils.config import nvnmd_cfg -from deepmd.nvnmd.utils.weight import get_constant_initializer -from deepmd.utils.network import variable_summaries +from deepmd.env import ( + GLOBAL_TF_FLOAT_PRECISION, + op_module, + tf, +) +from deepmd.nvnmd.data.data import ( + jdata_sys, +) +from deepmd.nvnmd.utils.config import ( + nvnmd_cfg, +) +from deepmd.nvnmd.utils.weight import ( + get_constant_initializer, +) +from 
deepmd.utils.network import ( + variable_summaries, +) log = logging.getLogger(__name__) + def get_sess(): init_op = tf.global_variables_initializer() sess = tf.Session() @@ -52,8 +62,7 @@ def matmul3_qq(a, b, nbit): def qf(x, nbit): - r"""Quantize and floor tensor `x` with quantification precision `nbit`. - """ + r"""Quantize and floor tensor `x` with quantification precision `nbit`.""" prec = 2**nbit y = tf.floor(x * prec) / prec @@ -62,23 +71,24 @@ def qf(x, nbit): def qr(x, nbit): - r"""Quantize and round tensor `x` with quantification precision `nbit`. - """ + r"""Quantize and round tensor `x` with quantification precision `nbit`.""" prec = 2**nbit y = tf.round(x * prec) / prec y = x + tf.stop_gradient(y - x) return y + def tanh4(x): with tf.name_scope("tanh4"): sign = tf.sign(x) xclp = tf.clip_by_value(x, -2, 2) xabs = tf.abs(xclp) - y1 = (1.0/16.0) * tf.pow(xabs, 4) + (-1.0/4.0) * tf.pow(xabs, 3) + xabs + y1 = (1.0 / 16.0) * tf.pow(xabs, 4) + (-1.0 / 4.0) * tf.pow(xabs, 3) + xabs y2 = y1 * sign return y2 + def one_layer_wb( shape, outputs_size, @@ -89,89 +99,107 @@ def one_layer_wb( initial_variables, seed, uniform_seed, - name + name, ): if nvnmd_cfg.restore_fitting_net: # initializer - w_initializer = get_constant_initializer(nvnmd_cfg.weight, 'matrix') - b_initializer = get_constant_initializer(nvnmd_cfg.weight, 'bias') + w_initializer = get_constant_initializer(nvnmd_cfg.weight, "matrix") + b_initializer = get_constant_initializer(nvnmd_cfg.weight, "bias") else: w_initializer = tf.random_normal_initializer( stddev=stddev / np.sqrt(shape[1] + outputs_size), - seed=seed if (seed is None or uniform_seed) else seed + 0) + seed=seed if (seed is None or uniform_seed) else seed + 0, + ) b_initializer = tf.random_normal_initializer( stddev=stddev, mean=bavg, - seed=seed if (seed is None or uniform_seed) else seed + 1) + seed=seed if (seed is None or uniform_seed) else seed + 1, + ) if initial_variables is not None: - w_initializer = 
tf.constant_initializer(initial_variables[name + '/matrix']) - b_initializer = tf.constant_initializer(initial_variables[name + '/bias']) + w_initializer = tf.constant_initializer(initial_variables[name + "/matrix"]) + b_initializer = tf.constant_initializer(initial_variables[name + "/bias"]) # variable - w = tf.get_variable('matrix', - [shape[1], outputs_size], - precision, - w_initializer, - trainable=trainable) - variable_summaries(w, 'matrix') - b = tf.get_variable('bias', - [outputs_size], - precision, - b_initializer, - trainable=trainable) - variable_summaries(b, 'bias') + w = tf.get_variable( + "matrix", + [shape[1], outputs_size], + precision, + w_initializer, + trainable=trainable, + ) + variable_summaries(w, "matrix") + b = tf.get_variable( + "bias", [outputs_size], precision, b_initializer, trainable=trainable + ) + variable_summaries(b, "bias") return w, b -def one_layer(inputs, - outputs_size, - activation_fn=tf.nn.tanh, - precision=GLOBAL_TF_FLOAT_PRECISION, - stddev=1.0, - bavg=0.0, - name='linear', - reuse=None, - seed=None, - use_timestep=False, - trainable=True, - useBN=False, - uniform_seed=False, - initial_variables=None, - mixed_prec=None, - final_layer=False): + +def one_layer( + inputs, + outputs_size, + activation_fn=tf.nn.tanh, + precision=GLOBAL_TF_FLOAT_PRECISION, + stddev=1.0, + bavg=0.0, + name="linear", + reuse=None, + seed=None, + use_timestep=False, + trainable=True, + useBN=False, + uniform_seed=False, + initial_variables=None, + mixed_prec=None, + final_layer=False, +): r"""Build one layer with continuous or quantized value. Its weight and bias can be initialed with random or constant value. 
""" # USE FOR NEW FITTINGNET with tf.variable_scope(name, reuse=reuse): shape = inputs.get_shape().as_list() - w, b = one_layer_wb(shape, outputs_size, bavg, stddev, precision, trainable, initial_variables, seed, uniform_seed, name) + w, b = one_layer_wb( + shape, + outputs_size, + bavg, + stddev, + precision, + trainable, + initial_variables, + seed, + uniform_seed, + name, + ) if nvnmd_cfg.quantize_fitting_net: - NBIT_DATA_FL = nvnmd_cfg.nbit['NBIT_FIT_DATA_FL'] - NBIT_SHORT_FL = nvnmd_cfg.nbit['NBIT_FIT_SHORT_FL'] + NBIT_DATA_FL = nvnmd_cfg.nbit["NBIT_FIT_DATA_FL"] + NBIT_SHORT_FL = nvnmd_cfg.nbit["NBIT_FIT_SHORT_FL"] # w - with tf.variable_scope('w', reuse=reuse): + with tf.variable_scope("w", reuse=reuse): w = op_module.quantize_nvnmd(w, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1) w = tf.ensure_shape(w, [shape[1], outputs_size]) # b - with tf.variable_scope('b', reuse=reuse): + with tf.variable_scope("b", reuse=reuse): b = op_module.quantize_nvnmd(b, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1) b = tf.ensure_shape(b, [outputs_size]) # x - with tf.variable_scope('x', reuse=reuse): + with tf.variable_scope("x", reuse=reuse): x = op_module.quantize_nvnmd(inputs, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1) inputs = tf.ensure_shape(x, [None, shape[1]]) # wx # normlize weight mode: 0 all | 1 column norm_mode = 0 if final_layer else 1 - wx = op_module.matmul_fitnet_nvnmd(inputs, w, NBIT_DATA_FL, NBIT_SHORT_FL, norm_mode) + wx = op_module.matmul_fitnet_nvnmd( + inputs, w, NBIT_DATA_FL, NBIT_SHORT_FL, norm_mode + ) - with tf.variable_scope('wx', reuse=reuse): - wx = op_module.quantize_nvnmd(wx, 1, NBIT_DATA_FL, NBIT_DATA_FL-2, -1) + with tf.variable_scope("wx", reuse=reuse): + wx = op_module.quantize_nvnmd(wx, 1, NBIT_DATA_FL, NBIT_DATA_FL - 2, -1) wx = tf.ensure_shape(wx, [None, outputs_size]) # wxb wxb = wx + b - with tf.variable_scope('wxb', reuse=reuse): + with tf.variable_scope("wxb", reuse=reuse): wxb = op_module.quantize_nvnmd(wxb, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1) wxb = 
tf.ensure_shape(wxb, [None, outputs_size]) # actfun @@ -181,7 +209,7 @@ def one_layer(inputs, else: y = wxb - with tf.variable_scope('actfun', reuse=reuse): + with tf.variable_scope("actfun", reuse=reuse): y = op_module.quantize_nvnmd(y, 1, NBIT_DATA_FL, NBIT_DATA_FL, -1) y = tf.ensure_shape(y, [None, outputs_size]) else: diff --git a/deepmd/nvnmd/utils/op.py b/deepmd/nvnmd/utils/op.py index c030c60467..442242fb27 100644 --- a/deepmd/nvnmd/utils/op.py +++ b/deepmd/nvnmd/utils/op.py @@ -1,18 +1,18 @@ - import numpy as np + def r2s(r, rmin, rmax): v = 0.0 if (r > 0.01) and (r <= rmin): v = 1.0 / r elif (r > rmin) and (r <= rmax): uu = (r - rmin) / (rmax - rmin) - v = (uu*uu*uu * (-6 * uu*uu + 15 * uu - 10) + 1) / r + v = (uu * uu * uu * (-6 * uu * uu + 15 * uu - 10) + 1) / r return v + def map_nvnmd(x, map_y, map_dy, prec, nbit=None): - r"""Mapping function implemented by numpy - """ + r"""Mapping function implemented by numpy""" xk = int(np.floor(x / prec)) dx = x - xk * prec y = map_y[xk] + map_dy[xk] * dx diff --git a/deepmd/nvnmd/utils/weight.py b/deepmd/nvnmd/utils/weight.py index 681331958b..caadca585d 100644 --- a/deepmd/nvnmd/utils/weight.py +++ b/deepmd/nvnmd/utils/weight.py @@ -1,15 +1,16 @@ +import logging import numpy as np -import logging -from deepmd.env import tf +from deepmd.env import ( + tf, +) log = logging.getLogger(__name__) def get_weight(weights, key): - r"""Get weight value according to key - """ + r"""Get weight value according to key""" if key in weights.keys(): return weights[key] else: @@ -18,8 +19,7 @@ def get_weight(weights, key): def get_normalize(weights: dict): - r"""Get normalize parameter (avg and std) of :math:`s_{ji}` - """ + r"""Get normalize parameter (avg and std) of :math:`s_{ji}`""" key = "descrpt_attr.t_avg" avg = get_weight(weights, key) key = "descrpt_attr.t_std" @@ -76,9 +76,8 @@ def get_fitnet_weight(weights: dict, spe_i: int, layer_l: int, nlayer: int = 10) def get_constant_initializer(weights, name): - r"""Get initial 
value by name and create a initializer - """ + r"""Get initial value by name and create a initializer""" scope = tf.get_variable_scope().name - name = scope + '.' + name + name = scope + "." + name value = get_weight(weights, name) return tf.constant_initializer(value) diff --git a/deepmd/op/__init__.py b/deepmd/op/__init__.py index aa9b309888..cf7b8a90ea 100644 --- a/deepmd/op/__init__.py +++ b/deepmd/op/__init__.py @@ -1,8 +1,10 @@ """This module will house cust Tf OPs after CMake installation.""" -from pathlib import Path import importlib import logging +from pathlib import ( + Path, +) NOT_LOADABLE = ("__init__.py",) PACKAGE_BASE = "deepmd.op" diff --git a/deepmd/train/__init__.py b/deepmd/train/__init__.py index 8b13789179..e69de29bb2 100644 --- a/deepmd/train/__init__.py +++ b/deepmd/train/__init__.py @@ -1 +0,0 @@ - diff --git a/deepmd/train/run_options.py b/deepmd/train/run_options.py index 9cf8095edb..f26f198791 100644 --- a/deepmd/train/run_options.py +++ b/deepmd/train/run_options.py @@ -2,13 +2,31 @@ import logging import os -from pathlib import Path -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from pathlib import ( + Path, +) +from typing import ( + TYPE_CHECKING, + Dict, + List, + Optional, + Tuple, +) import numpy as np -from deepmd.cluster import get_resource -from deepmd.env import get_tf_default_nthreads, tf, GLOBAL_CONFIG, global_float_prec -from deepmd.loggers import set_log_handles + +from deepmd.cluster import ( + get_resource, +) +from deepmd.env import ( + GLOBAL_CONFIG, + get_tf_default_nthreads, + global_float_prec, + tf, +) +from deepmd.loggers import ( + set_log_handles, +) if TYPE_CHECKING: import horovod.tensorflow as HVD @@ -49,7 +67,7 @@ f"build float prec: {global_float_prec}", f"build variant: {GLOBAL_CONFIG['dp_variant']}", f"build with tf inc: {GLOBAL_CONFIG['tf_include_dir']}", - f"build with tf lib: {GLOBAL_CONFIG['tf_libs'].replace(';', _sep)}" # noqa + f"build with tf lib: 
{GLOBAL_CONFIG['tf_libs'].replace(';', _sep)}", # noqa ) @@ -93,7 +111,7 @@ def __init__( restart: Optional[str] = None, log_path: Optional[str] = None, log_level: int = 0, - mpi_log: str = "master" + mpi_log: str = "master", ): self._try_init_distrib() @@ -135,10 +153,10 @@ def print_resource_summary(self): log.info(f"running on: {self.nodename}") log.info(f"computing device: {self.my_device}") if tf.test.is_built_with_cuda(): - env_value = os.environ.get('CUDA_VISIBLE_DEVICES', 'unset') + env_value = os.environ.get("CUDA_VISIBLE_DEVICES", "unset") log.info(f"CUDA_VISIBLE_DEVICES: {env_value}") - if hasattr(tf.test, 'is_built_with_rocm') and tf.test.is_built_with_rocm(): - env_value = os.environ.get('HIP_VISIBLE_DEVICES', 'unset') + if hasattr(tf.test, "is_built_with_rocm") and tf.test.is_built_with_rocm(): + env_value = os.environ.get("HIP_VISIBLE_DEVICES", "unset") log.info(f"HIP_VISIBLE_DEVICES: {env_value}") log.info(f"Count of visible GPU: {len(self.gpus or [])}") intra, inter = get_tf_default_nthreads() @@ -183,6 +201,7 @@ def _setup_logger( def _try_init_distrib(self): try: import horovod.tensorflow as HVD + HVD.init() self.is_distrib = HVD.size() > 1 except ImportError: @@ -215,7 +234,9 @@ def _init_distributed(self, HVD: "HVD"): if gpus is not None: gpu_idx = HVD.local_rank() if gpu_idx >= len(gpus): - raise RuntimeError('Count of local processes is larger than that of available GPUs!') + raise RuntimeError( + "Count of local processes is larger than that of available GPUs!" 
+ ) self.my_device = f"gpu:{gpu_idx:d}" else: self.my_device = "cpu:0" diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py index 5a5593ff9d..d7f8889eaf 100644 --- a/deepmd/train/trainer.py +++ b/deepmd/train/trainer.py @@ -1,45 +1,92 @@ #!/usr/bin/env python3 -from deepmd.descriptor.descriptor import Descriptor +import glob import logging import os -import glob import platform -import time import shutil +import time + import google.protobuf.message import numpy as np -from packaging.version import Version - -from deepmd.env import tf, tfv2 -from deepmd.env import get_tf_session_config -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION -from deepmd.fit import EnerFitting, PolarFittingSeA, DipoleFittingSeA -from deepmd.descriptor import Descriptor -from deepmd.model import EnerModel, WFCModel, DipoleModel, PolarModel, GlobalPolarModel, MultiModel -from deepmd.loss import EnerStdLoss, EnerDipoleLoss, TensorLoss -from deepmd.utils.errors import GraphTooLargeError -from deepmd.utils.learning_rate import LearningRateExp -from deepmd.utils.neighbor_stat import NeighborStat -from deepmd.utils.sess import run_sess -from deepmd.utils.type_embed import TypeEmbedNet -from deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph -from deepmd.utils.argcheck import type_embedding_args -from deepmd.utils import random as dp_random - -from tensorflow.python.client import timeline -from deepmd.env import op_module, TF_VERSION -from deepmd.utils.errors import GraphWithoutTensorError +from packaging.version import ( + Version, +) +from tensorflow.python.client import ( + timeline, +) # load grad of force module import deepmd.op - -from deepmd.common import j_must_have, data_requirement, get_precision +from deepmd.common import ( + data_requirement, + get_precision, + j_must_have, +) +from deepmd.descriptor import ( + Descriptor, +) +from deepmd.descriptor.descriptor import ( + Descriptor, +) +from deepmd.env 
import ( + GLOBAL_ENER_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + TF_VERSION, + get_tf_session_config, + op_module, + tf, + tfv2, +) +from deepmd.fit import ( + DipoleFittingSeA, + EnerFitting, + PolarFittingSeA, +) +from deepmd.loss import ( + EnerDipoleLoss, + EnerStdLoss, + TensorLoss, +) +from deepmd.model import ( + DipoleModel, + EnerModel, + GlobalPolarModel, + MultiModel, + PolarModel, + WFCModel, +) +from deepmd.utils import random as dp_random +from deepmd.utils.argcheck import ( + type_embedding_args, +) +from deepmd.utils.errors import ( + GraphTooLargeError, + GraphWithoutTensorError, +) +from deepmd.utils.graph import ( + get_tensor_by_name_from_graph, + load_graph_def, +) +from deepmd.utils.learning_rate import ( + LearningRateExp, +) +from deepmd.utils.neighbor_stat import ( + NeighborStat, +) +from deepmd.utils.sess import ( + run_sess, +) +from deepmd.utils.type_embed import ( + TypeEmbedNet, +) log = logging.getLogger(__name__) # nvnmd -from deepmd.nvnmd.utils.config import nvnmd_cfg +from deepmd.nvnmd.utils.config import ( + nvnmd_cfg, +) + def _is_subdir(path, directory): path = os.path.realpath(path) @@ -48,56 +95,56 @@ def _is_subdir(path, directory): return False relative = os.path.relpath(path, directory) + os.sep return not relative.startswith(os.pardir + os.sep) - -class DPTrainer (object): - def __init__(self, - jdata, - run_opt, - is_compress = False): + +class DPTrainer(object): + def __init__(self, jdata, run_opt, is_compress=False): self.run_opt = run_opt self._init_param(jdata) self.is_compress = is_compress def _init_param(self, jdata): - # model config - model_param = j_must_have(jdata, 'model') - self.multi_task_mode = 'fitting_net_dict' in model_param - descrpt_param = j_must_have(model_param, 'descriptor') - fitting_param = j_must_have(model_param, 'fitting_net') \ - if not self.multi_task_mode else j_must_have(model_param, 'fitting_net_dict') - typeebd_param = model_param.get('type_embedding', None) - self.model_param = 
model_param - self.descrpt_param = descrpt_param - + # model config + model_param = j_must_have(jdata, "model") + self.multi_task_mode = "fitting_net_dict" in model_param + descrpt_param = j_must_have(model_param, "descriptor") + fitting_param = ( + j_must_have(model_param, "fitting_net") + if not self.multi_task_mode + else j_must_have(model_param, "fitting_net_dict") + ) + typeebd_param = model_param.get("type_embedding", None) + self.model_param = model_param + self.descrpt_param = descrpt_param + # nvnmd - self.nvnmd_param = jdata.get('nvnmd', {}) + self.nvnmd_param = jdata.get("nvnmd", {}) nvnmd_cfg.init_from_jdata(self.nvnmd_param) if nvnmd_cfg.enable: nvnmd_cfg.init_from_deepmd_input(model_param) nvnmd_cfg.disp_message() nvnmd_cfg.save() - + # descriptor try: - descrpt_type = descrpt_param['type'] + descrpt_type = descrpt_param["type"] self.descrpt_type = descrpt_type except KeyError: - raise KeyError('the type of descriptor should be set by `type`') + raise KeyError("the type of descriptor should be set by `type`") - if descrpt_param['type'] in ['se_atten']: - descrpt_param['ntypes'] = len(model_param['type_map']) + if descrpt_param["type"] in ["se_atten"]: + descrpt_param["ntypes"] = len(model_param["type_map"]) if self.multi_task_mode: - descrpt_param['multi_task'] = True + descrpt_param["multi_task"] = True self.descrpt = Descriptor(**descrpt_param) # fitting net def fitting_net_init(fitting_type_, descrpt_type_, params): - if fitting_type_ == 'ener': + if fitting_type_ == "ener": return EnerFitting(**params) - elif fitting_type_ == 'dipole': + elif fitting_type_ == "dipole": return DipoleFittingSeA(**params) - elif fitting_type_ == 'polar': + elif fitting_type_ == "polar": return PolarFittingSeA(**params) # elif fitting_type_ == 'global_polar': # if descrpt_type_ == 'se_e2_a': @@ -105,13 +152,13 @@ def fitting_net_init(fitting_type_, descrpt_type_, params): # else: # raise RuntimeError('fitting global_polar only supports descrptors: loc_frame and 
se_e2_a') else: - raise RuntimeError('unknown fitting type ' + fitting_type_) + raise RuntimeError("unknown fitting type " + fitting_type_) if not self.multi_task_mode: - fitting_type = fitting_param.get('type', 'ener') + fitting_type = fitting_param.get("type", "ener") self.fitting_type = fitting_type - fitting_param.pop('type', None) - fitting_param['descrpt'] = self.descrpt + fitting_param.pop("type", None) + fitting_param["descrpt"] = self.descrpt self.fitting = fitting_net_init(fitting_type, descrpt_type, fitting_param) else: self.fitting_dict = {} @@ -119,37 +166,39 @@ def fitting_net_init(fitting_type_, descrpt_type_, params): self.nfitting = len(fitting_param) for item in fitting_param: item_fitting_param = fitting_param[item] - item_fitting_type = item_fitting_param.get('type', 'ener') + item_fitting_type = item_fitting_param.get("type", "ener") self.fitting_type_dict[item] = item_fitting_type - item_fitting_param.pop('type', None) - item_fitting_param['descrpt'] = self.descrpt - self.fitting_dict[item] = fitting_net_init(item_fitting_type, descrpt_type, item_fitting_param) + item_fitting_param.pop("type", None) + item_fitting_param["descrpt"] = self.descrpt + self.fitting_dict[item] = fitting_net_init( + item_fitting_type, descrpt_type, item_fitting_param + ) # type embedding padding = False - if descrpt_type == 'se_atten': + if descrpt_type == "se_atten": padding = True if typeebd_param is not None: self.typeebd = TypeEmbedNet( - neuron=typeebd_param['neuron'], - resnet_dt=typeebd_param['resnet_dt'], - activation_function=typeebd_param['activation_function'], - precision=typeebd_param['precision'], - trainable=typeebd_param['trainable'], - seed=typeebd_param['seed'], - padding=padding + neuron=typeebd_param["neuron"], + resnet_dt=typeebd_param["resnet_dt"], + activation_function=typeebd_param["activation_function"], + precision=typeebd_param["precision"], + trainable=typeebd_param["trainable"], + seed=typeebd_param["seed"], + padding=padding, ) - elif 
descrpt_type == 'se_atten': + elif descrpt_type == "se_atten": default_args = type_embedding_args() default_args_dict = {i.name: i.default for i in default_args} self.typeebd = TypeEmbedNet( - neuron=default_args_dict['neuron'], - resnet_dt=default_args_dict['resnet_dt'], + neuron=default_args_dict["neuron"], + resnet_dt=default_args_dict["resnet_dt"], activation_function=None, - precision=default_args_dict['precision'], - trainable=default_args_dict['trainable'], - seed=default_args_dict['seed'], - padding=padding + precision=default_args_dict["precision"], + trainable=default_args_dict["trainable"], + seed=default_args_dict["seed"], + padding=padding, ) else: self.typeebd = None @@ -157,38 +206,38 @@ def fitting_net_init(fitting_type_, descrpt_type_, params): # init model # infer model type by fitting_type if not self.multi_task_mode: - if self.fitting_type == 'ener': + if self.fitting_type == "ener": self.model = EnerModel( self.descrpt, self.fitting, self.typeebd, - model_param.get('type_map'), - model_param.get('data_stat_nbatch', 10), - model_param.get('data_stat_protect', 1e-2), - model_param.get('use_srtab'), - model_param.get('smin_alpha'), - model_param.get('sw_rmin'), - model_param.get('sw_rmax') + model_param.get("type_map"), + model_param.get("data_stat_nbatch", 10), + model_param.get("data_stat_protect", 1e-2), + model_param.get("use_srtab"), + model_param.get("smin_alpha"), + model_param.get("sw_rmin"), + model_param.get("sw_rmax"), ) # elif fitting_type == 'wfc': # self.model = WFCModel(model_param, self.descrpt, self.fitting) - elif self.fitting_type == 'dipole': + elif self.fitting_type == "dipole": self.model = DipoleModel( self.descrpt, self.fitting, self.typeebd, - model_param.get('type_map'), - model_param.get('data_stat_nbatch', 10), - model_param.get('data_stat_protect', 1e-2) + model_param.get("type_map"), + model_param.get("data_stat_nbatch", 10), + model_param.get("data_stat_protect", 1e-2), ) - elif self.fitting_type == 'polar': + elif 
self.fitting_type == "polar": self.model = PolarModel( self.descrpt, self.fitting, self.typeebd, - model_param.get('type_map'), - model_param.get('data_stat_nbatch', 10), - model_param.get('data_stat_protect', 1e-2) + model_param.get("type_map"), + model_param.get("data_stat_nbatch", 10), + model_param.get("data_stat_protect", 1e-2), ) # elif self.fitting_type == 'global_polar': # self.model = GlobalPolarModel( @@ -198,96 +247,110 @@ def fitting_net_init(fitting_type_, descrpt_type_, params): # model_param.get('data_stat_nbatch', 10), # model_param.get('data_stat_protect', 1e-2) # ) - else : - raise RuntimeError('get unknown fitting type when building model') + else: + raise RuntimeError("get unknown fitting type when building model") else: # multi-task mode self.model = MultiModel( self.descrpt, self.fitting_dict, self.fitting_type_dict, self.typeebd, - model_param.get('type_map'), - model_param.get('data_stat_nbatch', 10), - model_param.get('data_stat_protect', 1e-2), - model_param.get('use_srtab'), - model_param.get('smin_alpha'), - model_param.get('sw_rmin'), - model_param.get('sw_rmax') + model_param.get("type_map"), + model_param.get("data_stat_nbatch", 10), + model_param.get("data_stat_protect", 1e-2), + model_param.get("use_srtab"), + model_param.get("smin_alpha"), + model_param.get("sw_rmin"), + model_param.get("sw_rmax"), ) # learning rate - lr_param = j_must_have(jdata, 'learning_rate') - scale_by_worker = lr_param.get('scale_by_worker', 'linear') - if scale_by_worker == 'linear': + lr_param = j_must_have(jdata, "learning_rate") + scale_by_worker = lr_param.get("scale_by_worker", "linear") + if scale_by_worker == "linear": self.scale_lr_coef = float(self.run_opt.world_size) - elif scale_by_worker == 'sqrt': + elif scale_by_worker == "sqrt": self.scale_lr_coef = np.sqrt(self.run_opt.world_size).real else: - self.scale_lr_coef = 1. 
- lr_type = lr_param.get('type', 'exp') - if lr_type == 'exp': - self.lr = LearningRateExp(lr_param['start_lr'], - lr_param['stop_lr'], - lr_param['decay_steps']) - else : - raise RuntimeError('unknown learning_rate type ' + lr_type) + self.scale_lr_coef = 1.0 + lr_type = lr_param.get("type", "exp") + if lr_type == "exp": + self.lr = LearningRateExp( + lr_param["start_lr"], lr_param["stop_lr"], lr_param["decay_steps"] + ) + else: + raise RuntimeError("unknown learning_rate type " + lr_type) # loss # infer loss type by fitting_type def loss_init(_loss_param, _fitting_type, _fitting, _lr): - _loss_type = _loss_param.get('type', 'ener') - if _fitting_type == 'ener': - _loss_param.pop('type', None) - _loss_param['starter_learning_rate'] = _lr.start_lr() - if _loss_type == 'ener': + _loss_type = _loss_param.get("type", "ener") + if _fitting_type == "ener": + _loss_param.pop("type", None) + _loss_param["starter_learning_rate"] = _lr.start_lr() + if _loss_type == "ener": loss = EnerStdLoss(**_loss_param) - elif _loss_type == 'ener_dipole': + elif _loss_type == "ener_dipole": loss = EnerDipoleLoss(**_loss_param) else: - raise RuntimeError('unknown loss type') - elif _fitting_type == 'wfc': - loss = TensorLoss(_loss_param, - model=_fitting, - tensor_name='wfc', - tensor_size=_fitting.get_out_size(), - label_name='wfc') - elif _fitting_type == 'dipole': - loss = TensorLoss(_loss_param, - model=_fitting, - tensor_name='dipole', - tensor_size=3, - label_name='dipole') - elif _fitting_type == 'polar': - loss = TensorLoss(_loss_param, - model=_fitting, - tensor_name='polar', - tensor_size=9, - label_name='polarizability') - elif _fitting_type == 'global_polar': - loss = TensorLoss(_loss_param, - model=_fitting, - tensor_name='global_polar', - tensor_size=9, - atomic=False, - label_name='polarizability') + raise RuntimeError("unknown loss type") + elif _fitting_type == "wfc": + loss = TensorLoss( + _loss_param, + model=_fitting, + tensor_name="wfc", + 
tensor_size=_fitting.get_out_size(), + label_name="wfc", + ) + elif _fitting_type == "dipole": + loss = TensorLoss( + _loss_param, + model=_fitting, + tensor_name="dipole", + tensor_size=3, + label_name="dipole", + ) + elif _fitting_type == "polar": + loss = TensorLoss( + _loss_param, + model=_fitting, + tensor_name="polar", + tensor_size=9, + label_name="polarizability", + ) + elif _fitting_type == "global_polar": + loss = TensorLoss( + _loss_param, + model=_fitting, + tensor_name="global_polar", + tensor_size=9, + atomic=False, + label_name="polarizability", + ) else: - raise RuntimeError('get unknown fitting type when building loss function') + raise RuntimeError( + "get unknown fitting type when building loss function" + ) return loss if not self.multi_task_mode: - loss_param = jdata.get('loss', {}) + loss_param = jdata.get("loss", {}) self.loss = loss_init(loss_param, self.fitting_type, self.fitting, self.lr) else: self.loss_dict = {} - loss_param_dict = jdata.get('loss_dict', {}) + loss_param_dict = jdata.get("loss_dict", {}) for fitting_key in self.fitting_type_dict: loss_param = loss_param_dict.get(fitting_key, {}) - self.loss_dict[fitting_key] = loss_init(loss_param, self.fitting_type_dict[fitting_key], - self.fitting_dict[fitting_key], self.lr) + self.loss_dict[fitting_key] = loss_init( + loss_param, + self.fitting_type_dict[fitting_key], + self.fitting_dict[fitting_key], + self.lr, + ) # training - tr_data = jdata['training'] - self.fitting_weight = tr_data.get('fitting_weight', None) + tr_data = jdata["training"] + self.fitting_weight = tr_data.get("fitting_weight", None) if self.multi_task_mode: self.fitting_key_list = [] self.fitting_prob = [] @@ -295,30 +358,34 @@ def loss_init(_loss_param, _fitting_type, _fitting, _lr): self.fitting_key_list.append(fitting_key) # multi-task mode must have self.fitting_weight self.fitting_prob.append(self.fitting_weight[fitting_key]) - self.disp_file = tr_data.get('disp_file', 'lcurve.out') - self.disp_freq = 
tr_data.get('disp_freq', 1000) - self.save_freq = tr_data.get('save_freq', 1000) - self.save_ckpt = tr_data.get('save_ckpt', 'model.ckpt') - self.display_in_training = tr_data.get('disp_training', True) - self.timing_in_training = tr_data.get('time_training', True) - self.profiling = self.run_opt.is_chief and tr_data.get('profiling', False) - self.profiling_file = tr_data.get('profiling_file', 'timeline.json') - self.enable_profiler = tr_data.get('enable_profiler', False) - self.tensorboard = self.run_opt.is_chief and tr_data.get('tensorboard', False) - self.tensorboard_log_dir = tr_data.get('tensorboard_log_dir', 'log') - self.tensorboard_freq = tr_data.get('tensorboard_freq', 1) - self.mixed_prec = tr_data.get('mixed_precision', None) + self.disp_file = tr_data.get("disp_file", "lcurve.out") + self.disp_freq = tr_data.get("disp_freq", 1000) + self.save_freq = tr_data.get("save_freq", 1000) + self.save_ckpt = tr_data.get("save_ckpt", "model.ckpt") + self.display_in_training = tr_data.get("disp_training", True) + self.timing_in_training = tr_data.get("time_training", True) + self.profiling = self.run_opt.is_chief and tr_data.get("profiling", False) + self.profiling_file = tr_data.get("profiling_file", "timeline.json") + self.enable_profiler = tr_data.get("enable_profiler", False) + self.tensorboard = self.run_opt.is_chief and tr_data.get("tensorboard", False) + self.tensorboard_log_dir = tr_data.get("tensorboard_log_dir", "log") + self.tensorboard_freq = tr_data.get("tensorboard_freq", 1) + self.mixed_prec = tr_data.get("mixed_precision", None) if self.mixed_prec is not None: - if (self.mixed_prec['compute_prec'] not in ('float16', 'bfloat16') or self.mixed_prec['output_prec'] != 'float32'): + if ( + self.mixed_prec["compute_prec"] not in ("float16", "bfloat16") + or self.mixed_prec["output_prec"] != "float32" + ): raise RuntimeError( "Unsupported mixed precision option [output_prec, compute_prec]: [%s, %s], " " Supported: [float32, float16/bfloat16], Please set 
mixed precision option correctly!" - % (self.mixed_prec['output_prec'], self.mixed_prec['compute_prec'])) + % (self.mixed_prec["output_prec"], self.mixed_prec["compute_prec"]) + ) # self.sys_probs = tr_data['sys_probs'] # self.auto_prob_style = tr_data['auto_prob'] self.useBN = False if not self.multi_task_mode: - if self.fitting_type == 'ener' and self.fitting.get_numb_fparam() > 0 : + if self.fitting_type == "ener" and self.fitting.get_numb_fparam() > 0: self.numb_fparam = self.fitting.get_numb_fparam() else: self.numb_fparam = 0 @@ -331,14 +398,21 @@ def loss_init(_loss_param, _fitting_type, _fitting, _lr): self.numb_fparam_dict = {} self.valid_numb_batch_dict = {} for fitting_key in self.fitting_type_dict: - if self.fitting_type_dict[fitting_key] == 'ener' and self.fitting_dict[fitting_key].get_numb_fparam() > 0 : - self.numb_fparam_dict[fitting_key] = self.fitting_dict[fitting_key].get_numb_fparam() + if ( + self.fitting_type_dict[fitting_key] == "ener" + and self.fitting_dict[fitting_key].get_numb_fparam() > 0 + ): + self.numb_fparam_dict[fitting_key] = self.fitting_dict[ + fitting_key + ].get_numb_fparam() else: self.numb_fparam_dict[fitting_key] = 0 data_dict = tr_data.get("data_dict", None) for systems in data_dict: if data_dict[systems].get("validation_data", None) is not None: - self.valid_numb_batch_dict[systems] = data_dict[systems]["validation_data"].get("numb_btch", 1) + self.valid_numb_batch_dict[systems] = data_dict[systems][ + "validation_data" + ].get("numb_btch", 1) else: self.valid_numb_batch_dict[systems] = 1 @@ -347,44 +421,54 @@ def loss_init(_loss_param, _fitting_type, _fitting, _lr): self.ckpt_meta = None self.model_type = None - - def build (self, - data = None, - stop_batch = 0, - origin_type_map = None, - suffix = "") : + def build(self, data=None, stop_batch=0, origin_type_map=None, suffix=""): self.ntypes = self.model.get_ntypes() self.stop_batch = stop_batch if not self.multi_task_mode: if not self.is_compress and data.mixed_type: - 
assert self.descrpt_type in ['se_atten'], 'Data in mixed_type format must use attention descriptor!' - assert self.fitting_type in ['ener'], 'Data in mixed_type format must use ener fitting!' - - if self.numb_fparam > 0 : + assert self.descrpt_type in [ + "se_atten" + ], "Data in mixed_type format must use attention descriptor!" + assert self.fitting_type in [ + "ener" + ], "Data in mixed_type format must use ener fitting!" + + if self.numb_fparam > 0: log.info("training with %d frame parameter(s)" % self.numb_fparam) else: log.info("training without frame parameter") else: - assert not self.is_compress, 'You should not reach here, multi-task input could not be compressed! ' + assert ( + not self.is_compress + ), "You should not reach here, multi-task input could not be compressed! " self.valid_fitting_key = [] for fitting_key in data: self.valid_fitting_key.append(fitting_key) if data[fitting_key].mixed_type: - assert self.descrpt_type in ['se_atten'], 'Data for fitting net {} in mixed_type format ' \ - 'must use attention descriptor!'.format(fitting_key) - assert self.fitting_type_dict[fitting_key] in ['ener'], \ - 'Data for fitting net {} in mixed_type format must use ener fitting!'.format(fitting_key) + assert self.descrpt_type in ["se_atten"], ( + "Data for fitting net {} in mixed_type format " + "must use attention descriptor!".format(fitting_key) + ) + assert self.fitting_type_dict[fitting_key] in [ + "ener" + ], "Data for fitting net {} in mixed_type format must use ener fitting!".format( + fitting_key + ) if self.numb_fparam_dict[fitting_key] > 0: - log.info("fitting net %s training with %d frame parameter(s)" % - (fitting_key, self.numb_fparam_dict[fitting_key])) + log.info( + "fitting net %s training with %d frame parameter(s)" + % (fitting_key, self.numb_fparam_dict[fitting_key]) + ) else: - log.info("fitting net %s training without frame parameter" % fitting_key) + log.info( + "fitting net %s training without frame parameter" % fitting_key + ) if not 
self.is_compress: # Usually, the type number of the model should be equal to that of the data - # However, nt_model > nt_data should be allowed, since users may only want to + # However, nt_model > nt_data should be allowed, since users may only want to # train using a dataset that only have some of elements if not self.multi_task_mode: single_data = data @@ -397,9 +481,9 @@ def build (self, "You may need to reset one or both of them. Usually, the former " "is given by `model/type_map` in the training parameter (if set) " "or the maximum number in the training data. The latter is given " - "by `model/descriptor/sel` in the training parameter." % ( - single_data.get_ntypes(), self.ntypes - )) + "by `model/descriptor/sel` in the training parameter." + % (single_data.get_ntypes(), self.ntypes) + ) self.type_map = single_data.get_type_map() if not self.multi_task_mode: self.batch_size = data.get_batch_size() @@ -407,142 +491,188 @@ def build (self, self.batch_size = {} for fitting_key in data: self.batch_size[fitting_key] = data[fitting_key].get_batch_size() - if self.run_opt.init_mode not in ('init_from_model', 'restart', 'init_from_frz_model', 'finetune'): + if self.run_opt.init_mode not in ( + "init_from_model", + "restart", + "init_from_frz_model", + "finetune", + ): # self.saver.restore (in self._init_session) will restore avg and std variables, so data_stat is useless # init_from_frz_model will restore data_stat variables in `init_variables` method log.info("data stating... 
(this step may take long time)") self.model.data_stat(data) # config the init_frz_model command - if self.run_opt.init_mode == 'init_from_frz_model': + if self.run_opt.init_mode == "init_from_frz_model": self._init_from_frz_model() - elif self.run_opt.init_mode == 'init_model': + elif self.run_opt.init_mode == "init_model": self.ckpt_meta = self.run_opt.init_model - elif self.run_opt.init_mode == 'restart': + elif self.run_opt.init_mode == "restart": self.ckpt_meta = self.run_opt.restart - elif self.run_opt.init_mode == 'finetune': - self._init_from_pretrained_model(data=data, origin_type_map=origin_type_map) + elif self.run_opt.init_mode == "finetune": + self._init_from_pretrained_model( + data=data, origin_type_map=origin_type_map + ) # neighbor_stat is moved to train.py as duplicated # TODO: this is a simple fix but we should have a clear # architecture to call neighbor stat - else : - graph, graph_def = load_graph_def(self.model_param['compress']['model_file']) - self.descrpt.enable_compression(self.model_param['compress']["min_nbor_dist"], graph, graph_def, self.model_param['compress']['table_config'][0], self.model_param['compress']['table_config'][1], self.model_param['compress']['table_config'][2], self.model_param['compress']['table_config'][3]) + else: + graph, graph_def = load_graph_def( + self.model_param["compress"]["model_file"] + ) + self.descrpt.enable_compression( + self.model_param["compress"]["min_nbor_dist"], + graph, + graph_def, + self.model_param["compress"]["table_config"][0], + self.model_param["compress"]["table_config"][1], + self.model_param["compress"]["table_config"][2], + self.model_param["compress"]["table_config"][3], + ) # for fparam or aparam settings in 'ener' type fitting net self.fitting.init_variables(graph, graph_def) - - if self.is_compress or self.model_type == 'compressed_model': - tf.constant("compressed_model", name = 'model_type', dtype = tf.string) + + if self.is_compress or self.model_type == "compressed_model": + 
tf.constant("compressed_model", name="model_type", dtype=tf.string) else: - tf.constant("original_model", name = 'model_type', dtype = tf.string) - + tf.constant("original_model", name="model_type", dtype=tf.string) + if self.mixed_prec is not None: self.descrpt.enable_mixed_precision(self.mixed_prec) if not self.multi_task_mode: self.fitting.enable_mixed_precision(self.mixed_prec) else: for fitting_key in self.fitting_dict: - self.fitting_dict[fitting_key].enable_mixed_precision(self.mixed_prec) + self.fitting_dict[fitting_key].enable_mixed_precision( + self.mixed_prec + ) self._build_lr() self._build_network(data, suffix) self._build_training() - def _build_lr(self): - self._extra_train_ops = [] + self._extra_train_ops = [] self.global_step = tf.train.get_or_create_global_step() self.learning_rate = self.lr.build(self.global_step, self.stop_batch) log.info("built lr") def _build_network(self, data, suffix=""): self.place_holders = {} - if self.is_compress : - for kk in ['coord', 'box']: - self.place_holders[kk] = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], 't_' + kk) + if self.is_compress: + for kk in ["coord", "box"]: + self.place_holders[kk] = tf.placeholder( + GLOBAL_TF_FLOAT_PRECISION, [None], "t_" + kk + ) self._get_place_horders(data_requirement) - else : + else: if not self.multi_task_mode: self._get_place_horders(data.get_data_dict()) else: self._get_place_horders(data[list(data.keys())[0]].get_data_dict()) - self.place_holders['type'] = tf.placeholder(tf.int32, [None], name='t_type') - self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes+2], name='t_natoms') - self.place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name='t_mesh') - self.place_holders['is_training'] = tf.placeholder(tf.bool) - self.model_pred\ - = self.model.build (self.place_holders['coord'], - self.place_holders['type'], - self.place_holders['natoms_vec'], - self.place_holders['box'], - self.place_holders['default_mesh'], - self.place_holders, - 
frz_model = self.frz_model, - ckpt_meta = self.ckpt_meta, - suffix = suffix, - reuse = False) + self.place_holders["type"] = tf.placeholder(tf.int32, [None], name="t_type") + self.place_holders["natoms_vec"] = tf.placeholder( + tf.int32, [self.ntypes + 2], name="t_natoms" + ) + self.place_holders["default_mesh"] = tf.placeholder( + tf.int32, [None], name="t_mesh" + ) + self.place_holders["is_training"] = tf.placeholder(tf.bool) + self.model_pred = self.model.build( + self.place_holders["coord"], + self.place_holders["type"], + self.place_holders["natoms_vec"], + self.place_holders["box"], + self.place_holders["default_mesh"], + self.place_holders, + frz_model=self.frz_model, + ckpt_meta=self.ckpt_meta, + suffix=suffix, + reuse=False, + ) if not self.multi_task_mode: - self.l2_l, self.l2_more\ - = self.loss.build (self.learning_rate, - self.place_holders['natoms_vec'], - self.model_pred, - self.place_holders, - suffix = "test") + self.l2_l, self.l2_more = self.loss.build( + self.learning_rate, + self.place_holders["natoms_vec"], + self.model_pred, + self.place_holders, + suffix="test", + ) if self.mixed_prec is not None: - self.l2_l = tf.cast(self.l2_l, get_precision(self.mixed_prec['output_prec'])) + self.l2_l = tf.cast( + self.l2_l, get_precision(self.mixed_prec["output_prec"]) + ) else: self.l2_l, self.l2_more = {}, {} for fitting_key in self.fitting_type_dict: - self.l2_l[fitting_key], self.l2_more[fitting_key]\ - = self.loss_dict[fitting_key].build(self.learning_rate, - self.place_holders['natoms_vec'], - self.model_pred[fitting_key], - self.place_holders, - suffix=fitting_key) + self.l2_l[fitting_key], self.l2_more[fitting_key] = self.loss_dict[ + fitting_key + ].build( + self.learning_rate, + self.place_holders["natoms_vec"], + self.model_pred[fitting_key], + self.place_holders, + suffix=fitting_key, + ) if self.mixed_prec is not None: - self.l2_l[fitting_key] = tf.cast(self.l2_l[fitting_key], - get_precision(self.mixed_prec['output_prec'])) + 
self.l2_l[fitting_key] = tf.cast( + self.l2_l[fitting_key], + get_precision(self.mixed_prec["output_prec"]), + ) log.info("built network") def _build_training(self): trainable_variables = tf.trainable_variables() if self.run_opt.is_distrib: - if self.scale_lr_coef > 1.: - log.info('Scale learning rate by coef: %f', self.scale_lr_coef) - optimizer = tf.train.AdamOptimizer(self.learning_rate*self.scale_lr_coef) + if self.scale_lr_coef > 1.0: + log.info("Scale learning rate by coef: %f", self.scale_lr_coef) + optimizer = tf.train.AdamOptimizer( + self.learning_rate * self.scale_lr_coef + ) else: optimizer = tf.train.AdamOptimizer(self.learning_rate) optimizer = self.run_opt._HVD.DistributedOptimizer(optimizer) else: - optimizer = tf.train.AdamOptimizer(learning_rate = self.learning_rate) + optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate) if self.mixed_prec is not None: _TF_VERSION = Version(TF_VERSION) - # check the TF_VERSION, when TF < 1.12, mixed precision is not allowed - if _TF_VERSION < Version('1.14.0'): - raise RuntimeError("TensorFlow version %s is not compatible with the mixed precision setting. Please consider upgrading your TF version!" % TF_VERSION) - elif _TF_VERSION < Version('2.4.0'): - optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer) + # check the TF_VERSION, when TF < 1.12, mixed precision is not allowed + if _TF_VERSION < Version("1.14.0"): + raise RuntimeError( + "TensorFlow version %s is not compatible with the mixed precision setting. Please consider upgrading your TF version!" 
+ % TF_VERSION + ) + elif _TF_VERSION < Version("2.4.0"): + optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite( + optimizer + ) else: - optimizer = tf.mixed_precision.enable_mixed_precision_graph_rewrite(optimizer) + optimizer = tf.mixed_precision.enable_mixed_precision_graph_rewrite( + optimizer + ) if not self.multi_task_mode: - apply_op = optimizer.minimize(loss=self.l2_l, - global_step=self.global_step, - var_list=trainable_variables, - name='train_step') + apply_op = optimizer.minimize( + loss=self.l2_l, + global_step=self.global_step, + var_list=trainable_variables, + name="train_step", + ) train_ops = [apply_op] + self._extra_train_ops self.train_op = tf.group(*train_ops) else: self.train_op = {} for fitting_key in self.fitting_type_dict: - apply_op = optimizer.minimize(loss=self.l2_l[fitting_key], - global_step=self.global_step, - var_list=trainable_variables, - name='train_step_{}'.format(fitting_key)) + apply_op = optimizer.minimize( + loss=self.l2_l[fitting_key], + global_step=self.global_step, + var_list=trainable_variables, + name="train_step_{}".format(fitting_key), + ) train_ops = [apply_op] + self._extra_train_ops self.train_op[fitting_key] = tf.group(*train_ops) log.info("built training") @@ -558,35 +688,35 @@ def _init_session(self): init_op = tf.global_variables_initializer() if self.run_opt.is_chief: self.saver = tf.train.Saver(save_relative_paths=True) - if self.run_opt.init_mode == 'init_from_scratch' : + if self.run_opt.init_mode == "init_from_scratch": log.info("initialize model from scratch") run_sess(self.sess, init_op) if not self.is_compress: fp = open(self.disp_file, "w") - fp.close () - elif self.run_opt.init_mode == 'init_from_model' : + fp.close() + elif self.run_opt.init_mode == "init_from_model": log.info("initialize from model %s" % self.run_opt.init_model) run_sess(self.sess, init_op) - self.saver.restore (self.sess, self.run_opt.init_model) + self.saver.restore(self.sess, self.run_opt.init_model) 
run_sess(self.sess, self.global_step.assign(0)) fp = open(self.disp_file, "w") - fp.close () - elif self.run_opt.init_mode == 'restart' : + fp.close() + elif self.run_opt.init_mode == "restart": log.info("restart from model %s" % self.run_opt.restart) run_sess(self.sess, init_op) - self.saver.restore (self.sess, self.run_opt.restart) - elif self.run_opt.init_mode == 'init_from_frz_model' : + self.saver.restore(self.sess, self.run_opt.restart) + elif self.run_opt.init_mode == "init_from_frz_model": log.info("initialize training from the frozen model") run_sess(self.sess, init_op) fp = open(self.disp_file, "w") - fp.close () - elif self.run_opt.init_mode == 'finetune' : + fp.close() + elif self.run_opt.init_mode == "finetune": log.info("initialize training from the frozen pretrained model") run_sess(self.sess, init_op) fp = open(self.disp_file, "w") fp.close() - else : - raise RuntimeError ("unknown init mode") + else: + raise RuntimeError("unknown init mode") else: run_sess(self.sess, init_op) self.saver = None @@ -595,12 +725,12 @@ def _init_session(self): if self.run_opt.is_distrib: bcast_op = self.run_opt._HVD.broadcast_global_variables(0) if self.run_opt.is_chief: - log.info('broadcast global variables to other tasks') + log.info("broadcast global variables to other tasks") else: - log.info('receive global variables from task#0') + log.info("receive global variables from task#0") run_sess(self.sess, bcast_op) - def train (self, train_data = None, valid_data=None) : + def train(self, train_data=None, valid_data=None): # if valid_data is None: # no validation set specified. # valid_data = train_data # using training set as validation set. 
@@ -611,18 +741,21 @@ def train (self, train_data = None, valid_data=None) : # Before data shard is enabled, only cheif do evaluation and record it # self.print_head() fp = None - if self.run_opt.is_chief : + if self.run_opt.is_chief: fp = open(self.disp_file, "a") cur_batch = run_sess(self.sess, self.global_step) is_first_step = True self.cur_batch = cur_batch - log.info("start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e" % - (run_sess(self.sess, self.learning_rate), - self.lr.value(cur_batch), - self.lr.decay_steps_, - self.lr.decay_rate_, - self.lr.value(stop_batch)) + log.info( + "start training at lr %.2e (== %.2e), decay_step %d, decay_rate %f, final lr will be %.2e" + % ( + run_sess(self.sess, self.learning_rate), + self.lr.value(cur_batch), + self.lr.decay_steps_, + self.lr.decay_rate_, + self.lr.value(stop_batch), + ) ) prf_options = None @@ -638,7 +771,7 @@ def train (self, train_data = None, valid_data=None) : try: shutil.rmtree(self.tensorboard_log_dir) except FileNotFoundError: - pass # directory does not exist, this is OK + pass # directory does not exist, this is OK except Exception as e: # general error when removing directory, warn user log.exception( @@ -647,58 +780,95 @@ def train (self, train_data = None, valid_data=None) : ) else: log.debug("Removing old tensorboard log directory.") - tb_train_writer = tf.summary.FileWriter(self.tensorboard_log_dir + '/train', self.sess.graph) - tb_valid_writer = tf.summary.FileWriter(self.tensorboard_log_dir + '/test') + tb_train_writer = tf.summary.FileWriter( + self.tensorboard_log_dir + "/train", self.sess.graph + ) + tb_valid_writer = tf.summary.FileWriter(self.tensorboard_log_dir + "/test") else: tb_train_writer = None tb_valid_writer = None if self.enable_profiler: # https://www.tensorflow.org/guide/profiler tfv2.profiler.experimental.start(self.tensorboard_log_dir) - + train_time = 0 - while cur_batch < stop_batch : + while cur_batch < stop_batch: # first round 
validation: if not self.multi_task_mode: train_batch = train_data.get_batch() batch_train_op = self.train_op else: - fitting_idx = dp_random.choice(np.arange(self.nfitting), p=np.array(self.fitting_prob)) + fitting_idx = dp_random.choice( + np.arange(self.nfitting), p=np.array(self.fitting_prob) + ) fitting_key = self.fitting_key_list[fitting_idx] train_batch = train_data[fitting_key].get_batch() batch_train_op = self.train_op[fitting_key] if self.display_in_training and is_first_step: if self.run_opt.is_chief: if not self.multi_task_mode: - valid_batches = [valid_data.get_batch() for ii in range(self.valid_numb_batch)] if valid_data is not None else None - self.valid_on_the_fly(fp, [train_batch], valid_batches, print_header=True) + valid_batches = ( + [ + valid_data.get_batch() + for ii in range(self.valid_numb_batch) + ] + if valid_data is not None + else None + ) + self.valid_on_the_fly( + fp, [train_batch], valid_batches, print_header=True + ) else: train_batches = {} valid_batches = {} # valid_numb_batch_dict for fitting_key in train_data: - train_batches[fitting_key] = [train_data[fitting_key].get_batch()] - valid_batches[fitting_key] = [valid_data[fitting_key].get_batch() - for ii in range(self.valid_numb_batch_dict[fitting_key])] \ - if fitting_key in valid_data else None - self.valid_on_the_fly(fp, train_batches, valid_batches, print_header=True) + train_batches[fitting_key] = [ + train_data[fitting_key].get_batch() + ] + valid_batches[fitting_key] = ( + [ + valid_data[fitting_key].get_batch() + for ii in range( + self.valid_numb_batch_dict[fitting_key] + ) + ] + if fitting_key in valid_data + else None + ) + self.valid_on_the_fly( + fp, train_batches, valid_batches, print_header=True + ) is_first_step = False - if self.timing_in_training: tic = time.time() + if self.timing_in_training: + tic = time.time() train_feed_dict = self.get_feed_dict(train_batch, is_training=True) # use tensorboard to visualize the training of deepmd-kit # it will takes some extra 
execution time to generate the tensorboard data if self.tensorboard and (cur_batch % self.tensorboard_freq == 0): - summary, _ = run_sess(self.sess, [summary_merged_op, batch_train_op], feed_dict=train_feed_dict, - options=prf_options, run_metadata=prf_run_metadata) + summary, _ = run_sess( + self.sess, + [summary_merged_op, batch_train_op], + feed_dict=train_feed_dict, + options=prf_options, + run_metadata=prf_run_metadata, + ) tb_train_writer.add_summary(summary, cur_batch) else: - run_sess(self.sess, [batch_train_op], feed_dict=train_feed_dict, - options=prf_options, run_metadata=prf_run_metadata) - if self.timing_in_training: toc = time.time() - if self.timing_in_training: train_time += toc - tic + run_sess( + self.sess, + [batch_train_op], + feed_dict=train_feed_dict, + options=prf_options, + run_metadata=prf_run_metadata, + ) + if self.timing_in_training: + toc = time.time() + if self.timing_in_training: + train_time += toc - tic cur_batch = run_sess(self.sess, self.global_step) self.cur_batch = cur_batch @@ -708,40 +878,68 @@ def train (self, train_data = None, valid_data=None) : tic = time.time() if self.run_opt.is_chief: if not self.multi_task_mode: - valid_batches = [valid_data.get_batch() for ii in range(self.valid_numb_batch)] if valid_data is not None else None + valid_batches = ( + [ + valid_data.get_batch() + for ii in range(self.valid_numb_batch) + ] + if valid_data is not None + else None + ) self.valid_on_the_fly(fp, [train_batch], valid_batches) else: train_batches = {} valid_batches = {} for fitting_key in train_data: - train_batches[fitting_key] = [train_data[fitting_key].get_batch()] - valid_batches[fitting_key] = [valid_data[fitting_key].get_batch() - for ii in range(self.valid_numb_batch_dict[fitting_key])] \ - if fitting_key in valid_data else None + train_batches[fitting_key] = [ + train_data[fitting_key].get_batch() + ] + valid_batches[fitting_key] = ( + [ + valid_data[fitting_key].get_batch() + for ii in range( + 
self.valid_numb_batch_dict[fitting_key] + ) + ] + if fitting_key in valid_data + else None + ) self.valid_on_the_fly(fp, train_batches, valid_batches) if self.timing_in_training: toc = time.time() test_time = toc - tic - log.info("batch %7d training time %.2f s, testing time %.2f s" - % (cur_batch, train_time, test_time)) + log.info( + "batch %7d training time %.2f s, testing time %.2f s" + % (cur_batch, train_time, test_time) + ) train_time = 0 - if self.save_freq > 0 and cur_batch % self.save_freq == 0 and self.saver is not None: + if ( + self.save_freq > 0 + and cur_batch % self.save_freq == 0 + and self.saver is not None + ): self.save_checkpoint(cur_batch) - if (self.save_freq == 0 or cur_batch == 0 or cur_batch % self.save_freq != 0) and self.saver is not None: + if ( + self.save_freq == 0 or cur_batch == 0 or cur_batch % self.save_freq != 0 + ) and self.saver is not None: self.save_checkpoint(cur_batch) - if self.run_opt.is_chief: - fp.close () - if self.profiling and self.run_opt.is_chief : + if self.run_opt.is_chief: + fp.close() + if self.profiling and self.run_opt.is_chief: fetched_timeline = timeline.Timeline(prf_run_metadata.step_stats) chrome_trace = fetched_timeline.generate_chrome_trace_format() - with open(self.profiling_file, 'w') as f: + with open(self.profiling_file, "w") as f: f.write(chrome_trace) if self.enable_profiler and self.run_opt.is_chief: tfv2.profiler.experimental.stop() def save_checkpoint(self, cur_batch: int): try: - ckpt_prefix = self.saver.save (self.sess, os.path.join(os.getcwd(), self.save_ckpt), global_step=cur_batch) + ckpt_prefix = self.saver.save( + self.sess, + os.path.join(os.getcwd(), self.save_ckpt), + global_step=cur_batch, + ) except google.protobuf.message.DecodeError as e: raise GraphTooLargeError( "The graph size exceeds 2 GB, the hard limitation of protobuf." 
@@ -752,13 +950,13 @@ def save_checkpoint(self, cur_batch: int): # get all checkpoint files original_files = glob.glob(ckpt_prefix + ".*") for ori_ff in original_files: - new_ff = self.save_ckpt + ori_ff[len(ckpt_prefix):] + new_ff = self.save_ckpt + ori_ff[len(ckpt_prefix) :] try: # remove old one os.remove(new_ff) except OSError: pass - if platform.system() != 'Windows': + if platform.system() != "Windows": # by default one does not have access to create symlink on Windows os.symlink(ori_ff, new_ff) else: @@ -768,17 +966,17 @@ def save_checkpoint(self, cur_batch: int): def get_feed_dict(self, batch, is_training): feed_dict = {} for kk in batch.keys(): - if kk == 'find_type' or kk == 'type' or kk == 'real_natoms_vec': + if kk == "find_type" or kk == "type" or kk == "real_natoms_vec": continue - if 'find_' in kk: + if "find_" in kk: feed_dict[self.place_holders[kk]] = batch[kk] else: feed_dict[self.place_holders[kk]] = np.reshape(batch[kk], [-1]) - for ii in ['type']: + for ii in ["type"]: feed_dict[self.place_holders[ii]] = np.reshape(batch[ii], [-1]) - for ii in ['natoms_vec', 'default_mesh']: + for ii in ["natoms_vec", "default_mesh"]: feed_dict[self.place_holders[ii]] = batch[ii] - feed_dict[self.place_holders['is_training']] = is_training + feed_dict[self.place_holders["is_training"]] = is_training return feed_dict def get_global_step(self): @@ -793,11 +991,7 @@ def get_global_step(self): # fp.write(print_str) # fp.close () - def valid_on_the_fly(self, - fp, - train_batches, - valid_batches, - print_header=False): + def valid_on_the_fly(self, fp, train_batches, valid_batches, print_header=False): train_results = self.get_evaluation_results(train_batches) valid_results = self.get_evaluation_results(valid_batches) @@ -805,38 +999,47 @@ def valid_on_the_fly(self, current_lr = run_sess(self.sess, self.learning_rate) if print_header: self.print_header(fp, train_results, valid_results, self.multi_task_mode) - self.print_on_training(fp, train_results, valid_results, 
cur_batch, current_lr, self.multi_task_mode) + self.print_on_training( + fp, + train_results, + valid_results, + cur_batch, + current_lr, + self.multi_task_mode, + ) @staticmethod def print_header(fp, train_results, valid_results, multi_task_mode=False): - print_str = '' - print_str += "# %5s" % 'step' + print_str = "" + print_str += "# %5s" % "step" if not multi_task_mode: if valid_results is not None: - prop_fmt = ' %11s %11s' + prop_fmt = " %11s %11s" for k in train_results.keys(): - print_str += prop_fmt % (k + '_val', k + '_trn') + print_str += prop_fmt % (k + "_val", k + "_trn") else: - prop_fmt = ' %11s' + prop_fmt = " %11s" for k in train_results.keys(): - print_str += prop_fmt % (k + '_trn') + print_str += prop_fmt % (k + "_trn") else: for fitting_key in train_results: if valid_results[fitting_key] is not None: - prop_fmt = ' %11s %11s' + prop_fmt = " %11s %11s" for k in train_results[fitting_key].keys(): - print_str += prop_fmt % (k + '_val', k + '_trn') + print_str += prop_fmt % (k + "_val", k + "_trn") else: - prop_fmt = ' %11s' + prop_fmt = " %11s" for k in train_results[fitting_key].keys(): - print_str += prop_fmt % (k + '_trn') - print_str += ' %8s\n' % 'lr' + print_str += prop_fmt % (k + "_trn") + print_str += " %8s\n" % "lr" fp.write(print_str) fp.flush() @staticmethod - def print_on_training(fp, train_results, valid_results, cur_batch, cur_lr, multi_task_mode=False): - print_str = '' + def print_on_training( + fp, train_results, valid_results, cur_batch, cur_lr, multi_task_mode=False + ): + print_str = "" print_str += "%7d" % cur_batch if not multi_task_mode: if valid_results is not None: @@ -854,7 +1057,10 @@ def print_on_training(fp, train_results, valid_results, cur_batch, cur_lr, multi prop_fmt = " %11.2e %11.2e" for k in valid_results[fitting_key].keys(): # assert k in train_results[fitting_key].keys() - print_str += prop_fmt % (valid_results[fitting_key][k], train_results[fitting_key][k]) + print_str += prop_fmt % ( + 
valid_results[fitting_key][k], + train_results[fitting_key][k], + ) else: prop_fmt = " %11.2e" for k in train_results[fitting_key].keys(): @@ -864,7 +1070,7 @@ def print_on_training(fp, train_results, valid_results, cur_batch, cur_lr, multi fp.flush() @staticmethod - def eval_single_list(single_batch_list, loss, sess, get_feed_dict_func, prefix=''): + def eval_single_list(single_batch_list, loss, sess, get_feed_dict_func, prefix=""): if single_batch_list is None: return None numb_batch = len(single_batch_list) @@ -880,38 +1086,50 @@ def eval_single_list(single_batch_list, loss, sess, get_feed_dict_func, prefix=' if k == "natoms": sum_natoms += v else: - sum_results[k] = sum_results.get(k, 0.) + v * results["natoms"] - single_results = {prefix + k: v / sum_natoms for k, v in sum_results.items() if not k == "natoms"} + sum_results[k] = sum_results.get(k, 0.0) + v * results["natoms"] + single_results = { + prefix + k: v / sum_natoms + for k, v in sum_results.items() + if not k == "natoms" + } return single_results def get_evaluation_results(self, batch_list): if not self.multi_task_mode: - avg_results = self.eval_single_list(batch_list, self.loss, self.sess, self.get_feed_dict) + avg_results = self.eval_single_list( + batch_list, self.loss, self.sess, self.get_feed_dict + ) else: avg_results = {} for fitting_key in batch_list: - avg_results[fitting_key] = \ - self.eval_single_list(batch_list[fitting_key], self.loss_dict[fitting_key], self.sess, - self.get_feed_dict, prefix='{}_'.format(fitting_key)) + avg_results[fitting_key] = self.eval_single_list( + batch_list[fitting_key], + self.loss_dict[fitting_key], + self.sess, + self.get_feed_dict, + prefix="{}_".format(fitting_key), + ) return avg_results - + def save_compressed(self): """ Save the compressed graph """ self._init_session() if self.is_compress: - self.saver.save (self.sess, os.path.join(os.getcwd(), self.save_ckpt)) + self.saver.save(self.sess, os.path.join(os.getcwd(), self.save_ckpt)) def 
_get_place_horders(self, data_dict): for kk in data_dict.keys(): - if kk == 'type': + if kk == "type": continue prec = GLOBAL_TF_FLOAT_PRECISION - if data_dict[kk]['high_prec'] : + if data_dict[kk]["high_prec"]: prec = GLOBAL_ENER_FLOAT_PRECISION - self.place_holders[kk] = tf.placeholder(prec, [None], name = 't_' + kk) - self.place_holders['find_' + kk] = tf.placeholder(tf.float32, name = 't_find_' + kk) + self.place_holders[kk] = tf.placeholder(prec, [None], name="t_" + kk) + self.place_holders["find_" + kk] = tf.placeholder( + tf.float32, name="t_find_" + kk + ) def _init_from_frz_model(self): try: @@ -919,24 +1137,31 @@ def _init_from_frz_model(self): except FileNotFoundError as e: # throw runtime error if there's no frozen model raise RuntimeError( - "The input frozen model %s (%s) does not exist! Please check the path of the frozen model. " % (self.run_opt.init_frz_model, os.path.abspath(self.run_opt.init_frz_model)) + "The input frozen model %s (%s) does not exist! Please check the path of the frozen model. " + % ( + self.run_opt.init_frz_model, + os.path.abspath(self.run_opt.init_frz_model), + ) ) from e # get the model type from the frozen model(self.run_opt.init_frz_model) try: - t_model_type = get_tensor_by_name_from_graph(graph, 'model_type') + t_model_type = get_tensor_by_name_from_graph(graph, "model_type") except GraphWithoutTensorError as e: # throw runtime error if the frozen_model has no model type information... raise RuntimeError( "The input frozen model: %s has no 'model_type' information, " - "which is not supported by the 'dp train init-frz-model' interface. " % self.run_opt.init_frz_model + "which is not supported by the 'dp train init-frz-model' interface. 
" + % self.run_opt.init_frz_model ) from e else: self.model_type = bytes.decode(t_model_type) - if self.model_type == 'compressed_model': + if self.model_type == "compressed_model": self.frz_model = self.run_opt.init_frz_model self.model.init_variables(graph, graph_def, model_type=self.model_type) - def _init_from_pretrained_model(self, data, origin_type_map=None, bias_shift='delta'): + def _init_from_pretrained_model( + self, data, origin_type_map=None, bias_shift="delta" + ): """ Init the embedding net variables with the given frozen model @@ -958,28 +1183,45 @@ def _init_from_pretrained_model(self, data, origin_type_map=None, bias_shift='de # throw runtime error if there's no frozen model raise RuntimeError( "The input frozen pretrained model %s (%s) does not exist! " - "Please check the path of the frozen pretrained model. " % (self.run_opt.finetune, - os.path.abspath(self.run_opt.finetune)) + "Please check the path of the frozen pretrained model. " + % (self.run_opt.finetune, os.path.abspath(self.run_opt.finetune)) ) from e # get the model type from the frozen model(self.run_opt.finetune) try: - t_model_type = get_tensor_by_name_from_graph(graph, 'model_type') + t_model_type = get_tensor_by_name_from_graph(graph, "model_type") except GraphWithoutTensorError as e: # throw runtime error if the frozen_model has no model type information... raise RuntimeError( "The input frozen pretrained model: %s has no 'model_type' information, " - "which is not supported by the 'dp train finetune' interface. " % self.run_opt.finetune + "which is not supported by the 'dp train finetune' interface. " + % self.run_opt.finetune ) from e else: self.model_type = bytes.decode(t_model_type) - assert self.model_type != 'compressed_model', "Compressed models are not supported for finetuning!" + assert ( + self.model_type != "compressed_model" + ), "Compressed models are not supported for finetuning!" 
self.model.init_variables(graph, graph_def, model_type=self.model_type) - log.info("Changing energy bias in pretrained model for types {}... " - "(this step may take long time)".format(str(origin_type_map))) - self._change_energy_bias(data, self.run_opt.finetune, origin_type_map, bias_shift) + log.info( + "Changing energy bias in pretrained model for types {}... " + "(this step may take long time)".format(str(origin_type_map)) + ) + self._change_energy_bias( + data, self.run_opt.finetune, origin_type_map, bias_shift + ) - def _change_energy_bias(self, data, frozen_model, origin_type_map, bias_shift='delta'): + def _change_energy_bias( + self, data, frozen_model, origin_type_map, bias_shift="delta" + ): full_type_map = data.get_type_map() - assert self.fitting_type == 'ener', "energy bias changing only supports 'ener' fitting net!" - self.model.fitting.change_energy_bias(data, frozen_model, origin_type_map, full_type_map, bias_shift=bias_shift, - ntest=self.model_param.get('data_bias_nsample', 10)) + assert ( + self.fitting_type == "ener" + ), "energy bias changing only supports 'ener' fitting net!" 
+ self.model.fitting.change_energy_bias( + data, + frozen_model, + origin_type_map, + full_type_map, + bias_shift=bias_shift, + ntest=self.model_param.get("data_bias_nsample", 10), + ) diff --git a/deepmd/utils/__init__.py b/deepmd/utils/__init__.py index c49afa752f..14e73bb176 100644 --- a/deepmd/utils/__init__.py +++ b/deepmd/utils/__init__.py @@ -1,6 +1,17 @@ # -from .data import DeepmdData -from .data_system import DeepmdDataSystem -from .pair_tab import PairTab -from .learning_rate import LearningRateExp -from .plugin import Plugin, PluginVariant +from .data import ( + DeepmdData, +) +from .data_system import ( + DeepmdDataSystem, +) +from .learning_rate import ( + LearningRateExp, +) +from .pair_tab import ( + PairTab, +) +from .plugin import ( + Plugin, + PluginVariant, +) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 13c351731f..8ac8c79721 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -1,15 +1,34 @@ +import json import logging -from typing import List, Callable +from typing import ( + Callable, + List, +) + +from dargs import ( + Argument, + ArgumentEncoder, + Variant, + dargs, +) + +from deepmd import ( + descriptor, +) +from deepmd.common import ( + ACTIVATION_FN_DICT, + PRECISION_DICT, +) +from deepmd.nvnmd.utils.argcheck import ( + nvnmd_args, +) +from deepmd.utils.plugin import ( + Plugin, +) -from dargs import dargs, Argument, Variant, ArgumentEncoder -from deepmd import descriptor -from deepmd.common import ACTIVATION_FN_DICT, PRECISION_DICT -from deepmd.utils.plugin import Plugin -import json +log = logging.getLogger(__name__) -from deepmd.nvnmd.utils.argcheck import nvnmd_args -log = logging.getLogger(__name__) def list_to_doc(xx): items = [] for ii in xx: @@ -17,42 +36,54 @@ def list_to_doc(xx): items.append(f'"{ii}"') else: items.append(f', "{ii}"') - items.append('.') - return ''.join(items) + items.append(".") + return "".join(items) def make_link(content, ref_key): - return f'`{content} 
<{ref_key}_>`_' if not dargs.RAW_ANCHOR \ - else f'`{content} <#{ref_key}>`_' + return ( + f"`{content} <{ref_key}_>`_" + if not dargs.RAW_ANCHOR + else f"`{content} <#{ref_key}>`_" + ) def type_embedding_args(): - doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.' + doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_seed = 'Random seed for parameter initialization' + doc_seed = "Random seed for parameter initialization" doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' - doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' - doc_trainable = 'If the parameters in the embedding net are trainable' - + doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." 
+ doc_trainable = "If the parameters in the embedding net are trainable" + return [ - Argument("neuron", list, optional = True, default = [8], doc = doc_neuron), - Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function), - Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt), - Argument("precision", str, optional = True, default = "default", doc = doc_precision), - Argument("trainable", bool, optional = True, default = True, doc = doc_trainable), - Argument("seed", [int,None], optional = True, default = None, doc = doc_seed), - ] + Argument("neuron", list, optional=True, default=[8], doc=doc_neuron), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), + Argument("seed", [int, None], optional=True, default=None, doc=doc_seed), + ] # --- Descriptor configurations: --- # + class ArgsPlugin: def __init__(self) -> None: self.__plugin = Plugin() - def register(self, name : str, alias : List[str] = None) -> Callable[[], List[Argument]]: + def register( + self, name: str, alias: List[str] = None + ) -> Callable[[], List[Argument]]: """Regiester a descriptor argument plugin. 
- + Parameters ---------- name : str @@ -64,7 +95,7 @@ def register(self, name : str, alias : List[str] = None) -> Callable[[], List[Ar ------- Callable[[], List[Argument]] the regiestered descriptor argument method - + Examples -------- >>> some_plugin = ArgsPlugin() @@ -84,7 +115,7 @@ def get_all_argument(self, exclude_hybrid: bool = False) -> List[Argument]: ---------- exclude_hybrid : bool exclude hybrid descriptor to prevent circular calls - + Returns ------- List[Argument] @@ -94,30 +125,33 @@ def get_all_argument(self, exclude_hybrid: bool = False) -> List[Argument]: for (name, alias), metd in self.__plugin.plugins.items(): if exclude_hybrid and name == "hybrid": continue - arguments.append(Argument(name=name, dtype=dict, sub_fields=metd(), alias=alias)) + arguments.append( + Argument(name=name, dtype=dict, sub_fields=metd(), alias=alias) + ) return arguments descrpt_args_plugin = ArgsPlugin() + @descrpt_args_plugin.register("loc_frame") -def descrpt_local_frame_args (): - doc_sel_a = 'A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor.' - doc_sel_r = 'A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius.' - doc_rcut = 'The cut-off radius. The default value is 6.0' - doc_axis_rule = 'A list of integers. The length should be 6 times of the number of types. \n\n\ +def descrpt_local_frame_args(): + doc_sel_a = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. 
The full relative coordinates of the neighbors are used by the descriptor." + doc_sel_r = "A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius." + doc_rcut = "The cut-off radius. The default value is 6.0" + doc_axis_rule = "A list of integers. The length should be 6 times of the number of types. \n\n\ - axis_rule[i*6+0]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\ - axis_rule[i*6+1]: type of the atom defining the first axis of type-i atom.\n\n\ - axis_rule[i*6+2]: index of the axis atom defining the first axis. Note that the neighbors with the same class and type are sorted according to their relative distance.\n\n\ - axis_rule[i*6+3]: class of the atom defining the second axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\ - axis_rule[i*6+4]: type of the atom defining the second axis of type-i atom.\n\n\ -- axis_rule[i*6+5]: index of the axis atom defining the second axis. Note that the neighbors with the same class and type are sorted according to their relative distance.' - +- axis_rule[i*6+5]: index of the axis atom defining the second axis. Note that the neighbors with the same class and type are sorted according to their relative distance." 
+ return [ - Argument("sel_a", list, optional = False, doc = doc_sel_a), - Argument("sel_r", list, optional = False, doc = doc_sel_r), - Argument("rcut", float, optional = True, default = 6.0, doc = doc_rcut), - Argument("axis_rule", list, optional = False, doc = doc_axis_rule) + Argument("sel_a", list, optional=False, doc=doc_sel_a), + Argument("sel_r", list, optional=False, doc=doc_sel_r), + Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), + Argument("axis_rule", list, optional=False, doc=doc_axis_rule), ] @@ -126,119 +160,155 @@ def descrpt_se_a_args(): doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' - doc_rcut = 'The cut-off radius.' - doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`' - doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.' - doc_axis_neuron = 'Size of the submatrix of G (embedding matrix).' + doc_rcut = "The cut-off radius." + doc_rcut_smth = "Where to start smoothing. 
For example the 1/r term is smoothed from `rcut` to `rcut_smth`" + doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." + doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_type_one_side = r'If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters.' - doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' - doc_trainable = 'If the parameters in the embedding net is trainable' - doc_seed = 'Random seed for parameter initialization' - doc_exclude_types = 'The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1.' - doc_set_davg_zero = 'Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used' - + doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." 
+ doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + doc_trainable = "If the parameters in the embedding net is trainable" + doc_seed = "Random seed for parameter initialization" + doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." + doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used" + return [ - Argument("sel", [list,str], optional = True, default = "auto", doc = doc_sel), - Argument("rcut", float, optional = True, default = 6.0, doc = doc_rcut), - Argument("rcut_smth", float, optional = True, default = 0.5, doc = doc_rcut_smth), - Argument("neuron", list, optional = True, default = [10,20,40], doc = doc_neuron), - Argument("axis_neuron", int, optional = True, default = 4, alias = ['n_axis_neuron'], doc = doc_axis_neuron), - Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function), - Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt), - Argument("type_one_side", bool, optional = True, default = False, doc = doc_type_one_side), - Argument("precision", str, optional = True, default = "default", doc = doc_precision), - Argument("trainable", bool, optional = True, default = True, doc = doc_trainable), - Argument("seed", [int,None], optional = True, doc = doc_seed), - Argument("exclude_types", list, optional = True, default = [], doc = doc_exclude_types), - Argument("set_davg_zero", bool, optional = True, default = False, doc = doc_set_davg_zero) + Argument("sel", [list, str], optional=True, default="auto", doc=doc_sel), + Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), + Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), + 
Argument("neuron", list, optional=True, default=[10, 20, 40], doc=doc_neuron), + Argument( + "axis_neuron", + int, + optional=True, + default=4, + alias=["n_axis_neuron"], + doc=doc_axis_neuron, + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), + Argument( + "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side + ), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), + Argument("seed", [int, None], optional=True, doc=doc_seed), + Argument( + "exclude_types", list, optional=True, default=[], doc=doc_exclude_types + ), + Argument( + "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero + ), ] -@descrpt_args_plugin.register("se_e3", alias=['se_at', 'se_a_3be', 'se_t']) +@descrpt_args_plugin.register("se_e3", alias=["se_at", "se_a_3be", "se_t"]) def descrpt_se_t_args(): doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' - doc_rcut = 'The cut-off radius.' - doc_rcut_smth = 'Where to start smoothing. 
For example the 1/r term is smoothed from `rcut` to `rcut_smth`' - doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.' + doc_rcut = "The cut-off radius." + doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" + doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' - doc_trainable = 'If the parameters in the embedding net are trainable' - doc_seed = 'Random seed for parameter initialization' - doc_set_davg_zero = 'Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used' - + doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + doc_trainable = "If the parameters in the embedding net are trainable" + doc_seed = "Random seed for parameter initialization" + doc_set_davg_zero = "Set the normalization average to zero. 
This option should be set when `atom_ener` in the energy fitting is used" + return [ - Argument("sel", [list,str], optional = True, default = "auto", doc = doc_sel), - Argument("rcut", float, optional = True, default = 6.0, doc = doc_rcut), - Argument("rcut_smth", float, optional = True, default = 0.5, doc = doc_rcut_smth), - Argument("neuron", list, optional = True, default = [10,20,40], doc = doc_neuron), - Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function), - Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt), - Argument("precision", str, optional = True, default = "default", doc = doc_precision), - Argument("trainable", bool, optional = True, default = True, doc = doc_trainable), - Argument("seed", [int,None], optional = True, doc = doc_seed), - Argument("set_davg_zero", bool, optional = True, default = False, doc = doc_set_davg_zero) + Argument("sel", [list, str], optional=True, default="auto", doc=doc_sel), + Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), + Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), + Argument("neuron", list, optional=True, default=[10, 20, 40], doc=doc_neuron), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), + Argument("seed", [int, None], optional=True, doc=doc_seed), + Argument( + "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero + ), ] - -@descrpt_args_plugin.register("se_a_tpe", alias=['se_a_ebd']) +@descrpt_args_plugin.register("se_a_tpe", alias=["se_a_ebd"]) def descrpt_se_a_tpe_args(): - doc_type_nchanl = 'number of channels for type embedding' - doc_type_nlayer = 'number 
of hidden layers of type embedding net' - doc_numb_aparam = 'dimension of atomic parameter. if set to a value > 0, the atomic parameters are embedded.' - - return descrpt_se_a_args() + [ - Argument("type_nchanl", int, optional = True, default = 4, doc = doc_type_nchanl), - Argument("type_nlayer", int, optional = True, default = 2, doc = doc_type_nlayer), - Argument("numb_aparam", int, optional = True, default = 0, doc = doc_numb_aparam) + doc_type_nchanl = "number of channels for type embedding" + doc_type_nlayer = "number of hidden layers of type embedding net" + doc_numb_aparam = "dimension of atomic parameter. if set to a value > 0, the atomic parameters are embedded." + + return descrpt_se_a_args() + [ + Argument("type_nchanl", int, optional=True, default=4, doc=doc_type_nchanl), + Argument("type_nlayer", int, optional=True, default=2, doc=doc_type_nlayer), + Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), ] -@descrpt_args_plugin.register("se_e2_r", alias=['se_r']) +@descrpt_args_plugin.register("se_e2_r", alias=["se_r"]) def descrpt_se_r_args(): doc_sel = 'This parameter set the number of selected neighbors for each type of atom. It can be:\n\n\ - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. `sel[i]` is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. It is noted that the total sel value must be less than 4096 in a GPU environment.\n\n\ - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' - doc_rcut = 'The cut-off radius.' 
- doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`' - doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.' + doc_rcut = "The cut-off radius." + doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" + doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_type_one_side = r'If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters.' - doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' - doc_trainable = 'If the parameters in the embedding net are trainable' - doc_seed = 'Random seed for parameter initialization' - doc_exclude_types = 'The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1.' - doc_set_davg_zero = 'Set the normalization average to zero. 
This option should be set when `atom_ener` in the energy fitting is used' - + doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." + doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + doc_trainable = "If the parameters in the embedding net are trainable" + doc_seed = "Random seed for parameter initialization" + doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." + doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used" + return [ - Argument("sel", [list,str], optional = True, default = "auto", doc = doc_sel), - Argument("rcut", float, optional = True, default = 6.0, doc = doc_rcut), - Argument("rcut_smth", float, optional = True, default = 0.5, doc = doc_rcut_smth), - Argument("neuron", list, optional = True, default = [10,20,40], doc = doc_neuron), - Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function), - Argument("resnet_dt", bool, optional = True, default = False, doc = doc_resnet_dt), - Argument("type_one_side", bool, optional = True, default = False, doc = doc_type_one_side), - Argument("precision", str, optional = True, default = "default", doc = doc_precision), - Argument("trainable", bool, optional = True, default = True, doc = doc_trainable), - Argument("seed", [int,None], optional = True, doc = doc_seed), - Argument("exclude_types", list, optional = True, default = [], doc = doc_exclude_types), - 
Argument("set_davg_zero", bool, optional = True, default = False, doc = doc_set_davg_zero) + Argument("sel", [list, str], optional=True, default="auto", doc=doc_sel), + Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), + Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), + Argument("neuron", list, optional=True, default=[10, 20, 40], doc=doc_neuron), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), + Argument( + "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side + ), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), + Argument("seed", [int, None], optional=True, doc=doc_seed), + Argument( + "exclude_types", list, optional=True, default=[], doc=doc_exclude_types + ), + Argument( + "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero + ), ] @descrpt_args_plugin.register("hybrid") def descrpt_hybrid_args(): - doc_list = f'A list of descriptor definitions' - - return [ - Argument("list", list, optional = False, doc = doc_list) - ] + doc_list = f"A list of descriptor definitions" + + return [Argument("list", list, optional=False, doc=doc_list)] @descrpt_args_plugin.register("se_atten") @@ -247,77 +317,97 @@ def descrpt_se_atten_args(): - `int`. The maximum number of neighbor atoms to be considered. We recommend it to be less than 200. \n\n\ - `List[int]`. The length of the list should be the same as the number of atom types in the system. `sel[i]` gives the selected number of type-i neighbors. Only the summation of `sel[i]` matters, and it is recommended to be less than 200.\ - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. 
In detail it counts the maximal number of neighbors with in the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wraped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".' - doc_rcut = 'The cut-off radius.' - doc_rcut_smth = 'Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`' - doc_neuron = 'Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.' - doc_axis_neuron = 'Size of the submatrix of G (embedding matrix).' + doc_rcut = "The cut-off radius." + doc_rcut_smth = "Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`" + doc_neuron = "Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built." + doc_axis_neuron = "Size of the submatrix of G (embedding matrix)." doc_activation_function = f'The activation function in the embedding net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_type_one_side = r'If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters.' - doc_precision = f'The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' 
- doc_trainable = 'If the parameters in the embedding net is trainable' - doc_seed = 'Random seed for parameter initialization' - doc_exclude_types = 'The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1.' - doc_set_davg_zero = 'Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used' - doc_attn = 'The length of hidden vectors in attention layers' - doc_attn_layer = 'The number of attention layers' - doc_attn_dotr = 'Whether to do dot product with the normalized relative coordinates' - doc_attn_mask = 'Whether to do mask on the diagonal in the attention matrix' + doc_type_one_side = r"If true, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters." + doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + doc_trainable = "If the parameters in the embedding net is trainable" + doc_seed = "Random seed for parameter initialization" + doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." + doc_set_davg_zero = "Set the normalization average to zero. 
This option should be set when `atom_ener` in the energy fitting is used" + doc_attn = "The length of hidden vectors in attention layers" + doc_attn_layer = "The number of attention layers" + doc_attn_dotr = "Whether to do dot product with the normalized relative coordinates" + doc_attn_mask = "Whether to do mask on the diagonal in the attention matrix" return [ Argument("sel", [int, list, str], optional=True, default="auto", doc=doc_sel), Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut), Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth), Argument("neuron", list, optional=True, default=[10, 20, 40], doc=doc_neuron), - Argument("axis_neuron", int, optional=True, default=4, alias=['n_axis_neuron'], doc=doc_axis_neuron), - Argument("activation_function", str, optional=True, default='tanh', doc=doc_activation_function), + Argument( + "axis_neuron", + int, + optional=True, + default=4, + alias=["n_axis_neuron"], + doc=doc_axis_neuron, + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), Argument("resnet_dt", bool, optional=True, default=False, doc=doc_resnet_dt), - Argument("type_one_side", bool, optional=True, default=False, doc=doc_type_one_side), + Argument( + "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side + ), Argument("precision", str, optional=True, default="default", doc=doc_precision), Argument("trainable", bool, optional=True, default=True, doc=doc_trainable), Argument("seed", [int, None], optional=True, doc=doc_seed), - Argument("exclude_types", list, optional=True, default=[], doc=doc_exclude_types), - Argument("set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero), + Argument( + "exclude_types", list, optional=True, default=[], doc=doc_exclude_types + ), + Argument( + "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero + ), Argument("attn", int, optional=True, default=128, 
doc=doc_attn), Argument("attn_layer", int, optional=True, default=2, doc=doc_attn_layer), Argument("attn_dotr", bool, optional=True, default=True, doc=doc_attn_dotr), - Argument("attn_mask", bool, optional=True, default=False, doc=doc_attn_mask) + Argument("attn_mask", bool, optional=True, default=False, doc=doc_attn_mask), ] + def descrpt_variant_type_args(exclude_hybrid: bool = False) -> Variant: - link_lf = make_link('loc_frame', 'model/descriptor[loc_frame]') - link_se_e2_a = make_link('se_e2_a', 'model/descriptor[se_e2_a]') - link_se_e2_r = make_link('se_e2_r', 'model/descriptor[se_e2_r]') - link_se_e3 = make_link('se_e3', 'model/descriptor[se_e3]') - link_se_a_tpe = make_link('se_a_tpe', 'model/descriptor[se_a_tpe]') - link_hybrid = make_link('hybrid', 'model/descriptor[hybrid]') - link_se_atten = make_link('se_atten', 'model/descriptor[se_atten]') - doc_descrpt_type = f'The type of the descritpor. See explanation below. \n\n\ + link_lf = make_link("loc_frame", "model/descriptor[loc_frame]") + link_se_e2_a = make_link("se_e2_a", "model/descriptor[se_e2_a]") + link_se_e2_r = make_link("se_e2_r", "model/descriptor[se_e2_r]") + link_se_e3 = make_link("se_e3", "model/descriptor[se_e3]") + link_se_a_tpe = make_link("se_a_tpe", "model/descriptor[se_a_tpe]") + link_hybrid = make_link("hybrid", "model/descriptor[hybrid]") + link_se_atten = make_link("se_atten", "model/descriptor[se_atten]") + doc_descrpt_type = f"The type of the descritpor. See explanation below. \n\n\ - `loc_frame`: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.\n\n\ - `se_e2_a`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor.\n\n\ - `se_e2_r`: Used by the smooth edition of Deep Potential. Only the distance between atoms is used to construct the descriptor.\n\n\ - `se_e3`: Used by the smooth edition of Deep Potential. 
The full relative coordinates are used to construct the descriptor. Three-body embedding will be used by this descriptor.\n\n\ - `se_a_tpe`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Type embedding will be used by this descriptor.\n\n\ - `se_atten`: Used by the smooth edition of Deep Potential. The full relative coordinates are used to construct the descriptor. Attention mechanism will be used by this descriptor.\n\n\ -- `hybrid`: Concatenate of a list of descriptors as a new descriptor.' - - return Variant("type", descrpt_args_plugin.get_all_argument(), doc = doc_descrpt_type) +- `hybrid`: Concatenate of a list of descriptors as a new descriptor." + + return Variant("type", descrpt_args_plugin.get_all_argument(), doc=doc_descrpt_type) # --- Fitting net configurations: --- # def fitting_ener(): - doc_numb_fparam = 'The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams.' - doc_numb_aparam = 'The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams.' - doc_neuron = 'The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.' + doc_numb_fparam = "The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provided the input fparams." + doc_numb_aparam = "The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provided the input aparams." + doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. 
Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' - doc_precision = f'The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' + doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_trainable = 'Whether the parameters in the fitting net are trainable. This option can be\n\n\ + doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\ - bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\ -- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of tihs list should be equal to len(`neuron`)+1.' - doc_rcond = 'The condition number used to determine the inital energy shift for each type of atoms.' - doc_seed = 'Random seed for parameter initialization of the fitting net' - doc_atom_ener = 'Specify the atomic energy in vacuum for each type' +- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of tihs list should be equal to len(`neuron`)+1." + doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms." + doc_seed = "Random seed for parameter initialization of the fitting net" + doc_atom_ener = "Specify the atomic energy in vacuum for each type" doc_layer_name = ( "The name of the each layer. The length of this list should be equal to n_neuron + 1. 
" "If two layers, either in the same fitting or different fittings, " @@ -327,82 +417,140 @@ def fitting_ener(): ) return [ - Argument("numb_fparam", int, optional = True, default = 0, doc = doc_numb_fparam), - Argument("numb_aparam", int, optional = True, default = 0, doc = doc_numb_aparam), - Argument("neuron", list, optional = True, default = [120,120,120], alias = ['n_neuron'], doc = doc_neuron), - Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function), - Argument("precision", str, optional = True, default = 'default', doc = doc_precision), - Argument("resnet_dt", bool, optional = True, default = True, doc = doc_resnet_dt), - Argument("trainable", [list,bool], optional = True, default = True, doc = doc_trainable), - Argument("rcond", float, optional = True, default = 1e-3, doc = doc_rcond), - Argument("seed", [int,None], optional = True, doc = doc_seed), - Argument("atom_ener", list, optional = True, default = [], doc = doc_atom_ener), - Argument("layer_name", list, optional = True, doc = doc_layer_name), + Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam), + Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam), + Argument( + "neuron", + list, + optional=True, + default=[120, 120, 120], + alias=["n_neuron"], + doc=doc_neuron, + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt), + Argument( + "trainable", [list, bool], optional=True, default=True, doc=doc_trainable + ), + Argument("rcond", float, optional=True, default=1e-3, doc=doc_rcond), + Argument("seed", [int, None], optional=True, doc=doc_seed), + Argument("atom_ener", list, optional=True, default=[], doc=doc_atom_ener), + Argument("layer_name", list, optional=True, doc=doc_layer_name), 
] def fitting_polar(): - doc_neuron = 'The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.' + doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_precision = f'The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' - doc_scale = 'The output of the fitting net (polarizability matrix) will be scaled by ``scale``' - #doc_diag_shift = 'The diagonal part of the polarizability matrix will be shifted by ``diag_shift``. The shift operation is carried out after ``scale``.' - doc_fit_diag = 'Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix.' - doc_sel_type = 'The atom types for which the atomic polarizability will be provided. If not set, all types will be selected.' - doc_seed = 'Random seed for parameter initialization of the fitting net' - + doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + doc_scale = "The output of the fitting net (polarizability matrix) will be scaled by ``scale``" + # doc_diag_shift = 'The diagonal part of the polarizability matrix will be shifted by ``diag_shift``. The shift operation is carried out after ``scale``.' 
+ doc_fit_diag = "Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix." + doc_sel_type = "The atom types for which the atomic polarizability will be provided. If not set, all types will be selected." + doc_seed = "Random seed for parameter initialization of the fitting net" + # YWolfeee: user can decide whether to use shift diag - doc_shift_diag = 'Whether to shift the diagonal of polar, which is beneficial to training. Default is true.' + doc_shift_diag = "Whether to shift the diagonal of polar, which is beneficial to training. Default is true." return [ - Argument("neuron", list, optional = True, default = [120,120,120], alias = ['n_neuron'], doc = doc_neuron), - Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function), - Argument("resnet_dt", bool, optional = True, default = True, doc = doc_resnet_dt), - Argument("precision", str, optional = True, default = 'default', doc = doc_precision), - Argument("fit_diag", bool, optional = True, default = True, doc = doc_fit_diag), - Argument("scale", [list,float], optional = True, default = 1.0, doc = doc_scale), - #Argument("diag_shift", [list,float], optional = True, default = 0.0, doc = doc_diag_shift), - Argument("shift_diag", bool, optional = True, default = True, doc = doc_shift_diag), - Argument("sel_type", [list,int,None], optional = True, alias = ['pol_type'], doc = doc_sel_type), - Argument("seed", [int,None], optional = True, doc = doc_seed) + Argument( + "neuron", + list, + optional=True, + default=[120, 120, 120], + alias=["n_neuron"], + doc=doc_neuron, + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + 
Argument("fit_diag", bool, optional=True, default=True, doc=doc_fit_diag), + Argument("scale", [list, float], optional=True, default=1.0, doc=doc_scale), + # Argument("diag_shift", [list,float], optional = True, default = 0.0, doc = doc_diag_shift), + Argument("shift_diag", bool, optional=True, default=True, doc=doc_shift_diag), + Argument( + "sel_type", + [list, int, None], + optional=True, + alias=["pol_type"], + doc=doc_sel_type, + ), + Argument("seed", [int, None], optional=True, doc=doc_seed), ] -#def fitting_global_polar(): +# def fitting_global_polar(): # return fitting_polar() def fitting_dipole(): - doc_neuron = 'The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.' + doc_neuron = "The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built." doc_activation_function = f'The activation function in the fitting net. Supported activation functions are {list_to_doc(ACTIVATION_FN_DICT.keys())} Note that "gelu" denotes the custom operator version, and "gelu_tf" denotes the TF standard version.' doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection' - doc_precision = f'The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision.' - doc_sel_type = 'The atom types for which the atomic dipole will be provided. If not set, all types will be selected.' - doc_seed = 'Random seed for parameter initialization of the fitting net' + doc_precision = f"The precision of the fitting net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." + doc_sel_type = "The atom types for which the atomic dipole will be provided. If not set, all types will be selected." 
+ doc_seed = "Random seed for parameter initialization of the fitting net" return [ - Argument("neuron", list, optional = True, default = [120,120,120], alias = ['n_neuron'], doc = doc_neuron), - Argument("activation_function", str, optional = True, default = 'tanh', doc = doc_activation_function), - Argument("resnet_dt", bool, optional = True, default = True, doc = doc_resnet_dt), - Argument("precision", str, optional = True, default = 'default', doc = doc_precision), - Argument("sel_type", [list,int,None], optional = True, alias = ['dipole_type'], doc = doc_sel_type), - Argument("seed", [int,None], optional = True, doc = doc_seed) - ] + Argument( + "neuron", + list, + optional=True, + default=[120, 120, 120], + alias=["n_neuron"], + doc=doc_neuron, + ), + Argument( + "activation_function", + str, + optional=True, + default="tanh", + doc=doc_activation_function, + ), + Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt), + Argument("precision", str, optional=True, default="default", doc=doc_precision), + Argument( + "sel_type", + [list, int, None], + optional=True, + alias=["dipole_type"], + doc=doc_sel_type, + ), + Argument("seed", [int, None], optional=True, doc=doc_seed), + ] + # YWolfeee: Delete global polar mode, merge it into polar mode and use loss setting to support. def fitting_variant_type_args(): - doc_descrpt_type = 'The type of the fitting. See explanation below. \n\n\ + doc_descrpt_type = "The type of the fitting. See explanation below. \n\n\ - `ener`: Fit an energy model (potential energy surface).\n\n\ - `dipole`: Fit an atomic dipole model. Global dipole labels or atomic dipole labels for all the selected atoms (see `sel_type`) should be provided by `dipole.npy` in each data system. The file either has number of frames lines and 3 times of number of selected atoms columns, or has number of frames lines and 3 columns. See `loss` parameter.\n\n\ -- `polar`: Fit an atomic polarizability model. 
Global polarizazbility labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file eith has number of frames lines and 9 times of number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter.\n\n' - - return Variant("type", [Argument("ener", dict, fitting_ener()), - Argument("dipole", dict, fitting_dipole()), - Argument("polar", dict, fitting_polar()), - ], - optional = True, - default_tag = 'ener', - doc = doc_descrpt_type) +- `polar`: Fit an atomic polarizability model. Global polarizazbility labels or atomic polarizability labels for all the selected atoms (see `sel_type`) should be provided by `polarizability.npy` in each data system. The file eith has number of frames lines and 9 times of number of selected atoms columns, or has number of frames lines and 9 columns. See `loss` parameter.\n\n" + + return Variant( + "type", + [ + Argument("ener", dict, fitting_ener()), + Argument("dipole", dict, fitting_dipole()), + Argument("polar", dict, fitting_polar()), + ], + optional=True, + default_tag="ener", + doc=doc_descrpt_type, + ) # --- Modifier configurations: --- # @@ -412,208 +560,362 @@ def modifier_dipole_charge(): doc_sys_charge_map = f"The charge of real atoms. The list length should be the same as the {make_link('type_map', 'model/type_map')}" doc_ewald_h = f"The grid spacing of the FFT grid. Unit is A" doc_ewald_beta = f"The splitting parameter of Ewald sum. 
Unit is A^{-1}" - + return [ - Argument("model_name", str, optional = False, doc = doc_model_name), - Argument("model_charge_map", list, optional = False, doc = doc_model_charge_map), - Argument("sys_charge_map", list, optional = False, doc = doc_sys_charge_map), - Argument("ewald_beta", float, optional = True, default = 0.4, doc = doc_ewald_beta), - Argument("ewald_h", float, optional = True, default = 1.0, doc = doc_ewald_h), + Argument("model_name", str, optional=False, doc=doc_model_name), + Argument("model_charge_map", list, optional=False, doc=doc_model_charge_map), + Argument("sys_charge_map", list, optional=False, doc=doc_sys_charge_map), + Argument("ewald_beta", float, optional=True, default=0.4, doc=doc_ewald_beta), + Argument("ewald_h", float, optional=True, default=1.0, doc=doc_ewald_h), ] def modifier_variant_type_args(): doc_modifier_type = "The type of modifier. See explanation below.\n\n\ -`dipole_charge`: Use WFCC to model the electronic structure of the system. Correct the long-range interaction" - return Variant("type", - [ - Argument("dipole_charge", dict, modifier_dipole_charge()), - ], - optional = False, - doc = doc_modifier_type) + return Variant( + "type", + [ + Argument("dipole_charge", dict, modifier_dipole_charge()), + ], + optional=False, + doc=doc_modifier_type, + ) + # --- model compression configurations: --- # def model_compression(): - doc_model_file = f"The input model file, which will be compressed by the DeePMD-kit." + doc_model_file = ( + f"The input model file, which will be compressed by the DeePMD-kit." + ) doc_table_config = f"The arguments of model compression, including extrapolate(scale of model extrapolation), stride(uniform stride of tabulation's first and second table), and frequency(frequency of tabulation overflow check)." - doc_min_nbor_dist = f"The nearest distance between neighbor atoms saved in the frozen model." 
- + doc_min_nbor_dist = ( + f"The nearest distance between neighbor atoms saved in the frozen model." + ) + return [ - Argument("model_file", str, optional = False, doc = doc_model_file), - Argument("table_config", list, optional = False, doc = doc_table_config), - Argument("min_nbor_dist", float, optional = False, doc = doc_min_nbor_dist), + Argument("model_file", str, optional=False, doc=doc_model_file), + Argument("table_config", list, optional=False, doc=doc_table_config), + Argument("min_nbor_dist", float, optional=False, doc=doc_min_nbor_dist), ] + # --- model compression configurations: --- # def model_compression_type_args(): doc_compress_type = "The type of model compression, which should be consistent with the descriptor type." - - return Variant("type", [ - Argument("se_e2_a", dict, model_compression(), alias = ['se_a']) - ], - optional = True, - default_tag = 'se_e2_a', - doc = doc_compress_type) + + return Variant( + "type", + [Argument("se_e2_a", dict, model_compression(), alias=["se_a"])], + optional=True, + default_tag="se_e2_a", + doc=doc_compress_type, + ) -def model_args (): - doc_type_map = 'A list of strings. Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment. If not given, type.raw in each system should use the same type indexes, and type_map.raw will take no effect.' - doc_data_stat_nbatch = 'The model determines the normalization from the statistics of the data. This key specifies the number of `frames` in each `system` used for statistics.' - doc_data_stat_protect = 'Protect parameter for atomic energy regression.' - doc_data_bias_nsample = 'The number of training samples in a system to compute and change the energy bias.' +def model_args(): + doc_type_map = "A list of strings. Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment. 
If not given, type.raw in each system should use the same type indexes, and type_map.raw will take no effect." + doc_data_stat_nbatch = "The model determines the normalization from the statistics of the data. This key specifies the number of `frames` in each `system` used for statistics." + doc_data_stat_protect = "Protect parameter for atomic energy regression." + doc_data_bias_nsample = "The number of training samples in a system to compute and change the energy bias." doc_type_embedding = "The type embedding." - doc_descrpt = 'The descriptor of atomic environment.' - doc_fitting = 'The fitting of physical properties.' - doc_fitting_net_dict = 'The dictionary of multiple fitting nets in multi-task mode. Each fitting_net_dict[fitting_key] is the single definition of fitting of physical properties with user-defined name `fitting_key`.' - doc_modifier = 'The modifier of model output.' - doc_use_srtab = 'The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly.' - doc_smin_alpha = 'The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided.' - doc_sw_rmin = 'The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided.' - doc_sw_rmax = 'The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided.' 
- doc_compress_config = 'Model compression configurations' - - ca = Argument("model", dict, - [Argument("type_map", list, optional = True, doc = doc_type_map), - Argument("data_stat_nbatch", int, optional = True, default = 10, doc = doc_data_stat_nbatch), - Argument("data_stat_protect", float, optional = True, default = 1e-2, doc = doc_data_stat_protect), - Argument("data_bias_nsample", int, optional=True, default=10, doc=doc_data_bias_nsample), - Argument("use_srtab", str, optional = True, doc = doc_use_srtab), - Argument("smin_alpha", float, optional = True, doc = doc_smin_alpha), - Argument("sw_rmin", float, optional = True, doc = doc_sw_rmin), - Argument("sw_rmax", float, optional = True, doc = doc_sw_rmax), - Argument("type_embedding", dict, type_embedding_args(), [], optional = True, doc = doc_type_embedding), - Argument("descriptor", dict, [], [descrpt_variant_type_args()], doc = doc_descrpt), - Argument("fitting_net", dict, [], [fitting_variant_type_args()], optional=True, doc=doc_fitting), - Argument("fitting_net_dict", dict, optional=True, doc=doc_fitting_net_dict), - Argument("modifier", dict, [], [modifier_variant_type_args()], optional = True, doc = doc_modifier), - Argument("compress", dict, [], [model_compression_type_args()], optional = True, doc = doc_compress_config) - ]) + doc_descrpt = "The descriptor of atomic environment." + doc_fitting = "The fitting of physical properties." + doc_fitting_net_dict = "The dictionary of multiple fitting nets in multi-task mode. Each fitting_net_dict[fitting_key] is the single definition of fitting of physical properties with user-defined name `fitting_key`." + doc_modifier = "The modifier of model output." + doc_use_srtab = "The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. 
For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly." + doc_smin_alpha = "The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided." + doc_sw_rmin = "The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided." + doc_sw_rmax = "The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided." + doc_compress_config = "Model compression configurations" + + ca = Argument( + "model", + dict, + [ + Argument("type_map", list, optional=True, doc=doc_type_map), + Argument( + "data_stat_nbatch", + int, + optional=True, + default=10, + doc=doc_data_stat_nbatch, + ), + Argument( + "data_stat_protect", + float, + optional=True, + default=1e-2, + doc=doc_data_stat_protect, + ), + Argument( + "data_bias_nsample", + int, + optional=True, + default=10, + doc=doc_data_bias_nsample, + ), + Argument("use_srtab", str, optional=True, doc=doc_use_srtab), + Argument("smin_alpha", float, optional=True, doc=doc_smin_alpha), + Argument("sw_rmin", float, optional=True, doc=doc_sw_rmin), + Argument("sw_rmax", float, optional=True, doc=doc_sw_rmax), + Argument( + "type_embedding", + dict, + type_embedding_args(), + [], + optional=True, + doc=doc_type_embedding, + ), + Argument( + "descriptor", dict, [], [descrpt_variant_type_args()], doc=doc_descrpt + ), + Argument( + "fitting_net", + dict, + [], + [fitting_variant_type_args()], + optional=True, + doc=doc_fitting, + ), + Argument("fitting_net_dict", dict, optional=True, doc=doc_fitting_net_dict), + Argument( + "modifier", + dict, + [], + [modifier_variant_type_args()], + optional=True, + doc=doc_modifier, + ), + Argument( + "compress", + 
dict, + [], + [model_compression_type_args()], + optional=True, + doc=doc_compress_config, + ), + ], + ) # print(ca.gen_doc()) return ca # --- Learning rate configurations: --- # def learning_rate_exp(): - doc_start_lr = 'The learning rate the start of the training.' - doc_stop_lr = 'The desired learning rate at the end of the training.' - doc_decay_steps = 'The learning rate is decaying every this number of training steps.' - - args = [ - Argument("start_lr", float, optional = True, default = 1e-3, doc = doc_start_lr), - Argument("stop_lr", float, optional = True, default = 1e-8, doc = doc_stop_lr), - Argument("decay_steps", int, optional = True, default = 5000, doc = doc_decay_steps) + doc_start_lr = "The learning rate the start of the training." + doc_stop_lr = "The desired learning rate at the end of the training." + doc_decay_steps = ( + "The learning rate is decaying every this number of training steps." + ) + + args = [ + Argument("start_lr", float, optional=True, default=1e-3, doc=doc_start_lr), + Argument("stop_lr", float, optional=True, default=1e-8, doc=doc_stop_lr), + Argument("decay_steps", int, optional=True, default=5000, doc=doc_decay_steps), ] return args - -def learning_rate_variant_type_args(): - doc_lr = 'The type of the learning rate.' - return Variant("type", - [Argument("exp", dict, learning_rate_exp())], - optional = True, - default_tag = 'exp', - doc = doc_lr) +def learning_rate_variant_type_args(): + doc_lr = "The type of the learning rate." + + return Variant( + "type", + [Argument("exp", dict, learning_rate_exp())], + optional=True, + default_tag="exp", + doc=doc_lr, + ) def learning_rate_args(): - doc_scale_by_worker = 'When parallel training or batch size scaled, how to alter learning rate. Valid values are `linear`(default), `sqrt` or `none`.' 
- doc_lr = "The definitio of learning rate" - return Argument("learning_rate", dict, - [Argument("scale_by_worker", str, optional=True, default='linear', doc=doc_scale_by_worker)], - [learning_rate_variant_type_args()], - doc = doc_lr) + doc_scale_by_worker = "When parallel training or batch size scaled, how to alter learning rate. Valid values are `linear`(default), `sqrt` or `none`." + doc_lr = "The definitio of learning rate" + return Argument( + "learning_rate", + dict, + [ + Argument( + "scale_by_worker", + str, + optional=True, + default="linear", + doc=doc_scale_by_worker, + ) + ], + [learning_rate_variant_type_args()], + doc=doc_lr, + ) # --- Loss configurations: --- # def start_pref(item): - return f'The prefactor of {item} loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the {item} label should be provided by file {item}.npy in each data system. If both start_pref_{item} and limit_pref_{item} are set to 0, then the {item} will be ignored.' + return f"The prefactor of {item} loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the {item} label should be provided by file {item}.npy in each data system. If both start_pref_{item} and limit_pref_{item} are set to 0, then the {item} will be ignored." def limit_pref(item): - return f'The prefactor of {item} loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.' + return f"The prefactor of {item} loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity." 
def loss_ener(): - doc_start_pref_e = start_pref('energy') - doc_limit_pref_e = limit_pref('energy') - doc_start_pref_f = start_pref('force') - doc_limit_pref_f = limit_pref('force') - doc_start_pref_v = start_pref('virial') - doc_limit_pref_v = limit_pref('virial') - doc_start_pref_ae = start_pref('atom_ener') - doc_limit_pref_ae = limit_pref('atom_ener') - doc_start_pref_pf = start_pref('atom_pref') - doc_limit_pref_pf = limit_pref('atom_pref') - doc_relative_f = 'If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label.' + doc_start_pref_e = start_pref("energy") + doc_limit_pref_e = limit_pref("energy") + doc_start_pref_f = start_pref("force") + doc_limit_pref_f = limit_pref("force") + doc_start_pref_v = start_pref("virial") + doc_limit_pref_v = limit_pref("virial") + doc_start_pref_ae = start_pref("atom_ener") + doc_limit_pref_ae = limit_pref("atom_ener") + doc_start_pref_pf = start_pref("atom_pref") + doc_limit_pref_pf = limit_pref("atom_pref") + doc_relative_f = "If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label." doc_enable_atom_ener_coeff = "If true, the energy will be computed as \\sum_i c_i E_i. c_i should be provided by file atom_ener_coeff.npy in each data system, otherwise it's 1." 
return [ - Argument("start_pref_e", [float,int], optional = True, default = 0.02, doc = doc_start_pref_e), - Argument("limit_pref_e", [float,int], optional = True, default = 1.00, doc = doc_limit_pref_e), - Argument("start_pref_f", [float,int], optional = True, default = 1000, doc = doc_start_pref_f), - Argument("limit_pref_f", [float,int], optional = True, default = 1.00, doc = doc_limit_pref_f), - Argument("start_pref_v", [float,int], optional = True, default = 0.00, doc = doc_start_pref_v), - Argument("limit_pref_v", [float,int], optional = True, default = 0.00, doc = doc_limit_pref_v), - Argument("start_pref_ae", [float,int], optional = True, default = 0.00, doc = doc_start_pref_ae), - Argument("limit_pref_ae", [float,int], optional = True, default = 0.00, doc = doc_limit_pref_ae), - Argument("start_pref_pf", [float,int], optional = True, default = 0.00, doc = doc_start_pref_pf), - Argument("limit_pref_pf", [float,int], optional = True, default = 0.00, doc = doc_limit_pref_pf), - Argument("relative_f", [float,None], optional = True, doc = doc_relative_f), - Argument("enable_atom_ener_coeff", [bool], optional=True, default=False, doc=doc_enable_atom_ener_coeff), + Argument( + "start_pref_e", + [float, int], + optional=True, + default=0.02, + doc=doc_start_pref_e, + ), + Argument( + "limit_pref_e", + [float, int], + optional=True, + default=1.00, + doc=doc_limit_pref_e, + ), + Argument( + "start_pref_f", + [float, int], + optional=True, + default=1000, + doc=doc_start_pref_f, + ), + Argument( + "limit_pref_f", + [float, int], + optional=True, + default=1.00, + doc=doc_limit_pref_f, + ), + Argument( + "start_pref_v", + [float, int], + optional=True, + default=0.00, + doc=doc_start_pref_v, + ), + Argument( + "limit_pref_v", + [float, int], + optional=True, + default=0.00, + doc=doc_limit_pref_v, + ), + Argument( + "start_pref_ae", + [float, int], + optional=True, + default=0.00, + doc=doc_start_pref_ae, + ), + Argument( + "limit_pref_ae", + [float, int], + 
optional=True, + default=0.00, + doc=doc_limit_pref_ae, + ), + Argument( + "start_pref_pf", + [float, int], + optional=True, + default=0.00, + doc=doc_start_pref_pf, + ), + Argument( + "limit_pref_pf", + [float, int], + optional=True, + default=0.00, + doc=doc_limit_pref_pf, + ), + Argument("relative_f", [float, None], optional=True, doc=doc_relative_f), + Argument( + "enable_atom_ener_coeff", + [bool], + optional=True, + default=False, + doc=doc_enable_atom_ener_coeff, + ), ] + # YWolfeee: Modified to support tensor type of loss args. def loss_tensor(): - #doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If only `pref` is provided or both are not provided, training will be global mode, i.e. the shape of 'polarizability.npy` or `dipole.npy` should be #frams x [9 or 3]." - #doc_local_weight = "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If only `pref_atomic` is provided, training will be atomic mode, i.e. the shape of `polarizability.npy` or `dipole.npy` should be #frames x ([9 or 3] x #selected atoms). If both `pref` and `pref_atomic` are provided, training will be combined mode, and atomic label should be provided as well." - doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to global label, i.e. 'polarizability.npy` or `dipole.npy`, whose shape should be #frames x [9 or 3]. If it's larger than 0.0, this npy should be included." - doc_local_weight = "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to atomic label, i.e. `atomic_polarizability.npy` or `atomic_dipole.npy`, whose shape should be #frames x ([9 or 3] x #selected atoms). If it's larger than 0.0, this npy should be included. Both `pref` and `pref_atomic` should be provided, and either can be set to 0.0." 
+ # doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If only `pref` is provided or both are not provided, training will be global mode, i.e. the shape of 'polarizability.npy` or `dipole.npy` should be #frams x [9 or 3]." + # doc_local_weight = "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If only `pref_atomic` is provided, training will be atomic mode, i.e. the shape of `polarizability.npy` or `dipole.npy` should be #frames x ([9 or 3] x #selected atoms). If both `pref` and `pref_atomic` are provided, training will be combined mode, and atomic label should be provided as well." + doc_global_weight = "The prefactor of the weight of global loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to global label, i.e. 'polarizability.npy` or `dipole.npy`, whose shape should be #frames x [9 or 3]. If it's larger than 0.0, this npy should be included." + doc_local_weight = "The prefactor of the weight of atomic loss. It should be larger than or equal to 0. If controls the weight of loss corresponding to atomic label, i.e. `atomic_polarizability.npy` or `atomic_dipole.npy`, whose shape should be #frames x ([9 or 3] x #selected atoms). If it's larger than 0.0, this npy should be included. Both `pref` and `pref_atomic` should be provided, and either can be set to 0.0." return [ - Argument("pref", [float,int], optional = False, default = None, doc = doc_global_weight), - Argument("pref_atomic", [float,int], optional = False, default = None, doc = doc_local_weight), + Argument( + "pref", [float, int], optional=False, default=None, doc=doc_global_weight + ), + Argument( + "pref_atomic", + [float, int], + optional=False, + default=None, + doc=doc_local_weight, + ), ] def loss_variant_type_args(): - doc_loss = 'The type of the loss. When the fitting type is `ener`, the loss type should be set to `ener` or left unset. 
When the fitting type is `dipole` or `polar`, the loss type should be set to `tensor`.' - - - return Variant("type", - [Argument("ener", dict, loss_ener()), - Argument("tensor", dict, loss_tensor()), - #Argument("polar", dict, loss_tensor()), - #Argument("global_polar", dict, loss_tensor("global")) - ], - optional = True, - default_tag = 'ener', - doc = doc_loss) + doc_loss = "The type of the loss. When the fitting type is `ener`, the loss type should be set to `ener` or left unset. When the fitting type is `dipole` or `polar`, the loss type should be set to `tensor`." + + return Variant( + "type", + [ + Argument("ener", dict, loss_ener()), + Argument("tensor", dict, loss_tensor()), + # Argument("polar", dict, loss_tensor()), + # Argument("global_polar", dict, loss_tensor("global")) + ], + optional=True, + default_tag="ener", + doc=doc_loss, + ) def loss_args(): - doc_loss = 'The definition of loss function. The loss type should be set to `tensor`, `ener` or left unset.' - ca = Argument('loss', dict, [], - [loss_variant_type_args()], - optional = True, - doc = doc_loss) + doc_loss = "The definition of loss function. The loss type should be set to `tensor`, `ener` or left unset." + ca = Argument( + "loss", dict, [], [loss_variant_type_args()], optional=True, doc=doc_loss + ) return ca def loss_dict_args(): - doc_loss_dict = 'The dictionary of definitions of multiple loss functions in multi-task mode. ' \ - 'Each loss_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, is the single definition of loss function, whose type should be set to `tensor`, `ener` or left unset.\n' - ca = Argument('loss_dict', dict, [], [], - optional = True, - doc = doc_loss_dict) + doc_loss_dict = ( + "The dictionary of definitions of multiple loss functions in multi-task mode. 
" + "Each loss_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, is the single definition of loss function, whose type should be set to `tensor`, `ener` or left unset.\n" + ) + ca = Argument("loss_dict", dict, [], [], optional=True, doc=doc_loss_dict) return ca # --- Training configurations: --- # def training_data_args(): # ! added by Ziyao: new specification style for data systems. link_sys = make_link("systems", "training/training_data/systems") - doc_systems = 'The data systems for training. ' \ - 'This key can be provided with a list that specifies the systems, or be provided with a string ' \ - 'by which the prefix of all systems are given and the list of the systems is automatically generated.' - doc_set_prefix = f'The prefix of the sets in the {link_sys}.' + doc_systems = ( + "The data systems for training. " + "This key can be provided with a list that specifies the systems, or be provided with a string " + "by which the prefix of all systems are given and the list of the systems is automatically generated." + ) + doc_set_prefix = f"The prefix of the sets in the {link_sys}." doc_batch_size = f'This key can be \n\n\ - list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\ - int: all {link_sys} use the same batch size.\n\n\ @@ -623,31 +925,61 @@ def training_data_args(): # ! added by Ziyao: new specification style for data - "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\ - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\ - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. 
A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.' - doc_sys_probs = "A list of float if specified. " \ - "Should be of the same length as `systems`, " \ + doc_sys_probs = ( + "A list of float if specified. " + "Should be of the same length as `systems`, " "specifying the probability of each system." - + ) args = [ Argument("systems", [list, str], optional=False, default=".", doc=doc_systems), - Argument("set_prefix", str, optional=True, default='set', doc=doc_set_prefix), - Argument("batch_size", [list, int, str], optional=True, default='auto', doc=doc_batch_size), - Argument("auto_prob", str, optional=True, default="prob_sys_size", - doc=doc_auto_prob_style, alias=["auto_prob_style",]), - Argument("sys_probs", list, optional=True, default=None, doc=doc_sys_probs, alias=["sys_weights"]), + Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix), + Argument( + "batch_size", + [list, int, str], + optional=True, + default="auto", + doc=doc_batch_size, + ), + Argument( + "auto_prob", + str, + optional=True, + default="prob_sys_size", + doc=doc_auto_prob_style, + alias=[ + "auto_prob_style", + ], + ), + Argument( + "sys_probs", + list, + optional=True, + default=None, + doc=doc_sys_probs, + alias=["sys_weights"], + ), ] doc_training_data = "Configurations of training data." - return Argument("training_data", dict, optional=True, - sub_fields=args, sub_variants=[], doc=doc_training_data) + return Argument( + "training_data", + dict, + optional=True, + sub_fields=args, + sub_variants=[], + doc=doc_training_data, + ) def validation_data_args(): # ! added by Ziyao: new specification style for data systems. 
link_sys = make_link("systems", "training/validation_data/systems") - doc_systems = 'The data systems for validation. ' \ - 'This key can be provided with a list that specifies the systems, or be provided with a string ' \ - 'by which the prefix of all systems are given and the list of the systems is automatically generated.' - doc_set_prefix = f'The prefix of the sets in the {link_sys}.' + doc_systems = ( + "The data systems for validation. " + "This key can be provided with a list that specifies the systems, or be provided with a string " + "by which the prefix of all systems are given and the list of the systems is automatically generated." + ) + doc_set_prefix = f"The prefix of the sets in the {link_sys}." doc_batch_size = f'This key can be \n\n\ - list: the length of which is the same as the {link_sys}. The batch size of each system is given by the elements of the list.\n\n\ - int: all {link_sys} use the same batch size.\n\n\ @@ -657,68 +989,123 @@ def validation_data_args(): # ! added by Ziyao: new specification style for dat - "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\ - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\ - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.' - doc_sys_probs = "A list of float if specified. " \ - "Should be of the same length as `systems`, " \ - "specifying the probability of each system." + doc_sys_probs = ( + "A list of float if specified. 
" + "Should be of the same length as `systems`, " + "specifying the probability of each system." + ) doc_numb_btch = "An integer that specifies the number of batches to be sampled for each validation period." args = [ Argument("systems", [list, str], optional=False, default=".", doc=doc_systems), - Argument("set_prefix", str, optional=True, default='set', doc=doc_set_prefix), - Argument("batch_size", [list, int, str], optional=True, default='auto', doc=doc_batch_size), - Argument("auto_prob", str, optional=True, default="prob_sys_size", - doc=doc_auto_prob_style, alias=["auto_prob_style", ]), - Argument("sys_probs", list, optional=True, default=None, doc=doc_sys_probs, alias=["sys_weights"]), - Argument("numb_btch", int, optional=True, default=1, doc=doc_numb_btch, alias=["numb_batch", ]) + Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix), + Argument( + "batch_size", + [list, int, str], + optional=True, + default="auto", + doc=doc_batch_size, + ), + Argument( + "auto_prob", + str, + optional=True, + default="prob_sys_size", + doc=doc_auto_prob_style, + alias=[ + "auto_prob_style", + ], + ), + Argument( + "sys_probs", + list, + optional=True, + default=None, + doc=doc_sys_probs, + alias=["sys_weights"], + ), + Argument( + "numb_btch", + int, + optional=True, + default=1, + doc=doc_numb_btch, + alias=[ + "numb_batch", + ], + ), ] - doc_validation_data = "Configurations of validation data. Similar to that of training data, " \ - "except that a `numb_btch` argument may be configured" - return Argument("validation_data", dict, optional=True, default=None, - sub_fields=args, sub_variants=[], doc=doc_validation_data) + doc_validation_data = ( + "Configurations of validation data. 
Similar to that of training data, " + "except that a `numb_btch` argument may be configured" + ) + return Argument( + "validation_data", + dict, + optional=True, + default=None, + sub_fields=args, + sub_variants=[], + doc=doc_validation_data, + ) def mixed_precision_args(): # ! added by Denghui. - doc_output_prec = 'The precision for mixed precision params. " \ + doc_output_prec = 'The precision for mixed precision params. " \ "The trainable variables precision during the mixed precision training process, " \ "supported options are float32 only currently.' - doc_compute_prec = 'The precision for mixed precision compute. " \ + doc_compute_prec = 'The precision for mixed precision compute. " \ "The compute precision during the mixed precision training process, "" \ "supported options are float16 and bfloat16 currently.' args = [ - Argument("output_prec", str, optional=True, default="float32", doc=doc_output_prec), - Argument("compute_prec", str, optional=False, default="float16", doc=doc_compute_prec), + Argument( + "output_prec", str, optional=True, default="float32", doc=doc_output_prec + ), + Argument( + "compute_prec", str, optional=False, default="float16", doc=doc_compute_prec + ), ] doc_mixed_precision = "Configurations of mixed precision." - return Argument("mixed_precision", dict, optional=True, - sub_fields=args, sub_variants=[], doc=doc_mixed_precision) + return Argument( + "mixed_precision", + dict, + optional=True, + sub_fields=args, + sub_variants=[], + doc=doc_mixed_precision, + ) def training_args(): # ! modified by Ziyao: data configuration isolated. - doc_numb_steps = 'Number of training batch. Each training uses one batch of data.' - doc_seed = 'The random seed for getting frames from the training data set.' - doc_disp_file = 'The file for printing learning curve.' - doc_disp_freq = 'The frequency of printing learning curve.' - doc_save_freq = 'The frequency of saving check point.' - doc_save_ckpt = 'The file name of saving check point.' 
- doc_disp_training = 'Displaying verbose information during training.' - doc_time_training = 'Timing durining training.' - doc_profiling = 'Profiling during training.' - doc_profiling_file = 'Output file for profiling.' - doc_enable_profiler = 'Enable TensorFlow Profiler (available in TensorFlow 2.3) to analyze performance. The log will be saved to `tensorboard_log_dir`.' - doc_tensorboard = 'Enable tensorboard' - doc_tensorboard_log_dir = 'The log directory of tensorboard outputs' - doc_tensorboard_freq = 'The frequency of writing tensorboard events.' - doc_data_dict = 'The dictionary of multi DataSystems in multi-task mode. ' \ - 'Each data_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, ' \ - 'contains training data and optional validation data definitions.' - doc_fitting_weight = 'Each fitting_weight[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, ' \ - 'is the training weight of fitting net `fitting_key`. ' \ - 'Fitting nets with higher weights will be selected with higher probabilities to be trained in one step. ' \ - 'Weights will be normalized and minus ones will be ignored. ' \ - 'If not set, each fitting net will be equally selected when training.' + doc_numb_steps = "Number of training batch. Each training uses one batch of data." + doc_seed = "The random seed for getting frames from the training data set." + doc_disp_file = "The file for printing learning curve." + doc_disp_freq = "The frequency of printing learning curve." + doc_save_freq = "The frequency of saving check point." + doc_save_ckpt = "The file name of saving check point." + doc_disp_training = "Displaying verbose information during training." + doc_time_training = "Timing durining training." + doc_profiling = "Profiling during training." + doc_profiling_file = "Output file for profiling." + doc_enable_profiler = "Enable TensorFlow Profiler (available in TensorFlow 2.3) to analyze performance. 
The log will be saved to `tensorboard_log_dir`." + doc_tensorboard = "Enable tensorboard" + doc_tensorboard_log_dir = "The log directory of tensorboard outputs" + doc_tensorboard_freq = "The frequency of writing tensorboard events." + doc_data_dict = ( + "The dictionary of multi DataSystems in multi-task mode. " + "Each data_dict[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, " + "contains training data and optional validation data definitions." + ) + doc_fitting_weight = ( + "Each fitting_weight[fitting_key], with user-defined name `fitting_key` in `model/fitting_net_dict`, " + "is the training weight of fitting net `fitting_key`. " + "Fitting nets with higher weights will be selected with higher probabilities to be trained in one step. " + "Weights will be normalized and minus ones will be ignored. " + "If not set, each fitting net will be equally selected when training." + ) arg_training_data = training_data_args() arg_validation_data = validation_data_args() @@ -728,33 +1115,65 @@ def training_args(): # ! modified by Ziyao: data configuration isolated. 
arg_training_data, arg_validation_data, mixed_precision_data, - Argument("numb_steps", int, optional=False, doc=doc_numb_steps, alias=["stop_batch"]), - Argument("seed", [int,None], optional=True, doc=doc_seed), - Argument("disp_file", str, optional=True, default='lcurve.out', doc=doc_disp_file), + Argument( + "numb_steps", int, optional=False, doc=doc_numb_steps, alias=["stop_batch"] + ), + Argument("seed", [int, None], optional=True, doc=doc_seed), + Argument( + "disp_file", str, optional=True, default="lcurve.out", doc=doc_disp_file + ), Argument("disp_freq", int, optional=True, default=1000, doc=doc_disp_freq), Argument("save_freq", int, optional=True, default=1000, doc=doc_save_freq), - Argument("save_ckpt", str, optional=True, default='model.ckpt', doc=doc_save_ckpt), - Argument("disp_training", bool, optional=True, default=True, doc=doc_disp_training), - Argument("time_training", bool, optional=True, default=True, doc=doc_time_training), + Argument( + "save_ckpt", str, optional=True, default="model.ckpt", doc=doc_save_ckpt + ), + Argument( + "disp_training", bool, optional=True, default=True, doc=doc_disp_training + ), + Argument( + "time_training", bool, optional=True, default=True, doc=doc_time_training + ), Argument("profiling", bool, optional=True, default=False, doc=doc_profiling), - Argument("profiling_file", str, optional=True, default='timeline.json', doc=doc_profiling_file), - Argument("enable_profiler", bool, optional=True, default=False, doc=doc_enable_profiler), - Argument("tensorboard", bool, optional=True, default=False, doc=doc_tensorboard), - Argument("tensorboard_log_dir", str, optional=True, default='log', doc=doc_tensorboard_log_dir), - Argument("tensorboard_freq", int, optional=True, default=1, doc=doc_tensorboard_freq), + Argument( + "profiling_file", + str, + optional=True, + default="timeline.json", + doc=doc_profiling_file, + ), + Argument( + "enable_profiler", + bool, + optional=True, + default=False, + doc=doc_enable_profiler, + ), 
+ Argument( + "tensorboard", bool, optional=True, default=False, doc=doc_tensorboard + ), + Argument( + "tensorboard_log_dir", + str, + optional=True, + default="log", + doc=doc_tensorboard_log_dir, + ), + Argument( + "tensorboard_freq", int, optional=True, default=1, doc=doc_tensorboard_freq + ), Argument("data_dict", dict, optional=True, doc=doc_data_dict), Argument("fitting_weight", dict, optional=True, doc=doc_fitting_weight), ] - doc_training = 'The training options.' - return Argument("training", dict, args, [], doc = doc_training) + doc_training = "The training options." + return Argument("training", dict, args, [], doc=doc_training) def make_index(keys): ret = [] for ii in keys: ret.append(make_link(ii, ii)) - return ', '.join(ret) + return ", ".join(ret) def gen_doc(*, make_anchor=True, make_link=True, **kwargs): @@ -775,40 +1194,49 @@ def gen_doc(*, make_anchor=True, make_link=True, **kwargs): ptr.append(nvnmda.gen_doc(make_anchor=make_anchor, make_link=make_link, **kwargs)) key_words = [] - for ii in "\n\n".join(ptr).split('\n'): - if 'argument path' in ii: - key_words.append(ii.split(':')[1].replace('`','').strip()) - #ptr.insert(0, make_index(key_words)) + for ii in "\n\n".join(ptr).split("\n"): + if "argument path" in ii: + key_words.append(ii.split(":")[1].replace("`", "").strip()) + # ptr.insert(0, make_index(key_words)) return "\n\n".join(ptr) + def gen_json(**kwargs): - return json.dumps(( + return json.dumps( + ( + model_args(), + learning_rate_args(), + loss_args(), + loss_dict_args(), + training_args(), + nvnmd_args(), + ), + cls=ArgumentEncoder, + ) + + +def gen_args(**kwargs): + return [ model_args(), learning_rate_args(), loss_args(), loss_dict_args(), training_args(), nvnmd_args(), - ), cls=ArgumentEncoder) + ] -def gen_args(**kwargs): - return [model_args(), - learning_rate_args(), - loss_args(), - loss_dict_args(), - training_args(), - nvnmd_args()] def normalize_hybrid_list(hy_list): new_list = [] - base = Argument("base", dict, [], 
[descrpt_variant_type_args()], doc = "") + base = Argument("base", dict, [], [descrpt_variant_type_args()], doc="") for ii in range(len(hy_list)): data = base.normalize_value(hy_list[ii], trim_pattern="_*") base.check_value(data, strict=True) new_list.append(data) return new_list + def normalize_multi_task(data): # single-task or multi-task mode single_fitting_net = "fitting_net" in data["model"].keys() @@ -819,31 +1247,56 @@ def normalize_multi_task(data): multi_training_data = "data_dict" in data["training"].keys() multi_loss = "loss_dict" in data.keys() multi_fitting_weight = "fitting_weight" in data["training"].keys() - assert (single_fitting_net == single_training_data) and \ - (multi_fitting_net == multi_training_data), \ - "In single-task mode, 'model/fitting_net' and 'training/training_data' must be defined at the same time! " \ - "While in multi-task mode, 'model/fitting_net_dict', 'training/data_dict' " \ - "must be defined at the same time! Please check your input script. " - assert not (single_fitting_net and multi_fitting_net), \ - "Single-task mode and multi-task mode can not be performed together. " \ + assert (single_fitting_net == single_training_data) and ( + multi_fitting_net == multi_training_data + ), ( + "In single-task mode, 'model/fitting_net' and 'training/training_data' must be defined at the same time! " + "While in multi-task mode, 'model/fitting_net_dict', 'training/data_dict' " + "must be defined at the same time! Please check your input script. " + ) + assert not (single_fitting_net and multi_fitting_net), ( + "Single-task mode and multi-task mode can not be performed together. " "Please check your input script and choose just one format! " - assert single_fitting_net or multi_fitting_net, "Please define your fitting net and training data! " + ) + assert ( + single_fitting_net or multi_fitting_net + ), "Please define your fitting net and training data! 
" if multi_fitting_net: - assert not single_valid_data, "In multi-task mode, 'training/validation_data' should not appear " \ - "outside 'training/data_dict'! Please check your input script." - assert not single_loss, "In multi-task mode, please use 'model/loss_dict' in stead of 'model/loss'! " - assert "type_map" in data["model"], "In multi-task mode, 'model/type_map' must be defined! " - data["model"]["fitting_net_dict"] = normalize_fitting_net_dict(data["model"]["fitting_net_dict"]) - data["training"]["data_dict"] = normalize_data_dict(data["training"]["data_dict"]) - data["loss_dict"] = normalize_loss_dict(data["model"]["fitting_net_dict"].keys(), - data["loss_dict"]) if multi_loss else {} - fitting_weight = data["training"]["fitting_weight"] if multi_fitting_weight else None - data["training"]["fitting_weight"] = \ - normalize_fitting_weight(data["model"]["fitting_net_dict"].keys(), - data["training"]["data_dict"].keys(), - fitting_weight=fitting_weight) + assert not single_valid_data, ( + "In multi-task mode, 'training/validation_data' should not appear " + "outside 'training/data_dict'! Please check your input script." + ) + assert ( + not single_loss + ), "In multi-task mode, please use 'model/loss_dict' in stead of 'model/loss'! " + assert ( + "type_map" in data["model"] + ), "In multi-task mode, 'model/type_map' must be defined! 
" + data["model"]["fitting_net_dict"] = normalize_fitting_net_dict( + data["model"]["fitting_net_dict"] + ) + data["training"]["data_dict"] = normalize_data_dict( + data["training"]["data_dict"] + ) + data["loss_dict"] = ( + normalize_loss_dict( + data["model"]["fitting_net_dict"].keys(), data["loss_dict"] + ) + if multi_loss + else {} + ) + fitting_weight = ( + data["training"]["fitting_weight"] if multi_fitting_weight else None + ) + data["training"]["fitting_weight"] = normalize_fitting_weight( + data["model"]["fitting_net_dict"].keys(), + data["training"]["data_dict"].keys(), + fitting_weight=fitting_weight, + ) else: - assert not multi_loss, "In single-task mode, please use 'model/loss' in stead of 'model/loss_dict'! " + assert ( + not multi_loss + ), "In single-task mode, please use 'model/loss' in stead of 'model/loss_dict'! " return data @@ -851,7 +1304,9 @@ def normalize_fitting_net_dict(fitting_net_dict): new_dict = {} base = Argument("base", dict, [], [fitting_variant_type_args()], doc="") for fitting_key_item in fitting_net_dict: - data = base.normalize_value(fitting_net_dict[fitting_key_item], trim_pattern="_*") + data = base.normalize_value( + fitting_net_dict[fitting_key_item], trim_pattern="_*" + ) base.check_value(data, strict=True) new_dict[fitting_key_item] = data return new_dict @@ -859,7 +1314,9 @@ def normalize_fitting_net_dict(fitting_net_dict): def normalize_data_dict(data_dict): new_dict = {} - base = Argument("base", dict, [training_data_args(), validation_data_args()], [], doc="") + base = Argument( + "base", dict, [training_data_args(), validation_data_args()], [], doc="" + ) for data_system_key_item in data_dict: data = base.normalize_value(data_dict[data_system_key_item], trim_pattern="_*") base.check_value(data, strict=True) @@ -870,11 +1327,13 @@ def normalize_data_dict(data_dict): def normalize_loss_dict(fitting_keys, loss_dict): # check the loss dict failed_loss_keys = [item for item in loss_dict if item not in fitting_keys] - 
assert not failed_loss_keys, \ - "Loss dict key(s) {} not have corresponding fitting keys in {}! ".format( - str(failed_loss_keys), str(list(fitting_keys))) + assert ( + not failed_loss_keys + ), "Loss dict key(s) {} not have corresponding fitting keys in {}! ".format( + str(failed_loss_keys), str(list(fitting_keys)) + ) new_dict = {} - base = Argument('base', dict, [], [loss_variant_type_args()], doc="") + base = Argument("base", dict, [], [loss_variant_type_args()], doc="") for item in loss_dict: data = base.normalize_value(loss_dict[item], trim_pattern="_*") base.check_value(data, strict=True) @@ -885,9 +1344,11 @@ def normalize_loss_dict(fitting_keys, loss_dict): def normalize_fitting_weight(fitting_keys, data_keys, fitting_weight=None): # check the mapping failed_data_keys = [item for item in data_keys if item not in fitting_keys] - assert not failed_data_keys, \ - "Data dict key(s) {} not have corresponding fitting keys in {}! ".format( - str(failed_data_keys), str(list(fitting_keys))) + assert ( + not failed_data_keys + ), "Data dict key(s) {} not have corresponding fitting keys in {}! ".format( + str(failed_data_keys), str(list(fitting_keys)) + ) empty_fitting_keys = [] valid_fitting_keys = [] for item in fitting_keys: @@ -896,36 +1357,49 @@ def normalize_fitting_weight(fitting_keys, data_keys, fitting_weight=None): else: valid_fitting_keys.append(item) if empty_fitting_keys: - log.warning("Fitting net(s) {} have no data and will not be used in training.".format(str(empty_fitting_keys))) + log.warning( + "Fitting net(s) {} have no data and will not be used in training.".format( + str(empty_fitting_keys) + ) + ) num_pair = len(valid_fitting_keys) assert num_pair > 0, "No valid training data systems for fitting nets!" # check and normalize the fitting weight new_weight = {} if fitting_weight is None: - equal_weight = 1. / num_pair + equal_weight = 1.0 / num_pair for item in fitting_keys: - new_weight[item] = equal_weight if item in valid_fitting_keys else 0. 
+ new_weight[item] = equal_weight if item in valid_fitting_keys else 0.0 else: - failed_weight_keys = [item for item in fitting_weight if item not in fitting_keys] - assert not failed_weight_keys, \ - "Fitting weight key(s) {} not have corresponding fitting keys in {}! ".format( - str(failed_weight_keys), str(list(fitting_keys))) - sum_prob = 0. + failed_weight_keys = [ + item for item in fitting_weight if item not in fitting_keys + ] + assert ( + not failed_weight_keys + ), "Fitting weight key(s) {} not have corresponding fitting keys in {}! ".format( + str(failed_weight_keys), str(list(fitting_keys)) + ) + sum_prob = 0.0 for item in fitting_keys: if item in valid_fitting_keys: - if item in fitting_weight \ - and isinstance(fitting_weight[item], (int, float)) and fitting_weight[item] > 0.: + if ( + item in fitting_weight + and isinstance(fitting_weight[item], (int, float)) + and fitting_weight[item] > 0.0 + ): sum_prob += fitting_weight[item] new_weight[item] = fitting_weight[item] else: valid_fitting_keys.remove(item) - log.warning("Fitting net '{}' has zero or invalid weight " - "and will not be used in training.".format(item)) - new_weight[item] = 0. + log.warning( + "Fitting net '{}' has zero or invalid weight " + "and will not be used in training.".format(item) + ) + new_weight[item] = 0.0 else: - new_weight[item] = 0. - assert sum_prob > 0., "No valid training weight for fitting nets!" + new_weight[item] = 0.0 + assert sum_prob > 0.0, "No valid training weight for fitting nets!" 
# normalize for item in new_weight: new_weight[item] /= sum_prob @@ -934,8 +1408,9 @@ def normalize_fitting_weight(fitting_keys, data_keys, fitting_weight=None): def normalize(data): if "hybrid" == data["model"]["descriptor"]["type"]: - data["model"]["descriptor"]["list"] \ - = normalize_hybrid_list(data["model"]["descriptor"]["list"]) + data["model"]["descriptor"]["list"] = normalize_hybrid_list( + data["model"]["descriptor"]["list"] + ) data = normalize_multi_task(data) ma = model_args() @@ -952,6 +1427,5 @@ def normalize(data): return data -if __name__ == '__main__': +if __name__ == "__main__": gen_doc() - diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py index d89784e9cf..ed54648443 100644 --- a/deepmd/utils/batch_size.py +++ b/deepmd/utils/batch_size.py @@ -1,12 +1,18 @@ -import os import logging -from typing import Callable, Tuple +import os +from typing import ( + Callable, + Tuple, +) import numpy as np -from deepmd.env import tf -from deepmd.utils.errors import OutOfMemoryError - +from deepmd.env import ( + tf, +) +from deepmd.utils.errors import ( + OutOfMemoryError, +) log = logging.getLogger(__name__) @@ -36,15 +42,16 @@ class AutoBatchSize: current_batch_size : int current batch size (number of total atoms) maximum_working_batch_size : int - maximum working batch size + maximum working batch size minimal_not_working_batch_size : int minimal not working batch size """ - def __init__(self, initial_batch_size: int = 1024, factor: float = 2.) 
-> None: + + def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None: # See also PyTorchLightning/pytorch-lightning#1638 # TODO: discuss a proper initial batch size self.current_batch_size = initial_batch_size - DP_INFER_BATCH_SIZE = int(os.environ.get('DP_INFER_BATCH_SIZE', 0)) + DP_INFER_BATCH_SIZE = int(os.environ.get("DP_INFER_BATCH_SIZE", 0)) if DP_INFER_BATCH_SIZE > 0: self.current_batch_size = DP_INFER_BATCH_SIZE self.maximum_working_batch_size = DP_INFER_BATCH_SIZE @@ -54,7 +61,9 @@ def __init__(self, initial_batch_size: int = 1024, factor: float = 2.) -> None: if tf.test.is_gpu_available(): self.minimal_not_working_batch_size = 2**31 else: - self.minimal_not_working_batch_size = self.maximum_working_batch_size + 1 + self.minimal_not_working_batch_size = ( + self.maximum_working_batch_size + 1 + ) log.warning( "You can use the environment variable DP_INFER_BATCH_SIZE to" "control the inference batch size (nframes * natoms). " @@ -63,9 +72,11 @@ def __init__(self, initial_batch_size: int = 1024, factor: float = 2.) -> None: self.factor = factor - def execute(self, callable: Callable, start_index: int, natoms: int) -> Tuple[int, tuple]: + def execute( + self, callable: Callable, start_index: int, natoms: int + ) -> Tuple[int, tuple]: """Excuate a method with given batch size. 
- + Parameters ---------- callable : Callable @@ -75,7 +86,7 @@ def execute(self, callable: Callable, start_index: int, natoms: int) -> Tuple[in start index natoms : int natoms - + Returns ------- int @@ -89,34 +100,53 @@ def execute(self, callable: Callable, start_index: int, natoms: int) -> Tuple[in OOM when batch size is 1 """ try: - n_batch, result = callable(max(self.current_batch_size // natoms, 1), start_index) + n_batch, result = callable( + max(self.current_batch_size // natoms, 1), start_index + ) except OutOfMemoryError as e: # TODO: it's very slow to catch OOM error; I don't know what TF is doing here # but luckily we only need to catch once - self.minimal_not_working_batch_size = min(self.minimal_not_working_batch_size, self.current_batch_size) + self.minimal_not_working_batch_size = min( + self.minimal_not_working_batch_size, self.current_batch_size + ) if self.maximum_working_batch_size >= self.minimal_not_working_batch_size: - self.maximum_working_batch_size = int(self.minimal_not_working_batch_size / self.factor) + self.maximum_working_batch_size = int( + self.minimal_not_working_batch_size / self.factor + ) if self.minimal_not_working_batch_size <= natoms: - raise OutOfMemoryError("The callable still throws an out-of-memory (OOM) error even when batch size is 1!") from e + raise OutOfMemoryError( + "The callable still throws an out-of-memory (OOM) error even when batch size is 1!" 
+ ) from e # adjust the next batch size - self._adjust_batch_size(1./self.factor) + self._adjust_batch_size(1.0 / self.factor) return 0, None else: n_tot = n_batch * natoms - self.maximum_working_batch_size = max(self.maximum_working_batch_size, n_tot) + self.maximum_working_batch_size = max( + self.maximum_working_batch_size, n_tot + ) # adjust the next batch size - if n_tot + natoms > self.current_batch_size and self.current_batch_size * self.factor < self.minimal_not_working_batch_size: + if ( + n_tot + natoms > self.current_batch_size + and self.current_batch_size * self.factor + < self.minimal_not_working_batch_size + ): self._adjust_batch_size(self.factor) return n_batch, result def _adjust_batch_size(self, factor: float): old_batch_size = self.current_batch_size self.current_batch_size = int(self.current_batch_size * factor) - log.info("Adjust batch size from %d to %d" % (old_batch_size, self.current_batch_size)) + log.info( + "Adjust batch size from %d to %d" + % (old_batch_size, self.current_batch_size) + ) + + def execute_all( + self, callable: Callable, total_size: int, natoms: int, *args, **kwargs + ) -> Tuple[np.ndarray]: + """Excuate a method with all given data. - def execute_all(self, callable: Callable, total_size: int, natoms: int, *args, **kwargs) -> Tuple[np.ndarray]: - """Excuate a method with all given data. - Parameters ---------- callable : Callable @@ -128,12 +158,29 @@ def execute_all(self, callable: Callable, total_size: int, natoms: int, *args, * **kwargs If 2D np.ndarray, assume the first axis is batch; otherwise do nothing. 
""" - def execute_with_batch_size(batch_size: int, start_index: int) -> Tuple[int, Tuple[np.ndarray]]: + + def execute_with_batch_size( + batch_size: int, start_index: int + ) -> Tuple[int, Tuple[np.ndarray]]: end_index = start_index + batch_size end_index = min(end_index, total_size) return (end_index - start_index), callable( - *[(vv[start_index:end_index] if isinstance(vv, np.ndarray) and vv.ndim > 1 else vv) for vv in args], - **{kk: (vv[start_index:end_index] if isinstance(vv, np.ndarray) and vv.ndim > 1 else vv) for kk, vv in kwargs.items()}, + *[ + ( + vv[start_index:end_index] + if isinstance(vv, np.ndarray) and vv.ndim > 1 + else vv + ) + for vv in args + ], + **{ + kk: ( + vv[start_index:end_index] + if isinstance(vv, np.ndarray) and vv.ndim > 1 + else vv + ) + for kk, vv in kwargs.items() + }, ) index = 0 @@ -147,7 +194,7 @@ def execute_with_batch_size(batch_size: int, start_index: int) -> Tuple[int, Tup for rr in result: rr.reshape((n_batch, -1)) results.append(result) - + r = tuple([np.concatenate(r, axis=0) for r in zip(*results)]) if len(r) == 1: # avoid returning tuple if callable doesn't return tuple diff --git a/deepmd/utils/compat.py b/deepmd/utils/compat.py index 8a048a13a2..325d0507e9 100644 --- a/deepmd/utils/compat.py +++ b/deepmd/utils/compat.py @@ -2,11 +2,22 @@ import json import warnings -from pathlib import Path -from typing import Any, Dict, Optional, Sequence, Union +from pathlib import ( + Path, +) +from typing import ( + Any, + Dict, + Optional, + Sequence, + Union, +) import numpy as np -from deepmd.common import j_must_have + +from deepmd.common import ( + j_must_have, +) def convert_input_v0_v1( @@ -43,8 +54,10 @@ def convert_input_v0_v1( def _warning_input_v0_v1(fname: Optional[Union[str, Path]]): - msg = "It seems that you are using a deepmd-kit input of version 0.x.x, " \ - "which is deprecated. 
we have converted the input to >2.0.0 compatible" + msg = ( + "It seems that you are using a deepmd-kit input of version 0.x.x, " + "which is deprecated. we have converted the input to >2.0.0 compatible" + ) if fname is not None: msg += f", and output it to file {fname}" warnings.warn(msg) @@ -112,7 +125,7 @@ def _smth_descriptor(jdata: Dict[str, Any]) -> Dict[str, Any]: descriptor["seed"] = seed descriptor["type"] = "se_a" descriptor["sel"] = jdata["sel_a"] - _jcopy(jdata, descriptor, ("rcut", )) + _jcopy(jdata, descriptor, ("rcut",)) descriptor["rcut_smth"] = jdata.get("rcut_smth", descriptor["rcut"]) descriptor["neuron"] = j_must_have(jdata, "filter_neuron") descriptor["axis_neuron"] = j_must_have(jdata, "axis_neuron", ["n_axis_neuron"]) @@ -257,7 +270,7 @@ def _jcopy(src: Dict[str, Any], dst: Dict[str, Any], keys: Sequence[str]): def remove_decay_rate(jdata: Dict[str, Any]): """convert decay_rate to stop_lr. - + Parameters ---------- jdata: Dict[str, Any] @@ -274,9 +287,9 @@ def remove_decay_rate(jdata: Dict[str, Any]): lr.pop("decay_rate") -def convert_input_v1_v2(jdata: Dict[str, Any], - warning: bool = True, - dump: Optional[Union[str, Path]] = None) -> Dict[str, Any]: +def convert_input_v1_v2( + jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None +) -> Dict[str, Any]: tr_cfg = jdata["training"] tr_data_keys = { @@ -287,7 +300,7 @@ def convert_input_v1_v2(jdata: Dict[str, Any], "auto_prob", # alias included "sys_weights", - "auto_prob_style" + "auto_prob_style", } tr_data_cfg = {k: v for k, v in tr_cfg.items() if k in tr_data_keys} @@ -309,20 +322,22 @@ def convert_input_v1_v2(jdata: Dict[str, Any], def _warning_input_v1_v2(fname: Optional[Union[str, Path]]): - msg = "It seems that you are using a deepmd-kit input of version 1.x.x, " \ - "which is deprecated. we have converted the input to >2.0.0 compatible" + msg = ( + "It seems that you are using a deepmd-kit input of version 1.x.x, " + "which is deprecated. 
we have converted the input to >2.0.0 compatible" + ) if fname is not None: msg += f", and output it to file {fname}" warnings.warn(msg) -def deprecate_numb_test(jdata: Dict[str, Any], - warning: bool = True, - dump: Optional[Union[str, Path]] = None) -> Dict[str, Any]: +def deprecate_numb_test( + jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None +) -> Dict[str, Any]: """Deprecate `numb_test` since v2.1. It has taken no effect since v2.0. - + See `#1243 `_. - + Parameters ---------- jdata : Dict[str, Any] @@ -354,9 +369,9 @@ def deprecate_numb_test(jdata: Dict[str, Any], return jdata -def update_deepmd_input(jdata: Dict[str, Any], - warning: bool = True, - dump: Optional[Union[str, Path]] = None) -> Dict[str, Any]: +def update_deepmd_input( + jdata: Dict[str, Any], warning: bool = True, dump: Optional[Union[str, Path]] = None +) -> Dict[str, Any]: def is_deepmd_v0_input(jdata): return "model" not in jdata.keys() diff --git a/deepmd/utils/convert.py b/deepmd/utils/convert.py index 70f9c24d0e..7a89fdec05 100644 --- a/deepmd/utils/convert.py +++ b/deepmd/utils/convert.py @@ -1,12 +1,18 @@ import os import textwrap -from deepmd.env import tf -from google.protobuf import text_format + +from google.protobuf import ( + text_format, +) + +from deepmd.env import ( + tf, +) def convert_13_to_21(input_model: str, output_model: str): """Convert DP 1.3 graph to 2.1 graph. 
- + Parameters ---------- input_model : str @@ -14,18 +20,18 @@ def convert_13_to_21(input_model: str, output_model: str): output_model : str filename of the output graph """ - convert_pb_to_pbtxt(input_model, 'frozen_model.pbtxt') - convert_dp13_to_dp20('frozen_model.pbtxt') - convert_dp20_to_dp21('frozen_model.pbtxt') - convert_pbtxt_to_pb('frozen_model.pbtxt', output_model) - if os.path.isfile('frozen_model.pbtxt'): - os.remove('frozen_model.pbtxt') + convert_pb_to_pbtxt(input_model, "frozen_model.pbtxt") + convert_dp13_to_dp20("frozen_model.pbtxt") + convert_dp20_to_dp21("frozen_model.pbtxt") + convert_pbtxt_to_pb("frozen_model.pbtxt", output_model) + if os.path.isfile("frozen_model.pbtxt"): + os.remove("frozen_model.pbtxt") print("the converted output model (2.1 support) is saved in %s" % output_model) def convert_12_to_21(input_model: str, output_model: str): """Convert DP 1.2 graph to 2.1 graph. - + Parameters ---------- input_model : str @@ -33,19 +39,19 @@ def convert_12_to_21(input_model: str, output_model: str): output_model : str filename of the output graph """ - convert_pb_to_pbtxt(input_model, 'frozen_model.pbtxt') - convert_dp12_to_dp13('frozen_model.pbtxt') - convert_dp13_to_dp20('frozen_model.pbtxt') - convert_dp20_to_dp21('frozen_model.pbtxt') - convert_pbtxt_to_pb('frozen_model.pbtxt', output_model) - if os.path.isfile('frozen_model.pbtxt'): - os.remove('frozen_model.pbtxt') + convert_pb_to_pbtxt(input_model, "frozen_model.pbtxt") + convert_dp12_to_dp13("frozen_model.pbtxt") + convert_dp13_to_dp20("frozen_model.pbtxt") + convert_dp20_to_dp21("frozen_model.pbtxt") + convert_pbtxt_to_pb("frozen_model.pbtxt", output_model) + if os.path.isfile("frozen_model.pbtxt"): + os.remove("frozen_model.pbtxt") print("the converted output model (2.1 support) is saved in %s" % output_model) def convert_10_to_21(input_model: str, output_model: str): """Convert DP 1.0 graph to 2.1 graph. 
- + Parameters ---------- input_model : str @@ -53,20 +59,20 @@ def convert_10_to_21(input_model: str, output_model: str): output_model : str filename of the output graph """ - convert_pb_to_pbtxt(input_model, 'frozen_model.pbtxt') - convert_dp10_to_dp11('frozen_model.pbtxt') - convert_dp12_to_dp13('frozen_model.pbtxt') - convert_dp13_to_dp20('frozen_model.pbtxt') - convert_dp20_to_dp21('frozen_model.pbtxt') - convert_pbtxt_to_pb('frozen_model.pbtxt', output_model) - if os.path.isfile('frozen_model.pbtxt'): - os.remove('frozen_model.pbtxt') + convert_pb_to_pbtxt(input_model, "frozen_model.pbtxt") + convert_dp10_to_dp11("frozen_model.pbtxt") + convert_dp12_to_dp13("frozen_model.pbtxt") + convert_dp13_to_dp20("frozen_model.pbtxt") + convert_dp20_to_dp21("frozen_model.pbtxt") + convert_pbtxt_to_pb("frozen_model.pbtxt", output_model) + if os.path.isfile("frozen_model.pbtxt"): + os.remove("frozen_model.pbtxt") print("the converted output model (2.1 support) is saved in %s" % output_model) def convert_012_to_21(input_model: str, output_model: str): """Convert DP 0.12 graph to 2.1 graph. 
- + Parameters ---------- input_model : str @@ -74,21 +80,21 @@ def convert_012_to_21(input_model: str, output_model: str): output_model : str filename of the output graph """ - convert_pb_to_pbtxt(input_model, 'frozen_model.pbtxt') - convert_dp012_to_dp10('frozen_model.pbtxt') - convert_dp10_to_dp11('frozen_model.pbtxt') - convert_dp12_to_dp13('frozen_model.pbtxt') - convert_dp13_to_dp20('frozen_model.pbtxt') - convert_dp20_to_dp21('frozen_model.pbtxt') - convert_pbtxt_to_pb('frozen_model.pbtxt', output_model) - if os.path.isfile('frozen_model.pbtxt'): - os.remove('frozen_model.pbtxt') + convert_pb_to_pbtxt(input_model, "frozen_model.pbtxt") + convert_dp012_to_dp10("frozen_model.pbtxt") + convert_dp10_to_dp11("frozen_model.pbtxt") + convert_dp12_to_dp13("frozen_model.pbtxt") + convert_dp13_to_dp20("frozen_model.pbtxt") + convert_dp20_to_dp21("frozen_model.pbtxt") + convert_pbtxt_to_pb("frozen_model.pbtxt", output_model) + if os.path.isfile("frozen_model.pbtxt"): + os.remove("frozen_model.pbtxt") print("the converted output model (2.1 support) is saved in %s" % output_model) def convert_20_to_21(input_model: str, output_model: str): """Convert DP 2.0 graph to 2.1 graph. 
- + Parameters ---------- input_model : str @@ -96,16 +102,17 @@ def convert_20_to_21(input_model: str, output_model: str): output_model : str filename of the output graph """ - convert_pb_to_pbtxt(input_model, 'frozen_model.pbtxt') - convert_dp20_to_dp21('frozen_model.pbtxt') - convert_pbtxt_to_pb('frozen_model.pbtxt', output_model) - if os.path.isfile('frozen_model.pbtxt'): - os.remove('frozen_model.pbtxt') + convert_pb_to_pbtxt(input_model, "frozen_model.pbtxt") + convert_dp20_to_dp21("frozen_model.pbtxt") + convert_pbtxt_to_pb("frozen_model.pbtxt", output_model) + if os.path.isfile("frozen_model.pbtxt"): + os.remove("frozen_model.pbtxt") print("the converted output model (2.1 support) is saved in %s" % output_model) + def convert_pb_to_pbtxt(pbfile: str, pbtxtfile: str): """Convert DP graph to graph text. - + Parameters ---------- pbfile : str @@ -113,15 +120,16 @@ def convert_pb_to_pbtxt(pbfile: str, pbtxtfile: str): pbtxtfile : str filename of the output graph text """ - with tf.gfile.GFile(pbfile, 'rb') as f: + with tf.gfile.GFile(pbfile, "rb") as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) - tf.import_graph_def(graph_def, name='') - tf.train.write_graph(graph_def, './', pbtxtfile, as_text=True) + tf.import_graph_def(graph_def, name="") + tf.train.write_graph(graph_def, "./", pbtxtfile, as_text=True) + def convert_pbtxt_to_pb(pbtxtfile: str, pbfile: str): """Convert DP graph text to graph. - + Parameters ---------- pbtxtfile : str @@ -129,17 +137,17 @@ def convert_pbtxt_to_pb(pbtxtfile: str, pbfile: str): pbfile : str filename of the output graph """ - with tf.gfile.GFile(pbtxtfile, 'r') as f: + with tf.gfile.GFile(pbtxtfile, "r") as f: graph_def = tf.GraphDef() file_content = f.read() # Merges the human-readable string in `file_content` into `graph_def`. 
text_format.Merge(file_content, graph_def) - tf.train.write_graph(graph_def, './', pbfile, as_text=False) + tf.train.write_graph(graph_def, "./", pbfile, as_text=False) def convert_dp012_to_dp10(file: str): """Convert DP 0.12 graph text to 1.0 graph text. - + Parameters ---------- file : str @@ -149,18 +157,20 @@ def convert_dp012_to_dp10(file: str): file_content = fp.read() # note: atom_energy must be put before energy, # otherwise atom_energy_test -> atom_o_energy - file_content = file_content\ - .replace('DescrptNorot', 'DescrptSeA') \ - .replace('ProdForceNorot', 'ProdForceSeA') \ - .replace('ProdVirialNorot', 'ProdVirialSeA') \ - .replace('t_rcut', 'descrpt_attr/rcut') \ - .replace('t_ntypes', 'descrpt_attr/ntypes') \ - .replace('atom_energy_test', 'o_atom_energy') \ - .replace('atom_virial_test', 'o_atom_virial') \ - .replace('energy_test', 'o_energy') \ - .replace('force_test', 'o_force') \ - .replace('virial_test', 'o_virial') - file_content += textwrap.dedent("""\ + file_content = ( + file_content.replace("DescrptNorot", "DescrptSeA") + .replace("ProdForceNorot", "ProdForceSeA") + .replace("ProdVirialNorot", "ProdVirialSeA") + .replace("t_rcut", "descrpt_attr/rcut") + .replace("t_ntypes", "descrpt_attr/ntypes") + .replace("atom_energy_test", "o_atom_energy") + .replace("atom_virial_test", "o_atom_virial") + .replace("energy_test", "o_energy") + .replace("force_test", "o_force") + .replace("virial_test", "o_virial") + ) + file_content += textwrap.dedent( + """\ node { name: "fitting_attr/dfparam" op: "Const" @@ -182,8 +192,10 @@ def convert_dp012_to_dp10(file: str): } } } - """) - file_content += textwrap.dedent("""\ + """ + ) + file_content += textwrap.dedent( + """\ node { name: "model_attr/model_type" op: "Const" @@ -205,8 +217,10 @@ def convert_dp012_to_dp10(file: str): } } } - """) - file_content += textwrap.dedent("""\ + """ + ) + file_content += textwrap.dedent( + """\ node { name: "model_attr/tmap" op: "Const" @@ -227,22 +241,25 @@ def 
convert_dp012_to_dp10(file: str): } } } - } - """) - with open(file, 'w') as fp: + } + """ + ) + with open(file, "w") as fp: fp.write(file_content) def convert_dp10_to_dp11(file: str): """Convert DP 1.0 graph text to 1.1 graph text. - + Parameters ---------- file : str filename of the graph text """ - with open(file, 'a') as f: - f.write(textwrap.dedent("""\ + with open(file, "a") as f: + f.write( + textwrap.dedent( + """\ node { name: "fitting_attr/daparam" op: "Const" @@ -263,12 +280,14 @@ def convert_dp10_to_dp11(file: str): } } } } - """)) + """ + ) + ) def convert_dp12_to_dp13(file: str): """Convert DP 1.2 graph text to 1.3 graph text. - + Parameters ---------- file : str @@ -278,27 +297,31 @@ def convert_dp12_to_dp13(file: str): with open(file, "r", encoding="utf-8") as f: ii = 0 lines = f.readlines() - while (ii < len(lines)): + while ii < len(lines): line = lines[ii] file_data += line - ii+=1 - if 'name' in line and ('DescrptSeA' in line or 'ProdForceSeA' in line or 'ProdVirialSeA' in line): - while not('attr' in lines[ii] and '{' in lines[ii]): + ii += 1 + if "name" in line and ( + "DescrptSeA" in line + or "ProdForceSeA" in line + or "ProdVirialSeA" in line + ): + while not ("attr" in lines[ii] and "{" in lines[ii]): file_data += lines[ii] - ii+=1 - file_data += ' attr {\n' - file_data += ' key: \"T\"\n' - file_data += ' value {\n' - file_data += ' type: DT_DOUBLE\n' - file_data += ' }\n' - file_data += ' }\n' + ii += 1 + file_data += " attr {\n" + file_data += ' key: "T"\n' + file_data += " value {\n" + file_data += " type: DT_DOUBLE\n" + file_data += " }\n" + file_data += " }\n" with open(file, "w", encoding="utf-8") as f: f.write(file_data) def convert_dp13_to_dp20(fname: str): """Convert DP 1.3 graph text to 2.0 graph text. 
- + Parameters ---------- file : str @@ -306,7 +329,8 @@ def convert_dp13_to_dp20(fname: str): """ with open(fname) as fp: file_content = fp.read() - file_content += textwrap.dedent("""\ + file_content += textwrap.dedent( + """\ node { name: "model_attr/model_version" op: "Const" @@ -328,17 +352,20 @@ def convert_dp13_to_dp20(fname: str): } } } - """) - file_content = file_content\ - .replace('DescrptSeA', 'ProdEnvMatA')\ - .replace('DescrptSeR', 'ProdEnvMatR') - with open(fname, 'w') as fp: + """ + ) + file_content = file_content.replace("DescrptSeA", "ProdEnvMatA").replace( + "DescrptSeR", "ProdEnvMatR" + ) + with open(fname, "w") as fp: fp.write(file_content) + def convert_dp20_to_dp21(fname: str): with open(fname) as fp: file_content = fp.read() - old_model_version_node = textwrap.dedent("""\ + old_model_version_node = textwrap.dedent( + """\ node { name: "model_attr/model_version" op: "Const" @@ -360,8 +387,10 @@ def convert_dp20_to_dp21(fname: str): } } } - """) - new_model_version_node = textwrap.dedent("""\ + """ + ) + new_model_version_node = textwrap.dedent( + """\ node { name: "model_attr/model_version" op: "Const" @@ -383,11 +412,13 @@ def convert_dp20_to_dp21(fname: str): } } } - """) - file_content = file_content\ - .replace(old_model_version_node, new_model_version_node)\ - .replace('TabulateFusion', 'TabulateFusionSeA')\ - .replace('TabulateFusionGrad', 'TabulateFusionSeAGrad')\ - .replace('TabulateFusionGradGrad', 'TabulateFusionSeAGradGrad') - with open(fname, 'w') as fp: + """ + ) + file_content = ( + file_content.replace(old_model_version_node, new_model_version_node) + .replace("TabulateFusion", "TabulateFusionSeA") + .replace("TabulateFusionGrad", "TabulateFusionSeAGrad") + .replace("TabulateFusionGradGrad", "TabulateFusionSeAGradGrad") + ) + with open(fname, "w") as fp: fp.write(file_content) diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index a5e697259b..f66abf80d0 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ 
-1,22 +1,32 @@ #!/usr/bin/env python3 -import time import glob -import numpy as np -import os.path -from typing import Optional, Tuple, List import logging +import os.path +import time +from typing import ( + List, + Optional, + Tuple, +) -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION -from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION +import numpy as np + +from deepmd.env import ( + GLOBAL_ENER_FLOAT_PRECISION, + GLOBAL_NP_FLOAT_PRECISION, +) from deepmd.utils import random as dp_random -from deepmd.utils.path import DPPath +from deepmd.utils.path import ( + DPPath, +) log = logging.getLogger(__name__) -class DeepmdData() : + +class DeepmdData: """ - Class for a data system. + Class for a data system. It loads data from hard disk, and mantains the data as a `data_dict` @@ -37,21 +47,26 @@ class DeepmdData() : trn_all_set Use all sets as training dataset. Otherwise, if the number of sets is more than 1, the last set is left for test. """ - def __init__ (self, - sys_path : str, - set_prefix : str = 'set', - shuffle_test : bool = True, - type_map : List[str] = None, - optional_type_map : bool = True, - modifier = None, - trn_all_set : bool = False) : + + def __init__( + self, + sys_path: str, + set_prefix: str = "set", + shuffle_test: bool = True, + type_map: List[str] = None, + optional_type_map: bool = True, + modifier=None, + trn_all_set: bool = False, + ): """ Constructor """ root = DPPath(sys_path) self.dirs = root.glob(set_prefix + ".*") self.dirs.sort() - self.mixed_type = self._check_mode(self.dirs[0]) # mixed_type format only has one set + self.mixed_type = self._check_mode( + self.dirs[0] + ) # mixed_type format only has one set # load atom type self.atom_type = self._load_type(root) self.natoms = len(self.atom_type) @@ -60,29 +75,39 @@ def __init__ (self, self.atom_type_mix = self._load_type_mix(self.dirs[0]) # load atom type map self.type_map = self._load_type_map(root) - assert optional_type_map or self.type_map is not None, \ - 'System {} must have 
type_map.raw in this mode! '.format(sys_path) + assert ( + optional_type_map or self.type_map is not None + ), "System {} must have type_map.raw in this mode! ".format(sys_path) if self.type_map is not None: - assert(len(self.type_map) >= max(self.atom_type)+1) + assert len(self.type_map) >= max(self.atom_type) + 1 # check pbc self.pbc = self._check_pbc(root) # enforce type_map if necessary if type_map is not None and self.type_map is not None: if not self.mixed_type: - atom_type_ = [type_map.index(self.type_map[ii]) for ii in self.atom_type] - self.atom_type = np.array(atom_type_, dtype = np.int32) + atom_type_ = [ + type_map.index(self.type_map[ii]) for ii in self.atom_type + ] + self.atom_type = np.array(atom_type_, dtype=np.int32) else: sorter = np.argsort(type_map) - type_idx_map = sorter[np.searchsorted(type_map, self.type_map, sorter=sorter)] + type_idx_map = sorter[ + np.searchsorted(type_map, self.type_map, sorter=sorter) + ] try: - atom_type_mix_ = np.array(type_idx_map)[self.atom_type_mix].astype(np.int32) + atom_type_mix_ = np.array(type_idx_map)[self.atom_type_mix].astype( + np.int32 + ) except RuntimeError as e: - raise RuntimeError("some types in 'real_atom_types.npy' of sys {} are not contained in {} types!" 
- .format(self.dirs[0], self.get_ntypes())) from e + raise RuntimeError( + "some types in 'real_atom_types.npy' of sys {} are not contained in {} types!".format( + self.dirs[0], self.get_ntypes() + ) + ) from e self.atom_type_mix = atom_type_mix_ self.type_map = type_map if type_map is None and self.type_map is None and self.mixed_type: - raise RuntimeError('mixed_type format must have type_map!') + raise RuntimeError("mixed_type format must have type_map!") # make idx map self.idx_map = self._make_idx_map(self.atom_type) # train dirs @@ -90,16 +115,16 @@ def __init__ (self, if trn_all_set: self.train_dirs = self.dirs else: - if len(self.dirs) == 1 : + if len(self.dirs) == 1: self.train_dirs = self.dirs - else : + else: self.train_dirs = self.dirs[:-1] - self.data_dict = {} + self.data_dict = {} # add box and coord - self.add('box', 9, must = self.pbc) - self.add('coord', 3, atomic = True, must = True) + self.add("box", 9, must=self.pbc) + self.add("coord", 3, atomic=True, must=True) # the training times of each frame - self.add('numb_copy', 1, must=False, default=1, dtype=int) + self.add("numb_copy", 1, must=False, default=1, dtype=int) # set counters self.set_count = 0 self.iterator = 0 @@ -107,24 +132,24 @@ def __init__ (self, # set modifier self.modifier = modifier - - def add(self, - key : str, - ndof : int, - atomic : bool = False, - must : bool = False, - high_prec : bool = False, - type_sel : List[int] = None, - repeat : int = 1, - default: float=0., - dtype: Optional[np.dtype] = None, - ) : + def add( + self, + key: str, + ndof: int, + atomic: bool = False, + must: bool = False, + high_prec: bool = False, + type_sel: List[int] = None, + repeat: int = 1, + default: float = 0.0, + dtype: Optional[np.dtype] = None, + ): """ Add a data item that to be loaded Parameters ---------- - key + key The key of the item. 
The corresponding data is stored in `sys_path/set.*/key.npy` ndof The number of dof @@ -146,23 +171,20 @@ def add(self, dtype : np.dtype, optional the dtype of data, overwrites `high_prec` if provided """ - self.data_dict[key] = {'ndof': ndof, - 'atomic': atomic, - 'must': must, - 'high_prec': high_prec, - 'type_sel': type_sel, - 'repeat': repeat, - 'reduce': None, - 'default': default, - 'dtype': dtype, + self.data_dict[key] = { + "ndof": ndof, + "atomic": atomic, + "must": must, + "high_prec": high_prec, + "type_sel": type_sel, + "repeat": repeat, + "reduce": None, + "default": default, + "dtype": dtype, } return self - - def reduce(self, - key_out : str, - key_in : str - ) : + def reduce(self, key_out: str, key_in: str): """ Generate a new item from the reduction of another atom @@ -173,18 +195,21 @@ def reduce(self, key_in The name of the data item to be reduced """ - assert (key_in in self.data_dict), 'cannot find input key' - assert (self.data_dict[key_in]['atomic']), 'reduced property should be atomic' - assert (not(key_out in self.data_dict)), 'output key should not have been added' - assert (self.data_dict[key_in]['repeat'] == 1), 'reduced proerties should not have been repeated' - - self.data_dict[key_out] = {'ndof': self.data_dict[key_in]['ndof'], - 'atomic': False, - 'must': True, - 'high_prec': True, - 'type_sel': None, - 'repeat': 1, - 'reduce': key_in, + assert key_in in self.data_dict, "cannot find input key" + assert self.data_dict[key_in]["atomic"], "reduced property should be atomic" + assert not (key_out in self.data_dict), "output key should not have been added" + assert ( + self.data_dict[key_in]["repeat"] == 1 + ), "reduced proerties should not have been repeated" + + self.data_dict[key_out] = { + "ndof": self.data_dict[key_in]["ndof"], + "atomic": False, + "must": True, + "high_prec": True, + "type_sel": None, + "repeat": 1, + "reduce": key_in, } return self @@ -194,39 +219,47 @@ def get_data_dict(self) -> dict: """ return self.data_dict - 
def check_batch_size (self, batch_size) : + def check_batch_size(self, batch_size): """ Check if the system can get a batch of data with `batch_size` frames. """ - for ii in self.train_dirs : - if self.data_dict['coord']['high_prec'] : - tmpe = (ii / "coord.npy").load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION) + for ii in self.train_dirs: + if self.data_dict["coord"]["high_prec"]: + tmpe = ( + (ii / "coord.npy").load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION) + ) else: tmpe = (ii / "coord.npy").load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION) if tmpe.ndim == 1: - tmpe = tmpe.reshape([1,-1]) - if tmpe.shape[0] < batch_size : + tmpe = tmpe.reshape([1, -1]) + if tmpe.shape[0] < batch_size: return ii, tmpe.shape[0] return None - def check_test_size (self, test_size) : + def check_test_size(self, test_size): """ Check if the system can get a test dataset with `test_size` frames. """ - if self.data_dict['coord']['high_prec'] : - tmpe = (self.test_dir / "coord.npy").load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION) + if self.data_dict["coord"]["high_prec"]: + tmpe = ( + (self.test_dir / "coord.npy") + .load_numpy() + .astype(GLOBAL_ENER_FLOAT_PRECISION) + ) else: - tmpe = (self.test_dir / "coord.npy").load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION) + tmpe = ( + (self.test_dir / "coord.npy") + .load_numpy() + .astype(GLOBAL_NP_FLOAT_PRECISION) + ) if tmpe.ndim == 1: - tmpe = tmpe.reshape([1,-1]) - if tmpe.shape[0] < test_size : + tmpe = tmpe.reshape([1, -1]) + if tmpe.shape[0] < test_size: return self.test_dir, tmpe.shape[0] - else : + else: return None - def get_batch(self, - batch_size : int - ) -> dict : + def get_batch(self, batch_size: int) -> dict: """ Get a batch of data with `batch_size` frames. The frames are randomly picked from the data system. 
@@ -235,44 +268,46 @@ def get_batch(self, batch_size size of the batch """ - if hasattr(self, 'batch_set') : + if hasattr(self, "batch_set"): set_size = self.batch_set["coord"].shape[0] - else : + else: set_size = 0 - if self.iterator + batch_size > set_size : - self._load_batch_set (self.train_dirs[self.set_count % self.get_numb_set()]) + if self.iterator + batch_size > set_size: + self._load_batch_set(self.train_dirs[self.set_count % self.get_numb_set()]) self.set_count += 1 set_size = self.batch_set["coord"].shape[0] if self.modifier is not None: self.modifier.modify_data(self.batch_set) iterator_1 = self.iterator + batch_size - if iterator_1 >= set_size : + if iterator_1 >= set_size: iterator_1 = set_size - idx = np.arange (self.iterator, iterator_1) + idx = np.arange(self.iterator, iterator_1) self.iterator += batch_size ret = self._get_subdata(self.batch_set, idx) return ret - def get_test (self, - ntests : int = -1 - ) -> dict: + def get_test(self, ntests: int = -1) -> dict: """ - Get the test data with `ntests` frames. + Get the test data with `ntests` frames. Parameters ---------- ntests Size of the test data set. If `ntests` is -1, all test data will be get. 
""" - if not hasattr(self, 'test_set') : + if not hasattr(self, "test_set"): self._load_test_set(self.test_dir, self.shuffle_test) if ntests == -1: idx = None - else : - ntests_ = ntests if ntests < self.test_set['type'].shape[0] else self.test_set['type'].shape[0] + else: + ntests_ = ( + ntests + if ntests < self.test_set["type"].shape[0] + else self.test_set["type"].shape[0] + ) # print('ntest', self.test_set['type'].shape[0], ntests, ntests_) idx = np.arange(ntests_) - ret = self._get_subdata(self.test_set, idx = idx) + ret = self._get_subdata(self.test_set, idx=idx) if self.modifier is not None: self.modifier.modify_data(ret) return ret @@ -298,16 +333,13 @@ def get_atom_type(self) -> List[int]: """ return self.atom_type - def get_numb_set (self) -> int: + def get_numb_set(self) -> int: """ Get number of training sets """ - return len (self.train_dirs) + return len(self.train_dirs) - def get_numb_batch (self, - batch_size : int, - set_idx : int - ) -> int: + def get_numb_batch(self, batch_size: int, set_idx: int) -> int: """ Get the number of batches in a set. """ @@ -317,25 +349,22 @@ def get_numb_batch (self, ret = 1 return ret - def get_sys_numb_batch (self, - batch_size : int - ) -> int: + def get_sys_numb_batch(self, batch_size: int) -> int: """ Get the number of batches in the data system. 
""" ret = 0 - for ii in range(len(self.train_dirs)) : + for ii in range(len(self.train_dirs)): ret += self.get_numb_batch(batch_size, ii) return ret - def get_natoms (self) : + def get_natoms(self): """ Get number of atoms """ return len(self.atom_type) - def get_natoms_vec (self, - ntypes : int) : + def get_natoms_vec(self, ntypes: int): """ Get number of atoms and number of atoms in different types @@ -351,65 +380,64 @@ def get_natoms_vec (self, natoms[1]: total number of atoms held by this processor natoms[i]: 2 <= i < Ntypes+2, number of type i atoms """ - natoms, natoms_vec = self._get_natoms_2 (ntypes) + natoms, natoms_vec = self._get_natoms_2(ntypes) tmp = [natoms, natoms] - tmp = np.append (tmp, natoms_vec) + tmp = np.append(tmp, natoms_vec) return tmp.astype(np.int32) - - def avg(self, key) : + + def avg(self, key): """ Return the average value of an item. """ - if key not in self.data_dict.keys() : - raise RuntimeError('key %s has not been added' % key) - info = self.data_dict[key] - ndof = info['ndof'] + if key not in self.data_dict.keys(): + raise RuntimeError("key %s has not been added" % key) + info = self.data_dict[key] + ndof = info["ndof"] eners = [] for ii in self.train_dirs: data = self._load_set(ii) ei = data[key].reshape([-1, ndof]) eners.append(ei) - eners = np.concatenate(eners, axis = 0) - if eners.size == 0 : + eners = np.concatenate(eners, axis=0) + if eners.size == 0: return 0 - else : - return np.average(eners, axis = 0) + else: + return np.average(eners, axis=0) - def _idx_map_sel(self, atom_type, type_sel) : + def _idx_map_sel(self, atom_type, type_sel): new_types = [] - for ii in atom_type : + for ii in atom_type: if ii in type_sel: new_types.append(ii) - new_types = np.array(new_types, dtype = int) + new_types = np.array(new_types, dtype=int) natoms = new_types.shape[0] idx = np.arange(natoms) idx_map = np.lexsort((idx, new_types)) return idx_map - def _get_natoms_2 (self, ntypes) : + def _get_natoms_2(self, ntypes): sample_type = 
self.atom_type natoms = len(sample_type) - natoms_vec = np.zeros (ntypes).astype(int) - for ii in range (ntypes) : + natoms_vec = np.zeros(ntypes).astype(int) + for ii in range(ntypes): natoms_vec[ii] = np.count_nonzero(sample_type == ii) return natoms, natoms_vec - def _get_subdata(self, data, idx = None) : + def _get_subdata(self, data, idx=None): new_data = {} for ii in data: dd = data[ii] - if 'find_' in ii: - new_data[ii] = dd + if "find_" in ii: + new_data[ii] = dd else: if idx is not None: new_data[ii] = dd[idx] - else : + else: new_data[ii] = dd return new_data - def _load_batch_set (self, - set_name: DPPath) : - if not hasattr(self, 'batch_set') or self.get_numb_set() > 1: + def _load_batch_set(self, set_name: DPPath): + if not hasattr(self, "batch_set") or self.get_numb_set() > 1: self.batch_set = self._load_set(set_name) self.batch_set, _ = self._shuffle_data(self.batch_set) self.reset_get_batch() @@ -417,129 +445,139 @@ def _load_batch_set (self, def reset_get_batch(self): self.iterator = 0 - def _load_test_set (self, - set_name: DPPath, - shuffle_test) : - self.test_set = self._load_set(set_name) - if shuffle_test : + def _load_test_set(self, set_name: DPPath, shuffle_test): + self.test_set = self._load_set(set_name) + if shuffle_test: self.test_set, _ = self._shuffle_data(self.test_set) - def _shuffle_data (self, - data) : + def _shuffle_data(self, data): ret = {} - nframes = data['coord'].shape[0] - idx = np.arange (nframes) + nframes = data["coord"].shape[0] + idx = np.arange(nframes) # the training times of each frame - idx = np.repeat(idx, np.reshape(data['numb_copy'], (nframes,))) + idx = np.repeat(idx, np.reshape(data["numb_copy"], (nframes,))) dp_random.shuffle(idx) - for kk in data : - if type(data[kk]) == np.ndarray and \ - len(data[kk].shape) == 2 and \ - data[kk].shape[0] == nframes and \ - not('find_' in kk): + for kk in data: + if ( + type(data[kk]) == np.ndarray + and len(data[kk].shape) == 2 + and data[kk].shape[0] == nframes + and not 
("find_" in kk) + ): ret[kk] = data[kk][idx] - else : + else: ret[kk] = data[kk] return ret, idx - def _load_set(self, set_name: DPPath) : + def _load_set(self, set_name: DPPath): # get nframes if not isinstance(set_name, DPPath): set_name = DPPath(set_name) path = set_name / "coord.npy" - if self.data_dict['coord']['high_prec'] : + if self.data_dict["coord"]["high_prec"]: coord = path.load_numpy().astype(GLOBAL_ENER_FLOAT_PRECISION) else: - coord = path.load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION) + coord = path.load_numpy().astype(GLOBAL_NP_FLOAT_PRECISION) if coord.ndim == 1: - coord = coord.reshape([1,-1]) + coord = coord.reshape([1, -1]) nframes = coord.shape[0] - assert(coord.shape[1] == self.data_dict['coord']['ndof'] * self.natoms) + assert coord.shape[1] == self.data_dict["coord"]["ndof"] * self.natoms # load keys data = {} for kk in self.data_dict.keys(): - if self.data_dict[kk]['reduce'] is None : - data['find_'+kk], data[kk] \ - = self._load_data(set_name, - kk, - nframes, - self.data_dict[kk]['ndof'], - atomic = self.data_dict[kk]['atomic'], - high_prec = self.data_dict[kk]['high_prec'], - must = self.data_dict[kk]['must'], - type_sel = self.data_dict[kk]['type_sel'], - repeat = self.data_dict[kk]['repeat'], - default=self.data_dict[kk]['default'], - dtype=self.data_dict[kk]['dtype'], - ) + if self.data_dict[kk]["reduce"] is None: + data["find_" + kk], data[kk] = self._load_data( + set_name, + kk, + nframes, + self.data_dict[kk]["ndof"], + atomic=self.data_dict[kk]["atomic"], + high_prec=self.data_dict[kk]["high_prec"], + must=self.data_dict[kk]["must"], + type_sel=self.data_dict[kk]["type_sel"], + repeat=self.data_dict[kk]["repeat"], + default=self.data_dict[kk]["default"], + dtype=self.data_dict[kk]["dtype"], + ) for kk in self.data_dict.keys(): - if self.data_dict[kk]['reduce'] is not None : - k_in = self.data_dict[kk]['reduce'] - ndof = self.data_dict[kk]['ndof'] - data['find_'+kk] = data['find_'+k_in] + if self.data_dict[kk]["reduce"] is not 
None: + k_in = self.data_dict[kk]["reduce"] + ndof = self.data_dict[kk]["ndof"] + data["find_" + kk] = data["find_" + k_in] tmp_in = data[k_in].astype(GLOBAL_ENER_FLOAT_PRECISION) - data[kk] = np.sum(np.reshape(tmp_in, [nframes, self.natoms, ndof]), axis = 1) + data[kk] = np.sum( + np.reshape(tmp_in, [nframes, self.natoms, ndof]), axis=1 + ) if self.mixed_type: real_type = self.atom_type_mix.reshape([nframes, self.natoms]) - data['type'] = real_type - natoms = data['type'].shape[1] + data["type"] = real_type + natoms = data["type"].shape[1] # nframes x ntypes - atom_type_nums = np.array([(real_type == i).sum(axis=-1) for i in range(self.get_ntypes())], - dtype=np.int32).T - assert (atom_type_nums.sum(axis=-1) == natoms).all(), \ - "some types in 'real_atom_types.npy' of sys {} are not contained in {} types!" \ - .format(self.dirs[0], self.get_ntypes()) - data['real_natoms_vec'] = np.concatenate((np.tile(np.array([natoms, natoms], dtype=np.int32), (nframes, 1)), - atom_type_nums), axis=-1) + atom_type_nums = np.array( + [(real_type == i).sum(axis=-1) for i in range(self.get_ntypes())], + dtype=np.int32, + ).T + assert ( + atom_type_nums.sum(axis=-1) == natoms + ).all(), "some types in 'real_atom_types.npy' of sys {} are not contained in {} types!".format( + self.dirs[0], self.get_ntypes() + ) + data["real_natoms_vec"] = np.concatenate( + ( + np.tile(np.array([natoms, natoms], dtype=np.int32), (nframes, 1)), + atom_type_nums, + ), + axis=-1, + ) else: - data['type'] = np.tile(self.atom_type[self.idx_map], (nframes, 1)) + data["type"] = np.tile(self.atom_type[self.idx_map], (nframes, 1)) return data - - def _load_data(self, - set_name, - key, - nframes, - ndof_, - atomic = False, - must = True, - repeat = 1, - high_prec = False, - type_sel = None, - default: float = 0., - dtype: Optional[np.dtype] = None, - ): + def _load_data( + self, + set_name, + key, + nframes, + ndof_, + atomic=False, + must=True, + repeat=1, + high_prec=False, + type_sel=None, + default: float = 
0.0, + dtype: Optional[np.dtype] = None, + ): if atomic: natoms = self.natoms idx_map = self.idx_map # if type_sel, then revise natoms and idx_map if type_sel is not None: natoms = 0 - for jj in type_sel : - natoms += np.sum(self.atom_type == jj) + for jj in type_sel: + natoms += np.sum(self.atom_type == jj) idx_map = self._idx_map_sel(self.atom_type, type_sel) ndof = ndof_ * natoms else: ndof = ndof_ if dtype is not None: pass - elif high_prec : + elif high_prec: dtype = GLOBAL_ENER_FLOAT_PRECISION else: dtype = GLOBAL_NP_FLOAT_PRECISION - path = set_name / (key+".npy") - if path.is_file() : + path = set_name / (key + ".npy") + if path.is_file(): data = path.load_numpy().astype(dtype) - try: # YWolfeee: deal with data shape error - if atomic : + try: # YWolfeee: deal with data shape error + if atomic: data = data.reshape([nframes, natoms, -1]) - data = data[:,idx_map,:] + data = data[:, idx_map, :] data = data.reshape([nframes, -1]) data = np.reshape(data, [nframes, ndof]) except ValueError as err_message: - explanation = "This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`." + explanation = "This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`." log.error(str(err_message)) log.error(explanation) raise ValueError(str(err_message) + ". 
" + explanation) @@ -554,8 +592,7 @@ def _load_data(self, data = np.repeat(data, repeat).reshape([nframes, -1]) return np.float32(0.0), data - - def _load_type (self, sys_path: DPPath) : + def _load_type(self, sys_path: DPPath): atom_type = (sys_path / "type.raw").load_txt(dtype=np.int32, ndmin=1) return atom_type @@ -566,22 +603,22 @@ def _load_type_mix(self, set_name: DPPath): def _make_idx_map(self, atom_type): natoms = atom_type.shape[0] - idx = np.arange (natoms) - idx_map = np.lexsort ((idx, atom_type)) + idx = np.arange(natoms) + idx_map = np.lexsort((idx, atom_type)) return idx_map - def _load_type_map(self, sys_path: DPPath) : - fname = sys_path / 'type_map.raw' - if fname.is_file() : + def _load_type_map(self, sys_path: DPPath): + fname = sys_path / "type_map.raw" + if fname.is_file(): return fname.load_txt(dtype=str, ndmin=1).tolist() - else : + else: return None def _check_pbc(self, sys_path: DPPath): pbc = True - if (sys_path / 'nopbc').is_file() : + if (sys_path / "nopbc").is_file(): pbc = False return pbc def _check_mode(self, set_path: DPPath): - return (set_path / 'real_atom_types.npy').is_file() + return (set_path / "real_atom_types.npy").is_file() diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py index 92cf0f8bed..bdd965405b 100644 --- a/deepmd/utils/data_system.py +++ b/deepmd/utils/data_system.py @@ -1,41 +1,52 @@ #!/usr/bin/env python3 +import collections import logging import os -import collections import warnings +from typing import ( + List, + Tuple, +) + import numpy as np -from typing import Tuple, List +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, +) from deepmd.utils import random as dp_random -from deepmd.utils.data import DeepmdData -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION +from deepmd.utils.data import ( + DeepmdData, +) log = logging.getLogger(__name__) -class DeepmdDataSystem() : +class DeepmdDataSystem: """ - Class for manipulating many data systems. 
+ Class for manipulating many data systems. It is implemented with the help of DeepmdData """ - def __init__ (self, - systems : List[str], - batch_size : int, - test_size : int, - rcut : float, - set_prefix : str = 'set', - shuffle_test : bool = True, - type_map : List[str] = None, - optional_type_map : bool = True, - modifier = None, - trn_all_set = False, - sys_probs = None, - auto_prob_style ="prob_sys_size") : + + def __init__( + self, + systems: List[str], + batch_size: int, + test_size: int, + rcut: float, + set_prefix: str = "set", + shuffle_test: bool = True, + type_map: List[str] = None, + optional_type_map: bool = True, + modifier=None, + trn_all_set=False, + sys_probs=None, + auto_prob_style="prob_sys_size", + ): """ Constructor - + Parameters ---------- systems @@ -55,7 +66,7 @@ def __init__ (self, optional_type_map If the type_map.raw in each system is optional modifier - Data modifier that has the method `modify_data` + Data modifier that has the method `modify_data` trn_all_set Use all sets as training dataset. Otherwise, if the number of sets is more than 1, the last set is left for test. sys_probs: list of float @@ -77,20 +88,23 @@ def __init__ (self, self.system_dirs = systems self.nsystems = len(self.system_dirs) self.data_systems = [] - for ii in self.system_dirs : + for ii in self.system_dirs: self.data_systems.append( DeepmdData( - ii, - set_prefix=set_prefix, - shuffle_test=shuffle_test, - type_map = type_map, - optional_type_map = optional_type_map, - modifier = modifier, - trn_all_set = trn_all_set - )) + ii, + set_prefix=set_prefix, + shuffle_test=shuffle_test, + type_map=type_map, + optional_type_map=optional_type_map, + modifier=modifier, + trn_all_set=trn_all_set, + ) + ) # check mix_type format - error_format_msg = "if one of the system is of mixed_type format, " \ - "then all of the systems should be of mixed_type format!" 
+ error_format_msg = ( + "if one of the system is of mixed_type format, " + "then all of the systems should be of mixed_type format!" + ) if self.data_systems[0].mixed_type: for data_sys in self.data_systems[1:]: assert data_sys.mixed_type, error_format_msg @@ -105,35 +119,39 @@ def __init__ (self, if isinstance(self.batch_size, int): self.batch_size = self.batch_size * np.ones(self.nsystems, dtype=int) elif isinstance(self.batch_size, str): - words = self.batch_size.split(':') - if 'auto' == words[0] : + words = self.batch_size.split(":") + if "auto" == words[0]: is_auto_bs = True rule = 32 - if len(words) == 2 : + if len(words) == 2: rule = int(words[1]) else: - raise RuntimeError('unknown batch_size rule ' + words[0]) + raise RuntimeError("unknown batch_size rule " + words[0]) self.batch_size = self._make_auto_bs(rule) elif isinstance(self.batch_size, list): pass - else : - raise RuntimeError('invalid batch_size') - assert(isinstance(self.batch_size, (list,np.ndarray))) - assert(len(self.batch_size) == self.nsystems) + else: + raise RuntimeError("invalid batch_size") + assert isinstance(self.batch_size, (list, np.ndarray)) + assert len(self.batch_size) == self.nsystems # natoms, nbatches ntypes = [] - for ii in self.data_systems : + for ii in self.data_systems: ntypes.append(ii.get_ntypes()) self.sys_ntypes = max(ntypes) self.natoms = [] self.natoms_vec = [] self.nbatches = [] type_map_list = [] - for ii in range(self.nsystems) : + for ii in range(self.nsystems): self.natoms.append(self.data_systems[ii].get_natoms()) - self.natoms_vec.append(self.data_systems[ii].get_natoms_vec(self.sys_ntypes).astype(int)) - self.nbatches.append(self.data_systems[ii].get_sys_numb_batch(self.batch_size[ii])) + self.natoms_vec.append( + self.data_systems[ii].get_natoms_vec(self.sys_ntypes).astype(int) + ) + self.nbatches.append( + self.data_systems[ii].get_sys_numb_batch(self.batch_size[ii]) + ) type_map_list.append(self.data_systems[ii].get_type_map()) self.type_map = 
self._check_type_map_consistency(type_map_list) @@ -147,21 +165,21 @@ def __init__ (self, if isinstance(self.test_size, int): self.test_size = self.test_size * np.ones(self.nsystems, dtype=int) elif isinstance(self.test_size, str): - words = self.test_size.split('%') + words = self.test_size.split("%") try: percent = int(words[0]) except ValueError: - raise RuntimeError('unknown test_size rule ' + words[0]) + raise RuntimeError("unknown test_size rule " + words[0]) self.test_size = self._make_auto_ts(percent) elif isinstance(self.test_size, list): pass - else : - raise RuntimeError('invalid test_size') - assert(isinstance(self.test_size, (list,np.ndarray))) - assert(len(self.test_size) == self.nsystems) + else: + raise RuntimeError("invalid test_size") + assert isinstance(self.test_size, (list, np.ndarray)) + assert len(self.test_size) == self.nsystems # prob of batch, init pick idx - self.prob_nbatches = [ float(i) for i in self.nbatches] / np.sum(self.nbatches) + self.prob_nbatches = [float(i) for i in self.nbatches] / np.sum(self.nbatches) self.pick_idx = 0 # derive system probabilities @@ -169,54 +187,61 @@ def __init__ (self, self.set_sys_probs(sys_probs, auto_prob_style) # check batch and test size - for ii in range(self.nsystems) : + for ii in range(self.nsystems): chk_ret = self.data_systems[ii].check_batch_size(self.batch_size[ii]) if chk_ret is not None and not is_auto_bs: - warnings.warn("system %s required batch size is larger than the size of the dataset %s (%d > %d)" % \ - (self.system_dirs[ii], chk_ret[0], self.batch_size[ii], chk_ret[1])) + warnings.warn( + "system %s required batch size is larger than the size of the dataset %s (%d > %d)" + % ( + self.system_dirs[ii], + chk_ret[0], + self.batch_size[ii], + chk_ret[1], + ) + ) chk_ret = self.data_systems[ii].check_test_size(self.test_size[ii]) if chk_ret is not None and not is_auto_bs: - warnings.warn("system %s required test size is larger than the size of the dataset %s (%d > %d)" % \ - 
(self.system_dirs[ii], chk_ret[0], self.test_size[ii], chk_ret[1])) - + warnings.warn( + "system %s required test size is larger than the size of the dataset %s (%d > %d)" + % (self.system_dirs[ii], chk_ret[0], self.test_size[ii], chk_ret[1]) + ) - def _load_test(self, ntests = -1): + def _load_test(self, ntests=-1): self.test_data = collections.defaultdict(list) - for ii in range(self.nsystems) : - test_system_data = self.data_systems[ii].get_test(ntests = ntests) + for ii in range(self.nsystems): + test_system_data = self.data_systems[ii].get_test(ntests=ntests) for nn in test_system_data: self.test_data[nn].append(test_system_data[nn]) - def _make_default_mesh(self): self.default_mesh = [] - cell_size = np.max (self.rcut) - for ii in range(self.nsystems) : - if self.data_systems[ii].pbc : + cell_size = np.max(self.rcut) + for ii in range(self.nsystems): + if self.data_systems[ii].pbc: test_system_data = self.data_systems[ii].get_batch(self.batch_size[ii]) self.data_systems[ii].reset_get_batch() # test_system_data = self.data_systems[ii].get_test() - avg_box = np.average (test_system_data["box"], axis = 0) - avg_box = np.reshape (avg_box, [3,3]) - ncell = (np.linalg.norm(avg_box, axis=1)/ cell_size).astype(np.int32) + avg_box = np.average(test_system_data["box"], axis=0) + avg_box = np.reshape(avg_box, [3, 3]) + ncell = (np.linalg.norm(avg_box, axis=1) / cell_size).astype(np.int32) ncell[ncell < 2] = 2 - default_mesh = np.zeros (6, dtype = np.int32) + default_mesh = np.zeros(6, dtype=np.int32) default_mesh[3:6] = ncell self.default_mesh.append(default_mesh) else: - self.default_mesh.append(np.array([], dtype = np.int32)) + self.default_mesh.append(np.array([], dtype=np.int32)) - - def compute_energy_shift(self, rcond = 1e-3, key = 'energy') : + def compute_energy_shift(self, rcond=1e-3, key="energy"): sys_ener = [] - for ss in self.data_systems : + for ss in self.data_systems: sys_ener.append(ss.avg(key)) sys_ener = np.concatenate(sys_ener) sys_tynatom = 
np.array(self.natoms_vec, dtype=GLOBAL_NP_FLOAT_PRECISION) - sys_tynatom = np.reshape(sys_tynatom, [self.nsystems,-1]) - sys_tynatom = sys_tynatom[:,2:] - energy_shift,resd,rank,s_value \ - = np.linalg.lstsq(sys_tynatom, sys_ener, rcond = rcond) + sys_tynatom = np.reshape(sys_tynatom, [self.nsystems, -1]) + sys_tynatom = sys_tynatom[:, 2:] + energy_shift, resd, rank, s_value = np.linalg.lstsq( + sys_tynatom, sys_ener, rcond=rcond + ) return energy_shift def add_dict(self, adict: dict) -> None: @@ -226,43 +251,45 @@ def add_dict(self, adict: dict) -> None: .. code-block:: python adict[key] = { - 'ndof': ndof, - 'atomic': atomic, - 'must': must, - 'high_prec': high_prec, - 'type_sel': type_sel, - 'repeat': repeat, + "ndof": ndof, + "atomic": atomic, + "must": must, + "high_prec": high_prec, + "type_sel": type_sel, + "repeat": repeat, } For the explaination of the keys see `add` """ - for kk in adict : - self.add(kk, - adict[kk]['ndof'], - atomic=adict[kk]['atomic'], - must=adict[kk]['must'], - high_prec=adict[kk]['high_prec'], - type_sel=adict[kk]['type_sel'], - repeat=adict[kk]['repeat'], - default=adict[kk]['default'], - ) - - def add(self, - key : str, - ndof : int, - atomic : bool = False, - must : bool = False, - high_prec : bool = False, - type_sel : List[int] = None, - repeat : int = 1, - default: float=0., - ) : + for kk in adict: + self.add( + kk, + adict[kk]["ndof"], + atomic=adict[kk]["atomic"], + must=adict[kk]["must"], + high_prec=adict[kk]["high_prec"], + type_sel=adict[kk]["type_sel"], + repeat=adict[kk]["repeat"], + default=adict[kk]["default"], + ) + + def add( + self, + key: str, + ndof: int, + atomic: bool = False, + must: bool = False, + high_prec: bool = False, + type_sel: List[int] = None, + repeat: int = 1, + default: float = 0.0, + ): """ Add a data item that to be loaded Parameters ---------- - key + key The key of the item. 
The corresponding data is stored in `sys_path/set.*/key.npy` ndof The number of dof @@ -283,7 +310,16 @@ def add(self, Default value of data """ for ii in self.data_systems: - ii.add(key, ndof, atomic=atomic, must=must, high_prec=high_prec, repeat=repeat, type_sel=type_sel, default=default) + ii.add( + key, + ndof, + atomic=atomic, + must=must, + high_prec=high_prec, + repeat=repeat, + type_sel=type_sel, + default=default, + ) def reduce(self, key_out, key_in): """ @@ -302,11 +338,10 @@ def reduce(self, key_out, key_in): def get_data_dict(self, ii: int = 0) -> dict: return self.data_systems[ii].get_data_dict() - def set_sys_probs(self, sys_probs=None, - auto_prob_style: str = "prob_sys_size"): - if sys_probs is None : + def set_sys_probs(self, sys_probs=None, auto_prob_style: str = "prob_sys_size"): + if sys_probs is None: if auto_prob_style == "prob_uniform": - prob_v = 1./float(self.nsystems) + prob_v = 1.0 / float(self.nsystems) probs = [prob_v for ii in range(self.nsystems)] elif auto_prob_style == "prob_sys_size": probs = self.prob_nbatches @@ -318,24 +353,22 @@ def set_sys_probs(self, sys_probs=None, probs = self._process_sys_probs(sys_probs) self.sys_probs = probs - def _get_sys_probs(self, - sys_probs, - auto_prob_style) : # depreciated - if sys_probs is None : - if auto_prob_style == "prob_uniform" : - prob_v = 1./float(self.nsystems) + def _get_sys_probs(self, sys_probs, auto_prob_style): # depreciated + if sys_probs is None: + if auto_prob_style == "prob_uniform": + prob_v = 1.0 / float(self.nsystems) prob = [prob_v for ii in range(self.nsystems)] - elif auto_prob_style == "prob_sys_size" : + elif auto_prob_style == "prob_sys_size": prob = self.prob_nbatches - elif auto_prob_style[:14] == "prob_sys_size;" : + elif auto_prob_style[:14] == "prob_sys_size;": prob = self._prob_sys_size_ext(auto_prob_style) - else : - raise RuntimeError("unknown style " + auto_prob_style ) - else : + else: + raise RuntimeError("unknown style " + auto_prob_style) + else: prob 
= self._process_sys_probs(sys_probs) return prob - def get_batch(self, sys_idx : int = None): + def get_batch(self, sys_idx: int = None): # batch generation style altered by Ziyao Li: # one should specify the "sys_prob" and "auto_prob_style" params # via set_sys_prob() function. The sys_probs this function uses is @@ -347,44 +380,44 @@ def get_batch(self, sys_idx : int = None): Parameters ---------- sys_idx: int - The index of system from which the batch is get. + The index of system from which the batch is get. If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following. """ - if not hasattr(self, 'default_mesh') : + if not hasattr(self, "default_mesh"): self._make_default_mesh() - if sys_idx is not None : + if sys_idx is not None: self.pick_idx = sys_idx - else : + else: # prob = self._get_sys_probs(sys_probs, auto_prob_style) self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs) - b_data = self.data_systems[self.pick_idx].get_batch(self.batch_size[self.pick_idx]) + b_data = self.data_systems[self.pick_idx].get_batch( + self.batch_size[self.pick_idx] + ) b_data["natoms_vec"] = self.natoms_vec[self.pick_idx] b_data["default_mesh"] = self.default_mesh[self.pick_idx] return b_data # ! altered by Marián Rynik - def get_test (self, - sys_idx : int = None, - n_test : int = -1) : # depreciated + def get_test(self, sys_idx: int = None, n_test: int = -1): # depreciated """ Get test data from the the data systems. Parameters ---------- sys_idx - The test dat of system with index `sys_idx` will be returned. + The test dat of system with index `sys_idx` will be returned. If is None, the currently selected system will be returned. n_test Number of test data. If set to -1 all test data will be get. 
""" - if not hasattr(self, 'default_mesh') : + if not hasattr(self, "default_mesh"): self._make_default_mesh() - if not hasattr(self, 'test_data') : - self._load_test(ntests = n_test) - if sys_idx is not None : + if not hasattr(self, "test_data"): + self._load_test(ntests=n_test) + if sys_idx is not None: idx = sys_idx - else : + else: idx = self.pick_idx test_system_data = {} @@ -399,36 +432,36 @@ def get_sys_ntest(self, sys_idx=None): Get number of tests for the currently selected system, or one defined by sys_idx. """ - if sys_idx is not None : + if sys_idx is not None: return self.test_size[sys_idx] - else : + else: return self.test_size[self.pick_idx] - + def get_type_map(self) -> List[str]: """ Get the type map """ return self.type_map - def get_nbatches (self) -> int: + def get_nbatches(self) -> int: """ Get the total number of batches """ return self.nbatches - - def get_ntypes (self) -> int: + + def get_ntypes(self) -> int: """ Get the number of types """ return self.sys_ntypes - def get_nsystems (self) -> int: + def get_nsystems(self) -> int: """ Get the number of data systems """ return self.nsystems - def get_sys (self, idx : int) -> DeepmdData: + def get_sys(self, idx: int) -> DeepmdData: """ Get a certain data system """ @@ -440,34 +473,43 @@ def get_batch_size(self) -> int: """ return self.batch_size - def _format_name_length(self, name, width) : + def _format_name_length(self, name, width): if len(name) <= width: - return '{: >{}}'.format(name, width) - else : - name = name[-(width-3):] - name = '-- ' + name - return name + return "{: >{}}".format(name, width) + else: + name = name[-(width - 3) :] + name = "-- " + name + return name - def print_summary(self, name) : + def print_summary(self, name): # width 65 sys_width = 42 - log.info(f"---Summary of DataSystem: {name:13s}-----------------------------------------------") + log.info( + f"---Summary of DataSystem: {name:13s}-----------------------------------------------" + ) log.info("found %d 
system(s):" % self.nsystems) - log.info(("%s " % self._format_name_length('system', sys_width)) + - ("%6s %6s %6s %5s %3s" % ('natoms', 'bch_sz', 'n_bch', 'prob', 'pbc'))) - for ii in range(self.nsystems) : - log.info("%s %6d %6d %6d %5.3f %3s" % - (self._format_name_length(self.system_dirs[ii], sys_width), - self.natoms[ii], - # TODO batch size * nbatches = number of structures - self.batch_size[ii], - self.nbatches[ii], - self.sys_probs[ii], - "T" if self.data_systems[ii].pbc else "F" - ) ) - log.info("--------------------------------------------------------------------------------------") - - def _make_auto_bs(self, rule) : + log.info( + ("%s " % self._format_name_length("system", sys_width)) + + ("%6s %6s %6s %5s %3s" % ("natoms", "bch_sz", "n_bch", "prob", "pbc")) + ) + for ii in range(self.nsystems): + log.info( + "%s %6d %6d %6d %5.3f %3s" + % ( + self._format_name_length(self.system_dirs[ii], sys_width), + self.natoms[ii], + # TODO batch size * nbatches = number of structures + self.batch_size[ii], + self.nbatches[ii], + self.sys_probs[ii], + "T" if self.data_systems[ii].pbc else "F", + ) + ) + log.info( + "--------------------------------------------------------------------------------------" + ) + + def _make_auto_bs(self, rule): bs = [] for ii in self.data_systems: ni = ii.get_natoms() @@ -492,39 +534,43 @@ def _check_type_map_consistency(self, type_map_list): for ii in type_map_list: if ii is not None: min_len = min([len(ii), len(ret)]) - for idx in range(min_len) : - if ii[idx] != ret[idx] : - raise RuntimeError('inconsistent type map: %s %s' % (str(ret), str(ii))) - if len(ii) > len(ret) : + for idx in range(min_len): + if ii[idx] != ret[idx]: + raise RuntimeError( + "inconsistent type map: %s %s" % (str(ret), str(ii)) + ) + if len(ii) > len(ret): ret = ii return ret - def _process_sys_probs(self, sys_probs) : + def _process_sys_probs(self, sys_probs): sys_probs = np.array(sys_probs) type_filter = sys_probs >= 0 assigned_sum_prob = np.sum(type_filter 
* sys_probs) # 1e-8 is to handle floating point error; See #1917 - assert assigned_sum_prob <= 1. + 1e-8, "the sum of assigned probability should be less than 1" - rest_sum_prob = 1. - assigned_sum_prob + assert ( + assigned_sum_prob <= 1.0 + 1e-8 + ), "the sum of assigned probability should be less than 1" + rest_sum_prob = 1.0 - assigned_sum_prob if not np.isclose(rest_sum_prob, 0): rest_nbatch = (1 - type_filter) * self.nbatches rest_prob = rest_sum_prob * rest_nbatch / np.sum(rest_nbatch) ret_prob = rest_prob + type_filter * sys_probs - else : + else: ret_prob = sys_probs assert np.isclose(np.sum(ret_prob), 1), "sum of probs should be 1" return ret_prob def _prob_sys_size_ext(self, keywords): - block_str = keywords.split(';')[1:] + block_str = keywords.split(";")[1:] block_stt = [] block_end = [] block_weights = [] for ii in block_str: - stt = int(ii.split(':')[0]) - end = int(ii.split(':')[1]) - weight = float(ii.split(':')[2]) - assert(weight >= 0), "the weight of a block should be no less than 0" + stt = int(ii.split(":")[0]) + end = int(ii.split(":")[1]) + weight = float(ii.split(":")[2]) + assert weight >= 0, "the weight of a block should be no less than 0" block_stt.append(stt) block_end.append(end) block_weights.append(weight) @@ -532,7 +578,7 @@ def _prob_sys_size_ext(self, keywords): block_probs = np.array(block_weights) / np.sum(block_weights) sys_probs = np.zeros([self.get_nsystems()]) for ii in range(nblocks): - nbatch_block = self.nbatches[block_stt[ii]:block_end[ii]] + nbatch_block = self.nbatches[block_stt[ii] : block_end[ii]] tmp_prob = [float(i) for i in nbatch_block] / np.sum(nbatch_block) - sys_probs[block_stt[ii]:block_end[ii]] = tmp_prob * block_probs[ii] + sys_probs[block_stt[ii] : block_end[ii]] = tmp_prob * block_probs[ii] return sys_probs diff --git a/deepmd/utils/errors.py b/deepmd/utils/errors.py index 3fb35fae00..ed5d60bdf8 100644 --- a/deepmd/utils/errors.py +++ b/deepmd/utils/errors.py @@ -1,8 +1,10 @@ class 
GraphTooLargeError(Exception): """The graph is too large, exceeding protobuf's hard limit of 2GB.""" + class GraphWithoutTensorError(Exception): pass + class OutOfMemoryError(Exception): - """This error is caused by out-of-memory (OOM).""" \ No newline at end of file + """This error is caused by out-of-memory (OOM).""" diff --git a/deepmd/utils/finetune.py b/deepmd/utils/finetune.py index a06962ea1f..11fbf97e4f 100644 --- a/deepmd/utils/finetune.py +++ b/deepmd/utils/finetune.py @@ -1,22 +1,45 @@ -import logging import json +import logging +from functools import ( + lru_cache, +) +from typing import ( + Any, + Callable, + Dict, + List, + Tuple, +) + import numpy as np +from scipy.special import ( + comb, +) + import deepmd -from typing import Callable -from typing import Tuple, List, Dict, Any -from functools import lru_cache -from scipy.special import comb -from deepmd.env import tf -from deepmd.env import op_module -from deepmd.common import ACTIVATION_FN_DICT -from deepmd.utils.graph import get_tensor_by_name -from deepmd.utils.errors import GraphWithoutTensorError -from deepmd.descriptor import Descriptor +from deepmd.common import ( + ACTIVATION_FN_DICT, +) +from deepmd.descriptor import ( + Descriptor, +) +from deepmd.env import ( + op_module, + tf, +) +from deepmd.utils.errors import ( + GraphWithoutTensorError, +) +from deepmd.utils.graph import ( + get_tensor_by_name, +) log = logging.getLogger(__name__) -def replace_model_params_with_pretrained_model(jdata: Dict[str, Any], pretrained_model: str): +def replace_model_params_with_pretrained_model( + jdata: Dict[str, Any], pretrained_model: str +): """Replace the model params in input script according to pretrained model. 
Parameters @@ -28,55 +51,83 @@ def replace_model_params_with_pretrained_model(jdata: Dict[str, Any], pretrained """ # Get the input script from the pretrained model try: - t_jdata = get_tensor_by_name(pretrained_model, 'train_attr/training_script') + t_jdata = get_tensor_by_name(pretrained_model, "train_attr/training_script") except GraphWithoutTensorError as e: raise RuntimeError( "The input frozen pretrained model: %s has no training script, " "which is not supported to perform finetuning. " - "Please use the model pretrained with v2.1.5 or higher version of DeePMD-kit." % input + "Please use the model pretrained with v2.1.5 or higher version of DeePMD-kit." + % input ) from e pretrained_jdata = json.loads(t_jdata) # Check the model type - assert pretrained_jdata['model']['descriptor']['type'] in ['se_atten'] and \ - pretrained_jdata['model']['fitting_net']['type'] in ['ener'], \ - "The finetune process only supports models pretrained with 'se_atten' descriptor and 'ener' fitting_net!" + assert pretrained_jdata["model"]["descriptor"]["type"] in [ + "se_atten" + ] and pretrained_jdata["model"]["fitting_net"]["type"] in [ + "ener" + ], "The finetune process only supports models pretrained with 'se_atten' descriptor and 'ener' fitting_net!" # Check the type map - pretrained_type_map = pretrained_jdata['model']['type_map'] - cur_type_map = jdata['model'].get("type_map", []) + pretrained_type_map = pretrained_jdata["model"]["type_map"] + cur_type_map = jdata["model"].get("type_map", []) out_line_type = [] for i in cur_type_map: if i not in pretrained_type_map: out_line_type.append(i) - assert not out_line_type, "{} type(s) not contained in the pretrained model! " \ - "Please choose another suitable one.".format(str(out_line_type)) + assert not out_line_type, ( + "{} type(s) not contained in the pretrained model! 
" + "Please choose another suitable one.".format(str(out_line_type)) + ) if cur_type_map != pretrained_type_map: - log.info("Change the type_map from {} to {}.".format(str(cur_type_map), str(pretrained_type_map))) - jdata['model']['type_map'] = pretrained_type_map + log.info( + "Change the type_map from {} to {}.".format( + str(cur_type_map), str(pretrained_type_map) + ) + ) + jdata["model"]["type_map"] = pretrained_type_map # Change model configurations log.info("Change the model configurations according to the pretrained one...") - for config_key in ['type_embedding', 'descriptor', 'fitting_net']: - if config_key not in jdata['model'].keys() and config_key in pretrained_jdata['model'].keys(): - log.info("Add the '{}' from pretrained model: {}.".format( - config_key, str(pretrained_jdata['model'][config_key]))) - jdata['model'][config_key] = pretrained_jdata['model'][config_key] - elif config_key == 'type_embedding' and \ - config_key in jdata['model'].keys() and config_key not in pretrained_jdata['model'].keys(): + for config_key in ["type_embedding", "descriptor", "fitting_net"]: + if ( + config_key not in jdata["model"].keys() + and config_key in pretrained_jdata["model"].keys() + ): + log.info( + "Add the '{}' from pretrained model: {}.".format( + config_key, str(pretrained_jdata["model"][config_key]) + ) + ) + jdata["model"][config_key] = pretrained_jdata["model"][config_key] + elif ( + config_key == "type_embedding" + and config_key in jdata["model"].keys() + and config_key not in pretrained_jdata["model"].keys() + ): # 'type_embedding' can be omitted using 'se_atten' descriptor, and the activation_function will be None. 
- cur_para = jdata['model'].pop(config_key) - if 'trainable' in cur_para and not cur_para['trainable']: - jdata['model'][config_key] = {'trainable': False, 'activation_function': 'None'} + cur_para = jdata["model"].pop(config_key) + if "trainable" in cur_para and not cur_para["trainable"]: + jdata["model"][config_key] = { + "trainable": False, + "activation_function": "None", + } log.info("The type_embeddings from pretrained model will be frozen.") - elif config_key in jdata['model'].keys() and config_key in pretrained_jdata['model'].keys() and \ - jdata['model'][config_key] != pretrained_jdata['model'][config_key]: - target_para = pretrained_jdata['model'][config_key] - cur_para = jdata['model'][config_key] + elif ( + config_key in jdata["model"].keys() + and config_key in pretrained_jdata["model"].keys() + and jdata["model"][config_key] != pretrained_jdata["model"][config_key] + ): + target_para = pretrained_jdata["model"][config_key] + cur_para = jdata["model"][config_key] # keep some params that are irrelevant to model structures (need to discuss) TODO - if 'trainable' in cur_para.keys(): - target_para['trainable'] = cur_para['trainable'] - log.info("Change the '{}' from {} to {}.".format(config_key, str(cur_para), str(target_para))) - jdata['model'][config_key] = target_para + if "trainable" in cur_para.keys(): + target_para["trainable"] = cur_para["trainable"] + log.info( + "Change the '{}' from {} to {}.".format( + config_key, str(cur_para), str(target_para) + ) + ) + jdata["model"][config_key] = target_para return jdata, cur_type_map diff --git a/deepmd/utils/graph.py b/deepmd/utils/graph.py index a8d95ebb25..06754ac720 100644 --- a/deepmd/utils/graph.py +++ b/deepmd/utils/graph.py @@ -1,11 +1,27 @@ import re +from typing import ( + Dict, + Tuple, +) + import numpy as np -from typing import Tuple, Dict -from deepmd.env import tf, EMBEDDING_NET_PATTERN, FITTING_NET_PATTERN, TYPE_EMBEDDING_PATTERN, ATTENTION_LAYER_PATTERN -from deepmd.utils.sess import 
run_sess -from deepmd.utils.errors import GraphWithoutTensorError -# TODO (JZ): I think in this file we can merge some duplicated lines into one method... +from deepmd.env import ( + ATTENTION_LAYER_PATTERN, + EMBEDDING_NET_PATTERN, + FITTING_NET_PATTERN, + TYPE_EMBEDDING_PATTERN, + tf, +) +from deepmd.utils.errors import ( + GraphWithoutTensorError, +) +from deepmd.utils.sess import ( + run_sess, +) + + +# TODO (JZ): I think in this file we can merge some duplicated lines into one method... def load_graph_def(model_file: str) -> Tuple[tf.Graph, tf.GraphDef]: """ Load graph as well as the graph_def from the frozen model(model_file) @@ -26,12 +42,11 @@ def load_graph_def(model_file: str) -> Tuple[tf.Graph, tf.GraphDef]: with open(model_file, "rb") as f: graph_def.ParseFromString(f.read()) with tf.Graph().as_default() as graph: - tf.import_graph_def(graph_def, name = "") + tf.import_graph_def(graph_def, name="") return graph, graph_def -def get_tensor_by_name_from_graph(graph: tf.Graph, - tensor_name: str) -> tf.Tensor: +def get_tensor_by_name_from_graph(graph: tf.Graph, tensor_name: str) -> tf.Tensor: """ Load tensor value from the given tf.Graph object @@ -53,7 +68,7 @@ def get_tensor_by_name_from_graph(graph: tf.Graph, Whether the tensor_name is within the frozen model """ try: - tensor = graph.get_tensor_by_name(tensor_name + ':0') + tensor = graph.get_tensor_by_name(tensor_name + ":0") except KeyError as e: raise GraphWithoutTensorError() from e with tf.Session(graph=graph) as sess: @@ -61,8 +76,7 @@ def get_tensor_by_name_from_graph(graph: tf.Graph, return tensor -def get_tensor_by_name(model_file: str, - tensor_name: str) -> tf.Tensor: +def get_tensor_by_name(model_file: str, tensor_name: str) -> tf.Tensor: """ Load tensor value from the frozen model(model_file) @@ -87,8 +101,7 @@ def get_tensor_by_name(model_file: str, return get_tensor_by_name_from_graph(graph, tensor_name) -def get_tensor_by_type(node, - data_type : np.dtype) -> tf.Tensor: +def 
get_tensor_by_type(node, data_type: np.dtype) -> tf.Tensor: """ Get the tensor value within the given node according to the input data_type @@ -98,7 +111,7 @@ def get_tensor_by_type(node, The given tensorflow graph node data_type The data type of the node - + Returns ---------- tf.Tensor @@ -109,7 +122,7 @@ def get_tensor_by_type(node, elif data_type == np.float32: tensor = np.array(node.float_val) else: - raise RuntimeError('model compression does not support the half precision') + raise RuntimeError("model compression does not support the half precision") return tensor @@ -123,7 +136,7 @@ def get_pattern_nodes_from_graph_def(graph_def: tf.GraphDef, pattern: str) -> Di The input tf.GraphDef object pattern The node pattern within the graph_def - + Returns ---------- Dict @@ -137,7 +150,9 @@ def get_pattern_nodes_from_graph_def(graph_def: tf.GraphDef, pattern: str) -> Di return nodes -def get_embedding_net_nodes_from_graph_def(graph_def: tf.GraphDef, suffix: str = "") -> Dict: +def get_embedding_net_nodes_from_graph_def( + graph_def: tf.GraphDef, suffix: str = "" +) -> Dict: """ Get the embedding net nodes with the given tf.GraphDef object @@ -147,7 +162,7 @@ def get_embedding_net_nodes_from_graph_def(graph_def: tf.GraphDef, suffix: str = The input tf.GraphDef object suffix : str, optional The scope suffix - + Returns ---------- Dict @@ -155,17 +170,21 @@ def get_embedding_net_nodes_from_graph_def(graph_def: tf.GraphDef, suffix: str = """ # embedding_net_pattern = f"filter_type_\d+{suffix}/matrix_\d+_\d+|filter_type_\d+{suffix}/bias_\d+_\d+|filter_type_\d+{suffix}/idt_\d+_\d+|filter_type_all{suffix}/matrix_\d+_\d+|filter_type_all{suffix}/matrix_\d+_\d+_\d+|filter_type_all{suffix}/bias_\d+_\d+|filter_type_all{suffix}/bias_\d+_\d+_\d+|filter_type_all{suffix}/idt_\d+_\d+" if suffix != "": - embedding_net_pattern = EMBEDDING_NET_PATTERN\ - .replace('/idt', suffix + '/idt')\ - .replace('/bias', suffix + '/bias')\ - .replace('/matrix', suffix + '/matrix') + 
embedding_net_pattern = ( + EMBEDDING_NET_PATTERN.replace("/idt", suffix + "/idt") + .replace("/bias", suffix + "/bias") + .replace("/matrix", suffix + "/matrix") + ) else: embedding_net_pattern = EMBEDDING_NET_PATTERN - embedding_net_nodes = get_pattern_nodes_from_graph_def(graph_def, embedding_net_pattern) + embedding_net_nodes = get_pattern_nodes_from_graph_def( + graph_def, embedding_net_pattern + ) for key in embedding_net_nodes.keys(): - assert key.find('bias') > 0 or key.find( - 'matrix') > 0, "currently, only support weight matrix and bias matrix at the tabulation op!" + assert ( + key.find("bias") > 0 or key.find("matrix") > 0 + ), "currently, only support weight matrix and bias matrix at the tabulation op!" return embedding_net_nodes @@ -179,7 +198,7 @@ def get_embedding_net_nodes(model_file: str, suffix: str = "") -> Dict: The input frozen model path suffix : str, optional The suffix of the scope - + Returns ---------- Dict @@ -189,7 +208,9 @@ def get_embedding_net_nodes(model_file: str, suffix: str = "") -> Dict: return get_embedding_net_nodes_from_graph_def(graph_def, suffix=suffix) -def get_embedding_net_variables_from_graph_def(graph_def : tf.GraphDef, suffix: str = "") -> Dict: +def get_embedding_net_variables_from_graph_def( + graph_def: tf.GraphDef, suffix: str = "" +) -> Dict: """ Get the embedding net variables with the given tf.GraphDef object @@ -199,26 +220,31 @@ def get_embedding_net_variables_from_graph_def(graph_def : tf.GraphDef, suffix: The input tf.GraphDef object suffix : str, optional The suffix of the scope - + Returns ---------- Dict - The embedding net variables within the given tf.GraphDef object + The embedding net variables within the given tf.GraphDef object """ embedding_net_variables = {} - embedding_net_nodes = get_embedding_net_nodes_from_graph_def(graph_def, suffix=suffix) + embedding_net_nodes = get_embedding_net_nodes_from_graph_def( + graph_def, suffix=suffix + ) for item in embedding_net_nodes: node = 
embedding_net_nodes[item] dtype = tf.as_dtype(node.dtype).as_numpy_dtype tensor_shape = tf.TensorShape(node.tensor_shape).as_list() if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor_value = np.frombuffer(node.tensor_content, dtype = tf.as_dtype(node.dtype).as_numpy_dtype) + tensor_value = np.frombuffer( + node.tensor_content, dtype=tf.as_dtype(node.dtype).as_numpy_dtype + ) else: tensor_value = get_tensor_by_type(node, dtype) embedding_net_variables[item] = np.reshape(tensor_value, tensor_shape) return embedding_net_variables -def get_embedding_net_variables(model_file : str, suffix: str = "") -> Dict: + +def get_embedding_net_variables(model_file: str, suffix: str = "") -> Dict: """ Get the embedding net variables with the given frozen model(model_file) @@ -228,7 +254,7 @@ def get_embedding_net_variables(model_file : str, suffix: str = "") -> Dict: The input frozen model path suffix : str, optional The suffix of the scope - + Returns ---------- Dict @@ -238,7 +264,9 @@ def get_embedding_net_variables(model_file : str, suffix: str = "") -> Dict: return get_embedding_net_variables_from_graph_def(graph_def, suffix=suffix) -def get_fitting_net_nodes_from_graph_def(graph_def: tf.GraphDef, suffix: str = "") -> Dict: +def get_fitting_net_nodes_from_graph_def( + graph_def: tf.GraphDef, suffix: str = "" +) -> Dict: """ Get the fitting net nodes with the given tf.GraphDef object @@ -248,27 +276,29 @@ def get_fitting_net_nodes_from_graph_def(graph_def: tf.GraphDef, suffix: str = " The input tf.GraphDef object suffix suffix of the scope - + Returns ---------- Dict The fitting net nodes within the given tf.GraphDef object """ if suffix != "": - fitting_net_pattern = FITTING_NET_PATTERN\ - .replace('/idt', suffix + '/idt')\ - .replace('/bias', suffix + '/bias')\ - .replace('/matrix', suffix + '/matrix') + fitting_net_pattern = ( + FITTING_NET_PATTERN.replace("/idt", suffix + "/idt") + .replace("/bias", suffix + "/bias") + .replace("/matrix", suffix + "/matrix") + 
) else: fitting_net_pattern = FITTING_NET_PATTERN fitting_net_nodes = get_pattern_nodes_from_graph_def(graph_def, fitting_net_pattern) for key in fitting_net_nodes.keys(): - assert key.find('bias') > 0 or key.find('matrix') > 0 or key.find( - 'idt') > 0, "currently, only support weight matrix, bias and idt at the model compression process!" + assert ( + key.find("bias") > 0 or key.find("matrix") > 0 or key.find("idt") > 0 + ), "currently, only support weight matrix, bias and idt at the model compression process!" return fitting_net_nodes -def get_fitting_net_nodes(model_file : str) -> Dict: +def get_fitting_net_nodes(model_file: str) -> Dict: """ Get the fitting net nodes with the given frozen model(model_file) @@ -276,7 +306,7 @@ def get_fitting_net_nodes(model_file : str) -> Dict: ---------- model_file The input frozen model path - + Returns ---------- Dict @@ -286,7 +316,9 @@ def get_fitting_net_nodes(model_file : str) -> Dict: return get_fitting_net_nodes_from_graph_def(graph_def) -def get_fitting_net_variables_from_graph_def(graph_def : tf.GraphDef, suffix: str = "") -> Dict: +def get_fitting_net_variables_from_graph_def( + graph_def: tf.GraphDef, suffix: str = "" +) -> Dict: """ Get the fitting net variables with the given tf.GraphDef object @@ -296,26 +328,29 @@ def get_fitting_net_variables_from_graph_def(graph_def : tf.GraphDef, suffix: st The input tf.GraphDef object suffix suffix of the scope - + Returns ---------- Dict - The fitting net variables within the given tf.GraphDef object + The fitting net variables within the given tf.GraphDef object """ fitting_net_variables = {} fitting_net_nodes = get_fitting_net_nodes_from_graph_def(graph_def, suffix=suffix) for item in fitting_net_nodes: node = fitting_net_nodes[item] - dtype= tf.as_dtype(node.dtype).as_numpy_dtype + dtype = tf.as_dtype(node.dtype).as_numpy_dtype tensor_shape = tf.TensorShape(node.tensor_shape).as_list() if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor_value = 
np.frombuffer(node.tensor_content, dtype = tf.as_dtype(node.dtype).as_numpy_dtype) + tensor_value = np.frombuffer( + node.tensor_content, dtype=tf.as_dtype(node.dtype).as_numpy_dtype + ) else: tensor_value = get_tensor_by_type(node, dtype) fitting_net_variables[item] = np.reshape(tensor_value, tensor_shape) return fitting_net_variables -def get_fitting_net_variables(model_file : str, suffix: str = "") -> Dict: + +def get_fitting_net_variables(model_file: str, suffix: str = "") -> Dict: """ Get the fitting net variables with the given frozen model(model_file) @@ -325,7 +360,7 @@ def get_fitting_net_variables(model_file : str, suffix: str = "") -> Dict: The input frozen model path suffix suffix of the scope - + Returns ---------- Dict @@ -335,7 +370,9 @@ def get_fitting_net_variables(model_file : str, suffix: str = "") -> Dict: return get_fitting_net_variables_from_graph_def(graph_def, suffix=suffix) -def get_type_embedding_net_nodes_from_graph_def(graph_def: tf.GraphDef, suffix: str = "") -> Dict: +def get_type_embedding_net_nodes_from_graph_def( + graph_def: tf.GraphDef, suffix: str = "" +) -> Dict: """ Get the type embedding net nodes with the given tf.GraphDef object @@ -345,25 +382,30 @@ def get_type_embedding_net_nodes_from_graph_def(graph_def: tf.GraphDef, suffix: The input tf.GraphDef object suffix : str, optional The scope suffix - + Returns ---------- Dict The type embedding net nodes within the given tf.GraphDef object """ if suffix != "": - type_embedding_net_pattern = TYPE_EMBEDDING_PATTERN\ - .replace('/idt', suffix + '/idt')\ - .replace('/bias', suffix + '/bias')\ - .replace('/matrix', suffix + '/matrix') + type_embedding_net_pattern = ( + TYPE_EMBEDDING_PATTERN.replace("/idt", suffix + "/idt") + .replace("/bias", suffix + "/bias") + .replace("/matrix", suffix + "/matrix") + ) else: type_embedding_net_pattern = TYPE_EMBEDDING_PATTERN - type_embedding_net_nodes = get_pattern_nodes_from_graph_def(graph_def, type_embedding_net_pattern) + 
type_embedding_net_nodes = get_pattern_nodes_from_graph_def( + graph_def, type_embedding_net_pattern + ) return type_embedding_net_nodes -def get_type_embedding_net_variables_from_graph_def(graph_def: tf.GraphDef, suffix: str = "") -> Dict: +def get_type_embedding_net_variables_from_graph_def( + graph_def: tf.GraphDef, suffix: str = "" +) -> Dict: """ Get the type embedding net variables with the given tf.GraphDef object @@ -373,27 +415,33 @@ def get_type_embedding_net_variables_from_graph_def(graph_def: tf.GraphDef, suff The input tf.GraphDef object suffix : str, optional The suffix of the scope - + Returns ---------- Dict - The embedding net variables within the given tf.GraphDef object + The embedding net variables within the given tf.GraphDef object """ type_embedding_net_variables = {} - type_embedding_net_nodes = get_type_embedding_net_nodes_from_graph_def(graph_def, suffix=suffix) + type_embedding_net_nodes = get_type_embedding_net_nodes_from_graph_def( + graph_def, suffix=suffix + ) for item in type_embedding_net_nodes: node = type_embedding_net_nodes[item] dtype = tf.as_dtype(node.dtype).as_numpy_dtype tensor_shape = tf.TensorShape(node.tensor_shape).as_list() if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor_value = np.frombuffer(node.tensor_content, dtype = tf.as_dtype(node.dtype).as_numpy_dtype) + tensor_value = np.frombuffer( + node.tensor_content, dtype=tf.as_dtype(node.dtype).as_numpy_dtype + ) else: tensor_value = get_tensor_by_type(node, dtype) type_embedding_net_variables[item] = np.reshape(tensor_value, tensor_shape) return type_embedding_net_variables -def get_attention_layer_nodes_from_graph_def(graph_def: tf.GraphDef, suffix: str = "") -> Dict: +def get_attention_layer_nodes_from_graph_def( + graph_def: tf.GraphDef, suffix: str = "" +) -> Dict: """ Get the attention layer nodes with the given tf.GraphDef object @@ -410,20 +458,25 @@ def get_attention_layer_nodes_from_graph_def(graph_def: tf.GraphDef, suffix: str The attention 
layer nodes within the given tf.GraphDef object """ if suffix != "": - attention_layer_pattern = ATTENTION_LAYER_PATTERN \ - .replace('/c_query', suffix + '/c_query') \ - .replace('/c_key', suffix + '/c_key') \ - .replace('/c_value', suffix + '/c_value') \ - .replace('/c_out', suffix + '/c_out') \ - .replace('/layer_normalization', suffix + '/layer_normalization') + attention_layer_pattern = ( + ATTENTION_LAYER_PATTERN.replace("/c_query", suffix + "/c_query") + .replace("/c_key", suffix + "/c_key") + .replace("/c_value", suffix + "/c_value") + .replace("/c_out", suffix + "/c_out") + .replace("/layer_normalization", suffix + "/layer_normalization") + ) else: attention_layer_pattern = ATTENTION_LAYER_PATTERN - attention_layer_nodes = get_pattern_nodes_from_graph_def(graph_def, attention_layer_pattern) + attention_layer_nodes = get_pattern_nodes_from_graph_def( + graph_def, attention_layer_pattern + ) return attention_layer_nodes -def get_attention_layer_variables_from_graph_def(graph_def: tf.GraphDef, suffix: str = "") -> Dict: +def get_attention_layer_variables_from_graph_def( + graph_def: tf.GraphDef, suffix: str = "" +) -> Dict: """ Get the attention layer variables with the given tf.GraphDef object @@ -440,13 +493,17 @@ def get_attention_layer_variables_from_graph_def(graph_def: tf.GraphDef, suffix: The attention layer variables within the given tf.GraphDef object """ attention_layer_variables = {} - attention_layer_net_nodes = get_attention_layer_nodes_from_graph_def(graph_def, suffix=suffix) + attention_layer_net_nodes = get_attention_layer_nodes_from_graph_def( + graph_def, suffix=suffix + ) for item in attention_layer_net_nodes: node = attention_layer_net_nodes[item] dtype = tf.as_dtype(node.dtype).as_numpy_dtype tensor_shape = tf.TensorShape(node.tensor_shape).as_list() if (len(tensor_shape) != 1) or (tensor_shape[0] != 1): - tensor_value = np.frombuffer(node.tensor_content, dtype=tf.as_dtype(node.dtype).as_numpy_dtype) + tensor_value = np.frombuffer( + 
node.tensor_content, dtype=tf.as_dtype(node.dtype).as_numpy_dtype + ) else: tensor_value = get_tensor_by_type(node, dtype) attention_layer_variables[item] = np.reshape(tensor_value, tensor_shape) diff --git a/deepmd/utils/learning_rate.py b/deepmd/utils/learning_rate.py index e51b2497bb..034e892af1 100644 --- a/deepmd/utils/learning_rate.py +++ b/deepmd/utils/learning_rate.py @@ -1,7 +1,11 @@ import numpy as np -from deepmd.env import tf -class LearningRateExp (object) : +from deepmd.env import ( + tf, +) + + +class LearningRateExp(object): r""" The exponentially decaying learning rate. @@ -10,7 +14,7 @@ class LearningRateExp (object) : .. math:: \alpha(t) = \alpha_0 \lambda ^ { t / \tau } - + where :math:`\alpha` is the learning rate, :math:`\alpha_0` is the starting learning rate, :math:`\lambda` is the decay rate, and :math:`\tau` is the decay steps. @@ -23,29 +27,28 @@ class LearningRateExp (object) : decay_steps Learning rate decay every this number of steps :math:`\tau` decay_rate - The decay rate :math:`\lambda`. + The decay rate :math:`\lambda`. If `stop_step` is provided in `build`, then it will be determined automatically and overwritten. 
""" - def __init__ (self, - start_lr : float, - stop_lr : float = 5e-8, - decay_steps : int = 5000, - decay_rate : float = 0.95 - ) -> None : + + def __init__( + self, + start_lr: float, + stop_lr: float = 5e-8, + decay_steps: int = 5000, + decay_rate: float = 0.95, + ) -> None: """ Constructor """ self.cd = {} - self.cd['start_lr'] = start_lr - self.cd['stop_lr'] = stop_lr - self.cd['decay_steps'] = decay_steps - self.cd['decay_rate'] = decay_rate - self.start_lr_ = self.cd['start_lr'] + self.cd["start_lr"] = start_lr + self.cd["stop_lr"] = stop_lr + self.cd["decay_steps"] = decay_steps + self.cd["decay_rate"] = decay_rate + self.start_lr_ = self.cd["start_lr"] - def build(self, - global_step : tf.Tensor, - stop_step : int = None - ) -> tf.Tensor : + def build(self, global_step: tf.Tensor, stop_step: int = None) -> tf.Tensor: """ Build the learning rate @@ -61,33 +64,45 @@ def build(self, learning_rate The learning rate """ - if stop_step is None: - self.decay_steps_ = self.cd['decay_steps'] if self.cd['decay_steps'] is not None else 5000 - self.decay_rate_ = self.cd['decay_rate'] if self.cd['decay_rate'] is not None else 0.95 + if stop_step is None: + self.decay_steps_ = ( + self.cd["decay_steps"] if self.cd["decay_steps"] is not None else 5000 + ) + self.decay_rate_ = ( + self.cd["decay_rate"] if self.cd["decay_rate"] is not None else 0.95 + ) else: - self.stop_lr_ = self.cd['stop_lr'] if self.cd['stop_lr'] is not None else 5e-8 + self.stop_lr_ = ( + self.cd["stop_lr"] if self.cd["stop_lr"] is not None else 5e-8 + ) default_ds = 100 if stop_step // 10 > 100 else stop_step // 100 + 1 - self.decay_steps_ = self.cd['decay_steps'] if self.cd['decay_steps'] is not None else default_ds + self.decay_steps_ = ( + self.cd["decay_steps"] + if self.cd["decay_steps"] is not None + else default_ds + ) if self.decay_steps_ >= stop_step: self.decay_steps_ = default_ds - self.decay_rate_ = np.exp(np.log(self.stop_lr_ / self.start_lr_) / (stop_step / self.decay_steps_)) - - 
return tf.train.exponential_decay(self.start_lr_, - global_step, - self.decay_steps_, - self.decay_rate_, - staircase=True) + self.decay_rate_ = np.exp( + np.log(self.stop_lr_ / self.start_lr_) / (stop_step / self.decay_steps_) + ) + + return tf.train.exponential_decay( + self.start_lr_, + global_step, + self.decay_steps_, + self.decay_rate_, + staircase=True, + ) + def start_lr(self) -> float: """ Get the start lr """ return self.start_lr_ - def value (self, - step : int - ) -> float: + def value(self, step: int) -> float: """ Get the lr at a certain step """ - return self.start_lr_ * np.power (self.decay_rate_, (step // self.decay_steps_)) - + return self.start_lr_ * np.power(self.decay_rate_, (step // self.decay_steps_)) diff --git a/deepmd/utils/multi_init.py b/deepmd/utils/multi_init.py index af224658aa..85a68ef59b 100644 --- a/deepmd/utils/multi_init.py +++ b/deepmd/utils/multi_init.py @@ -1,22 +1,45 @@ -import logging import json +import logging +from functools import ( + lru_cache, +) +from typing import ( + Any, + Callable, + Dict, + List, + Tuple, +) + import numpy as np +from scipy.special import ( + comb, +) + import deepmd -from typing import Callable -from typing import Tuple, List, Dict, Any -from functools import lru_cache -from scipy.special import comb -from deepmd.env import tf -from deepmd.env import op_module -from deepmd.common import ACTIVATION_FN_DICT -from deepmd.utils.graph import get_tensor_by_name -from deepmd.utils.errors import GraphWithoutTensorError -from deepmd.descriptor import Descriptor +from deepmd.common import ( + ACTIVATION_FN_DICT, +) +from deepmd.descriptor import ( + Descriptor, +) +from deepmd.env import ( + op_module, + tf, +) +from deepmd.utils.errors import ( + GraphWithoutTensorError, +) +from deepmd.utils.graph import ( + get_tensor_by_name, +) log = logging.getLogger(__name__) -def replace_model_params_with_frz_multi_model(jdata: Dict[str, Any], pretrained_model: str): +def replace_model_params_with_frz_multi_model( 
+ jdata: Dict[str, Any], pretrained_model: str +): """Replace the model params in input script according to pretrained frozen multi-task united model. Parameters @@ -28,78 +51,111 @@ def replace_model_params_with_frz_multi_model(jdata: Dict[str, Any], pretrained_ """ # Get the input script from the pretrained model try: - t_jdata = get_tensor_by_name(pretrained_model, 'train_attr/training_script') + t_jdata = get_tensor_by_name(pretrained_model, "train_attr/training_script") except GraphWithoutTensorError as e: raise RuntimeError( "The input frozen pretrained model: %s has no training script, " "which is not supported to perform multi-task training. " - "Please use the model pretrained with v2.1.5 or higher version of DeePMD-kit." % input + "Please use the model pretrained with v2.1.5 or higher version of DeePMD-kit." + % input ) from e pretrained_jdata = json.loads(t_jdata) # Check the model type - assert "fitting_net_dict" in pretrained_jdata["model"], \ - "The multi-task init process only supports models trained in multi-task mode and frozen into united model!" \ - "Please use '--united-model' argument in 'dp freeze' command." + assert "fitting_net_dict" in pretrained_jdata["model"], ( + "The multi-task init process only supports models trained in multi-task mode and frozen into united model!" + "Please use '--united-model' argument in 'dp freeze' command." + ) # Check the type map - pretrained_type_map = pretrained_jdata['model']['type_map'] - cur_type_map = jdata['model'].get("type_map", []) + pretrained_type_map = pretrained_jdata["model"]["type_map"] + cur_type_map = jdata["model"].get("type_map", []) out_line_type = [] for i in cur_type_map: if i not in pretrained_type_map: out_line_type.append(i) - assert not out_line_type, "{} type(s) not contained in the pretrained model! " \ - "Please choose another suitable one.".format(str(out_line_type)) + assert not out_line_type, ( + "{} type(s) not contained in the pretrained model! 
" + "Please choose another suitable one.".format(str(out_line_type)) + ) if cur_type_map != pretrained_type_map: - log.info("Change the type_map from {} to {}.".format(str(cur_type_map), str(pretrained_type_map))) - jdata['model']['type_map'] = pretrained_type_map + log.info( + "Change the type_map from {} to {}.".format( + str(cur_type_map), str(pretrained_type_map) + ) + ) + jdata["model"]["type_map"] = pretrained_type_map # Change model configurations - pretrained_fitting_keys = sorted(list(pretrained_jdata['model']['fitting_net_dict'].keys())) - cur_fitting_keys = sorted(list(jdata['model']['fitting_net_dict'].keys())) + pretrained_fitting_keys = sorted( + list(pretrained_jdata["model"]["fitting_net_dict"].keys()) + ) + cur_fitting_keys = sorted(list(jdata["model"]["fitting_net_dict"].keys())) newly_added_fittings = set(cur_fitting_keys) - set(pretrained_fitting_keys) reused_fittings = set(cur_fitting_keys) - newly_added_fittings log.info("Change the model configurations according to the pretrained one...") - for config_key in ['type_embedding', 'descriptor', 'fitting_net_dict']: - if config_key not in jdata['model'].keys() and config_key in pretrained_jdata['model'].keys(): - log.info("Add the '{}' from pretrained model: {}.".format( - config_key, str(pretrained_jdata['model'][config_key]))) - jdata['model'][config_key] = pretrained_jdata['model'][config_key] - elif config_key == 'type_embedding' and \ - config_key in jdata['model'].keys() and config_key not in pretrained_jdata['model'].keys(): + for config_key in ["type_embedding", "descriptor", "fitting_net_dict"]: + if ( + config_key not in jdata["model"].keys() + and config_key in pretrained_jdata["model"].keys() + ): + log.info( + "Add the '{}' from pretrained model: {}.".format( + config_key, str(pretrained_jdata["model"][config_key]) + ) + ) + jdata["model"][config_key] = pretrained_jdata["model"][config_key] + elif ( + config_key == "type_embedding" + and config_key in jdata["model"].keys() + and 
config_key not in pretrained_jdata["model"].keys() + ): # 'type_embedding' can be omitted using 'se_atten' descriptor, and the activation_function will be None. - cur_para = jdata['model'].pop(config_key) - if 'trainable' in cur_para and not cur_para['trainable']: - jdata['model'][config_key] = {'trainable': False, 'activation_function': 'None'} + cur_para = jdata["model"].pop(config_key) + if "trainable" in cur_para and not cur_para["trainable"]: + jdata["model"][config_key] = { + "trainable": False, + "activation_function": "None", + } log.info("The type_embeddings from pretrained model will be frozen.") - elif config_key == 'fitting_net_dict': + elif config_key == "fitting_net_dict": if reused_fittings: log.info( - f"These fitting nets will use the configurations from pretrained frozen model : {reused_fittings}.") + f"These fitting nets will use the configurations from pretrained frozen model : {reused_fittings}." + ) for fitting_key in reused_fittings: - _change_sub_config(jdata['model'][config_key], pretrained_jdata['model'][config_key], fitting_key) + _change_sub_config( + jdata["model"][config_key], + pretrained_jdata["model"][config_key], + fitting_key, + ) if newly_added_fittings: log.info( - f"These fitting nets will be initialized from scratch : {newly_added_fittings}.") - elif config_key in jdata['model'].keys() and config_key in pretrained_jdata['model'].keys() and \ - jdata['model'][config_key] != pretrained_jdata['model'][config_key]: - _change_sub_config(jdata['model'], pretrained_jdata['model'], config_key) + f"These fitting nets will be initialized from scratch : {newly_added_fittings}." 
+ ) + elif ( + config_key in jdata["model"].keys() + and config_key in pretrained_jdata["model"].keys() + and jdata["model"][config_key] != pretrained_jdata["model"][config_key] + ): + _change_sub_config(jdata["model"], pretrained_jdata["model"], config_key) # Change other multi-task configurations log.info("Change the training configurations according to the pretrained one...") - for config_key in ['loss_dict', 'training/data_dict']: + for config_key in ["loss_dict", "training/data_dict"]: cur_jdata = jdata target_jdata = pretrained_jdata - for sub_key in config_key.split('/'): + for sub_key in config_key.split("/"): cur_jdata = cur_jdata[sub_key] target_jdata = target_jdata[sub_key] for fitting_key in reused_fittings: if fitting_key not in cur_jdata: target_para = target_jdata[fitting_key] cur_jdata[fitting_key] = target_para - log.info(f"Add '{config_key}/{fitting_key}' configurations from the pretrained frozen model.") + log.info( + f"Add '{config_key}/{fitting_key}' configurations from the pretrained frozen model." 
+ ) return jdata @@ -108,7 +164,11 @@ def _change_sub_config(jdata: Dict[str, Any], src_jdata: Dict[str, Any], sub_key target_para = src_jdata[sub_key] cur_para = jdata[sub_key] # keep some params that are irrelevant to model structures (need to discuss) TODO - if 'trainable' in cur_para.keys(): - target_para['trainable'] = cur_para['trainable'] - log.info("Change the '{}' from {} to {}.".format(sub_key, str(cur_para), str(target_para))) + if "trainable" in cur_para.keys(): + target_para["trainable"] = cur_para["trainable"] + log.info( + "Change the '{}' from {} to {}.".format( + sub_key, str(cur_para), str(target_para) + ) + ) jdata[sub_key] = target_para diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py index 87e5b83774..d0975c199e 100644 --- a/deepmd/utils/neighbor_stat.py +++ b/deepmd/utils/neighbor_stat.py @@ -1,19 +1,31 @@ -import math import logging +import math +from typing import ( + List, + Tuple, +) + import numpy as np -from deepmd.env import tf -from typing import Tuple, List -from deepmd.env import op_module -from deepmd.env import default_tf_session_config -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION -from deepmd.utils.data_system import DeepmdDataSystem -from deepmd.utils.parallel_op import ParallelOp + +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, + default_tf_session_config, + op_module, + tf, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.parallel_op import ( + ParallelOp, +) log = logging.getLogger(__name__) -class NeighborStat(): + +class NeighborStat: """ - Class for getting training data information. + Class for getting training data information. It loads data from DeepmdData object, and measures the data info, including neareest nbor distance between atoms, max nbor size of atoms and the output data range of the environment matrix. @@ -26,11 +38,13 @@ class NeighborStat(): one_type : bool, optional, default=False Treat all types as a single type. 
""" - def __init__(self, - ntypes : int, - rcut: float, - one_type : bool = False, - ) -> None: + + def __init__( + self, + ntypes: int, + rcut: float, + one_type: bool = False, + ) -> None: """ Constructor """ @@ -41,35 +55,43 @@ def __init__(self, def builder(): place_holders = {} - for ii in ['coord', 'box']: - place_holders[ii] = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None], name='t_'+ii) - place_holders['type'] = tf.placeholder(tf.int32, [None, None], name='t_type') - place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes+2], name='t_natoms') - place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name='t_mesh') - t_type = place_holders['type'] - t_natoms = place_holders['natoms_vec'] + for ii in ["coord", "box"]: + place_holders[ii] = tf.placeholder( + GLOBAL_NP_FLOAT_PRECISION, [None, None], name="t_" + ii + ) + place_holders["type"] = tf.placeholder( + tf.int32, [None, None], name="t_type" + ) + place_holders["natoms_vec"] = tf.placeholder( + tf.int32, [self.ntypes + 2], name="t_natoms" + ) + place_holders["default_mesh"] = tf.placeholder( + tf.int32, [None], name="t_mesh" + ) + t_type = place_holders["type"] + t_natoms = place_holders["natoms_vec"] if self.one_type: # all types = 0, natoms_vec = [natoms, natoms, natoms] t_type = tf.zeros_like(t_type, dtype=tf.int32) t_natoms = tf.repeat(t_natoms[0], 3) - _max_nbor_size, _min_nbor_dist \ - = op_module.neighbor_stat(place_holders['coord'], - t_type, - t_natoms, - place_holders['box'], - place_holders['default_mesh'], - rcut = self.rcut) - place_holders['dir'] = tf.placeholder(tf.string) - return place_holders, (_max_nbor_size, _min_nbor_dist, place_holders['dir']) + _max_nbor_size, _min_nbor_dist = op_module.neighbor_stat( + place_holders["coord"], + t_type, + t_natoms, + place_holders["box"], + place_holders["default_mesh"], + rcut=self.rcut, + ) + place_holders["dir"] = tf.placeholder(tf.string) + return place_holders, (_max_nbor_size, _min_nbor_dist, place_holders["dir"]) 
with sub_graph.as_default(): self.p = ParallelOp(builder, config=default_tf_session_config) - self.sub_sess = tf.Session(graph = sub_graph, config=default_tf_session_config) + self.sub_sess = tf.Session(graph=sub_graph, config=default_tf_session_config) - def get_stat(self, - data : DeepmdDataSystem) -> Tuple[float, List[int]]: + def get_stat(self, data: DeepmdDataSystem) -> Tuple[float, List[int]]: """ get the data statistics of the training data, including nearest nbor distance between atoms, max nbor size of atoms @@ -77,7 +99,7 @@ def get_stat(self, ---------- data Class for manipulating many data systems. It is implemented with the help of DeepmdData. - + Returns ------- min_nbor_dist @@ -88,20 +110,24 @@ def get_stat(self, self.min_nbor_dist = 100.0 self.max_nbor_size = [0] if not self.one_type: - self.max_nbor_size *= self.ntypes + self.max_nbor_size *= self.ntypes def feed(): for ii in range(len(data.system_dirs)): for jj in data.data_systems[ii].dirs: data_set = data.data_systems[ii]._load_set(jj) - for kk in range(np.array(data_set['type']).shape[0]): + for kk in range(np.array(data_set["type"]).shape[0]): yield { - 'coord': np.array(data_set['coord'])[kk].reshape([-1, data.natoms[ii] * 3]), - 'type': np.array(data_set['type'])[kk].reshape([-1, data.natoms[ii]]), - 'natoms_vec': np.array(data.natoms_vec[ii]), - 'box': np.array(data_set['box'])[kk].reshape([-1, 9]), - 'default_mesh': np.array(data.default_mesh[ii]), - 'dir': str(jj), + "coord": np.array(data_set["coord"])[kk].reshape( + [-1, data.natoms[ii] * 3] + ), + "type": np.array(data_set["type"])[kk].reshape( + [-1, data.natoms[ii]] + ), + "natoms_vec": np.array(data.natoms_vec[ii]), + "box": np.array(data_set["box"])[kk].reshape([-1, 9]), + "default_mesh": np.array(data.default_mesh[ii]), + "dir": str(jj), } for mn, dt, jj in self.p.generate(self.sub_sess, feed()): @@ -109,11 +135,14 @@ def feed(): dt = np.min(dt) else: dt = self.rcut - log.warning("Atoms with no neighbors found in %s. 
Please make sure it's what you expected." % jj) + log.warning( + "Atoms with no neighbors found in %s. Please make sure it's what you expected." + % jj + ) if dt < self.min_nbor_dist: - if math.isclose(dt, 0., rel_tol=1e-6): + if math.isclose(dt, 0.0, rel_tol=1e-6): # it's unexpected that the distance between two atoms is zero - # zero distance will cause nan (#874) + # zero distance will cause nan (#874) raise RuntimeError( "Some atoms are overlapping in %s. Please check your" " training data to remove duplicated atoms." % jj @@ -122,6 +151,6 @@ def feed(): var = np.max(mn, axis=0) self.max_nbor_size = np.maximum(var, self.max_nbor_size) - log.info('training data with min nbor dist: ' + str(self.min_nbor_dist)) - log.info('training data with max nbor size: ' + str(self.max_nbor_size)) + log.info("training data with min nbor dist: " + str(self.min_nbor_dist)) + log.info("training data with max nbor size: " + str(self.max_nbor_size)) return self.min_nbor_dist, self.max_nbor_size diff --git a/deepmd/utils/network.py b/deepmd/utils/network.py index 47d9d83351..ad9f41aebc 100644 --- a/deepmd/utils/network.py +++ b/deepmd/utils/network.py @@ -1,113 +1,129 @@ import numpy as np -from deepmd.env import tf -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.common import get_precision +from deepmd.common import ( + get_precision, +) +from deepmd.env import ( + GLOBAL_TF_FLOAT_PRECISION, + tf, +) + def one_layer_rand_seed_shift(): return 3 -def one_layer(inputs, - outputs_size, - activation_fn=tf.nn.tanh, - precision = GLOBAL_TF_FLOAT_PRECISION, - stddev=1.0, - bavg=0.0, - name='linear', - scope='', - reuse=None, - seed=None, - use_timestep = False, - trainable = True, - useBN = False, - uniform_seed = False, - initial_variables = None, - mixed_prec = None, - final_layer = False): + +def one_layer( + inputs, + outputs_size, + activation_fn=tf.nn.tanh, + precision=GLOBAL_TF_FLOAT_PRECISION, + stddev=1.0, + bavg=0.0, + name="linear", + scope="", + reuse=None, + 
seed=None, + use_timestep=False, + trainable=True, + useBN=False, + uniform_seed=False, + initial_variables=None, + mixed_prec=None, + final_layer=False, +): # For good accuracy, the last layer of the fitting network uses a higher precision neuron network. if mixed_prec is not None and final_layer: - inputs = tf.cast(inputs, get_precision(mixed_prec['output_prec'])) + inputs = tf.cast(inputs, get_precision(mixed_prec["output_prec"])) with tf.variable_scope(name, reuse=reuse): shape = inputs.get_shape().as_list() - w_initializer = tf.random_normal_initializer( - stddev=stddev / np.sqrt(shape[1] + outputs_size), - seed=seed if (seed is None or uniform_seed) else seed + 0) - b_initializer = tf.random_normal_initializer( - stddev=stddev, - mean=bavg, - seed=seed if (seed is None or uniform_seed) else seed + 1) + w_initializer = tf.random_normal_initializer( + stddev=stddev / np.sqrt(shape[1] + outputs_size), + seed=seed if (seed is None or uniform_seed) else seed + 0, + ) + b_initializer = tf.random_normal_initializer( + stddev=stddev, + mean=bavg, + seed=seed if (seed is None or uniform_seed) else seed + 1, + ) if initial_variables is not None: - w_initializer = tf.constant_initializer(initial_variables[scope + name + '/matrix']) - b_initializer = tf.constant_initializer(initial_variables[scope + name + '/bias']) - w = tf.get_variable('matrix', - [shape[1], outputs_size], - precision, - w_initializer, - trainable = trainable) - variable_summaries(w, 'matrix') - b = tf.get_variable('bias', - [outputs_size], - precision, - b_initializer, - trainable = trainable) - variable_summaries(b, 'bias') + w_initializer = tf.constant_initializer( + initial_variables[scope + name + "/matrix"] + ) + b_initializer = tf.constant_initializer( + initial_variables[scope + name + "/bias"] + ) + w = tf.get_variable( + "matrix", + [shape[1], outputs_size], + precision, + w_initializer, + trainable=trainable, + ) + variable_summaries(w, "matrix") + b = tf.get_variable( + "bias", 
[outputs_size], precision, b_initializer, trainable=trainable + ) + variable_summaries(b, "bias") if mixed_prec is not None and not final_layer: - inputs = tf.cast(inputs, get_precision(mixed_prec['compute_prec'])) - w = tf.cast(w, get_precision(mixed_prec['compute_prec'])) - b = tf.cast(b, get_precision(mixed_prec['compute_prec'])) + inputs = tf.cast(inputs, get_precision(mixed_prec["compute_prec"])) + w = tf.cast(w, get_precision(mixed_prec["compute_prec"])) + b = tf.cast(b, get_precision(mixed_prec["compute_prec"])) hidden = tf.nn.bias_add(tf.matmul(inputs, w), b) - if activation_fn != None and use_timestep : + if activation_fn != None and use_timestep: idt_initializer = tf.random_normal_initializer( - stddev=0.001, - mean=0.1, - seed=seed if (seed is None or uniform_seed) else seed + 2) + stddev=0.001, + mean=0.1, + seed=seed if (seed is None or uniform_seed) else seed + 2, + ) if initial_variables is not None: - idt_initializer = tf.constant_initializer(initial_variables[scope + name + '/idt']) - idt = tf.get_variable('idt', - [outputs_size], - precision, - idt_initializer, - trainable = trainable) - variable_summaries(idt, 'idt') + idt_initializer = tf.constant_initializer( + initial_variables[scope + name + "/idt"] + ) + idt = tf.get_variable( + "idt", [outputs_size], precision, idt_initializer, trainable=trainable + ) + variable_summaries(idt, "idt") if activation_fn != None: if useBN: None - # hidden_bn = self._batch_norm(hidden, name=name+'_normalization', reuse=reuse) + # hidden_bn = self._batch_norm(hidden, name=name+'_normalization', reuse=reuse) # return activation_fn(hidden_bn) else: - if use_timestep : + if use_timestep: if mixed_prec is not None and not final_layer: - idt = tf.cast(idt, get_precision(mixed_prec['compute_prec'])) + idt = tf.cast(idt, get_precision(mixed_prec["compute_prec"])) hidden = tf.reshape(activation_fn(hidden), [-1, outputs_size]) * idt - else : - hidden = tf.reshape(activation_fn(hidden), [-1, outputs_size]) + else: + hidden 
= tf.reshape(activation_fn(hidden), [-1, outputs_size]) if mixed_prec is not None: - hidden = tf.cast(hidden, get_precision(mixed_prec['output_prec'])) + hidden = tf.cast(hidden, get_precision(mixed_prec["output_prec"])) return hidden -def embedding_net_rand_seed_shift( - network_size -): +def embedding_net_rand_seed_shift(network_size): shift = 3 * (len(network_size) + 1) return shift -def embedding_net(xx, - network_size, - precision, - activation_fn = tf.nn.tanh, - resnet_dt = False, - name_suffix = '', - stddev = 1.0, - bavg = 0.0, - seed = None, - trainable = True, - uniform_seed = False, - initial_variables = None, - mixed_prec = None): + +def embedding_net( + xx, + network_size, + precision, + activation_fn=tf.nn.tanh, + resnet_dt=False, + name_suffix="", + stddev=1.0, + bavg=0.0, + seed=None, + trainable=True, + uniform_seed=False, + initial_variables=None, + mixed_prec=None, +): r"""The embedding network. The embedding network function :math:`\mathcal{N}` is constructed by is the @@ -136,18 +152,18 @@ def embedding_net(xx, Parameters ---------- - xx : Tensor + xx : Tensor Input tensor :math:`\mathbf{x}` of shape [-1,1] network_size: list of int Size of the embedding network. For example [16,32,64] - precision: + precision: Precision of network weights. For example, tf.float64 activation_fn: Activation function :math:`\boldsymbol{\phi}` resnet_dt: boolean Using time-step in the ResNet construction name_suffix: str - The name suffix append to each variable. + The name suffix append to each variable. 
stddev: float Standard deviation of initializing network parameters bavg: float @@ -175,74 +191,92 @@ def embedding_net(xx, for ii in range(1, len(outputs_size)): w_initializer = tf.random_normal_initializer( - stddev=stddev/np.sqrt(outputs_size[ii]+outputs_size[ii-1]), - seed = seed if (seed is None or uniform_seed) else seed + ii*3+0 - ) + stddev=stddev / np.sqrt(outputs_size[ii] + outputs_size[ii - 1]), + seed=seed if (seed is None or uniform_seed) else seed + ii * 3 + 0, + ) b_initializer = tf.random_normal_initializer( - stddev=stddev, - mean = bavg, - seed = seed if (seed is None or uniform_seed) else seed + 3*ii+1 - ) + stddev=stddev, + mean=bavg, + seed=seed if (seed is None or uniform_seed) else seed + 3 * ii + 1, + ) if initial_variables is not None: scope = tf.get_variable_scope().name - w_initializer = tf.constant_initializer(initial_variables[scope+'/matrix_'+str(ii)+name_suffix]) - b_initializer = tf.constant_initializer(initial_variables[scope+'/bias_'+str(ii)+name_suffix]) - w = tf.get_variable('matrix_'+str(ii)+name_suffix, - [outputs_size[ii - 1], outputs_size[ii]], - precision, - w_initializer, - trainable = trainable) - variable_summaries(w, 'matrix_'+str(ii)+name_suffix) + w_initializer = tf.constant_initializer( + initial_variables[scope + "/matrix_" + str(ii) + name_suffix] + ) + b_initializer = tf.constant_initializer( + initial_variables[scope + "/bias_" + str(ii) + name_suffix] + ) + w = tf.get_variable( + "matrix_" + str(ii) + name_suffix, + [outputs_size[ii - 1], outputs_size[ii]], + precision, + w_initializer, + trainable=trainable, + ) + variable_summaries(w, "matrix_" + str(ii) + name_suffix) - b = tf.get_variable('bias_'+str(ii)+name_suffix, - [outputs_size[ii]], - precision, - b_initializer, - trainable = trainable) - variable_summaries(b, 'bias_'+str(ii)+name_suffix) + b = tf.get_variable( + "bias_" + str(ii) + name_suffix, + [outputs_size[ii]], + precision, + b_initializer, + trainable=trainable, + ) + variable_summaries(b, 
"bias_" + str(ii) + name_suffix) if mixed_prec is not None: - xx = tf.cast(xx, get_precision(mixed_prec['compute_prec'])) - w = tf.cast(w, get_precision(mixed_prec['compute_prec'])) - b = tf.cast(b, get_precision(mixed_prec['compute_prec'])) + xx = tf.cast(xx, get_precision(mixed_prec["compute_prec"])) + w = tf.cast(w, get_precision(mixed_prec["compute_prec"])) + b = tf.cast(b, get_precision(mixed_prec["compute_prec"])) if activation_fn is not None: - hidden = tf.reshape(activation_fn(tf.nn.bias_add(tf.matmul(xx, w), b)), [-1, outputs_size[ii]]) + hidden = tf.reshape( + activation_fn(tf.nn.bias_add(tf.matmul(xx, w), b)), + [-1, outputs_size[ii]], + ) else: - hidden = tf.reshape(tf.nn.bias_add(tf.matmul(xx, w), b), [-1, outputs_size[ii]]) - if resnet_dt : + hidden = tf.reshape( + tf.nn.bias_add(tf.matmul(xx, w), b), [-1, outputs_size[ii]] + ) + if resnet_dt: idt_initializer = tf.random_normal_initializer( - stddev=0.001, - mean = 1.0, - seed = seed if (seed is None or uniform_seed) else seed + 3*ii+2 - ) + stddev=0.001, + mean=1.0, + seed=seed if (seed is None or uniform_seed) else seed + 3 * ii + 2, + ) if initial_variables is not None: scope = tf.get_variable_scope().name - idt_initializer = tf.constant_initializer(initial_variables[scope+'/idt_'+str(ii)+name_suffix]) - idt = tf.get_variable('idt_'+str(ii)+name_suffix, - [1, outputs_size[ii]], - precision, - idt_initializer, - trainable = trainable) - variable_summaries(idt, 'idt_'+str(ii)+name_suffix) + idt_initializer = tf.constant_initializer( + initial_variables[scope + "/idt_" + str(ii) + name_suffix] + ) + idt = tf.get_variable( + "idt_" + str(ii) + name_suffix, + [1, outputs_size[ii]], + precision, + idt_initializer, + trainable=trainable, + ) + variable_summaries(idt, "idt_" + str(ii) + name_suffix) if mixed_prec is not None: - idt = tf.cast(idt, get_precision(mixed_prec['compute_prec'])) + idt = tf.cast(idt, get_precision(mixed_prec["compute_prec"])) - if outputs_size[ii] == outputs_size[ii-1]: - if 
resnet_dt : + if outputs_size[ii] == outputs_size[ii - 1]: + if resnet_dt: xx += hidden * idt - else : + else: xx += hidden - elif outputs_size[ii] == outputs_size[ii-1] * 2: - if resnet_dt : - xx = tf.concat([xx,xx], 1) + hidden * idt - else : - xx = tf.concat([xx,xx], 1) + hidden + elif outputs_size[ii] == outputs_size[ii - 1] * 2: + if resnet_dt: + xx = tf.concat([xx, xx], 1) + hidden * idt + else: + xx = tf.concat([xx, xx], 1) + hidden else: xx = hidden if mixed_prec is not None: - xx = tf.cast(xx, get_precision(mixed_prec['output_prec'])) + xx = tf.cast(xx, get_precision(mixed_prec["output_prec"])) return xx + def variable_summaries(var: tf.Variable, name: str): """Attach a lot of summaries to a Tensor (for TensorBoard visualization). @@ -255,11 +289,11 @@ def variable_summaries(var: tf.Variable, name: str): """ with tf.name_scope(name): mean = tf.reduce_mean(var) - tf.summary.scalar('mean', mean) + tf.summary.scalar("mean", mean) - with tf.name_scope('stddev'): + with tf.name_scope("stddev"): stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) - tf.summary.scalar('stddev', stddev) - tf.summary.scalar('max', tf.reduce_max(var)) - tf.summary.scalar('min', tf.reduce_min(var)) - tf.summary.histogram('histogram', var) + tf.summary.scalar("stddev", stddev) + tf.summary.scalar("max", tf.reduce_max(var)) + tf.summary.scalar("min", tf.reduce_min(var)) + tf.summary.histogram("histogram", var) diff --git a/deepmd/utils/pair_tab.py b/deepmd/utils/pair_tab.py index a0063ac476..dff03513e5 100644 --- a/deepmd/utils/pair_tab.py +++ b/deepmd/utils/pair_tab.py @@ -1,34 +1,37 @@ #!/usr/bin/env python3 +from typing import ( + List, + Tuple, +) + import numpy as np -from typing import Tuple, List +from scipy.interpolate import ( + CubicSpline, +) -from scipy.interpolate import CubicSpline -class PairTab (object): +class PairTab(object): """ - + Parameters ---------- filename File name for the short-range tabulated potential. 
- The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. - The first colume is the distance between atoms. - The second to the last columes are energies for pairs of certain types. - For example we have two atom types, 0 and 1. - The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly. + The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. + The first colume is the distance between atoms. + The second to the last columes are energies for pairs of certain types. + For example we have two atom types, 0 and 1. + The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly. """ - def __init__(self, - filename : str - ) -> None: + + def __init__(self, filename: str) -> None: """ - Constructor + Constructor """ self.reinit(filename) - - def reinit(self, - filename : str - ) -> None: + + def reinit(self, filename: str) -> None: """ Initialize the tabulated interaction @@ -36,11 +39,11 @@ def reinit(self, ---------- filename File name for the short-range tabulated potential. - The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. - The first colume is the distance between atoms. - The second to the last columes are energies for pairs of certain types. - For example we have two atom types, 0 and 1. - The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly. + The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. + The first colume is the distance between atoms. + The second to the last columes are energies for pairs of certain types. + For example we have two atom types, 0 and 1. + The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly. 
""" self.vdata = np.loadtxt(filename) self.rmin = self.vdata[0][0] @@ -49,37 +52,45 @@ def reinit(self, ncol = self.vdata.shape[1] - 1 n0 = (-1 + np.sqrt(1 + 8 * ncol)) * 0.5 self.ntypes = int(n0 + 0.1) - assert(self.ntypes * (self.ntypes+1) // 2 == ncol),\ - "number of volumes provided in %s does not match guessed number of types %d" % (filename, self.ntypes) + assert self.ntypes * (self.ntypes + 1) // 2 == ncol, ( + "number of volumes provided in %s does not match guessed number of types %d" + % (filename, self.ntypes) + ) self.tab_info = np.array([self.rmin, self.hh, self.nspline, self.ntypes]) self.tab_data = self._make_data() def get(self) -> Tuple[np.array, np.array]: """ - Get the serialized table. + Get the serialized table. """ return self.tab_info, self.tab_data - def _make_data(self) : + def _make_data(self): data = np.zeros([self.ntypes * self.ntypes * 4 * self.nspline]) stride = 4 * self.nspline idx_iter = 0 - xx = self.vdata[:,0] - for t0 in range(self.ntypes) : - for t1 in range(t0, self.ntypes) : - vv = self.vdata[:,1+idx_iter] + xx = self.vdata[:, 0] + for t0 in range(self.ntypes): + for t1 in range(t0, self.ntypes): + vv = self.vdata[:, 1 + idx_iter] cs = CubicSpline(xx, vv) dd = cs(xx, 1) dd *= self.hh dtmp = np.zeros(stride) - for ii in range(self.nspline) : - dtmp[ii*4+0] = 2 * vv[ii] - 2 * vv[ii+1] + dd[ii] + dd[ii+1] - dtmp[ii*4+1] =-3 * vv[ii] + 3 * vv[ii+1] - 2 * dd[ii] - dd[ii+1] - dtmp[ii*4+2] = dd[ii] - dtmp[ii*4+3] = vv[ii] - data[(t0 * self.ntypes + t1) * stride : (t0 * self.ntypes + t1) * stride + stride] \ - = dtmp - data[(t1 * self.ntypes + t0) * stride : (t1 * self.ntypes + t0) * stride + stride] \ - = dtmp + for ii in range(self.nspline): + dtmp[ii * 4 + 0] = 2 * vv[ii] - 2 * vv[ii + 1] + dd[ii] + dd[ii + 1] + dtmp[ii * 4 + 1] = ( + -3 * vv[ii] + 3 * vv[ii + 1] - 2 * dd[ii] - dd[ii + 1] + ) + dtmp[ii * 4 + 2] = dd[ii] + dtmp[ii * 4 + 3] = vv[ii] + data[ + (t0 * self.ntypes + t1) * stride : (t0 * self.ntypes + t1) * stride + + 
stride + ] = dtmp + data[ + (t1 * self.ntypes + t0) * stride : (t1 * self.ntypes + t0) * stride + + stride + ] = dtmp idx_iter += 1 return data diff --git a/deepmd/utils/parallel_op.py b/deepmd/utils/parallel_op.py index 4fe652f064..3e60fbb2a2 100644 --- a/deepmd/utils/parallel_op.py +++ b/deepmd/utils/parallel_op.py @@ -1,12 +1,23 @@ -from typing import Optional, Callable, Generator, Tuple, Dict, Any +from typing import ( + Any, + Callable, + Dict, + Generator, + Optional, + Tuple, +) -from deepmd.env import tf -from deepmd.utils.sess import run_sess +from deepmd.env import ( + tf, +) +from deepmd.utils.sess import ( + run_sess, +) class ParallelOp: """Run an op with data parallelism. - + Parameters ---------- builder : Callable[..., Tuple[Dict[str, tf.Tensor], Tuple[tf.Tensor]]] @@ -15,7 +26,7 @@ class ParallelOp: the number of threads config : tf.ConfigProto, optional tf.ConfigProto - + Examples -------- >>> from deepmd.env import tf @@ -32,14 +43,20 @@ class ParallelOp: >>> print(*p.generate(tf.Session(), feed())) [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] """ - def __init__(self, builder: Callable[..., Tuple[Dict[str, tf.Tensor], Tuple[tf.Tensor]]], nthreads: Optional[int] = None, config: Optional[tf.ConfigProto] = None) -> None: + + def __init__( + self, + builder: Callable[..., Tuple[Dict[str, tf.Tensor], Tuple[tf.Tensor]]], + nthreads: Optional[int] = None, + config: Optional[tf.ConfigProto] = None, + ) -> None: if nthreads is not None: self.nthreads = nthreads elif config is not None: self.nthreads = max(config.inter_op_parallelism_threads, 1) else: self.nthreads = 1 - + self.placeholders = [] self.ops = [] for ii in range(self.nthreads): @@ -48,14 +65,16 @@ def __init__(self, builder: Callable[..., Tuple[Dict[str, tf.Tensor], Tuple[tf.T self.placeholders.append(placeholder) self.ops.append(op) - def generate(self, sess: tf.Session, feed: Generator[Dict[str, Any], None, None]) -> Generator[Tuple, None, None]: + def generate( + self, sess: tf.Session, feed: 
Generator[Dict[str, Any], None, None] + ) -> Generator[Tuple, None, None]: """Returns a generator. Parameters ---------- feed : Generator[dict, None, None] generator which yields feed_dict - + Yields ------ Generator[Tuple, None, None] @@ -73,8 +92,7 @@ def generate(self, sess: tf.Session, feed: Generator[Dict[str, Any], None, None] nn = ii break for kk, vv in fd.items(): - feed_dict[self.placeholders[ii][kk]] = vv - ops = self.ops[:nn] + feed_dict[self.placeholders[ii][kk]] = vv + ops = self.ops[:nn] for yy in run_sess(sess, ops, feed_dict=feed_dict): yield yy - diff --git a/deepmd/utils/path.py b/deepmd/utils/path.py index b5dab4b135..5358362a7d 100644 --- a/deepmd/utils/path.py +++ b/deepmd/utils/path.py @@ -1,21 +1,35 @@ import os -from abc import ABC, abstractmethod -from typing import Optional, List -from pathlib import Path -from functools import lru_cache +from abc import ( + ABC, + abstractmethod, +) +from functools import ( + lru_cache, +) +from pathlib import ( + Path, +) +from typing import ( + List, + Optional, +) -import numpy as np import h5py -from wcmatch.glob import globfilter +import numpy as np +from wcmatch.glob import ( + globfilter, +) + class DPPath(ABC): """The path class to data system (DeepmdData). - + Parameters ---------- path : str path """ + def __new__(cls, path: str): if cls is DPPath: if os.path.isdir(path): @@ -30,23 +44,23 @@ def __new__(cls, path: str): @abstractmethod def load_numpy(self) -> np.ndarray: """Load NumPy array. - + Returns ------- np.ndarray loaded NumPy array """ - + @abstractmethod def load_txt(self, **kwargs) -> np.ndarray: """Load NumPy array from text. - + Returns ------- np.ndarray loaded NumPy array """ - + @abstractmethod def glob(self, pattern: str) -> List["DPPath"]: """Search path using the glob pattern. 
@@ -55,29 +69,29 @@ def glob(self, pattern: str) -> List["DPPath"]: ---------- pattern : str glob pattern - + Returns ------- List[DPPath] list of paths """ - + @abstractmethod def rglob(self, pattern: str) -> List["DPPath"]: """This is like calling :meth:`DPPath.glob()` with `**/` added in front of the given relative pattern. - + Parameters ---------- pattern : str glob pattern - + Returns ------- List[DPPath] list of paths """ - + @abstractmethod def is_file(self) -> bool: """Check if self is file.""" @@ -85,37 +99,38 @@ def is_file(self) -> bool: @abstractmethod def is_dir(self) -> bool: """Check if self is directory.""" - + @abstractmethod def __truediv__(self, key: str) -> "DPPath": """Used for / operator.""" - + @abstractmethod def __lt__(self, other: "DPPath") -> bool: """whether this DPPath is less than other for sorting""" - + @abstractmethod def __str__(self) -> str: """Represent string""" - + def __repr__(self) -> str: return "%s (%s)" % (type(self), str(self)) - + def __eq__(self, other) -> bool: return str(self) == str(other) - + def __hash__(self): return hash(str(self)) class DPOSPath(DPPath): """The OS path class to data system (DeepmdData) for real directories. - + Parameters ---------- path : str path """ + def __init__(self, path: str) -> None: super().__init__() if isinstance(path, Path): @@ -125,7 +140,7 @@ def __init__(self, path: str) -> None: def load_numpy(self) -> np.ndarray: """Load NumPy array. - + Returns ------- np.ndarray @@ -135,7 +150,7 @@ def load_numpy(self) -> np.ndarray: def load_txt(self, **kwargs) -> np.ndarray: """Load NumPy array from text. - + Returns ------- np.ndarray @@ -150,7 +165,7 @@ def glob(self, pattern: str) -> List["DPPath"]: ---------- pattern : str glob pattern - + Returns ------- List[DPPath] @@ -163,12 +178,12 @@ def glob(self, pattern: str) -> List["DPPath"]: def rglob(self, pattern: str) -> List["DPPath"]: """This is like calling :meth:`DPPath.glob()` with `**/` added in front of the given relative pattern. 
- + Parameters ---------- pattern : str glob pattern - + Returns ------- List[DPPath] @@ -183,7 +198,7 @@ def is_file(self) -> bool: def is_dir(self) -> bool: """Check if self is directory.""" return self.path.is_dir() - + def __truediv__(self, key: str) -> "DPPath": """Used for / operator.""" return type(self)(self.path / key) @@ -205,12 +220,13 @@ class DPH5Path(DPPath): OS - HDF5 relationship: directory - Group file - Dataset - + Parameters ---------- path : str path """ + def __init__(self, path: str) -> None: super().__init__() # we use "#" to split path @@ -220,12 +236,12 @@ def __init__(self, path: str) -> None: self.root = self._load_h5py(s[0]) # h5 path: default is the root path self.name = s[1] if len(s) > 1 else "/" - + @classmethod @lru_cache(None) def _load_h5py(cls, path: str) -> h5py.File: """Load hdf5 file. - + Parameters ---------- path : str @@ -234,21 +250,21 @@ def _load_h5py(cls, path: str) -> h5py.File: # this method has cache to avoid duplicated # loading from different DPH5Path # However the file will be never closed? - return h5py.File(path, 'r') + return h5py.File(path, "r") def load_numpy(self) -> np.ndarray: """Load NumPy array. - + Returns ------- np.ndarray loaded NumPy array """ return self.root[self.name][:] - + def load_txt(self, dtype: Optional[np.dtype] = None, **kwargs) -> np.ndarray: """Load NumPy array from text. - + Returns ------- np.ndarray @@ -258,7 +274,7 @@ def load_txt(self, dtype: Optional[np.dtype] = None, **kwargs) -> np.ndarray: if dtype: arr = arr.astype(dtype) return arr - + def glob(self, pattern: str) -> List["DPPath"]: """Search path using the glob pattern. 
@@ -266,7 +282,7 @@ def glob(self, pattern: str) -> List["DPPath"]: ---------- pattern : str glob pattern - + Returns ------- List[DPPath] @@ -274,17 +290,22 @@ def glob(self, pattern: str) -> List["DPPath"]: """ # got paths starts with current path first, which is faster subpaths = [ii for ii in self._keys if ii.startswith(self.name)] - return list([type(self)("%s#%s"%(self.root_path, pp)) for pp in globfilter(subpaths, self._connect_path(pattern))]) + return list( + [ + type(self)("%s#%s" % (self.root_path, pp)) + for pp in globfilter(subpaths, self._connect_path(pattern)) + ] + ) def rglob(self, pattern: str) -> List["DPPath"]: """This is like calling :meth:`DPPath.glob()` with `**/` added in front of the given relative pattern. - + Parameters ---------- pattern : str glob pattern - + Returns ------- List[DPPath] @@ -316,23 +337,23 @@ def is_dir(self) -> bool: if self.name not in self._keys: return False return isinstance(self.root[self.name], h5py.Group) - + def __truediv__(self, key: str) -> "DPPath": """Used for / operator.""" return type(self)("%s#%s" % (self.root_path, self._connect_path(key))) - + def _connect_path(self, path: str) -> str: """Connect self with path""" if self.name.endswith("/"): return "%s%s" % (self.name, path) return "%s/%s" % (self.name, path) - + def __lt__(self, other: "DPH5Path") -> bool: """whether this DPPath is less than other for sorting""" if self.root_path == other.root_path: return self.name < other.name return self.root_path < other.root_path - + def __str__(self) -> str: """returns path of self""" return "%s#%s" % (self.root_path, self.name) diff --git a/deepmd/utils/plugin.py b/deepmd/utils/plugin.py index af28a4632d..66b4569029 100644 --- a/deepmd/utils/plugin.py +++ b/deepmd/utils/plugin.py @@ -1,9 +1,12 @@ - """Base of plugin systems.""" # copied from https://github.com/deepmodeling/dpdata/blob/a3e76d75de53f6076254de82d18605a010dc3b00/dpdata/plugin.py -from abc import ABCMeta -from typing import Callable +from abc import 
( + ABCMeta, +) +from typing import ( + Callable, +) class Plugin: @@ -22,6 +25,7 @@ def xxx(): pass >>> print(plugin.plugins['xx']) """ + def __init__(self): self.plugins = {} @@ -29,32 +33,34 @@ def __add__(self, other) -> "Plugin": self.plugins.update(other.plugins) return self - def register(self, key : str) -> Callable[[object], object]: + def register(self, key: str) -> Callable[[object], object]: """Register a plugin. - + Parameters ---------- key : str key of the plugin - + Returns ------- Callable[[object], object] decorator """ - def decorator(object : object) -> object: + + def decorator(object: object) -> object: self.plugins[key] = object return object + return decorator - + def get_plugin(self, key) -> object: """Visit a plugin by key. - + Parameters ---------- key : str key of the plugin - + Returns ------- object @@ -62,23 +68,27 @@ def get_plugin(self, key) -> object: """ return self.plugins[key] + class VariantMeta: def __call__(cls, *args, **kwargs): """Remove `type` and keys that starts with underline.""" obj = cls.__new__(cls, *args, **kwargs) - kwargs.pop('type', None) + kwargs.pop("type", None) to_pop = [] for kk in kwargs: - if kk[0] == '_': + if kk[0] == "_": to_pop.append(kk) for kk in to_pop: kwargs.pop(kk, None) obj.__init__(*args, **kwargs) return obj + class VariantABCMeta(VariantMeta, ABCMeta): pass + class PluginVariant(metaclass=VariantABCMeta): """A class to remove `type` from input arguments.""" - pass \ No newline at end of file + + pass diff --git a/deepmd/utils/random.py b/deepmd/utils/random.py index f0512cf321..57d0af4d55 100644 --- a/deepmd/utils/random.py +++ b/deepmd/utils/random.py @@ -1,8 +1,9 @@ -from typing import Optional +from typing import ( + Optional, +) import numpy as np - _RANDOM_GENERATOR = np.random.RandomState() @@ -62,4 +63,4 @@ def shuffle(x: np.ndarray): _RANDOM_GENERATOR.shuffle(x) -__all__ = ['choice', 'random', 'seed', 'shuffle'] +__all__ = ["choice", "random", "seed", "shuffle"] diff --git 
a/deepmd/utils/sess.py b/deepmd/utils/sess.py index 07723c13c4..5b2bd07abd 100644 --- a/deepmd/utils/sess.py +++ b/deepmd/utils/sess.py @@ -1,7 +1,11 @@ import os -from deepmd.env import tf -from deepmd.utils.errors import OutOfMemoryError +from deepmd.env import ( + tf, +) +from deepmd.utils.errors import ( + OutOfMemoryError, +) def run_sess(sess: tf.Session, *args, **kwargs): @@ -33,7 +37,7 @@ def run_sess(sess: tf.Session, *args, **kwargs): "4. Check if another program is using the same GPU by " "execuating `nvidia-smi`. The usage of GPUs is " "controlled by `CUDA_VISIBLE_DEVICES` environment " - "variable (current value: %s).\n" % ( - os.getenv("CUDA_VISIBLE_DEVICES", None), - )) + "variable (current value: %s).\n" + % (os.getenv("CUDA_VISIBLE_DEVICES", None),) + ) raise OutOfMemoryError(MESSAGE) from e diff --git a/deepmd/utils/tabulate.py b/deepmd/utils/tabulate.py index 14696f4137..ee05eb5789 100644 --- a/deepmd/utils/tabulate.py +++ b/deepmd/utils/tabulate.py @@ -1,25 +1,44 @@ import logging +from functools import ( + lru_cache, +) +from typing import ( + Callable, + Dict, + List, + Tuple, +) + import numpy as np +from scipy.special import ( + comb, +) + import deepmd -from typing import Callable -from typing import Tuple, List, Dict -from functools import lru_cache -from scipy.special import comb -from deepmd.env import tf -from deepmd.env import op_module -from deepmd.common import ACTIVATION_FN_DICT -from deepmd.utils.graph import get_tensor_by_name_from_graph -from deepmd.utils.graph import get_embedding_net_nodes_from_graph_def -from deepmd.descriptor import Descriptor +from deepmd.common import ( + ACTIVATION_FN_DICT, +) +from deepmd.descriptor import ( + Descriptor, +) +from deepmd.env import ( + op_module, + tf, +) +from deepmd.utils.graph import ( + get_embedding_net_nodes_from_graph_def, + get_tensor_by_name_from_graph, +) log = logging.getLogger(__name__) -class DPTabulate(): + +class DPTabulate: """ Class for tabulation. 
- Compress a model, which including tabulating the embedding-net. - The table is composed of fifth-order polynomial coefficients and is assembled from two sub-tables. The first table takes the stride(parameter) as it\'s uniform stride, while the second table takes 10 * stride as it\'s uniform stride + Compress a model, which including tabulating the embedding-net. + The table is composed of fifth-order polynomial coefficients and is assembled from two sub-tables. The first table takes the stride(parameter) as it\'s uniform stride, while the second table takes 10 * stride as it\'s uniform stride The range of the first table is automatically detected by deepmd-kit, while the second table ranges from the first table\'s upper boundary(upper) to the extrapolate(parameter) * upper. Parameters @@ -42,16 +61,18 @@ class DPTabulate(): suffix : str, optional The suffix of the scope """ - def __init__(self, - descrpt : Descriptor, - neuron : List[int], - graph: tf.Graph, - graph_def: tf.GraphDef, - type_one_side : bool = False, - exclude_types : List[List[int]] = [], - activation_fn : Callable[[tf.Tensor], tf.Tensor] = tf.nn.tanh, - suffix : str = "", - ) -> None: + + def __init__( + self, + descrpt: Descriptor, + neuron: List[int], + graph: tf.Graph, + graph_def: tf.GraphDef, + type_one_side: bool = False, + exclude_types: List[List[int]] = [], + activation_fn: Callable[[tf.Tensor], tf.Tensor] = tf.nn.tanh, + suffix: str = "", + ) -> None: """ Constructor """ @@ -62,7 +83,7 @@ def __init__(self, self.type_one_side = type_one_side self.exclude_types = exclude_types self.suffix = suffix - + # functype if activation_fn == ACTIVATION_FN_DICT["tanh"]: self.functype = 1 @@ -80,10 +101,10 @@ def __init__(self, raise RuntimeError("Unknown actication function type!") self.activation_fn = activation_fn - #self.sess = tf.Session(graph = self.graph) + # self.sess = tf.Session(graph = self.graph) self.sub_graph, self.sub_graph_def = self._load_sub_graph() - self.sub_sess = 
tf.Session(graph = self.sub_graph) + self.sub_sess = tf.Session(graph=self.sub_graph) if isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): self.sel_a = self.descrpt.sel_r @@ -100,11 +121,17 @@ def __init__(self, else: raise RuntimeError("Unsupported descriptor") - self.davg = get_tensor_by_name_from_graph(self.graph, f'descrpt_attr{self.suffix}/t_avg') - self.dstd = get_tensor_by_name_from_graph(self.graph, f'descrpt_attr{self.suffix}/t_std') - self.ntypes = get_tensor_by_name_from_graph(self.graph, 'descrpt_attr/ntypes') + self.davg = get_tensor_by_name_from_graph( + self.graph, f"descrpt_attr{self.suffix}/t_avg" + ) + self.dstd = get_tensor_by_name_from_graph( + self.graph, f"descrpt_attr{self.suffix}/t_std" + ) + self.ntypes = get_tensor_by_name_from_graph(self.graph, "descrpt_attr/ntypes") - self.embedding_net_nodes = get_embedding_net_nodes_from_graph_def(self.graph_def, suffix=self.suffix) + self.embedding_net_nodes = get_embedding_net_nodes_from_graph_def( + self.graph_def, suffix=self.suffix + ) # move it to the descriptor class # for tt in self.exclude_types: @@ -118,7 +145,7 @@ def __init__(self, self.bias = self._get_bias() self.matrix = self._get_matrix() - self.data_type = self._get_data_type() + self.data_type = self._get_data_type() self.last_layer_size = self._get_last_layer_size() self.data = {} @@ -126,12 +153,9 @@ def __init__(self, self.upper = {} self.lower = {} - - def build(self, - min_nbor_dist : float, - extrapolate : float, - stride0 : float, - stride1 : float) -> Tuple[Dict[str, int], Dict[str, int]]: + def build( + self, min_nbor_dist: float, extrapolate: float, stride0: float, stride1: float + ) -> Tuple[Dict[str, int], Dict[str, int]]: """ Build the tables for model compression @@ -159,89 +183,218 @@ def build(self, lower, upper = self._get_env_mat_range(min_nbor_dist) if isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): for ii in range(self.table_size): - if (self.type_one_side and not self._all_excluded(ii)) or (not 
self.type_one_side and (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types): + if (self.type_one_side and not self._all_excluded(ii)) or ( + not self.type_one_side + and (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types + ): if self.type_one_side: net = "filter_-1_net_" + str(ii) # upper and lower should consider all types which are not excluded and sel>0 - idx = [(type_i, ii) not in self.exclude_types and self.sel_a[type_i] > 0 for type_i in range(self.ntypes)] + idx = [ + (type_i, ii) not in self.exclude_types + and self.sel_a[type_i] > 0 + for type_i in range(self.ntypes) + ] uu = np.max(upper[idx]) ll = np.min(lower[idx]) else: ielement = ii // self.ntypes - net = "filter_" + str(ielement) + "_net_" + str(ii % self.ntypes) + net = ( + "filter_" + str(ielement) + "_net_" + str(ii % self.ntypes) + ) uu = upper[ielement] ll = lower[ielement] - xx = np.arange(ll, uu, stride0, dtype = self.data_type) - xx = np.append(xx, np.arange(uu, extrapolate * uu, stride1, dtype = self.data_type)) - xx = np.append(xx, np.array([extrapolate * uu], dtype = self.data_type)) - nspline = ((uu - ll) / stride0 + (extrapolate * uu - uu) / stride1).astype(int) - self._build_lower(net, xx, ii, uu, ll, stride0, stride1, extrapolate, nspline) + xx = np.arange(ll, uu, stride0, dtype=self.data_type) + xx = np.append( + xx, + np.arange(uu, extrapolate * uu, stride1, dtype=self.data_type), + ) + xx = np.append( + xx, np.array([extrapolate * uu], dtype=self.data_type) + ) + nspline = ( + (uu - ll) / stride0 + (extrapolate * uu - uu) / stride1 + ).astype(int) + self._build_lower( + net, xx, ii, uu, ll, stride0, stride1, extrapolate, nspline + ) elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT): xx_all = [] for ii in range(self.ntypes): - xx = np.arange(extrapolate * lower[ii], lower[ii], stride1, dtype = self.data_type) - xx = np.append(xx, np.arange(lower[ii], upper[ii], stride0, dtype = self.data_type)) - xx = np.append(xx, np.arange(upper[ii], extrapolate * 
upper[ii], stride1, dtype = self.data_type)) - xx = np.append(xx, np.array([extrapolate * upper[ii]], dtype = self.data_type)) + xx = np.arange( + extrapolate * lower[ii], lower[ii], stride1, dtype=self.data_type + ) + xx = np.append( + xx, np.arange(lower[ii], upper[ii], stride0, dtype=self.data_type) + ) + xx = np.append( + xx, + np.arange( + upper[ii], + extrapolate * upper[ii], + stride1, + dtype=self.data_type, + ), + ) + xx = np.append( + xx, np.array([extrapolate * upper[ii]], dtype=self.data_type) + ) xx_all.append(xx) - nspline = ((upper - lower) / stride0 + 2 * ((extrapolate * upper - upper) / stride1)).astype(int) + nspline = ( + (upper - lower) / stride0 + + 2 * ((extrapolate * upper - upper) / stride1) + ).astype(int) idx = 0 for ii in range(self.ntypes): for jj in range(ii, self.ntypes): net = "filter_" + str(ii) + "_net_" + str(jj) - self._build_lower(net, xx_all[ii], idx, upper[ii], lower[ii], stride0, stride1, extrapolate, nspline[ii]) + self._build_lower( + net, + xx_all[ii], + idx, + upper[ii], + lower[ii], + stride0, + stride1, + extrapolate, + nspline[ii], + ) idx += 1 elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): for ii in range(self.table_size): - if (self.type_one_side and not self._all_excluded(ii)) or (not self.type_one_side and (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types): + if (self.type_one_side and not self._all_excluded(ii)) or ( + not self.type_one_side + and (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types + ): if self.type_one_side: net = "filter_-1_net_" + str(ii) # upper and lower should consider all types which are not excluded and sel>0 - idx = [(type_i, ii) not in self.exclude_types and self.sel_a[type_i] > 0 for type_i in range(self.ntypes)] + idx = [ + (type_i, ii) not in self.exclude_types + and self.sel_a[type_i] > 0 + for type_i in range(self.ntypes) + ] uu = np.max(upper[idx]) ll = np.min(lower[idx]) else: ielement = ii // self.ntypes - net = "filter_" + str(ielement) + 
"_net_" + str(ii % self.ntypes) + net = ( + "filter_" + str(ielement) + "_net_" + str(ii % self.ntypes) + ) uu = upper[ielement] ll = lower[ielement] - xx = np.arange(ll, uu, stride0, dtype = self.data_type) - xx = np.append(xx, np.arange(uu, extrapolate * uu, stride1, dtype = self.data_type)) - xx = np.append(xx, np.array([extrapolate * uu], dtype = self.data_type)) - nspline = ((uu - ll) / stride0 + (extrapolate * uu - uu) / stride1).astype(int) - self._build_lower(net, xx, ii, uu, ll, stride0, stride1, extrapolate, nspline) + xx = np.arange(ll, uu, stride0, dtype=self.data_type) + xx = np.append( + xx, + np.arange(uu, extrapolate * uu, stride1, dtype=self.data_type), + ) + xx = np.append( + xx, np.array([extrapolate * uu], dtype=self.data_type) + ) + nspline = ( + (uu - ll) / stride0 + (extrapolate * uu - uu) / stride1 + ).astype(int) + self._build_lower( + net, xx, ii, uu, ll, stride0, stride1, extrapolate, nspline + ) else: raise RuntimeError("Unsupported descriptor") self._convert_numpy_to_tensor() return self.lower, self.upper - def _build_lower(self, net, xx, idx, upper, lower, stride0, stride1, extrapolate, nspline): + def _build_lower( + self, net, xx, idx, upper, lower, stride0, stride1, extrapolate, nspline + ): vv, dd, d2 = self._make_data(xx, idx) - self.data[net] = np.zeros([nspline, 6 * self.last_layer_size], dtype = self.data_type) + self.data[net] = np.zeros( + [nspline, 6 * self.last_layer_size], dtype=self.data_type + ) # tt.shape: [nspline, self.last_layer_size] if isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): tt = np.full((nspline, self.last_layer_size), stride1) - tt[:int((upper - lower) / stride0), :] = stride0 + tt[: int((upper - lower) / stride0), :] = stride0 elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT): tt = np.full((nspline, self.last_layer_size), stride1) - tt[int((lower - extrapolate * lower) / stride1) + 1:(int((lower - extrapolate * lower) / stride1) + int((upper - lower) / stride0)), :] = stride0 + tt[ + 
int((lower - extrapolate * lower) / stride1) + + 1 : ( + int((lower - extrapolate * lower) / stride1) + + int((upper - lower) / stride0) + ), + :, + ] = stride0 elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): tt = np.full((nspline, self.last_layer_size), stride1) - tt[:int((upper - lower) / stride0), :] = stride0 + tt[: int((upper - lower) / stride0), :] = stride0 else: raise RuntimeError("Unsupported descriptor") # hh.shape: [nspline, self.last_layer_size] - hh = vv[1:nspline+1, :self.last_layer_size] - vv[:nspline, :self.last_layer_size] - - self.data[net][:, :6 * self.last_layer_size:6] = vv[:nspline, :self.last_layer_size] - self.data[net][:, 1:6 * self.last_layer_size:6] = dd[:nspline, :self.last_layer_size] - self.data[net][:, 2:6 * self.last_layer_size:6] = 0.5 * d2[:nspline, :self.last_layer_size] - self.data[net][:, 3:6 * self.last_layer_size:6] = (1 / (2 * tt * tt * tt)) * (20 * hh - (8 * dd[1:nspline+1, :self.last_layer_size] + 12 * dd[:nspline, :self.last_layer_size]) * tt - (3 * d2[:nspline, :self.last_layer_size] - d2[1:nspline+1, :self.last_layer_size]) * tt * tt) - self.data[net][:, 4:6 * self.last_layer_size:6] = (1 / (2 * tt * tt * tt * tt)) * (-30 * hh + (14 * dd[1:nspline+1, :self.last_layer_size] + 16 * dd[:nspline, :self.last_layer_size]) * tt + (3 * d2[:nspline, :self.last_layer_size] - 2 * d2[1:nspline+1, :self.last_layer_size]) * tt * tt) - self.data[net][:, 5:6 * self.last_layer_size:6] = (1 / (2 * tt * tt * tt * tt * tt)) * (12 * hh - 6 * (dd[1:nspline+1, :self.last_layer_size] + dd[:nspline, :self.last_layer_size]) * tt + (d2[1:nspline+1, :self.last_layer_size] - d2[:nspline, :self.last_layer_size]) * tt * tt) + hh = ( + vv[1 : nspline + 1, : self.last_layer_size] + - vv[:nspline, : self.last_layer_size] + ) + + self.data[net][:, : 6 * self.last_layer_size : 6] = vv[ + :nspline, : self.last_layer_size + ] + self.data[net][:, 1 : 6 * self.last_layer_size : 6] = dd[ + :nspline, : self.last_layer_size + ] + self.data[net][:, 2 
: 6 * self.last_layer_size : 6] = ( + 0.5 * d2[:nspline, : self.last_layer_size] + ) + self.data[net][:, 3 : 6 * self.last_layer_size : 6] = ( + 1 / (2 * tt * tt * tt) + ) * ( + 20 * hh + - ( + 8 * dd[1 : nspline + 1, : self.last_layer_size] + + 12 * dd[:nspline, : self.last_layer_size] + ) + * tt + - ( + 3 * d2[:nspline, : self.last_layer_size] + - d2[1 : nspline + 1, : self.last_layer_size] + ) + * tt + * tt + ) + self.data[net][:, 4 : 6 * self.last_layer_size : 6] = ( + 1 / (2 * tt * tt * tt * tt) + ) * ( + -30 * hh + + ( + 14 * dd[1 : nspline + 1, : self.last_layer_size] + + 16 * dd[:nspline, : self.last_layer_size] + ) + * tt + + ( + 3 * d2[:nspline, : self.last_layer_size] + - 2 * d2[1 : nspline + 1, : self.last_layer_size] + ) + * tt + * tt + ) + self.data[net][:, 5 : 6 * self.last_layer_size : 6] = ( + 1 / (2 * tt * tt * tt * tt * tt) + ) * ( + 12 * hh + - 6 + * ( + dd[1 : nspline + 1, : self.last_layer_size] + + dd[:nspline, : self.last_layer_size] + ) + * tt + + ( + d2[1 : nspline + 1, : self.last_layer_size] + - d2[:nspline, : self.last_layer_size] + ) + * tt + * tt + ) self.upper[net] = upper self.lower[net] = lower @@ -249,7 +402,7 @@ def _build_lower(self, net, xx, idx, upper, lower, stride0, stride1, extrapolate def _load_sub_graph(self): sub_graph_def = tf.GraphDef() with tf.Graph().as_default() as sub_graph: - tf.import_graph_def(sub_graph_def, name = "") + tf.import_graph_def(sub_graph_def, name="") return sub_graph, sub_graph_def def _get_bias(self): @@ -260,34 +413,50 @@ def _get_bias(self): if self.type_one_side: for ii in range(0, self.ntypes): if not self._all_excluded(ii): - node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/bias_{layer}_{ii}"] + node = self.embedding_net_nodes[ + f"filter_type_all{self.suffix}/bias_{layer}_{ii}" + ] bias["layer_" + str(layer)].append(tf.make_ndarray(node)) else: bias["layer_" + str(layer)].append(np.array([])) else: for ii in range(0, self.ntypes * self.ntypes): - if (ii // self.ntypes, ii % 
self.ntypes) not in self.exclude_types: - node = self.embedding_net_nodes[f"filter_type_{ii // self.ntypes}{self.suffix}/bias_{layer}_{ii % self.ntypes}"] + if ( + ii // self.ntypes, + ii % self.ntypes, + ) not in self.exclude_types: + node = self.embedding_net_nodes[ + f"filter_type_{ii // self.ntypes}{self.suffix}/bias_{layer}_{ii % self.ntypes}" + ] bias["layer_" + str(layer)].append(tf.make_ndarray(node)) else: bias["layer_" + str(layer)].append(np.array([])) elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT): for ii in range(self.ntypes): for jj in range(ii, self.ntypes): - node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/bias_{layer}_{ii}_{jj}"] + node = self.embedding_net_nodes[ + f"filter_type_all{self.suffix}/bias_{layer}_{ii}_{jj}" + ] bias["layer_" + str(layer)].append(tf.make_ndarray(node)) elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): if self.type_one_side: for ii in range(0, self.ntypes): if not self._all_excluded(ii): - node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/bias_{layer}_{ii}"] + node = self.embedding_net_nodes[ + f"filter_type_all{self.suffix}/bias_{layer}_{ii}" + ] bias["layer_" + str(layer)].append(tf.make_ndarray(node)) else: bias["layer_" + str(layer)].append(np.array([])) else: for ii in range(0, self.ntypes * self.ntypes): - if (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types: - node = self.embedding_net_nodes[f"filter_type_{ii // self.ntypes}{self.suffix}/bias_{layer}_{ii % self.ntypes}"] + if ( + ii // self.ntypes, + ii % self.ntypes, + ) not in self.exclude_types: + node = self.embedding_net_nodes[ + f"filter_type_{ii // self.ntypes}{self.suffix}/bias_{layer}_{ii % self.ntypes}" + ] bias["layer_" + str(layer)].append(tf.make_ndarray(node)) else: bias["layer_" + str(layer)].append(np.array([])) @@ -303,34 +472,50 @@ def _get_matrix(self): if self.type_one_side: for ii in range(0, self.ntypes): if not self._all_excluded(ii): - node = 
self.embedding_net_nodes[f"filter_type_all{self.suffix}/matrix_{layer}_{ii}"] + node = self.embedding_net_nodes[ + f"filter_type_all{self.suffix}/matrix_{layer}_{ii}" + ] matrix["layer_" + str(layer)].append(tf.make_ndarray(node)) else: matrix["layer_" + str(layer)].append(np.array([])) else: for ii in range(0, self.ntypes * self.ntypes): - if (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types: - node = self.embedding_net_nodes[f"filter_type_{ii // self.ntypes}{self.suffix}/matrix_{layer}_{ii % self.ntypes}"] + if ( + ii // self.ntypes, + ii % self.ntypes, + ) not in self.exclude_types: + node = self.embedding_net_nodes[ + f"filter_type_{ii // self.ntypes}{self.suffix}/matrix_{layer}_{ii % self.ntypes}" + ] matrix["layer_" + str(layer)].append(tf.make_ndarray(node)) else: matrix["layer_" + str(layer)].append(np.array([])) elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT): for ii in range(self.ntypes): for jj in range(ii, self.ntypes): - node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/matrix_{layer}_{ii}_{jj}"] + node = self.embedding_net_nodes[ + f"filter_type_all{self.suffix}/matrix_{layer}_{ii}_{jj}" + ] matrix["layer_" + str(layer)].append(tf.make_ndarray(node)) elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): if self.type_one_side: for ii in range(0, self.ntypes): if not self._all_excluded(ii): - node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/matrix_{layer}_{ii}"] + node = self.embedding_net_nodes[ + f"filter_type_all{self.suffix}/matrix_{layer}_{ii}" + ] matrix["layer_" + str(layer)].append(tf.make_ndarray(node)) else: matrix["layer_" + str(layer)].append(np.array([])) else: for ii in range(0, self.ntypes * self.ntypes): - if (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types: - node = self.embedding_net_nodes[f"filter_type_{ii // self.ntypes}{self.suffix}/matrix_{layer}_{ii % self.ntypes}"] + if ( + ii // self.ntypes, + ii % self.ntypes, + ) not in self.exclude_types: + node = 
self.embedding_net_nodes[ + f"filter_type_{ii // self.ntypes}{self.suffix}/matrix_{layer}_{ii % self.ntypes}" + ] matrix["layer_" + str(layer)].append(tf.make_ndarray(node)) else: matrix["layer_" + str(layer)].append(np.array([])) @@ -346,38 +531,95 @@ def _make_data(self, xx, idx): xx = tf.reshape(xx, [xx.size, -1]) for layer in range(self.layer_size): if layer == 0: - xbar = tf.matmul( - xx, self.matrix["layer_" + str(layer + 1)][idx]) + self.bias["layer_" + str(layer + 1)][idx] + xbar = ( + tf.matmul(xx, self.matrix["layer_" + str(layer + 1)][idx]) + + self.bias["layer_" + str(layer + 1)][idx] + ) if self.neuron[0] == 1: - yy = self._layer_0( - xx, self.matrix["layer_" + str(layer + 1)][idx], self.bias["layer_" + str(layer + 1)][idx]) + xx + yy = ( + self._layer_0( + xx, + self.matrix["layer_" + str(layer + 1)][idx], + self.bias["layer_" + str(layer + 1)][idx], + ) + + xx + ) dy = op_module.unaggregated_dy_dx_s( - yy, self.matrix["layer_" + str(layer + 1)][idx], xbar, tf.constant(self.functype)) + tf.ones([1, 1], yy.dtype) + yy, + self.matrix["layer_" + str(layer + 1)][idx], + xbar, + tf.constant(self.functype), + ) + tf.ones([1, 1], yy.dtype) dy2 = op_module.unaggregated_dy2_dx_s( - yy, dy, self.matrix["layer_" + str(layer + 1)][idx], xbar, tf.constant(self.functype)) + yy, + dy, + self.matrix["layer_" + str(layer + 1)][idx], + xbar, + tf.constant(self.functype), + ) elif self.neuron[0] == 2: tt, yy = self._layer_1( - xx, self.matrix["layer_" + str(layer + 1)][idx], self.bias["layer_" + str(layer + 1)][idx]) + xx, + self.matrix["layer_" + str(layer + 1)][idx], + self.bias["layer_" + str(layer + 1)][idx], + ) dy = op_module.unaggregated_dy_dx_s( - yy - tt, self.matrix["layer_" + str(layer + 1)][idx], xbar, tf.constant(self.functype)) + tf.ones([1, 2], yy.dtype) + yy - tt, + self.matrix["layer_" + str(layer + 1)][idx], + xbar, + tf.constant(self.functype), + ) + tf.ones([1, 2], yy.dtype) dy2 = op_module.unaggregated_dy2_dx_s( - yy - tt, dy, self.matrix["layer_" + 
str(layer + 1)][idx], xbar, tf.constant(self.functype)) + yy - tt, + dy, + self.matrix["layer_" + str(layer + 1)][idx], + xbar, + tf.constant(self.functype), + ) else: yy = self._layer_0( - xx, self.matrix["layer_" + str(layer + 1)][idx], self.bias["layer_" + str(layer + 1)][idx]) + xx, + self.matrix["layer_" + str(layer + 1)][idx], + self.bias["layer_" + str(layer + 1)][idx], + ) dy = op_module.unaggregated_dy_dx_s( - yy, self.matrix["layer_" + str(layer + 1)][idx], xbar, tf.constant(self.functype)) + yy, + self.matrix["layer_" + str(layer + 1)][idx], + xbar, + tf.constant(self.functype), + ) dy2 = op_module.unaggregated_dy2_dx_s( - yy, dy, self.matrix["layer_" + str(layer + 1)][idx], xbar, tf.constant(self.functype)) + yy, + dy, + self.matrix["layer_" + str(layer + 1)][idx], + xbar, + tf.constant(self.functype), + ) else: - ybar = tf.matmul( - yy, self.matrix["layer_" + str(layer + 1)][idx]) + self.bias["layer_" + str(layer + 1)][idx] + ybar = ( + tf.matmul(yy, self.matrix["layer_" + str(layer + 1)][idx]) + + self.bias["layer_" + str(layer + 1)][idx] + ) tt, zz = self._layer_1( - yy, self.matrix["layer_" + str(layer + 1)][idx], self.bias["layer_" + str(layer + 1)][idx]) + yy, + self.matrix["layer_" + str(layer + 1)][idx], + self.bias["layer_" + str(layer + 1)][idx], + ) dz = op_module.unaggregated_dy_dx( - zz - tt, self.matrix["layer_" + str(layer + 1)][idx], dy, ybar, tf.constant(self.functype)) + zz - tt, + self.matrix["layer_" + str(layer + 1)][idx], + dy, + ybar, + tf.constant(self.functype), + ) dy2 = op_module.unaggregated_dy2_dx( - zz - tt, self.matrix["layer_" + str(layer + 1)][idx], dy, dy2, ybar, tf.constant(self.functype)) + zz - tt, + self.matrix["layer_" + str(layer + 1)][idx], + dy, + dy2, + ybar, + tf.constant(self.functype), + ) dy = dz yy = zz @@ -394,9 +636,8 @@ def _layer_1(self, x, w, b): return t, self.activation_fn(tf.matmul(x, w) + b) + t # Change the embedding net range to sw / min_nbor_dist - def _get_env_mat_range(self, - min_nbor_dist): 
- sw = self._spline5_switch(min_nbor_dist, self.rcut_smth, self.rcut) + def _get_env_mat_range(self, min_nbor_dist): + sw = self._spline5_switch(min_nbor_dist, self.rcut_smth, self.rcut) if isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): lower = -self.davg[:, 0] / self.dstd[:, 0] upper = ((1 / min_nbor_dist) * sw - self.davg[:, 0]) / self.dstd[:, 0] @@ -409,20 +650,17 @@ def _get_env_mat_range(self, upper = ((1 / min_nbor_dist) * sw - self.davg[:, 0]) / self.dstd[:, 0] else: raise RuntimeError("Unsupported descriptor") - log.info('training data with lower boundary: ' + str(lower)) - log.info('training data with upper boundary: ' + str(upper)) + log.info("training data with lower boundary: " + str(lower)) + log.info("training data with upper boundary: " + str(upper)) # returns element-wise lower and upper return np.floor(lower), np.ceil(upper) - def _spline5_switch(self, - xx, - rmin, - rmax): + def _spline5_switch(self, xx, rmin, rmax): if xx < rmin: vv = 1 elif xx < rmax: uu = (xx - rmin) / (rmax - rmin) - vv = uu*uu*uu * (-6 * uu*uu + 15 * uu - 10) + 1 + vv = uu * uu * uu * (-6 * uu * uu + 15 * uu - 10) + 1 else: vv = 0 return vv @@ -430,19 +668,29 @@ def _spline5_switch(self, def _get_layer_size(self): layer_size = 0 if isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): - layer_size = len(self.embedding_net_nodes) // ((self.ntypes * self.ntypes - len(self.exclude_types)) * 2) - if self.type_one_side : - layer_size = len(self.embedding_net_nodes) // ((self.ntypes - self._n_all_excluded) * 2) + layer_size = len(self.embedding_net_nodes) // ( + (self.ntypes * self.ntypes - len(self.exclude_types)) * 2 + ) + if self.type_one_side: + layer_size = len(self.embedding_net_nodes) // ( + (self.ntypes - self._n_all_excluded) * 2 + ) elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT): - layer_size = len(self.embedding_net_nodes) // int(comb(self.ntypes + 1, 2) * 2) + layer_size = len(self.embedding_net_nodes) // int( + comb(self.ntypes + 1, 2) * 2 + ) 
elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): - layer_size = len(self.embedding_net_nodes) // ((self.ntypes * self.ntypes - len(self.exclude_types)) * 2) - if self.type_one_side : - layer_size = len(self.embedding_net_nodes) // ((self.ntypes - self._n_all_excluded) * 2) + layer_size = len(self.embedding_net_nodes) // ( + (self.ntypes * self.ntypes - len(self.exclude_types)) * 2 + ) + if self.type_one_side: + layer_size = len(self.embedding_net_nodes) // ( + (self.ntypes - self._n_all_excluded) * 2 + ) else: raise RuntimeError("Unsupported descriptor") return layer_size - + @property @lru_cache() def _n_all_excluded(self) -> int: @@ -452,7 +700,7 @@ def _n_all_excluded(self) -> int: @lru_cache() def _all_excluded(self, ii: int) -> bool: """Check if type ii excluds all types. - + Parameters ---------- ii : int @@ -463,24 +711,26 @@ def _all_excluded(self, ii: int) -> bool: bool if type ii excluds all types """ - return all([(ii, type_i) in self.exclude_types for type_i in range(self.ntypes)]) + return all( + [(ii, type_i) in self.exclude_types for type_i in range(self.ntypes)] + ) def _get_table_size(self): table_size = 0 if isinstance(self.descrpt, deepmd.descriptor.DescrptSeA): table_size = self.ntypes * self.ntypes - if self.type_one_side : + if self.type_one_side: table_size = self.ntypes elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT): table_size = int(comb(self.ntypes + 1, 2)) elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR): table_size = self.ntypes * self.ntypes - if self.type_one_side : + if self.type_one_side: table_size = self.ntypes else: raise RuntimeError("Unsupported descriptor") return table_size - + def _get_data_type(self): for item in self.matrix["layer_" + str(self.layer_size)]: if len(item) != 0: diff --git a/deepmd/utils/type_embed.py b/deepmd/utils/type_embed.py index d1023176cf..c4109d374c 100644 --- a/deepmd/utils/type_embed.py +++ b/deepmd/utils/type_embed.py @@ -1,28 +1,42 @@ -import numpy as np -from 
typing import Optional, Tuple, List, Union +from typing import ( + List, + Optional, + Tuple, + Union, +) -from deepmd.env import tf -from deepmd.utils.network import one_layer -from deepmd.env import GLOBAL_TF_FLOAT_PRECISION -from deepmd.env import GLOBAL_NP_FLOAT_PRECISION -from deepmd.env import op_module -from deepmd.env import default_tf_session_config -from deepmd.utils.network import embedding_net +import numpy as np -from deepmd.utils.graph import get_type_embedding_net_variables_from_graph_def -from deepmd.common import get_activation_func, get_precision +from deepmd.common import ( + get_activation_func, + get_precision, +) +from deepmd.env import ( + GLOBAL_NP_FLOAT_PRECISION, + GLOBAL_TF_FLOAT_PRECISION, + default_tf_session_config, + op_module, + tf, +) +from deepmd.utils.graph import ( + get_type_embedding_net_variables_from_graph_def, +) +from deepmd.utils.network import ( + embedding_net, + one_layer, +) def embed_atom_type( - ntypes : int, - natoms : tf.Tensor, - type_embedding : tf.Tensor, + ntypes: int, + natoms: tf.Tensor, + type_embedding: tf.Tensor, ): """ Make the embedded type for the atoms in system. - The atoms are assumed to be sorted according to the type, + The atoms are assumed to be sorted according to the type, thus their types are described by a `tf.Tensor` natoms, see explanation below. - + Parameters ---------- ntypes: @@ -33,26 +47,28 @@ def embed_atom_type( natoms[1]: total number of atoms held by this processor natoms[i]: 2 <= i < Ntypes+2, number of type i atoms type_embedding: - The type embedding. + The type embedding. It has the shape of [ntypes, embedding_dim] Returns ------- atom_embedding - The embedded type of each atom. + The embedded type of each atom. 
It has the shape of [numb_atoms, embedding_dim] """ - te_out_dim = type_embedding.get_shape().as_list()[-1] + te_out_dim = type_embedding.get_shape().as_list()[-1] atype = [] for ii in range(ntypes): - atype.append(tf.tile([ii], [natoms[2+ii]])) - atype = tf.concat(atype, axis = 0) - atm_embed = tf.nn.embedding_lookup(type_embedding,tf.cast(atype,dtype=tf.int32)) #(nf*natom)*nchnl - atm_embed = tf.reshape(atm_embed,[-1,te_out_dim]) + atype.append(tf.tile([ii], [natoms[2 + ii]])) + atype = tf.concat(atype, axis=0) + atm_embed = tf.nn.embedding_lookup( + type_embedding, tf.cast(atype, dtype=tf.int32) + ) # (nf*natom)*nchnl + atm_embed = tf.reshape(atm_embed, [-1, te_out_dim]) return atm_embed - -class TypeEmbedNet(): + +class TypeEmbedNet: """ Parameters @@ -65,7 +81,7 @@ class TypeEmbedNet(): activation_function The activation function in the embedding net. Supported options are |ACTIVATION_FN| precision - The precision of the embedding net parameters. Supported options are |PRECISION| + The precision of the embedding net parameters. Supported options are |PRECISION| trainable If the weights of embedding net are trainable. seed @@ -75,17 +91,18 @@ class TypeEmbedNet(): padding Concat the zero padding to the output, as the default embedding of empty type. 
""" + def __init__( - self, - neuron: List[int]=[], - resnet_dt: bool = False, - activation_function: Union[str, None] = 'tanh', - precision: str = 'default', - trainable: bool = True, - seed: Optional[int] = None, - uniform_seed: bool = False, - padding: bool = False, - )->None: + self, + neuron: List[int] = [], + resnet_dt: bool = False, + activation_function: Union[str, None] = "tanh", + precision: str = "default", + trainable: bool = True, + seed: Optional[int] = None, + uniform_seed: bool = False, + padding: bool = False, + ) -> None: """ Constructor """ @@ -99,12 +116,11 @@ def __init__( self.type_embedding_net_variables = None self.padding = padding - def build( - self, - ntypes: int, - reuse = None, - suffix = '', + self, + ntypes: int, + reuse=None, + suffix="", ): """ Build the computational graph for the descriptor @@ -121,37 +137,39 @@ def build( Returns ------- embedded_types - The computational graph for embedded types + The computational graph for embedded types """ - types = tf.convert_to_tensor( - [ii for ii in range(ntypes)], - dtype = tf.int32 + types = tf.convert_to_tensor([ii for ii in range(ntypes)], dtype=tf.int32) + ebd_type = tf.cast( + tf.one_hot(tf.cast(types, dtype=tf.int32), int(ntypes)), + self.filter_precision, ) - ebd_type = tf.cast(tf.one_hot(tf.cast(types,dtype=tf.int32),int(ntypes)), self.filter_precision) ebd_type = tf.reshape(ebd_type, [-1, ntypes]) - name = 'type_embed_net' + suffix + name = "type_embed_net" + suffix with tf.variable_scope(name, reuse=reuse): ebd_type = embedding_net( ebd_type, self.neuron, - activation_fn = self.filter_activation_fn, - precision = self.filter_precision, - resnet_dt = self.filter_resnet_dt, - seed = self.seed, - trainable = self.trainable, - initial_variables = self.type_embedding_net_variables, - uniform_seed = self.uniform_seed) + activation_fn=self.filter_activation_fn, + precision=self.filter_precision, + resnet_dt=self.filter_resnet_dt, + seed=self.seed, + trainable=self.trainable, + 
initial_variables=self.type_embedding_net_variables, + uniform_seed=self.uniform_seed, + ) ebd_type = tf.reshape(ebd_type, [-1, self.neuron[-1]]) # ntypes * neuron[-1] if self.padding: last_type = tf.cast(tf.zeros([1, self.neuron[-1]]), self.filter_precision) ebd_type = tf.concat([ebd_type, last_type], 0) # (ntypes + 1) * neuron[-1] - self.ebd_type = tf.identity(ebd_type, name ='t_typeebd') - return self.ebd_type + self.ebd_type = tf.identity(ebd_type, name="t_typeebd") + return self.ebd_type - def init_variables(self, - graph: tf.Graph, - graph_def: tf.GraphDef, - suffix = '', + def init_variables( + self, + graph: tf.Graph, + graph_def: tf.GraphDef, + suffix="", ) -> None: """ Init the type embedding net variables with the given dict @@ -165,4 +183,6 @@ def init_variables(self, suffix Name suffix to identify this descriptor """ - self.type_embedding_net_variables = get_type_embedding_net_variables_from_graph_def(graph_def, suffix = suffix) + self.type_embedding_net_variables = ( + get_type_embedding_net_variables_from_graph_def(graph_def, suffix=suffix) + ) diff --git a/deepmd/utils/weight_avg.py b/deepmd/utils/weight_avg.py index aec5026ae4..72e36b5d68 100644 --- a/deepmd/utils/weight_avg.py +++ b/deepmd/utils/weight_avg.py @@ -1,10 +1,15 @@ -from typing import TYPE_CHECKING, List, Dict, Optional, Tuple +from typing import ( + TYPE_CHECKING, + Dict, + List, + Optional, + Tuple, +) + import numpy as np -def weighted_average( - errors: List[Dict[str, Tuple[float, float]]] -) -> Dict: +def weighted_average(errors: List[Dict[str, Tuple[float, float]]]) -> Dict: """Compute wighted average of prediction errors for model. 
Parameters @@ -26,7 +31,7 @@ def weighted_average( if kk in sum_err: sum_err[kk] += ee * ee * ss sum_siz[kk] += ss - else : + else: sum_err[kk] = ee * ee * ss sum_siz[kk] = ss for kk in sum_err.keys(): diff --git a/doc/conf.py b/doc/conf.py index c0d473cf07..e165f9b887 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -13,15 +13,27 @@ import os import subprocess import sys +from datetime import ( + date, +) + import recommonmark -from recommonmark.transform import AutoStructify -from datetime import date -from deepmd.common import ACTIVATION_FN_DICT, PRECISION_DICT -from deepmd.utils.argcheck import list_to_doc +from recommonmark.transform import ( + AutoStructify, +) + +from deepmd.common import ( + ACTIVATION_FN_DICT, + PRECISION_DICT, +) +from deepmd.utils.argcheck import ( + list_to_doc, +) sys.path.append(os.path.dirname(__file__)) import sphinx_contrib_exhale_multiproject as _ + def mkindex(dirname): dirname = dirname + "/" oldfindex = open(dirname + "index.md", "r") @@ -30,28 +42,30 @@ def mkindex(dirname): oldnames = [] for entry in oldlist: - _name = entry[entry.find("(")+1 : entry.find(")")] + _name = entry[entry.find("(") + 1 : entry.find(")")] oldnames.append(_name) - + newfindex = open(dirname + "index.md", "a") for root, dirs, files in os.walk(dirname, topdown=False): - newnames = [name for name in files if "index.md" not in name and name not in oldnames] + newnames = [ + name for name in files if "index.md" not in name and name not in oldnames + ] for name in newnames: f = open(dirname + name, "r") _lines = f.readlines() for _headline in _lines: _headline = _headline.strip("#") headline = _headline.strip() - if (len(headline) == 0 or headline[0] == "." or headline[0] == "="): + if len(headline) == 0 or headline[0] == "." 
or headline[0] == "=": continue else: break - longname = "- ["+headline+"]"+"("+name+")\n" + longname = "- [" + headline + "]" + "(" + name + ")\n" newfindex.write(longname) - newfindex.close() + def classify_index_TS(): dirname = "troubleshooting/" oldfindex = open(dirname + "index.md", "r") @@ -61,25 +75,25 @@ def classify_index_TS(): oldnames = [] sub_titles = [] heads = [] - while(len(oldlist) > 0): + while len(oldlist) > 0: entry = oldlist.pop(0) - if (entry.find("(") >= 0): - _name = entry[entry.find("(")+1 : entry.find(")")] + if entry.find("(") >= 0: + _name = entry[entry.find("(") + 1 : entry.find(")")] oldnames.append(_name) continue - if (entry.find("##") >= 0): - _name = entry[entry.find("##")+3:-1] + if entry.find("##") >= 0: + _name = entry[entry.find("##") + 3 : -1] sub_titles.append(_name) continue entry.strip() - if (entry != '\n'): + if entry != "\n": heads.append(entry) - + newfindex = open(dirname + "index.md", "w") for entry in heads: newfindex.write(entry) - newfindex.write('\n') - sub_lists = [[],[]] + newfindex.write("\n") + sub_lists = [[], []] for root, dirs, files in os.walk(dirname, topdown=False): newnames = [name for name in files if "index.md" not in name] for name in newnames: @@ -89,16 +103,16 @@ def classify_index_TS(): for _headline in _lines: _headline = _headline.strip("#") headline = _headline.strip() - if (len(headline) == 0 or headline[0] == "." or headline[0] == "="): + if len(headline) == 0 or headline[0] == "." 
or headline[0] == "=": continue else: break - longname = "- ["+headline+"]"+"("+name+")\n" - if ("howtoset_" in name): + longname = "- [" + headline + "]" + "(" + name + ")\n" + if "howtoset_" in name: sub_lists[1].append(longname) else: sub_lists[0].append(longname) - + newfindex.write("## Trouble shooting\n") for entry in sub_lists[0]: newfindex.write(entry) @@ -111,22 +125,41 @@ def classify_index_TS(): # -- Project information ----------------------------------------------------- -project = 'DeePMD-kit' -copyright = '2017-%d, DeepModeling' % date.today().year -author = 'DeepModeling' +project = "DeePMD-kit" +copyright = "2017-%d, DeepModeling" % date.today().year +author = "DeepModeling" + def run_apidoc(_): - from sphinx.ext.apidoc import main import sys - sys.path.append(os.path.join(os.path.dirname(__file__), '..')) + + from sphinx.ext.apidoc import ( + main, + ) + + sys.path.append(os.path.join(os.path.dirname(__file__), "..")) cur_dir = os.path.abspath(os.path.dirname(__file__)) - module = os.path.join(cur_dir,"..","deepmd") - main(['-M', '--tocfile', 'api_py', '-H', 'Python API', '-o', os.path.join(cur_dir, "api_py"), module, '--force']) + module = os.path.join(cur_dir, "..", "deepmd") + main( + [ + "-M", + "--tocfile", + "api_py", + "-H", + "Python API", + "-o", + os.path.join(cur_dir, "api_py"), + module, + "--force", + ] + ) + def setup(app): # Add hook for building doxygen xml when needed - app.connect('builder-inited', run_apidoc) + app.connect("builder-inited", run_apidoc) + # -- General configuration --------------------------------------------------- @@ -141,24 +174,24 @@ def setup(app): # 'sphinx.ext.autosummary' # ] -#mkindex("troubleshooting") -#mkindex("development") -#classify_index_TS() +# mkindex("troubleshooting") +# mkindex("development") +# classify_index_TS() extensions = [ "deepmodeling_sphinx", "dargs.sphinx", "sphinx_rtd_theme", - 'myst_parser', - 'sphinx.ext.autosummary', - 'sphinx.ext.mathjax', - 'sphinx.ext.viewcode', - 
'sphinx.ext.intersphinx', - 'sphinx.ext.napoleon', - 'sphinxarg.ext', - 'numpydoc', - 'breathe', - 'exhale' + "myst_parser", + "sphinx.ext.autosummary", + "sphinx.ext.mathjax", + "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", + "sphinx.ext.napoleon", + "sphinxarg.ext", + "numpydoc", + "breathe", + "exhale", ] # breathe_domain_by_extension = { @@ -172,7 +205,7 @@ def setup(app): breathe_default_project = "cc" exhale_args = { - "doxygenStripFromPath": "..", + "doxygenStripFromPath": "..", # Suggested optional arguments # "createTreeView": True, # TIP: if using the sphinx-bootstrap-theme, you need @@ -206,21 +239,21 @@ def setup(app): } # Tell sphinx what the primary language being documented is. -#primary_domain = 'cpp' +# primary_domain = 'cpp' # Tell sphinx what the pygments highlight language should be. -#highlight_language = 'cpp' +# highlight_language = 'cpp' -# +# myst_heading_anchors = 4 # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] intersphinx_mapping = { "numpy": ("https://docs.scipy.org/doc/numpy/", None), @@ -228,7 +261,7 @@ def setup(app): "tensorflow": ( "https://www.tensorflow.org/api_docs/python", "https://github.com/mr-ubik/tensorflow-intersphinx/raw/master/tf2_py_objects.inv", - ), + ), "ase": ("https://wiki.fysik.dtu.dk/ase/", None), } numpydoc_xref_param_type = True @@ -236,42 +269,48 @@ def setup(app): numpydoc_xref_aliases = {} import typing + for typing_type in typing.__all__: numpydoc_xref_aliases[typing_type] = "typing.%s" % typing_type rst_epilog = """ .. |ACTIVATION_FN| replace:: %s .. 
|PRECISION| replace:: %s -""" % (list_to_doc(ACTIVATION_FN_DICT.keys()), list_to_doc(PRECISION_DICT.keys())) +""" % ( + list_to_doc(ACTIVATION_FN_DICT.keys()), + list_to_doc(PRECISION_DICT.keys()), +) # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] -html_css_files = ['css/custom.css'] +html_static_path = ["_static"] +html_css_files = ["css/custom.css"] -autodoc_default_flags = ['members'] +autodoc_default_flags = ["members"] autosummary_generate = True -master_doc = 'index' -mathjax_path = 'https://cdnjs.cloudflare.com/ajax/libs/mathjax/3.2.0/es5/tex-mml-chtml.min.js' +master_doc = "index" +mathjax_path = ( + "https://cdnjs.cloudflare.com/ajax/libs/mathjax/3.2.0/es5/tex-mml-chtml.min.js" +) myst_enable_extensions = [ - 'dollarmath', - 'colon_fence', + "dollarmath", + "colon_fence", ] # fix emoji issue in pdf latex_engine = "xelatex" latex_elements = { - 'fontpkg': r''' + "fontpkg": r""" \usepackage{fontspec} \setmainfont{Symbola} -''', +""", } # For TF automatic generated OP docs diff --git a/doc/data/data-conv.md b/doc/data/data-conv.md index 574c15143e..e8464b1ea9 100644 --- a/doc/data/data-conv.md +++ b/doc/data/data-conv.md @@ -55,9 +55,9 @@ $ cat force.raw ``` This `force.raw` contains 3 frames with each frame having the forces of 2 atoms, thus it has 3 lines and 6 columns. Each line provides all the 3 force components of 2 atoms in 1 frame. The first three numbers are the 3 force components of the first atom, while the second three numbers are the 3 force components of the second atom. 
Other files are organized similarly. The number of lines of all raw files should be identical. -One can use the script `$deepmd_source_dir/data/raw/raw_to_set.sh` to convert the prepared raw files to the NumPy format. For example, if we have a raw file that contains 6000 frames, +One can use the script `$deepmd_source_dir/data/raw/raw_to_set.sh` to convert the prepared raw files to the NumPy format. For example, if we have a raw file that contains 6000 frames, ```bash -$ ls +$ ls box.raw coord.raw energy.raw force.raw type.raw virial.raw $ $deepmd_source_dir/data/raw/raw_to_set.sh 2000 nframe is 6000 @@ -66,7 +66,7 @@ will make 3 sets making set 0 ... making set 1 ... making set 2 ... -$ ls +$ ls box.raw coord.raw energy.raw force.raw set.000 set.001 set.002 type.raw virial.raw ``` It generates three sets `set.000`, `set.001` and `set.002`, with each set containing 2000 frames in the Numpy format. diff --git a/doc/data/dpdata.md b/doc/data/dpdata.md index 31679f0060..9b1a27ce82 100644 --- a/doc/data/dpdata.md +++ b/doc/data/dpdata.md @@ -2,7 +2,7 @@ One can use a convenient tool [`dpdata`](https://github.com/deepmodeling/dpdata) to convert data directly from the output of first principle packages to the DeePMD-kit format. -To install one can execute +To install one can execute ```bash pip install dpdata ``` @@ -15,11 +15,12 @@ $deepmd_source_dir/examples/data_conv Switch to that directory, then one can convert data by using the following python script ```python import dpdata -dsys = dpdata.LabeledSystem('OUTCAR') -dsys.to('deepmd/npy', 'deepmd_data', set_size = dsys.get_nframes()) + +dsys = dpdata.LabeledSystem("OUTCAR") +dsys.to("deepmd/npy", "deepmd_data", set_size=dsys.get_nframes()) ``` -`get_nframes()` method gets the number of frames in the `OUTCAR`, and the argument `set_size` enforces that the set size is equal to the number of frames in the system, viz. only one `set` is created in the `system`. 
+`get_nframes()` method gets the number of frames in the `OUTCAR`, and the argument `set_size` enforces that the set size is equal to the number of frames in the system, viz. only one `set` is created in the `system`. The data in DeePMD-kit format is stored in the folder `deepmd_data`. diff --git a/doc/data/system.md b/doc/data/system.md index 6a6fb4b58f..a81016f4cb 100644 --- a/doc/data/system.md +++ b/doc/data/system.md @@ -1,6 +1,6 @@ # System -DeePMD-kit takes a **system** as the data structure. A snapshot of a system is called a **frame**. A system may contain multiple frames with the same atom types and numbers, i.e. the same formula (like `H2O`). To contains data with different formulas, one usually needs to divide data into multiple systems, which may sometimes result in sparse-frame systems. See a [new system format](../model/train-se-atten.md#data-format) to further combine different systems with the same atom numbers, when training with descriptor `se_atten`. +DeePMD-kit takes a **system** as the data structure. A snapshot of a system is called a **frame**. A system may contain multiple frames with the same atom types and numbers, i.e. the same formula (like `H2O`). To contains data with different formulas, one usually needs to divide data into multiple systems, which may sometimes result in sparse-frame systems. See a [new system format](../model/train-se-atten.md#data-format) to further combine different systems with the same atom numbers, when training with descriptor `se_atten`. A system should contain system properties, input frame properties, and labeled frame properties. 
The system property contains the following property: @@ -14,7 +14,7 @@ The input frame properties contain the following property, the first axis of whi ID | Property | Raw file | Unit | Required/Optional | Shape | Description -------- | ---------------------- | -------------- | ---- | -------------------- | ----------------------- | ----------- -coord | Atomic coordinates | coord.raw | Å | Required | Nframes \* Natoms \* 3 | +coord | Atomic coordinates | coord.raw | Å | Required | Nframes \* Natoms \* 3 | box | Boxes | box.raw | Å | Required if periodic | Nframes \* 3 \* 3 | in the order `XX XY XZ YX YY YZ ZX ZY ZZ` fparam | Extra frame parameters | fparam.raw | Any | Optional | Nframes \* Any | aparam | Extra atomic parameters | aparam.raw | Any | Optional | Nframes \* aparam \* Any | @@ -24,8 +24,8 @@ The labeled frame properties are listed as follows, all of which will be used fo ID | Property | Raw file | Unit | Shape | Description ---------------------- | ----------------------- | ------------------------ | ---- | ----------------------- | ----------- -energy | Frame energies | energy.raw | eV | Nframes | -force | Atomic forces | force.raw | eV/Å | Nframes \* Natoms \* 3 | +energy | Frame energies | energy.raw | eV | Nframes | +force | Atomic forces | force.raw | eV/Å | Nframes \* Natoms \* 3 | virial | Frame virial | virial.raw | eV | Nframes \* 9 | in the order `XX XY XZ YX YY YZ ZX ZY ZZ` atom_ener | Atomic energies | atom_ener.raw | eV | Nframes \* Natoms | atom_pref | Weights of atomic forces | atom_pref.raw | 1 | Nframes \* Natoms | @@ -36,11 +36,11 @@ atomic_polarizability | Atomic polarizability | atomic_polarizability.raw| A In general, we always use the following convention of units: -Property | Unit +Property | Unit ---------| ---- -Time | ps -Length | Å -Energy | eV -Force | eV/Å -Virial | eV -Pressure | Bar +Time | ps +Length | Å +Energy | eV +Force | eV/Å +Virial | eV +Pressure | Bar diff --git a/doc/development/coding-conventions.rst 
b/doc/development/coding-conventions.rst index 2a3cfc3866..90531a3d5d 100644 --- a/doc/development/coding-conventions.rst +++ b/doc/development/coding-conventions.rst @@ -42,7 +42,7 @@ Conventions`_ and `Typing Conventions`_ PEPs, clarified and extended as follows: lines). * Maximum line length is 88 characters as recommended by - `black `_ which is less strict than + `black `_ which is less strict than `Docstring Conventions`_ suggests. * Use "StudlyCaps" for class names. @@ -63,8 +63,8 @@ Conventions`_ and `Typing Conventions`_ PEPs, clarified and extended as follows: * Use ``"double quotes"`` for string literals, and ``"""triple double quotes"""`` for docstring's. Single quotes are OK for - something like - + something like + .. code-block:: python f"something {'this' if x else 'that'}" @@ -72,7 +72,7 @@ Conventions`_ and `Typing Conventions`_ PEPs, clarified and extended as follows: * Use f-strings ``s = f"{x:.2f}"`` instead of old style formating with ``"%f" % x``. string format method ``"{x:.2f}".format()`` may be used sparsely where it is more convenient than f-strings. - + Whitespace ========== @@ -87,7 +87,7 @@ Python is not C/C++ so whitespace should be used sparingly to maintain code rea * You should have blank spaces after commas, colons, and semi-colons if it isn’t trailing next to the end of a bracket, brace, or parentheses. - + * With any operators you should use space on both sides of the operator. * Colons for slicing are considered a binary operator, and should not have any spaces @@ -118,7 +118,7 @@ General advice * Get rid of as many ``break`` and ``continue`` statements as possible. -* Write short functions. +* Write short functions. All functions should fit within a standard screen. * Use descriptive variable names. 
diff --git a/doc/development/type-embedding.md b/doc/development/type-embedding.md index 40de2c5867..a027ebdf26 100644 --- a/doc/development/type-embedding.md +++ b/doc/development/type-embedding.md @@ -1,6 +1,6 @@ # Atom Type Embedding ## Overview -Here is an overview of the DeePMD-kit algorithm. Given a specific centric atom, we can obtain the matrix describing its local environment, named $\mathcal R$. It is consist of the distance between the centric atom and its neighbors, as well as a direction vector. We can embed each distance into a vector of $M_1$ dimension by an `embedding net`, so the environment matrix $\mathcal R$ can be embedded into matrix $\mathcal G$. We can thus extract a descriptor vector (of $M_1 \times M_2$ dim) of the centric atom from the $\mathcal G$ by some matrix multiplication, and put the descriptor into `fitting net` to get predicted energy $E$. The vanilla version of DeePMD-kit builds `embedding net` and `fitting net` relying on the atom type, resulting in $O(N)$ memory usage. After applying atom type embedding, in DeePMD-kit v2.0, we can share one `embedding net` and one `fitting net` in total, which decline training complexity largely. +Here is an overview of the DeePMD-kit algorithm. Given a specific centric atom, we can obtain the matrix describing its local environment, named $\mathcal R$. It is consist of the distance between the centric atom and its neighbors, as well as a direction vector. We can embed each distance into a vector of $M_1$ dimension by an `embedding net`, so the environment matrix $\mathcal R$ can be embedded into matrix $\mathcal G$. We can thus extract a descriptor vector (of $M_1 \times M_2$ dim) of the centric atom from the $\mathcal G$ by some matrix multiplication, and put the descriptor into `fitting net` to get predicted energy $E$. The vanilla version of DeePMD-kit builds `embedding net` and `fitting net` relying on the atom type, resulting in $O(N)$ memory usage. 
After applying atom type embedding, in DeePMD-kit v2.0, we can share one `embedding net` and one `fitting net` in total, which decline training complexity largely. ## Preliminary In the following chart, you can find the meaning of symbols used to clarify the atom-type embedding algorithm. @@ -33,7 +33,7 @@ DeePMD-kit applying atom type embedding: $$E = F( [ \text{Multi}( \mathcal G( [s_{ij}, A(i), A(j)] ) ), A(j)] )$$ -or +or $$E = F( [ \text{Multi}( \mathcal G( [s_{ij}, A(j)] ) ), A(j)] )$$ @@ -59,9 +59,9 @@ In trainer.py, it will parse the parameter from the input JSON file. If a `type_ ### model (model/ener.py) When building the operation graph of the `model` in `model.build`. If a `TypeEmbedNet` is detected, it will build the operation graph of `type embed net`, `embedding net` and `fitting net` by order. The building process of `type embed net` can be found in `TypeEmbedNet.build`, which output the type embedding vector of each atom type (of [$\text{ntypes} \times \text{nchanl}$] dimensions). We then save the type embedding vector into `input_dict`, so that they can be fetched later in `embedding net` and `fitting net`. ### embedding net (descriptor/se*.py) -In `embedding net`, we shall take local environment $\mathcal R$ as input and output matrix $\mathcal G$. Functions called in this process by the order is +In `embedding net`, we shall take local environment $\mathcal R$ as input and output matrix $\mathcal G$. Functions called in this process by the order is ``` -build -> _pass_filter -> _filter -> _filter_lower +build -> _pass_filter -> _filter -> _filter_lower ``` `_pass_filter`: It will first detect whether an atom type embedding exists, if so, it will apply atom type embedding algorithm and doesn't divide the input by type. 
diff --git a/doc/freeze/compress.md b/doc/freeze/compress.md index ec71de9e97..4ac45f4de7 100644 --- a/doc/freeze/compress.md +++ b/doc/freeze/compress.md @@ -78,7 +78,7 @@ Model compression, with little loss of accuracy, can greatly speed up MD inferen **Acceptable original model version** -The model compression interface requires the version of DeePMD-kit used in the original model generation should be `2.0.0-alpha.0` or above. If one has a frozen 1.2 or 1.3 model, one can upgrade it through the `dp convert-from` interface. (eg: ```dp convert-from 1.2/1.3 -i old_frozen_model.pb -o new_frozen_model.pb```) +The model compression interface requires the version of DeePMD-kit used in the original model generation should be `2.0.0-alpha.0` or above. If one has a frozen 1.2 or 1.3 model, one can upgrade it through the `dp convert-from` interface. (eg: ```dp convert-from 1.2/1.3 -i old_frozen_model.pb -o new_frozen_model.pb```) **Acceptable descriptor type** @@ -92,5 +92,3 @@ Descriptors with `se_e2_a`, `se_e3`, and `se_e2_r` types are supported by the mo - relu6 - softplus - sigmoid - - diff --git a/doc/freeze/freeze.md b/doc/freeze/freeze.md index 368bea94c2..ba0cd44606 100644 --- a/doc/freeze/freeze.md +++ b/doc/freeze/freeze.md @@ -7,8 +7,8 @@ $ dp freeze -o graph.pb in the folder where the model is trained. The output model is called `graph.pb`. In [multi-task mode](../train/multi-task-training.md): -- This process will in default output several models, each of which contains the common descriptor and -one of the user-defined fitting nets in {ref}`fitting_net_dict `, let's name it `fitting_key`, together frozen in `graph_{fitting_key}.pb`. -Those frozen models are exactly the same as single-task output with fitting net `fitting_key`. 
-- If you add `--united-model` option in this situation, +- This process will in default output several models, each of which contains the common descriptor and +one of the user-defined fitting nets in {ref}`fitting_net_dict `, let's name it `fitting_key`, together frozen in `graph_{fitting_key}.pb`. +Those frozen models are exactly the same as single-task output with fitting net `fitting_key`. +- If you add `--united-model` option in this situation, the total multi-task model will be frozen into one unit `graph.pb`, which is mainly for multi-task initialization and can not be used directly for inference. diff --git a/doc/freeze/index.rst b/doc/freeze/index.rst index 0a2f3df0f1..b78b63cd0a 100644 --- a/doc/freeze/index.rst +++ b/doc/freeze/index.rst @@ -5,4 +5,4 @@ Freeze and Compress :maxdepth: 1 freeze - compress \ No newline at end of file + compress diff --git a/doc/getting-started/index.rst b/doc/getting-started/index.rst index d5d5651003..2a94ec3bf3 100644 --- a/doc/getting-started/index.rst +++ b/doc/getting-started/index.rst @@ -12,4 +12,4 @@ In this text, we will call the deep neural network that is used to represent the training freeze test - lammps \ No newline at end of file + lammps diff --git a/doc/index.rst b/doc/index.rst index 048dfd1f25..c67b40e60a 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -16,7 +16,7 @@ DeePMD-kit is a package written in Python/C++, designed to minimize the effort r .. toctree:: :maxdepth: 3 :caption: Getting Started - + getting-started/index .. _advanced: @@ -44,7 +44,7 @@ DeePMD-kit is a package written in Python/C++, designed to minimize the effort r .. toctree:: :maxdepth: 2 :caption: Tutorial - :glob: + :glob: Tutorials Publications @@ -78,5 +78,5 @@ DeePMD-kit is a package written in Python/C++, designed to minimize the effort r * :ref:`modindex` * :ref:`search` -.. _feedback: -.. _affiliated packages: +.. _feedback: +.. 
_affiliated packages: diff --git a/doc/inference/index.md b/doc/inference/index.md index bf0bf54a3e..ef0164dbb5 100644 --- a/doc/inference/index.md +++ b/doc/inference/index.md @@ -3,4 +3,4 @@ Note that the model for inference is required to be compatible with the DeePMD-kit package. See [Model compatibility](../troubleshooting/model-compatability.html) for details. - [Python interface](python.md) -- [C++ interface](cxx.md) \ No newline at end of file +- [C++ interface](cxx.md) diff --git a/doc/inference/index.rst b/doc/inference/index.rst index 5b591e309c..1e6d98be00 100644 --- a/doc/inference/index.rst +++ b/doc/inference/index.rst @@ -1,7 +1,7 @@ Inference ========= -Note that the model for inference is required to be compatible with the DeePMD-kit package. See `Model compatibility <../troubleshooting/model-compatability.html>`_ for details. +Note that the model for inference is required to be compatible with the DeePMD-kit package. See `Model compatibility <../troubleshooting/model-compatability.html>`_ for details. .. toctree:: :maxdepth: 1 diff --git a/doc/inference/python.md b/doc/inference/python.md index fd4cb6fc43..48eb1d7df0 100644 --- a/doc/inference/python.md +++ b/doc/inference/python.md @@ -4,10 +4,11 @@ One may use the python interface of DeePMD-kit for model inference, an example i ```python from deepmd.infer import DeepPot import numpy as np -dp = DeepPot('graph.pb') -coord = np.array([[1,0,0], [0,0,1.5], [1,0,3]]).reshape([1, -1]) + +dp = DeepPot("graph.pb") +coord = np.array([[1, 0, 0], [0, 0, 1.5], [1, 0, 3]]).reshape([1, -1]) cell = np.diag(10 * np.ones(3)).reshape([1, -1]) -atype = [1,0,1] +atype = [1, 0, 1] e, f, v = dp.eval(coord, cell, atype) ``` where `e`, `f` and `v` are predicted energy, force and virial of the system, respectively. 
@@ -18,9 +19,9 @@ from deepmd.infer import calc_model_devi from deepmd.infer import DeepPot as DP import numpy as np -coord = np.array([[1,0,0], [0,0,1.5], [1,0,3]]).reshape([1, -1]) +coord = np.array([[1, 0, 0], [0, 0, 1.5], [1, 0, 3]]).reshape([1, -1]) cell = np.diag(10 * np.ones(3)).reshape([1, -1]) -atype = [1,0,1] +atype = [1, 0, 1] graphs = [DP("graph.000.pb"), DP("graph.001.pb")] model_devi = calc_model_devi(coord, cell, atype, graphs) ``` diff --git a/doc/install/index.md b/doc/install/index.md index b6cc195b6f..4da25dc9cf 100644 --- a/doc/install/index.md +++ b/doc/install/index.md @@ -5,4 +5,4 @@ - [Install LAMMPS](install-lammps.md) - [Install i-PI](install-ipi.md) - [Install GROMACS](install-gromacs.md) -- [Building conda packages](build-conda.md) \ No newline at end of file +- [Building conda packages](build-conda.md) diff --git a/doc/install/install-from-source.md b/doc/install/install-from-source.md index eb318b5c43..8ce2524806 100644 --- a/doc/install/install-from-source.md +++ b/doc/install/install-from-source.md @@ -15,9 +15,9 @@ cd deepmd-kit deepmd_source_dir=`pwd` ``` -## Install the python interface +## Install the python interface ### Install Tensorflow's python interface -First, check the python version on your machine +First, check the python version on your machine ```bash python --version ``` @@ -29,7 +29,7 @@ source $tensorflow_venv/bin/activate pip install --upgrade pip pip install --upgrade tensorflow ``` -It is important that every time a new shell is started and one wants to use `DeePMD-kit`, the virtual environment should be activated by +It is important that every time a new shell is started and one wants to use `DeePMD-kit`, the virtual environment should be activated by ```bash source $tensorflow_venv/bin/activate ``` @@ -41,8 +41,8 @@ If one has multiple python interpreters named something like python3.x, it can b ```bash virtualenv -p python3.7 $tensorflow_venv ``` -If one does not need the GPU support of DeePMD-kit and is 
concerned about package size, the CPU-only version of TensorFlow should be installed by -```bash +If one does not need the GPU support of DeePMD-kit and is concerned about package size, the CPU-only version of TensorFlow should be installed by +```bash pip install --upgrade tensorflow-cpu ``` To verify the installation, run @@ -146,9 +146,9 @@ From version 2.0.1, Horovod and mpi4py with MPICH support are shipped with the i If you don't install Horovod, DeePMD-kit will fall back to serial mode. -## Install the C++ interface +## Install the C++ interface -If one does not need to use DeePMD-kit with Lammps or I-Pi, then the python interface installed in the previous section does everything and he/she can safely skip this section. +If one does not need to use DeePMD-kit with Lammps or I-Pi, then the python interface installed in the previous section does everything and he/she can safely skip this section. ### Install Tensorflow's C++ interface @@ -161,14 +161,14 @@ First, the C++ interface of Tensorflow should be installed. It is noted that the Now go to the source code directory of DeePMD-kit and make a building place. ```bash cd $deepmd_source_dir/source -mkdir build +mkdir build cd build ``` I assume you want to install DeePMD-kit into path `$deepmd_root`, then execute CMake ```bash cmake -DTENSORFLOW_ROOT=$tensorflow_root -DCMAKE_INSTALL_PREFIX=$deepmd_root .. ``` -where the variable `tensorflow_root` stores the location where TensorFlow's C++ interface is installed. +where the variable `tensorflow_root` stores the location where TensorFlow's C++ interface is installed. One may add the following arguments to `cmake`: @@ -184,12 +184,12 @@ One may add the following arguments to `cmake`: | -DUSE_TF_PYTHON_LIBS=<value> | `TRUE` or `FALSE` | `FALSE` | If `TRUE`, Build C++ interface with TensorFlow's Python libraries(TensorFlow's Python Interface is required). 
And there's no need for building TensorFlow's C++ interface.| | -DENABLE_NATIVE_OPTIMIZATION | `TRUE` or `FALSE` | `FALSE` | Enable compilation optimization for the native machine's CPU type. Do not enable it if generated code will run on different CPUs. | -If the CMake has been executed successfully, then run the following make commands to build the package: +If the CMake has been executed successfully, then run the following make commands to build the package: ```bash make -j4 make install ``` -Option `-j4` means using 4 processes in parallel. You may want to use a different number according to your hardware. +Option `-j4` means using 4 processes in parallel. You may want to use a different number according to your hardware. If everything works fine, you will have the following executable and libraries installed in `$deepmd_root/bin` and `$deepmd_root/lib` ```bash diff --git a/doc/install/install-gromacs.md b/doc/install/install-gromacs.md index 489c263239..0cb0f7d7ff 100644 --- a/doc/install/install-gromacs.md +++ b/doc/install/install-gromacs.md @@ -2,7 +2,7 @@ Before following this section, [DeePMD-kit C++ interface](install-from-source.md) should have be installed. -## Patch source code of GROMACS +## Patch source code of GROMACS Download the source code of a supported GROMACS version (2020.2) from https://manual.gromacs.org/2020.2/download.html. Run the following command: ```bash export PATH=$PATH:$deepmd_kit_root/bin @@ -31,4 +31,4 @@ cmake3 .. 
-DCMAKE_CXX_STANDARD=14 \ # not required, but c++14 seems to be more c -DCMAKE_INSTALL_PREFIX=/path/to/gromacs-2020.2-deepmd make -j make install -``` \ No newline at end of file +``` diff --git a/doc/install/install-ipi.md b/doc/install/install-ipi.md index cabbf0fa62..1f4de7474c 100644 --- a/doc/install/install-ipi.md +++ b/doc/install/install-ipi.md @@ -9,4 +9,4 @@ Test with Pytest: ```bash pip install pytest pytest --pyargs ipi.tests -``` \ No newline at end of file +``` diff --git a/doc/install/install-lammps.md b/doc/install/install-lammps.md index 0be0b1ddb1..c960f6decf 100644 --- a/doc/install/install-lammps.md +++ b/doc/install/install-lammps.md @@ -1,6 +1,6 @@ # Install LAMMPS -There are two ways to install LAMMPS: the built-in mode and the plugin mode. The built-in mode builds LAMMPS along with the DeePMD-kit and DeePMD-kit will be loaded automatically when running LAMMPS. The plugin mode builds LAMMPS and a plugin separately, so one needs to use `plugin load` command to load the DeePMD-kit's LAMMPS plugin library. +There are two ways to install LAMMPS: the built-in mode and the plugin mode. The built-in mode builds LAMMPS along with the DeePMD-kit and DeePMD-kit will be loaded automatically when running LAMMPS. The plugin mode builds LAMMPS and a plugin separately, so one needs to use `plugin load` command to load the DeePMD-kit's LAMMPS plugin library. ## Install LAMMPS's DeePMD-kit module (built-in mode) Before following this section, [DeePMD-kit C++ interface](install-from-source.md) should have be installed. @@ -35,7 +35,7 @@ If everything works fine, you will end up with an executable `lmp_mpi`. 
./lmp_mpi -h ``` -The DeePMD-kit module can be removed from the LAMMPS source code by +The DeePMD-kit module can be removed from the LAMMPS source code by ```bash make no-user-deepmd ``` diff --git a/doc/install/install-tf.1.12.md b/doc/install/install-tf.1.12.md index 7d18529e5b..f4009405d7 100644 --- a/doc/install/install-tf.1.12.md +++ b/doc/install/install-tf.1.12.md @@ -1,4 +1,4 @@ -# Install TensorFlow's C++ interface +# Install TensorFlow's C++ interface The TensorFlow's C++ interface will be compiled from the source code. Firstly one installs bazel. It is highly recommended that the bazel version 0.15.0 is used. A full instruction of bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html). ```bash cd /some/workspace @@ -31,7 +31,7 @@ Now build the shared library of TensorFlow: ```bash bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so ``` -You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. +You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. Now I assume you want to install TensorFlow in directory `$tensorflow_root`. 
Create the directory if it does not exist ```bash @@ -103,4 +103,3 @@ The temporary installation directories for the dependencies can be removed: ```bash rm -fr /tmp/proto /tmp/eigen /tmp/nsync ``` - diff --git a/doc/install/install-tf.1.14-gpu.md b/doc/install/install-tf.1.14-gpu.md index ffaf0eb262..4e9fcaf7fc 100644 --- a/doc/install/install-tf.1.14-gpu.md +++ b/doc/install/install-tf.1.14-gpu.md @@ -1,4 +1,4 @@ -# Install TensorFlow-GPU's C++ interface +# Install TensorFlow-GPU's C++ interface TensorFlow's C++ interface will be compiled from the source code. Firstly one installs Bazel. It is highly recommended that the Bazel version 0.24.1 is used. Full instructions on Bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html). ```bash cd /some/workspace @@ -96,7 +96,7 @@ Now build the shared library of TensorFlow: ```bash bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so ``` -You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. +You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue for your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. Now I assume you want to install TensorFlow in directory `$tensorflow_root`. 
Create the directory if it does not exist ```bash @@ -145,5 +145,3 @@ Currently, when building the Eigen package, you can delete the FFTW in the CMake fatal error: absl/numeric/int128_have_intrinsic.inc: No such file or directory ``` Basically, you could build an empty file named "int128_have_intrinsic.inc" in the same directory of "int128.h". - - diff --git a/doc/install/install-tf.1.14.md b/doc/install/install-tf.1.14.md index b863a50cb6..065df9cad9 100644 --- a/doc/install/install-tf.1.14.md +++ b/doc/install/install-tf.1.14.md @@ -1,4 +1,4 @@ -# Install tensorflow's C++ interface +# Install tensorflow's C++ interface The tensorflow's C++ interface will be compiled from the source code. Firstly one installs bazel. It is highly recommended that the bazel version 0.24.1 is used. A full instruction of bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html). ```bash cd /some/workspace @@ -31,7 +31,7 @@ Now build the shared library of tensorflow: ```bash bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so ``` -You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. +You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. Now I assume you want to install tensorflow in directory `$tensorflow_root`. 
Create the directory if it does not exists ```bash diff --git a/doc/install/install-tf.1.8.md b/doc/install/install-tf.1.8.md index 6247edf087..bfc1a616d4 100644 --- a/doc/install/install-tf.1.8.md +++ b/doc/install/install-tf.1.8.md @@ -31,7 +31,7 @@ Now build the shared library of TensorFlow: ```bash bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so ``` -You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. +You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. Now I assume you want to install TensorFlow in directory `$tensorflow_root`. Create the directory if it does not exist ```bash diff --git a/doc/install/install-tf.2.3.md b/doc/install/install-tf.2.3.md index b2b7193754..e538607db0 100644 --- a/doc/install/install-tf.2.3.md +++ b/doc/install/install-tf.2.3.md @@ -1,4 +1,4 @@ -# Install TensorFlow's C++ interface +# Install TensorFlow's C++ interface The tensorflow's C++ interface will be compiled from the source code. Firstly one installs bazel. The bazel version 3.1.0 should be used. A full instruction of bazel installation can be found [here](https://docs.bazel.build/versions/master/install.html). 
```bash cd /some/workspace @@ -78,7 +78,7 @@ Now build the shared library of tensorflow: ```bash bazel build -c opt --verbose_failures //tensorflow:libtensorflow_cc.so ``` -You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. +You may want to add options `--copt=-msse4.2`, `--copt=-mavx`, `--copt=-mavx2` and `--copt=-mfma` to enable SSE4.2, AVX, AVX2 and FMA SIMD accelerations, respectively. It is noted that these options should be chosen according to the CPU architecture. If the RAM becomes an issue of your machine, you may limit the RAM usage by using `--local_resources 2048,.5,1.0`. Now I assume you want to install TensorFlow in directory `$tensorflow_root`. Create the directory if it does not exist ```bash diff --git a/doc/install/install-tf.2.8.md b/doc/install/install-tf.2.8.md index f6fa082eaf..da1f299131 100644 --- a/doc/install/install-tf.2.8.md +++ b/doc/install/install-tf.2.8.md @@ -1,4 +1,4 @@ -# Install TensorFlow's C++ interface +# Install TensorFlow's C++ interface TensorFlow's C++ interface will be compiled from the source code. Firstly one installs Bazel. [bazelisk](https://github.com/bazelbuild/bazelisk) can be lanuched to use [bazel](https://github.com/bazelbuild/bazel). ```bash diff --git a/doc/license.rst b/doc/license.rst index 366102905e..533dd5f246 100644 --- a/doc/license.rst +++ b/doc/license.rst @@ -2,4 +2,4 @@ License ================== -The project DeePMD-kit is licensed under `GNU LGPLv3.0 `_. +The project DeePMD-kit is licensed under `GNU LGPLv3.0 `_. 
diff --git a/doc/model/dplr.md b/doc/model/dplr.md index 9893d30713..27bfadcf00 100644 --- a/doc/model/dplr.md +++ b/doc/model/dplr.md @@ -12,7 +12,7 @@ We use the deep Wannier model (DW) to represent the relative position of the Wan ```bash $deepmd_source_dir/examples/water/dplr/train/ ``` -It is noted that **the tutorial dataset is not enough for training a productive model**. +It is noted that **the tutorial dataset is not enough for training a productive model**. Two settings make the training input script different from an energy training input: ```json "fitting_net": { @@ -22,7 +22,7 @@ Two settings make the training input script different from an energy training in "seed": 1 }, ``` -The type of fitting is set to {ref}`dipole `. The dipole is associated with type 0 atoms (oxygens), by the setting `"dipole_type": [0]`. What we trained is the displacement of the WC from the corresponding oxygen atom. It shares the same training input as the atomic dipole because both are 3-dimensional vectors defined on atoms. +The type of fitting is set to {ref}`dipole `. The dipole is associated with type 0 atoms (oxygens), by the setting `"dipole_type": [0]`. What we trained is the displacement of the WC from the corresponding oxygen atom. It shares the same training input as the atomic dipole because both are 3-dimensional vectors defined on atoms. The loss section is provided as follows ```json "loss": { @@ -31,7 +31,7 @@ The loss section is provided as follows "pref_atomic": 1.0 }, ``` -so that the atomic dipole is trained as labels. Note that the NumPy compressed file `atomic_dipole.npy` should be provided in each dataset. +so that the atomic dipole is trained as labels. Note that the NumPy compressed file `atomic_dipole.npy` should be provided in each dataset. 
The training and freezing can be started from the example directory by ```bash @@ -40,7 +40,7 @@ dp train dw.json && dp freeze -o dw.pb ## Train the DPLR model -The training of the DPLR model is very similar to the standard short-range DP models. An example input script can be found in the example directory. The following section is introduced to compute the long-range energy contribution of the DPLR model, and modify the short-range DP model by this part. +The training of the DPLR model is very similar to the standard short-range DP models. An example input script can be found in the example directory. The following section is introduced to compute the long-range energy contribution of the DPLR model, and modify the short-range DP model by this part. ```json "modifier": { "type": "dipole_charge", @@ -51,7 +51,7 @@ The training of the DPLR model is very similar to the standard short-range DP mo "ewald_beta": 0.40 }, ``` -The {ref}`model_name ` specifies which DW model is used to predict the position of WCs. {ref}`model_charge_map ` gives the amount of charge assigned to WCs. {ref}`sys_charge_map ` provides the nuclear charge of oxygen (type 0) and hydrogen (type 1) atoms. {ref}`ewald_beta ` (unit $\text{Å}^{-1}$) gives the spread parameter controls the spread of Gaussian charges, and {ref}`ewald_h ` (unit Å) assigns the grid size of Fourier transformation. +The {ref}`model_name ` specifies which DW model is used to predict the position of WCs. {ref}`model_charge_map ` gives the amount of charge assigned to WCs. {ref}`sys_charge_map ` provides the nuclear charge of oxygen (type 0) and hydrogen (type 1) atoms. {ref}`ewald_beta ` (unit $\text{Å}^{-1}$) gives the spread parameter controls the spread of Gaussian charges, and {ref}`ewald_h ` (unit Å) assigns the grid size of Fourier transformation. 
The DPLR model can be trained and frozen by (from the example directory) ```bash dp train ener.json && dp freeze -o ener.pb @@ -59,9 +59,9 @@ dp train ener.json && dp freeze -o ener.pb ## Molecular dynamics simulation with DPLR -In MD simulations, the long-range part of the DPLR is calculated by the LAMMPS `kspace` support. Then the long-range interaction is back-propagated to atoms by DeePMD-kit. This setup is commonly used in classical molecular dynamics simulations as the "virtual site". Unfortunately, LAMMPS does not natively support virtual sites, so we have to hack the LAMMPS code, which makes the input configuration and script a little wired. +In MD simulations, the long-range part of the DPLR is calculated by the LAMMPS `kspace` support. Then the long-range interaction is back-propagated to atoms by DeePMD-kit. This setup is commonly used in classical molecular dynamics simulations as the "virtual site". Unfortunately, LAMMPS does not natively support virtual sites, so we have to hack the LAMMPS code, which makes the input configuration and script a little weird. -An example of an input configuration file and script can be found in +An example of an input configuration file and script can be found in ```bash $deepmd_source_dir/examples/water/dplr/lmp/ ``` @@ -82,7 +82,7 @@ We use `atom_style full` for DPLR simulations. the coordinates of the WCs are ex Masses 1 16 -2 2 +2 2 3 16 Atoms @@ -131,7 +131,7 @@ Type 1 and 2 (O and H) are `real_atom`s, while type 3 (WCs) are `virtual_atom`s. kspace_style pppm/dplr 1e-5 kspace_modify gewald ${BETA} diff ik mesh ${KMESH} ${KMESH} ${KMESH} ``` -The long-range part is calculated by the `kspace` support of LAMMPS. The `kspace_style` `pppm/dplr` is required. The spread parameter set by variable `BETA` should be set the same as that used in training. The `KMESH` should be set dense enough so the long-range calculation is converged. +The long-range part is calculated by the `kspace` support of LAMMPS. 
The `kspace_style` `pppm/dplr` is required. The spread parameter set by variable `BETA` should be set the same as that used in training. The `KMESH` should be set dense enough so the long-range calculation is converged. ```lammps # "fix dplr" set the position of the virtual atom, and spread the @@ -142,7 +142,7 @@ The long-range part is calculated by the `kspace` support of LAMMPS. The `kspace fix 0 all dplr model ener.pb type_associate 1 3 bond_type 1 fix_modify 0 virial yes ``` -The fix command `dplr` calculates the position of WCs by the DW model and back-propagates the long-range interaction on virtual atoms to real toms. +The fix command `dplr` calculates the position of WCs by the DW model and back-propagates the long-range interaction on virtual atoms to real atoms. At this time, the training parameter {ref}`type_map ` will be mapped to LAMMPS atom types. ```lammps @@ -157,15 +157,15 @@ The temperature of the system should be computed from the real atoms. The kineti fix thermo_print all print ${THERMO_FREQ} "$(step) $(pe) $(ke) $(etotal) $(enthalpy) $(c_real_temp) $(c_real_press) $(vol) $(c_real_press[1]) $(c_real_press[2]) $(c_real_press[3])" append thermo.out screen no title "# step pe ke etotal enthalpy temp press vol pxx pyy pzz" ``` -The LAMMPS simulation can be started from the example directory by +The LAMMPS simulation can be started from the example directory by ```bash lmp -i in.lammps ``` -If LAMMPS complains that no model file `ener.pb` exists, it can be copied from the training example directory. +If LAMMPS complains that no model file `ener.pb` exists, it can be copied from the training example directory. -The MD simulation lasts for only 20 steps. 
If one runs a longer simulation, it will blow up, because the model is trained with a very limited dataset for very short training steps, thus is of poor quality. -Another restriction that should be noted is that the energies printed at the zero steps are not correct. This is because at the zero steps the position of the WC has not been updated with the DW model. The energies printed in later steps are correct. +Another restriction that should be noted is that the energies printed at the zero steps are not correct. This is because at the zero steps the position of the WC has not been updated with the DW model. The energies printed in later steps are correct. diff --git a/doc/model/overall.md b/doc/model/overall.md index 98b7745ad9..437f3648f4 100644 --- a/doc/model/overall.md +++ b/doc/model/overall.md @@ -14,7 +14,7 @@ A model has two parts, a descriptor that maps atomic configuration to a set of s ``` The two subsections, {ref}`descriptor ` and {ref}`fitting_net `, define the descriptor and the fitting net, respectively. -The {ref}`type_map ` is optional, which provides the element names (but not necessarily same as the actual name of the element) of the corresponding atom types. A water model, as in this example, has two kinds of atoms. The atom types are internally recorded as integers, e.g., `0` for oxygen and `1` for hydrogen here. A mapping from the atom type to their names is provided by {ref}`type_map `. +The {ref}`type_map ` is optional, which provides the element names (but not necessarily same as the actual name of the element) of the corresponding atom types. A water model, as in this example, has two kinds of atoms. The atom types are internally recorded as integers, e.g., `0` for oxygen and `1` for hydrogen here. A mapping from the atom type to their names is provided by {ref}`type_map `. DeePMD-kit implements the following descriptors: 1. 
[`se_e2_a`](train-se-e2-a.md): DeepPot-SE constructed from all information (both angular and radial) of atomic configurations. The embedding takes the distance between atoms as input. diff --git a/doc/model/train-energy.md b/doc/model/train-energy.md index a56f03fec4..af3e4969b3 100644 --- a/doc/model/train-energy.md +++ b/doc/model/train-energy.md @@ -12,8 +12,8 @@ The construction of the fitting net is given by section {ref}`fitting_net ` specifies the size of the fitting net. If two neighboring layers are of the same size, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. -* If the option {ref}`resnet_dt ` is set to `true`, then a timestep is used in the ResNet. +* {ref}`neuron ` specifies the size of the fitting net. If two neighboring layers are of the same size, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. +* If the option {ref}`resnet_dt ` is set to `true`, then a timestep is used in the ResNet. * {ref}`seed ` gives the random seed that is used to generate random numbers when initializing the model parameters. ## Loss @@ -31,7 +31,7 @@ where $\alpha(t)$ denotes the learning rate at step $t$. $p_f^0$ and $p_f^\infty pref_f(t) = start_pref_f * ( lr(t) / start_lr ) + limit_pref_f * ( 1 - lr(t) / start_lr ) ``` -The {ref}`loss ` section in the `input.json` is +The {ref}`loss ` section in the `input.json` is ```json "loss" : { "start_pref_e": 0.02, diff --git a/doc/model/train-fitting-tensor.md b/doc/model/train-fitting-tensor.md index 348f33d425..d7c06a25ed 100644 --- a/doc/model/train-fitting-tensor.md +++ b/doc/model/train-fitting-tensor.md @@ -1,6 +1,6 @@ # Fit `tensor` like `Dipole` and `Polarizability` -Unlike `energy`, which is a scalar, one may want to fit some high dimensional physical quantity, like `dipole` (vector) and `polarizability` (matrix, shorted as `polar`). Deep Potential has provided different APIs to do this. 
In this example, we will show you how to train a model to fit a water system. A complete training input script of the examples can be found in +Unlike `energy`, which is a scalar, one may want to fit some high dimensional physical quantity, like `dipole` (vector) and `polarizability` (matrix, shorted as `polar`). Deep Potential has provided different APIs to do this. In this example, we will show you how to train a model to fit a water system. A complete training input script of the examples can be found in ```bash $deepmd_source_dir/examples/water_tensor/dipole/dipole_input.json @@ -53,7 +53,7 @@ The {ref}`loss ` section tells DP the weight of these two kinds of loss, i loss = pref * global_loss + pref_atomic * atomic_loss ``` -The loss section should be provided like +The loss section should be provided like ```json "loss" : { @@ -120,6 +120,4 @@ One may notice that in each step, some of the local loss and global loss will be >atomic_system >global_system ``` -During training, at each step when the `lcurve.out` is printed, the system used for evaluating the training (validation) error may be either with only global or only atomic labels, thus the corresponding atomic or global errors are missing and are printed as zeros. - - +During training, at each step when the `lcurve.out` is printed, the system used for evaluating the training (validation) error may be either with only global or only atomic labels, thus the corresponding atomic or global errors are missing and are printed as zeros. diff --git a/doc/model/train-hybrid.md b/doc/model/train-hybrid.md index fb4af522ca..37666668c7 100644 --- a/doc/model/train-hybrid.md +++ b/doc/model/train-hybrid.md @@ -9,7 +9,7 @@ To use the descriptor in DeePMD-kit, one firstly set the {ref}`type ` of the descriptor is set to `"se_atten"`, which will use DPA-1 structures. -* {ref}`rcut ` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth ` gives where the smoothing starts. 
+* The {ref}`type ` of the descriptor is set to `"se_atten"`, which will use DPA-1 structures. +* {ref}`rcut ` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth ` gives where the smoothing starts. * **{ref}`sel `** gives the maximum possible number of neighbors in the cut-off radius. It is an int. Note that this number highly affects the efficiency of training, which we usually use less than 200. (We use 120 for training 56 elements in [OC2M dataset](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md)) * The {ref}`neuron ` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. -* The {ref}`axis_neuron ` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003) +* The {ref}`axis_neuron ` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003) * If the option {ref}`resnet_dt ` is set to `true`, then a timestep is used in the ResNet. * {ref}`seed ` gives the random seed that is used to generate random numbers when initializing the model parameters. * {ref}`attn ` sets the length of a hidden vector during scale-dot attention computation. @@ -79,13 +79,13 @@ For training large systems, especially those with dozens of elements, the {ref}` "Cu" ] ``` -which should include all the elements in the dataset you want to train on. +which should include all the elements in the dataset you want to train on. ## Data format DPA-1 supports the standard data format, which is detailed in [data-conv.md](../data/data-conv.md) and [system.md](../data/system.md). 
Note that in this format, only those frames with the same fingerprint (i.e. the number of atoms of different elements) can be put together as a unified system. -This may lead to sparse frame numbers in those rare systems. +This may lead to sparse frame numbers in those rare systems. -An ideal way is to put systems with the same total number of atoms together, which is the way we trained DPA-1 on [OC2M](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md). +An ideal way is to put systems with the same total number of atoms together, which is the way we trained DPA-1 on [OC2M](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md). This system format, which is called `mixed_type`, is proper to put frame-sparse systems together and is slightly different from the standard one. Take an example, a `mixed_type` may contain the following files: ``` @@ -113,8 +113,3 @@ The API to generate or transfer to `mixed_type` format will be uploaded on [dpda Here we upload the AlMgCu example shown in the paper, you can download it here: [Baidu disk](https://pan.baidu.com/s/1Mk9CihPHCmf8quwaMhT-nA?pwd=d586); [Google disk](https://drive.google.com/file/d/11baEpRrvHoqxORFPSdJiGWusb3Y4AnRE/view?usp=sharing). - - - - - diff --git a/doc/model/train-se-e2-a-tebd.md b/doc/model/train-se-e2-a-tebd.md index 07546e3120..7528202ff2 100644 --- a/doc/model/train-se-e2-a-tebd.md +++ b/doc/model/train-se-e2-a-tebd.md @@ -1,8 +1,8 @@ # Type embedding approach - -We generate specific a type embedding vector for each atom type so that we can share one descriptor embedding net and one fitting net in total, which decline training complexity largely. -The training input script is similar to that of [`se_e2_a`](train-se-e2-a.md), but different by adding the {ref}`type_embedding ` section. +We generate a specific type embedding vector for each atom type so that we can share one descriptor embedding net and one fitting net in total, which declines training complexity largely. 
+ +The training input script is similar to that of [`se_e2_a`](train-se-e2-a.md), but different by adding the {ref}`type_embedding ` section. ## Type embedding net The {ref}`model ` defines how the model is constructed, adding a section of type embedding net: @@ -11,7 +11,7 @@ The {ref}`model ` defines how the model is constructed, adding a section "type_map": ["O", "H"], "type_embedding":{ ... - }, + }, "descriptor" :{ ... }, @@ -35,7 +35,7 @@ The construction of type embedding net is given by {ref}`type_embedding ` gives the random seed that is used to generate random numbers when initializing the model parameters. -A complete training input script of this example can be found in the directory. +A complete training input script of this example can be found in the directory. ```bash $deepmd_source_dir/examples/water/se_e2_a_tebd/input.json ``` @@ -43,4 +43,4 @@ See [here](../development/type-embedding.md) for further explanation of `type em :::{note} You can't apply the compression method while using the atom type embedding. -::: \ No newline at end of file +::: diff --git a/doc/model/train-se-e2-a.md b/doc/model/train-se-e2-a.md index dedd645dc4..a043f64716 100644 --- a/doc/model/train-se-e2-a.md +++ b/doc/model/train-se-e2-a.md @@ -4,7 +4,7 @@ The notation of `se_e2_a` is short for the Deep Potential Smooth Edition (DeepPo Note that it is sometimes called a "two-atom embedding descriptor" which means the input of the embedding net is atomic distances. The descriptor **does** encode multi-body information (both angular and radial information of neighboring atoms). -In this example, we will train a DeepPot-SE model for a water system. A complete training input script of this example can be found in the directory. +In this example, we will train a DeepPot-SE model for a water system. A complete training input script of this example can be found in the directory. 
```bash $deepmd_source_dir/examples/water/se_e2_a/input.json ``` @@ -24,12 +24,11 @@ The construction of the descriptor is given by section {ref}`descriptor ` of the descriptor is set to `"se_e2_a"`. -* {ref}`rcut ` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth ` gives where the smoothing starts. -* {ref}`sel ` gives the maximum possible number of neighbors in the cut-off radius. It is a list, the length of which is the same as the number of atom types in the system, and `sel[i]` denotes the maximum possible number of neighbors with type `i`. +* The {ref}`type ` of the descriptor is set to `"se_e2_a"`. +* {ref}`rcut ` is the cut-off radius for neighbor searching, and the {ref}`rcut_smth ` gives where the smoothing starts. +* {ref}`sel ` gives the maximum possible number of neighbors in the cut-off radius. It is a list, the length of which is the same as the number of atom types in the system, and `sel[i]` denotes the maximum possible number of neighbors with type `i`. * The {ref}`neuron ` specifies the size of the embedding net. From left to right the members denote the sizes of each hidden layer from the input end to the output end, respectively. If the outer layer is twice the size of the inner layer, then the inner layer is copied and concatenated, then a [ResNet architecture](https://arxiv.org/abs/1512.03385) is built between them. * If the option {ref}`type_one_side ` is set to `true`, the embedding network parameters vary by types of neighbor atoms only, so there will be $N_\text{types}$ sets of embedding network parameters. Otherwise, the embedding network parameters vary by types of centric atoms and types of neighbor atoms, so there will be $N_\text{types}^2$ sets of embedding network parameters. 
-* The {ref}`axis_neuron ` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003) +* The {ref}`axis_neuron ` specifies the size of the submatrix of the embedding matrix, the axis matrix as explained in the [DeepPot-SE paper](https://arxiv.org/abs/1805.09003) * If the option {ref}`resnet_dt ` is set to `true`, then a timestep is used in the ResNet. * {ref}`seed ` gives the random seed that is used to generate random numbers when initializing the model parameters. - diff --git a/doc/model/train-se-e2-r.md b/doc/model/train-se-e2-r.md index 181146e8e9..f48e10c17b 100644 --- a/doc/model/train-se-e2-r.md +++ b/doc/model/train-se-e2-r.md @@ -1,6 +1,6 @@ # Descriptor `"se_e2_r"` -The notation of `se_e2_r` is short for the Deep Potential Smooth Edition (DeepPot-SE) constructed from the radial information of atomic configurations. The `e2` stands for the embedding with two-atom information. +The notation of `se_e2_r` is short for the Deep Potential Smooth Edition (DeepPot-SE) constructed from the radial information of atomic configurations. The `e2` stands for the embedding with two-atom information. A complete training input script of this example can be found in the directory ```bash diff --git a/doc/nvnmd/index.rst b/doc/nvnmd/index.rst index c4470ee3fd..868f96ec05 100644 --- a/doc/nvnmd/index.rst +++ b/doc/nvnmd/index.rst @@ -4,4 +4,4 @@ Use NVNMD .. toctree:: :maxdepth: 1 - nvnmd \ No newline at end of file + nvnmd diff --git a/doc/nvnmd/nvnmd.md b/doc/nvnmd/nvnmd.md index 60bff1508d..57c46dd6b0 100644 --- a/doc/nvnmd/nvnmd.md +++ b/doc/nvnmd/nvnmd.md @@ -23,7 +23,7 @@ mkdir -p data cp -r $dataset data ``` -where `$dataset` is the path to the data set and `$workspace` is the path to the working directory. +where `$dataset` is the path to the data set and `$workspace` is the path to the working directory. 
## Input script @@ -32,7 +32,7 @@ Create and go to the training directory. ```bash mkdir train -cd train +cd train ``` Then copy the input script `train_cnn.json` and `train_qnn.json` to the directory `train` @@ -55,7 +55,7 @@ The structure of the input script is as follows ### nvnmd -The "nvnmd" section is defined as +The "nvnmd" section is defined as ```json { @@ -77,7 +77,7 @@ where items are defined as: ### learning_rate -The "learning_rate" section is defined as +The "learning_rate" section is defined as ```json { @@ -99,7 +99,7 @@ where items are defined as: ### loss -The "loss" section is defined as +The "loss" section is defined as ```json { @@ -125,7 +125,7 @@ where items are defined as: ### training -The "training" section is defined as +The "training" section is defined as ```json { @@ -172,7 +172,7 @@ dp train-nvnmd train_qnn.json -s s2 After the training process, you will get two folders: `nvnmd_cnn` and `nvnmd_qnn`. The `nvnmd_cnn` contains the model after continuous neural network (CNN) training. The `nvnmd_qnn` contains the model after quantized neural network (QNN) training. The binary file `nvnmd_qnn/model.pb` is the model file that is used to perform NVNMD in the server [http://nvnmd.picp.vip]. 
-You can also restart the CNN training from the checkpoint (`nvnmd_cnn/model.ckpt`) by +You can also restart the CNN training from the checkpoint (`nvnmd_cnn/model.ckpt`) by ``` bash dp train-nvnmd train_cnn.json -r nvnmd_cnn/model.ckpt -s s1 diff --git a/doc/sphinx_contrib_exhale_multiproject.py b/doc/sphinx_contrib_exhale_multiproject.py index f7026cbf14..3a8309b071 100644 --- a/doc/sphinx_contrib_exhale_multiproject.py +++ b/doc/sphinx_contrib_exhale_multiproject.py @@ -75,22 +75,24 @@ } ''' +import os +import os.path +from pprint import ( + pprint, +) + import exhale import exhale.configs -import exhale.utils import exhale.deploy - -import os -import os.path -from pprint import pprint +import exhale.utils def exhale_environment_ready(app): default_project = app.config.breathe_default_project default_exhale_args = dict(app.config.exhale_args) - exhale_projects_args = dict(app.config._raw_config['exhale_projects_args']) - breathe_projects = dict(app.config._raw_config['breathe_projects']) + exhale_projects_args = dict(app.config._raw_config["exhale_projects_args"]) + breathe_projects = dict(app.config._raw_config["breathe_projects"]) for project in breathe_projects: app.config.breathe_default_project = project @@ -100,12 +102,14 @@ def exhale_environment_ready(app): app.config.exhale_args = dict(default_exhale_args) app.config.exhale_args.update(project_exhale_args) - app.config.exhale_args["containmentFolder"] = os.path.realpath(app.config.exhale_args["containmentFolder"]) - print("="*75) + app.config.exhale_args["containmentFolder"] = os.path.realpath( + app.config.exhale_args["containmentFolder"] + ) + print("=" * 75) print(project) - print("-"*50) + print("-" * 50) pprint(app.config.exhale_args) - print("="*75) + print("=" * 75) # First, setup the extension and verify all of the configurations. 
exhale.configs.apply_sphinx_configurations(app) @@ -115,8 +119,11 @@ def exhale_environment_ready(app): try: exhale.deploy.explode() except: - exhale.utils.fancyError("Exhale: could not generate reStructuredText documents :/") + exhale.utils.fancyError( + "Exhale: could not generate reStructuredText documents :/" + ) app.config.breathe_default_project = default_project -exhale.environment_ready = exhale_environment_ready \ No newline at end of file + +exhale.environment_ready = exhale_environment_ready diff --git a/doc/test/index.md b/doc/test/index.md index 815989d146..4a502123d9 100644 --- a/doc/test/index.md +++ b/doc/test/index.md @@ -1,4 +1,4 @@ # Test - [Test a model](test.md) -- [Calculate Model Deviation](model-deviation.md) \ No newline at end of file +- [Calculate Model Deviation](model-deviation.md) diff --git a/doc/test/test.md b/doc/test/test.md index ae7fd2fd5c..c206e8d777 100644 --- a/doc/test/test.md +++ b/doc/test/test.md @@ -1,6 +1,6 @@ # Test a model -The frozen model can be used in many ways. The most straightforward test can be performed using `dp test`. A typical usage of `dp test` is +The frozen model can be used in many ways. The most straightforward test can be performed using `dp test`. A typical usage of `dp test` is ```bash dp test -m graph.pb -s /path/to/system -n 30 ``` @@ -29,4 +29,4 @@ optional arguments: -d DETAIL_FILE, --detail-file DETAIL_FILE The prefix to files where details of energy, force and virial accuracy/accuracy per atom will be written -a, --atomic Test the accuracy of atomic label, i.e. 
energy / tensor (dipole, polar) -``` \ No newline at end of file +``` diff --git a/doc/third-party/ase.md b/doc/third-party/ase.md index 3abb44d997..ac65fc926e 100644 --- a/doc/third-party/ase.md +++ b/doc/third-party/ase.md @@ -5,12 +5,12 @@ Deep potential can be set up as a calculator with ASE to obtain potential energi from ase import Atoms from deepmd.calculator import DP -water = Atoms('H2O', - positions=[(0.7601, 1.9270, 1), - (1.9575, 1, 1), - (1., 1., 1.)], - cell=[100, 100, 100], - calculator=DP(model="frozen_model.pb")) +water = Atoms( + "H2O", + positions=[(0.7601, 1.9270, 1), (1.9575, 1, 1), (1.0, 1.0, 1.0)], + cell=[100, 100, 100], + calculator=DP(model="frozen_model.pb"), +) print(water.get_potential_energy()) print(water.get_forces()) ``` @@ -18,7 +18,8 @@ print(water.get_forces()) Optimization is also available: ```python from ase.optimize import BFGS + dyn = BFGS(water) dyn.run(fmax=1e-6) print(water.get_positions()) -``` \ No newline at end of file +``` diff --git a/doc/third-party/gromacs.md b/doc/third-party/gromacs.md index d5bafc4d43..672fb693b9 100644 --- a/doc/third-party/gromacs.md +++ b/doc/third-party/gromacs.md @@ -25,10 +25,10 @@ For example, if one wants to simulate ethane in water, using DeepPotential for m [ bonds ] ; i j func b0 kb - 1 2 5 - 1 3 5 - 1 4 5 - 1 5 5 + 1 2 5 + 1 3 5 + 1 4 5 + 1 5 5 [ exclusions ] ; ai aj1 aj2 aj3 aj4 @@ -61,19 +61,19 @@ For comparison, the original topology file generated by `acpype` will be: [ bonds ] ; ai aj funct r k - 1 2 1 1.0970e-01 3.1455e+05 ; C1 - H1 - 1 3 1 1.0970e-01 3.1455e+05 ; C1 - H2 - 1 4 1 1.0970e-01 3.1455e+05 ; C1 - H3 - 1 5 1 1.0970e-01 3.1455e+05 ; C1 - H4 + 1 2 1 1.0970e-01 3.1455e+05 ; C1 - H1 + 1 3 1 1.0970e-01 3.1455e+05 ; C1 - H2 + 1 4 1 1.0970e-01 3.1455e+05 ; C1 - H3 + 1 5 1 1.0970e-01 3.1455e+05 ; C1 - H4 [ angles ] ; ai aj ak funct theta cth - 2 1 3 1 1.0758e+02 3.2635e+02 ; H1 - C1 - H2 - 2 1 4 1 1.0758e+02 3.2635e+02 ; H1 - C1 - H3 - 2 1 5 1 1.0758e+02 3.2635e+02 ; H1 - 
C1 - H4 - 3 1 4 1 1.0758e+02 3.2635e+02 ; H2 - C1 - H3 - 3 1 5 1 1.0758e+02 3.2635e+02 ; H2 - C1 - H4 - 4 1 5 1 1.0758e+02 3.2635e+02 ; H3 - C1 - H4 + 2 1 3 1 1.0758e+02 3.2635e+02 ; H1 - C1 - H2 + 2 1 4 1 1.0758e+02 3.2635e+02 ; H1 - C1 - H3 + 2 1 5 1 1.0758e+02 3.2635e+02 ; H1 - C1 - H4 + 3 1 4 1 1.0758e+02 3.2635e+02 ; H2 - C1 - H3 + 3 1 5 1 1.0758e+02 3.2635e+02 ; H2 - C1 - H4 + 4 1 5 1 1.0758e+02 3.2635e+02 ; H3 - C1 - H4 ``` ### DeepMD Settings Before running simulations, we need to tell GROMACS to use DeepPotential by setting the environment variable `GMX_DEEPMD_INPUT_JSON`: @@ -113,8 +113,8 @@ This part gives an example of how to simulate all atoms described by a DeepPoten ; name at.num mass charge ptype sigma epsilon HW 1 1.008 0.0000 A 0.00000e+00 0.00000e+00 OW 8 16.00 0.0000 A 0.00000e+00 0.00000e+00 -``` +``` As mentioned in the above section, `input.json` and relevant files (`index.raw`, `type.raw`) should also be created. Then, we can start the simulation under the NVT ensemble and plot the radial distribution function (RDF) by `gmx rdf` command. We can see that the RDF given by Gromacs+DP matches perfectly with Lammps+DP, which further provides an evidence on the validity of our simulation. ![rdf](../../examples/water/gmx/rdf.png) -However, we still recommend you run an all-atom DP simulation using LAMMPS since it is more stable and efficient. \ No newline at end of file +However, we still recommend you run an all-atom DP simulation using LAMMPS since it is more stable and efficient. 
diff --git a/doc/third-party/index.md b/doc/third-party/index.md index 5803f3ef95..3de01d6944 100644 --- a/doc/third-party/index.md +++ b/doc/third-party/index.md @@ -7,4 +7,4 @@ Note that the model for inference is required to be compatible with the DeePMD-k - [LAMMPS commands](lammps-command.md) - [Run path-integral MD with i-PI](ipi.md) - [Run MD with GROMACS](gromacs.md) -- [Interfaces out of DeePMD-kit](out-of-deepmd-kit.md) \ No newline at end of file +- [Interfaces out of DeePMD-kit](out-of-deepmd-kit.md) diff --git a/doc/third-party/index.rst b/doc/third-party/index.rst index 8620058245..678dfc9315 100644 --- a/doc/third-party/index.rst +++ b/doc/third-party/index.rst @@ -1,7 +1,7 @@ Integrate with third-party packages =================================== -Note that the model for inference is required to be compatible with the DeePMD-kit package. See `Model compatibility <../troubleshooting/model-compatability.html>`_ for details. +Note that the model for inference is required to be compatible with the DeePMD-kit package. See `Model compatibility <../troubleshooting/model-compatability.html>`_ for details. .. toctree:: :maxdepth: 1 @@ -11,4 +11,4 @@ Note that the model for inference is required to be compatible with the DeePMD-k lammps-command ipi gromacs - out-of-deepmd-kit \ No newline at end of file + out-of-deepmd-kit diff --git a/doc/third-party/ipi.md b/doc/third-party/ipi.md index 41611adf3d..53d828def7 100644 --- a/doc/third-party/ipi.md +++ b/doc/third-party/ipi.md @@ -16,7 +16,7 @@ It is noted that multiple instances of the client allow for computing, in parall "graph_file": "graph.pb", "coord_file": "conf.xyz", "atom_type" : { - "OW": 0, + "OW": 0, "HW1": 1, "HW2": 1 } @@ -31,4 +31,4 @@ The option **`port`** should be the same as that in input.xml: The option **`graph_file`** provides the file name of the frozen model. 
-The `dp_ipi` gets the atom names from an [XYZ file](https://en.wikipedia.org/wiki/XYZ_file_format) provided by **`coord_file`** (meanwhile ignores all coordinates in it) and translates the names to atom types by rules provided by **`atom_type`**. \ No newline at end of file +The `dp_ipi` gets the atom names from an [XYZ file](https://en.wikipedia.org/wiki/XYZ_file_format) provided by **`coord_file`** (meanwhile ignores all coordinates in it) and translates the names to atom types by rules provided by **`atom_type`**. diff --git a/doc/third-party/lammps-command.md b/doc/third-party/lammps-command.md index 2e6dc08444..c9f7928df4 100644 --- a/doc/third-party/lammps-command.md +++ b/doc/third-party/lammps-command.md @@ -26,8 +26,8 @@ The DeePMD-kit package provides the pair_style `deepmd` pair_style deepmd models ... keyword value ... ``` - deepmd = style of this pair_style -- models = frozen model(s) to compute the interaction. -If multiple models are provided, then only the first model serves to provide energy and force prediction for each timestep of molecular dynamics, +- models = frozen model(s) to compute the interaction. +If multiple models are provided, then only the first model serves to provide energy and force prediction for each timestep of molecular dynamics, and the model deviation will be computed among all models every `out_freq` timesteps. - keyword = *out_file* or *out_freq* or *fparam* or *fparam_from_compute* or *atomic* or *relative* or *relative_v* or *aparam* or *ttm*
@@ -39,7 +39,7 @@ and the model deviation will be computed among all models every `out_freq` times
         parameters = one or more frame parameters required for model evaluation.
     fparam_from_compute value = id
         id = compute id used to update the frame parameter.
-    atomic = no value is required. 
+    atomic = no value is required.
         If this keyword is set, the model deviation of each atom will be output.
     relative value = level
         level = The level parameter for computing the relative model deviation of the force
@@ -63,7 +63,7 @@ compute    TEMP all temp
 ```
 
 ### Description
-Evaluate the interaction of the system by using [Deep Potential][DP] or [Deep Potential Smooth Edition][DP-SE]. It is noticed that deep potential is not a "pairwise" interaction, but a multi-body interaction. 
+Evaluate the interaction of the system by using [Deep Potential][DP] or [Deep Potential Smooth Edition][DP-SE]. It is noticed that deep potential is not a "pairwise" interaction, but a multi-body interaction.
 
 This pair style takes the deep potential defined in a model file that usually has the .pb extension. The model can be trained and frozen by package [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit).
 
@@ -80,7 +80,7 @@ $$E_{v_i}=\frac{\left|D_{v_i}\right|}{\left|v_i\right|+l}$$
 
 If the keyword `fparam` is set, the given frame parameter(s) will be fed to the model.
 If the keyword `fparam_from_compute` is set, the global parameter(s) from compute command (e.g., temperature from [compute temp command](https://docs.lammps.org/compute_temp.html)) will be fed to the model as the frame parameter(s).
-If the keyword `aparam` is set, the given atomic parameter(s) will be fed to the model, where each atom is assumed to have the same atomic parameter(s). 
+If the keyword `aparam` is set, the given atomic parameter(s) will be fed to the model, where each atom is assumed to have the same atomic parameter(s).
 If the keyword `ttm` is set, electronic temperatures from [fix ttm command](https://docs.lammps.org/fix_ttm.html) will be fed to the model as the atomic parameters.
 
 Only a single `pair_coeff` command is used with the deepmd style which specifies atom names. These are mapped to LAMMPS atom types (integers from 1 to Ntypes) by specifying Ntypes additional arguments after `* *` in the `pair_coeff` command.
@@ -93,7 +93,7 @@ If the training parameter {ref}`type_map ` is not set, atom name
 
 ## Compute tensorial properties
 
-The DeePMD-kit package provides the compute `deeptensor/atom` for computing atomic tensorial properties. 
+The DeePMD-kit package provides the compute `deeptensor/atom` for computing atomic tensorial properties.
 
 ```lammps
 compute ID group-ID deeptensor/atom model_file
@@ -109,9 +109,9 @@ At this time, the training parameter {ref}`type_map ` will be ma
 ```lammps
 compute         dipole all deeptensor/atom dipole.pb
 ```
-The result of the compute can be dumped to trajectory file by 
+The result of the compute can be dumped to a trajectory file by
 ```lammps
-dump            1 all custom 100 water.dump id type c_dipole[1] c_dipole[2] c_dipole[3] 
+dump            1 all custom 100 water.dump id type c_dipole[1] c_dipole[2] c_dipole[3]
 ```
 
 ### Restrictions
@@ -119,7 +119,7 @@ dump            1 all custom 100 water.dump id type c_dipole[1] c_dipole[2] c_di
 
 
 ## Long-range interaction
-The reciprocal space part of the long-range interaction can be calculated by LAMMPS command `kspace_style`. To use it with DeePMD-kit, one writes 
+The reciprocal space part of the long-range interaction can be calculated by LAMMPS command `kspace_style`. To use it with DeePMD-kit, one writes
 ```lammps
 pair_style	deepmd graph.pb
 pair_coeff  * *
@@ -128,7 +128,7 @@ kspace_modify	gewald 0.45
 ```
 Please notice that the DeePMD does nothing to the direct space part of the electrostatic interaction, because this part is assumed to be fitted in the DeePMD model (the direct space cut-off is thus the cut-off of the DeePMD model). The splitting parameter `gewald` is modified by the `kspace_modify` command.
 
-## Use of the centroid/stress/atom to get the full 3x3 "atomic-virial" 
+## Use of the centroid/stress/atom to get the full 3x3 "atomic-virial"
 
 The [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit) allows also the computation of per-atom stress tensor defined as:
 
@@ -155,7 +155,7 @@ Using a per-atom stress tensor one can, for example, compute the heat flux defin
 
 $$\mathbf J = \sum_n e_n \mathbf v_n + \sum_{n,m} ( \mathbf r_m- \mathbf r_n) \frac{de_m}{d\mathbf r_n} \mathbf v_n$$
 
-to compute the heat flux with LAMMPS: 
+to compute the heat flux with LAMMPS:
 ```lammps
 compute ke_ID all ke/atom
 compute pe_ID all pe/atom
diff --git a/doc/train-input-auto.rst b/doc/train-input-auto.rst
index 9201809549..a3b69eade9 100644
--- a/doc/train-input-auto.rst
+++ b/doc/train-input-auto.rst
@@ -1,140 +1,140 @@
-.. _`model`: 
+.. _`model`:
 
-model: 
+model:
     | type: ``dict``
     | argument path: ``model``
 
-    .. _`model/type_map`: 
+    .. _`model/type_map`:
 
-    type_map: 
+    type_map:
         | type: ``list``, optional
         | argument path: ``model/type_map``
 
         A list of strings. Give the name to each type of atoms. It is noted that the number of atom type of training system must be less than 128 in a GPU environment.
 
-    .. _`model/data_stat_nbatch`: 
+    .. _`model/data_stat_nbatch`:
 
-    data_stat_nbatch: 
+    data_stat_nbatch:
         | type: ``int``, optional, default: ``10``
         | argument path: ``model/data_stat_nbatch``
 
         The model determines the normalization from the statistics of the data. This key specifies the number of `frames` in each `system` used for statistics.
 
-    .. _`model/data_stat_protect`: 
+    .. _`model/data_stat_protect`:
 
-    data_stat_protect: 
+    data_stat_protect:
         | type: ``float``, optional, default: ``0.01``
         | argument path: ``model/data_stat_protect``
 
         Protect parameter for atomic energy regression.
 
-    .. _`model/use_srtab`: 
+    .. _`model/use_srtab`:
 
-    use_srtab: 
+    use_srtab:
         | type: ``str``, optional
         | argument path: ``model/use_srtab``
 
         The table for the short-range pairwise interaction added on top of DP. The table is a text data file with (N_t + 1) * N_t / 2 + 1 columes. The first colume is the distance between atoms. The second to the last columes are energies for pairs of certain types. For example we have two atom types, 0 and 1. The columes from 2nd to 4th are for 0-0, 0-1 and 1-1 correspondingly.
 
-    .. _`model/smin_alpha`: 
+    .. _`model/smin_alpha`:
 
-    smin_alpha: 
+    smin_alpha:
         | type: ``float``, optional
         | argument path: ``model/smin_alpha``
 
         The short-range tabulated interaction will be swithed according to the distance of the nearest neighbor. This distance is calculated by softmin. This parameter is the decaying parameter in the softmin. It is only required when `use_srtab` is provided.
 
-    .. _`model/sw_rmin`: 
+    .. _`model/sw_rmin`:
 
-    sw_rmin: 
+    sw_rmin:
         | type: ``float``, optional
         | argument path: ``model/sw_rmin``
 
         The lower boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided.
 
-    .. _`model/sw_rmax`: 
+    .. _`model/sw_rmax`:
 
-    sw_rmax: 
+    sw_rmax:
         | type: ``float``, optional
         | argument path: ``model/sw_rmax``
 
         The upper boundary of the interpolation between short-range tabulated interaction and DP. It is only required when `use_srtab` is provided.
 
-    .. _`model/type_embedding`: 
+    .. _`model/type_embedding`:
 
-    type_embedding: 
+    type_embedding:
         | type: ``dict``, optional
         | argument path: ``model/type_embedding``
 
         The type embedding.
 
-        .. _`model/type_embedding/neuron`: 
+        .. _`model/type_embedding/neuron`:
 
-        neuron: 
+        neuron:
             | type: ``list``, optional, default: ``[2, 4, 8]``
             | argument path: ``model/type_embedding/neuron``
 
             Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
 
-        .. _`model/type_embedding/activation_function`: 
+        .. _`model/type_embedding/activation_function`:
 
-        activation_function: 
+        activation_function:
             | type: ``str``, optional, default: ``tanh``
             | argument path: ``model/type_embedding/activation_function``
 
             The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
 
-        .. _`model/type_embedding/resnet_dt`: 
+        .. _`model/type_embedding/resnet_dt`:
 
-        resnet_dt: 
+        resnet_dt:
             | type: ``bool``, optional, default: ``False``
             | argument path: ``model/type_embedding/resnet_dt``
 
             Whether to use a "Timestep" in the skip connection
 
-        .. _`model/type_embedding/precision`: 
+        .. _`model/type_embedding/precision`:
 
-        precision: 
+        precision:
             | type: ``str``, optional, default: ``float64``
             | argument path: ``model/type_embedding/precision``
 
             The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64".
 
-        .. _`model/type_embedding/trainable`: 
+        .. _`model/type_embedding/trainable`:
 
-        trainable: 
+        trainable:
             | type: ``bool``, optional, default: ``True``
             | argument path: ``model/type_embedding/trainable``
 
             If the parameters in the embedding net are trainable
 
-        .. _`model/type_embedding/seed`: 
+        .. _`model/type_embedding/seed`:
 
-        seed: 
+        seed:
             | type: ``int`` | ``NoneType``, optional
             | argument path: ``model/type_embedding/seed``
 
             Random seed for parameter initialization
 
-    .. _`model/descriptor`: 
+    .. _`model/descriptor`:
 
-    descriptor: 
+    descriptor:
         | type: ``dict``
         | argument path: ``model/descriptor``
 
         The descriptor of atomic environment.
 
 
-        Depending on the value of *type*, different sub args are accepted. 
+        Depending on the value of *type*, different sub args are accepted.
 
-        .. _`model/descriptor/type`: 
+        .. _`model/descriptor/type`:
 
         type:
             | type: ``str`` (flag key)
-            | argument path: ``model/descriptor/type`` 
+            | argument path: ``model/descriptor/type``
             | possible choices: |code:model/descriptor[loc_frame]|_, |code:model/descriptor[se_e2_a]|_, |code:model/descriptor[se_e2_r]|_, |code:model/descriptor[se_e3]|_, |code:model/descriptor[se_a_tpe]|_, |code:model/descriptor[hybrid]|_
 
-            The type of the descritpor. See explanation below. 
+            The type of the descriptor. See explanation below.
 
             - `loc_frame`: Defines a local frame at each atom, and the compute the descriptor as local coordinates under this frame.
 
@@ -165,41 +165,41 @@ model:
         .. _`flag:model/descriptor/type`: `model/descriptor/type`_
 
 
-        .. _`model/descriptor[loc_frame]`: 
+        .. _`model/descriptor[loc_frame]`:
 
-        When |flag:model/descriptor/type|_ is set to ``loc_frame``: 
+        When |flag:model/descriptor/type|_ is set to ``loc_frame``:
 
-        .. _`model/descriptor[loc_frame]/sel_a`: 
+        .. _`model/descriptor[loc_frame]/sel_a`:
 
-        sel_a: 
+        sel_a:
             | type: ``list``
             | argument path: ``model/descriptor[loc_frame]/sel_a``
 
             A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_a[i]` gives the selected number of type-i neighbors. The full relative coordinates of the neighbors are used by the descriptor.
 
-        .. _`model/descriptor[loc_frame]/sel_r`: 
+        .. _`model/descriptor[loc_frame]/sel_r`:
 
-        sel_r: 
+        sel_r:
             | type: ``list``
             | argument path: ``model/descriptor[loc_frame]/sel_r``
 
             A list of integers. The length of the list should be the same as the number of atom types in the system. `sel_r[i]` gives the selected number of type-i neighbors. Only relative distance of the neighbors are used by the descriptor. sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius.
 
-        .. _`model/descriptor[loc_frame]/rcut`: 
+        .. _`model/descriptor[loc_frame]/rcut`:
 
-        rcut: 
+        rcut:
             | type: ``float``, optional, default: ``6.0``
             | argument path: ``model/descriptor[loc_frame]/rcut``
 
             The cut-off radius. The default value is 6.0
 
-        .. _`model/descriptor[loc_frame]/axis_rule`: 
+        .. _`model/descriptor[loc_frame]/axis_rule`:
 
-        axis_rule: 
+        axis_rule:
             | type: ``list``
             | argument path: ``model/descriptor[loc_frame]/axis_rule``
 
-            A list of integers. The length should be 6 times of the number of types. 
+            A list of integers. The length should be 6 times the number of types.
 
             - axis_rule[i*6+0]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.
 
@@ -214,13 +214,13 @@ model:
             - axis_rule[i*6+5]: class of the atom defining the second axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.
 
 
-        .. _`model/descriptor[se_e2_a]`: 
+        .. _`model/descriptor[se_e2_a]`:
 
-        When |flag:model/descriptor/type|_ is set to ``se_e2_a`` (or its alias ``se_a``): 
+        When |flag:model/descriptor/type|_ is set to ``se_e2_a`` (or its alias ``se_a``):
 
-        .. _`model/descriptor[se_e2_a]/sel`: 
+        .. _`model/descriptor[se_e2_a]/sel`:
 
-        sel: 
+        sel:
             | type: ``list`` | ``str``, optional, default: ``auto``
             | argument path: ``model/descriptor[se_e2_a]/sel``
 
@@ -230,110 +230,110 @@ model:
 
                - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".
 
-        .. _`model/descriptor[se_e2_a]/rcut`: 
+        .. _`model/descriptor[se_e2_a]/rcut`:
 
-        rcut: 
+        rcut:
             | type: ``float``, optional, default: ``6.0``
             | argument path: ``model/descriptor[se_e2_a]/rcut``
 
             The cut-off radius.
 
-        .. _`model/descriptor[se_e2_a]/rcut_smth`: 
+        .. _`model/descriptor[se_e2_a]/rcut_smth`:
 
-        rcut_smth: 
+        rcut_smth:
             | type: ``float``, optional, default: ``0.5``
             | argument path: ``model/descriptor[se_e2_a]/rcut_smth``
 
             Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`
 
-        .. _`model/descriptor[se_e2_a]/neuron`: 
+        .. _`model/descriptor[se_e2_a]/neuron`:
 
-        neuron: 
+        neuron:
             | type: ``list``, optional, default: ``[10, 20, 40]``
             | argument path: ``model/descriptor[se_e2_a]/neuron``
 
             Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
 
-        .. _`model/descriptor[se_e2_a]/axis_neuron`: 
+        .. _`model/descriptor[se_e2_a]/axis_neuron`:
 
-        axis_neuron: 
+        axis_neuron:
             | type: ``int``, optional, default: ``4``, alias: *n_axis_neuron*
             | argument path: ``model/descriptor[se_e2_a]/axis_neuron``
 
             Size of the submatrix of G (embedding matrix).
 
-        .. _`model/descriptor[se_e2_a]/activation_function`: 
+        .. _`model/descriptor[se_e2_a]/activation_function`:
 
-        activation_function: 
+        activation_function:
             | type: ``str``, optional, default: ``tanh``
             | argument path: ``model/descriptor[se_e2_a]/activation_function``
 
             The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
 
-        .. _`model/descriptor[se_e2_a]/resnet_dt`: 
+        .. _`model/descriptor[se_e2_a]/resnet_dt`:
 
-        resnet_dt: 
+        resnet_dt:
             | type: ``bool``, optional, default: ``False``
             | argument path: ``model/descriptor[se_e2_a]/resnet_dt``
 
             Whether to use a "Timestep" in the skip connection
 
-        .. _`model/descriptor[se_e2_a]/type_one_side`: 
+        .. _`model/descriptor[se_e2_a]/type_one_side`:
 
-        type_one_side: 
+        type_one_side:
             | type: ``bool``, optional, default: ``False``
             | argument path: ``model/descriptor[se_e2_a]/type_one_side``
 
             Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets
 
-        .. _`model/descriptor[se_e2_a]/precision`: 
+        .. _`model/descriptor[se_e2_a]/precision`:
 
-        precision: 
+        precision:
             | type: ``str``, optional, default: ``float64``
             | argument path: ``model/descriptor[se_e2_a]/precision``
 
             The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64".
 
-        .. _`model/descriptor[se_e2_a]/trainable`: 
+        .. _`model/descriptor[se_e2_a]/trainable`:
 
-        trainable: 
+        trainable:
             | type: ``bool``, optional, default: ``True``
             | argument path: ``model/descriptor[se_e2_a]/trainable``
 
             If the parameters in the embedding net is trainable
 
-        .. _`model/descriptor[se_e2_a]/seed`: 
+        .. _`model/descriptor[se_e2_a]/seed`:
 
-        seed: 
+        seed:
             | type: ``int`` | ``NoneType``, optional
             | argument path: ``model/descriptor[se_e2_a]/seed``
 
             Random seed for parameter initialization
 
-        .. _`model/descriptor[se_e2_a]/exclude_types`: 
+        .. _`model/descriptor[se_e2_a]/exclude_types`:
 
-        exclude_types: 
+        exclude_types:
             | type: ``list``, optional, default: ``[]``
             | argument path: ``model/descriptor[se_e2_a]/exclude_types``
 
             The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1.
 
-        .. _`model/descriptor[se_e2_a]/set_davg_zero`: 
+        .. _`model/descriptor[se_e2_a]/set_davg_zero`:
 
-        set_davg_zero: 
+        set_davg_zero:
             | type: ``bool``, optional, default: ``False``
             | argument path: ``model/descriptor[se_e2_a]/set_davg_zero``
 
             Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used
 
 
-        .. _`model/descriptor[se_e2_r]`: 
+        .. _`model/descriptor[se_e2_r]`:
 
-        When |flag:model/descriptor/type|_ is set to ``se_e2_r`` (or its alias ``se_r``): 
+        When |flag:model/descriptor/type|_ is set to ``se_e2_r`` (or its alias ``se_r``):
 
-        .. _`model/descriptor[se_e2_r]/sel`: 
+        .. _`model/descriptor[se_e2_r]/sel`:
 
-        sel: 
+        sel:
             | type: ``list`` | ``str``, optional, default: ``auto``
             | argument path: ``model/descriptor[se_e2_r]/sel``
 
@@ -343,102 +343,102 @@ model:
 
                - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".
 
-        .. _`model/descriptor[se_e2_r]/rcut`: 
+        .. _`model/descriptor[se_e2_r]/rcut`:
 
-        rcut: 
+        rcut:
             | type: ``float``, optional, default: ``6.0``
             | argument path: ``model/descriptor[se_e2_r]/rcut``
 
             The cut-off radius.
 
-        .. _`model/descriptor[se_e2_r]/rcut_smth`: 
+        .. _`model/descriptor[se_e2_r]/rcut_smth`:
 
-        rcut_smth: 
+        rcut_smth:
             | type: ``float``, optional, default: ``0.5``
             | argument path: ``model/descriptor[se_e2_r]/rcut_smth``
 
             Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`
 
-        .. _`model/descriptor[se_e2_r]/neuron`: 
+        .. _`model/descriptor[se_e2_r]/neuron`:
 
-        neuron: 
+        neuron:
             | type: ``list``, optional, default: ``[10, 20, 40]``
             | argument path: ``model/descriptor[se_e2_r]/neuron``
 
             Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
 
-        .. _`model/descriptor[se_e2_r]/activation_function`: 
+        .. _`model/descriptor[se_e2_r]/activation_function`:
 
-        activation_function: 
+        activation_function:
             | type: ``str``, optional, default: ``tanh``
             | argument path: ``model/descriptor[se_e2_r]/activation_function``
 
             The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
 
-        .. _`model/descriptor[se_e2_r]/resnet_dt`: 
+        .. _`model/descriptor[se_e2_r]/resnet_dt`:
 
-        resnet_dt: 
+        resnet_dt:
             | type: ``bool``, optional, default: ``False``
             | argument path: ``model/descriptor[se_e2_r]/resnet_dt``
 
             Whether to use a "Timestep" in the skip connection
 
-        .. _`model/descriptor[se_e2_r]/type_one_side`: 
+        .. _`model/descriptor[se_e2_r]/type_one_side`:
 
-        type_one_side: 
+        type_one_side:
             | type: ``bool``, optional, default: ``False``
             | argument path: ``model/descriptor[se_e2_r]/type_one_side``
 
             Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets
 
-        .. _`model/descriptor[se_e2_r]/precision`: 
+        .. _`model/descriptor[se_e2_r]/precision`:
 
-        precision: 
+        precision:
             | type: ``str``, optional, default: ``float64``
             | argument path: ``model/descriptor[se_e2_r]/precision``
 
             The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64".
 
-        .. _`model/descriptor[se_e2_r]/trainable`: 
+        .. _`model/descriptor[se_e2_r]/trainable`:
 
-        trainable: 
+        trainable:
             | type: ``bool``, optional, default: ``True``
             | argument path: ``model/descriptor[se_e2_r]/trainable``
 
             If the parameters in the embedding net are trainable
 
-        .. _`model/descriptor[se_e2_r]/seed`: 
+        .. _`model/descriptor[se_e2_r]/seed`:
 
-        seed: 
+        seed:
             | type: ``int`` | ``NoneType``, optional
             | argument path: ``model/descriptor[se_e2_r]/seed``
 
             Random seed for parameter initialization
 
-        .. _`model/descriptor[se_e2_r]/exclude_types`: 
+        .. _`model/descriptor[se_e2_r]/exclude_types`:
 
-        exclude_types: 
+        exclude_types:
             | type: ``list``, optional, default: ``[]``
             | argument path: ``model/descriptor[se_e2_r]/exclude_types``
 
             The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1.
 
-        .. _`model/descriptor[se_e2_r]/set_davg_zero`: 
+        .. _`model/descriptor[se_e2_r]/set_davg_zero`:
 
-        set_davg_zero: 
+        set_davg_zero:
             | type: ``bool``, optional, default: ``False``
             | argument path: ``model/descriptor[se_e2_r]/set_davg_zero``
 
             Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used
 
 
-        .. _`model/descriptor[se_e3]`: 
+        .. _`model/descriptor[se_e3]`:
 
-        When |flag:model/descriptor/type|_ is set to ``se_e3`` (or its aliases ``se_at``, ``se_a_3be``, ``se_t``): 
+        When |flag:model/descriptor/type|_ is set to ``se_e3`` (or its aliases ``se_at``, ``se_a_3be``, ``se_t``):
 
-        .. _`model/descriptor[se_e3]/sel`: 
+        .. _`model/descriptor[se_e3]/sel`:
 
-        sel: 
+        sel:
             | type: ``list`` | ``str``, optional, default: ``auto``
             | argument path: ``model/descriptor[se_e3]/sel``
 
@@ -448,86 +448,86 @@ model:
 
                - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".
 
-        .. _`model/descriptor[se_e3]/rcut`: 
+        .. _`model/descriptor[se_e3]/rcut`:
 
-        rcut: 
+        rcut:
             | type: ``float``, optional, default: ``6.0``
             | argument path: ``model/descriptor[se_e3]/rcut``
 
             The cut-off radius.
 
-        .. _`model/descriptor[se_e3]/rcut_smth`: 
+        .. _`model/descriptor[se_e3]/rcut_smth`:
 
-        rcut_smth: 
+        rcut_smth:
             | type: ``float``, optional, default: ``0.5``
             | argument path: ``model/descriptor[se_e3]/rcut_smth``
 
             Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`
 
-        .. _`model/descriptor[se_e3]/neuron`: 
+        .. _`model/descriptor[se_e3]/neuron`:
 
-        neuron: 
+        neuron:
             | type: ``list``, optional, default: ``[10, 20, 40]``
             | argument path: ``model/descriptor[se_e3]/neuron``
 
             Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
 
-        .. _`model/descriptor[se_e3]/activation_function`: 
+        .. _`model/descriptor[se_e3]/activation_function`:
 
-        activation_function: 
+        activation_function:
             | type: ``str``, optional, default: ``tanh``
             | argument path: ``model/descriptor[se_e3]/activation_function``
 
             The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
 
-        .. _`model/descriptor[se_e3]/resnet_dt`: 
+        .. _`model/descriptor[se_e3]/resnet_dt`:
 
-        resnet_dt: 
+        resnet_dt:
             | type: ``bool``, optional, default: ``False``
             | argument path: ``model/descriptor[se_e3]/resnet_dt``
 
             Whether to use a "Timestep" in the skip connection
 
-        .. _`model/descriptor[se_e3]/precision`: 
+        .. _`model/descriptor[se_e3]/precision`:
 
-        precision: 
+        precision:
             | type: ``str``, optional, default: ``float64``
             | argument path: ``model/descriptor[se_e3]/precision``
 
             The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64".
 
-        .. _`model/descriptor[se_e3]/trainable`: 
+        .. _`model/descriptor[se_e3]/trainable`:
 
-        trainable: 
+        trainable:
             | type: ``bool``, optional, default: ``True``
             | argument path: ``model/descriptor[se_e3]/trainable``
 
             If the parameters in the embedding net are trainable
 
-        .. _`model/descriptor[se_e3]/seed`: 
+        .. _`model/descriptor[se_e3]/seed`:
 
-        seed: 
+        seed:
             | type: ``int`` | ``NoneType``, optional
             | argument path: ``model/descriptor[se_e3]/seed``
 
             Random seed for parameter initialization
 
-        .. _`model/descriptor[se_e3]/set_davg_zero`: 
+        .. _`model/descriptor[se_e3]/set_davg_zero`:
 
-        set_davg_zero: 
+        set_davg_zero:
             | type: ``bool``, optional, default: ``False``
             | argument path: ``model/descriptor[se_e3]/set_davg_zero``
 
             Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used
 
 
-        .. _`model/descriptor[se_a_tpe]`: 
+        .. _`model/descriptor[se_a_tpe]`:
 
-        When |flag:model/descriptor/type|_ is set to ``se_a_tpe`` (or its alias ``se_a_ebd``): 
+        When |flag:model/descriptor/type|_ is set to ``se_a_tpe`` (or its alias ``se_a_ebd``):
 
-        .. _`model/descriptor[se_a_tpe]/sel`: 
+        .. _`model/descriptor[se_a_tpe]/sel`:
 
-        sel: 
+        sel:
             | type: ``list`` | ``str``, optional, default: ``auto``
             | argument path: ``model/descriptor[se_a_tpe]/sel``
 
@@ -537,158 +537,158 @@ model:
 
                - `str`. Can be "auto:factor" or "auto". "factor" is a float number larger than 1. This option will automatically determine the `sel`. In detail it counts the maximal number of neighbors within the cutoff radius for each type of neighbor, then multiply the maximum by the "factor". Finally the number is wrapped up to 4 divisible. The option "auto" is equivalent to "auto:1.1".
 
-        .. _`model/descriptor[se_a_tpe]/rcut`: 
+        .. _`model/descriptor[se_a_tpe]/rcut`:
 
-        rcut: 
+        rcut:
             | type: ``float``, optional, default: ``6.0``
             | argument path: ``model/descriptor[se_a_tpe]/rcut``
 
             The cut-off radius.
 
-        .. _`model/descriptor[se_a_tpe]/rcut_smth`: 
+        .. _`model/descriptor[se_a_tpe]/rcut_smth`:
 
-        rcut_smth: 
+        rcut_smth:
             | type: ``float``, optional, default: ``0.5``
             | argument path: ``model/descriptor[se_a_tpe]/rcut_smth``
 
             Where to start smoothing. For example the 1/r term is smoothed from `rcut` to `rcut_smth`
 
-        .. _`model/descriptor[se_a_tpe]/neuron`: 
+        .. _`model/descriptor[se_a_tpe]/neuron`:
 
-        neuron: 
+        neuron:
             | type: ``list``, optional, default: ``[10, 20, 40]``
             | argument path: ``model/descriptor[se_a_tpe]/neuron``
 
             Number of neurons in each hidden layers of the embedding net. When two layers are of the same size or one layer is twice as large as the previous layer, a skip connection is built.
 
-        .. _`model/descriptor[se_a_tpe]/axis_neuron`: 
+        .. _`model/descriptor[se_a_tpe]/axis_neuron`:
 
-        axis_neuron: 
+        axis_neuron:
             | type: ``int``, optional, default: ``4``, alias: *n_axis_neuron*
             | argument path: ``model/descriptor[se_a_tpe]/axis_neuron``
 
             Size of the submatrix of G (embedding matrix).
 
-        .. _`model/descriptor[se_a_tpe]/activation_function`: 
+        .. _`model/descriptor[se_a_tpe]/activation_function`:
 
-        activation_function: 
+        activation_function:
             | type: ``str``, optional, default: ``tanh``
             | argument path: ``model/descriptor[se_a_tpe]/activation_function``
 
             The activation function in the embedding net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
 
-        .. _`model/descriptor[se_a_tpe]/resnet_dt`: 
+        .. _`model/descriptor[se_a_tpe]/resnet_dt`:
 
-        resnet_dt: 
+        resnet_dt:
             | type: ``bool``, optional, default: ``False``
             | argument path: ``model/descriptor[se_a_tpe]/resnet_dt``
 
             Whether to use a "Timestep" in the skip connection
 
-        .. _`model/descriptor[se_a_tpe]/type_one_side`: 
+        .. _`model/descriptor[se_a_tpe]/type_one_side`:
 
-        type_one_side: 
+        type_one_side:
             | type: ``bool``, optional, default: ``False``
             | argument path: ``model/descriptor[se_a_tpe]/type_one_side``
 
             Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets
 
-        .. _`model/descriptor[se_a_tpe]/precision`: 
+        .. _`model/descriptor[se_a_tpe]/precision`:
 
-        precision: 
+        precision:
             | type: ``str``, optional, default: ``float64``
             | argument path: ``model/descriptor[se_a_tpe]/precision``
 
             The precision of the embedding net parameters, supported options are "default", "float16", "float32", "float64".
 
-        .. _`model/descriptor[se_a_tpe]/trainable`: 
+        .. _`model/descriptor[se_a_tpe]/trainable`:
 
-        trainable: 
+        trainable:
             | type: ``bool``, optional, default: ``True``
             | argument path: ``model/descriptor[se_a_tpe]/trainable``
 
             If the parameters in the embedding net is trainable
 
-        .. _`model/descriptor[se_a_tpe]/seed`: 
+        .. _`model/descriptor[se_a_tpe]/seed`:
 
-        seed: 
+        seed:
             | type: ``int`` | ``NoneType``, optional
             | argument path: ``model/descriptor[se_a_tpe]/seed``
 
             Random seed for parameter initialization
 
-        .. _`model/descriptor[se_a_tpe]/exclude_types`: 
+        .. _`model/descriptor[se_a_tpe]/exclude_types`:
 
-        exclude_types: 
+        exclude_types:
             | type: ``list``, optional, default: ``[]``
             | argument path: ``model/descriptor[se_a_tpe]/exclude_types``
 
             The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1.
 
-        .. _`model/descriptor[se_a_tpe]/set_davg_zero`: 
+        .. _`model/descriptor[se_a_tpe]/set_davg_zero`:
 
-        set_davg_zero: 
+        set_davg_zero:
             | type: ``bool``, optional, default: ``False``
             | argument path: ``model/descriptor[se_a_tpe]/set_davg_zero``
 
             Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used
 
-        .. _`model/descriptor[se_a_tpe]/type_nchanl`: 
+        .. _`model/descriptor[se_a_tpe]/type_nchanl`:
 
-        type_nchanl: 
+        type_nchanl:
             | type: ``int``, optional, default: ``4``
             | argument path: ``model/descriptor[se_a_tpe]/type_nchanl``
 
             number of channels for type embedding
 
-        .. _`model/descriptor[se_a_tpe]/type_nlayer`: 
+        .. _`model/descriptor[se_a_tpe]/type_nlayer`:
 
-        type_nlayer: 
+        type_nlayer:
             | type: ``int``, optional, default: ``2``
             | argument path: ``model/descriptor[se_a_tpe]/type_nlayer``
 
             number of hidden layers of type embedding net
 
-        .. _`model/descriptor[se_a_tpe]/numb_aparam`: 
+        .. _`model/descriptor[se_a_tpe]/numb_aparam`:
 
-        numb_aparam: 
+        numb_aparam:
             | type: ``int``, optional, default: ``0``
             | argument path: ``model/descriptor[se_a_tpe]/numb_aparam``
 
             dimension of atomic parameter. If set to a value > 0, the atomic parameters are embedded.
 
 
-        .. _`model/descriptor[hybrid]`: 
+        .. _`model/descriptor[hybrid]`:
 
-        When |flag:model/descriptor/type|_ is set to ``hybrid``: 
+        When |flag:model/descriptor/type|_ is set to ``hybrid``:
 
-        .. _`model/descriptor[hybrid]/list`: 
+        .. _`model/descriptor[hybrid]/list`:
 
-        list: 
+        list:
             | type: ``list``
             | argument path: ``model/descriptor[hybrid]/list``
 
             A list of descriptor definitions
 
-    .. _`model/fitting_net`: 
+    .. _`model/fitting_net`:
 
-    fitting_net: 
+    fitting_net:
         | type: ``dict``
         | argument path: ``model/fitting_net``
 
         The fitting of physical properties.
 
 
-        Depending on the value of *type*, different sub args are accepted. 
+        Depending on the value of *type*, different sub args are accepted.
 
-        .. _`model/fitting_net/type`: 
+        .. _`model/fitting_net/type`:
 
         type:
             | type: ``str`` (flag key), default: ``ener``
-            | argument path: ``model/fitting_net/type`` 
+            | argument path: ``model/fitting_net/type``
             | possible choices: |code:model/fitting_net[ener]|_, |code:model/fitting_net[dipole]|_, |code:model/fitting_net[polar]|_
 
-            The type of the fitting. See explanation below. 
+            The type of the fitting. See explanation below.
 
             - `ener`: Fit an energy model (potential energy surface).
 
@@ -709,61 +709,61 @@ model:
         .. _`flag:model/fitting_net/type`: `model/fitting_net/type`_
 
 
-        .. _`model/fitting_net[ener]`: 
+        .. _`model/fitting_net[ener]`:
 
-        When |flag:model/fitting_net/type|_ is set to ``ener``: 
+        When |flag:model/fitting_net/type|_ is set to ``ener``:
 
-        .. _`model/fitting_net[ener]/numb_fparam`: 
+        .. _`model/fitting_net[ener]/numb_fparam`:
 
-        numb_fparam: 
+        numb_fparam:
             | type: ``int``, optional, default: ``0``
             | argument path: ``model/fitting_net[ener]/numb_fparam``
 
             The dimension of the frame parameter. If set to >0, file `fparam.npy` should be included to provide the input fparams.
 
-        .. _`model/fitting_net[ener]/numb_aparam`: 
+        .. _`model/fitting_net[ener]/numb_aparam`:
 
-        numb_aparam: 
+        numb_aparam:
             | type: ``int``, optional, default: ``0``
             | argument path: ``model/fitting_net[ener]/numb_aparam``
 
             The dimension of the atomic parameter. If set to >0, file `aparam.npy` should be included to provide the input aparams.
 
-        .. _`model/fitting_net[ener]/neuron`: 
+        .. _`model/fitting_net[ener]/neuron`:
 
-        neuron: 
+        neuron:
             | type: ``list``, optional, default: ``[120, 120, 120]``, alias: *n_neuron*
             | argument path: ``model/fitting_net[ener]/neuron``
 
             The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.
 
-        .. _`model/fitting_net[ener]/activation_function`: 
+        .. _`model/fitting_net[ener]/activation_function`:
 
-        activation_function: 
+        activation_function:
             | type: ``str``, optional, default: ``tanh``
             | argument path: ``model/fitting_net[ener]/activation_function``
 
             The activation function in the fitting net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
 
-        .. _`model/fitting_net[ener]/precision`: 
+        .. _`model/fitting_net[ener]/precision`:
 
-        precision: 
+        precision:
             | type: ``str``, optional, default: ``float64``
             | argument path: ``model/fitting_net[ener]/precision``
 
             The precision of the fitting net parameters, supported options are "default", "float16", "float32", "float64".
 
-        .. _`model/fitting_net[ener]/resnet_dt`: 
+        .. _`model/fitting_net[ener]/resnet_dt`:
 
-        resnet_dt: 
+        resnet_dt:
             | type: ``bool``, optional, default: ``True``
             | argument path: ``model/fitting_net[ener]/resnet_dt``
 
             Whether to use a "Timestep" in the skip connection
 
-        .. _`model/fitting_net[ener]/trainable`: 
+        .. _`model/fitting_net[ener]/trainable`:
 
-        trainable: 
+        trainable:
             | type: ``list`` | ``bool``, optional, default: ``True``
             | argument path: ``model/fitting_net[ener]/trainable``
 
@@ -773,176 +773,176 @@ model:
 
             - list of bool: Specifies if each layer is trainable. Since the fitting net is composed of hidden layers followed by an output layer, the length of this list should be equal to len(`neuron`)+1.
 
-        .. _`model/fitting_net[ener]/rcond`: 
+        .. _`model/fitting_net[ener]/rcond`:
 
-        rcond: 
+        rcond:
             | type: ``float``, optional, default: ``0.001``
             | argument path: ``model/fitting_net[ener]/rcond``
 
             The condition number used to determine the initial energy shift for each type of atoms.
 
-        .. _`model/fitting_net[ener]/seed`: 
+        .. _`model/fitting_net[ener]/seed`:
 
-        seed: 
+        seed:
             | type: ``int`` | ``NoneType``, optional
             | argument path: ``model/fitting_net[ener]/seed``
 
             Random seed for parameter initialization of the fitting net
 
-        .. _`model/fitting_net[ener]/atom_ener`: 
+        .. _`model/fitting_net[ener]/atom_ener`:
 
-        atom_ener: 
+        atom_ener:
             | type: ``list``, optional, default: ``[]``
             | argument path: ``model/fitting_net[ener]/atom_ener``
 
             Specify the atomic energy in vacuum for each type
 
 
-        .. _`model/fitting_net[dipole]`: 
+        .. _`model/fitting_net[dipole]`:
 
-        When |flag:model/fitting_net/type|_ is set to ``dipole``: 
+        When |flag:model/fitting_net/type|_ is set to ``dipole``:
 
-        .. _`model/fitting_net[dipole]/neuron`: 
+        .. _`model/fitting_net[dipole]/neuron`:
 
-        neuron: 
+        neuron:
             | type: ``list``, optional, default: ``[120, 120, 120]``, alias: *n_neuron*
             | argument path: ``model/fitting_net[dipole]/neuron``
 
             The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.
 
-        .. _`model/fitting_net[dipole]/activation_function`: 
+        .. _`model/fitting_net[dipole]/activation_function`:
 
-        activation_function: 
+        activation_function:
             | type: ``str``, optional, default: ``tanh``
             | argument path: ``model/fitting_net[dipole]/activation_function``
 
             The activation function in the fitting net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
 
-        .. _`model/fitting_net[dipole]/resnet_dt`: 
+        .. _`model/fitting_net[dipole]/resnet_dt`:
 
-        resnet_dt: 
+        resnet_dt:
             | type: ``bool``, optional, default: ``True``
             | argument path: ``model/fitting_net[dipole]/resnet_dt``
 
             Whether to use a "Timestep" in the skip connection
 
-        .. _`model/fitting_net[dipole]/precision`: 
+        .. _`model/fitting_net[dipole]/precision`:
 
-        precision: 
+        precision:
             | type: ``str``, optional, default: ``float64``
             | argument path: ``model/fitting_net[dipole]/precision``
 
             The precision of the fitting net parameters, supported options are "default", "float16", "float32", "float64".
 
-        .. _`model/fitting_net[dipole]/sel_type`: 
+        .. _`model/fitting_net[dipole]/sel_type`:
 
-        sel_type: 
+        sel_type:
             | type: ``list`` | ``int`` | ``NoneType``, optional, alias: *dipole_type*
             | argument path: ``model/fitting_net[dipole]/sel_type``
 
             The atom types for which the atomic dipole will be provided. If not set, all types will be selected.
 
-        .. _`model/fitting_net[dipole]/seed`: 
+        .. _`model/fitting_net[dipole]/seed`:
 
-        seed: 
+        seed:
             | type: ``int`` | ``NoneType``, optional
             | argument path: ``model/fitting_net[dipole]/seed``
 
             Random seed for parameter initialization of the fitting net
 
 
-        .. _`model/fitting_net[polar]`: 
+        .. _`model/fitting_net[polar]`:
 
-        When |flag:model/fitting_net/type|_ is set to ``polar``: 
+        When |flag:model/fitting_net/type|_ is set to ``polar``:
 
-        .. _`model/fitting_net[polar]/neuron`: 
+        .. _`model/fitting_net[polar]/neuron`:
 
-        neuron: 
+        neuron:
             | type: ``list``, optional, default: ``[120, 120, 120]``, alias: *n_neuron*
             | argument path: ``model/fitting_net[polar]/neuron``
 
             The number of neurons in each hidden layers of the fitting net. When two hidden layers are of the same size, a skip connection is built.
 
-        .. _`model/fitting_net[polar]/activation_function`: 
+        .. _`model/fitting_net[polar]/activation_function`:
 
-        activation_function: 
+        activation_function:
             | type: ``str``, optional, default: ``tanh``
             | argument path: ``model/fitting_net[polar]/activation_function``
 
             The activation function in the fitting net. Supported activation functions are "relu", "relu6", "softplus", "sigmoid", "tanh", "gelu".
 
-        .. _`model/fitting_net[polar]/resnet_dt`: 
+        .. _`model/fitting_net[polar]/resnet_dt`:
 
-        resnet_dt: 
+        resnet_dt:
             | type: ``bool``, optional, default: ``True``
             | argument path: ``model/fitting_net[polar]/resnet_dt``
 
             Whether to use a "Timestep" in the skip connection
 
-        .. _`model/fitting_net[polar]/precision`: 
+        .. _`model/fitting_net[polar]/precision`:
 
-        precision: 
+        precision:
             | type: ``str``, optional, default: ``float64``
             | argument path: ``model/fitting_net[polar]/precision``
 
             The precision of the fitting net parameters, supported options are "default", "float16", "float32", "float64".
 
-        .. _`model/fitting_net[polar]/fit_diag`: 
+        .. _`model/fitting_net[polar]/fit_diag`:
 
-        fit_diag: 
+        fit_diag:
             | type: ``bool``, optional, default: ``True``
             | argument path: ``model/fitting_net[polar]/fit_diag``
 
             Fit the diagonal part of the rotational invariant polarizability matrix, which will be converted to normal polarizability matrix by contracting with the rotation matrix.
 
-        .. _`model/fitting_net[polar]/scale`: 
+        .. _`model/fitting_net[polar]/scale`:
 
-        scale: 
+        scale:
             | type: ``float`` | ``list``, optional, default: ``1.0``
             | argument path: ``model/fitting_net[polar]/scale``
 
             The output of the fitting net (polarizability matrix) will be scaled by ``scale``
 
-        .. _`model/fitting_net[polar]/shift_diag`: 
+        .. _`model/fitting_net[polar]/shift_diag`:
 
-        shift_diag: 
+        shift_diag:
             | type: ``bool``, optional, default: ``True``
             | argument path: ``model/fitting_net[polar]/shift_diag``
 
             Whether to shift the diagonal of polar, which is beneficial to training. Default is true.
 
-        .. _`model/fitting_net[polar]/sel_type`: 
+        .. _`model/fitting_net[polar]/sel_type`:
 
-        sel_type: 
+        sel_type:
             | type: ``list`` | ``int`` | ``NoneType``, optional, alias: *pol_type*
             | argument path: ``model/fitting_net[polar]/sel_type``
 
             The atom types for which the atomic polarizability will be provided. If not set, all types will be selected.
 
-        .. _`model/fitting_net[polar]/seed`: 
+        .. _`model/fitting_net[polar]/seed`:
 
-        seed: 
+        seed:
             | type: ``int`` | ``NoneType``, optional
             | argument path: ``model/fitting_net[polar]/seed``
 
             Random seed for parameter initialization of the fitting net
 
-    .. _`model/modifier`: 
+    .. _`model/modifier`:
 
-    modifier: 
+    modifier:
         | type: ``dict``, optional
         | argument path: ``model/modifier``
 
         The modifier of model output.
 
 
-        Depending on the value of *type*, different sub args are accepted. 
+        Depending on the value of *type*, different sub args are accepted.
 
-        .. _`model/modifier/type`: 
+        .. _`model/modifier/type`:
 
         type:
             | type: ``str`` (flag key)
-            | argument path: ``model/modifier/type`` 
+            | argument path: ``model/modifier/type``
             | possible choices: |code:model/modifier[dipole_charge]|_
 
             The type of modifier. See explanation below.
@@ -956,66 +956,66 @@ model:
         .. _`flag:model/modifier/type`: `model/modifier/type`_
 
 
-        .. _`model/modifier[dipole_charge]`: 
+        .. _`model/modifier[dipole_charge]`:
 
-        When |flag:model/modifier/type|_ is set to ``dipole_charge``: 
+        When |flag:model/modifier/type|_ is set to ``dipole_charge``:
 
-        .. _`model/modifier[dipole_charge]/model_name`: 
+        .. _`model/modifier[dipole_charge]/model_name`:
 
-        model_name: 
+        model_name:
             | type: ``str``
             | argument path: ``model/modifier[dipole_charge]/model_name``
 
             The name of the frozen dipole model file.
 
-        .. _`model/modifier[dipole_charge]/model_charge_map`: 
+        .. _`model/modifier[dipole_charge]/model_charge_map`:
 
-        model_charge_map: 
+        model_charge_map:
             | type: ``list``
             | argument path: ``model/modifier[dipole_charge]/model_charge_map``
 
-            The charge of the WFCC. The list length should be the same as the `sel_type `_. 
+            The charge of the WFCC. The list length should be the same as the `sel_type `_.
 
-        .. _`model/modifier[dipole_charge]/sys_charge_map`: 
+        .. _`model/modifier[dipole_charge]/sys_charge_map`:
 
-        sys_charge_map: 
+        sys_charge_map:
             | type: ``list``
             | argument path: ``model/modifier[dipole_charge]/sys_charge_map``
 
             The charge of real atoms. The list length should be the same as the `type_map `_
 
-        .. _`model/modifier[dipole_charge]/ewald_beta`: 
+        .. _`model/modifier[dipole_charge]/ewald_beta`:
 
-        ewald_beta: 
+        ewald_beta:
             | type: ``float``, optional, default: ``0.4``
             | argument path: ``model/modifier[dipole_charge]/ewald_beta``
 
             The splitting parameter of Ewald sum. Unit is A^-1
 
-        .. _`model/modifier[dipole_charge]/ewald_h`: 
+        .. _`model/modifier[dipole_charge]/ewald_h`:
 
-        ewald_h: 
+        ewald_h:
             | type: ``float``, optional, default: ``1.0``
             | argument path: ``model/modifier[dipole_charge]/ewald_h``
 
             The grid spacing of the FFT grid. Unit is A
 
-    .. _`model/compress`: 
+    .. _`model/compress`:
 
-    compress: 
+    compress:
         | type: ``dict``, optional
         | argument path: ``model/compress``
 
         Model compression configurations
 
 
-        Depending on the value of *type*, different sub args are accepted. 
+        Depending on the value of *type*, different sub args are accepted.
 
-        .. _`model/compress/type`: 
+        .. _`model/compress/type`:
 
         type:
             | type: ``str`` (flag key), default: ``se_e2_a``
-            | argument path: ``model/compress/type`` 
+            | argument path: ``model/compress/type``
             | possible choices: |code:model/compress[se_e2_a]|_
 
             The type of model compression, which should be consistent with the descriptor type.
@@ -1027,46 +1027,46 @@ model:
         .. _`flag:model/compress/type`: `model/compress/type`_
 
 
-        .. _`model/compress[se_e2_a]`: 
+        .. _`model/compress[se_e2_a]`:
 
-        When |flag:model/compress/type|_ is set to ``se_e2_a`` (or its alias ``se_a``): 
+        When |flag:model/compress/type|_ is set to ``se_e2_a`` (or its alias ``se_a``):
 
-        .. _`model/compress[se_e2_a]/compress`: 
+        .. _`model/compress[se_e2_a]/compress`:
 
-        compress: 
+        compress:
             | type: ``bool``
             | argument path: ``model/compress[se_e2_a]/compress``
 
             The name of the frozen model file.
 
-        .. _`model/compress[se_e2_a]/model_file`: 
+        .. _`model/compress[se_e2_a]/model_file`:
 
-        model_file: 
+        model_file:
             | type: ``str``
             | argument path: ``model/compress[se_e2_a]/model_file``
 
             The input model file, which will be compressed by the DeePMD-kit.
 
-        .. _`model/compress[se_e2_a]/table_config`: 
+        .. _`model/compress[se_e2_a]/table_config`:
 
-        table_config: 
+        table_config:
             | type: ``list``
             | argument path: ``model/compress[se_e2_a]/table_config``
 
             The arguments of model compression, including extrapolate(scale of model extrapolation), stride(uniform stride of tabulation's first and second table), and frequency(frequency of tabulation overflow check).
 
-        .. _`model/compress[se_e2_a]/min_nbor_dist`: 
+        .. _`model/compress[se_e2_a]/min_nbor_dist`:
 
-        min_nbor_dist: 
+        min_nbor_dist:
             | type: ``float``
             | argument path: ``model/compress[se_e2_a]/min_nbor_dist``
 
             The nearest distance between neighbor atoms saved in the frozen model.
 
 
-.. _`loss`: 
+.. _`loss`:
 
-loss: 
+loss:
     | type: ``dict``, optional
     | argument path: ``loss``
 
@@ -1074,16 +1074,16 @@ loss:
     \.
 
 
-    Depending on the value of *type*, different sub args are accepted. 
+    Depending on the value of *type*, different sub args are accepted.
 
-    .. _`loss/type`: 
+    .. _`loss/type`:
 
     type:
         | type: ``str`` (flag key), default: ``ener``
-        | argument path: ``loss/type`` 
+        | argument path: ``loss/type``
         | possible choices: |code:loss[ener]|_, |code:loss[tensor]|_
 
-        The type of the loss. When the fitting type is `ener`, the loss type should be set to `ener` or left unset. When the fitting type is `dipole` or `polar`, the loss type should be set to `tensor`. 
+        The type of the loss. When the fitting type is `ener`, the loss type should be set to `ener` or left unset. When the fitting type is `dipole` or `polar`, the loss type should be set to `tensor`.
         \.
 
         .. |code:loss[ener]| replace:: ``ener``
@@ -1095,120 +1095,120 @@ loss:
     .. _`flag:loss/type`: `loss/type`_
 
 
-    .. _`loss[ener]`: 
+    .. _`loss[ener]`:
 
-    When |flag:loss/type|_ is set to ``ener``: 
+    When |flag:loss/type|_ is set to ``ener``:
 
-    .. _`loss[ener]/start_pref_e`: 
+    .. _`loss[ener]/start_pref_e`:
 
-    start_pref_e: 
+    start_pref_e:
         | type: ``float`` | ``int``, optional, default: ``0.02``
         | argument path: ``loss[ener]/start_pref_e``
 
         The prefactor of energy loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the energy label should be provided by file energy.npy in each data system. If both start_pref_energy and limit_pref_energy are set to 0, then the energy will be ignored.
 
-    .. _`loss[ener]/limit_pref_e`: 
+    .. _`loss[ener]/limit_pref_e`:
 
-    limit_pref_e: 
+    limit_pref_e:
         | type: ``float`` | ``int``, optional, default: ``1.0``
         | argument path: ``loss[ener]/limit_pref_e``
 
         The prefactor of energy loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.
 
-    .. _`loss[ener]/start_pref_f`: 
+    .. _`loss[ener]/start_pref_f`:
 
-    start_pref_f: 
+    start_pref_f:
         | type: ``float`` | ``int``, optional, default: ``1000``
         | argument path: ``loss[ener]/start_pref_f``
 
         The prefactor of force loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the force label should be provided by file force.npy in each data system. If both start_pref_force and limit_pref_force are set to 0, then the force will be ignored.
 
-    .. _`loss[ener]/limit_pref_f`: 
+    .. _`loss[ener]/limit_pref_f`:
 
-    limit_pref_f: 
+    limit_pref_f:
         | type: ``float`` | ``int``, optional, default: ``1.0``
         | argument path: ``loss[ener]/limit_pref_f``
 
         The prefactor of force loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.
 
-    .. _`loss[ener]/start_pref_v`: 
+    .. _`loss[ener]/start_pref_v`:
 
-    start_pref_v: 
+    start_pref_v:
         | type: ``float`` | ``int``, optional, default: ``0.0``
         | argument path: ``loss[ener]/start_pref_v``
 
         The prefactor of virial loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the virial label should be provided by file virial.npy in each data system. If both start_pref_virial and limit_pref_virial are set to 0, then the virial will be ignored.
 
-    .. _`loss[ener]/limit_pref_v`: 
+    .. _`loss[ener]/limit_pref_v`:
 
-    limit_pref_v: 
+    limit_pref_v:
         | type: ``float`` | ``int``, optional, default: ``0.0``
         | argument path: ``loss[ener]/limit_pref_v``
 
         The prefactor of virial loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.
 
-    .. _`loss[ener]/start_pref_ae`: 
+    .. _`loss[ener]/start_pref_ae`:
 
-    start_pref_ae: 
+    start_pref_ae:
         | type: ``float`` | ``int``, optional, default: ``0.0``
         | argument path: ``loss[ener]/start_pref_ae``
 
         The prefactor of atom_ener loss at the start of the training. Should be larger than or equal to 0. If set to none-zero value, the atom_ener label should be provided by file atom_ener.npy in each data system. If both start_pref_atom_ener and limit_pref_atom_ener are set to 0, then the atom_ener will be ignored.
 
-    .. _`loss[ener]/limit_pref_ae`: 
+    .. _`loss[ener]/limit_pref_ae`:
 
-    limit_pref_ae: 
+    limit_pref_ae:
         | type: ``float`` | ``int``, optional, default: ``0.0``
         | argument path: ``loss[ener]/limit_pref_ae``
 
         The prefactor of atom_ener loss at the limit of the training, Should be larger than or equal to 0. i.e. the training step goes to infinity.
 
-    .. _`loss[ener]/relative_f`: 
+    .. _`loss[ener]/relative_f`:
 
-    relative_f: 
+    relative_f:
         | type: ``float`` | ``NoneType``, optional
         | argument path: ``loss[ener]/relative_f``
 
         If provided, relative force error will be used in the loss. The difference of force will be normalized by the magnitude of the force in the label with a shift given by `relative_f`, i.e. DF_i / ( || F || + relative_f ) with DF denoting the difference between prediction and label and || F || denoting the L2 norm of the label.
 
 
-    .. _`loss[tensor]`: 
+    .. _`loss[tensor]`:
 
-    When |flag:loss/type|_ is set to ``tensor``: 
+    When |flag:loss/type|_ is set to ``tensor``:
 
-    .. _`loss[tensor]/pref`: 
+    .. _`loss[tensor]/pref`:
 
-    pref: 
+    pref:
         | type: ``float`` | ``int``
         | argument path: ``loss[tensor]/pref``
 
         The prefactor of the weight of global loss. It should be larger than or equal to 0. It controls the weight of loss corresponding to global label, i.e. `polarizability.npy` or `dipole.npy`, whose shape should be #frames x [9 or 3]. If it's larger than 0.0, this npy should be included.
 
-    .. _`loss[tensor]/pref_atomic`: 
+    .. _`loss[tensor]/pref_atomic`:
 
-    pref_atomic: 
+    pref_atomic:
         | type: ``float`` | ``int``
         | argument path: ``loss[tensor]/pref_atomic``
 
         The prefactor of the weight of atomic loss. It should be larger than or equal to 0. It controls the weight of loss corresponding to atomic label, i.e. `atomic_polarizability.npy` or `atomic_dipole.npy`, whose shape should be #frames x ([9 or 3] x #selected atoms). If it's larger than 0.0, this npy should be included. Both `pref` and `pref_atomic` should be provided, and either can be set to 0.0.
 
 
-.. _`learning_rate`: 
+.. _`learning_rate`:
 
-learning_rate: 
+learning_rate:
     | type: ``dict``
     | argument path: ``learning_rate``
 
     The definition of the learning rate
 
 
-    Depending on the value of *type*, different sub args are accepted. 
+    Depending on the value of *type*, different sub args are accepted.
 
-    .. _`learning_rate/type`: 
+    .. _`learning_rate/type`:
 
     type:
         | type: ``str`` (flag key), default: ``exp``
-        | argument path: ``learning_rate/type`` 
+        | argument path: ``learning_rate/type``
         | possible choices: |code:learning_rate[exp]|_
 
         The type of the learning rate.
@@ -1220,74 +1220,74 @@ learning_rate:
     .. _`flag:learning_rate/type`: `learning_rate/type`_
 
 
-    .. _`learning_rate[exp]`: 
+    .. _`learning_rate[exp]`:
 
-    When |flag:learning_rate/type|_ is set to ``exp``: 
+    When |flag:learning_rate/type|_ is set to ``exp``:
 
-    .. _`learning_rate[exp]/start_lr`: 
+    .. _`learning_rate[exp]/start_lr`:
 
-    start_lr: 
+    start_lr:
         | type: ``float``, optional, default: ``0.001``
         | argument path: ``learning_rate[exp]/start_lr``
 
         The learning rate at the start of the training.
 
-    .. _`learning_rate[exp]/stop_lr`: 
+    .. _`learning_rate[exp]/stop_lr`:
 
-    stop_lr: 
+    stop_lr:
         | type: ``float``, optional, default: ``1e-08``
         | argument path: ``learning_rate[exp]/stop_lr``
 
         The desired learning rate at the end of the training.
 
-    .. _`learning_rate[exp]/decay_steps`: 
+    .. _`learning_rate[exp]/decay_steps`:
 
-    decay_steps: 
+    decay_steps:
         | type: ``int``, optional, default: ``5000``
         | argument path: ``learning_rate[exp]/decay_steps``
 
         The learning rate is decaying every this number of training steps.
 
 
-.. _`training`: 
+.. _`training`:
 
-training: 
+training:
     | type: ``dict``
     | argument path: ``training``
 
     The training options.
 
-    .. _`training/training_data`: 
+    .. _`training/training_data`:
 
-    training_data: 
+    training_data:
         | type: ``dict``
         | argument path: ``training/training_data``
 
         Configurations of training data.
 
-        .. _`training/training_data/systems`: 
+        .. _`training/training_data/systems`:
 
-        systems: 
+        systems:
             | type: ``list`` | ``str``
             | argument path: ``training/training_data/systems``
 
             The data systems for training. This key can be provided with a list that specifies the systems, or be provided with a string by which the prefix of all systems are given and the list of the systems is automatically generated.
 
-        .. _`training/training_data/set_prefix`: 
+        .. _`training/training_data/set_prefix`:
 
-        set_prefix: 
+        set_prefix:
             | type: ``str``, optional, default: ``set``
             | argument path: ``training/training_data/set_prefix``
 
             The prefix of the sets in the `systems `_.
 
-        .. _`training/training_data/batch_size`: 
+        .. _`training/training_data/batch_size`:
 
-        batch_size: 
+        batch_size:
             | type: ``list`` | ``int`` | ``str``, optional, default: ``auto``
             | argument path: ``training/training_data/batch_size``
 
-            This key can be 
+            This key can be
 
             - list: the length of which is the same as the `systems `_. The batch size of each system is given by the elements of the list.
 
@@ -1297,9 +1297,9 @@ training:
 
             - string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
 
-        .. _`training/training_data/auto_prob`: 
+        .. _`training/training_data/auto_prob`:
 
-        auto_prob: 
+        auto_prob:
             | type: ``str``, optional, default: ``prob_sys_size``, alias: *auto_prob_style*
             | argument path: ``training/training_data/auto_prob``
 
@@ -1311,45 +1311,45 @@ training:
 
             - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.
 
-        .. _`training/training_data/sys_probs`: 
+        .. _`training/training_data/sys_probs`:
 
-        sys_probs: 
+        sys_probs:
             | type: ``list`` | ``NoneType``, optional, default: ``None``, alias: *sys_weights*
             | argument path: ``training/training_data/sys_probs``
 
             A list of float if specified. Should be of the same length as `systems`, specifying the probability of each system.
 
-    .. _`training/validation_data`: 
+    .. _`training/validation_data`:
 
-    validation_data: 
+    validation_data:
         | type: ``dict`` | ``NoneType``, optional, default: ``None``
         | argument path: ``training/validation_data``
 
         Configurations of validation data. Similar to that of training data, except that a `numb_btch` argument may be configured
 
-        .. _`training/validation_data/systems`: 
+        .. _`training/validation_data/systems`:
 
-        systems: 
+        systems:
             | type: ``list`` | ``str``
             | argument path: ``training/validation_data/systems``
 
             The data systems for validation. This key can be provided with a list that specifies the systems, or be provided with a string by which the prefix of all systems are given and the list of the systems is automatically generated.
 
-        .. _`training/validation_data/set_prefix`: 
+        .. _`training/validation_data/set_prefix`:
 
-        set_prefix: 
+        set_prefix:
             | type: ``str``, optional, default: ``set``
             | argument path: ``training/validation_data/set_prefix``
 
             The prefix of the sets in the `systems `_.
 
-        .. _`training/validation_data/batch_size`: 
+        .. _`training/validation_data/batch_size`:
 
-        batch_size: 
+        batch_size:
             | type: ``list`` | ``int`` | ``str``, optional, default: ``auto``
             | argument path: ``training/validation_data/batch_size``
 
-            This key can be 
+            This key can be
 
             - list: the length of which is the same as the `systems `_. The batch size of each system is given by the elements of the list.
 
@@ -1359,9 +1359,9 @@ training:
 
             - string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.
 
-        .. _`training/validation_data/auto_prob`: 
+        .. _`training/validation_data/auto_prob`:
 
-        auto_prob: 
+        auto_prob:
             | type: ``str``, optional, default: ``prob_sys_size``, alias: *auto_prob_style*
             | argument path: ``training/validation_data/auto_prob``
 
@@ -1373,131 +1373,130 @@ training:
 
             - "prob_sys_size;stt_idx:end_idx:weight;stt_idx:end_idx:weight;..." : the list of systems is devided into blocks. A block is specified by `stt_idx:end_idx:weight`, where `stt_idx` is the starting index of the system, `end_idx` is then ending (not including) index of the system, the probabilities of the systems in this block sums up to `weight`, and the relatively probabilities within this block is proportional to the number of batches in the system.
 
-        .. _`training/validation_data/sys_probs`: 
+        .. _`training/validation_data/sys_probs`:
 
-        sys_probs: 
+        sys_probs:
             | type: ``list`` | ``NoneType``, optional, default: ``None``, alias: *sys_weights*
             | argument path: ``training/validation_data/sys_probs``
 
             A list of float if specified. Should be of the same length as `systems`, specifying the probability of each system.
 
-        .. _`training/validation_data/numb_btch`: 
+        .. _`training/validation_data/numb_btch`:
 
-        numb_btch: 
+        numb_btch:
             | type: ``int``, optional, default: ``1``, alias: *numb_batch*
             | argument path: ``training/validation_data/numb_btch``
 
             An integer that specifies the number of systems to be sampled for each validation period.
 
-    .. _`training/numb_steps`: 
+    .. _`training/numb_steps`:
 
-    numb_steps: 
+    numb_steps:
         | type: ``int``, alias: *stop_batch*
         | argument path: ``training/numb_steps``
 
         Number of training batch. Each training uses one batch of data.
 
-    .. _`training/seed`: 
+    .. _`training/seed`:
 
-    seed: 
+    seed:
         | type: ``int`` | ``NoneType``, optional
         | argument path: ``training/seed``
 
         The random seed for getting frames from the training data set.
 
-    .. _`training/disp_file`: 
+    .. _`training/disp_file`:
 
-    disp_file: 
+    disp_file:
         | type: ``str``, optional, default: ``lcurve.out``
         | argument path: ``training/disp_file``
 
         The file for printing learning curve.
 
-    .. _`training/disp_freq`: 
+    .. _`training/disp_freq`:
 
-    disp_freq: 
+    disp_freq:
         | type: ``int``, optional, default: ``1000``
         | argument path: ``training/disp_freq``
 
         The frequency of printing learning curve.
 
-    .. _`training/numb_test`: 
+    .. _`training/numb_test`:
 
-    numb_test: 
+    numb_test:
         | type: ``list`` | ``int`` | ``str``, optional, default: ``1``
         | argument path: ``training/numb_test``
 
         Number of frames used for the test during training.
 
-    .. _`training/save_freq`: 
+    .. _`training/save_freq`:
 
-    save_freq: 
+    save_freq:
         | type: ``int``, optional, default: ``1000``
         | argument path: ``training/save_freq``
 
         The frequency of saving check point.
 
-    .. _`training/save_ckpt`: 
+    .. _`training/save_ckpt`:
 
-    save_ckpt: 
+    save_ckpt:
         | type: ``str``, optional, default: ``model.ckpt``
         | argument path: ``training/save_ckpt``
 
         The file name of saving check point.
 
-    .. _`training/disp_training`: 
+    .. _`training/disp_training`:
 
-    disp_training: 
+    disp_training:
         | type: ``bool``, optional, default: ``True``
         | argument path: ``training/disp_training``
 
         Displaying verbose information during training.
 
-    .. _`training/time_training`: 
+    .. _`training/time_training`:
 
-    time_training: 
+    time_training:
         | type: ``bool``, optional, default: ``True``
         | argument path: ``training/time_training``
 
         Timing during training.
 
-    .. _`training/profiling`: 
+    .. _`training/profiling`:
 
-    profiling: 
+    profiling:
         | type: ``bool``, optional, default: ``False``
         | argument path: ``training/profiling``
 
         Profiling during training.
 
-    .. _`training/profiling_file`: 
+    .. _`training/profiling_file`:
 
-    profiling_file: 
+    profiling_file:
         | type: ``str``, optional, default: ``timeline.json``
         | argument path: ``training/profiling_file``
 
         Output file for profiling.
 
-    .. _`training/tensorboard`: 
+    .. _`training/tensorboard`:
 
-    tensorboard: 
+    tensorboard:
         | type: ``bool``, optional, default: ``False``
         | argument path: ``training/tensorboard``
 
         Enable tensorboard
 
-    .. _`training/tensorboard_log_dir`: 
+    .. _`training/tensorboard_log_dir`:
 
-    tensorboard_log_dir: 
+    tensorboard_log_dir:
         | type: ``str``, optional, default: ``log``
         | argument path: ``training/tensorboard_log_dir``
 
         The log directory of tensorboard outputs
 
-    .. _`training/tensorboard_freq`: 
+    .. _`training/tensorboard_freq`:
 
-    tensorboard_freq: 
+    tensorboard_freq:
         | type: ``int``, optional, default: ``1``
         | argument path: ``training/tensorboard_freq``
 
         The frequency of writing tensorboard events.
-
diff --git a/doc/train/finetuning.md b/doc/train/finetuning.md
index ede27c246a..ebc7cda2c9 100644
--- a/doc/train/finetuning.md
+++ b/doc/train/finetuning.md
@@ -1,33 +1,33 @@
 # Finetune the pretrained model
 
-Pretraining-and-finetuning is a widely used approach in other fields such as Computer Vision (CV) or Natural Language Processing (NLP) 
+Pretraining-and-finetuning is a widely used approach in other fields such as Computer Vision (CV) or Natural Language Processing (NLP)
 to vastly reduce the training cost, while it's not trivial in potential models.
-Compositions and configurations of data samples or even computational parameters in upstream software (such as VASP) 
+Compositions and configurations of data samples or even computational parameters in upstream software (such as VASP)
 may be different between the pretrained and target datasets, leading to energy shifts or other diversities of training data.
 
 Recently the emerging of methods such as [DPA-1](https://arxiv.org/abs/2208.08236) has brought us to a new stage where we can
 perform similar pretraining-finetuning approaches.
-DPA-1 can hopefully learn the common knowledge in the pretrained dataset (especially the `force` information) 
+DPA-1 can hopefully learn the common knowledge in the pretrained dataset (especially the `force` information)
 and thus reduce the computational cost in downstream training tasks.
-If you have a pretrained model `pretrained.pb` 
-(here we support models using [`se_atten`](../model/train-se-atten.md) descriptor and [`ener`](../model/train-energy.md) fitting net) 
-on a large dataset (for example, [OC2M](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md) in 
+If you have a pretrained model `pretrained.pb`
+(here we support models using [`se_atten`](../model/train-se-atten.md) descriptor and [`ener`](../model/train-energy.md) fitting net)
+on a large dataset (for example, [OC2M](https://github.com/Open-Catalyst-Project/ocp/blob/main/DATASET.md) in
 DPA-1 [paper](https://arxiv.org/abs/2208.08236)), a finetuning strategy can be performed by simply running:
 
 ```bash
 $ dp train input.json --finetune pretrained.pb
 ```
 
-The command above will change the energy bias in the last layer of the fitting net in `pretrained.pb`, 
-according to the training dataset in input.json. 
+The command above will change the energy bias in the last layer of the fitting net in `pretrained.pb`,
+according to the training dataset in input.json.
 
 :::{warning}
 Note that the elements in the training dataset must be contained in the pretrained dataset.
 :::
 
-The finetune procedure will inherit the model structures in `pretrained.pb`, 
-and thus it will ignore the model parameters in `input.json`, 
-such as {ref}`descriptor `, {ref}`fitting_net `, 
+The finetune procedure will inherit the model structures in `pretrained.pb`,
+and thus it will ignore the model parameters in `input.json`,
+such as {ref}`descriptor `, {ref}`fitting_net `,
 {ref}`type_embedding ` and {ref}`type_map `.
 However, you can still set the `trainable` parameters in each part of `input.json` to control the training procedure.
 
diff --git a/doc/train/gpu-limitations.md b/doc/train/gpu-limitations.md
index d684698c42..5df76d28c9 100644
--- a/doc/train/gpu-limitations.md
+++ b/doc/train/gpu-limitations.md
@@ -1,5 +1,5 @@
 # Known limitations of using GPUs
-If you use DeePMD-kit in a GPU environment, the acceptable value range of some variables is additionally restricted compared to the CPU environment due to the software's GPU implementations: 
+If you use DeePMD-kit in a GPU environment, the acceptable value range of some variables is additionally restricted compared to the CPU environment due to the software's GPU implementations:
 1. The number of atom types of a given system must be less than 128.
 2. The maximum distance between an atom and its neighbors must be less than 128. It can be controlled by setting the rcut value of training parameters.
 3. Theoretically, the maximum number of atoms that a single GPU can accept is about 10,000,000. However, this value is limited by the GPU memory size currently, usually within 1,000,000 atoms even in the model compression mode.
diff --git a/doc/train/index.rst b/doc/train/index.rst
index f8b792e33f..92e84b3000 100644
--- a/doc/train/index.rst
+++ b/doc/train/index.rst
@@ -11,4 +11,4 @@ Training
    multi-task-training
    tensorboard
    gpu-limitations
-   finetuning
\ No newline at end of file
+   finetuning
diff --git a/doc/train/multi-task-training.md b/doc/train/multi-task-training.md
index 34a2e6ed25..4549eeab77 100644
--- a/doc/train/multi-task-training.md
+++ b/doc/train/multi-task-training.md
@@ -1,23 +1,23 @@
 # Multi-task training
 
 ## Perform the multi-task training
-Training on multiple data sets (each data set contains several data systems) can be performed in multi-task mode, 
-with one common descriptor and multiple specific fitting nets for each data set. 
+Training on multiple data sets (each data set contains several data systems) can be performed in multi-task mode,
+with one common descriptor and multiple specific fitting nets for each data set.
 One can simply switch the following parameters in training input script to perform multi-task mode:
-- {ref}`fitting_net ` --> {ref}`fitting_net_dict `, 
+- {ref}`fitting_net ` --> {ref}`fitting_net_dict `,
 each key of which can be one individual fitting net.
-- {ref}`training_data `,  {ref}`validation_data ` 
---> {ref}`data_dict `, each key of which can be one individual data set contains 
-several data systems for corresponding fitting net, the keys must be consistent with those in 
+- {ref}`training_data `,  {ref}`validation_data `
+--> {ref}`data_dict `, each key of which can be one individual data set contains
+several data systems for corresponding fitting net, the keys must be consistent with those in
 {ref}`fitting_net_dict `.
-- {ref}`loss ` --> {ref}`loss_dict `, each key of which can be one individual loss setting 
-for corresponding fitting net, the keys must be consistent with those in 
+- {ref}`loss ` --> {ref}`loss_dict `, each key of which can be one individual loss setting
+for corresponding fitting net, the keys must be consistent with those in
 {ref}`fitting_net_dict `, if not set, the corresponding fitting net will use the default loss.
-- (Optional) {ref}`fitting_weight `, each key of which can be a non-negative integer or float, 
-deciding the chosen probability for corresponding fitting net in training, if not set or invalid, 
+- (Optional) {ref}`fitting_weight `, each key of which can be a non-negative integer or float,
+deciding the chosen probability for corresponding fitting net in training, if not set or invalid,
 the corresponding fitting net will not be used.
 
-The training procedure will automatically choose single-task or multi-task mode, based on the above parameters. 
+The training procedure will automatically choose single-task or multi-task mode, based on the above parameters.
 Note that parameters of single-task mode and multi-task mode can not be mixed.
 
 An example input for training energy and dipole in water system can be found here: [multi-task input on water](../../examples/water_multi_task/ener_dipole/input.json).
@@ -37,8 +37,8 @@ The supported fitting nets for multi-task mode are listed:
 The output of `dp freeze` command in multi-task mode can be seen in [freeze command](../freeze/freeze.md).
 
 ## Initialization from pretrained multi-task model
-For advance training in multi-task mode, one can first train the descriptor on several upstream datasets and then transfer it on new downstream ones with newly added fitting nets. 
-At the second step, you can also inherit some fitting nets trained on upstream datasets, by merely adding fitting net keys in {ref}`fitting_net_dict ` and 
+For advanced training in multi-task mode, one can first train the descriptor on several upstream datasets and then transfer it on new downstream ones with newly added fitting nets.
+At the second step, you can also inherit some fitting nets trained on upstream datasets, by merely adding fitting net keys in {ref}`fitting_net_dict ` and
 optional fitting net weights in {ref}`fitting_weight `.
 
 Take [multi-task input on water](../../examples/water_multi_task/ener_dipole/input.json) again for example.
@@ -69,7 +69,7 @@ After training, you can freeze this multi-task model into one unit graph:
 ```bash
 $ dp freeze -o graph.pb --united-model
 ```
-Then if you want to transfer the trained descriptor and some fitting nets (take `water_ener` for example) to newly added datasets with new fitting net `water_ener_2`, 
+Then if you want to transfer the trained descriptor and some fitting nets (take `water_ener` for example) to newly added datasets with new fitting net `water_ener_2`,
 you can modify the {ref}`model ` part of the new input script in a more simplified way:
 ```json
     "model": {
@@ -86,7 +86,7 @@ you can modify the {ref}`model ` part of the new input script in a more s
 ```
 It will autocomplete the configurations according to the frozen graph.
 
-Note that for newly added fitting net keys, other parts in the input script, including {ref}`data_dict ` and {ref}`loss_dict ` (optionally {ref}`fitting_weight `), 
+Note that for newly added fitting net keys, other parts in the input script, including {ref}`data_dict ` and {ref}`loss_dict ` (optionally {ref}`fitting_weight `),
 should be set explicitly. While for old fitting net keys, it will inherit the old configurations if not set.
 
 Finally, you can perform the modified multi-task training from the frozen model with command:
@@ -110,14 +110,14 @@ For example, if one want to share the first and the third layers for two three-h
             240
         ],
         "layer_name": ["l0", null, "l2", null]
-    },  
+    },
     "wb97m": {
         "neuron": [
             240,
             240,
-            240 
+            240
         ],
         "layer_name": ["l0", null, "l2", null]
-    }   
+    }
 }
 ```
diff --git a/doc/train/tensorboard.md b/doc/train/tensorboard.md
index 922f9ac49b..4846005216 100644
--- a/doc/train/tensorboard.md
+++ b/doc/train/tensorboard.md
@@ -12,7 +12,7 @@ DeePMD-kit can now use most of the interesting features enabled by TensorBoard!
 * **Visualizing the model graph** (ops and layers)
 * **Viewing histograms of weights, biases, or other tensors as they change over time.**
 * **Viewing summaries of trainable variables**
-  
+
 
 
@@ -24,7 +24,7 @@ directory by modifying the input script, setting {ref}`tensorboard ` section in `input.json` is given as fol
     }
 ```
 * {ref}`start_lr ` gives the learning rate at the beginning of the training.
-* {ref}`stop_lr ` gives the learning rate at the end of the training. It should be small enough to ensure that the network parameters satisfactorily converge. 
+* {ref}`stop_lr ` gives the learning rate at the end of the training. It should be small enough to ensure that the network parameters satisfactorily converge.
 * During the training, the learning rate decays exponentially from {ref}`start_lr ` to {ref}`stop_lr ` following the formula:
 
 $$ \alpha(t) = \alpha_0 \lambda ^ { t / \tau } $$
@@ -110,12 +110,12 @@ positional arguments:
 
 optional arguments:
   -h, --help            show this help message and exit
- 
+
   --init-model INIT_MODEL
                         Initialize a model by the provided checkpoint
 
   --restart RESTART     Restart the training from the provided checkpoint
- 
+
   --init-frz-model INIT_FRZ_MODEL
                         Initialize the training from the frozen model.
   --skip-neighbor-stat  Skip calculating neighbor statistics. Sel checking, automatic sel, and model compression will be disabled. (default: False)
diff --git a/doc/train/training.md b/doc/train/training.md
index 40aa4c0e8c..a4afda73c2 100644
--- a/doc/train/training.md
+++ b/doc/train/training.md
@@ -11,7 +11,7 @@ $ dp train input.json
 ```
 where `input.json` is the name of the input script.
 
-By default, the verbosity level of the DeePMD-kit is `INFO`, one may see a lot of important information on the code and environment showing on the screen. Among them two pieces of information regarding data systems are worth special notice. 
+By default, the verbosity level of the DeePMD-kit is `INFO`, one may see a lot of important information on the code and environment showing on the screen. Among them two pieces of information regarding data systems are worth special notice.
 ```bash
 DEEPMD INFO    ---Summary of DataSystem: training     -----------------------------------------------
 DEEPMD INFO    found 3 system(s):
@@ -26,9 +26,9 @@ DEEPMD INFO                                        system  natoms  bch_sz   n_bc
 DEEPMD INFO                          ../data_water/data_3     192       1      80  1.000    T
 DEEPMD INFO    --------------------------------------------------------------------------------------
 ```
-The DeePMD-kit prints detailed information on the training and validation data sets. The data sets are defined by {ref}`training_data ` and {ref}`validation_data ` defined in the {ref}`training ` section of the input script. The training data set is composed of three data systems, while the validation data set is composed by one data system. The number of atoms, batch size, the number of batches in the system and the probability of using the system are all shown on the screen. The last column presents if the periodic boundary condition is assumed for the system. 
+The DeePMD-kit prints detailed information on the training and validation data sets. The data sets are defined by {ref}`training_data ` and {ref}`validation_data ` defined in the {ref}`training ` section of the input script. The training data set is composed of three data systems, while the validation data set is composed by one data system. The number of atoms, batch size, the number of batches in the system and the probability of using the system are all shown on the screen. The last column presents if the periodic boundary condition is assumed for the system.
 
-During the training, the error of the model is tested every {ref}`disp_freq ` training steps with the batch used to train the model and with {ref}`numb_btch ` batches from the validating data. The training error and validation error are printed correspondingly in the file {ref}`disp_file ` (default is `lcurve.out`). The batch size can be set in the input script by the key {ref}`batch_size ` in the corresponding sections for the training and validation data set. An example of the output 
+During the training, the error of the model is tested every {ref}`disp_freq ` training steps with the batch used to train the model and with {ref}`numb_btch ` batches from the validating data. The training error and validation error are printed correspondingly in the file {ref}`disp_file ` (default is `lcurve.out`). The batch size can be set in the input script by the key {ref}`batch_size ` in the corresponding sections for the training and validation data set. An example of the output
 ```bash
 #  step      rmse_val    rmse_trn    rmse_e_val  rmse_e_trn    rmse_f_val  rmse_f_trn         lr
       0      3.33e+01    3.41e+01      1.03e+01    1.03e+01      8.39e-01    8.72e-01    1.0e-03
@@ -56,7 +56,7 @@ plt.grid()
 plt.show()
 ```
 
-Checkpoints will be written to files with the prefix {ref}`save_ckpt ` every {ref}`save_freq ` training steps. 
+Checkpoints will be written to files with the prefix {ref}`save_ckpt ` every {ref}`save_freq ` training steps.
 
 :::{warning}
 It is warned that the example water data (in folder `examples/water/data`) is of very limited amount, is provided only for testing purposes, and should not be used to train a production model.
diff --git a/doc/troubleshooting/howtoset_netsize.md b/doc/troubleshooting/howtoset_netsize.md
index 3bb07dcb80..22d215eec6 100644
--- a/doc/troubleshooting/howtoset_netsize.md
+++ b/doc/troubleshooting/howtoset_netsize.md
@@ -9,17 +9,17 @@ Here are some test forms on fitting-net size tuning or embedding-net size tuning
 
 Fitting-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom)
 ---|---|---|---
-[240,240,240] |   1.742252e-02 |  7.259383e-05 |  4.014115e-02 
-[80,80,80]    |   1.799349e-02 |  7.497287e-05 |  4.042977e-02 
-[40,40,40]    |   1.799036e-02 |  7.495984e-05 |  4.068806e-02 
-[20,20,20]    |   1.834032e-02 |  7.641801e-05 |  4.094784e-02 
-[10,10,10]    |   1.913058e-02 |  7.971073e-05 |  4.154775e-02 
-[5,5,5]       |   1.932914e-02 |  8.053808e-05 |  4.188052e-02 
-[4,4,4]       |   1.944832e-02 |  8.103467e-05 |  4.217826e-02 
-[3,3,3]       |   2.068631e-02 |  8.619296e-05 |  4.300497e-02 
-[2,2,2]       |   2.267962e-02 |  9.449840e-05 |  4.413609e-02 
-[1,1,1]       |   2.813596e-02 |  1.172332e-04 |  4.781115e-02 
-[]            |   3.135002e-02 |  1.306251e-04 |  5.373120e-02 
+[240,240,240] |   1.742252e-02 |  7.259383e-05 |  4.014115e-02
+[80,80,80]    |   1.799349e-02 |  7.497287e-05 |  4.042977e-02
+[40,40,40]    |   1.799036e-02 |  7.495984e-05 |  4.068806e-02
+[20,20,20]    |   1.834032e-02 |  7.641801e-05 |  4.094784e-02
+[10,10,10]    |   1.913058e-02 |  7.971073e-05 |  4.154775e-02
+[5,5,5]       |   1.932914e-02 |  8.053808e-05 |  4.188052e-02
+[4,4,4]       |   1.944832e-02 |  8.103467e-05 |  4.217826e-02
+[3,3,3]       |   2.068631e-02 |  8.619296e-05 |  4.300497e-02
+[2,2,2]       |   2.267962e-02 |  9.449840e-05 |  4.413609e-02
+[1,1,1]       |   2.813596e-02 |  1.172332e-04 |  4.781115e-02
+[]            |   3.135002e-02 |  1.306251e-04 |  5.373120e-02
 
 _[] means no hidden layer, but there is still a linear output layer. This situation is equal to the linear regression._
 
@@ -27,13 +27,13 @@ _[] means no hidden layer, but there is still a linear output layer. This situat
 
 Embedding-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom)
 ---|---|---|---
-[25,50,100]  | 1.742252e-02  | 7.259383e-05  | 4.014115e-02  
-[10,20,40]   | 2.909990e-02  | 1.212496e-04  | 4.734667e-02  
-[5,10,20]    | 3.357767e-02  | 1.399070e-04  | 5.706385e-02  
-[4,8,16]     | 6.060367e-02  | 2.525153e-04  | 7.333304e-02  
-[3,6,12]     | 5.656043e-02  | 2.356685e-04  | 7.793539e-02  
-[2,4,8]      | 5.277023e-02  | 2.198759e-04  | 7.459995e-02  
-[1,2,4]      | 1.302282e-01  | 5.426174e-04  | 9.672238e-02  
+[25,50,100]  | 1.742252e-02  | 7.259383e-05  | 4.014115e-02
+[10,20,40]   | 2.909990e-02  | 1.212496e-04  | 4.734667e-02
+[5,10,20]    | 3.357767e-02  | 1.399070e-04  | 5.706385e-02
+[4,8,16]     | 6.060367e-02  | 2.525153e-04  | 7.333304e-02
+[3,6,12]     | 5.656043e-02  | 2.356685e-04  | 7.793539e-02
+[2,4,8]      | 5.277023e-02  | 2.198759e-04  | 7.459995e-02
+[1,2,4]      | 1.302282e-01  | 5.426174e-04  | 9.672238e-02
 
 
 ## Cu
@@ -43,31 +43,31 @@ Embedding-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV
 Fitting-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom)
 ---|---|---|---
 [240,240,240]  | 4.135548e-02   |  1.615449e-04   |  8.940946e-02
-[20,20,20]     | 4.323858e-02   |  1.689007e-04   |  8.955762e-02 
-[10,10,10]     | 4.399364e-02   |  1.718502e-04   |  8.962891e-02 
-[5,5,5]        | 4.468404e-02   |  1.745470e-04   |  8.970111e-02 
-[4,4,4]        | 4.463580e-02   |  1.743586e-04   |  8.972011e-02 
-[3,3,3]        | 4.493758e-02   |  1.755374e-04   |  8.971303e-02 
-[2,2,2]        | 4.500736e-02   |  1.758100e-04   |  8.973878e-02 
-[1,1,1]        | 4.542073e-02   |  1.774247e-04   |  8.964761e-02 
-[]             | 4.545168e-02   |  1.775456e-04   |  8.983201e-02 
+[20,20,20]     | 4.323858e-02   |  1.689007e-04   |  8.955762e-02
+[10,10,10]     | 4.399364e-02   |  1.718502e-04   |  8.962891e-02
+[5,5,5]        | 4.468404e-02   |  1.745470e-04   |  8.970111e-02
+[4,4,4]        | 4.463580e-02   |  1.743586e-04   |  8.972011e-02
+[3,3,3]        | 4.493758e-02   |  1.755374e-04   |  8.971303e-02
+[2,2,2]        | 4.500736e-02   |  1.758100e-04   |  8.973878e-02
+[1,1,1]        | 4.542073e-02   |  1.774247e-04   |  8.964761e-02
+[]             | 4.545168e-02   |  1.775456e-04   |  8.983201e-02
 
 ### Embedding net size tuning form on Cu: (Fitting-net size: [240,240,240])
 
 Embedding-net size | Energy L2err(eV) | Energy L2err/Natoms(eV) | Force L2err(eV/Angstrom)
 ---|---|---|---
-[25,50,100]          | 4.135548e-02  |  1.615449e-04  |  8.940946e-02  
-[20,40,80]           | 4.203562e-02  |  1.642016e-04  |  8.925881e-02  
-[15,30,60]           | 4.146672e-02  |  1.619794e-04  |  8.936911e-02  
-[10,20,40]           | 4.263060e-02  |  1.665258e-04  |  8.955818e-02  
-[5,10,20]            | 4.994913e-02  |  1.951138e-04  |  9.007786e-02   
-[4,8,16]             | 1.022157e-01  |  3.992802e-04  |  9.532119e-02   
-[3,6,12]             | 1.362098e-01  |  5.320695e-04  |  1.073860e-01   
-[2,4,8]              | 7.061800e-02  |  2.758515e-04  |  9.126418e-02   
-[1,2,4] && seed = 1  | 9.843161e-02  |  3.844985e-04  |  9.348505e-02   
-[1,2,4] && seed = 2  | 9.404335e-02  |  3.673568e-04  |  9.304089e-02   
-[1,2,4] && seed = 3  | 1.508016e-01  |  5.890688e-04  |  1.382356e-01   
-[1,2,4] && seed = 4  | 9.686949e-02  |  3.783965e-04  |  9.294820e-02   
+[25,50,100]          | 4.135548e-02  |  1.615449e-04  |  8.940946e-02
+[20,40,80]           | 4.203562e-02  |  1.642016e-04  |  8.925881e-02
+[15,30,60]           | 4.146672e-02  |  1.619794e-04  |  8.936911e-02
+[10,20,40]           | 4.263060e-02  |  1.665258e-04  |  8.955818e-02
+[5,10,20]            | 4.994913e-02  |  1.951138e-04  |  9.007786e-02
+[4,8,16]             | 1.022157e-01  |  3.992802e-04  |  9.532119e-02
+[3,6,12]             | 1.362098e-01  |  5.320695e-04  |  1.073860e-01
+[2,4,8]              | 7.061800e-02  |  2.758515e-04  |  9.126418e-02
+[1,2,4] && seed = 1  | 9.843161e-02  |  3.844985e-04  |  9.348505e-02
+[1,2,4] && seed = 2  | 9.404335e-02  |  3.673568e-04  |  9.304089e-02
+[1,2,4] && seed = 3  | 1.508016e-01  |  5.890688e-04  |  1.382356e-01
+[1,2,4] && seed = 4  | 9.686949e-02  |  3.783965e-04  |  9.294820e-02
 
 
 ## Water
diff --git a/doc/troubleshooting/howtoset_num_nodes.md b/doc/troubleshooting/howtoset_num_nodes.md
index 18c6a5962f..1415f50c50 100644
--- a/doc/troubleshooting/howtoset_num_nodes.md
+++ b/doc/troubleshooting/howtoset_num_nodes.md
@@ -18,7 +18,7 @@ mpirun -np $num_nodes dp
 ```
 Note that `mpirun` here should be the same as the MPI used to build software. For example, one can use `mpirun -h` and `lmp -h` to see if `mpirun` and LAMMPS has the same MPI version.
 
-Sometimes, `$num_nodes` and the nodes information can be directly given by the HPC scheduler system, if the MPI used here is the same as the MPI used to build the scheduler system. Otherwise, one have to manually assign these information. 
+Sometimes, `$num_nodes` and the nodes information can be directly given by the HPC scheduler system, if the MPI used here is the same as the MPI used to build the scheduler system. Otherwise, one have to manually assign these information.
 
 ## Parallelism between independent operators
 
diff --git a/doc/troubleshooting/howtoset_sel.md b/doc/troubleshooting/howtoset_sel.md
index 2c993491fc..867ff853da 100644
--- a/doc/troubleshooting/howtoset_sel.md
+++ b/doc/troubleshooting/howtoset_sel.md
@@ -2,7 +2,7 @@
 
 `sel` is short for "selected number of atoms in `rcut`".
 
-`sel_a[i]` is a list of integers. The length of the list should be the same as the number of atom types in the system. 
+`sel_a[i]` is a list of integers. The length of the list should be the same as the number of atom types in the system.
 
 `sel_a[i]` gives the number of the selected number of type `i` neighbors within `rcut`. To ensure that the results are strictly accurate, `sel_a[i]` should be larger than the largest number of type `i` neighbors in the `rcut`.
 
diff --git a/doc/troubleshooting/index.rst b/doc/troubleshooting/index.rst
index f938d0973c..6c816d89d5 100644
--- a/doc/troubleshooting/index.rst
+++ b/doc/troubleshooting/index.rst
@@ -1,6 +1,6 @@
 FAQs
 ====
-As a consequence of differences in computers or systems, problems may occur. Some common circumstances are listed as follows. 
+As a consequence of differences in computers or systems, problems may occur. Some common circumstances are listed as follows.
 In addition, some frequently asked questions are listed as follows.
 If other unexpected problems occur, you're welcome to contact us for help.
 
@@ -11,4 +11,4 @@ If other unexpected problems occur, you're welcome to contact us for help.
    :caption: Troubleshooting
    :glob:
 
-   ./*
\ No newline at end of file
+   ./*
diff --git a/doc/troubleshooting/installation.md b/doc/troubleshooting/installation.md
index 4d681aecda..bd52f88d80 100644
--- a/doc/troubleshooting/installation.md
+++ b/doc/troubleshooting/installation.md
@@ -1,6 +1,6 @@
 # Installation
 ## Inadequate versions of gcc/g++
-Sometimes you may use a gcc/g++ of version < 4.8. In this way, you can still compile all the parts of TensorFlow and most of the parts of DeePMD-kit, but i-Pi and GROMACS plugins will be disabled automatically. Or if you have a gcc/g++ of version > 4.8, say, 7.2.0, you may choose to use it by doing 
+Sometimes you may use a gcc/g++ of version < 4.8. In this way, you can still compile all the parts of TensorFlow and most of the parts of DeePMD-kit, but i-Pi and GROMACS plugins will be disabled automatically. Or if you have a gcc/g++ of version > 4.8, say, 7.2.0, you may choose to use it by doing
 ```bash
 export CC=/path/to/gcc-7.2.0/bin/gcc
 export CXX=/path/to/gcc-7.2.0/bin/g++
diff --git a/doc/troubleshooting/md-version-compatibility.md b/doc/troubleshooting/md-version-compatibility.md
index 3adfb26fbb..631cab92ea 100644
--- a/doc/troubleshooting/md-version-compatibility.md
+++ b/doc/troubleshooting/md-version-compatibility.md
@@ -1,7 +1,7 @@
 # MD: cannot run LAMMPS after installing a new version of DeePMD-kit
 This typically happens when you install a new version of DeePMD-kit and copy directly the generated `USER-DEEPMD` to a LAMMPS source code folder and re-install LAMMPS.
 
-To solve this problem, it suffices to first remove `USER-DEEPMD` from the LAMMPS source code by 
+To solve this problem, it suffices to first remove `USER-DEEPMD` from the LAMMPS source code by
 ```bash
 make no-user-deepmd
 ```
diff --git a/doc/troubleshooting/model-compatability.md b/doc/troubleshooting/model-compatability.md
index 0db5f57a16..5500fedfa3 100644
--- a/doc/troubleshooting/model-compatability.md
+++ b/doc/troubleshooting/model-compatability.md
@@ -2,7 +2,7 @@
 
 When the version of DeePMD-kit used to train the model is different from the that of DeePMD-kit running MDs, one has the problem of model compatibility.
 
-DeePMD-kit guarantees that the codes with the same major and minor revisions are compatible. That is to say, v0.12.5 is compatible with v0.12.0, but is not compatible with v0.11.0 or v1.0.0. 
+DeePMD-kit guarantees that the codes with the same major and minor revisions are compatible. That is to say, v0.12.5 is compatible with v0.12.0, but is not compatible with v0.11.0 or v1.0.0.
 
 One can execute `dp convert-from` to convert an old model to a new one.
 
diff --git a/doc/troubleshooting/precision.md b/doc/troubleshooting/precision.md
index 074552068f..1b162d141c 100644
--- a/doc/troubleshooting/precision.md
+++ b/doc/troubleshooting/precision.md
@@ -21,7 +21,7 @@ It is neccessary to check them carefully to avoid inconsistent data.
 The accuracy of models will not exceed the accuracy of training data, so the training data should reach enough accuracy.
 Here is a checklist for the accuracy of data:
 - SCF should converge to a suitable threshold for all points in the training data.
-- The convergence of the energy, force and virial with respect to the energy cutoff and k-spacing sample is checked. 
+- The convergence of the energy, force and virial with respect to the energy cutoff and k-spacing sample is checked.
 - Sometimes, QM software may generate unstable outliers, which should be removed.
 - The data should be extracted with enough digits and stored with the proper precision. Large energies may have low precision when they are stored as the single-precision floating-point format (FP32).
 
diff --git a/examples/data_conv/OUTCAR b/examples/data_conv/OUTCAR
index a1d06ea578..15041df5f0 100644
--- a/examples/data_conv/OUTCAR
+++ b/examples/data_conv/OUTCAR
@@ -1,5 +1,5 @@
- vasp.5.4.4.18Apr17-6-g9f103f2a35 (build Jan 29 2020 12:26:58) complex          
-  
+ vasp.5.4.4.18Apr17-6-g9f103f2a35 (build Jan 29 2020 12:26:58) complex
+
  executed on             LinuxIFC date 2020.05.07  01:33:18
  running on   16 total cores
  distrk:  each k-point on   16 cores,    1 groups
@@ -10,10 +10,10 @@
 
 
  INCAR:
- POTCAR:    PAW_PBE O_h 06Feb2004                 
- POTCAR:    PAW_PBE H_h 06Feb2004                 
+ POTCAR:    PAW_PBE O_h 06Feb2004
+ POTCAR:    PAW_PBE H_h 06Feb2004
 
- ----------------------------------------------------------------------------- 
+ -----------------------------------------------------------------------------
 |                                                                             |
 |           W    W    AA    RRRRR   N    N  II  N    N   GGGG   !!!           |
 |           W    W   A  A   R    R  NN   N  II  NN   N  G    G  !!!           |
@@ -32,45 +32,45 @@
 |      Unfortunately you need to use the default for GW and RPA calculations. |
 |      (for HF NCORE is supported but not extensively tested yet)             |
 |                                                                             |
- ----------------------------------------------------------------------------- 
-
- POTCAR:    PAW_PBE O_h 06Feb2004                 
-   VRHFIN =O: s2p4                                                              
-   LEXCH  = PE                                                                  
-   EATOM  =   432.3788 eV,   31.7789 Ry                                         
-                                                                                
-   TITEL  = PAW_PBE O_h 06Feb2004                                               
-   LULTRA =        F    use ultrasoft PP ?                                      
-   IUNSCR =        1    unscreen: 0-lin 1-nonlin 2-no                           
-   RPACOR =    0.800    partial core radius                                     
-   POMASS =   16.000; ZVAL   =    6.000    mass and valenz                      
-   RCORE  =    1.100    outmost cutoff radius                                   
-   RWIGS  =    1.400; RWIGS  =    0.741    wigner-seitz radius (au A)           
-   ENMAX  =  700.000; ENMIN  =  500.000 eV                                      
-   ICORE  =        2    local potential                                         
-   LCOR   =        T    correct aug charges                                     
-   LPAW   =        T    paw PP                                                  
-   EAUG   =  888.804                                                            
-   DEXC   =    0.000                                                            
-   RMAX   =    1.128    core radius for proj-oper                               
-   RAUG   =    1.300    factor for augmentation sphere                          
-   RDEP   =    1.125    radius for radial grids                                 
-   RDEPT  =    1.088    core radius for aug-charge                              
-                                                                                
-   Atomic configuration                                                         
-    4 entries                                                                   
-     n  l   j            E        occ.                                          
-     1  0  0.50      -514.6923   2.0000                                         
-     2  0  0.50       -23.9615   2.0000                                         
-     2  1  0.50        -9.0305   4.0000                                         
-     3  2  1.50        -9.5241   0.0000                                         
-   Description                                                                  
-     l       E           TYP  RCUT    TYP  RCUT                                 
-     0    -23.9615319     23  1.100                                             
-     0    -25.3221145     23  1.100                                             
-     1     -9.0304911     23  1.100                                             
-     1     -5.4802209     23  1.100                                             
-     2     -9.5240782      7  1.100                                             
+ -----------------------------------------------------------------------------
+
+ POTCAR:    PAW_PBE O_h 06Feb2004
+   VRHFIN =O: s2p4
+   LEXCH  = PE
+   EATOM  =   432.3788 eV,   31.7789 Ry
+
+   TITEL  = PAW_PBE O_h 06Feb2004
+   LULTRA =        F    use ultrasoft PP ?
+   IUNSCR =        1    unscreen: 0-lin 1-nonlin 2-no
+   RPACOR =    0.800    partial core radius
+   POMASS =   16.000; ZVAL   =    6.000    mass and valenz
+   RCORE  =    1.100    outmost cutoff radius
+   RWIGS  =    1.400; RWIGS  =    0.741    wigner-seitz radius (au A)
+   ENMAX  =  700.000; ENMIN  =  500.000 eV
+   ICORE  =        2    local potential
+   LCOR   =        T    correct aug charges
+   LPAW   =        T    paw PP
+   EAUG   =  888.804
+   DEXC   =    0.000
+   RMAX   =    1.128    core radius for proj-oper
+   RAUG   =    1.300    factor for augmentation sphere
+   RDEP   =    1.125    radius for radial grids
+   RDEPT  =    1.088    core radius for aug-charge
+
+   Atomic configuration
+    4 entries
+     n  l   j            E        occ.
+     1  0  0.50      -514.6923   2.0000
+     2  0  0.50       -23.9615   2.0000
+     2  1  0.50        -9.0305   4.0000
+     3  2  1.50        -9.5241   0.0000
+   Description
+     l       E           TYP  RCUT    TYP  RCUT
+     0    -23.9615319     23  1.100
+     0    -25.3221145     23  1.100
+     1     -9.0304911     23  1.100
+     1     -5.4802209     23  1.100
+     2     -9.5240782      7  1.100
   local pseudopotential read in
   partial core-charges read in
   partial kinetic energy density read in
@@ -84,42 +84,42 @@
   non local Contribution for L=           1  read in
     real space projection operators read in
     PAW grid and wavefunctions read in
- 
+
    number of l-projection  operators is LMAX  =           4
    number of lm-projection operators is LMMAX =           8
- 
- POTCAR:    PAW_PBE H_h 06Feb2004                 
-   VRHFIN =H: ultrasoft test                                                    
-   LEXCH  = PE                                                                  
-   EATOM  =    12.4884 eV,    0.9179 Ry                                         
-                                                                                
-   TITEL  = PAW_PBE H_h 06Feb2004                                               
-   LULTRA =        F    use ultrasoft PP ?                                      
-   IUNSCR =        0    unscreen: 0-lin 1-nonlin 2-no                           
-   RPACOR =    0.000    partial core radius                                     
-   POMASS =    1.000; ZVAL   =    1.000    mass and valenz                      
-   RCORE  =    0.800    outmost cutoff radius                                   
-   RWIGS  =    0.700; RWIGS  =    0.370    wigner-seitz radius (au A)           
-   ENMAX  =  700.000; ENMIN  =  350.000 eV                                      
-   RCLOC  =    0.701    cutoff for local pot                                    
-   LCOR   =        T    correct aug charges                                     
-   LPAW   =        T    paw PP                                                  
-   EAUG   = 1000.000                                                            
-   RMAX   =    0.819    core radius for proj-oper                               
-   RAUG   =    1.000    factor for augmentation sphere                          
-   RDEP   =    0.817    radius for radial grids                                 
-   RDEPT  =    0.817    core radius for aug-charge                              
-                                                                                
-   Atomic configuration                                                         
-    2 entries                                                                   
-     n  l   j            E        occ.                                          
-     1  0  0.50        -6.4927   1.0000                                         
-     2  1  0.50        -3.4015   0.0000                                         
-   Description                                                                  
-     l       E           TYP  RCUT    TYP  RCUT                                 
-     0     -6.4927493     23  0.800                                             
-     0      6.8029130     23  0.800                                             
-     1     -6.8029130     23  0.800                                             
+
+ POTCAR:    PAW_PBE H_h 06Feb2004
+   VRHFIN =H: ultrasoft test
+   LEXCH  = PE
+   EATOM  =    12.4884 eV,    0.9179 Ry
+
+   TITEL  = PAW_PBE H_h 06Feb2004
+   LULTRA =        F    use ultrasoft PP ?
+   IUNSCR =        0    unscreen: 0-lin 1-nonlin 2-no
+   RPACOR =    0.000    partial core radius
+   POMASS =    1.000; ZVAL   =    1.000    mass and valenz
+   RCORE  =    0.800    outmost cutoff radius
+   RWIGS  =    0.700; RWIGS  =    0.370    wigner-seitz radius (au A)
+   ENMAX  =  700.000; ENMIN  =  350.000 eV
+   RCLOC  =    0.701    cutoff for local pot
+   LCOR   =        T    correct aug charges
+   LPAW   =        T    paw PP
+   EAUG   = 1000.000
+   RMAX   =    0.819    core radius for proj-oper
+   RAUG   =    1.000    factor for augmentation sphere
+   RDEP   =    0.817    radius for radial grids
+   RDEPT  =    0.817    core radius for aug-charge
+
+   Atomic configuration
+    2 entries
+     n  l   j            E        occ.
+     1  0  0.50        -6.4927   1.0000
+     2  1  0.50        -3.4015   0.0000
+   Description
+     l       E           TYP  RCUT    TYP  RCUT
+     0     -6.4927493     23  0.800
+     0      6.8029130     23  0.800
+     1     -6.8029130     23  0.800
   local pseudopotential read in
   atomic valenz-charges read in
   non local Contribution for L=           0  read in
@@ -129,25 +129,25 @@
   non local Contribution for L=           1  read in
     real space projection operators read in
     PAW grid and wavefunctions read in
- 
+
    number of l-projection  operators is LMAX  =           3
    number of lm-projection operators is LMMAX =           5
- 
+
   PAW_PBE O_h 06Feb2004                 :
  energy of atom  1       EATOM= -432.3788
  kinetic energy error for atom=    0.0035 (will be added to EATOM!!)
   PAW_PBE H_h 06Feb2004                 :
  energy of atom  2       EATOM=  -12.4884
  kinetic energy error for atom=    0.0001 (will be added to EATOM!!)
- 
- 
- POSCAR: O2 H4                                   
+
+
+ POSCAR: O2 H4
   positions in cartesian coordinates
   No initial velocities read in
  exchange correlation table for  LEXCH =        8
    RHO(1)=    0.500       N(1)  =     2000
    RHO(2)=  100.500       N(2)  =     4000
- 
+
 
 
 --------------------------------------------------------------------------------------------------------
@@ -160,37 +160,37 @@
    4  0.088  0.111  0.118-   1 1.01
    5  0.870  0.066  0.171-   2 1.02
    6  0.870  0.066  0.064-   2 1.02
- 
+
 
 IMPORTANT INFORMATION: All symmetrisations will be switched off!
 NOSYMM: (Re-)initialisation of all symmetry stuff for point group C_1.
 
- 
- 
+
+
 
 Automatic generation of k-mesh.
  generate k-points for:    1    1    1
 Space group operators:
  irot       det(A)        alpha          n_x          n_y          n_z        tau_x        tau_y        tau_z
     1     1.000000     0.000000     1.000000     0.000000     0.000000     0.000000     0.000000     0.000000
- 
+
  Subroutine IBZKPT returns following result:
  ===========================================
- 
+
  Found      1 irreducible k-points:
- 
+
  Following reciprocal coordinates:
             Coordinates               Weight
   0.000000  0.000000  0.000000      1.000000
- 
+
  Following cartesian coordinates:
             Coordinates               Weight
   0.000000  0.000000  0.000000      1.000000
- 
- 
+
+
  Subroutine IBZKPT_HF returns following result:
  ==============================================
- 
+
  Found      1 k-points in 1st BZ
  the following      1 k-points will be used (e.g. in the exchange kernel)
  Following reciprocal coordinates:   # in IRBZ
@@ -215,8 +215,8 @@ Space group operators:
    NGX,Y,Z   is equivalent  to a cutoff of  21.28, 21.28, 21.28 a.u.
    NGXF,Y,Z  is equivalent  to a cutoff of  42.56, 42.56, 42.56 a.u.
 
- SYSTEM =  unknown system                          
- POSCAR =  O2 H4                                   
+ SYSTEM =  unknown system
+ POSCAR =  O2 H4
 
  Startparameter for this run:
    NWRITE =      2    write-flag & timer
@@ -234,7 +234,7 @@ Space group operators:
    ENCUT  = 1500.0 eV 110.25 Ry   10.50 a.u.  94.74 47.37 47.37*2*pi/ulx,y,z
    ENINI  = 1500.0     initial cutoff
    ENAUG  = 1000.0 eV  augmentation charge cutoff
-   NELM   =     60;   NELMIN=  4; NELMDL= -5     # of ELM steps 
+   NELM   =     60;   NELMIN=  4; NELMDL= -5     # of ELM steps
    EDIFF  = 0.1E-07   stopping-criterion for ELM
    LREAL  =      F    real-space projection
    NLSPLINE    = F    spline interpolate recip. space projectors
@@ -247,7 +247,7 @@ Space group operators:
  Ionic relaxation
    EDIFFG = 0.1E-06   stopping-criterion for IOM
    NSW    =      0    number of steps for IOM
-   NBLOCK =      1;   KBLOCK =      1    inner block; outer block 
+   NBLOCK =      1;   KBLOCK =      1    inner block; outer block
    IBRION =     -1    ionic relax: 0-MD 1-quasi-New 2-CG
    NFREE  =      0    steps in history (QN), initial steepest desc. (CG)
    ISIF   =      2    stress and relaxation
@@ -270,7 +270,7 @@ Space group operators:
    ZVAL   =   6.00  1.00
   Atomic Wigner-Seitz radii
    RWIGS  =  -1.00 -1.00
-  virtual crystal weights 
+  virtual crystal weights
    VCA    =   1.00  1.00
    NELECT =      16.0000    total number of electrons
    NUPDOWN=      -1.0000    fix difference up-down
@@ -298,14 +298,14 @@ Space group operators:
  Intra band minimization:
    WEIMIN = 0.0000     energy-eigenvalue tresh-hold
    EBREAK =  0.16E-09  absolut break condition
-   DEPER  =   0.30     relativ break condition  
+   DEPER  =   0.30     relativ break condition
 
    TIME   =   0.40     timestep for ELM
 
   volume/ion in A,a.u.               =    1125.00      7591.87
   Fermi-wavevector in a.u.,A,eV,Ry     =   0.218280  0.412489  0.648264  0.047646
   Thomas-Fermi vector in A             =   0.996232
- 
+
  Write flags
    LWAVE        =      F    write WAVECAR
    LDOWNSAMPLE  =      F    k-point downsampling of WAVECAR
@@ -319,7 +319,7 @@ Space group operators:
  Dipole corrections
    LMONO  =      F    monopole corrections only (constant potential shift)
    LDIPOL =      F    correct potential (dipole corrections)
-   IDIPOL =      0    1-x, 2-y, 3-z, 4-all directions 
+   IDIPOL =      0    1-x, 2-y, 3-z, 4-all directions
    EPSILON=  1.0000000 bulk dielectric constant
 
  Exchange correlation treatment:
@@ -343,7 +343,7 @@ Space group operators:
    RTIME   =   -0.100 relaxation time in fs
   (WPLASMAI=    0.000 imaginary part of plasma frequency in eV, 0.658/RTIME)
    DFIELD  = 0.0000000 0.0000000 0.0000000 field for delta impulse in time
- 
+
  Orbital magnetization related:
    ORBITALMAG=     F  switch on orbital magnetization
    LCHIMAG   =     F  perturbation theory with respect to B field
@@ -368,9 +368,9 @@ Space group operators:
  using additional bands            8
  reciprocal scheme for non local part
  use partial core corrections
- calculate Harris-corrections to forces 
+ calculate Harris-corrections to forces
    (improved forces if not selfconsistent)
- use gradient corrections 
+ use gradient corrections
  use of overlap-Matrix (Vanderbilt PP)
  Gauss-broadening in eV      SIGMA  =   0.05
 
@@ -389,21 +389,21 @@ Space group operators:
     30.000000000 15.000000000 15.000000000     0.033333333  0.066666667  0.066666667
 
 
- 
- k-points in units of 2pi/SCALE and weight: read from INCAR                         
+
+ k-points in units of 2pi/SCALE and weight: read from INCAR
    0.00000000  0.00000000  0.00000000       1.000
- 
- k-points in reciprocal lattice and weights: read from INCAR                         
+
+ k-points in reciprocal lattice and weights: read from INCAR
    0.00000000  0.00000000  0.00000000       1.000
- 
- position of ions in fractional coordinates (direct lattice) 
+
+ position of ions in fractional coordinates (direct lattice)
    0.12126746  0.10473971  0.11733333
    0.87873254  0.10473971  0.11733333
    0.13194264  0.17027479  0.11626154
    0.08787106  0.11115189  0.11766219
    0.86972453  0.06627868  0.17073627
    0.86985000  0.06605326  0.06404818
- 
+
  position of ions in cartesian coordinates  (Angst):
    3.63802389  1.57109570  1.76000001
   26.36197611  1.57109570  1.76000001
@@ -411,7 +411,7 @@ Space group operators:
    2.63613168  1.66727831  1.76493284
   26.09173582  0.99418024  2.56104402
   26.09549990  0.99079897  0.96072265
- 
+
 
 
 --------------------------------------------------------------------------------------------------------
@@ -422,7 +422,7 @@ Space group operators:
  maximum and minimum number of plane-waves per node :    889965   889965
 
  maximum number of plane-waves:    889965
- maximum index in each direction: 
+ maximum index in each direction:
    IXMAX=   94   IYMAX=   47   IZMAX=   47
    IXMIN=  -94   IYMIN=  -47   IZMIN=  -47
 
@@ -441,7 +441,7 @@ Space group operators:
    grid      :     965246. kBytes
    one-center:         18. kBytes
    wavefun   :      14240. kBytes
- 
+
      INWAV:  cpu time    0.0000: real time    0.0000
  Broyden mixing: mesh for mixing (old mesh)
    NGX =189   NGY = 95   NGZ = 95
@@ -450,7 +450,7 @@ Space group operators:
 
  initial charge density was supplied:
  charge density of overlapping atoms calculated
- number of electron      16.0000000 magnetization 
+ number of electron      16.0000000 magnetization
  keeping initial charge density in first step
 
 
@@ -482,8 +482,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    48
  total energy-change (2. order) : 0.1514268E+03  (-0.4031052E+03)
- number of electron      16.0000000 magnetization 
- augmentation part       16.0000000 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part       16.0000000 magnetization
 
  Free energy of the ion-electron system (eV)
   ---------------------------------------------------
@@ -518,8 +518,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    64
  total energy-change (2. order) :-0.1039947E+03  (-0.1039945E+03)
- number of electron      16.0000000 magnetization 
- augmentation part       16.0000000 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part       16.0000000 magnetization
 
  Free energy of the ion-electron system (eV)
   ---------------------------------------------------
@@ -554,8 +554,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    48
  total energy-change (2. order) :-0.7308666E+02  (-0.7308666E+02)
- number of electron      16.0000000 magnetization 
- augmentation part       16.0000000 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part       16.0000000 magnetization
 
  Free energy of the ion-electron system (eV)
   ---------------------------------------------------
@@ -590,8 +590,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    32
  total energy-change (2. order) :-0.6216357E+01  (-0.6216357E+01)
- number of electron      16.0000000 magnetization 
- augmentation part       16.0000000 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part       16.0000000 magnetization
 
  Free energy of the ion-electron system (eV)
   ---------------------------------------------------
@@ -628,8 +628,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    64
  total energy-change (2. order) :-0.1289698E+00  (-0.1289698E+00)
- number of electron      16.0000000 magnetization 
- augmentation part        0.2051310 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.2051310 magnetization
 
  Broyden mixing:
   rms(total) = 0.18249E+01    rms(broyden)= 0.18249E+01
@@ -675,8 +675,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    48
  total energy-change (2. order) : 0.3401693E+01  (-0.1284952E+01)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1254852 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1254852 magnetization
 
  Broyden mixing:
   rms(total) = 0.18033E+01    rms(broyden)= 0.18033E+01
@@ -726,8 +726,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    54
  total energy-change (2. order) : 0.4794357E-01  (-0.1172557E+00)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1054543 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1054543 magnetization
 
  Broyden mixing:
   rms(total) = 0.15927E+01    rms(broyden)= 0.15927E+01
@@ -777,8 +777,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    46
  total energy-change (2. order) :-0.8265731E+00  (-0.6778624E+00)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1566603 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1566603 magnetization
 
  Broyden mixing:
   rms(total) = 0.11958E+01    rms(broyden)= 0.11958E+01
@@ -828,8 +828,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    48
  total energy-change (2. order) : 0.1056462E+01  (-0.2351360E+00)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1272037 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1272037 magnetization
 
  Broyden mixing:
   rms(total) = 0.41679E+00    rms(broyden)= 0.41679E+00
@@ -879,8 +879,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    54
  total energy-change (2. order) :-0.4627474E-02  (-0.1961741E-01)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1175680 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1175680 magnetization
 
  Broyden mixing:
   rms(total) = 0.20948E+00    rms(broyden)= 0.20948E+00
@@ -930,8 +930,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    41
  total energy-change (2. order) :-0.3396011E-02  (-0.9357442E-03)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1175273 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1175273 magnetization
 
  Broyden mixing:
   rms(total) = 0.11910E+00    rms(broyden)= 0.11910E+00
@@ -981,8 +981,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    45
  total energy-change (2. order) : 0.7972297E-02  (-0.1200521E-01)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1108344 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1108344 magnetization
 
  Broyden mixing:
   rms(total) = 0.58878E-01    rms(broyden)= 0.58878E-01
@@ -1032,8 +1032,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    53
  total energy-change (2. order) :-0.2817125E-02  (-0.1004256E-02)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1090245 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1090245 magnetization
 
  Broyden mixing:
   rms(total) = 0.73025E-01    rms(broyden)= 0.73025E-01
@@ -1083,8 +1083,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    39
  total energy-change (2. order) :-0.7901685E-02  (-0.3680976E-03)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1080283 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1080283 magnetization
 
  Broyden mixing:
   rms(total) = 0.78044E-01    rms(broyden)= 0.78044E-01
@@ -1134,8 +1134,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    34
  total energy-change (2. order) :-0.2715443E-02  (-0.3426902E-03)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1089825 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1089825 magnetization
 
  Broyden mixing:
   rms(total) = 0.30925E-01    rms(broyden)= 0.30925E-01
@@ -1185,8 +1185,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    50
  total energy-change (2. order) :-0.7488643E-03  (-0.1972646E-03)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1098972 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1098972 magnetization
 
  Broyden mixing:
   rms(total) = 0.12681E-01    rms(broyden)= 0.12681E-01
@@ -1237,8 +1237,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    33
  total energy-change (2. order) :-0.3167343E-02  (-0.4434230E-03)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1110904 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1110904 magnetization
 
  Broyden mixing:
   rms(total) = 0.24794E-01    rms(broyden)= 0.24794E-01
@@ -1289,8 +1289,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    41
  total energy-change (2. order) :-0.1578669E-02  (-0.9871864E-04)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1117241 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1117241 magnetization
 
  Broyden mixing:
   rms(total) = 0.28035E-01    rms(broyden)= 0.28035E-01
@@ -1341,8 +1341,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    41
  total energy-change (2. order) :-0.5446741E-03  (-0.1128610E-03)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1110888 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1110888 magnetization
 
  Broyden mixing:
   rms(total) = 0.10695E-01    rms(broyden)= 0.10695E-01
@@ -1393,8 +1393,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    48
  total energy-change (2. order) :-0.3896540E-03  (-0.4392959E-04)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1106257 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1106257 magnetization
 
  Broyden mixing:
   rms(total) = 0.91288E-03    rms(broyden)= 0.91284E-03
@@ -1445,8 +1445,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    40
  total energy-change (2. order) :-0.2304305E-03  (-0.1690825E-05)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1106527 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1106527 magnetization
 
  Broyden mixing:
   rms(total) = 0.11007E-02    rms(broyden)= 0.11007E-02
@@ -1497,8 +1497,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    39
  total energy-change (2. order) :-0.1835709E-03  (-0.4515843E-05)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105401 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105401 magnetization
 
  Broyden mixing:
   rms(total) = 0.23291E-02    rms(broyden)= 0.23291E-02
@@ -1549,8 +1549,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    49
  total energy-change (2. order) :-0.6534761E-04  (-0.2114694E-05)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1104497 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1104497 magnetization
 
  Broyden mixing:
   rms(total) = 0.30474E-02    rms(broyden)= 0.30474E-02
@@ -1601,8 +1601,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    37
  total energy-change (2. order) :-0.3766322E-04  (-0.1162993E-05)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105096 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105096 magnetization
 
  Broyden mixing:
   rms(total) = 0.14979E-02    rms(broyden)= 0.14979E-02
@@ -1653,8 +1653,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    48
  total energy-change (2. order) :-0.1596734E-04  (-0.6312087E-06)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105646 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105646 magnetization
 
  Broyden mixing:
   rms(total) = 0.44451E-03    rms(broyden)= 0.44451E-03
@@ -1705,8 +1705,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    43
  total energy-change (2. order) :-0.1249405E-04  (-0.1004617E-06)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105830 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105830 magnetization
 
  Broyden mixing:
   rms(total) = 0.17005E-03    rms(broyden)= 0.17005E-03
@@ -1758,8 +1758,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    42
  total energy-change (2. order) :-0.4542188E-05  (-0.6904532E-07)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105978 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105978 magnetization
 
  Broyden mixing:
   rms(total) = 0.39214E-03    rms(broyden)= 0.39214E-03
@@ -1811,8 +1811,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    42
  total energy-change (2. order) :-0.2690320E-05  (-0.5570044E-08)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105992 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105992 magnetization
 
  Broyden mixing:
   rms(total) = 0.26696E-03    rms(broyden)= 0.26696E-03
@@ -1864,8 +1864,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    34
  total energy-change (2. order) :-0.9984269E-06  (-0.1780315E-08)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105978 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105978 magnetization
 
  Broyden mixing:
   rms(total) = 0.19719E-03    rms(broyden)= 0.19719E-03
@@ -1917,8 +1917,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    42
  total energy-change (2. order) :-0.4557878E-06  (-0.6069651E-08)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105934 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105934 magnetization
 
  Broyden mixing:
   rms(total) = 0.14046E-03    rms(broyden)= 0.14046E-03
@@ -1970,8 +1970,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    39
  total energy-change (2. order) :-0.5099819E-06  (-0.5763795E-08)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105892 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105892 magnetization
 
  Broyden mixing:
   rms(total) = 0.49855E-04    rms(broyden)= 0.49855E-04
@@ -2023,8 +2023,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    34
  total energy-change (2. order) :-0.3252635E-06  (-0.1585956E-08)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105879 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105879 magnetization
 
  Broyden mixing:
   rms(total) = 0.20063E-04    rms(broyden)= 0.20063E-04
@@ -2076,8 +2076,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    38
  total energy-change (2. order) :-0.6773371E-07  (-0.1062542E-08)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105866 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105866 magnetization
 
  Broyden mixing:
   rms(total) = 0.91623E-05    rms(broyden)= 0.91622E-05
@@ -2129,8 +2129,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    30
  total energy-change (2. order) :-0.1850694E-06  (-0.1083993E-08)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105855 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105855 magnetization
 
  Broyden mixing:
   rms(total) = 0.24782E-04    rms(broyden)= 0.24782E-04
@@ -2182,8 +2182,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    30
  total energy-change (2. order) :-0.1145991E-06  (-0.9243699E-09)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105845 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105845 magnetization
 
  Broyden mixing:
   rms(total) = 0.45365E-04    rms(broyden)= 0.45365E-04
@@ -2235,8 +2235,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    32
  total energy-change (2. order) :-0.3223522E-07  (-0.2489422E-09)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105843 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105843 magnetization
 
  Broyden mixing:
   rms(total) = 0.32658E-04    rms(broyden)= 0.32658E-04
@@ -2287,8 +2287,8 @@ Space group operators:
 
  eigenvalue-minimisations  :    35
  total energy-change (2. order) :-0.5719812E-08  (-0.5881873E-10)
- number of electron      16.0000000 magnetization 
- augmentation part        0.1105843 magnetization 
+ number of electron      16.0000000 magnetization
+ augmentation part        0.1105843 magnetization
 
  Free energy of the ion-electron system (eV)
   ---------------------------------------------------
@@ -2318,14 +2318,14 @@ Space group operators:
   (the norm of the test charge is              1.0000)
        1-119.5768       2-119.7330       3 -46.9125       4 -47.1157       5 -47.1365
        6 -47.1399
- 
- 
- 
+
+
+
  E-fermi :  -6.7437     XC(G=0):  -0.1760     alpha+bet : -0.0402
 
 
  k-point     1 :       0.0000    0.0000    0.0000
-  band No.  band energies     occupation 
+  band No.  band energies     occupation
       1     -24.7993      2.00000
       2     -24.6817      2.00000
       3     -12.6088      2.00000
@@ -2350,7 +2350,7 @@ Space group operators:
  soft charge-density along one line, spin component           1
          0         1         2         3         4         5         6         7         8         9
  total charge-density along one line
- 
+
  pseudopotential strength for first ion, spin component:           1
  26.736  32.699   0.059  -0.001  -0.040   0.078  -0.001  -0.053
  32.699  39.992   0.072  -0.001  -0.049   0.095  -0.001  -0.064
@@ -2425,8 +2425,8 @@ Space group operators:
    0.227E+02 0.443E+02 0.663E+02   -.239E+02 -.469E+02 -.701E+02   0.171E+01 0.372E+01 0.552E+01   0.229E-04 0.492E-04 0.493E-04
  -----------------------------------------------------------------------------------------------
    0.417E+02 0.301E+01 0.320E+00   0.000E+00 0.213E-13 -.142E-13   -.417E+02 -.301E+01 -.320E+00   0.334E-03 -.211E-04 0.257E-05
- 
- 
+
+
  POSITION                                       TOTAL-FORCE (eV/Angst)
  -----------------------------------------------------------------------------------
       3.63802      1.57110      1.76000        -0.719072      2.472756     -0.030872
@@ -2448,7 +2448,7 @@ Space group operators:
   free  energy   TOTEN  =       -28.34083468 eV
 
   energy  without entropy=      -28.34083468  energy(sigma->0) =      -28.34083468
- 
+
 
 
 --------------------------------------------------------------------------------------------------------
@@ -2472,20 +2472,20 @@ Space group operators:
    grid      :     965246. kBytes
    one-center:         18. kBytes
    wavefun   :      14240. kBytes
- 
-  
-  
+
+
+
  General timing and accounting informations for this job:
  ========================================================
-  
+
                   Total CPU time used (sec):      877.816
                             User time (sec):      847.517
                           System time (sec):       30.300
                          Elapsed time (sec):      880.605
-  
+
                    Maximum memory used (kb):     3453896.
                    Average memory used (kb):           0.
-  
+
                           Minor page faults:       702128
                           Major page faults:            7
                  Voluntary context switches:        10645
diff --git a/examples/fparam/train/.gitignore b/examples/fparam/train/.gitignore
index b5cec52a88..40090c1798 100644
--- a/examples/fparam/train/.gitignore
+++ b/examples/fparam/train/.gitignore
@@ -2,4 +2,3 @@
 model.ckpt*
 frozen_model.pb
 checkpoint
-
diff --git a/examples/fparam/train/input.json b/examples/fparam/train/input.json
index c32b0d1c17..5ebe55c887 100644
--- a/examples/fparam/train/input.json
+++ b/examples/fparam/train/input.json
@@ -1,63 +1,75 @@
 {
-    "_comment": " model parameters",
-    "model" : {
-	"data_stat_nbatch":	1,
-	"descriptor": {
-	    "type":		"se_a",
-	    "sel":		[60],
-	    "rcut_smth":	1.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	8,
-	    "seed":		1
-	},
-	"fitting_net" : {
-	    "neuron":		[120, 120, 120],
-	    "resnet_dt":	true,
-	    "numb_fparam":	1,
-	    "seed":		1
-	}
+  "_comment": " model parameters",
+  "model": {
+    "data_stat_nbatch": 1,
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        60
+      ],
+      "rcut_smth": 1.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 8,
+      "seed": 1
     },
+    "fitting_net": {
+      "neuron": [
+        120,
+        120,
+        120
+      ],
+      "resnet_dt": true,
+      "numb_fparam": 1,
+      "seed": 1
+    }
+  },
 
-    "loss" : {
-	"start_pref_e":	0.02,
-	"limit_pref_e":	1,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0
-    },
+  "loss": {
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0
+  },
+
+  "learning_rate": {
+    "start_lr": 0.001,
+    "stop_lr": 1e-8,
+    "decay_steps": 5000
+  },
 
-    "learning_rate" : {
-	"start_lr":	0.001,
-	"stop_lr":	1e-8,
-	"decay_steps":	5000
+  "_comment": " traing controls",
+  "training": {
+    "training_data": {
+      "systems": [
+        "../data/e3000_i2000/",
+        "../data/e8000_i2000/"
+      ],
+      "set_prefix": "set",
+      "batch_size": 1
     },
+    "stop_batch": 1000000,
 
-    "_comment": " traing controls",
-    "training" : {
-		"training_data": {
-			"systems": ["../data/e3000_i2000/", "../data/e8000_i2000/"],
-			"set_prefix":	"set",
-			"batch_size":	1
-		},
-	"stop_batch":	1000000,
+    "seed": 1,
 
-	"seed":		1,
+    "_comment": " display and restart",
+    "_comment": " frequencies counted in batch",
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "save_ckpt": "model.ckpt",
+    "disp_training": true,
+    "time_training": true,
+    "profiling": false,
+    "profiling_file": "timeline.json"
+  },
 
-	"_comment": " display and restart",
-	"_comment": " frequencies counted in batch",
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"save_freq":	1000,
-	"save_ckpt":	"model.ckpt",
-	"disp_training":true,
-	"time_training":true,
-	"profiling":	false,
-	"profiling_file":	"timeline.json"
-    },
-
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/examples/fparam/train/input_aparam.json b/examples/fparam/train/input_aparam.json
index b978ef055e..e83f825458 100644
--- a/examples/fparam/train/input_aparam.json
+++ b/examples/fparam/train/input_aparam.json
@@ -1,63 +1,75 @@
 {
-    "_comment": " model parameters",
-    "model" : {
-	"data_stat_nbatch":	1,
-	"descriptor": {
-	    "type":		"se_a",
-	    "sel":		[60],
-	    "rcut_smth":	1.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	8,
-	    "seed":		1
-	},
-	"fitting_net" : {
-	    "neuron":		[120, 120, 120],
-	    "resnet_dt":	true,
-	    "numb_aparam":	1,
-	    "seed":		1
-	}
+  "_comment": " model parameters",
+  "model": {
+    "data_stat_nbatch": 1,
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        60
+      ],
+      "rcut_smth": 1.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 8,
+      "seed": 1
     },
+    "fitting_net": {
+      "neuron": [
+        120,
+        120,
+        120
+      ],
+      "resnet_dt": true,
+      "numb_aparam": 1,
+      "seed": 1
+    }
+  },
 
-    "loss" : {
-	"start_pref_e":	0.02,
-	"limit_pref_e":	1,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0
-    },
+  "loss": {
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0
+  },
+
+  "learning_rate": {
+    "start_lr": 0.001,
+    "stop_lr": 3e-8,
+    "decay_steps": 5000
+  },
 
-    "learning_rate" : {
-	"start_lr":	0.001,
-	"stop_lr":	3e-8,
-	"decay_steps":	5000
+  "_comment": " traing controls",
+  "training": {
+    "training_data": {
+      "systems": [
+        "../data/e3000_i2000/",
+        "../data/e8000_i2000/"
+      ],
+      "set_prefix": "set",
+      "batch_size": 1
     },
+    "stop_batch": 1000000,
 
-    "_comment": " traing controls",
-    "training" : {
-		"training_data": {
-			"systems":	["../data/e3000_i2000/", "../data/e8000_i2000/"],
-			"set_prefix":	"set",
-			"batch_size":	1
-		},
-	"stop_batch":	1000000,
+    "seed": 1,
 
-	"seed":		1,
+    "_comment": " display and restart",
+    "_comment": " frequencies counted in batch",
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "save_ckpt": "model.ckpt",
+    "disp_training": true,
+    "time_training": true,
+    "profiling": false,
+    "profiling_file": "timeline.json"
+  },
 
-	"_comment": " display and restart",
-	"_comment": " frequencies counted in batch",
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"save_freq":	1000,
-	"save_ckpt":	"model.ckpt",
-	"disp_training":true,
-	"time_training":true,
-	"profiling":	false,
-	"profiling_file":	"timeline.json"
-    },
-
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/examples/infer_water/infer_water.c b/examples/infer_water/infer_water.c
index 0be6d0cf60..48e2248508 100644
--- a/examples/infer_water/infer_water.c
+++ b/examples/infer_water/infer_water.c
@@ -1,27 +1,26 @@
 #include 
 #include 
+
 #include "deepmd/c_api.h"
 
-int main(){
+int main() {
   const char* model = "graph.pb";
-  double coord[] = {1., 0., 0., 0., 0., 1.5, 1. ,0. ,3.};
+  double coord[] = {1., 0., 0., 0., 0., 1.5, 1., 0., 3.};
   double cell[] = {10., 0., 0., 0., 10., 0., 0., 0., 10.};
   int atype[] = {1, 0, 1};
   // init C pointers with given memory
   double* e = malloc(sizeof(*e));
-  double* f = malloc(sizeof(*f) * 9); // natoms * 3
+  double* f = malloc(sizeof(*f) * 9);  // natoms * 3
   double* v = malloc(sizeof(*v) * 9);
-  double* ae = malloc(sizeof(*ae) * 9); // natoms
-  double* av = malloc(sizeof(*av) * 27); // natoms * 9
+  double* ae = malloc(sizeof(*ae) * 9);   // natoms
+  double* av = malloc(sizeof(*av) * 27);  // natoms * 9
   // DP model
   DP_DeepPot* dp = DP_NewDeepPot(model);
-  DP_DeepPotCompute (dp, 3, coord, atype, cell, e, f, v, ae, av);
+  DP_DeepPotCompute(dp, 3, coord, atype, cell, e, f, v, ae, av);
   // print results
   printf("energy: %f\n", *e);
-  for (int ii = 0; ii < 9; ++ii)
-    printf("force[%d]: %f\n", ii, f[ii]);
-  for (int ii = 0; ii < 9; ++ii)
-    printf("force[%d]: %f\n", ii, v[ii]);
+  for (int ii = 0; ii < 9; ++ii) printf("force[%d]: %f\n", ii, f[ii]);
+  for (int ii = 0; ii < 9; ++ii) printf("force[%d]: %f\n", ii, v[ii]);
   // free memory
   free(e);
   free(f);
@@ -29,4 +28,4 @@ int main(){
   free(ae);
   free(av);
   free(dp);
-}
\ No newline at end of file
+}
diff --git a/examples/infer_water/infer_water.cpp b/examples/infer_water/infer_water.cpp
index 3697ba588c..ed8adbb31e 100644
--- a/examples/infer_water/infer_water.cpp
+++ b/examples/infer_water/infer_water.cpp
@@ -1,11 +1,11 @@
 #include "deepmd/DeepPot.h"
 
-int main(){
-  deepmd::DeepPot dp ("graph.pb");
-  std::vector coord = {1., 0., 0., 0., 0., 1.5, 1. ,0. ,3.};
-  std::vector cell = {10., 0., 0., 0., 10., 0., 0., 0., 10.};
-  std::vector atype = {1, 0, 1};
+int main() {
+  deepmd::DeepPot dp("graph.pb");
+  std::vector coord = {1., 0., 0., 0., 0., 1.5, 1., 0., 3.};
+  std::vector cell = {10., 0., 0., 0., 10., 0., 0., 0., 10.};
+  std::vector atype = {1, 0, 1};
   double e;
-  std::vector f, v;
-  dp.compute (e, f, v, coord, atype, cell);
+  std::vector f, v;
+  dp.compute(e, f, v, coord, atype, cell);
 }
diff --git a/examples/infer_water/infer_water_hpp.cpp b/examples/infer_water/infer_water_hpp.cpp
index 1c73ebd600..a6563e8951 100644
--- a/examples/infer_water/infer_water_hpp.cpp
+++ b/examples/infer_water/infer_water_hpp.cpp
@@ -1,19 +1,17 @@
 /* header only C++ library
-*/
+ */
 #include "deepmd/deepmd.hpp"
 
-int main(){
-  deepmd::hpp::DeepPot dp ("graph.pb");
-  std::vector coord = {1., 0., 0., 0., 0., 1.5, 1. ,0. ,3.};
-  std::vector cell = {10., 0., 0., 0., 10., 0., 0., 0., 10.};
-  std::vector atype = {1, 0, 1};
+int main() {
+  deepmd::hpp::DeepPot dp("graph.pb");
+  std::vector coord = {1., 0., 0., 0., 0., 1.5, 1., 0., 3.};
+  std::vector cell = {10., 0., 0., 0., 10., 0., 0., 0., 10.};
+  std::vector atype = {1, 0, 1};
   double e;
-  std::vector f, v;
-  dp.compute (e, f, v, coord, atype, cell);
+  std::vector f, v;
+  dp.compute(e, f, v, coord, atype, cell);
   // print results
   printf("energy: %f\n", e);
-  for (int ii = 0; ii < 9; ++ii)
-    printf("force[%d]: %f\n", ii, f[ii]);
-  for (int ii = 0; ii < 9; ++ii)
-    printf("force[%d]: %f\n", ii, v[ii]);
+  for (int ii = 0; ii < 9; ++ii) printf("force[%d]: %f\n", ii, f[ii]);
+  for (int ii = 0; ii < 9; ++ii) printf("force[%d]: %f\n", ii, v[ii]);
 }
diff --git a/examples/infer_water/infer_water_nlist.cpp b/examples/infer_water/infer_water_nlist.cpp
index f45fd04585..c234c6b755 100644
--- a/examples/infer_water/infer_water_nlist.cpp
+++ b/examples/infer_water/infer_water_nlist.cpp
@@ -1,41 +1,35 @@
 /**
  * Infer water using a neighbor list
-*/
+ */
 
 #ifdef USE_NATIVE_CXX_API
 #include "deepmd/DeepPot.h"
+using deepmd::convert_nlist;
 using deepmd::DeepPot;
 using deepmd::InputNlist;
-using deepmd::convert_nlist;
 #else
 #include "deepmd/deepmd.hpp"
+using deepmd::hpp::convert_nlist;
 using deepmd::hpp::DeepPot;
 using deepmd::hpp::InputNlist;
-using deepmd::hpp::convert_nlist;
 #endif
 
-int main(){
-  DeepPot dp ("graph.pb");
-  std::vector coord = {1., 0., 0., 0., 0., 1.5, 1. ,0. ,3.};
-  std::vector cell = {10., 0., 0., 0., 10., 0., 0., 0., 10.};
-  std::vector atype = {1, 0, 1};
+int main() {
+  DeepPot dp("graph.pb");
+  std::vector coord = {1., 0., 0., 0., 0., 1.5, 1., 0., 3.};
+  std::vector cell = {10., 0., 0., 0., 10., 0., 0., 0., 10.};
+  std::vector atype = {1, 0, 1};
   // neighbor list
-  std::vector> nlist_vec = {
-    {1, 2},
-    {0, 2},
-    {0, 1}
-    };
+  std::vector> nlist_vec = {{1, 2}, {0, 2}, {0, 1}};
   double e;
-  std::vector f, v;
+  std::vector f, v;
   std::vector ilist(3), numneigh(3);
   std::vector firstneigh(3);
   InputNlist nlist(3, &ilist[0], &numneigh[0], &firstneigh[0]);
   convert_nlist(nlist, nlist_vec);
-  dp.compute (e, f, v, coord, atype, cell, 0, nlist, 0);
+  dp.compute(e, f, v, coord, atype, cell, 0, nlist, 0);
   // print results
   printf("energy: %f\n", e);
-  for (int ii = 0; ii < 9; ++ii)
-    printf("force[%d]: %f\n", ii, f[ii]);
-  for (int ii = 0; ii < 9; ++ii)
-    printf("force[%d]: %f\n", ii, v[ii]);
+  for (int ii = 0; ii < 9; ++ii) printf("force[%d]: %f\n", ii, f[ii]);
+  for (int ii = 0; ii < 9; ++ii) printf("force[%d]: %f\n", ii, v[ii]);
 }
diff --git a/examples/methane/index.raw b/examples/methane/index.raw
index 17847fb721..926717b11d 100644
--- a/examples/methane/index.raw
+++ b/examples/methane/index.raw
@@ -1 +1 @@
-0 1 2 3 4
\ No newline at end of file
+0 1 2 3 4
diff --git a/examples/methane/input.json b/examples/methane/input.json
index cbd5830cac..e148f122d2 100644
--- a/examples/methane/input.json
+++ b/examples/methane/input.json
@@ -1,7 +1,7 @@
 {
-    "graph_file": "frozen_model.pb",
-    "type_file": "type.raw",
-    "index_file": "index.raw",
-    "lambda": 1.0,
-    "pbc": false
+  "graph_file": "frozen_model.pb",
+  "type_file": "type.raw",
+  "index_file": "index.raw",
+  "lambda": 1.0,
+  "pbc": false
 }
diff --git a/examples/methane/methane.itp b/examples/methane/methane.itp
index d9cec75123..8ba0f0e4b2 100644
--- a/examples/methane/methane.itp
+++ b/examples/methane/methane.itp
@@ -17,10 +17,10 @@
 
 [ bonds ]
 ; i  j  func  b0  kb
- 1  2     5        
- 1  3     5        
- 1  4     5        
- 1  5     5        
+ 1  2     5
+ 1  3     5
+ 1  4     5
+ 1  5     5
 
 [ exclusions ]
 ; ai  aj1  aj2  aj3  aj4
diff --git a/examples/methane/run.sh b/examples/methane/run.sh
index ef4178b05f..042fed8709 100644
--- a/examples/methane/run.sh
+++ b/examples/methane/run.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
-gmx_mpi grompp -f md.mdp -c lig_solv.gro -p topol.top -o md.tpr -maxwarn 3;
+gmx_mpi grompp -f md.mdp -c lig_solv.gro -p topol.top -o md.tpr -maxwarn 3
 export GMX_DEEPMD_INPUT_JSON=input.json
-gmx_mpi mdrun -deffnm md;
\ No newline at end of file
+gmx_mpi mdrun -deffnm md
diff --git a/examples/methane/topol.top b/examples/methane/topol.top
index 58350bc91d..c61cbd8aad 100644
--- a/examples/methane/topol.top
+++ b/examples/methane/topol.top
@@ -11,5 +11,5 @@ methane in water
 
 [ molecules ]
 ; Compound        nmols
-methane          1     
+methane          1
 SOL               218
diff --git a/examples/methane/type.raw b/examples/methane/type.raw
index 13d50a8964..3900d20987 100644
--- a/examples/methane/type.raw
+++ b/examples/methane/type.raw
@@ -1 +1 @@
-1 0 0 0 0
\ No newline at end of file
+1 0 0 0 0
diff --git a/examples/nopbc/README.md b/examples/nopbc/README.md
index 9405d53877..02e3652f09 100644
--- a/examples/nopbc/README.md
+++ b/examples/nopbc/README.md
@@ -5,4 +5,3 @@ This is an example of training Deep Potential models using non-periodic data wit
 It's warned that the example data is of very limited amount, so it cannot be put into production. Full data set can be downloaded from the following reference.
 
 Zeng, J., Cao, L., Xu, M. et al. Complex reaction processes in combustion unraveled by neural network-based molecular dynamics simulation. Nat Commun 11, 5713 (2020). DOI: [10.1038/s41467-020-19497-z](https://doi.org/10.1038/s41467-020-19497-z)
-
diff --git a/examples/nopbc/train/input.json b/examples/nopbc/train/input.json
index f1f06da3f8..43669ad45c 100644
--- a/examples/nopbc/train/input.json
+++ b/examples/nopbc/train/input.json
@@ -1,60 +1,75 @@
 {
-    "_comment": " model parameters",
-    "model": {
-	"type_map":	["C", "H", "O"],
-	"descriptor" :{
-	    "type":		"se_e2_a",
-	    "sel":		[40, 80, 40],
-	    "rcut_smth":	1.00,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	12,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"_comment":	" that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "C",
+      "H",
+      "O"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        40,
+        80,
+        40
+      ],
+      "rcut_smth": 1.00,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 12,
+      "seed": 1,
+      "_comment": " that's all"
     },
-
-    "learning_rate" :{
-	"type":		"exp",
-	"decay_steps":	4000,
-	"start_lr":	0.001,	
-	"stop_lr":	3.51e-8,
-	"_comment":	"that's all"
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
     },
+    "_comment": " that's all"
+  },
 
-    "loss" :{
-	"type":		"ener",
-	"start_pref_e":	0.02,
-	"limit_pref_e":	1,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0,
-	"_comment":	" that's all"
-    },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 4000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
 
-    "training" : {
-	"training_data": {
-	    "systems":		"../data/",
-	    "batch_size":	"auto",
-	    "_comment":		"that's all"
-	},
-	"numb_steps":	4000000,
-	"seed":		10,
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"save_freq":	1000,
-	"_comment":	"that's all"
-    },    
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
 
-    "_comment":		"that's all"
-}
+  "training": {
+    "training_data": {
+      "systems": "../data/",
+      "batch_size": "auto",
+      "_comment": "that's all"
+    },
+    "numb_steps": 4000000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment": "that's all"
+  },
 
+  "_comment": "that's all"
+}
diff --git a/examples/nvnmd/train/train_cnn.json b/examples/nvnmd/train/train_cnn.json
index a961856247..91c1525a1c 100644
--- a/examples/nvnmd/train/train_cnn.json
+++ b/examples/nvnmd/train/train_cnn.json
@@ -1,40 +1,43 @@
 {
-    "nvnmd":{
-        "net_size": 128,
-        "sel": [60, 60],
-        "rcut": 6.0,
-        "rcut_smth": 0.5
-    },
-    "learning_rate": {
-        "type": "exp",
-        "start_lr": 1e-3,
-        "stop_lr": 3e-8,
-        "decay_steps": 5000
-    },
-    "loss": {
-        "start_pref_e": 0.02,
-        "limit_pref_e": 1,
-        "start_pref_f": 1000,
-        "limit_pref_f": 1,
-        "start_pref_v": 0,
-        "limit_pref_v": 0
-    },
-    "training": {
-        "seed": 1,
-        "stop_batch": 1000000,
-        "numb_test": 1,
-        "disp_file": "lcurve.out",
-        "disp_freq": 1000,
-        "save_ckpt": "model.ckpt",
-        "save_freq": 10000,
-        "training_data": {
-            "systems": [
-                "../data"
-            ],
-            "set_prefix": "set",
-            "batch_size": [
-                1
-            ]
-        }
+  "nvnmd": {
+    "net_size": 128,
+    "sel": [
+      60,
+      60
+    ],
+    "rcut": 6.0,
+    "rcut_smth": 0.5
+  },
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 1e-3,
+    "stop_lr": 3e-8,
+    "decay_steps": 5000
+  },
+  "loss": {
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0
+  },
+  "training": {
+    "seed": 1,
+    "stop_batch": 1000000,
+    "numb_test": 1,
+    "disp_file": "lcurve.out",
+    "disp_freq": 1000,
+    "save_ckpt": "model.ckpt",
+    "save_freq": 10000,
+    "training_data": {
+      "systems": [
+        "../data"
+      ],
+      "set_prefix": "set",
+      "batch_size": [
+        1
+      ]
     }
+  }
 }
diff --git a/examples/nvnmd/train/train_qnn.json b/examples/nvnmd/train/train_qnn.json
index 1a63f5c2c6..25db3294d3 100644
--- a/examples/nvnmd/train/train_qnn.json
+++ b/examples/nvnmd/train/train_qnn.json
@@ -1,40 +1,43 @@
 {
-    "nvnmd": {
-        "net_size": 128,
-        "sel": [60, 60],
-        "rcut": 6.0,
-        "rcut_smth": 0.5
-    },
-    "learning_rate": {
-        "type": "exp",
-        "start_lr": 1e-8,
-        "stop_lr": 1e-9,
-        "decay_steps": 5000
-    },
-    "loss": {
-        "start_pref_e": 1,
-        "limit_pref_e": 1,
-        "start_pref_f": 1,
-        "limit_pref_f": 1,
-        "start_pref_v": 0,
-        "limit_pref_v": 0
-    },
-    "training": {
-        "seed": 1,
-        "stop_batch": 100000,
-        "numb_test": 1,
-        "disp_file": "lcurve.out",
-        "disp_freq": 1000,
-        "save_ckpt": "model.ckpt",
-        "save_freq": 10000,
-        "training_data": {
-            "systems": [
-                "../data"
-            ],
-            "set_prefix": "set",
-            "batch_size": [
-                1
-            ]
-        }
+  "nvnmd": {
+    "net_size": 128,
+    "sel": [
+      60,
+      60
+    ],
+    "rcut": 6.0,
+    "rcut_smth": 0.5
+  },
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 1e-8,
+    "stop_lr": 1e-9,
+    "decay_steps": 5000
+  },
+  "loss": {
+    "start_pref_e": 1,
+    "limit_pref_e": 1,
+    "start_pref_f": 1,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0
+  },
+  "training": {
+    "seed": 1,
+    "stop_batch": 100000,
+    "numb_test": 1,
+    "disp_file": "lcurve.out",
+    "disp_freq": 1000,
+    "save_ckpt": "model.ckpt",
+    "save_freq": 10000,
+    "training_data": {
+      "systems": [
+        "../data"
+      ],
+      "set_prefix": "set",
+      "batch_size": [
+        1
+      ]
     }
-}
\ No newline at end of file
+  }
+}
diff --git a/examples/water/dplr/lmp/conf.lmp b/examples/water/dplr/lmp/conf.lmp
index 11905861c9..f0bc2bf80c 100644
--- a/examples/water/dplr/lmp/conf.lmp
+++ b/examples/water/dplr/lmp/conf.lmp
@@ -13,7 +13,7 @@
 Masses
 
 1 16
-2 2 
+2 2
 3 16
 
 Atoms
diff --git a/examples/water/dplr/lmp/in.lammps b/examples/water/dplr/lmp/in.lammps
index eb082ea5ab..4b5b09f8b2 100644
--- a/examples/water/dplr/lmp/in.lammps
+++ b/examples/water/dplr/lmp/in.lammps
@@ -67,5 +67,3 @@ velocity        real_atom zero linear
 
 run             ${NSTEPS}
 write_data	out.lmp nocoeff
-
-
diff --git a/examples/water/dplr/train/dw.json b/examples/water/dplr/train/dw.json
index 954e299dc2..ccd3a1d910 100644
--- a/examples/water/dplr/train/dw.json
+++ b/examples/water/dplr/train/dw.json
@@ -1,67 +1,84 @@
 {
-    "_comment": " model parameters",
-    "model":{
-	"type_map":		["O", "H"],
-	"descriptor" :{
-	    "type":		"se_e2_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	0.50,
-	    "rcut":		6.00,
-	    "neuron":		[16, 32, 64],
-	    "resnet_dt":	false,
-	    "axis_neuron":	8,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"fitting_net": {
-	    "type":		"dipole",
-	    "dipole_type":	[0],
-	    "neuron":		[128, 128, 128],
-	    "resnet_dt":	true,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"_comment":	" that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 0.50,
+      "rcut": 6.00,
+      "neuron": [
+        16,
+        32,
+        64
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 8,
+      "seed": 1,
+      "_comment": " that's all"
     },
-    
-    "learning_rate" :{
-	"type":		"exp",
-	"start_lr":	0.01,
-	"stop_lr":	1e-7,
-	"decay_steps":	5000,
-	"_comment":	"that's all"
+    "fitting_net": {
+      "type": "dipole",
+      "dipole_type": [
+        0
+      ],
+      "neuron": [
+        128,
+        128,
+        128
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
     },
+    "_comment": " that's all"
+  },
 
-    "loss": {
-	"type":		"tensor",
-	"pref":		0.0,
-	"pref_atomic":	1.0,
-	"_comment": " that's all"
-    },
-
-    "_comment": " traing controls",
-    "training": {
-	"training_data": {
-	    "systems":		["data"],
-	    "batch_size":	"auto",
-	    "set_prefix":	"set",    
-	    "_comment":		"that's all"
-	},
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 0.01,
+    "stop_lr": 1e-7,
+    "decay_steps": 5000,
+    "_comment": "that's all"
+  },
 
-	"numb_steps":	2000,
-	"seed":		1,
+  "loss": {
+    "type": "tensor",
+    "pref": 0.0,
+    "pref_atomic": 1.0,
+    "_comment": " that's all"
+  },
 
-	"_comment": " display and restart",
-	"_comment": " frequencies counted in batch",
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"save_freq":	500,
-	"save_ckpt":	"model.ckpt",
-	"disp_training":true,
-	"time_training":true,
-	"_comment":	"that's all"
+  "_comment": " traing controls",
+  "training": {
+    "training_data": {
+      "systems": [
+        "data"
+      ],
+      "batch_size": "auto",
+      "set_prefix": "set",
+      "_comment": "that's all"
     },
 
-    "_comment":		"that's all"
-}
+    "numb_steps": 2000,
+    "seed": 1,
 
+    "_comment": " display and restart",
+    "_comment": " frequencies counted in batch",
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 500,
+    "save_ckpt": "model.ckpt",
+    "disp_training": true,
+    "time_training": true,
+    "_comment": "that's all"
+  },
+
+  "_comment": "that's all"
+}
diff --git a/examples/water/dplr/train/ener.json b/examples/water/dplr/train/ener.json
index c2fa6ad877..f99195e4e9 100644
--- a/examples/water/dplr/train/ener.json
+++ b/examples/water/dplr/train/ener.json
@@ -1,68 +1,88 @@
 {
-    "_comment": " model parameters",
-    "model": {
-	"type_map":	["O", "H"],
-        "descriptor" :{
-            "type":             "se_e2_a",
-            "sel":              [46, 92],
-            "rcut_smth":        0.5,
-            "rcut":             6.00,
-            "neuron":           [16, 32, 64],
-            "resnet_dt":        false,
-            "axis_neuron":      8,
-            "seed":             3458359619,
-            "_comment":         " that's all"
-        },
-	"fitting_net" : {
-	    "neuron":		[128, 128, 128],
-	    "resnet_dt":	true,
-	    "seed":		108835393,
-	    "_comment":		" that's all"
-	},
-        "modifier": {
-            "type":             "dipole_charge",
-            "model_name":       "dw.pb",
-            "model_charge_map": [-8],
-            "sys_charge_map":   [6, 1],
-            "ewald_h":          1.00,
-            "ewald_beta":       0.40
-        },
-	"_comment":	" that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 0.5,
+      "rcut": 6.00,
+      "neuron": [
+        16,
+        32,
+        64
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 8,
+      "seed": 3458359619,
+      "_comment": " that's all"
     },
-
-    "learning_rate" :{
-	"type":		"exp",
-	"decay_steps":	5000,
-	"start_lr":	0.001,	
-	"stop_lr":	1.0e-8,
-	"_comment":	"that's all"
+    "fitting_net": {
+      "neuron": [
+        128,
+        128,
+        128
+      ],
+      "resnet_dt": true,
+      "seed": 108835393,
+      "_comment": " that's all"
     },
-
-    "loss" :{
-	"type":		"ener",
-	"start_pref_e":	0.02,
-	"limit_pref_e":	1,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0,
-	"_comment":	" that's all"
+    "modifier": {
+      "type": "dipole_charge",
+      "model_name": "dw.pb",
+      "model_charge_map": [
+        -8
+      ],
+      "sys_charge_map": [
+        6,
+        1
+      ],
+      "ewald_h": 1.00,
+      "ewald_beta": 0.40
     },
+    "_comment": " that's all"
+  },
 
-    "training" : {
-	"training_data": {
-	    "systems":		["data"],
-	    "batch_size":	"auto",
-	    "_comment":		"that's all"
-	},
-	"numb_steps":	2000,
-	"seed":	        4266336533,
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"save_freq":	500,
-	"_comment":	"that's all"
-    },    
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 1.0e-8,
+    "_comment": "that's all"
+  },
 
-    "_comment":		"that's all"
-}
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
 
+  "training": {
+    "training_data": {
+      "systems": [
+        "data"
+      ],
+      "batch_size": "auto",
+      "_comment": "that's all"
+    },
+    "numb_steps": 2000,
+    "seed": 4266336533,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 500,
+    "_comment": "that's all"
+  },
+
+  "_comment": "that's all"
+}
diff --git a/examples/water/gmx/index.raw b/examples/water/gmx/index.raw
index 0b9395650e..cddafdfed3 100644
--- a/examples/water/gmx/index.raw
+++ b/examples/water/gmx/index.raw
@@ -1 +1 @@
-0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767
\ No newline at end of file
+0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767
diff --git a/examples/water/gmx/input.json b/examples/water/gmx/input.json
index c4a5113f1b..070c9f12d7 100644
--- a/examples/water/gmx/input.json
+++ b/examples/water/gmx/input.json
@@ -1,6 +1,6 @@
 {
-    "graph_file" : "frozen_model.pb",
-    "type_file": "type.raw",
-    "index_file": "index.raw",
-    "lambda": 1.0
+  "graph_file": "frozen_model.pb",
+  "type_file": "type.raw",
+  "index_file": "index.raw",
+  "lambda": 1.0
 }
diff --git a/examples/water/gmx/md.mdp b/examples/water/gmx/md.mdp
index 058040ea09..2cd3e2a592 100755
--- a/examples/water/gmx/md.mdp
+++ b/examples/water/gmx/md.mdp
@@ -46,4 +46,3 @@ DispCorr                 = AllEnerPres
 pcoupl                   = no
 
 gen-vel                  = yes
-
diff --git a/examples/water/gmx/md.sh b/examples/water/gmx/md.sh
index 9a5f19ee8f..0053261cfb 100755
--- a/examples/water/gmx/md.sh
+++ b/examples/water/gmx/md.sh
@@ -2,4 +2,4 @@
 export GMX_DEEPMD_INPUT_JSON=input.json
 gmx_mpi grompp -f md.mdp -c water.gro -p water.top -o md.tpr -maxwarn 3
 gmx_mpi mdrun -deffnm md
-gmx_mpi rdf -f md.trr -s md.tpr -o md_rdf.xvg -ref "name OW" -sel "name OW"
\ No newline at end of file
+gmx_mpi rdf -f md.trr -s md.tpr -o md_rdf.xvg -ref "name OW" -sel "name OW"
diff --git a/examples/water/gmx/type.raw b/examples/water/gmx/type.raw
index 664d90548b..f8e54eb171 100644
--- a/examples/water/gmx/type.raw
+++ b/examples/water/gmx/type.raw
@@ -1 +1 @@
-0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1
\ No newline at end of file
+0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1
diff --git a/examples/water/hybrid/input.json b/examples/water/hybrid/input.json
index a72591fce2..5f4cd6c5ec 100644
--- a/examples/water/hybrid/input.json
+++ b/examples/water/hybrid/input.json
@@ -1,81 +1,107 @@
 {
-    "_comment": " model parameters",
-    "model": {
-	"type_map":	["O", "H"],
-        "descriptor" :{
-            "type":             "hybrid",
-            "list" : [
-                {
-                    "type":     "se_e2_a",
-                    "sel":              [20, 40],
-                    "rcut_smth":        0.50,
-                    "rcut":             4.00,
-                    "neuron":           [10, 20, 40],
-                    "resnet_dt":        false,
-                    "axis_neuron":      4,
-                    "seed":             1,
-                    "_comment": " that's all"
-                },
-                {
-                    "type":     "se_e2_r",
-                    "sel":              [46, 92],
-                    "rcut_smth":        0.50,
-                    "rcut":             6.00,
-                    "neuron":           [5, 10, 20],
-                    "resnet_dt":        false,
-                    "seed":             1,
-                    "_comment": " that's all"
-                }
-            ]
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "hybrid",
+      "list": [
+        {
+          "type": "se_e2_a",
+          "sel": [
+            20,
+            40
+          ],
+          "rcut_smth": 0.50,
+          "rcut": 4.00,
+          "neuron": [
+            10,
+            20,
+            40
+          ],
+          "resnet_dt": false,
+          "axis_neuron": 4,
+          "seed": 1,
+          "_comment": " that's all"
         },
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"_comment":	" that's all"
+        {
+          "type": "se_e2_r",
+          "sel": [
+            46,
+            92
+          ],
+          "rcut_smth": 0.50,
+          "rcut": 6.00,
+          "neuron": [
+            5,
+            10,
+            20
+          ],
+          "resnet_dt": false,
+          "seed": 1,
+          "_comment": " that's all"
+        }
+      ]
     },
-
-    "learning_rate" :{
-	"type":		"exp",
-	"decay_steps":	5000,
-	"start_lr":	0.001,	
-	"stop_lr":	3.51e-8,
-	"_comment":	"that's all"
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
     },
+    "_comment": " that's all"
+  },
 
-    "loss" :{
-	"type":		"ener",
-	"start_pref_e":	0.02,
-	"limit_pref_e":	1,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0,
-	"_comment":	" that's all"
-    },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
 
-    "training" : {
-	"training_data": {
-	    "systems":		["../data/data_0/", "../data/data_1/", "../data/data_2/"],
-	    "batch_size":	"auto",
-	    "_comment":		"that's all"
-	},
-	"validation_data":{
-	    "systems":		["../data/data_3"],
-	    "batch_size":	1,
-	    "numb_btch":	3,
-	    "_comment":		"that's all"
-	},
-	"numb_steps":	1000000,
-	"seed":		10,
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"save_freq":	1000,
-	"_comment":	"that's all"
-    },    
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
 
-    "_comment":		"that's all"
-}
+  "training": {
+    "training_data": {
+      "systems": [
+        "../data/data_0/",
+        "../data/data_1/",
+        "../data/data_2/"
+      ],
+      "batch_size": "auto",
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "../data/data_3"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 1000000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment": "that's all"
+  },
 
+  "_comment": "that's all"
+}
diff --git a/examples/water/ipi/input.xml b/examples/water/ipi/input.xml
index fa2e244772..428f438ecd 100755
--- a/examples/water/ipi/input.xml
+++ b/examples/water/ipi/input.xml
@@ -28,10 +28,10 @@
        
         
           
-            [   1.119252684345e-2,    8.975945537096e-6,   -2.728650447177e-5,   -7.011051989908e-4,    8.414373543550e-3, 
-          1.194065829660e-7,    1.071876106695e-6,   -9.439643019253e-6,   -1.329262116026e-5,   -2.667325152958e-6, 
-          2.161979961890e-4,    9.439643019253e-6,    4.639122293442e-5,    4.329184279724e-6,    8.132076333400e-5, 
-         -7.322257663569e-4,    1.329262116026e-5,   -4.329184279724e-6,    5.173717780694e-4,    1.590872642196e-5, 
+            [   1.119252684345e-2,    8.975945537096e-6,   -2.728650447177e-5,   -7.011051989908e-4,    8.414373543550e-3,
+          1.194065829660e-7,    1.071876106695e-6,   -9.439643019253e-6,   -1.329262116026e-5,   -2.667325152958e-6,
+          2.161979961890e-4,    9.439643019253e-6,    4.639122293442e-5,    4.329184279724e-6,    8.132076333400e-5,
+         -7.322257663569e-4,    1.329262116026e-5,   -4.329184279724e-6,    5.173717780694e-4,    1.590872642196e-5,
           8.299189140989e-3,    2.667325152958e-6,   -8.132076333400e-5,   -1.590872642196e-5,    6.992095202254e-3
            ]
           
diff --git a/examples/water/ipi/water.json b/examples/water/ipi/water.json
index db5b00423e..0b4c588583 100644
--- a/examples/water/ipi/water.json
+++ b/examples/water/ipi/water.json
@@ -1,13 +1,13 @@
 {
-    "verbose":		false,
-    "use_unix":		true,
-    "port":		31415,
-    "host":		"localhost",
-    "graph_file":	"graph.pb",
-    "coord_file":	"conf.xyz",
-    "atom_type" : {
-	"OW":		0, 
-	"HW1":		1,
-	"HW2":		1
-    }
+  "verbose": false,
+  "use_unix": true,
+  "port": 31415,
+  "host": "localhost",
+  "graph_file": "graph.pb",
+  "coord_file": "conf.xyz",
+  "atom_type": {
+    "OW": 0,
+    "HW1": 1,
+    "HW2": 1
+  }
 }
diff --git a/examples/water/lmp/in.lammps b/examples/water/lmp/in.lammps
index 828beb4c68..f38db5836b 100644
--- a/examples/water/lmp/in.lammps
+++ b/examples/water/lmp/in.lammps
@@ -20,6 +20,6 @@ fix             1 all nvt temp 330.0 330.0 0.5
 timestep        0.0005
 thermo_style    custom step pe ke etotal temp press vol
 thermo          100
-dump		1 all custom 100 water.dump id type x y z 
+dump		1 all custom 100 water.dump id type x y z
 
 run             1000
diff --git a/examples/water/lmp/in.plugin.lammps b/examples/water/lmp/in.plugin.lammps
index 7791c7ae77..40b40dbc03 100644
--- a/examples/water/lmp/in.plugin.lammps
+++ b/examples/water/lmp/in.plugin.lammps
@@ -23,6 +23,6 @@ fix             1 all nvt temp 330.0 330.0 0.5
 timestep        0.0005
 thermo_style    custom step pe ke etotal temp press vol
 thermo          100
-dump		1 all custom 100 water.dump id type x y z 
+dump		1 all custom 100 water.dump id type x y z
 
 run             1000
diff --git a/examples/water/lmp/water.lmp b/examples/water/lmp/water.lmp
index 4036b7522a..8bd1def35a 100644
--- a/examples/water/lmp/water.lmp
+++ b/examples/water/lmp/water.lmp
@@ -1,4 +1,4 @@
-# LAMMPS data 
+# LAMMPS data
 192 atoms
 2 atom types
 0.0 12.44470 xlo xhi
diff --git a/examples/water/se_atten/input.json b/examples/water/se_atten/input.json
index 2d54eaa1b6..3aeeeedbf2 100644
--- a/examples/water/se_atten/input.json
+++ b/examples/water/se_atten/input.json
@@ -1,70 +1,86 @@
 {
-    "_comment": " model parameters",
-    "model": {
-	"type_map":	["O", "H"],
-	"descriptor" :{
-	    "type":		"se_atten",
-	    "sel":		120,
-	    "rcut_smth":	0.50,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	16,
-	    "seed":		1,
-		"attn":		128,
-		"attn_layer":	2,
-		"attn_dotr": 	true,
-		"attn_mask": 	false,
-	    "_comment":		" that's all"
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"_comment":	" that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_atten",
+      "sel": 120,
+      "rcut_smth": 0.50,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1,
+      "attn": 128,
+      "attn_layer": 2,
+      "attn_dotr": true,
+      "attn_mask": false,
+      "_comment": " that's all"
     },
-
-    "learning_rate" :{
-	"type":		"exp",
-	"decay_steps":	5000,
-	"start_lr":	0.001,	
-	"stop_lr":	3.51e-8,
-	"_comment":	"that's all"
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
     },
+    "_comment": " that's all"
+  },
 
-    "loss" :{
-	"type":		"ener",
-	"start_pref_e":	0.02,
-	"limit_pref_e":	1,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0,
-	"_comment":	" that's all"
-    },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
 
-    "training" : {
-	"training_data": {
-	    "systems":		["../data/data_0/", "../data/data_1/", "../data/data_2/"],
-	    "batch_size":	"auto",
-	    "_comment":		"that's all"
-	},
-	"validation_data":{
-	    "systems":		["../data/data_3"],
-	    "batch_size":	1,
-	    "numb_btch":	3,
-	    "_comment":		"that's all"
-	},
-	"numb_steps":	1000000,
-	"seed":		10,
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"save_freq":	1000,
-	"_comment":	"that's all"
-    },    
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
 
-    "_comment":		"that's all"
-}
+  "training": {
+    "training_data": {
+      "systems": [
+        "../data/data_0/",
+        "../data/data_1/",
+        "../data/data_2/"
+      ],
+      "batch_size": "auto",
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "../data/data_3"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 1000000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment": "that's all"
+  },
 
+  "_comment": "that's all"
+}
diff --git a/examples/water/se_e2_a/input.json b/examples/water/se_e2_a/input.json
index ff87919d46..2e69108677 100644
--- a/examples/water/se_e2_a/input.json
+++ b/examples/water/se_e2_a/input.json
@@ -1,66 +1,85 @@
 {
-    "_comment": " model parameters",
-    "model": {
-	"type_map":	["O", "H"],
-	"descriptor" :{
-	    "type":		"se_e2_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	0.50,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	16,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"_comment":	" that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 0.50,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1,
+      "_comment": " that's all"
     },
-
-    "learning_rate" :{
-	"type":		"exp",
-	"decay_steps":	5000,
-	"start_lr":	0.001,	
-	"stop_lr":	3.51e-8,
-	"_comment":	"that's all"
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
     },
+    "_comment": " that's all"
+  },
 
-    "loss" :{
-	"type":		"ener",
-	"start_pref_e":	0.02,
-	"limit_pref_e":	1,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0,
-	"_comment":	" that's all"
-    },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
 
-    "training" : {
-	"training_data": {
-	    "systems":		["../data/data_0/", "../data/data_1/", "../data/data_2/"],
-	    "batch_size":	"auto",
-	    "_comment":		"that's all"
-	},
-	"validation_data":{
-	    "systems":		["../data/data_3"],
-	    "batch_size":	1,
-	    "numb_btch":	3,
-	    "_comment":		"that's all"
-	},
-	"numb_steps":	1000000,
-	"seed":		10,
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"save_freq":	1000,
-	"_comment":	"that's all"
-    },    
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
 
-    "_comment":		"that's all"
-}
+  "training": {
+    "training_data": {
+      "systems": [
+        "../data/data_0/",
+        "../data/data_1/",
+        "../data/data_2/"
+      ],
+      "batch_size": "auto",
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "../data/data_3"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 1000000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment": "that's all"
+  },
 
+  "_comment": "that's all"
+}
diff --git a/examples/water/se_e2_a_mixed_prec/input.json b/examples/water/se_e2_a_mixed_prec/input.json
index 889abedabf..d75cf3742a 100644
--- a/examples/water/se_e2_a_mixed_prec/input.json
+++ b/examples/water/se_e2_a_mixed_prec/input.json
@@ -1,70 +1,89 @@
 {
-    "_comment": " model parameters",
-    "model": {
-	"type_map":	["O", "H"],
-	"descriptor" :{
-	    "type":		"se_e2_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	0.50,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	16,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"_comment":	" that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 0.50,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1,
+      "_comment": " that's all"
     },
-
-    "learning_rate" :{
-	"type":		"exp",
-	"decay_steps":	5000,
-	"start_lr":	0.001,	
-	"stop_lr":	3.51e-8,
-	"_comment":	"that's all"
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
     },
+    "_comment": " that's all"
+  },
 
-    "loss" :{
-	"type":		"ener",
-	"start_pref_e":	0.02,
-	"limit_pref_e":	1,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0,
-	"_comment":	" that's all"
-    },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
 
-    "training" : {
-	"training_data": {
-	    "systems":		["../data/data_0/", "../data/data_1/", "../data/data_2/"],
-	    "batch_size":	"auto",
-	    "_comment":		"that's all"
-	},
-	"validation_data":{
-	    "systems":		["../data/data_3"],
-	    "batch_size":	1,
-	    "numb_btch":	3,
-	    "_comment":		"that's all"
-	},
-	"mixed_precision": {
-            "compute_prec": "float16",
-            "output_prec":  "float32"
-        },
-	"numb_steps":	1000000,
-	"seed":		10,
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"save_freq":	1000,
-	"_comment":	"that's all"
-    },    
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
 
-    "_comment":		"that's all"
-}
+  "training": {
+    "training_data": {
+      "systems": [
+        "../data/data_0/",
+        "../data/data_1/",
+        "../data/data_2/"
+      ],
+      "batch_size": "auto",
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "../data/data_3"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "mixed_precision": {
+      "compute_prec": "float16",
+      "output_prec": "float32"
+    },
+    "numb_steps": 1000000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment": "that's all"
+  },
 
+  "_comment": "that's all"
+}
diff --git a/examples/water/se_e2_a_tebd/input.json b/examples/water/se_e2_a_tebd/input.json
index 4f8b8698c7..00c25314fe 100644
--- a/examples/water/se_e2_a_tebd/input.json
+++ b/examples/water/se_e2_a_tebd/input.json
@@ -1,72 +1,95 @@
 {
-    "_comment": " model parameters",
-    "model": {
-	"type_map":	["O", "H"],
-	"type_embedding":{
-	    "neuron":		[2, 4, 8],
-	    "resnet_dt":	false,
-	    "seed":		1
-	},	    
-	"descriptor" :{
-	    "type":		"se_e2_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	0.50,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	16,
-	    "type_one_side":	true,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"_comment":	" that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "type_embedding": {
+      "neuron": [
+        2,
+        4,
+        8
+      ],
+      "resnet_dt": false,
+      "seed": 1
     },
-
-    "learning_rate" :{
-	"type":		"exp",
-	"decay_steps":	5000,
-	"start_lr":	0.001,	
-	"stop_lr":	3.51e-8,
-	"_comment":	"that's all"
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 0.50,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "type_one_side": true,
+      "seed": 1,
+      "_comment": " that's all"
     },
-
-    "loss" :{
-	"type":		"ener",
-	"start_pref_e":	0.02,
-	"limit_pref_e":	1,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0,
-	"_comment":	" that's all"
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
     },
+    "_comment": " that's all"
+  },
 
-    "training" : {
-	"training_data": {
-	    "systems":		["../data/data_0/", "../data/data_1/", "../data/data_2/"],
-	    "batch_size":	"auto",
-	    "_comment":		"that's all"
-	},
-	"validation_data":{
-	    "systems":		["../data/data_3"],
-	    "batch_size":	1,
-	    "numb_btch":	3,
-	    "_comment":		"that's all"
-	},
-	"numb_steps":	1000000,
-	"seed":		10,
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"save_freq":	1000,
-	"_comment":	"that's all"
-    },    
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
 
-    "_comment":		"that's all"
-}
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
 
+  "training": {
+    "training_data": {
+      "systems": [
+        "../data/data_0/",
+        "../data/data_1/",
+        "../data/data_2/"
+      ],
+      "batch_size": "auto",
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "../data/data_3"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 1000000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment": "that's all"
+  },
+
+  "_comment": "that's all"
+}
diff --git a/examples/water/se_e2_r/input.json b/examples/water/se_e2_r/input.json
index c30d07fa6e..4c1fa57d6a 100644
--- a/examples/water/se_e2_r/input.json
+++ b/examples/water/se_e2_r/input.json
@@ -1,65 +1,84 @@
 {
-    "_comment": " model parameters",
-    "model":	{
-	"type_map":		["O", "H"],
-	"descriptor": {
-	    "type":		"se_e2_r",
-	    "sel":		[46, 92],
-	    "rcut_smth":	0.50,
-	    "rcut":		6.00,
-	    "neuron":		[5, 10, 20],
-	    "resnet_dt":	false,
-	    "seed":		1,
-	    "_comment": " that's all"
-	},
-	"fitting_net" :{
-	    "neuron":		[120, 120, 120],
-	    "resnet_dt":	true,
-	    "seed":		1,
-	    "_comment":		"that's all"
-	},
-	"_comment":	" that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e2_r",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 0.50,
+      "rcut": 6.00,
+      "neuron": [
+        5,
+        10,
+        20
+      ],
+      "resnet_dt": false,
+      "seed": 1,
+      "_comment": " that's all"
     },
-
-    "learning_rate" : {
-	"type":		"exp",
-	"decay_steps":	5000,
-	"start_lr":	0.005,	
-	"stop_lr":	3.51e-8,
-	"_comment":	" that's all"
+    "fitting_net": {
+      "neuron": [
+        120,
+        120,
+        120
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": "that's all"
     },
+    "_comment": " that's all"
+  },
 
-    "loss" : {
-	"start_pref_e":	0.02,
-	"limit_pref_e":	1,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0,
-	"_comment":	" that's all"
-    },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.005,
+    "stop_lr": 3.51e-8,
+    "_comment": " that's all"
+  },
+
+  "loss": {
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
 
-    "_comment": " traing controls",
-    "training" : {
-	"training_data": {
-	    "systems":		["../data/data_0/", "../data/data_1/", "../data/data_2/"],
-	    "batch_size":	"auto",
-	    "_comment":		"that's all"
-	},
-	"validation_data":{
-	    "systems":		["../data/data_3"],
-	    "batch_size":	1,
-	    "numb_btch":	3,
-	    "_comment":		"that's all"
-	},
-	"numb_steps":	1000000,
-	"seed":		1,
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"save_freq":	1000,
-	"_comment":	"that's all"
+  "_comment": " training controls",
+  "training": {
+    "training_data": {
+      "systems": [
+        "../data/data_0/",
+        "../data/data_1/",
+        "../data/data_2/"
+      ],
+      "batch_size": "auto",
+      "_comment": "that's all"
     },
+    "validation_data": {
+      "systems": [
+        "../data/data_3"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 1000000,
+    "seed": 1,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment": "that's all"
+  },
 
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/examples/water/se_e3/input.json b/examples/water/se_e3/input.json
index 05a25d9ef9..2e7be5f570 100644
--- a/examples/water/se_e3/input.json
+++ b/examples/water/se_e3/input.json
@@ -1,73 +1,89 @@
 {
-    "_comment": " model parameters",
-    "model": {
-	"type_map":	["O", "H"],
-	"descriptor" :{
-	    "type":		"se_e3",
-	    "sel":		"auto",
-	    "rcut_smth":	0.50,
-	    "rcut":		5.80,
-	    "neuron":		[2, 4, 8],
-	    "resnet_dt":	false,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"fitting_net" : {
-	    "neuron":           [240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"_comment":	" that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e3",
+      "sel": "auto",
+      "rcut_smth": 0.50,
+      "rcut": 5.80,
+      "neuron": [
+        2,
+        4,
+        8
+      ],
+      "resnet_dt": false,
+      "seed": 1,
+      "_comment": " that's all"
     },
-
-    "learning_rate" :{
-	"type":		"exp",
-	"decay_steps":	5000,
-	"start_lr":	0.001,	
-	"stop_lr":	3.51e-8,
-	"_comment":	"that's all"
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
     },
+    "_comment": " that's all"
+  },
 
-    "loss" :{
-	"start_pref_e":	0.02,
-	"limit_pref_e":	1,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0,
-	"_comment":	" that's all"
-    },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
 
-    "_comment": " traing controls",
-    "training" : {
-	"training_data": {
-	    "systems":		["../data/data_0/", "../data/data_1/", "../data/data_2/"],
-	    "batch_size":	"auto",
-	    "_comment":		"that's all"
-	},
-	"validation_data":{
-	    "systems":		["../data/data_3"],
-	    "batch_size":	1,
-	    "numb_btch":	3,
-	    "_comment":		"that's all"
-	},
-	"numb_steps":	1000000,
-	"seed":		1,
+  "loss": {
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
 
-	"_comment": " display and restart",
-	"_comment": " frequencies counted in batch",
-	"disp_file":	"lcurve.out",
-	"disp_freq":	10,
-	"save_freq":	1000,
-	"save_ckpt":	"model.ckpt",
-	"disp_training":true,
-	"time_training":true,
-	"profiling":	false,
-	"profiling_file":"timeline.json",
-	"_comment":	"that's all"
+  "_comment": " training controls",
+  "training": {
+    "training_data": {
+      "systems": [
+        "../data/data_0/",
+        "../data/data_1/",
+        "../data/data_2/"
+      ],
+      "batch_size": "auto",
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "../data/data_3"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
     },
+    "numb_steps": 1000000,
+    "seed": 1,
 
-    "_comment":		"that's all"
-}
+    "_comment": " display and restart",
+    "_comment": " frequencies counted in batch",
+    "disp_file": "lcurve.out",
+    "disp_freq": 10,
+    "save_freq": 1000,
+    "save_ckpt": "model.ckpt",
+    "disp_training": true,
+    "time_training": true,
+    "profiling": false,
+    "profiling_file": "timeline.json",
+    "_comment": "that's all"
+  },
 
+  "_comment": "that's all"
+}
diff --git a/examples/water_multi_task/ener_dipole/input.json b/examples/water_multi_task/ener_dipole/input.json
index df60e3b763..21e5db7228 100644
--- a/examples/water_multi_task/ener_dipole/input.json
+++ b/examples/water_multi_task/ener_dipole/input.json
@@ -1,105 +1,131 @@
 {
-    "_comment": "that's all",
-    "model": {
-    "type_map": ["O", "H"],
+  "_comment": "that's all",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
     "descriptor": {
-        "type":     "se_e2_a",
-        "sel":      [46, 92],
-        "rcut_smth":    0.5,
-        "rcut":     6.0,
-        "neuron":       [25, 50, 100],
-        "resnet_dt":    false,
-        "axis_neuron":  16,
-        "seed":     1,
-        "_comment":     " that's all"
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 0.5,
+      "rcut": 6.0,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1,
+      "_comment": " that's all"
     },
     "fitting_net_dict": {
-        "water_dipole": {
-            "type":         "dipole",
-            "sel_type":     [0],
-            "neuron":       [100, 100, 100],
-            "resnet_dt":    true,
-            "seed":         1,
-            "_comment":     " that's all"
-        },
-        "water_ener": {
-            "neuron":       [240, 240, 240],
-            "resnet_dt":    true,
-            "seed":         1,
-            "_comment":     " that's all"
-        }
+      "water_dipole": {
+        "type": "dipole",
+        "sel_type": [
+          0
+        ],
+        "neuron": [
+          100,
+          100,
+          100
+        ],
+        "resnet_dt": true,
+        "seed": 1,
+        "_comment": " that's all"
+      },
+      "water_ener": {
+        "neuron": [
+          240,
+          240,
+          240
+        ],
+        "resnet_dt": true,
+        "seed": 1,
+        "_comment": " that's all"
+      }
     },
     "_comment": " that's all"
+  },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-08,
+    "_comment": "that's all"
+  },
+  "loss_dict": {
+    "water_dipole": {
+      "type": "tensor",
+      "pref": 1.0,
+      "pref_atomic": 1.0,
+      "_comment": " that's all"
     },
-    "learning_rate": {
-        "type":         "exp",
-        "decay_steps":  5000,
-        "start_lr":     0.001,
-        "stop_lr":      3.51e-08,
-        "_comment":     "that's all"
-    },
-    "loss_dict": {
-        "water_dipole": {
-            "type":     "tensor",
-            "pref":     1.0,
-            "pref_atomic":  1.0,
-            "_comment": " that's all"
-        },
-        "water_ener": {
-            "type":     "ener",
-            "start_pref_e": 0.02,
-            "limit_pref_e": 1,
-            "start_pref_f": 1000,
-            "limit_pref_f": 1,
-            "start_pref_v": 0,
-            "limit_pref_v": 0,
-            "_comment": " that's all"
-        }
-    },
-    "training": {
+    "water_ener": {
+      "type": "ener",
+      "start_pref_e": 0.02,
+      "limit_pref_e": 1,
+      "start_pref_f": 1000,
+      "limit_pref_f": 1,
+      "start_pref_v": 0,
+      "limit_pref_v": 0,
+      "_comment": " that's all"
+    }
+  },
+  "training": {
     "data_dict": {
-    "water_dipole": {
+      "water_dipole": {
         "training_data": {
-            "systems": [
-                "../../water_tensor/dipole/training_data/atomic_system",
-                "../../water_tensor/dipole/training_data/global_system"
-            ],
-            "batch_size": "auto",
-            "_comment": "that's all"
+          "systems": [
+            "../../water_tensor/dipole/training_data/atomic_system",
+            "../../water_tensor/dipole/training_data/global_system"
+          ],
+          "batch_size": "auto",
+          "_comment": "that's all"
         },
         "validation_data": {
-            "systems": [
-                "../../water_tensor/dipole/validation_data/atomic_system",
-                "../../water_tensor/dipole/validation_data/global_system"
-            ],
-            "batch_size": 1,
-            "numb_btch": 3,
-            "_comment": "that's all"
+          "systems": [
+            "../../water_tensor/dipole/validation_data/atomic_system",
+            "../../water_tensor/dipole/validation_data/global_system"
+          ],
+          "batch_size": 1,
+          "numb_btch": 3,
+          "_comment": "that's all"
         }
-    },
-    "water_ener": {
+      },
+      "water_ener": {
         "training_data": {
-            "systems":      ["../../water/data/data_0/", "../../water/data/data_1/", "../../water/data/data_2/"],
-            "batch_size":   "auto",
-            "_comment":     "that's all"
+          "systems": [
+            "../../water/data/data_0/",
+            "../../water/data/data_1/",
+            "../../water/data/data_2/"
+          ],
+          "batch_size": "auto",
+          "_comment": "that's all"
         },
         "validation_data": {
-            "systems":      ["../../water/data/data_3/"],
-            "batch_size":   1,
-            "numb_btch":    3,
-            "_comment":     "that's all"
+          "systems": [
+            "../../water/data/data_3/"
+          ],
+          "batch_size": 1,
+          "numb_btch": 3,
+          "_comment": "that's all"
         }
-    }
+      }
     },
     "fitting_weight": {
-        "water_dipole": 10,
-        "water_ener":   20
+      "water_dipole": 10,
+      "water_ener": 20
     },
-    "numb_steps":   1000000,
-    "seed":         10,
-    "disp_file":    "lcurve.out",
-    "disp_freq":    100,
-    "save_freq":    1000,
-    "_comment":     "that's all"
-    }
-}
\ No newline at end of file
+    "numb_steps": 1000000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment": "that's all"
+  }
+}
diff --git a/examples/water_tensor/dipole/dipole_input.json b/examples/water_tensor/dipole/dipole_input.json
index 3550e9f60b..fe642cfba9 100644
--- a/examples/water_tensor/dipole/dipole_input.json
+++ b/examples/water_tensor/dipole/dipole_input.json
@@ -1,79 +1,81 @@
 {
-	"_comment": " model parameters",
-	"model": {
-		"type_map": [
-			"O",
-			"H"
-		],
-		"descriptor": {
-			"type": "se_e2_a",
-			"sel": [
-				46,
-				92
-			],
-			"rcut_smth": 3.80,
-			"rcut": 4.00,
-			"neuron": [
-				25,
-				50,
-				100
-			],
-			"resnet_dt": false,
-			"axis_neuron": 6,
-			"seed": 1,
-			"_comment": " that's all"
-		},
-		"fitting_net": {
-			"type": "dipole",
-			"sel_type": [
-				0
-			],
-			"neuron": [
-				100,
-				100,
-				100
-			],
-			"resnet_dt": true,
-			"seed": 1,
-			"_comment": " that's all"
-		},
-		"_comment": " that's all"
-	},
-	"learning_rate": {
-		"type": "exp",
-		"start_lr": 0.01,
-		"decay_steps": 5000,
-		"_comment": "that's all"
-	},
-	"loss": {
-		"type":"tensor",
-		"pref":			1.0,
-		"pref_atomic":	1.0,
-		"_comment": " that's all"
-	},
-	"_comment": " traing controls",
-	"training": {
-		"training_data": {
-			"systems":		[
-				"./training_data/atomic_system",
-				"./training_data/global_system"],
-			"batch_size":	"auto",
-			"_comment":		"that's all"
-		},
-		"validation_data":{
-			"systems":		[
-				"./validation_data/atomic_system",
-				"./validation_data/global_system"],
-			"batch_size":	1,
-			"numb_btch":	3,
-			"_comment":		"that's all"
-		},
-		"numb_steps":	2000,
-		"seed":		10,
-		"disp_file":	"lcurve.out",
-		"disp_freq":	100,
-		"save_freq":	1000,
-		"_comment":	"that's all"
-	},
-	"_comment": "that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 3.80,
+      "rcut": 4.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 6,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "fitting_net": {
+      "type": "dipole",
+      "sel_type": [
+        0
+      ],
+      "neuron": [
+        100,
+        100,
+        100
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "_comment": " that's all"
+  },
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 0.01,
+    "decay_steps": 5000,
+    "_comment": "that's all"
+  },
+  "loss": {
+    "type": "tensor",
+    "pref": 1.0,
+    "pref_atomic": 1.0,
+    "_comment": " that's all"
+  },
+  "_comment": " traing controls",
+  "training": {
+    "training_data": {
+      "systems": [
+        "./training_data/atomic_system",
+        "./training_data/global_system"
+      ],
+      "batch_size": "auto",
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "./validation_data/atomic_system",
+        "./validation_data/global_system"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 2000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment": "that's all"
+  },
+  "_comment": "that's all"
 }
diff --git a/examples/water_tensor/dipole/training_data/atomic_system/type.raw b/examples/water_tensor/dipole/training_data/atomic_system/type.raw
index 5b62228245..6c71c85e58 100644
--- a/examples/water_tensor/dipole/training_data/atomic_system/type.raw
+++ b/examples/water_tensor/dipole/training_data/atomic_system/type.raw
@@ -1 +1 @@
-0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 
+0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1
diff --git a/examples/water_tensor/dipole/training_data/global_system/type.raw b/examples/water_tensor/dipole/training_data/global_system/type.raw
index 5b62228245..6c71c85e58 100644
--- a/examples/water_tensor/dipole/training_data/global_system/type.raw
+++ b/examples/water_tensor/dipole/training_data/global_system/type.raw
@@ -1 +1 @@
-0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 
+0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1
diff --git a/examples/water_tensor/dipole/validation_data/atomic_system/type.raw b/examples/water_tensor/dipole/validation_data/atomic_system/type.raw
index 5b62228245..6c71c85e58 100644
--- a/examples/water_tensor/dipole/validation_data/atomic_system/type.raw
+++ b/examples/water_tensor/dipole/validation_data/atomic_system/type.raw
@@ -1 +1 @@
-0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 
+0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1
diff --git a/examples/water_tensor/dipole/validation_data/global_system/type.raw b/examples/water_tensor/dipole/validation_data/global_system/type.raw
index 5b62228245..6c71c85e58 100644
--- a/examples/water_tensor/dipole/validation_data/global_system/type.raw
+++ b/examples/water_tensor/dipole/validation_data/global_system/type.raw
@@ -1 +1 @@
-0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 
+0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1
diff --git a/examples/water_tensor/polar/polar_input.json b/examples/water_tensor/polar/polar_input.json
index ff4a89ca0c..d29928ae09 100644
--- a/examples/water_tensor/polar/polar_input.json
+++ b/examples/water_tensor/polar/polar_input.json
@@ -1,70 +1,87 @@
 {
-    "_comment": " model parameters",
-    "model":{
-	"type_map":		["O", "H"],
-	"data_stat_nbatch":	10,
-	"descriptor" :{
-	    "type":		"se_e2_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	16,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"fitting_net": {
-	    "type":		"polar",
-	    "sel_type":		[0],
-	    "fit_diag":		false,
-		"neuron":		[100, 100, 100],
-	    "resnet_dt":	true,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"_comment":	" that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "data_stat_nbatch": 10,
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1,
+      "_comment": " that's all"
     },
-    
-    "learning_rate" :{
-	"type":		"exp",
-	"decay_steps":	5000,
-	"start_lr":	0.01,
-	"stop_lr":	3.51e-7,
-	"_comment":	"that's all"
-    },
-    "loss":{
-		"type":"tensor",
-		"pref_atomic":1.0,
-		"pref":1.0,
-		"_comment":"that's all"
+    "fitting_net": {
+      "type": "polar",
+      "sel_type": [
+        0
+      ],
+      "fit_diag": false,
+      "neuron": [
+        100,
+        100,
+        100
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
     },
+    "_comment": " that's all"
+  },
 
-    "_comment": " traing controls",
-    "training": {
-		"training_data": {
-			"systems":		[
-				"./training_data/atomic_system",
-				"./training_data/global_system"],
-			"batch_size":	"auto",
-			"_comment":		"that's all"
-		},
-		"validation_data":{
-			"systems":		[
-				"./validation_data/atomic_system",
-				"./validation_data/global_system"],
-			"batch_size":	1,
-			"numb_btch":	3,
-			"_comment":		"that's all"
-		},
-		"numb_steps":	2000,
-		"seed":		10,
-		"disp_file":	"lcurve.out",
-		"disp_freq":	100,
-		"save_freq":	1000,
-		"_comment":	"that's all"
-	},
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.01,
+    "stop_lr": 3.51e-7,
+    "_comment": "that's all"
+  },
+  "loss": {
+    "type": "tensor",
+    "pref_atomic": 1.0,
+    "pref": 1.0,
+    "_comment": "that's all"
+  },
 
-    "_comment":		"that's all"
-}
+  "_comment": " traing controls",
+  "training": {
+    "training_data": {
+      "systems": [
+        "./training_data/atomic_system",
+        "./training_data/global_system"
+      ],
+      "batch_size": "auto",
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "./validation_data/atomic_system",
+        "./validation_data/global_system"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 2000,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "save_freq": 1000,
+    "_comment": "that's all"
+  },
 
+  "_comment": "that's all"
+}
diff --git a/examples/water_tensor/polar/training_data/atomic_system/type.raw b/examples/water_tensor/polar/training_data/atomic_system/type.raw
index 5b62228245..6c71c85e58 100644
--- a/examples/water_tensor/polar/training_data/atomic_system/type.raw
+++ b/examples/water_tensor/polar/training_data/atomic_system/type.raw
@@ -1 +1 @@
-0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 
+0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1
diff --git a/examples/water_tensor/polar/training_data/global_system/type.raw b/examples/water_tensor/polar/training_data/global_system/type.raw
index 5b62228245..6c71c85e58 100644
--- a/examples/water_tensor/polar/training_data/global_system/type.raw
+++ b/examples/water_tensor/polar/training_data/global_system/type.raw
@@ -1 +1 @@
-0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 
+0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1
diff --git a/examples/water_tensor/polar/validation_data/atomic_system/type.raw b/examples/water_tensor/polar/validation_data/atomic_system/type.raw
index 5b62228245..6c71c85e58 100644
--- a/examples/water_tensor/polar/validation_data/atomic_system/type.raw
+++ b/examples/water_tensor/polar/validation_data/atomic_system/type.raw
@@ -1 +1 @@
-0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 
+0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1
diff --git a/examples/water_tensor/polar/validation_data/global_system/type.raw b/examples/water_tensor/polar/validation_data/global_system/type.raw
index 5b62228245..6c71c85e58 100644
--- a/examples/water_tensor/polar/validation_data/global_system/type.raw
+++ b/examples/water_tensor/polar/validation_data/global_system/type.raw
@@ -1 +1 @@
-0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 
+0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1
diff --git a/setup.py b/setup.py
index 9d58d8eba4..bfd2cf8b2b 100644
--- a/setup.py
+++ b/setup.py
@@ -3,14 +3,21 @@
 import os
 import sys
 
-from skbuild import setup
-from wheel.bdist_wheel import bdist_wheel
+from skbuild import (
+    setup,
+)
+from wheel.bdist_wheel import (
+    bdist_wheel,
+)
 
 topdir = os.path.abspath(os.path.dirname(__file__))
-sys.path.insert(0, os.path.join(topdir, 'backend'))
-
-from find_tensorflow import find_tensorflow, get_tf_requirement, get_tf_version
+sys.path.insert(0, os.path.join(topdir, "backend"))
 
+from find_tensorflow import (
+    find_tensorflow,
+    get_tf_requirement,
+    get_tf_version,
+)
 
 cmake_args = []
 # get variant option from the environment varibles, available: cpu, cuda, rocm
@@ -103,7 +110,7 @@ def get_tag(self):
             "dargs>=0.3.4",
             "sphinx-argparse",
             "pygments-lammps",
-            ],
+        ],
         "lmp": [
             "lammps-manylinux-2-28~=2022.6.23.2.2; platform_system=='Linux'",
             "lammps~=2022.6.23.2.2; platform_system!='Linux'",
@@ -133,7 +140,7 @@ def get_tag(self):
         "console_scripts": ["dp = deepmd.entrypoints.main:main"],
         "lammps.plugins": ["deepmd = deepmd.lmp:get_op_dir"],
     },
-    cmdclass = {
+    cmdclass={
         "bdist_wheel": bdist_wheel_abi3,
     },
 )
diff --git a/source/3rdparty/json.hpp b/source/3rdparty/json.hpp
index 242f034c3c..a70aaf8cbc 100644
--- a/source/3rdparty/json.hpp
+++ b/source/3rdparty/json.hpp
@@ -25444,4 +25444,4 @@ inline nlohmann::json::json_pointer operator "" _json_pointer(const char* s, std
 
 
 
-#endif  // INCLUDE_NLOHMANN_JSON_HPP_
\ No newline at end of file
+#endif  // INCLUDE_NLOHMANN_JSON_HPP_
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index 84648c169a..0b6e99cff4 100644
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -9,39 +9,46 @@ if(BUILD_TESTING)
 endif()
 
 # build cpp or python interfaces
-if (NOT DEFINED BUILD_CPP_IF) 
+if(NOT DEFINED BUILD_CPP_IF)
   set(BUILD_CPP_IF TRUE)
-endif (NOT DEFINED BUILD_CPP_IF)
-if (NOT DEFINED BUILD_PY_IF) 
+endif(NOT DEFINED BUILD_CPP_IF)
+if(NOT DEFINED BUILD_PY_IF)
   set(BUILD_PY_IF FALSE)
-endif (NOT DEFINED BUILD_PY_IF)
-if ((NOT BUILD_PY_IF) AND (NOT BUILD_CPP_IF))
+endif(NOT DEFINED BUILD_PY_IF)
+if((NOT BUILD_PY_IF) AND (NOT BUILD_CPP_IF))
   # nothing to do
   message(FATAL_ERROR "Nothing to do.")
 endif()
 
-if (BUILD_CPP_IF AND BUILD_TESTING)
-  if (NOT INSTALL_TENSORFLOW)
+if(BUILD_CPP_IF AND BUILD_TESTING)
+  if(NOT INSTALL_TENSORFLOW)
     # some errors in conda packages...
     find_package(GTest)
   endif()
   if(NOT GTEST_LIBRARIES)
-    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/googletest.cmake.in googletest-download/CMakeLists.txt @ONLY)
-    execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
+    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/googletest.cmake.in
+                   googletest-download/CMakeLists.txt @ONLY)
+    execute_process(
+      COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
       RESULT_VARIABLE result
-      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download )
+      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download)
     if(result)
       message(FATAL_ERROR "CMake step for googletest failed: ${result}")
     endif()
-    execute_process(COMMAND ${CMAKE_COMMAND} --build .
+    execute_process(
+      COMMAND ${CMAKE_COMMAND} --build .
       RESULT_VARIABLE result
-      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download )
+      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/googletest-download)
     if(result)
       message(FATAL_ERROR "Build step for googletest failed: ${result}")
     endif()
-    set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
-    add_subdirectory(${CMAKE_CURRENT_BINARY_DIR}/googletest-src ${CMAKE_CURRENT_BINARY_DIR}/googletest-build EXCLUDE_FROM_ALL)
-  endif ()
+    set(gtest_force_shared_crt
+        ON
+        CACHE BOOL "" FORCE)
+    add_subdirectory(
+      ${CMAKE_CURRENT_BINARY_DIR}/googletest-src
+      ${CMAKE_CURRENT_BINARY_DIR}/googletest-build EXCLUDE_FROM_ALL)
+  endif()
 endif()
 
 find_package(Git)
@@ -50,30 +57,26 @@ if(GIT_FOUND)
     COMMAND git describe --tags --dirty
     WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
     OUTPUT_VARIABLE GIT_SUMM
-    OUTPUT_STRIP_TRAILING_WHITESPACE
-    )
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
   execute_process(
     COMMAND git log -1 --format=%h
     WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
     OUTPUT_VARIABLE GIT_HASH
-    OUTPUT_STRIP_TRAILING_WHITESPACE
-    )
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
   execute_process(
     COMMAND git rev-parse --abbrev-ref HEAD
     WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
     OUTPUT_VARIABLE GIT_BRANCH
-    OUTPUT_STRIP_TRAILING_WHITESPACE
-    )
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
   execute_process(
     COMMAND git show -s --format=%ci ${GIT_HASH}
     WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
     OUTPUT_VARIABLE GIT_DATE
-    OUTPUT_STRIP_TRAILING_WHITESPACE
-    )
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
 endif(GIT_FOUND)
 
 # global defines
-list (APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/)
+list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/)
 
 # model version
 file(READ ${PROJECT_SOURCE_DIR}/config/MODEL_VER MODEL_VERSION)
@@ -81,14 +84,18 @@ string(REPLACE "\n" " " MODEL_VERSION ${MODEL_VERSION})
 message(STATUS "Supported model version: ${MODEL_VERSION}")
 
 # Devices that have both ROCM and CUDA are not currently supported
-if (USE_ROCM_TOOLKIT AND USE_CUDA_TOOLKIT)
-  message (FATAL_ERROR "Devices that have both ROCM and CUDA are not currently supported")
+if(USE_ROCM_TOOLKIT AND USE_CUDA_TOOLKIT)
+  message(
+    FATAL_ERROR
+      "Devices that have both ROCM and CUDA are not currently supported")
 endif()
 set(DP_VARIANT "cpu")
 
 # define USE_CUDA_TOOLKIT
-if (USE_CUDA_TOOLKIT)
-  set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "")
+if(USE_CUDA_TOOLKIT)
+  set(CUDA_USE_STATIC_CUDA_RUNTIME
+      OFF
+      CACHE INTERNAL "")
   find_package(CUDA REQUIRED)
   add_definitions("-DGOOGLE_CUDA")
   message(STATUS "Found CUDA in ${CUDA_TOOLKIT_ROOT_DIR}, build nv GPU support")
@@ -97,23 +104,26 @@ else()
   message(STATUS "Will not build nv GPU support")
 endif(USE_CUDA_TOOLKIT)
 
-#define USE_ROCM_TOOLKIT
-if (USE_ROCM_TOOLKIT)
+# define USE_ROCM_TOOLKIT
+if(USE_ROCM_TOOLKIT)
   find_package(ROCM REQUIRED)
   add_definitions("-DTENSORFLOW_USE_ROCM")
   add_compile_definitions(__HIP_PLATFORM_HCC__)
   message(STATUS "Found ROCM in ${ROCM_ROOT}, build AMD GPU support")
   set(DP_VARIANT "rocm")
 else()
-  message(STATUS "Will not build AMD GPU support")  
-endif (USE_ROCM_TOOLKIT)
+  message(STATUS "Will not build AMD GPU support")
+endif(USE_ROCM_TOOLKIT)
 
 set(DEEPMD_SOURCE_DIR ${PROJECT_SOURCE_DIR}/..)
 
 # setup tensorflow libraries by python
-if (USE_TF_PYTHON_LIBS)
+if(USE_TF_PYTHON_LIBS)
   if(NOT $ENV{CIBUILDWHEEL} STREQUAL "1")
-    find_package (Python COMPONENTS Interpreter Development REQUIRED)
+    find_package(
+      Python
+      COMPONENTS Interpreter Development
+      REQUIRED)
   else()
     set(Python_LIBRARIES ${Python_LIBRARY})
   endif()
@@ -126,9 +136,9 @@ find_package(tensorflow REQUIRED)
 find_package(Threads)
 
 # define build type
-if ((NOT DEFINED CMAKE_BUILD_TYPE) OR CMAKE_BUILD_TYPE STREQUAL "")
-   set (CMAKE_BUILD_TYPE release)
-endif ()
+if((NOT DEFINED CMAKE_BUILD_TYPE) OR CMAKE_BUILD_TYPE STREQUAL "")
+  set(CMAKE_BUILD_TYPE release)
+endif()
 
 # set op prec
 set(HIGH_PREC_DEF "HIGH_PREC")
@@ -139,91 +149,102 @@ set(LOW_PREC_VARIANT "_low")
 
 # find openmp
 find_package(OpenMP)
-if (OPENMP_FOUND)
-    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
-    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+if(OPENMP_FOUND)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
 endif()
 
 # optimize flags
 option(ENABLE_NATIVE_OPTIMIZATION "Enable native optimization" OFF)
-if (ENABLE_NATIVE_OPTIMIZATION)
-  set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mtune=native")
-  set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mtune=native")
+if(ENABLE_NATIVE_OPTIMIZATION)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mtune=native")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mtune=native")
 endif()
 
 # define names of libs
-set (LIB_DEEPMD		"deepmd")
-set (LIB_DEEPMD_OP		"deepmd_op")
-if (BUILD_CPP_IF)
-  set (LIB_DEEPMD_CC		"deepmd_cc")
-  set (LIB_DEEPMD_C     "deepmd_c")
-  if (USE_CUDA_TOOLKIT)
-    set (LIB_DEEPMD_OP_DEVICE		"deepmd_op_cuda")
-  elseif (USE_ROCM_TOOLKIT)
-    set (LIB_DEEPMD_OP_DEVICE   "deepmd_op_rocm")
+set(LIB_DEEPMD "deepmd")
+set(LIB_DEEPMD_OP "deepmd_op")
+if(BUILD_CPP_IF)
+  set(LIB_DEEPMD_CC "deepmd_cc")
+  set(LIB_DEEPMD_C "deepmd_c")
+  if(USE_CUDA_TOOLKIT)
+    set(LIB_DEEPMD_OP_DEVICE "deepmd_op_cuda")
+  elseif(USE_ROCM_TOOLKIT)
+    set(LIB_DEEPMD_OP_DEVICE "deepmd_op_rocm")
   else()
-    set (LIB_DEEPMD_OP_DEVICE		"deepmd_op")
+    set(LIB_DEEPMD_OP_DEVICE "deepmd_op")
   endif()
-  if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 4.8)
-    set (LIB_DEEPMD_NATIVE	"deepmd_native_md")
-    set (LIB_DEEPMD_IPI		  "deepmd_ipi")
-    set (LIB_DEEPMD_GROMACS "deepmd_gromacs")
-  else ()
-    message (STATUS "Your gcc/g++ version is ${CMAKE_CXX_COMPILER_VERSION}, so native MD, ipi and gromacs plugin are disabled. To enable them, use gcc/g++ >= 4.8.")
-  endif ()
-endif (BUILD_CPP_IF)
-
-add_subdirectory (op/)
-add_subdirectory (lib/)
-if (BUILD_PY_IF)
-  add_subdirectory (config/)
+  if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 4.8)
+    set(LIB_DEEPMD_NATIVE "deepmd_native_md")
+    set(LIB_DEEPMD_IPI "deepmd_ipi")
+    set(LIB_DEEPMD_GROMACS "deepmd_gromacs")
+  else()
+    message(
+      STATUS
+        "Your gcc/g++ version is ${CMAKE_CXX_COMPILER_VERSION}, so native MD, ipi and gromacs plugin are disabled. To enable them, use gcc/g++ >= 4.8."
+    )
+  endif()
+endif(BUILD_CPP_IF)
+
+add_subdirectory(op/)
+add_subdirectory(lib/)
+if(BUILD_PY_IF)
+  add_subdirectory(config/)
   # add_subdirectory (tests/)
-endif (BUILD_PY_IF)
-if (BUILD_CPP_IF) 
-  add_subdirectory (api_cc/)
-  add_subdirectory (api_c/)
-  add_subdirectory (lmp/)
-  if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.8)
+endif(BUILD_PY_IF)
+if(BUILD_CPP_IF)
+  add_subdirectory(api_cc/)
+  add_subdirectory(api_c/)
+  add_subdirectory(lmp/)
+  if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.8)
     # add_subdirectory (md/)
     if(NOT BUILD_PY_IF)
-    add_subdirectory (ipi/)
-    add_subdirectory (gmx/)
+      add_subdirectory(ipi/)
+      add_subdirectory(gmx/)
     endif()
-  endif ()
-endif (BUILD_CPP_IF)
+  endif()
+endif(BUILD_CPP_IF)
 
 # uninstall target
 configure_file(
-    "${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in"
-    "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
-    IMMEDIATE @ONLY)
+  "${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in"
+  "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" IMMEDIATE @ONLY)
 
-add_custom_target(uninstall
-    COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
+add_custom_target(
+  uninstall COMMAND ${CMAKE_COMMAND} -P
+                    ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
 
 # lammps target
-configure_file(
-    "${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_lammps.cmake.in"
-    "${CMAKE_CURRENT_BINARY_DIR}/cmake_lammps.cmake"
-    IMMEDIATE @ONLY)
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_lammps.cmake.in"
+               "${CMAKE_CURRENT_BINARY_DIR}/cmake_lammps.cmake" IMMEDIATE @ONLY)
 
-add_custom_target(lammps
-    COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_lammps.cmake)
+add_custom_target(lammps COMMAND ${CMAKE_COMMAND} -P
+                                 ${CMAKE_CURRENT_BINARY_DIR}/cmake_lammps.cmake)
 
 # add configure file
 if(BUILD_CPP_IF AND NOT BUILD_PY_IF)
   include(CMakePackageConfigHelpers)
-  set(targets_export_name ${CMAKE_PROJECT_NAME}Targets CACHE INTERNAL "")
-  set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated" CACHE INTERNAL "")
-  set(cmake_files_install_dir "${CMAKE_INSTALL_PREFIX}/lib/cmake/${CMAKE_PROJECT_NAME}")
+  set(targets_export_name
+      ${CMAKE_PROJECT_NAME}Targets
+      CACHE INTERNAL "")
+  set(generated_dir
+      "${CMAKE_CURRENT_BINARY_DIR}/generated"
+      CACHE INTERNAL "")
+  set(cmake_files_install_dir
+      "${CMAKE_INSTALL_PREFIX}/lib/cmake/${CMAKE_PROJECT_NAME}")
   set(version_file "${generated_dir}/${CMAKE_PROJECT_NAME}ConfigVersion.cmake")
-  write_basic_package_version_file(${version_file} VERSION $ COMPATIBILITY AnyNewerVersion)
-  install(EXPORT ${targets_export_name}
+  write_basic_package_version_file(
+    ${version_file}
+    VERSION $
+    COMPATIBILITY AnyNewerVersion)
+  install(
+    EXPORT ${targets_export_name}
     NAMESPACE ${CMAKE_PROJECT_NAME}::
     DESTINATION ${cmake_files_install_dir})
   set(config_file "${generated_dir}/${CMAKE_PROJECT_NAME}Config.cmake")
-  configure_package_config_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/Config.cmake.in"
-    "${config_file}" INSTALL_DESTINATION ${cmake_files_install_dir})
+  configure_package_config_file(
+    "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Config.cmake.in" "${config_file}"
+    INSTALL_DESTINATION ${cmake_files_install_dir})
   install(FILES ${version_file} ${config_file}
-    DESTINATION ${cmake_files_install_dir})
+          DESTINATION ${cmake_files_install_dir})
 endif(BUILD_CPP_IF AND NOT BUILD_PY_IF)
diff --git a/source/api_c/CMakeLists.txt b/source/api_c/CMakeLists.txt
index 75e785a341..93b170f59c 100644
--- a/source/api_c/CMakeLists.txt
+++ b/source/api_c/CMakeLists.txt
@@ -2,49 +2,38 @@ file(GLOB LIB_SRC src/*.cc src/*.cpp)
 # only install public headers
 file(GLOB INC_SRC include/c_api.h include/deepmd.hpp)
 
-set (libname "${LIB_DEEPMD_C}")
+set(libname "${LIB_DEEPMD_C}")
 
 add_library(${libname} SHARED ${LIB_SRC})
 
 # link: libdeepmd libdeepmd_op libtensorflow_cc libtensorflow_framework
-target_link_libraries (${libname} PRIVATE ${LIB_DEEPMD_CC})
+target_link_libraries(${libname} PRIVATE ${LIB_DEEPMD_CC})
 target_include_directories(
-  ${libname} PUBLIC
-  $
-  $
-)
+  ${libname} PUBLIC $
+                    $)
 
 set_target_properties(
-  ${libname} 
-  PROPERTIES 
-  INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}"
-)
-if (CMAKE_TESTING_ENABLED)
+  ${libname} PROPERTIES INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}")
+if(CMAKE_TESTING_ENABLED)
   target_link_libraries(${libname} PRIVATE coverage_config)
 endif()
 
 if(BUILD_PY_IF)
+  install(TARGETS ${libname} DESTINATION deepmd/op/)
+else(BUILD_PY_IF)
   install(
     TARGETS ${libname}
-    DESTINATION deepmd/op/
-  )
-else(BUILD_PY_IF)
-install(
-  TARGETS ${libname}
-  EXPORT ${CMAKE_PROJECT_NAME}Targets
-  DESTINATION lib/
-)
+    EXPORT ${CMAKE_PROJECT_NAME}Targets
+    DESTINATION lib/)
 
-install(
-  FILES		${INC_SRC}
-  DESTINATION	include/deepmd
-)
+  install(FILES ${INC_SRC} DESTINATION include/deepmd)
 endif(BUILD_PY_IF)
 
-if (PACKAGE_C)
-  MESSAGE(STATUS "Packaging C API library")
+if(PACKAGE_C)
+  message(STATUS "Packaging C API library")
   # follow pypa/auditwheel convention
-  install(CODE [[
+  install(
+    CODE [[
     file(GET_RUNTIME_DEPENDENCIES
       LIBRARIES $ $
       RESOLVED_DEPENDENCIES_VAR _r_deps
@@ -64,22 +53,14 @@ if (PACKAGE_C)
         FOLLOW_SYMLINK_CHAIN
       )
     endforeach()
-    ]]
-  )
-  install(
-    FILES ${INC_SRC}
-    DESTINATION	${CMAKE_BINARY_DIR}/libdeepmd_c/include/deepmd
-  )
-  install(
-    TARGETS ${libname}
-    DESTINATION	${CMAKE_BINARY_DIR}/libdeepmd_c/lib
-  )
-  install(
-    TARGETS ${LIB_DEEPMD_OP}
-    DESTINATION	${CMAKE_BINARY_DIR}/libdeepmd_c/lib
-  )
+    ]])
+  install(FILES ${INC_SRC}
+          DESTINATION ${CMAKE_BINARY_DIR}/libdeepmd_c/include/deepmd)
+  install(TARGETS ${libname} DESTINATION ${CMAKE_BINARY_DIR}/libdeepmd_c/lib)
+  install(TARGETS ${LIB_DEEPMD_OP}
+          DESTINATION ${CMAKE_BINARY_DIR}/libdeepmd_c/lib)
 endif()
 
-if (CMAKE_TESTING_ENABLED)
+if(CMAKE_TESTING_ENABLED)
   add_subdirectory(tests)
 endif()
diff --git a/source/api_c/include/c_api.h b/source/api_c/include/c_api.h
index 41b2c7d2a2..2d68cb62db 100644
--- a/source/api_c/include/c_api.h
+++ b/source/api_c/include/c_api.h
@@ -3,682 +3,729 @@
 extern "C" {
 #endif
 
-/** 
+/**
  * @brief Neighbor list.
-*/
+ */
 typedef struct DP_Nlist DP_Nlist;
 
 /**
-* @brief Create a new neighbor list.
-* @param[in] inum_ Number of core region atoms
-* @param[in] Array stores the core region atom's index
-* @param[in] Array stores the core region atom's neighbor atom number
-* @param[in] Array stores the core region atom's neighbor index
-* @returns A pointer to the neighbor list.
-**/
-extern DP_Nlist* DP_NewNlist(
-  int inum_, 
-  int * ilist_,
-  int * numneigh_, 
-  int ** firstneigh_);
+ * @brief Create a new neighbor list.
+ * @param[in] inum_ Number of core region atoms
+ * @param[in] ilist_ Array stores the core region atom's index
+ * @param[in] numneigh_ Array stores the core region atom's neighbor atom number
+ * @param[in] firstneigh_ Array stores the core region atom's neighbor index
+ * @returns A pointer to the neighbor list.
+ **/
+extern DP_Nlist* DP_NewNlist(int inum_,
+                             int* ilist_,
+                             int* numneigh_,
+                             int** firstneigh_);
 
 /**
-* @brief The deep potential.
-**/
+ * @brief The deep potential.
+ **/
 typedef struct DP_DeepPot DP_DeepPot;
 
 /**
-* @brief DP constructor with initialization.
-* @param[in] c_model The name of the frozen model file.
-* @returns A pointer to the deep potential.
-**/
+ * @brief DP constructor with initialization.
+ * @param[in] c_model The name of the frozen model file.
+ * @returns A pointer to the deep potential.
+ **/
 extern DP_DeepPot* DP_NewDeepPot(const char* c_model);
 
 /**
  * @brief DP constructor with initialization.
- * 
+ *
  * @param c_model The name of the frozen model file.
  * @param gpu_rank The rank of the GPU.
  * @param c_file_content The content of the model file.
  * @return DP_DeepPot* A pointer to the deep potential.
  */
-extern DP_DeepPot* DP_NewDeepPotWithParam(
-        const char* c_model, const int gpu_rank, const char* c_file_content);
-
-/**
-* @brief Evaluate the energy, force and virial by using a DP. (double version)
-* @param[in] dp The DP to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] box The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[out] energy Output energy.
-* @param[out] force Output force. The array should be of size natoms x 3.
-* @param[out] virial Output virial. The array should be of size 9.
-* @param[out] atomic_energy Output atomic energy. The array should be of size natoms.
-* @param[out] atomic_virial Output atomic virial. The array should be of size natoms x 9.
-* @warning The output arrays should be allocated before calling this function. Pass NULL if not required.
-  **/
-extern void DP_DeepPotCompute (
-  DP_DeepPot* dp,
-  const int natom,
-  const double* coord,
-  const int* atype,
-  const double* cell,
-  double* energy,
-  double* force,
-  double* virial,
-  double* atomic_energy,
-  double* atomic_virial
-  );
-
-/**
-* @brief Evaluate the energy, force and virial by using a DP. (float version)
-* @param[in] dp The DP to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] box The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[out] energy Output energy.
-* @param[out] force Output force. The array should be of size natoms x 3.
-* @param[out] virial Output virial. The array should be of size 9.
-* @param[out] atomic_energy Output atomic energy. The array should be of size natoms.
-* @param[out] atomic_virial Output atomic virial. The array should be of size natoms x 9.
-* @warning The output arrays should be allocated before calling this function. Pass NULL if not required.
-  **/
-extern void DP_DeepPotComputef (
-  DP_DeepPot* dp,
-  const int natom,
-  const float* coord,
-  const int* atype,
-  const float* cell,
-  double* energy,
-  float* force,
-  float* virial,
-  float* atomic_energy,
-  float* atomic_virial
-  );
-
-/**
-* @brief Evaluate the energy, force and virial by using a DP with the neighbor list. (double version)
-* @param[in] dp The DP to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] box The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[in] nghost The number of ghost atoms.
-* @param[in] nlist The neighbor list.
-* @param[in] ago Update the internal neighbour list if ago is 0.
-* @param[out] energy Output energy.
-* @param[out] force Output force. The array should be of size natoms x 3.
-* @param[out] virial Output virial. The array should be of size 9.
-* @param[out] atomic_energy Output atomic energy. The array should be of size natoms.
-* @param[out] atomic_virial Output atomic virial. The array should be of size natoms x 9.
-* @warning The output arrays should be allocated before calling this function. Pass NULL if not required.
-  **/
-extern void DP_DeepPotComputeNList (
-  DP_DeepPot* dp,
-  const int natom,
-  const double* coord,
-  const int* atype,
-  const double* cell,
-  const int nghost,
-  const DP_Nlist* nlist,
-  const int ago,
-  double* energy,
-  double* force,
-  double* virial,
-  double* atomic_energy,
-  double* atomic_virial
-  );
-
-/**
-* @brief Evaluate the energy, force and virial by using a DP with the neighbor list. (float version)
-* @param[in] dp The DP to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] box The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[in] nghost The number of ghost atoms.
-* @param[in] nlist The neighbor list.
-* @param[in] ago Update the internal neighbour list if ago is 0.
-* @param[out] energy Output energy.
-* @param[out] force Output force. The array should be of size natoms x 3.
-* @param[out] virial Output virial. The array should be of size 9.
-* @param[out] atomic_energy Output atomic energy. The array should be of size natoms.
-* @param[out] atomic_virial Output atomic virial. The array should be of size natoms x 9.
-* @warning The output arrays should be allocated before calling this function. Pass NULL if not required.
-  **/
-extern void DP_DeepPotComputeNListf (
-  DP_DeepPot* dp,
-  const int natom,
-  const float* coord,
-  const int* atype,
-  const float* cell,
-  const int nghost,
-  const DP_Nlist* nlist,
-  const int ago,
-  double* energy,
-  float* force,
-  float* virial,
-  float* atomic_energy,
-  float* atomic_virial
-  );
-
-/**
-* @brief The deep potential model deviation.
-**/
+extern DP_DeepPot* DP_NewDeepPotWithParam(const char* c_model,
+                                          const int gpu_rank,
+                                          const char* c_file_content);
+
+/**
+ * @brief Evaluate the energy, force and virial by using a DP. (double version)
+ * @param[in] dp The DP to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] box The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[out] energy Output energy.
+ * @param[out] force Output force. The array should be of size natoms x 3.
+ * @param[out] virial Output virial. The array should be of size 9.
+ * @param[out] atomic_energy Output atomic energy. The array should be of size
+ *natoms.
+ * @param[out] atomic_virial Output atomic virial. The array should be of size
+ *natoms x 9.
+ * @warning The output arrays should be allocated before calling this function.
+ *Pass NULL if not required.
+ **/
+extern void DP_DeepPotCompute(DP_DeepPot* dp,
+                              const int natom,
+                              const double* coord,
+                              const int* atype,
+                              const double* cell,
+                              double* energy,
+                              double* force,
+                              double* virial,
+                              double* atomic_energy,
+                              double* atomic_virial);
+
+/**
+ * @brief Evaluate the energy, force and virial by using a DP. (float version)
+ * @param[in] dp The DP to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] box The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[out] energy Output energy.
+ * @param[out] force Output force. The array should be of size natoms x 3.
+ * @param[out] virial Output virial. The array should be of size 9.
+ * @param[out] atomic_energy Output atomic energy. The array should be of size
+ *natoms.
+ * @param[out] atomic_virial Output atomic virial. The array should be of size
+ *natoms x 9.
+ * @warning The output arrays should be allocated before calling this function.
+ *Pass NULL if not required.
+ **/
+extern void DP_DeepPotComputef(DP_DeepPot* dp,
+                               const int natom,
+                               const float* coord,
+                               const int* atype,
+                               const float* cell,
+                               double* energy,
+                               float* force,
+                               float* virial,
+                               float* atomic_energy,
+                               float* atomic_virial);
+
+/**
+ * @brief Evaluate the energy, force and virial by using a DP with the neighbor
+ *list. (double version)
+ * @param[in] dp The DP to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] box The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[in] nghost The number of ghost atoms.
+ * @param[in] nlist The neighbor list.
+ * @param[in] ago Update the internal neighbour list if ago is 0.
+ * @param[out] energy Output energy.
+ * @param[out] force Output force. The array should be of size natoms x 3.
+ * @param[out] virial Output virial. The array should be of size 9.
+ * @param[out] atomic_energy Output atomic energy. The array should be of size
+ *natoms.
+ * @param[out] atomic_virial Output atomic virial. The array should be of size
+ *natoms x 9.
+ * @warning The output arrays should be allocated before calling this function.
+ *Pass NULL if not required.
+ **/
+extern void DP_DeepPotComputeNList(DP_DeepPot* dp,
+                                   const int natom,
+                                   const double* coord,
+                                   const int* atype,
+                                   const double* cell,
+                                   const int nghost,
+                                   const DP_Nlist* nlist,
+                                   const int ago,
+                                   double* energy,
+                                   double* force,
+                                   double* virial,
+                                   double* atomic_energy,
+                                   double* atomic_virial);
+
+/**
+ * @brief Evaluate the energy, force and virial by using a DP with the neighbor
+ *list. (float version)
+ * @param[in] dp The DP to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] box The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[in] nghost The number of ghost atoms.
+ * @param[in] nlist The neighbor list.
+ * @param[in] ago Update the internal neighbour list if ago is 0.
+ * @param[out] energy Output energy.
+ * @param[out] force Output force. The array should be of size natoms x 3.
+ * @param[out] virial Output virial. The array should be of size 9.
+ * @param[out] atomic_energy Output atomic energy. The array should be of size
+ *natoms.
+ * @param[out] atomic_virial Output atomic virial. The array should be of size
+ *natoms x 9.
+ * @warning The output arrays should be allocated before calling this function.
+ *Pass NULL if not required.
+ **/
+extern void DP_DeepPotComputeNListf(DP_DeepPot* dp,
+                                    const int natom,
+                                    const float* coord,
+                                    const int* atype,
+                                    const float* cell,
+                                    const int nghost,
+                                    const DP_Nlist* nlist,
+                                    const int ago,
+                                    double* energy,
+                                    float* force,
+                                    float* virial,
+                                    float* atomic_energy,
+                                    float* atomic_virial);
+
+/**
+ * @brief The deep potential model deviation.
+ **/
 typedef struct DP_DeepPotModelDevi DP_DeepPotModelDevi;
 
 /**
-* @brief DP model deviation constructor with initialization.
-* @param[in] c_models The array of the name of the frozen model file.
-* @param[in] nmodels The number of models.
-**/
-extern DP_DeepPotModelDevi* DP_NewDeepPotModelDevi(const char** c_models, int n_models);
-
-/**
-* @brief Evaluate the energy, force and virial by using a DP model deviation with neighbor list. (double version)
-* @param[in] dp The DP model deviation to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] box The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[in] nghost The number of ghost atoms.
-* @param[in] nlist The neighbor list.
-* @param[in] ago Update the internal neighbour list if ago is 0.
-* @param[out] energy Output energy.
-* @param[out] force Output force. The array should be of size natoms x 3.
-* @param[out] virial Output virial. The array should be of size 9.
-* @param[out] atomic_energy Output atomic energy. The array should be of size natoms.
-* @param[out] atomic_virial Output atomic virial. The array should be of size natoms x 9.
-* @warning The output arrays should be allocated before calling this function. Pass NULL if not required.
-  **/
-extern void DP_DeepPotModelDeviComputeNList (
-  DP_DeepPotModelDevi* dp,
-  const int natom,
-  const double* coord,
-  const int* atype,
-  const double* cell,
-  const int nghost,
-  const DP_Nlist* nlist,
-  const int ago,
-  double* energy,
-  double* force,
-  double* virial,
-  double* atomic_energy,
-  double* atomic_virial
-  );
-
-/**
-* @brief Evaluate the energy, force and virial by using a DP model deviation with neighbor list. (float version)
-* @param[in] dp The DP model deviation to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] box The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[in] nghost The number of ghost atoms.
-* @param[in] nlist The neighbor list.
-* @param[in] ago Update the internal neighbour list if ago is 0.
-* @param[out] energy Output energy.
-* @param[out] force Output force. The array should be of size natoms x 3.
-* @param[out] virial Output virial. The array should be of size 9.
-* @param[out] atomic_energy Output atomic energy. The array should be of size natoms.
-* @param[out] atomic_virial Output atomic virial. The array should be of size natoms x 9.
-* @warning The output arrays should be allocated before calling this function. Pass NULL if not required.
-  **/
-extern void DP_DeepPotModelDeviComputeNListf (
-  DP_DeepPotModelDevi* dp,
-  const int natom,
-  const float* coord,
-  const int* atype,
-  const float* cell,
-  const int nghost,
-  const DP_Nlist* nlist,
-  const int ago,
-  double* energy,
-  float* force,
-  float* virial,
-  float* atomic_energy,
-  float* atomic_virial
-  );
+ * @brief DP model deviation constructor with initialization.
+ * @param[in] c_models The array of the names of the frozen model files.
+ * @param[in] n_models The number of models.
+ **/
+extern DP_DeepPotModelDevi* DP_NewDeepPotModelDevi(const char** c_models,
+                                                   int n_models);
+
+/**
+ * @brief Evaluate the energy, force and virial by using a DP model deviation
+ *with neighbor list. (double version)
+ * @param[in] dp The DP model deviation to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] box The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[in] nghost The number of ghost atoms.
+ * @param[in] nlist The neighbor list.
+ * @param[in] ago Update the internal neighbour list if ago is 0.
+ * @param[out] energy Output energy.
+ * @param[out] force Output force. The array should be of size natoms x 3.
+ * @param[out] virial Output virial. The array should be of size 9.
+ * @param[out] atomic_energy Output atomic energy. The array should be of size
+ *natoms.
+ * @param[out] atomic_virial Output atomic virial. The array should be of size
+ *natoms x 9.
+ * @warning The output arrays should be allocated before calling this function.
+ *Pass NULL if not required.
+ **/
+extern void DP_DeepPotModelDeviComputeNList(DP_DeepPotModelDevi* dp,
+                                            const int natom,
+                                            const double* coord,
+                                            const int* atype,
+                                            const double* cell,
+                                            const int nghost,
+                                            const DP_Nlist* nlist,
+                                            const int ago,
+                                            double* energy,
+                                            double* force,
+                                            double* virial,
+                                            double* atomic_energy,
+                                            double* atomic_virial);
+
+/**
+ * @brief Evaluate the energy, force and virial by using a DP model deviation
+ *with neighbor list. (float version)
+ * @param[in] dp The DP model deviation to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] box The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[in] nghost The number of ghost atoms.
+ * @param[in] nlist The neighbor list.
+ * @param[in] ago Update the internal neighbour list if ago is 0.
+ * @param[out] energy Output energy.
+ * @param[out] force Output force. The array should be of size natoms x 3.
+ * @param[out] virial Output virial. The array should be of size 9.
+ * @param[out] atomic_energy Output atomic energy. The array should be of size
+ *natoms.
+ * @param[out] atomic_virial Output atomic virial. The array should be of size
+ *natoms x 9.
+ * @warning The output arrays should be allocated before calling this function.
+ *Pass NULL if not required.
+ **/
+extern void DP_DeepPotModelDeviComputeNListf(DP_DeepPotModelDevi* dp,
+                                             const int natom,
+                                             const float* coord,
+                                             const int* atype,
+                                             const float* cell,
+                                             const int nghost,
+                                             const DP_Nlist* nlist,
+                                             const int ago,
+                                             double* energy,
+                                             float* force,
+                                             float* virial,
+                                             float* atomic_energy,
+                                             float* atomic_virial);
 
 /**
  * @brief Get the type map of a DP model deviation.
  * @param[in] dp The DP model deviation to use.
  * @return The cutoff radius.
-*/
+ */
 double DP_DeepPotModelDeviGetCutoff(DP_DeepPotModelDevi* dp);
 
 /**
  * @brief Get the type map of a DP model deviation.
  * @param[in] dp The DP model deviation to use.
  * @return The number of types of the DP model deviation.
-*/
+ */
 int DP_DeepPotModelDeviGetNumbTypes(DP_DeepPotModelDevi* dp);
 
 /**
  * @brief Get the type map of a DP.
  * @param[in] dp The DP to use.
  * @return The cutoff radius.
-*/
+ */
 double DP_DeepPotGetCutoff(DP_DeepPot* dp);
 
 /**
  * @brief Get the type map of a DP.
  * @param[in] dp The DP to use.
  * @return The number of types of the DP.
-*/
+ */
 int DP_DeepPotGetNumbTypes(DP_DeepPot* dp);
 
 /**
  * @brief Get the type map of a DP.
  * @param[in] dp The DP to use.
  * @return The type map of the DP.
-*/
+ */
 const char* DP_DeepPotGetTypeMap(DP_DeepPot* dp);
 
 /**
-* @brief The deep tensor.
-**/
+ * @brief The deep tensor.
+ **/
 typedef struct DP_DeepTensor DP_DeepTensor;
 
 /**
-* @brief Deep Tensor constructor with initialization.
-* @param[in] c_model The name of the frozen model file.
-* @returns A pointer to the deep tensor.
-**/
+ * @brief Deep Tensor constructor with initialization.
+ * @param[in] c_model The name of the frozen model file.
+ * @returns A pointer to the deep tensor.
+ **/
 extern DP_DeepTensor* DP_NewDeepTensor(const char* c_model);
 
 /**
  * @brief Deep Tensor constructor with initialization.
- * 
+ *
  * @param c_model The name of the frozen model file.
  * @param gpu_rank The rank of the GPU.
  * @param c_name_scope The name scope.
  * @return DP_DeepTensor* A pointer to the deep tensor.
  */
-extern DP_DeepTensor* DP_NewDeepTensorWithParam(
-        const char* c_model, const int gpu_rank, const char* c_name_scope);
-
-/**
-* @brief Evaluate the tensor by using a DP. (double version)
-* @param[in] dt The Deep Tensor to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] box The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[out] tensor Output tensor.
-  **/
-extern void DP_DeepTensorComputeTensor (
-  DP_DeepTensor* dt,
-  const int natom,
-  const double* coord,
-  const int* atype,
-  const double* cell,
-  double** tensor,
-  int* size
-  );
-
-/**
-* @brief Evaluate the tensor by using a DP. (float version)
-* @param[in] dt The Deep Tensor to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] box The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[out] tensor Output tensor.
-* @param[out] size Output size of the tensor.
-  **/
-extern void DP_DeepTensorComputeTensorf (
-  DP_DeepTensor* dt,
-  const int natom,
-  const float* coord,
-  const int* atype,
-  const float* cell,
-  float** tensor,
-  int* size
-  );
-
-/**
-* @brief Evaluate the tensor by using a DP with the neighbor list. (double version)
-* @param[in] dt The Deep Tensor to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] box The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[in] nghost The number of ghost atoms.
-* @param[in] nlist The neighbor list.
-* @param[out] tensor Output tensor.
-* @param[out] size Output size of the tensor.
-  **/
-extern void DP_DeepTensorComputeTensorNList (
-  DP_DeepTensor* dt,
-  const int natom,
-  const double* coord,
-  const int* atype,
-  const double* cell,
-  const int nghost,
-  const DP_Nlist* nlist,
-  double** tensor,
-  int* size
-  );
-
-/**
-* @brief Evaluate the tensor by using a DP with the neighbor list. (float version)
-* @param[in] dt The Deep Tensor to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] box The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[in] nghost The number of ghost atoms.
-* @param[in] nlist The neighbor list.
-* @param[out] tensor Output tensor.
-* @param[out] size Output size of the tensor.
-  **/
-extern void DP_DeepTensorComputeTensorNListf (
-  DP_DeepTensor* dt,
-  const int natom,
-  const float* coord,
-  const int* atype,
-  const float* cell,
-  const int nghost,
-  const DP_Nlist* nlist,
-  float** tensor,
-  int* size
-  );
-
-/**
-* @brief Evaluate the global tensor, force and virial by using a DP. (double version)
-* @param[in] dt The Deep Tensor to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] box The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[out] global_tensor Output global tensor.
-* @param[out] force Output force. The array should be of size natoms x 3.
-* @param[out] virial Output virial. The array should be of size 9.
-* @param[out] atomic_tensor Output atomic tensor. The array should be of size natoms.
-* @param[out] atomic_virial Output atomic virial. The array should be of size natoms x 9.
-* @param[out] size_at Output size of atomic tensor.
-* @warning The output arrays should be allocated before calling this function. Pass NULL if not required.
-  **/
-extern void DP_DeepTensorCompute (
-  DP_DeepTensor* dt,
-  const int natom,
-  const double* coord,
-  const int* atype,
-  const double* cell,
-  double* global_tensor,
-  double* force,
-  double* virial,
-  double** atomic_tensor,
-  double* atomic_virial,
-  int* size_at
-  );
-
-/**
-* @brief Evaluate the global tensor, force and virial by using a DP. (float version)
-* @param[in] dt The Deep Tensor to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] box The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[out] global_tensor Output global tensor.
-* @param[out] force Output force. The array should be of size natoms x 3.
-* @param[out] virial Output virial. The array should be of size 9.
-* @param[out] atomic_tensor Output atomic tensor. The array should be of size natoms.
-* @param[out] atomic_virial Output atomic virial. The array should be of size natoms x 9.
-* @param[out] size_at Output size of atomic tensor.
-* @warning The output arrays should be allocated before calling this function. Pass NULL if not required.
-  **/
-extern void DP_DeepTensorComputef (
-  DP_DeepTensor* dt,
-  const int natom,
-  const float* coord,
-  const int* atype,
-  const float* cell,
-  float* global_tensor,
-  float* force,
-  float* virial,
-  float** atomic_tensor,
-  float* atomic_virial,
-  int* size_at
-  );
-
-/**
-* @brief Evaluate the global tensor, force and virial by using a DP with the neighbor list. (double version)
-* @param[in] dt The Deep Tensor to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] box The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[in] nghost The number of ghost atoms.
-* @param[in] nlist The neighbor list.
-* @param[out] global_tensor Output global tensor.
-* @param[out] force Output force. The array should be of size natoms x 3.
-* @param[out] virial Output virial. The array should be of size 9.
-* @param[out] atomic_tensor Output atomic tensor. The array should be of size natoms.
-* @param[out] atomic_virial Output atomic virial. The array should be of size natoms x 9.
-* @param[out] size_at Output size of atomic tensor.
-* @warning The output arrays should be allocated before calling this function. Pass NULL if not required.
-  **/
-extern void DP_DeepTensorComputeNList (
-  DP_DeepTensor* dt,
-  const int natom,
-  const double* coord,
-  const int* atype,
-  const double* cell,
-  const int nghost,
-  const DP_Nlist* nlist,
-  double* global_tensor,
-  double* force,
-  double* virial,
-  double** atomic_tensor,
-  double* atomic_virial,
-  int* size_at
-  );
-
-/**
-* @brief Evaluate the global tensor, force and virial by using a DP with the neighbor list. (float version)
-* @param[in] dt The Deep Tensor to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] box The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[in] nghost The number of ghost atoms.
-* @param[in] nlist The neighbor list.
-* @param[out] global_tensor Output global tensor.
-* @param[out] force Output force. The array should be of size natoms x 3.
-* @param[out] virial Output virial. The array should be of size 9.
-* @param[out] atomic_tensor Output atomic tensor. The array should be of size natoms.
-* @param[out] atomic_virial Output atomic virial. The array should be of size natoms x 9.
-* @param[out] size_at Output size of atomic tensor.
-* @warning The output arrays should be allocated before calling this function. Pass NULL if not required.
-  **/
-extern void DP_DeepTensorComputeNListf (
-  DP_DeepTensor* dt,
-  const int natom,
-  const float* coord,
-  const int* atype,
-  const float* cell,
-  const int nghost,
-  const DP_Nlist* nlist,
-  float* global_tensor,
-  float* force,
-  float* virial,
-  float** atomic_tensor,
-  float* atomic_virial,
-  int* size_at
-  );
+extern DP_DeepTensor* DP_NewDeepTensorWithParam(const char* c_model,
+                                                const int gpu_rank,
+                                                const char* c_name_scope);
+
+/**
+ * @brief Evaluate the tensor by using a DP. (double version)
+ * @param[in] dt The Deep Tensor to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] box The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[out] tensor Output tensor.
+ **/
+extern void DP_DeepTensorComputeTensor(DP_DeepTensor* dt,
+                                       const int natom,
+                                       const double* coord,
+                                       const int* atype,
+                                       const double* cell,
+                                       double** tensor,
+                                       int* size);
+
+/**
+ * @brief Evaluate the tensor by using a DP. (float version)
+ * @param[in] dt The Deep Tensor to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] box The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[out] tensor Output tensor.
+ * @param[out] size Output size of the tensor.
+ **/
+extern void DP_DeepTensorComputeTensorf(DP_DeepTensor* dt,
+                                        const int natom,
+                                        const float* coord,
+                                        const int* atype,
+                                        const float* cell,
+                                        float** tensor,
+                                        int* size);
+
+/**
+ * @brief Evaluate the tensor by using a DP with the neighbor list. (double
+ *version)
+ * @param[in] dt The Deep Tensor to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] box The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[in] nghost The number of ghost atoms.
+ * @param[in] nlist The neighbor list.
+ * @param[out] tensor Output tensor.
+ * @param[out] size Output size of the tensor.
+ **/
+extern void DP_DeepTensorComputeTensorNList(DP_DeepTensor* dt,
+                                            const int natom,
+                                            const double* coord,
+                                            const int* atype,
+                                            const double* cell,
+                                            const int nghost,
+                                            const DP_Nlist* nlist,
+                                            double** tensor,
+                                            int* size);
+
+/**
+ * @brief Evaluate the tensor by using a DP with the neighbor list. (float
+ *version)
+ * @param[in] dt The Deep Tensor to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] box The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[in] nghost The number of ghost atoms.
+ * @param[in] nlist The neighbor list.
+ * @param[out] tensor Output tensor.
+ * @param[out] size Output size of the tensor.
+ **/
+extern void DP_DeepTensorComputeTensorNListf(DP_DeepTensor* dt,
+                                             const int natom,
+                                             const float* coord,
+                                             const int* atype,
+                                             const float* cell,
+                                             const int nghost,
+                                             const DP_Nlist* nlist,
+                                             float** tensor,
+                                             int* size);
+
+/**
+ * @brief Evaluate the global tensor, force and virial by using a DP. (double
+ *version)
+ * @param[in] dt The Deep Tensor to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] box The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[out] global_tensor Output global tensor.
+ * @param[out] force Output force. The array should be of size natoms x 3.
+ * @param[out] virial Output virial. The array should be of size 9.
+ * @param[out] atomic_tensor Output atomic tensor. The array should be of size
+ *natoms.
+ * @param[out] atomic_virial Output atomic virial. The array should be of size
+ *natoms x 9.
+ * @param[out] size_at Output size of atomic tensor.
+ * @warning The output arrays should be allocated before calling this function.
+ *Pass NULL if not required.
+ **/
+extern void DP_DeepTensorCompute(DP_DeepTensor* dt,
+                                 const int natom,
+                                 const double* coord,
+                                 const int* atype,
+                                 const double* cell,
+                                 double* global_tensor,
+                                 double* force,
+                                 double* virial,
+                                 double** atomic_tensor,
+                                 double* atomic_virial,
+                                 int* size_at);
+
+/**
+ * @brief Evaluate the global tensor, force and virial by using a DP. (float
+ *version)
+ * @param[in] dt The Deep Tensor to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] box The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[out] global_tensor Output global tensor.
+ * @param[out] force Output force. The array should be of size natoms x 3.
+ * @param[out] virial Output virial. The array should be of size 9.
+ * @param[out] atomic_tensor Output atomic tensor. The array should be of size
+ *natoms.
+ * @param[out] atomic_virial Output atomic virial. The array should be of size
+ *natoms x 9.
+ * @param[out] size_at Output size of atomic tensor.
+ * @warning The output arrays should be allocated before calling this function.
+ *Pass NULL if not required.
+ **/
+extern void DP_DeepTensorComputef(DP_DeepTensor* dt,
+                                  const int natom,
+                                  const float* coord,
+                                  const int* atype,
+                                  const float* cell,
+                                  float* global_tensor,
+                                  float* force,
+                                  float* virial,
+                                  float** atomic_tensor,
+                                  float* atomic_virial,
+                                  int* size_at);
+
+/**
+ * @brief Evaluate the global tensor, force and virial by using a DP with the
+ *neighbor list. (double version)
+ * @param[in] dt The Deep Tensor to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] box The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[in] nghost The number of ghost atoms.
+ * @param[in] nlist The neighbor list.
+ * @param[out] global_tensor Output global tensor.
+ * @param[out] force Output force. The array should be of size natoms x 3.
+ * @param[out] virial Output virial. The array should be of size 9.
+ * @param[out] atomic_tensor Output atomic tensor. The array should be of size
+ *natoms.
+ * @param[out] atomic_virial Output atomic virial. The array should be of size
+ *natoms x 9.
+ * @param[out] size_at Output size of atomic tensor.
+ * @warning The output arrays should be allocated before calling this function.
+ *Pass NULL if not required.
+ **/
+extern void DP_DeepTensorComputeNList(DP_DeepTensor* dt,
+                                      const int natom,
+                                      const double* coord,
+                                      const int* atype,
+                                      const double* cell,
+                                      const int nghost,
+                                      const DP_Nlist* nlist,
+                                      double* global_tensor,
+                                      double* force,
+                                      double* virial,
+                                      double** atomic_tensor,
+                                      double* atomic_virial,
+                                      int* size_at);
+
+/**
+ * @brief Evaluate the global tensor, force and virial by using a DP with the
+ *neighbor list. (float version)
+ * @param[in] dt The Deep Tensor to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] box The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[in] nghost The number of ghost atoms.
+ * @param[in] nlist The neighbor list.
+ * @param[out] global_tensor Output global tensor.
+ * @param[out] force Output force. The array should be of size natoms x 3.
+ * @param[out] virial Output virial. The array should be of size 9.
+ * @param[out] atomic_tensor Output atomic tensor. The array should be of size
+ *natoms.
+ * @param[out] atomic_virial Output atomic virial. The array should be of size
+ *natoms x 9.
+ * @param[out] size_at Output size of atomic tensor.
+ * @warning The output arrays should be allocated before calling this function.
+ *Pass NULL if not required.
+ **/
+extern void DP_DeepTensorComputeNListf(DP_DeepTensor* dt,
+                                       const int natom,
+                                       const float* coord,
+                                       const int* atype,
+                                       const float* cell,
+                                       const int nghost,
+                                       const DP_Nlist* nlist,
+                                       float* global_tensor,
+                                       float* force,
+                                       float* virial,
+                                       float** atomic_tensor,
+                                       float* atomic_virial,
+                                       int* size_at);
 
 /**
  * @brief Get the type map of a Deep Tensor.
  * @param[in] dt The Deep Tensor to use.
  * @return The cutoff radius.
-*/
+ */
 double DP_DeepTensorGetCutoff(DP_DeepTensor* dt);
 
 /**
  * @brief Get the type map of a Deep Tensor.
  * @param[in] dt The Deep Tensor to use.
  * @return The number of types of the Deep Tensor.
-*/
+ */
 int DP_DeepTensorGetNumbTypes(DP_DeepTensor* dt);
 
 /**
  * @brief Get the output dimension of a Deep Tensor.
  * @param[in] dt The Deep Tensor to use.
  * @return The output dimension of the Deep Tensor.
-*/
+ */
 int DP_DeepTensorGetOutputDim(DP_DeepTensor* dt);
 
 /**
  * @brief Get sel types of a Deep Tensor.
  * @param[in] dt The Deep Tensor to use.
  * @return The sel types
-*/
+ */
 int* DP_DeepTensorGetSelTypes(DP_DeepTensor* dt);
 
 /**
  * @brief Get the number of sel types of a Deep Tensor.
  * @param[in] dt The Deep Tensor to use.
  * @return The number of sel types
-*/
+ */
 int DP_DeepTensorGetNumbSelTypes(DP_DeepTensor* dt);
 
 /**
-* @brief The dipole charge modifier.
-**/
+ * @brief The dipole charge modifier.
+ **/
 typedef struct DP_DipoleChargeModifier DP_DipoleChargeModifier;
 
 /**
-* @brief Dipole charge modifier constructor with initialization.
-* @param[in] c_model The name of the frozen model file.
-* @returns A pointer to the dipole charge modifier.
-**/
+ * @brief Dipole charge modifier constructor with initialization.
+ * @param[in] c_model The name of the frozen model file.
+ * @returns A pointer to the dipole charge modifier.
+ **/
 extern DP_DipoleChargeModifier* DP_NewDipoleChargeModifier(const char* c_model);
 
 /**
  * @brief Dipole charge modifier constructor with initialization.
- * 
+ *
  * @param c_model The name of the frozen model file.
  * @param gpu_rank The rank of the GPU.
  * @param c_name_scope The name scope.
  * @return DP_DipoleChargeModifier* A pointer to the dipole charge modifier.
  */
 extern DP_DipoleChargeModifier* DP_NewDipoleChargeModifierWithParam(
-        const char* c_model, const int gpu_rank, const char* c_name_scope);
-
-/**
-* @brief Evaluate the force and virial correction by using a dipole charge modifier with the neighbor list. (double version)
-* @param[in] dcm The dipole charge modifier to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] cell The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[in] pairs The pairs of atoms. The list should contain npairs pairs of ints.
-* @param[in] npairs The number of pairs.
-* @param[in] delef_ The electric field on each atom. The array should be of size nframes x natoms x 3.
-* @param[in] nghost The number of ghost atoms.
-* @param[in] nlist The neighbor list.
-* @param[out] dfcorr_ Output force correction. The array should be of size natoms x 3.
-* @param[out] dvcorr_ Output virial correction. The array should be of size 9.
-* @warning The output arrays should be allocated before calling this function. Pass NULL if not required.
-  **/
-extern void DP_DipoleChargeModifierComputeNList (
-  DP_DipoleChargeModifier* dcm,
-  const int natom,
-  const double* coord,
-  const int* atype,
-  const double* cell,
-  const int* pairs,
-  const int npairs,
-  const double* delef_,
-  const int nghost,
-  const DP_Nlist* nlist,
-  double* dfcorr_,
-  double* dvcorr_
-  );
-
-
-/**
-* @brief Evaluate the force and virial correction by using a dipole charge modifier with the neighbor list. (float version)
-* @param[in] dcm The dipole charge modifier to use.
-* @param[in] natoms The number of atoms.
-* @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-* @param[in] atype The atom types. The array should contain natoms ints.
-* @param[in] cell The cell of the region. The array should be of size 9. Pass NULL if pbc is not used.
-* @param[in] pairs The pairs of atoms. The list should contain npairs pairs of ints.
-* @param[in] npairs The number of pairs.
-* @param[in] delef_ The electric field on each atom. The array should be of size nframes x natoms x 3.
-* @param[in] nghost The number of ghost atoms.
-* @param[in] nlist The neighbor list.
-* @param[out] dfcorr_ Output force correction. The array should be of size natoms x 3.
-* @param[out] dvcorr_ Output virial correction. The array should be of size 9.
-* @warning The output arrays should be allocated before calling this function. Pass NULL if not required.
-  **/
-extern void DP_DipoleChargeModifierComputeNListf (
-  DP_DipoleChargeModifier* dcm,
-  const int natom,
-  const float* coord,
-  const int* atype,
-  const float* cell,
-  const int* pairs,
-  const int npairs,
-  const float* delef_,
-  const int nghost,
-  const DP_Nlist* nlist,
-  float* dfcorr_,
-  float* dvcorr_
-  );
+    const char* c_model, const int gpu_rank, const char* c_name_scope);
+
+/**
+ * @brief Evaluate the force and virial correction by using a dipole charge
+ *modifier with the neighbor list. (double version)
+ * @param[in] dcm The dipole charge modifier to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] cell The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[in] pairs The pairs of atoms. The list should contain npairs pairs of
+ *ints.
+ * @param[in] npairs The number of pairs.
+ * @param[in] delef_ The electric field on each atom. The array should be of
+ *size nframes x natoms x 3.
+ * @param[in] nghost The number of ghost atoms.
+ * @param[in] nlist The neighbor list.
+ * @param[out] dfcorr_ Output force correction. The array should be of size
+ *natoms x 3.
+ * @param[out] dvcorr_ Output virial correction. The array should be of size 9.
+ * @warning The output arrays should be allocated before calling this function.
+ *Pass NULL if not required.
+ **/
+extern void DP_DipoleChargeModifierComputeNList(DP_DipoleChargeModifier* dcm,
+                                                const int natom,
+                                                const double* coord,
+                                                const int* atype,
+                                                const double* cell,
+                                                const int* pairs,
+                                                const int npairs,
+                                                const double* delef_,
+                                                const int nghost,
+                                                const DP_Nlist* nlist,
+                                                double* dfcorr_,
+                                                double* dvcorr_);
+
+/**
+ * @brief Evaluate the force and virial correction by using a dipole charge
+ *modifier with the neighbor list. (float version)
+ * @param[in] dcm The dipole charge modifier to use.
+ * @param[in] natoms The number of atoms.
+ * @param[in] coord The coordinates of atoms. The array should be of size natoms
+ *x 3.
+ * @param[in] atype The atom types. The array should contain natoms ints.
+ * @param[in] cell The cell of the region. The array should be of size 9. Pass
+ *NULL if pbc is not used.
+ * @param[in] pairs The pairs of atoms. The list should contain npairs pairs of
+ *ints.
+ * @param[in] npairs The number of pairs.
+ * @param[in] delef_ The electric field on each atom. The array should be of
+ *size nframes x natoms x 3.
+ * @param[in] nghost The number of ghost atoms.
+ * @param[in] nlist The neighbor list.
+ * @param[out] dfcorr_ Output force correction. The array should be of size
+ *natoms x 3.
+ * @param[out] dvcorr_ Output virial correction. The array should be of size 9.
+ * @warning The output arrays should be allocated before calling this function.
+ *Pass NULL if not required.
+ **/
+extern void DP_DipoleChargeModifierComputeNListf(DP_DipoleChargeModifier* dcm,
+                                                 const int natom,
+                                                 const float* coord,
+                                                 const int* atype,
+                                                 const float* cell,
+                                                 const int* pairs,
+                                                 const int npairs,
+                                                 const float* delef_,
+                                                 const int nghost,
+                                                 const DP_Nlist* nlist,
+                                                 float* dfcorr_,
+                                                 float* dvcorr_);
 
 /**
  * @brief Get the type map of a DipoleChargeModifier.
  * @param[in] dcm The DipoleChargeModifier to use.
  * @return The cutoff radius.
-*/
+ */
 double DP_DipoleChargeModifierGetCutoff(DP_DipoleChargeModifier* dt);
 
 /**
  * @brief Get the type map of a DipoleChargeModifier.
  * @param[in] dcm The DipoleChargeModifier to use.
  * @return The number of types of the DipoleChargeModifier.
-*/
+ */
 int DP_DipoleChargeModifierGetNumbTypes(DP_DipoleChargeModifier* dt);
 
 /**
  * @brief Get sel types of a DipoleChargeModifier.
  * @param[in] dcm The DipoleChargeModifier to use.
  * @return The sel types
-*/
+ */
 int* DP_DipoleChargeModifierGetSelTypes(DP_DipoleChargeModifier* dt);
 
 /**
  * @brief Get the number of sel types of a DipoleChargeModifier.
  * @param[in] dcm The DipoleChargeModifier to use.
  * @return The number of sel types
-*/
+ */
 int DP_DipoleChargeModifierGetNumbSelTypes(DP_DipoleChargeModifier* dt);
 
 /**
-* @brief Convert PBtxt to PB.
-* @param[in] c_pbtxt The name of the PBtxt file.
-* @param[in] c_pb The name of the PB file.
-  **/
-extern void DP_ConvertPbtxtToPb(
-  const char* c_pbtxt,
-  const char* c_pb
-  );
+ * @brief Convert PBtxt to PB.
+ * @param[in] c_pbtxt The name of the PBtxt file.
+ * @param[in] c_pb The name of the PB file.
+ **/
+extern void DP_ConvertPbtxtToPb(const char* c_pbtxt, const char* c_pb);
 
 /**
- * @brief Print the summary of DeePMD-kit, including the version and the build information.
+ * @brief Print the summary of DeePMD-kit, including the version and the build
+ * information.
  * @param[in] c_pre The prefix to each line.
  */
-extern void DP_PrintSummary(
-  const char* c_pre
-  );
+extern void DP_PrintSummary(const char* c_pre);
 
 #ifdef __cplusplus
 } /* end extern "C" */
-#endif
\ No newline at end of file
+#endif
diff --git a/source/api_c/include/c_api_internal.h b/source/api_c/include/c_api_internal.h
index 43a4081f38..d0c9f59fcc 100644
--- a/source/api_c/include/c_api_internal.h
+++ b/source/api_c/include/c_api_internal.h
@@ -1,7 +1,7 @@
-#include "neighbor_list.h"
+#include "DataModifier.h"
 #include "DeepPot.h"
 #include "DeepTensor.h"
-#include "DataModifier.h"
+#include "neighbor_list.h"
 
 struct DP_Nlist {
   DP_Nlist(deepmd::InputNlist& nl);
diff --git a/source/api_c/include/deepmd.hpp b/source/api_c/include/deepmd.hpp
index 9074249675..8e7a1a9404 100644
--- a/source/api_c/include/deepmd.hpp
+++ b/source/api_c/include/deepmd.hpp
@@ -6,1350 +6,1351 @@ This header-only library provides a C++ 11 interface to the DeePMD-kit C API.
 
 #pragma once
 
+#include 
+#include 
+#include 
 #include 
 #include 
-#include 
-#include 
-#include 
 
 #include "c_api.h"
 
 template 
-inline void _DP_DeepPotCompute(
-    DP_DeepPot *dp,
-    const int natom,
-    const FPTYPE *coord,
-    const int *atype,
-    const FPTYPE *cell,
-    double *energy,
-    FPTYPE *force,
-    FPTYPE *virial,
-    FPTYPE *atomic_energy,
-    FPTYPE *atomic_virial);
+inline void _DP_DeepPotCompute(DP_DeepPot *dp,
+                               const int natom,
+                               const FPTYPE *coord,
+                               const int *atype,
+                               const FPTYPE *cell,
+                               double *energy,
+                               FPTYPE *force,
+                               FPTYPE *virial,
+                               FPTYPE *atomic_energy,
+                               FPTYPE *atomic_virial);
 
 template <>
-inline void _DP_DeepPotCompute(
-    DP_DeepPot *dp,
-    const int natom,
-    const double *coord,
-    const int *atype,
-    const double *cell,
-    double *energy,
-    double *force,
-    double *virial,
-    double *atomic_energy,
-    double *atomic_virial)
-{
-    DP_DeepPotCompute(dp, natom, coord, atype, cell, energy, force, virial, atomic_energy, atomic_virial);
+inline void _DP_DeepPotCompute(DP_DeepPot *dp,
+                                       const int natom,
+                                       const double *coord,
+                                       const int *atype,
+                                       const double *cell,
+                                       double *energy,
+                                       double *force,
+                                       double *virial,
+                                       double *atomic_energy,
+                                       double *atomic_virial) {
+  DP_DeepPotCompute(dp, natom, coord, atype, cell, energy, force, virial,
+                    atomic_energy, atomic_virial);
 }
 
 template <>
-inline void _DP_DeepPotCompute(
-    DP_DeepPot *dp,
-    const int natom,
-    const float *coord,
-    const int *atype,
-    const float *cell,
-    double *energy,
-    float *force,
-    float *virial,
-    float *atomic_energy,
-    float *atomic_virial)
-{
-    DP_DeepPotComputef(dp, natom, coord, atype, cell, energy, force, virial, atomic_energy, atomic_virial);
+inline void _DP_DeepPotCompute(DP_DeepPot *dp,
+                                      const int natom,
+                                      const float *coord,
+                                      const int *atype,
+                                      const float *cell,
+                                      double *energy,
+                                      float *force,
+                                      float *virial,
+                                      float *atomic_energy,
+                                      float *atomic_virial) {
+  DP_DeepPotComputef(dp, natom, coord, atype, cell, energy, force, virial,
+                     atomic_energy, atomic_virial);
 }
 
 template 
-inline void _DP_DeepPotComputeNList(
-    DP_DeepPot *dp,
-    const int natom,
-    const FPTYPE *coord,
-    const int *atype,
-    const FPTYPE *cell,
-    const int nghost,
-    const DP_Nlist *nlist,
-    const int ago,
-    double *energy,
-    FPTYPE *force,
-    FPTYPE *virial,
-    FPTYPE *atomic_energy,
-    FPTYPE *atomic_virial);
+inline void _DP_DeepPotComputeNList(DP_DeepPot *dp,
+                                    const int natom,
+                                    const FPTYPE *coord,
+                                    const int *atype,
+                                    const FPTYPE *cell,
+                                    const int nghost,
+                                    const DP_Nlist *nlist,
+                                    const int ago,
+                                    double *energy,
+                                    FPTYPE *force,
+                                    FPTYPE *virial,
+                                    FPTYPE *atomic_energy,
+                                    FPTYPE *atomic_virial);
 
 template <>
-inline void _DP_DeepPotComputeNList(
-    DP_DeepPot *dp,
-    const int natom,
-    const double *coord,
-    const int *atype,
-    const double *cell,
-    const int nghost,
-    const DP_Nlist *nlist,
-    const int ago,
-    double *energy,
-    double *force,
-    double *virial,
-    double *atomic_energy,
-    double *atomic_virial)
-{
-    DP_DeepPotComputeNList(dp, natom, coord, atype, cell, nghost, nlist, ago, energy, force, virial, atomic_energy, atomic_virial);
+inline void _DP_DeepPotComputeNList(DP_DeepPot *dp,
+                                            const int natom,
+                                            const double *coord,
+                                            const int *atype,
+                                            const double *cell,
+                                            const int nghost,
+                                            const DP_Nlist *nlist,
+                                            const int ago,
+                                            double *energy,
+                                            double *force,
+                                            double *virial,
+                                            double *atomic_energy,
+                                            double *atomic_virial) {
+  DP_DeepPotComputeNList(dp, natom, coord, atype, cell, nghost, nlist, ago,
+                         energy, force, virial, atomic_energy, atomic_virial);
 }
 
 template <>
-inline void _DP_DeepPotComputeNList(
-    DP_DeepPot *dp,
-    const int natom,
-    const float *coord,
-    const int *atype,
-    const float *cell,
-    const int nghost,
-    const DP_Nlist *nlist,
-    const int ago,
-    double *energy,
-    float *force,
-    float *virial,
-    float *atomic_energy,
-    float *atomic_virial)
-{
-    DP_DeepPotComputeNListf(dp, natom, coord, atype, cell, nghost, nlist, ago, energy, force, virial, atomic_energy, atomic_virial);
+inline void _DP_DeepPotComputeNList(DP_DeepPot *dp,
+                                           const int natom,
+                                           const float *coord,
+                                           const int *atype,
+                                           const float *cell,
+                                           const int nghost,
+                                           const DP_Nlist *nlist,
+                                           const int ago,
+                                           double *energy,
+                                           float *force,
+                                           float *virial,
+                                           float *atomic_energy,
+                                           float *atomic_virial) {
+  DP_DeepPotComputeNListf(dp, natom, coord, atype, cell, nghost, nlist, ago,
+                          energy, force, virial, atomic_energy, atomic_virial);
 }
 
 template 
-inline void _DP_DeepPotModelDeviComputeNList(
-    DP_DeepPotModelDevi *dp,
-    const int natom,
-    const FPTYPE *coord,
-    const int *atype,
-    const FPTYPE *cell,
-    const int nghost,
-    const DP_Nlist *nlist,
-    const int ago,
-    double *energy,
-    FPTYPE *force,
-    FPTYPE *virial,
-    FPTYPE *atomic_energy,
-    FPTYPE *atomic_virial);
+inline void _DP_DeepPotModelDeviComputeNList(DP_DeepPotModelDevi *dp,
+                                             const int natom,
+                                             const FPTYPE *coord,
+                                             const int *atype,
+                                             const FPTYPE *cell,
+                                             const int nghost,
+                                             const DP_Nlist *nlist,
+                                             const int ago,
+                                             double *energy,
+                                             FPTYPE *force,
+                                             FPTYPE *virial,
+                                             FPTYPE *atomic_energy,
+                                             FPTYPE *atomic_virial);
 
 template <>
-inline void _DP_DeepPotModelDeviComputeNList(
-    DP_DeepPotModelDevi *dp,
-    const int natom,
-    const double *coord,
-    const int *atype,
-    const double *cell,
-    const int nghost,
-    const DP_Nlist *nlist,
-    const int ago,
-    double *energy,
-    double *force,
-    double *virial,
-    double *atomic_energy,
-    double *atomic_virial)
-{
-    DP_DeepPotModelDeviComputeNList(dp, natom, coord, atype, cell, nghost, nlist, ago, energy, force, virial, atomic_energy, atomic_virial);
+inline void _DP_DeepPotModelDeviComputeNList(DP_DeepPotModelDevi *dp,
+                                                     const int natom,
+                                                     const double *coord,
+                                                     const int *atype,
+                                                     const double *cell,
+                                                     const int nghost,
+                                                     const DP_Nlist *nlist,
+                                                     const int ago,
+                                                     double *energy,
+                                                     double *force,
+                                                     double *virial,
+                                                     double *atomic_energy,
+                                                     double *atomic_virial) {
+  DP_DeepPotModelDeviComputeNList(dp, natom, coord, atype, cell, nghost, nlist,
+                                  ago, energy, force, virial, atomic_energy,
+                                  atomic_virial);
 }
 
 template <>
-inline void _DP_DeepPotModelDeviComputeNList(
-    DP_DeepPotModelDevi *dp,
-    const int natom,
-    const float *coord,
-    const int *atype,
-    const float *cell,
-    const int nghost,
-    const DP_Nlist *nlist,
-    const int ago,
-    double *energy,
-    float *force,
-    float *virial,
-    float *atomic_energy,
-    float *atomic_virial)
-{
-    DP_DeepPotModelDeviComputeNListf(dp, natom, coord, atype, cell, nghost, nlist, ago, energy, force, virial, atomic_energy, atomic_virial);
+inline void _DP_DeepPotModelDeviComputeNList(DP_DeepPotModelDevi *dp,
+                                                    const int natom,
+                                                    const float *coord,
+                                                    const int *atype,
+                                                    const float *cell,
+                                                    const int nghost,
+                                                    const DP_Nlist *nlist,
+                                                    const int ago,
+                                                    double *energy,
+                                                    float *force,
+                                                    float *virial,
+                                                    float *atomic_energy,
+                                                    float *atomic_virial) {
+  DP_DeepPotModelDeviComputeNListf(dp, natom, coord, atype, cell, nghost, nlist,
+                                   ago, energy, force, virial, atomic_energy,
+                                   atomic_virial);
 }
 
 template 
-inline void _DP_DeepTensorComputeTensor(
-    DP_DeepTensor *dt,
-    const int natom,
-    const FPTYPE *coord,
-    const int *atype,
-    const FPTYPE *cell,
-    FPTYPE **tensor,
-    int* size);
+inline void _DP_DeepTensorComputeTensor(DP_DeepTensor *dt,
+                                        const int natom,
+                                        const FPTYPE *coord,
+                                        const int *atype,
+                                        const FPTYPE *cell,
+                                        FPTYPE **tensor,
+                                        int *size);
 
 template <>
-inline void _DP_DeepTensorComputeTensor(
-    DP_DeepTensor *dt,
-    const int natom,
-    const double *coord,
-    const int *atype,
-    const double *cell,
-    double **tensor,
-    int* size)
-{
-    DP_DeepTensorComputeTensor(dt, natom, coord, atype, cell, tensor, size);
+inline void _DP_DeepTensorComputeTensor(DP_DeepTensor *dt,
+                                                const int natom,
+                                                const double *coord,
+                                                const int *atype,
+                                                const double *cell,
+                                                double **tensor,
+                                                int *size) {
+  DP_DeepTensorComputeTensor(dt, natom, coord, atype, cell, tensor, size);
 }
 
 template <>
-inline void _DP_DeepTensorComputeTensor(
-    DP_DeepTensor *dt,
-    const int natom,
-    const float *coord,
-    const int *atype,
-    const float *cell,
-    float **tensor,
-    int* size)
-{
-    DP_DeepTensorComputeTensorf(dt, natom, coord, atype, cell, tensor, size);
+inline void _DP_DeepTensorComputeTensor(DP_DeepTensor *dt,
+                                               const int natom,
+                                               const float *coord,
+                                               const int *atype,
+                                               const float *cell,
+                                               float **tensor,
+                                               int *size) {
+  DP_DeepTensorComputeTensorf(dt, natom, coord, atype, cell, tensor, size);
 }
 
 template 
-inline void _DP_DeepTensorComputeTensorNList(
-    DP_DeepTensor *dt,
-    const int natom,
-    const FPTYPE *coord,
-    const int *atype,
-    const FPTYPE *cell,
-    const int nghost,
-    const DP_Nlist *nlist,
-    FPTYPE **tensor,
-    int* size);
+inline void _DP_DeepTensorComputeTensorNList(DP_DeepTensor *dt,
+                                             const int natom,
+                                             const FPTYPE *coord,
+                                             const int *atype,
+                                             const FPTYPE *cell,
+                                             const int nghost,
+                                             const DP_Nlist *nlist,
+                                             FPTYPE **tensor,
+                                             int *size);
 
 template <>
-inline void _DP_DeepTensorComputeTensorNList(
-    DP_DeepTensor *dt,
-    const int natom,
-    const double *coord,
-    const int *atype,
-    const double *cell,
-    const int nghost,
-    const DP_Nlist *nlist,
-    double **tensor,
-    int* size)
-{
-    DP_DeepTensorComputeTensorNList(dt, natom, coord, atype, cell, nghost, nlist, tensor, size);
+inline void _DP_DeepTensorComputeTensorNList(DP_DeepTensor *dt,
+                                                     const int natom,
+                                                     const double *coord,
+                                                     const int *atype,
+                                                     const double *cell,
+                                                     const int nghost,
+                                                     const DP_Nlist *nlist,
+                                                     double **tensor,
+                                                     int *size) {
+  DP_DeepTensorComputeTensorNList(dt, natom, coord, atype, cell, nghost, nlist,
+                                  tensor, size);
 }
 
 template <>
-inline void _DP_DeepTensorComputeTensorNList(
-    DP_DeepTensor *dt,
-    const int natom,
-    const float *coord,
-    const int *atype,
-    const float *cell,
-    const int nghost,
-    const DP_Nlist *nlist,
-    float **tensor,
-    int* size)
-{
-    DP_DeepTensorComputeTensorNListf(dt, natom, coord, atype, cell, nghost, nlist, tensor, size);
+inline void _DP_DeepTensorComputeTensorNList(DP_DeepTensor *dt,
+                                                    const int natom,
+                                                    const float *coord,
+                                                    const int *atype,
+                                                    const float *cell,
+                                                    const int nghost,
+                                                    const DP_Nlist *nlist,
+                                                    float **tensor,
+                                                    int *size) {
+  DP_DeepTensorComputeTensorNListf(dt, natom, coord, atype, cell, nghost, nlist,
+                                   tensor, size);
 }
 
 template 
-inline void _DP_DeepTensorCompute(
-    DP_DeepTensor *dt,
-    const int natom,
-    const FPTYPE *coord,
-    const int *atype,
-    const FPTYPE *cell,
-    FPTYPE *global_tensor,
-    FPTYPE *force,
-    FPTYPE *virial,
-    FPTYPE **atomic_energy,
-    FPTYPE *atomic_virial,
-    int* size_at);
+inline void _DP_DeepTensorCompute(DP_DeepTensor *dt,
+                                  const int natom,
+                                  const FPTYPE *coord,
+                                  const int *atype,
+                                  const FPTYPE *cell,
+                                  FPTYPE *global_tensor,
+                                  FPTYPE *force,
+                                  FPTYPE *virial,
+                                  FPTYPE **atomic_energy,
+                                  FPTYPE *atomic_virial,
+                                  int *size_at);
 
 template <>
-inline void _DP_DeepTensorCompute(
-    DP_DeepTensor *dt,
-    const int natom,
-    const double *coord,
-    const int *atype,
-    const double *cell,
-    double *global_tensor,
-    double *force,
-    double *virial,
-    double **atomic_tensor,
-    double *atomic_virial,
-    int* size_at)
-{
-    DP_DeepTensorCompute(dt, natom, coord, atype, cell, global_tensor, force, virial, atomic_tensor, atomic_virial, size_at);
+inline void _DP_DeepTensorCompute(DP_DeepTensor *dt,
+                                          const int natom,
+                                          const double *coord,
+                                          const int *atype,
+                                          const double *cell,
+                                          double *global_tensor,
+                                          double *force,
+                                          double *virial,
+                                          double **atomic_tensor,
+                                          double *atomic_virial,
+                                          int *size_at) {
+  DP_DeepTensorCompute(dt, natom, coord, atype, cell, global_tensor, force,
+                       virial, atomic_tensor, atomic_virial, size_at);
 }
 
 template <>
-inline void _DP_DeepTensorCompute(
-    DP_DeepTensor *dt,
-    const int natom,
-    const float *coord,
-    const int *atype,
-    const float *cell,
-    float *global_tensor,
-    float *force,
-    float *virial,
-    float **atomic_tensor,
-    float *atomic_virial,
-    int* size_at)
-{
-    DP_DeepTensorComputef(dt, natom, coord, atype, cell, global_tensor, force, virial, atomic_tensor, atomic_virial, size_at);
+inline void _DP_DeepTensorCompute(DP_DeepTensor *dt,
+                                         const int natom,
+                                         const float *coord,
+                                         const int *atype,
+                                         const float *cell,
+                                         float *global_tensor,
+                                         float *force,
+                                         float *virial,
+                                         float **atomic_tensor,
+                                         float *atomic_virial,
+                                         int *size_at) {
+  DP_DeepTensorComputef(dt, natom, coord, atype, cell, global_tensor, force,
+                        virial, atomic_tensor, atomic_virial, size_at);
 }
 
 template 
-inline void _DP_DeepTensorComputeNList(
-    DP_DeepTensor *dt,
-    const int natom,
-    const FPTYPE *coord,
-    const int *atype,
-    const FPTYPE *cell,
-    const int nghost,
-    const DP_Nlist *nlist,
-    FPTYPE *global_tensor,
-    FPTYPE *force,
-    FPTYPE *virial,
-    FPTYPE **atomic_energy,
-    FPTYPE *atomic_virial,
-    int* size_at);
+inline void _DP_DeepTensorComputeNList(DP_DeepTensor *dt,
+                                       const int natom,
+                                       const FPTYPE *coord,
+                                       const int *atype,
+                                       const FPTYPE *cell,
+                                       const int nghost,
+                                       const DP_Nlist *nlist,
+                                       FPTYPE *global_tensor,
+                                       FPTYPE *force,
+                                       FPTYPE *virial,
+                                       FPTYPE **atomic_energy,
+                                       FPTYPE *atomic_virial,
+                                       int *size_at);
 
 template <>
-inline void _DP_DeepTensorComputeNList(
-    DP_DeepTensor *dt,
-    const int natom,
-    const double *coord,
-    const int *atype,
-    const double *cell,
-    const int nghost,
-    const DP_Nlist *nlist,
-    double *global_tensor,
-    double *force,
-    double *virial,
-    double **atomic_tensor,
-    double *atomic_virial,
-    int* size_at)
-{
-    DP_DeepTensorComputeNList(dt, natom, coord, atype, cell, nghost, nlist, global_tensor, force, virial, atomic_tensor, atomic_virial, size_at);
+inline void _DP_DeepTensorComputeNList(DP_DeepTensor *dt,
+                                               const int natom,
+                                               const double *coord,
+                                               const int *atype,
+                                               const double *cell,
+                                               const int nghost,
+                                               const DP_Nlist *nlist,
+                                               double *global_tensor,
+                                               double *force,
+                                               double *virial,
+                                               double **atomic_tensor,
+                                               double *atomic_virial,
+                                               int *size_at) {
+  DP_DeepTensorComputeNList(dt, natom, coord, atype, cell, nghost, nlist,
+                            global_tensor, force, virial, atomic_tensor,
+                            atomic_virial, size_at);
 }
 
 template <>
-inline void _DP_DeepTensorComputeNList(
-    DP_DeepTensor *dt,
-    const int natom,
-    const float *coord,
-    const int *atype,
-    const float *cell,
-    const int nghost,
-    const DP_Nlist *nlist,
-    float *global_tensor,
-    float *force,
-    float *virial,
-    float **atomic_tensor,
-    float *atomic_virial,
-    int* size_at)
-{
-    DP_DeepTensorComputeNListf(dt, natom, coord, atype, cell, nghost, nlist, global_tensor, force, virial, atomic_tensor, atomic_virial, size_at);
+inline void _DP_DeepTensorComputeNList(DP_DeepTensor *dt,
+                                              const int natom,
+                                              const float *coord,
+                                              const int *atype,
+                                              const float *cell,
+                                              const int nghost,
+                                              const DP_Nlist *nlist,
+                                              float *global_tensor,
+                                              float *force,
+                                              float *virial,
+                                              float **atomic_tensor,
+                                              float *atomic_virial,
+                                              int *size_at) {
+  DP_DeepTensorComputeNListf(dt, natom, coord, atype, cell, nghost, nlist,
+                             global_tensor, force, virial, atomic_tensor,
+                             atomic_virial, size_at);
 }
 
 template 
-inline void _DP_DipoleChargeModifierComputeNList(
-    DP_DipoleChargeModifier* dcm,
-    const int natom,
-    const FPTYPE* coord,
-    const int* atype,
-    const FPTYPE* cell,
-    const int* pairs,
-    const int npairs,
-    const FPTYPE* delef_,
-    const int nghost,
-    const DP_Nlist* nlist,
-    FPTYPE* dfcorr_,
-    FPTYPE* dvcorr_
-);
+inline void _DP_DipoleChargeModifierComputeNList(DP_DipoleChargeModifier *dcm,
+                                                 const int natom,
+                                                 const FPTYPE *coord,
+                                                 const int *atype,
+                                                 const FPTYPE *cell,
+                                                 const int *pairs,
+                                                 const int npairs,
+                                                 const FPTYPE *delef_,
+                                                 const int nghost,
+                                                 const DP_Nlist *nlist,
+                                                 FPTYPE *dfcorr_,
+                                                 FPTYPE *dvcorr_);
 
 template <>
 inline void _DP_DipoleChargeModifierComputeNList(
-    DP_DipoleChargeModifier* dcm,
+    DP_DipoleChargeModifier *dcm,
     const int natom,
-    const double* coord,
-    const int* atype,
-    const double* cell,
-    const int* pairs,
+    const double *coord,
+    const int *atype,
+    const double *cell,
+    const int *pairs,
     const int npairs,
-    const double* delef_,
+    const double *delef_,
     const int nghost,
-    const DP_Nlist* nlist,
-    double* dfcorr_,
-    double* dvcorr_
-)
-{
-    DP_DipoleChargeModifierComputeNList(dcm, natom, coord, atype, cell, pairs, npairs, delef_, nghost, nlist, dfcorr_, dvcorr_);
+    const DP_Nlist *nlist,
+    double *dfcorr_,
+    double *dvcorr_) {
+  DP_DipoleChargeModifierComputeNList(dcm, natom, coord, atype, cell, pairs,
+                                      npairs, delef_, nghost, nlist, dfcorr_,
+                                      dvcorr_);
 }
 
 template <>
 inline void _DP_DipoleChargeModifierComputeNList(
-    DP_DipoleChargeModifier* dcm,
+    DP_DipoleChargeModifier *dcm,
     const int natom,
-    const float* coord,
-    const int* atype,
-    const float* cell,
-    const int* pairs,
+    const float *coord,
+    const int *atype,
+    const float *cell,
+    const int *pairs,
     const int npairs,
-    const float* delef_,
+    const float *delef_,
     const int nghost,
-    const DP_Nlist* nlist,
-    float* dfcorr_,
-    float* dvcorr_
-)
-{
-    DP_DipoleChargeModifierComputeNListf(dcm, natom, coord, atype, cell, pairs, npairs, delef_, nghost, nlist, dfcorr_, dvcorr_);
+    const DP_Nlist *nlist,
+    float *dfcorr_,
+    float *dvcorr_) {
+  DP_DipoleChargeModifierComputeNListf(dcm, natom, coord, atype, cell, pairs,
+                                       npairs, delef_, nghost, nlist, dfcorr_,
+                                       dvcorr_);
 }
 
-namespace deepmd
-{
-    namespace hpp
-    {
-        /**
-        * @brief Neighbor list.
-        **/
-        struct InputNlist
-        {
-            InputNlist () 
-                : inum(0), ilist(nullptr), numneigh(nullptr), firstneigh(nullptr),
-                nl(DP_NewNlist(0, nullptr, nullptr, nullptr))
-            {};
-            InputNlist(
-                int inum_,
-                int *ilist_,
-                int *numneigh_,
-                int **firstneigh_)
-                : inum(inum_), ilist(ilist_), numneigh(numneigh_), firstneigh(firstneigh_),
-                nl(DP_NewNlist(inum_, ilist_, numneigh_, firstneigh_))
-            {};
-            /// @brief C API neighbor list.
-            DP_Nlist* nl;
-            /// @brief Number of core region atoms
-            int inum;
-            /// @brief Array stores the core region atom's index
-            int *ilist;
-            /// @brief Array stores the core region atom's neighbor atom number
-            int *numneigh;
-            /// @brief Array stores the core region atom's neighbor index
-            int **firstneigh;
-        };
-
-        /**
-         * @brief Convert pbtxt to pb.
-         * @param[in] fn_pb_txt Filename of the pb txt file.
-         * @param[in] fn_pb Filename of the pb file.
-         **/
-        void
-        inline
-        convert_pbtxt_to_pb(std::string fn_pb_txt, std::string fn_pb)
-        {
-            DP_ConvertPbtxtToPb(fn_pb_txt.c_str(), fn_pb.c_str());
-        };
-        /**
-         * @brief Convert int vector to InputNlist.
-         * @param[out] to_nlist InputNlist.
-         * @param[in] from_nlist 2D int vector. The first axis represents the centeral atoms
-         *                      and the second axis represents the neighbor atoms.
-        */
-        void
-        inline
-        convert_nlist(
-            InputNlist & to_nlist,
-            std::vector > & from_nlist
-            )
-        {
-            to_nlist.inum = from_nlist.size();
-            for(int ii = 0; ii < to_nlist.inum; ++ii){
-                to_nlist.ilist[ii] = ii;
-                to_nlist.numneigh[ii] = from_nlist[ii].size();
-                to_nlist.firstneigh[ii] = &from_nlist[ii][0];
-            }
-            to_nlist.nl = DP_NewNlist(
-                to_nlist.inum,
-                to_nlist.ilist,
-                to_nlist.numneigh,
-                to_nlist.firstneigh
-                );
-        }
-        /**
-         * @brief Deep Potential.
-         **/
-        class DeepPot
-        {
-        public:
-            /**
-             * @brief DP constructor without initialization.
-             **/
-            DeepPot() : dp(nullptr) {};
-            ~DeepPot(){};
-            /**
-             * @brief DP constructor with initialization.
-             * @param[in] model The name of the frozen model file.
-             * @param[in] gpu_rank The GPU rank.
-             * @param[in] file_content The content of the frozen model file.
-             **/
-            DeepPot(const std::string &model, const int &gpu_rank = 0, const std::string &file_content = "") : dp(nullptr)
-            {
-                init(model, gpu_rank, file_content);
-            };
-            /**
-             * @brief Initialize the DP.
-             * @param[in] model The name of the frozen model file.
-             * @param[in] gpu_rank The GPU rank.
-             * @param[in] file_content The content of the frozen model file.
-             **/
-            void init(const std::string &model, const int &gpu_rank = 0, const std::string &file_content = "")
-            {
-                if (dp)
-                {
-                    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do nothing at the second call of initializer" << std::endl;
-                    return;
-                }
-                dp = DP_NewDeepPotWithParam(model.c_str(), gpu_rank, file_content.c_str());
-            };
-
-            /**
-             * @brief Evaluate the energy, force and virial by using this DP.
-             * @param[out] ener The system energy.
-             * @param[out] force The force on each atom.
-             * @param[out] virial The virial.
-             * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-             * @param[in] atype The atom types. The list should contain natoms ints.
-             * @param[in] box The cell of the region. The array should be of size nframes x 9 (PBC) or empty (no PBC).
-             **/
-            template 
-            void compute(double &ener,
-                         std::vector &force,
-                         std::vector &virial,
-                         const std::vector &coord,
-                         const std::vector &atype,
-                         const std::vector &box)
-            {
-                unsigned int natoms = atype.size();
-                assert(natoms * 3 == coord.size());
-                if (!box.empty()) {
-                    assert(box.size() == 9);
-                }
-                const VALUETYPE *coord_ = &coord[0];
-                const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
-                const int *atype_ = &atype[0];
-                double *ener_ = &ener;
-                force.resize(natoms * 3);
-                virial.resize(9);
-                VALUETYPE *force_ = &force[0];
-                VALUETYPE *virial_ = &virial[0];
-
-                _DP_DeepPotCompute(dp, natoms, coord_, atype_, box_, ener_, force_, virial_, nullptr, nullptr);
-            };
-            /**
-             * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial by using this DP.
-             * @param[out] ener The system energy.
-             * @param[out] force The force on each atom.
-             * @param[out] virial The virial.
-             * @param[out] atom_energy The atomic energy.
-             * @param[out] atom_virial The atomic virial.
-             * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-             * @param[in] atype The atom types. The list should contain natoms ints.
-             * @param[in] box The cell of the region. The array should be of size nframes x 9 (PBC) or empty (no PBC).
-             **/
-            template 
-            void compute(double &ener,
-                         std::vector &force,
-                         std::vector &virial,
-                         std::vector &atom_energy,
-                         std::vector &atom_virial,
-                         const std::vector &coord,
-                         const std::vector &atype,
-                         const std::vector &box)
-            {
-                unsigned int natoms = atype.size();
-                assert(natoms * 3 == coord.size());
-                if (!box.empty()) {
-                    assert(box.size() == 9);
-                }
-                const VALUETYPE *coord_ = &coord[0];
-                const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
-                const int *atype_ = &atype[0];
-
-                double *ener_ = &ener;
-                force.resize(natoms * 3);
-                virial.resize(9);
-                atom_energy.resize(natoms);
-                atom_virial.resize(natoms * 9);
-                VALUETYPE *force_ = &force[0];
-                VALUETYPE *virial_ = &virial[0];
-                VALUETYPE *atomic_ener_ = &atom_energy[0];
-                VALUETYPE *atomic_virial_ = &atom_virial[0];
-
-                _DP_DeepPotCompute(dp, natoms, coord_, atype_, box_, ener_, force_, virial_, atomic_ener_, atomic_virial_);
-            };
-
-            /**
-             * @brief Evaluate the energy, force and virial by using this DP with the neighbor list.
-             * @param[out] ener The system energy.
-             * @param[out] force The force on each atom.
-             * @param[out] virial The virial.
-             * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-             * @param[in] atype The atom types. The list should contain natoms ints.
-             * @param[in] box The cell of the region. The array should be of size nframes x 9 (PBC) or empty (no PBC).
-             * @param[in] nghost The number of ghost atoms.
-             * @param[in] nlist The neighbor list.
-             * @param[in] ago Update the internal neighbour list if ago is 0.
-             **/
-            template 
-            void compute(double &ener,
-                         std::vector &force,
-                         std::vector &virial,
-                         const std::vector &coord,
-                         const std::vector &atype,
-                         const std::vector &box,
-                         const int nghost,
-                         const InputNlist &lmp_list,
-                         const int &ago)
-            {
-                unsigned int natoms = atype.size();
-                assert(natoms * 3 == coord.size());
-                if (!box.empty())
-                {
-                    assert(box.size() == 9);
-                }
-                const VALUETYPE *coord_ = &coord[0];
-                const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
-                const int *atype_ = &atype[0];
-                double *ener_ = &ener;
-                force.resize(natoms * 3);
-                virial.resize(9);
-                VALUETYPE *force_ = &force[0];
-                VALUETYPE *virial_ = &virial[0];
-
-                _DP_DeepPotComputeNList(dp, natoms, coord_, atype_, box_, nghost, lmp_list.nl, ago, ener_, force_, virial_, nullptr, nullptr);
-            };
-            /**
-             * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial by using this DP with the neighbor list.
-             * @param[out] ener The system energy.
-             * @param[out] force The force on each atom.
-             * @param[out] virial The virial.
-             * @param[out] atom_energy The atomic energy.
-             * @param[out] atom_virial The atomic virial.
-             * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-             * @param[in] atype The atom types. The list should contain natoms ints.
-             * @param[in] box The cell of the region. The array should be of size nframes x 9 (PBC) or empty (no PBC).
-             * @param[in] nghost The number of ghost atoms.
-             * @param[in] nlist The neighbor list.
-             * @param[in] ago Update the internal neighbour list if ago is 0.
-             **/
-            template 
-            void compute(double &ener,
-                         std::vector &force,
-                         std::vector &virial,
-                         std::vector &atom_energy,
-                         std::vector &atom_virial,
-                         const std::vector &coord,
-                         const std::vector &atype,
-                         const std::vector &box,
-                         const int nghost,
-                         const InputNlist &lmp_list,
-                         const int &ago)
-            {
-                unsigned int natoms = atype.size();
-                assert(natoms * 3 == coord.size());
-                if (!box.empty())
-                {
-                    assert(box.size() == 9);
-                }
-                const VALUETYPE *coord_ = &coord[0];
-                const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
-                const int *atype_ = &atype[0];
-
-                double *ener_ = &ener;
-                force.resize(natoms * 3);
-                virial.resize(9);
-                atom_energy.resize(natoms);
-                atom_virial.resize(natoms * 9);
-                VALUETYPE *force_ = &force[0];
-                VALUETYPE *virial_ = &virial[0];
-                VALUETYPE *atomic_ener_ = &atom_energy[0];
-                VALUETYPE *atomic_virial_ = &atom_virial[0];
-
-                _DP_DeepPotComputeNList(dp, natoms, coord_, atype_, box_, nghost, lmp_list.nl, ago, ener_, force_, virial_, atomic_ener_, atomic_virial_);
-            };
-            /**
-             * @brief Get the cutoff radius.
-             * @return The cutoff radius.
-             **/
-            double cutoff() const
-            {
-                assert(dp);
-                return DP_DeepPotGetCutoff(dp);
-            };
-            /**
-             * @brief Get the number of types.
-             * @return The number of types.
-             **/
-            int numb_types() const
-            {
-                assert(dp);
-                return DP_DeepPotGetNumbTypes(dp);
-            };
-            /**
-             * @brief Get the type map (element name of the atom types) of this model.
-             * @param[out] type_map The type map of this model.
-             **/
-            void get_type_map(std::string &type_map)
-            {
-                const char *type_map_c = DP_DeepPotGetTypeMap(dp);
-                type_map.assign(type_map_c);
-                delete[] type_map_c;
-            };
-            /**
-             * @brief Print the summary of DeePMD-kit, including the version and the build information.
-             * @param[in] pre The prefix to each line.
-             */
-            void print_summary(const std::string &pre) const
-            {
-                DP_PrintSummary(pre.c_str());
-            }
-
-        private:
-            DP_DeepPot *dp;
-        };
-
-        /**
-         * @brief Deep Potential model deviation.
-         **/
-        class DeepPotModelDevi
-        {
-        public:
-            /**
-             * @brief DP model deviation constructor without initialization.
-             **/
-            DeepPotModelDevi() : dp(nullptr) {};
-            ~DeepPotModelDevi(){};
-            /**
-             * @brief DP model deviation constructor with initialization.
-             * @param[in] models The names of the frozen model file.
-             **/
-            DeepPotModelDevi(const std::vector &models) : dp(nullptr)
-            {
-                init(models);
-            };
-            /**
-             * @brief Initialize the DP model deviation.
-             * @param[in] model The name of the frozen model file.
-             **/
-            void init(const std::vector &models)
-            {
-                if (dp)
-                {
-                    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do nothing at the second call of initializer" << std::endl;
-                    return;
-                }
-                std::vector cstrings;
-                cstrings.reserve(models.size());
-                for (std::string const& str : models)
-                    cstrings.push_back(str.data());
-
-                dp = DP_NewDeepPotModelDevi(cstrings.data(), cstrings.size());
-                numb_models = models.size();
-            };
-
-            /**
-             * @brief Evaluate the energy, force and virial by using this DP model deviation.
-             * @param[out] ener The system energy.
-             * @param[out] force The force on each atom.
-             * @param[out] virial The virial.
-             * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-             * @param[in] atype The atom types. The list should contain natoms ints.
-             * @param[in] box The cell of the region. The array should be of size nframes x 9 (PBC) or empty (no PBC).
-             **/
-            template 
-            void compute(std::vector &ener,
-                         std::vector> &force,
-                         std::vector> &virial,
-                         const std::vector &coord,
-                         const std::vector &atype,
-                         const std::vector &box,
-                         const int nghost,
-                         const InputNlist &lmp_list,
-                         const int &ago)
-            {
-                unsigned int natoms = atype.size();
-                assert(natoms * 3 == coord.size());
-                if (!box.empty()) {
-                    assert(box.size() == 9);
-                }
-                const VALUETYPE *coord_ = &coord[0];
-                const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
-                const int *atype_ = &atype[0];
-
-                // memory will be continous for std::vector but not std::vector
-                std::vector energy_flat(numb_models);
-                std::vector force_flat(numb_models * natoms * 3);
-                std::vector virial_flat(numb_models * 9);
-                double *ener_ = &energy_flat[0];
-                VALUETYPE *force_ = &force_flat[0];
-                VALUETYPE *virial_ = &virial_flat[0];
-
-                _DP_DeepPotModelDeviComputeNList(dp, natoms, coord_, atype_, box_, nghost, lmp_list.nl, ago, ener_, force_, virial_, nullptr, nullptr);
-
-                // reshape
-                ener.resize(numb_models);
-                force.resize(numb_models);
-                virial.resize(numb_models);
-                for (int i = 0; i < numb_models; i++)
-                {
-                    ener[i] = energy_flat[i];
-                    force[i].resize(natoms * 3);
-                    virial[i].resize(9);
-                    for (int j = 0; j < natoms * 3; j++)
-                        force[i][j] = force_flat[i * natoms * 3 + j];
-                    for (int j = 0; j < 9; j++)
-                        virial[i][j] = virial_flat[i * 9 + j];
-                }
-            };
-            /**
-             * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial by using this DP model deviation.
-             * @param[out] ener The system energy.
-             * @param[out] force The force on each atom.
-             * @param[out] virial The virial.
-             * @param[out] atom_energy The atomic energy.
-             * @param[out] atom_virial The atomic virial.
-             * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-             * @param[in] atype The atom types. The list should contain natoms ints.
-             * @param[in] box The cell of the region. The array should be of size nframes x 9 (PBC) or empty (no PBC).
-             **/
-            template 
-            void compute(std::vector &ener,
-                         std::vector> &force,
-                         std::vector> &virial,
-                         std::vector> &atom_energy,
-                         std::vector> &atom_virial,
-                         const std::vector &coord,
-                         const std::vector &atype,
-                         const std::vector &box,
-                         const int nghost,
-                         const InputNlist &lmp_list,
-                         const int &ago)
-            {
-                unsigned int natoms = atype.size();
-                assert(natoms * 3 == coord.size());
-                if (!box.empty()) {
-                    assert(box.size() == 9);
-                }
-                const VALUETYPE *coord_ = &coord[0];
-                const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
-                const int *atype_ = &atype[0];
-
-                std::vector energy_flat(numb_models);
-                std::vector force_flat(numb_models * natoms * 3);
-                std::vector virial_flat(numb_models * 9);
-                std::vector atom_energy_flat(numb_models * natoms);
-                std::vector atom_virial_flat(numb_models * natoms * 9);
-                double *ener_ = &energy_flat[0];
-                VALUETYPE *force_ = &force_flat[0];
-                VALUETYPE *virial_ = &virial_flat[0];
-                VALUETYPE *atomic_ener_ = &atom_energy_flat[0];
-                VALUETYPE *atomic_virial_ = &atom_virial_flat[0];
-
-                _DP_DeepPotModelDeviComputeNList(dp, natoms, coord_, atype_, box_, nghost, lmp_list.nl, ago, ener_, force_, virial_, atomic_ener_, atomic_virial_);
-
-                // reshape
-                ener.resize(numb_models);
-                force.resize(numb_models);
-                virial.resize(numb_models);
-                atom_energy.resize(numb_models);
-                atom_virial.resize(numb_models);
-                for (int i = 0; i < numb_models; i++)
-                {
-                    ener[i] = energy_flat[i];
-                    force[i].resize(natoms * 3);
-                    virial[i].resize(9);
-                    atom_energy[i].resize(natoms);
-                    atom_virial[i].resize(natoms * 9);
-                    for (int j = 0; j < natoms * 3; j++)
-                        force[i][j] = force_flat[i * natoms * 3 + j];
-                    for (int j = 0; j < 9; j++)
-                        virial[i][j] = virial_flat[i * 9 + j];
-                    for (int j = 0; j < natoms; j++)
-                        atom_energy[i][j] = atom_energy_flat[i * natoms + j];
-                    for (int j = 0; j < natoms * 9; j++)
-                        atom_virial[i][j] = atom_virial_flat[i * natoms * 9 + j];
-                }
-            };
-            /**
-             * @brief Get the cutoff radius.
-             * @return The cutoff radius.
-             **/
-            double cutoff() const
-            {
-                assert(dp);
-                return DP_DeepPotModelDeviGetCutoff(dp);
-            };
-            /**
-             * @brief Get the number of types.
-             * @return The number of types.
-             **/
-            int numb_types() const
-            {
-                assert(dp);
-                return DP_DeepPotModelDeviGetNumbTypes(dp);
-            };
-
-        private:
-            DP_DeepPotModelDevi *dp;
-            int numb_models;
-        };
-
-        /**
-         * @brief Deep Tensor.
-         **/
-        class DeepTensor
-        {
-        public:
-            /**
-             * @brief Deep Tensor constructor without initialization.
-             **/
-            DeepTensor() : dt(nullptr) {};
-            ~DeepTensor(){};
-            /**
-             * @brief DeepTensor constructor with initialization.
-             * @param[in] model The name of the frozen model file.
-             **/
-            DeepTensor(const std::string &model, const int &gpu_rank = 0, const std::string &name_scope = "") : dt(nullptr)
-            {
-                init(model, gpu_rank, name_scope);
-            };
-            /**
-             * @brief Initialize the DeepTensor.
-             * @param[in] model The name of the frozen model file.
-             **/
-            void init(const std::string &model, const int &gpu_rank = 0, const std::string &name_scope = "")
-            {
-                if (dt)
-                {
-                    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do nothing at the second call of initializer" << std::endl;
-                    return;
-                }
-                dt = DP_NewDeepTensorWithParam(model.c_str(), gpu_rank, name_scope.c_str());
-                odim = output_dim();
-                nsel_types = DP_DeepTensorGetNumbSelTypes(dt);
-            };
-
-            /**
-             * @brief Evaluate the tensor, force and virial by using this Deep Tensor.
-             * @param[out] tensor The atomic tensor.
-             * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-             * @param[in] atype The atom types. The list should contain natoms ints.
-             * @param[in] box The cell of the region. The array should be of size nframes x 9 (PBC) or empty (no PBC).
-             **/
-            template 
-            void compute(std::vector &tensor,
-                         const std::vector &coord,
-                         const std::vector &atype,
-                         const std::vector &box)
-            {
-                unsigned int natoms = atype.size();
-                assert(natoms * 3 == coord.size());
-                if (!box.empty()) {
-                    assert(box.size() == 9);
-                }
-                const VALUETYPE *coord_ = &coord[0];
-                const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
-                const int *atype_ = &atype[0];
-
-                VALUETYPE *tensor_;
-                VALUETYPE **p_tensor = &tensor_;
-                int size;
-                int *p_size = &size;
-
-                _DP_DeepTensorComputeTensor(dt, natoms, coord_, atype_, box_, p_tensor, p_size);
-
-                tensor.resize(size);
-                std::copy(tensor_, tensor_ + size, tensor.begin());
-                delete[] tensor_;
-            };
-
-            /**
-             * @brief Evaluate the tensor, force and virial by using this Deep Tensor with the neighbor list.
-             * @param[out] tensor The tensor.
-             * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-             * @param[in] atype The atom types. The list should contain natoms ints.
-             * @param[in] box The cell of the region. The array should be of size nframes x 9 (PBC) or empty (no PBC).
-             * @param[in] nghost The number of ghost atoms.
-             * @param[in] nlist The neighbor list.
-             **/
-            template 
-            void compute(std::vector &tensor,
-                         const std::vector &coord,
-                         const std::vector &atype,
-                         const std::vector &box,
-                         const int nghost,
-                         const InputNlist &lmp_list)
-            {
-                unsigned int natoms = atype.size();
-                assert(natoms * 3 == coord.size());
-                if (!box.empty())
-                {
-                    assert(box.size() == 9);
-                }
-                const VALUETYPE *coord_ = &coord[0];
-                const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
-                const int *atype_ = &atype[0];
-
-                VALUETYPE *tensor_;
-                VALUETYPE **p_tensor = &tensor_;
-                int size;
-                int *p_size = &size;
-
-                _DP_DeepTensorComputeTensorNList(dt, natoms, coord_, atype_, box_, nghost, lmp_list.nl, p_tensor, p_size);
-
-                tensor.resize(size);
-                std::copy(tensor_, tensor_ + size, tensor.begin());
-                delete[] tensor_;
-            };
-
-            /**
-             * @brief Evaluate the global tensor, force and virial by using this Deep Tensor.
-             * @param[out] global_tensor The global tensor.
-             * @param[out] force The force on each atom.
-             * @param[out] virial The virial.
-             * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-             * @param[in] atype The atom types. The list should contain natoms ints.
-             * @param[in] box The cell of the region. The array should be of size nframes x 9 (PBC) or empty (no PBC).
-             **/
-            template 
-            void compute(std::vector &global_tensor,
-                         std::vector &force,
-                         std::vector &virial,
-                         const std::vector &coord,
-                         const std::vector &atype,
-                         const std::vector &box)
-            {
-                unsigned int natoms = atype.size();
-                assert(natoms * 3 == coord.size());
-                if (!box.empty()) {
-                    assert(box.size() == 9);
-                }
-                const VALUETYPE *coord_ = &coord[0];
-                const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
-                const int *atype_ = &atype[0];
-                global_tensor.resize(odim);
-                force.resize(odim * natoms * 3);
-                virial.resize(odim * 9);
-                VALUETYPE *global_tensor_ = &global_tensor[0];
-                VALUETYPE *force_ = &force[0];
-                VALUETYPE *virial_ = &virial[0];
-
-                _DP_DeepTensorCompute(dt, natoms, coord_, atype_, box_, global_tensor_, force_, virial_, nullptr, nullptr, nullptr);
-            };
-            /**
-             * @brief Evaluate the global tensor, force, virial, atomic tensor, and atomic virial by using this Deep Tensor.
-             * @param[out] global_tensor The global tensor.
-             * @param[out] force The force on each atom.
-             * @param[out] virial The virial.
-             * @param[out] atom_tensor The atomic tensor.
-             * @param[out] atom_virial The atomic virial.
-             * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-             * @param[in] atype The atom types. The list should contain natoms ints.
-             * @param[in] box The cell of the region. The array should be of size nframes x 9 (PBC) or empty (no PBC).
-             **/
-            template 
-            void compute(std::vector &global_tensor,
-                         std::vector &force,
-                         std::vector &virial,
-                         std::vector &atom_tensor,
-                         std::vector &atom_virial,
-                         const std::vector &coord,
-                         const std::vector &atype,
-                         const std::vector &box)
-            {
-                unsigned int natoms = atype.size();
-                assert(natoms * 3 == coord.size());
-                if (!box.empty()) {
-                    assert(box.size() == 9);
-                }
-                const VALUETYPE *coord_ = &coord[0];
-                const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
-                const int *atype_ = &atype[0];
-
-                global_tensor.resize(odim);
-                force.resize(odim * natoms * 3);
-                virial.resize(odim * 9);
-                atom_virial.resize(odim * natoms * 9);
-                VALUETYPE *global_tensor_ = &global_tensor[0];
-                VALUETYPE *force_ = &force[0];
-                VALUETYPE *virial_ = &virial[0];
-                VALUETYPE *atomic_virial_ = &atom_virial[0];
-
-                VALUETYPE *atomic_tensor_;
-                VALUETYPE **p_atomic_tensor = &atomic_tensor_;
-                int size_at;
-                int *p_size_at = &size_at;
-
-                _DP_DeepTensorCompute(dt, natoms, coord_, atype_, box_, global_tensor_, force_, virial_, p_atomic_tensor, atomic_virial_, p_size_at);
-
-                atom_tensor.resize(size_at);
-                std::copy(atomic_tensor_, atomic_tensor_ + size_at, atom_tensor.begin());
-                delete[] atomic_tensor_;
-            };
-
-            /**
-             * @brief Evaluate the global tensor, force and virial by using this Deep Tensor with the neighbor list.
-             * @param[out] global_tensor The global tensor.
-             * @param[out] force The force on each atom.
-             * @param[out] virial The virial.
-             * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-             * @param[in] atype The atom types. The list should contain natoms ints.
-             * @param[in] box The cell of the region. The array should be of size nframes x 9 (PBC) or empty (no PBC).
-             * @param[in] nghost The number of ghost atoms.
-             * @param[in] nlist The neighbor list.
-             **/
-            template 
-            void compute(std::vector &global_tensor,
-                         std::vector &force,
-                         std::vector &virial,
-                         const std::vector &coord,
-                         const std::vector &atype,
-                         const std::vector &box,
-                         const int nghost,
-                         const InputNlist &lmp_list)
-            {
-                unsigned int natoms = atype.size();
-                assert(natoms * 3 == coord.size());
-                if (!box.empty())
-                {
-                    assert(box.size() == 9);
-                }
-                const VALUETYPE *coord_ = &coord[0];
-                const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
-                const int *atype_ = &atype[0];
-                global_tensor.resize(odim);
-                force.resize(odim * natoms * 3);
-                virial.resize(odim * 9);
-                VALUETYPE *global_tensor_ = &global_tensor[0];
-                VALUETYPE *force_ = &force[0];
-                VALUETYPE *virial_ = &virial[0];
-
-                _DP_DeepTensorComputeNList(dt, natoms, coord_, atype_, box_, nghost, lmp_list.nl, global_tensor_, force_, virial_, nullptr, nullptr, nullptr);
-            };
-            /**
-             * @brief Evaluate the global tensor, force, virial, atomic tensor, and atomic virial by using this Deep Tensor with the neighbor list.
-             * @param[out] global_tensor The global tensor.
-             * @param[out] force The force on each atom.
-             * @param[out] virial The virial.
-             * @param[out] atom_tensor The atomic tensor.
-             * @param[out] atom_virial The atomic virial.
-             * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-             * @param[in] atype The atom types. The list should contain natoms ints.
-             * @param[in] box The cell of the region. The array should be of size nframes x 9 (PBC) or empty (no PBC).
-             * @param[in] nghost The number of ghost atoms.
-             * @param[in] nlist The neighbor list.
-             **/
-            template 
-            void compute(std::vector &global_tensor,
-                         std::vector &force,
-                         std::vector &virial,
-                         std::vector &atom_tensor,
-                         std::vector &atom_virial,
-                         const std::vector &coord,
-                         const std::vector &atype,
-                         const std::vector &box,
-                         const int nghost,
-                         const InputNlist &lmp_list)
-            {
-                unsigned int natoms = atype.size();
-                assert(natoms * 3 == coord.size());
-                if (!box.empty())
-                {
-                    assert(box.size() == 9);
-                }
-                const VALUETYPE *coord_ = &coord[0];
-                const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
-                const int *atype_ = &atype[0];
-
-                global_tensor.resize(odim);
-                force.resize(odim * natoms * 3);
-                virial.resize(odim * 9);
-                atom_virial.resize(odim * natoms * 9);
-                VALUETYPE *global_tensor_ = &global_tensor[0];
-                VALUETYPE *force_ = &force[0];
-                VALUETYPE *virial_ = &virial[0];
-                VALUETYPE *atomic_virial_ = &atom_virial[0];
-
-                VALUETYPE *atomic_tensor_;
-                VALUETYPE **p_atomic_tensor = &atomic_tensor_;
-                int size_at;
-                int *p_size_at = &size_at;
-
-                _DP_DeepTensorComputeNList(dt, natoms, coord_, atype_, box_, nghost, lmp_list.nl, global_tensor_, force_, virial_, p_atomic_tensor, atomic_virial_, p_size_at);
-
-                atom_tensor.resize(size_at);
-                std::copy(atomic_tensor_, atomic_tensor_ + size_at, atom_tensor.begin());
-                delete[] atomic_tensor_;
-            };
-            /**
-             * @brief Get the cutoff radius.
-             * @return The cutoff radius.
-             **/
-            double cutoff() const
-            {
-                assert(dt);
-                return DP_DeepTensorGetCutoff(dt);
-            };
-            /**
-             * @brief Get the number of types.
-             * @return The number of types.
-             **/
-            int numb_types() const
-            {
-                assert(dt);
-                return DP_DeepTensorGetNumbTypes(dt);
-            };
-            /**
-             * @brief Get the output dimension.
-             * @return The output dimension.
-             **/
-            int output_dim() const
-            {
-                assert(dt);
-                return DP_DeepTensorGetOutputDim(dt);
-            }
-
-            std::vector sel_types() const
-            {
-                int* sel_types_arr = DP_DeepTensorGetSelTypes(dt);
-                std::vector sel_types_vec = std::vector(sel_types_arr, sel_types_arr + nsel_types);
-                return sel_types_vec;
-            }
-            /**
-             * @brief Print the summary of DeePMD-kit, including the version and the build information.
-             * @param[in] pre The prefix to each line.
-             */
-            void print_summary(const std::string &pre) const
-            {
-                DP_PrintSummary(pre.c_str());
-            }
-
-        private:
-            DP_DeepTensor *dt;
-            int odim;
-            int nsel_types;
-        };
-
-        class DipoleChargeModifier
-        {
-        public:
-            /**
-             * @brief DipoleChargeModifier constructor without initialization.
-             **/
-            DipoleChargeModifier() : dcm(nullptr) {};
-            ~DipoleChargeModifier(){};
-            /**
-             * @brief DipoleChargeModifier constructor with initialization.
-             * @param[in] model The name of the frozen model file.
-             * @param[in] gpu_rank The rank of the GPU to be used.
-             * @param[in] name_scope The name scope of the model.
-             **/
-            DipoleChargeModifier(const std::string &model, const int &gpu_rank = 0, const std::string &name_scope = "") : dcm(nullptr)
-            {
-                init(model, gpu_rank, name_scope);
-            };
-            /**
-             * @brief Initialize the DipoleChargeModifier.
-             * @param[in] model The name of the frozen model file.
-             * @param[in] gpu_rank The rank of the GPU to be used.
-             * @param[in] name_scope The name scope of the model.
-             **/
-            void init(const std::string &model, const int &gpu_rank = 0, const std::string &name_scope = "")
-            {
-                if (dcm)
-                {
-                    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do nothing at the second call of initializer" << std::endl;
-                    return;
-                }
-                dcm = DP_NewDipoleChargeModifierWithParam(model.c_str(), gpu_rank, name_scope.c_str());
-                nsel_types = DP_DipoleChargeModifierGetNumbSelTypes(dcm);
-            };
-            /**
-             * @brief Evaluate the force and virial correction by using this dipole charge modifier.
-             * @param[out] dfcorr_ The force correction on each atom.
-             * @param[out] dvcorr_ The virial correction.
-             * @param[in] dcoord_ The coordinates of atoms. The array should be of size natoms x 3.
-             * @param[in] datype_ The atom types. The list should contain natoms ints.
-             * @param[in] dbox The cell of the region. The array should be of size 9.
-             * @param[in] pairs The pairs of atoms. The list should contain npairs pairs of ints.
-             * @param[in] delef_ The electric field on each atom. The array should be of size natoms x 3.
-             * @param[in] nghost The number of ghost atoms.
-             * @param[in] lmp_list The neighbor list.
-             **/
-            template
-            void compute (std::vector &		dfcorr_,
-                    std::vector &		dvcorr_,
-                    const std::vector &	dcoord_,
-                    const std::vector &		datype_,
-                    const std::vector &	dbox, 
-                    const std::vector> &	pairs,
-                    const std::vector &	delef_, 
-                    const int			nghost,
-                    const InputNlist &	lmp_list)
-            {
-                unsigned int natoms = datype_.size();
-                assert(natoms * 3 == dcoord_.size());
-                if (!dbox.empty())
-                {
-                    assert(dbox.size() == 9);
-                }
-                const VALUETYPE *dcoord = &dcoord_[0];
-                const VALUETYPE *dbox_ = !dbox.empty() ? &dbox[0] : nullptr;
-                const int *datype = &datype_[0];
-                const int npairs = pairs.size();
-                const int *dpairs = reinterpret_cast(&pairs[0]);
-                const VALUETYPE *delef = &delef_[0];
-
-                dfcorr_.resize(natoms * 3);
-                dvcorr_.resize(9);
-                VALUETYPE *dfcorr = &dfcorr_[0];
-                VALUETYPE *dvcorr = &dvcorr_[0];
-
-                _DP_DipoleChargeModifierComputeNList(dcm, natoms, dcoord, datype, dbox_, dpairs, npairs, delef, nghost, lmp_list.nl, dfcorr, dvcorr);
-            };
-            /**
-             * @brief Get the cutoff radius.
-             * @return The cutoff radius.
-             **/
-            double cutoff() const
-            {
-                assert(dcm);
-                return DP_DipoleChargeModifierGetCutoff(dcm);
-            };
-            /**
-             * @brief Get the number of types.
-             * @return The number of types.
-             **/
-            int numb_types() const
-            {
-                assert(dcm);
-                return DP_DipoleChargeModifierGetNumbTypes(dcm);
-            };
-
-            std::vector sel_types() const
-            {
-                int* sel_types_arr = DP_DipoleChargeModifierGetSelTypes(dcm);
-                std::vector sel_types_vec = std::vector(sel_types_arr, sel_types_arr + nsel_types);
-                return sel_types_vec;
-            }
-
-            /**
-             * @brief Print the summary of DeePMD-kit, including the version and the build information.
-             * @param[in] pre The prefix to each line.
-             */
-            void print_summary(const std::string &pre) const
-            {
-                DP_PrintSummary(pre.c_str());
-            }
-        private:
-            DP_DipoleChargeModifier *dcm;
-            int nsel_types;
-        };
-    }
+namespace deepmd {
+namespace hpp {
+/**
+ * @brief Neighbor list.
+ **/
+struct InputNlist {
+  InputNlist()
+      : inum(0),
+        ilist(nullptr),
+        numneigh(nullptr),
+        firstneigh(nullptr),
+        nl(DP_NewNlist(0, nullptr, nullptr, nullptr)){};
+  InputNlist(int inum_, int *ilist_, int *numneigh_, int **firstneigh_)
+      : inum(inum_),
+        ilist(ilist_),
+        numneigh(numneigh_),
+        firstneigh(firstneigh_),
+        nl(DP_NewNlist(inum_, ilist_, numneigh_, firstneigh_)){};
+  /// @brief C API neighbor list.
+  DP_Nlist *nl;
+  /// @brief Number of core region atoms
+  int inum;
+  /// @brief Array stores the core region atom's index
+  int *ilist;
+  /// @brief Array stores the core region atom's neighbor atom number
+  int *numneigh;
+  /// @brief Array stores the core region atom's neighbor index
+  int **firstneigh;
+};
+
+/**
+ * @brief Convert pbtxt to pb.
+ * @param[in] fn_pb_txt Filename of the pb txt file.
+ * @param[in] fn_pb Filename of the pb file.
+ **/
+void inline convert_pbtxt_to_pb(std::string fn_pb_txt, std::string fn_pb) {
+  DP_ConvertPbtxtToPb(fn_pb_txt.c_str(), fn_pb.c_str());
+};
+/**
+ * @brief Convert int vector to InputNlist.
+ * @param[out] to_nlist InputNlist.
+ * @param[in] from_nlist 2D int vector. The first axis represents the centeral
+ * atoms and the second axis represents the neighbor atoms.
+ */
+void inline convert_nlist(InputNlist &to_nlist,
+                          std::vector> &from_nlist) {
+  to_nlist.inum = from_nlist.size();
+  for (int ii = 0; ii < to_nlist.inum; ++ii) {
+    to_nlist.ilist[ii] = ii;
+    to_nlist.numneigh[ii] = from_nlist[ii].size();
+    to_nlist.firstneigh[ii] = &from_nlist[ii][0];
+  }
+  to_nlist.nl = DP_NewNlist(to_nlist.inum, to_nlist.ilist, to_nlist.numneigh,
+                            to_nlist.firstneigh);
 }
+/**
+ * @brief Deep Potential.
+ **/
+class DeepPot {
+ public:
+  /**
+   * @brief DP constructor without initialization.
+   **/
+  DeepPot() : dp(nullptr){};
+  ~DeepPot(){};
+  /**
+   * @brief DP constructor with initialization.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank.
+   * @param[in] file_content The content of the frozen model file.
+   **/
+  DeepPot(const std::string &model,
+          const int &gpu_rank = 0,
+          const std::string &file_content = "")
+      : dp(nullptr) {
+    init(model, gpu_rank, file_content);
+  };
+  /**
+   * @brief Initialize the DP.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank.
+   * @param[in] file_content The content of the frozen model file.
+   **/
+  void init(const std::string &model,
+            const int &gpu_rank = 0,
+            const std::string &file_content = "") {
+    if (dp) {
+      std::cerr << "WARNING: deepmd-kit should not be initialized twice, do "
+                   "nothing at the second call of initializer"
+                << std::endl;
+      return;
+    }
+    dp = DP_NewDeepPotWithParam(model.c_str(), gpu_rank, file_content.c_str());
+  };
+
+  /**
+   * @brief Evaluate the energy, force and virial by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9 (PBC) or empty (no PBC).
+   **/
+  template <typename VALUETYPE>
+  void compute(double &ener,
+               std::vector<VALUETYPE> &force,
+               std::vector<VALUETYPE> &virial,
+               const std::vector<VALUETYPE> &coord,
+               const std::vector<int> &atype,
+               const std::vector<VALUETYPE> &box) {
+    unsigned int natoms = atype.size();
+    assert(natoms * 3 == coord.size());
+    if (!box.empty()) {
+      assert(box.size() == 9);
+    }
+    const VALUETYPE *coord_ = &coord[0];
+    const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
+    const int *atype_ = &atype[0];
+    double *ener_ = &ener;
+    force.resize(natoms * 3);
+    virial.resize(9);
+    VALUETYPE *force_ = &force[0];
+    VALUETYPE *virial_ = &virial[0];
+
+    _DP_DeepPotCompute<VALUETYPE>(dp, natoms, coord_, atype_, box_, ener_,
+                                  force_, virial_, nullptr, nullptr);
+  };
+  /**
+   * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_energy The atomic energy.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9 (PBC) or empty (no PBC).
+   **/
+  template <typename VALUETYPE>
+  void compute(double &ener,
+               std::vector<VALUETYPE> &force,
+               std::vector<VALUETYPE> &virial,
+               std::vector<VALUETYPE> &atom_energy,
+               std::vector<VALUETYPE> &atom_virial,
+               const std::vector<VALUETYPE> &coord,
+               const std::vector<int> &atype,
+               const std::vector<VALUETYPE> &box) {
+    unsigned int natoms = atype.size();
+    assert(natoms * 3 == coord.size());
+    if (!box.empty()) {
+      assert(box.size() == 9);
+    }
+    const VALUETYPE *coord_ = &coord[0];
+    const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
+    const int *atype_ = &atype[0];
+
+    double *ener_ = &ener;
+    force.resize(natoms * 3);
+    virial.resize(9);
+    atom_energy.resize(natoms);
+    atom_virial.resize(natoms * 9);
+    VALUETYPE *force_ = &force[0];
+    VALUETYPE *virial_ = &virial[0];
+    VALUETYPE *atomic_ener_ = &atom_energy[0];
+    VALUETYPE *atomic_virial_ = &atom_virial[0];
+
+    _DP_DeepPotCompute<VALUETYPE>(dp, natoms, coord_, atype_, box_, ener_,
+                                  force_, virial_, atomic_ener_,
+                                  atomic_virial_);
+  };
+
+  /**
+   * @brief Evaluate the energy, force and virial by using this DP with the
+   *neighbor list.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9 (PBC) or empty (no PBC).
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] nlist The neighbor list.
+   * @param[in] ago Update the internal neighbour list if ago is 0.
+   **/
+  template <typename VALUETYPE>
+  void compute(double &ener,
+               std::vector<VALUETYPE> &force,
+               std::vector<VALUETYPE> &virial,
+               const std::vector<VALUETYPE> &coord,
+               const std::vector<int> &atype,
+               const std::vector<VALUETYPE> &box,
+               const int nghost,
+               const InputNlist &lmp_list,
+               const int &ago) {
+    unsigned int natoms = atype.size();
+    assert(natoms * 3 == coord.size());
+    if (!box.empty()) {
+      assert(box.size() == 9);
+    }
+    const VALUETYPE *coord_ = &coord[0];
+    const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
+    const int *atype_ = &atype[0];
+    double *ener_ = &ener;
+    force.resize(natoms * 3);
+    virial.resize(9);
+    VALUETYPE *force_ = &force[0];
+    VALUETYPE *virial_ = &virial[0];
+
+    _DP_DeepPotComputeNList<VALUETYPE>(dp, natoms, coord_, atype_, box_, nghost,
+                                       lmp_list.nl, ago, ener_, force_, virial_,
+                                       nullptr, nullptr);
+  };
+  /**
+   * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
+   *by using this DP with the neighbor list.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_energy The atomic energy.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9 (PBC) or empty (no PBC).
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] nlist The neighbor list.
+   * @param[in] ago Update the internal neighbour list if ago is 0.
+   **/
+  template <typename VALUETYPE>
+  void compute(double &ener,
+               std::vector<VALUETYPE> &force,
+               std::vector<VALUETYPE> &virial,
+               std::vector<VALUETYPE> &atom_energy,
+               std::vector<VALUETYPE> &atom_virial,
+               const std::vector<VALUETYPE> &coord,
+               const std::vector<int> &atype,
+               const std::vector<VALUETYPE> &box,
+               const int nghost,
+               const InputNlist &lmp_list,
+               const int &ago) {
+    unsigned int natoms = atype.size();
+    assert(natoms * 3 == coord.size());
+    if (!box.empty()) {
+      assert(box.size() == 9);
+    }
+    const VALUETYPE *coord_ = &coord[0];
+    const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
+    const int *atype_ = &atype[0];
+
+    double *ener_ = &ener;
+    force.resize(natoms * 3);
+    virial.resize(9);
+    atom_energy.resize(natoms);
+    atom_virial.resize(natoms * 9);
+    VALUETYPE *force_ = &force[0];
+    VALUETYPE *virial_ = &virial[0];
+    VALUETYPE *atomic_ener_ = &atom_energy[0];
+    VALUETYPE *atomic_virial_ = &atom_virial[0];
+
+    _DP_DeepPotComputeNList<VALUETYPE>(dp, natoms, coord_, atype_, box_, nghost,
+                                       lmp_list.nl, ago, ener_, force_, virial_,
+                                       atomic_ener_, atomic_virial_);
+  };
+  /**
+   * @brief Get the cutoff radius.
+   * @return The cutoff radius.
+   **/
+  double cutoff() const {
+    assert(dp);
+    return DP_DeepPotGetCutoff(dp);
+  };
+  /**
+   * @brief Get the number of types.
+   * @return The number of types.
+   **/
+  int numb_types() const {
+    assert(dp);
+    return DP_DeepPotGetNumbTypes(dp);
+  };
+  /**
+   * @brief Get the type map (element name of the atom types) of this model.
+   * @param[out] type_map The type map of this model.
+   **/
+  void get_type_map(std::string &type_map) {
+    const char *type_map_c = DP_DeepPotGetTypeMap(dp);
+    type_map.assign(type_map_c);
+    delete[] type_map_c;
+  };
+  /**
+   * @brief Print the summary of DeePMD-kit, including the version and the build
+   * information.
+   * @param[in] pre The prefix to each line.
+   */
+  void print_summary(const std::string &pre) const {
+    DP_PrintSummary(pre.c_str());
+  }
+
+ private:
+  DP_DeepPot *dp;
+};
+
+/**
+ * @brief Deep Potential model deviation.
+ **/
+class DeepPotModelDevi {
+ public:
+  /**
+   * @brief DP model deviation constructor without initialization.
+   **/
+  DeepPotModelDevi() : dp(nullptr){};
+  ~DeepPotModelDevi(){};
+  /**
+   * @brief DP model deviation constructor with initialization.
+   * @param[in] models The names of the frozen model file.
+   **/
+  DeepPotModelDevi(const std::vector<std::string> &models) : dp(nullptr) {
+    init(models);
+  };
+  /**
+   * @brief Initialize the DP model deviation.
+   * @param[in] model The name of the frozen model file.
+   **/
+  void init(const std::vector<std::string> &models) {
+    if (dp) {
+      std::cerr << "WARNING: deepmd-kit should not be initialized twice, do "
+                   "nothing at the second call of initializer"
+                << std::endl;
+      return;
+    }
+    std::vector<const char *> cstrings;
+    cstrings.reserve(models.size());
+    for (std::string const &str : models) cstrings.push_back(str.data());
+
+    dp = DP_NewDeepPotModelDevi(cstrings.data(), cstrings.size());
+    numb_models = models.size();
+  };
+
+  /**
+   * @brief Evaluate the energy, force and virial by using this DP model
+   *deviation.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9 (PBC) or empty (no PBC).
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<double> &ener,
+               std::vector<std::vector<VALUETYPE>> &force,
+               std::vector<std::vector<VALUETYPE>> &virial,
+               const std::vector<VALUETYPE> &coord,
+               const std::vector<int> &atype,
+               const std::vector<VALUETYPE> &box,
+               const int nghost,
+               const InputNlist &lmp_list,
+               const int &ago) {
+    unsigned int natoms = atype.size();
+    assert(natoms * 3 == coord.size());
+    if (!box.empty()) {
+      assert(box.size() == 9);
+    }
+    const VALUETYPE *coord_ = &coord[0];
+    const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
+    const int *atype_ = &atype[0];
+
+    // memory will be continous for std::vector but not std::vector<std::vector>
+    std::vector<double> energy_flat(numb_models);
+    std::vector<VALUETYPE> force_flat(numb_models * natoms * 3);
+    std::vector<VALUETYPE> virial_flat(numb_models * 9);
+    double *ener_ = &energy_flat[0];
+    VALUETYPE *force_ = &force_flat[0];
+    VALUETYPE *virial_ = &virial_flat[0];
+
+    _DP_DeepPotModelDeviComputeNList<VALUETYPE>(
+        dp, natoms, coord_, atype_, box_, nghost, lmp_list.nl, ago, ener_,
+        force_, virial_, nullptr, nullptr);
+
+    // reshape
+    ener.resize(numb_models);
+    force.resize(numb_models);
+    virial.resize(numb_models);
+    for (int i = 0; i < numb_models; i++) {
+      ener[i] = energy_flat[i];
+      force[i].resize(natoms * 3);
+      virial[i].resize(9);
+      for (int j = 0; j < natoms * 3; j++)
+        force[i][j] = force_flat[i * natoms * 3 + j];
+      for (int j = 0; j < 9; j++) virial[i][j] = virial_flat[i * 9 + j];
+    }
+  };
+  /**
+   * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
+   *by using this DP model deviation.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_energy The atomic energy.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9 (PBC) or empty (no PBC).
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<double> &ener,
+               std::vector<std::vector<VALUETYPE>> &force,
+               std::vector<std::vector<VALUETYPE>> &virial,
+               std::vector<std::vector<VALUETYPE>> &atom_energy,
+               std::vector<std::vector<VALUETYPE>> &atom_virial,
+               const std::vector<VALUETYPE> &coord,
+               const std::vector<int> &atype,
+               const std::vector<VALUETYPE> &box,
+               const int nghost,
+               const InputNlist &lmp_list,
+               const int &ago) {
+    unsigned int natoms = atype.size();
+    assert(natoms * 3 == coord.size());
+    if (!box.empty()) {
+      assert(box.size() == 9);
+    }
+    const VALUETYPE *coord_ = &coord[0];
+    const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
+    const int *atype_ = &atype[0];
+
+    std::vector<double> energy_flat(numb_models);
+    std::vector<VALUETYPE> force_flat(numb_models * natoms * 3);
+    std::vector<VALUETYPE> virial_flat(numb_models * 9);
+    std::vector<VALUETYPE> atom_energy_flat(numb_models * natoms);
+    std::vector<VALUETYPE> atom_virial_flat(numb_models * natoms * 9);
+    double *ener_ = &energy_flat[0];
+    VALUETYPE *force_ = &force_flat[0];
+    VALUETYPE *virial_ = &virial_flat[0];
+    VALUETYPE *atomic_ener_ = &atom_energy_flat[0];
+    VALUETYPE *atomic_virial_ = &atom_virial_flat[0];
+
+    _DP_DeepPotModelDeviComputeNList<VALUETYPE>(
+        dp, natoms, coord_, atype_, box_, nghost, lmp_list.nl, ago, ener_,
+        force_, virial_, atomic_ener_, atomic_virial_);
+
+    // reshape
+    ener.resize(numb_models);
+    force.resize(numb_models);
+    virial.resize(numb_models);
+    atom_energy.resize(numb_models);
+    atom_virial.resize(numb_models);
+    for (int i = 0; i < numb_models; i++) {
+      ener[i] = energy_flat[i];
+      force[i].resize(natoms * 3);
+      virial[i].resize(9);
+      atom_energy[i].resize(natoms);
+      atom_virial[i].resize(natoms * 9);
+      for (int j = 0; j < natoms * 3; j++)
+        force[i][j] = force_flat[i * natoms * 3 + j];
+      for (int j = 0; j < 9; j++) virial[i][j] = virial_flat[i * 9 + j];
+      for (int j = 0; j < natoms; j++)
+        atom_energy[i][j] = atom_energy_flat[i * natoms + j];
+      for (int j = 0; j < natoms * 9; j++)
+        atom_virial[i][j] = atom_virial_flat[i * natoms * 9 + j];
+    }
+  };
+  /**
+   * @brief Get the cutoff radius.
+   * @return The cutoff radius.
+   **/
+  double cutoff() const {
+    assert(dp);
+    return DP_DeepPotModelDeviGetCutoff(dp);
+  };
+  /**
+   * @brief Get the number of types.
+   * @return The number of types.
+   **/
+  int numb_types() const {
+    assert(dp);
+    return DP_DeepPotModelDeviGetNumbTypes(dp);
+  };
+
+ private:
+  DP_DeepPotModelDevi *dp;
+  int numb_models;
+};
+
+/**
+ * @brief Deep Tensor.
+ **/
+class DeepTensor {
+ public:
+  /**
+   * @brief Deep Tensor constructor without initialization.
+   **/
+  DeepTensor() : dt(nullptr){};
+  ~DeepTensor(){};
+  /**
+   * @brief DeepTensor constructor with initialization.
+   * @param[in] model The name of the frozen model file.
+   **/
+  DeepTensor(const std::string &model,
+             const int &gpu_rank = 0,
+             const std::string &name_scope = "")
+      : dt(nullptr) {
+    init(model, gpu_rank, name_scope);
+  };
+  /**
+   * @brief Initialize the DeepTensor.
+   * @param[in] model The name of the frozen model file.
+   **/
+  void init(const std::string &model,
+            const int &gpu_rank = 0,
+            const std::string &name_scope = "") {
+    if (dt) {
+      std::cerr << "WARNING: deepmd-kit should not be initialized twice, do "
+                   "nothing at the second call of initializer"
+                << std::endl;
+      return;
+    }
+    dt = DP_NewDeepTensorWithParam(model.c_str(), gpu_rank, name_scope.c_str());
+    odim = output_dim();
+    nsel_types = DP_DeepTensorGetNumbSelTypes(dt);
+  };
+
+  /**
+   * @brief Evaluate the tensor, force and virial by using this Deep Tensor.
+   * @param[out] tensor The atomic tensor.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9 (PBC) or empty (no PBC).
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<VALUETYPE> &tensor,
+               const std::vector<VALUETYPE> &coord,
+               const std::vector<int> &atype,
+               const std::vector<VALUETYPE> &box) {
+    unsigned int natoms = atype.size();
+    assert(natoms * 3 == coord.size());
+    if (!box.empty()) {
+      assert(box.size() == 9);
+    }
+    const VALUETYPE *coord_ = &coord[0];
+    const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
+    const int *atype_ = &atype[0];
+
+    VALUETYPE *tensor_;
+    VALUETYPE **p_tensor = &tensor_;
+    int size;
+    int *p_size = &size;
+
+    _DP_DeepTensorComputeTensor<VALUETYPE>(dt, natoms, coord_, atype_, box_,
+                                           p_tensor, p_size);
+
+    tensor.resize(size);
+    std::copy(tensor_, tensor_ + size, tensor.begin());
+    delete[] tensor_;
+  };
+
+  /**
+   * @brief Evaluate the tensor, force and virial by using this Deep Tensor with
+   *the neighbor list.
+   * @param[out] tensor The tensor.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9 (PBC) or empty (no PBC).
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] nlist The neighbor list.
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<VALUETYPE> &tensor,
+               const std::vector<VALUETYPE> &coord,
+               const std::vector<int> &atype,
+               const std::vector<VALUETYPE> &box,
+               const int nghost,
+               const InputNlist &lmp_list) {
+    unsigned int natoms = atype.size();
+    assert(natoms * 3 == coord.size());
+    if (!box.empty()) {
+      assert(box.size() == 9);
+    }
+    const VALUETYPE *coord_ = &coord[0];
+    const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
+    const int *atype_ = &atype[0];
+
+    VALUETYPE *tensor_;
+    VALUETYPE **p_tensor = &tensor_;
+    int size;
+    int *p_size = &size;
+
+    _DP_DeepTensorComputeTensorNList<VALUETYPE>(dt, natoms, coord_, atype_,
+                                                box_, nghost, lmp_list.nl,
+                                                p_tensor, p_size);
+
+    tensor.resize(size);
+    std::copy(tensor_, tensor_ + size, tensor.begin());
+    delete[] tensor_;
+  };
+
+  /**
+   * @brief Evaluate the global tensor, force and virial by using this Deep
+   *Tensor.
+   * @param[out] global_tensor The global tensor.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9 (PBC) or empty (no PBC).
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<VALUETYPE> &global_tensor,
+               std::vector<VALUETYPE> &force,
+               std::vector<VALUETYPE> &virial,
+               const std::vector<VALUETYPE> &coord,
+               const std::vector<int> &atype,
+               const std::vector<VALUETYPE> &box) {
+    unsigned int natoms = atype.size();
+    assert(natoms * 3 == coord.size());
+    if (!box.empty()) {
+      assert(box.size() == 9);
+    }
+    const VALUETYPE *coord_ = &coord[0];
+    const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
+    const int *atype_ = &atype[0];
+    global_tensor.resize(odim);
+    force.resize(odim * natoms * 3);
+    virial.resize(odim * 9);
+    VALUETYPE *global_tensor_ = &global_tensor[0];
+    VALUETYPE *force_ = &force[0];
+    VALUETYPE *virial_ = &virial[0];
+
+    _DP_DeepTensorCompute<VALUETYPE>(dt, natoms, coord_, atype_, box_,
+                                     global_tensor_, force_, virial_, nullptr,
+                                     nullptr, nullptr);
+  };
+  /**
+   * @brief Evaluate the global tensor, force, virial, atomic tensor, and atomic
+   *virial by using this Deep Tensor.
+   * @param[out] global_tensor The global tensor.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_tensor The atomic tensor.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9 (PBC) or empty (no PBC).
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<VALUETYPE> &global_tensor,
+               std::vector<VALUETYPE> &force,
+               std::vector<VALUETYPE> &virial,
+               std::vector<VALUETYPE> &atom_tensor,
+               std::vector<VALUETYPE> &atom_virial,
+               const std::vector<VALUETYPE> &coord,
+               const std::vector<int> &atype,
+               const std::vector<VALUETYPE> &box) {
+    unsigned int natoms = atype.size();
+    assert(natoms * 3 == coord.size());
+    if (!box.empty()) {
+      assert(box.size() == 9);
+    }
+    const VALUETYPE *coord_ = &coord[0];
+    const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
+    const int *atype_ = &atype[0];
+
+    global_tensor.resize(odim);
+    force.resize(odim * natoms * 3);
+    virial.resize(odim * 9);
+    atom_virial.resize(odim * natoms * 9);
+    VALUETYPE *global_tensor_ = &global_tensor[0];
+    VALUETYPE *force_ = &force[0];
+    VALUETYPE *virial_ = &virial[0];
+    VALUETYPE *atomic_virial_ = &atom_virial[0];
+
+    VALUETYPE *atomic_tensor_;
+    VALUETYPE **p_atomic_tensor = &atomic_tensor_;
+    int size_at;
+    int *p_size_at = &size_at;
+
+    _DP_DeepTensorCompute<VALUETYPE>(
+        dt, natoms, coord_, atype_, box_, global_tensor_, force_, virial_,
+        p_atomic_tensor, atomic_virial_, p_size_at);
+
+    atom_tensor.resize(size_at);
+    std::copy(atomic_tensor_, atomic_tensor_ + size_at, atom_tensor.begin());
+    delete[] atomic_tensor_;
+  };
+
+  /**
+   * @brief Evaluate the global tensor, force and virial by using this Deep
+   *Tensor with the neighbor list.
+   * @param[out] global_tensor The global tensor.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9 (PBC) or empty (no PBC).
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] nlist The neighbor list.
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<VALUETYPE> &global_tensor,
+               std::vector<VALUETYPE> &force,
+               std::vector<VALUETYPE> &virial,
+               const std::vector<VALUETYPE> &coord,
+               const std::vector<int> &atype,
+               const std::vector<VALUETYPE> &box,
+               const int nghost,
+               const InputNlist &lmp_list) {
+    unsigned int natoms = atype.size();
+    assert(natoms * 3 == coord.size());
+    if (!box.empty()) {
+      assert(box.size() == 9);
+    }
+    const VALUETYPE *coord_ = &coord[0];
+    const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
+    const int *atype_ = &atype[0];
+    global_tensor.resize(odim);
+    force.resize(odim * natoms * 3);
+    virial.resize(odim * 9);
+    VALUETYPE *global_tensor_ = &global_tensor[0];
+    VALUETYPE *force_ = &force[0];
+    VALUETYPE *virial_ = &virial[0];
+
+    _DP_DeepTensorComputeNList<VALUETYPE>(
+        dt, natoms, coord_, atype_, box_, nghost, lmp_list.nl, global_tensor_,
+        force_, virial_, nullptr, nullptr, nullptr);
+  };
+  /**
+   * @brief Evaluate the global tensor, force, virial, atomic tensor, and atomic
+   *virial by using this Deep Tensor with the neighbor list.
+   * @param[out] global_tensor The global tensor.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_tensor The atomic tensor.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9 (PBC) or empty (no PBC).
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] nlist The neighbor list.
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<VALUETYPE> &global_tensor,
+               std::vector<VALUETYPE> &force,
+               std::vector<VALUETYPE> &virial,
+               std::vector<VALUETYPE> &atom_tensor,
+               std::vector<VALUETYPE> &atom_virial,
+               const std::vector<VALUETYPE> &coord,
+               const std::vector<int> &atype,
+               const std::vector<VALUETYPE> &box,
+               const int nghost,
+               const InputNlist &lmp_list) {
+    unsigned int natoms = atype.size();
+    assert(natoms * 3 == coord.size());
+    if (!box.empty()) {
+      assert(box.size() == 9);
+    }
+    const VALUETYPE *coord_ = &coord[0];
+    const VALUETYPE *box_ = !box.empty() ? &box[0] : nullptr;
+    const int *atype_ = &atype[0];
+
+    global_tensor.resize(odim);
+    force.resize(odim * natoms * 3);
+    virial.resize(odim * 9);
+    atom_virial.resize(odim * natoms * 9);
+    VALUETYPE *global_tensor_ = &global_tensor[0];
+    VALUETYPE *force_ = &force[0];
+    VALUETYPE *virial_ = &virial[0];
+    VALUETYPE *atomic_virial_ = &atom_virial[0];
+
+    VALUETYPE *atomic_tensor_;
+    VALUETYPE **p_atomic_tensor = &atomic_tensor_;
+    int size_at;
+    int *p_size_at = &size_at;
+
+    _DP_DeepTensorComputeNList<VALUETYPE>(
+        dt, natoms, coord_, atype_, box_, nghost, lmp_list.nl, global_tensor_,
+        force_, virial_, p_atomic_tensor, atomic_virial_, p_size_at);
+
+    atom_tensor.resize(size_at);
+    std::copy(atomic_tensor_, atomic_tensor_ + size_at, atom_tensor.begin());
+    delete[] atomic_tensor_;
+  };
+  /**
+   * @brief Get the cutoff radius.
+   * @return The cutoff radius.
+   **/
+  double cutoff() const {
+    assert(dt);
+    return DP_DeepTensorGetCutoff(dt);
+  };
+  /**
+   * @brief Get the number of types.
+   * @return The number of types.
+   **/
+  int numb_types() const {
+    assert(dt);
+    return DP_DeepTensorGetNumbTypes(dt);
+  };
+  /**
+   * @brief Get the output dimension.
+   * @return The output dimension.
+   **/
+  int output_dim() const {
+    assert(dt);
+    return DP_DeepTensorGetOutputDim(dt);
+  }
+
+  std::vector<int> sel_types() const {
+    int *sel_types_arr = DP_DeepTensorGetSelTypes(dt);
+    std::vector<int> sel_types_vec =
+        std::vector<int>(sel_types_arr, sel_types_arr + nsel_types);
+    return sel_types_vec;
+  }
+  /**
+   * @brief Print the summary of DeePMD-kit, including the version and the build
+   * information.
+   * @param[in] pre The prefix to each line.
+   */
+  void print_summary(const std::string &pre) const {
+    DP_PrintSummary(pre.c_str());
+  }
+
+ private:
+  DP_DeepTensor *dt;
+  int odim;
+  int nsel_types;
+};
+
+class DipoleChargeModifier {
+ public:
+  /**
+   * @brief DipoleChargeModifier constructor without initialization.
+   **/
+  DipoleChargeModifier() : dcm(nullptr){};
+  ~DipoleChargeModifier(){};
+  /**
+   * @brief DipoleChargeModifier constructor with initialization.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The rank of the GPU to be used.
+   * @param[in] name_scope The name scope of the model.
+   **/
+  DipoleChargeModifier(const std::string &model,
+                       const int &gpu_rank = 0,
+                       const std::string &name_scope = "")
+      : dcm(nullptr) {
+    init(model, gpu_rank, name_scope);
+  };
+  /**
+   * @brief Initialize the DipoleChargeModifier.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The rank of the GPU to be used.
+   * @param[in] name_scope The name scope of the model.
+   **/
+  void init(const std::string &model,
+            const int &gpu_rank = 0,
+            const std::string &name_scope = "") {
+    if (dcm) {
+      std::cerr << "WARNING: deepmd-kit should not be initialized twice, do "
+                   "nothing at the second call of initializer"
+                << std::endl;
+      return;
+    }
+    dcm = DP_NewDipoleChargeModifierWithParam(model.c_str(), gpu_rank,
+                                              name_scope.c_str());
+    nsel_types = DP_DipoleChargeModifierGetNumbSelTypes(dcm);
+  };
+  /**
+   * @brief Evaluate the force and virial correction by using this dipole charge
+   *modifier.
+   * @param[out] dfcorr_ The force correction on each atom.
+   * @param[out] dvcorr_ The virial correction.
+   * @param[in] dcoord_ The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] datype_ The atom types. The list should contain natoms ints.
+   * @param[in] dbox The cell of the region. The array should be of size 9.
+   * @param[in] pairs The pairs of atoms. The list should contain npairs pairs
+   *of ints.
+   * @param[in] delef_ The electric field on each atom. The array should be of
+   *size natoms x 3.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] lmp_list The neighbor list.
+   **/
+  template <typename VALUETYPE>
+  void compute(std::vector<VALUETYPE> &dfcorr_,
+               std::vector<VALUETYPE> &dvcorr_,
+               const std::vector<VALUETYPE> &dcoord_,
+               const std::vector<int> &datype_,
+               const std::vector<VALUETYPE> &dbox,
+               const std::vector<std::pair<int, int>> &pairs,
+               const std::vector<VALUETYPE> &delef_,
+               const int nghost,
+               const InputNlist &lmp_list) {
+    unsigned int natoms = datype_.size();
+    assert(natoms * 3 == dcoord_.size());
+    if (!dbox.empty()) {
+      assert(dbox.size() == 9);
+    }
+    const VALUETYPE *dcoord = &dcoord_[0];
+    const VALUETYPE *dbox_ = !dbox.empty() ? &dbox[0] : nullptr;
+    const int *datype = &datype_[0];
+    const int npairs = pairs.size();
+    const int *dpairs = reinterpret_cast<const int *>(&pairs[0]);
+    const VALUETYPE *delef = &delef_[0];
+
+    dfcorr_.resize(natoms * 3);
+    dvcorr_.resize(9);
+    VALUETYPE *dfcorr = &dfcorr_[0];
+    VALUETYPE *dvcorr = &dvcorr_[0];
+
+    _DP_DipoleChargeModifierComputeNList<VALUETYPE>(
+        dcm, natoms, dcoord, datype, dbox_, dpairs, npairs, delef, nghost,
+        lmp_list.nl, dfcorr, dvcorr);
+  };
+  /**
+   * @brief Get the cutoff radius.
+   * @return The cutoff radius.
+   **/
+  double cutoff() const {
+    assert(dcm);
+    return DP_DipoleChargeModifierGetCutoff(dcm);
+  };
+  /**
+   * @brief Get the number of types.
+   * @return The number of types.
+   **/
+  int numb_types() const {
+    assert(dcm);
+    return DP_DipoleChargeModifierGetNumbTypes(dcm);
+  };
+
+  std::vector<int> sel_types() const {
+    int *sel_types_arr = DP_DipoleChargeModifierGetSelTypes(dcm);
+    std::vector<int> sel_types_vec =
+        std::vector<int>(sel_types_arr, sel_types_arr + nsel_types);
+    return sel_types_vec;
+  }
+
+  /**
+   * @brief Print the summary of DeePMD-kit, including the version and the build
+   * information.
+   * @param[in] pre The prefix to each line.
+   */
+  void print_summary(const std::string &pre) const {
+    DP_PrintSummary(pre.c_str());
+  }
+
+ private:
+  DP_DipoleChargeModifier *dcm;
+  int nsel_types;
+};
+}  // namespace hpp
+}  // namespace deepmd
diff --git a/source/api_c/src/c_api.cc b/source/api_c/src/c_api.cc
index b511c61a33..4647aea98b 100644
--- a/source/api_c/src/c_api.cc
+++ b/source/api_c/src/c_api.cc
@@ -1,297 +1,279 @@
 #include "c_api.h"
 
-#include 
-#include 
 #include 
-#include "c_api_internal.h"
-#include "common.h"
+#include 
+#include 
+
+#include "DataModifier.h"
 #include "DeepPot.h"
 #include "DeepTensor.h"
-#include "DataModifier.h"
+#include "c_api_internal.h"
+#include "common.h"
 
 extern "C" {
 
 DP_Nlist::DP_Nlist(deepmd::InputNlist& nl) : nl(nl) {}
 
-DP_Nlist* DP_NewNlist(
-    int inum_, 
-    int * ilist_,
-    int * numneigh_, 
-    int ** firstneigh_){
-    deepmd::InputNlist nl(inum_, ilist_, numneigh_, firstneigh_);
-    DP_Nlist* new_nl = new DP_Nlist(nl);
-    return new_nl;
+DP_Nlist* DP_NewNlist(int inum_,
+                      int* ilist_,
+                      int* numneigh_,
+                      int** firstneigh_) {
+  deepmd::InputNlist nl(inum_, ilist_, numneigh_, firstneigh_);
+  DP_Nlist* new_nl = new DP_Nlist(nl);
+  return new_nl;
 }
 
-DP_DeepPot::DP_DeepPot(deepmd::DeepPot& dp)
-    : dp(dp) {}
+DP_DeepPot::DP_DeepPot(deepmd::DeepPot& dp) : dp(dp) {}
 
 DP_DeepPot* DP_NewDeepPot(const char* c_model) {
-    std::string model(c_model);
-    deepmd::DeepPot dp(model);
-    DP_DeepPot* new_dp = new DP_DeepPot(dp);
-    return new_dp;
+  std::string model(c_model);
+  deepmd::DeepPot dp(model);
+  DP_DeepPot* new_dp = new DP_DeepPot(dp);
+  return new_dp;
 }
 
-DP_DeepPot* DP_NewDeepPotWithParam(
-        const char* c_model, const int gpu_rank, const char* c_file_content) {
-    std::string model(c_model);
-    std::string file_content(c_file_content);
-    deepmd::DeepPot dp(model, gpu_rank, file_content);
-    DP_DeepPot* new_dp = new DP_DeepPot(dp);
-    return new_dp;
+DP_DeepPot* DP_NewDeepPotWithParam(const char* c_model,
+                                   const int gpu_rank,
+                                   const char* c_file_content) {
+  std::string model(c_model);
+  std::string file_content(c_file_content);
+  deepmd::DeepPot dp(model, gpu_rank, file_content);
+  DP_DeepPot* new_dp = new DP_DeepPot(dp);
+  return new_dp;
 }
 
 DP_DeepPotModelDevi::DP_DeepPotModelDevi(deepmd::DeepPotModelDevi& dp)
     : dp(dp) {}
 
-DP_DeepPotModelDevi* DP_NewDeepPotModelDevi(const char** c_models, int n_models) {
-    std::vector<std::string> model(c_models, c_models + n_models);
-    deepmd::DeepPotModelDevi dp(model);
-    DP_DeepPotModelDevi* new_dp = new DP_DeepPotModelDevi(dp);
-    return new_dp;
+DP_DeepPotModelDevi* DP_NewDeepPotModelDevi(const char** c_models,
+                                            int n_models) {
+  std::vector<std::string> model(c_models, c_models + n_models);
+  deepmd::DeepPotModelDevi dp(model);
+  DP_DeepPotModelDevi* new_dp = new DP_DeepPotModelDevi(dp);
+  return new_dp;
 }
 
-DP_DeepTensor::DP_DeepTensor(deepmd::DeepTensor& dt)
-    : dt(dt) {}
+DP_DeepTensor::DP_DeepTensor(deepmd::DeepTensor& dt) : dt(dt) {}
 
 DP_DeepTensor* DP_NewDeepTensor(const char* c_model) {
-    std::string model(c_model);
-    deepmd::DeepTensor dt(model);
-    DP_DeepTensor* new_dt = new DP_DeepTensor(dt);
-    return new_dt;
+  std::string model(c_model);
+  deepmd::DeepTensor dt(model);
+  DP_DeepTensor* new_dt = new DP_DeepTensor(dt);
+  return new_dt;
 }
 
-DP_DeepTensor* DP_NewDeepTensorWithParam(
-        const char* c_model, const int gpu_rank, const char* c_name_scope) {
-    std::string model(c_model);
-    std::string name_scope(c_name_scope);
-    deepmd::DeepTensor dt(model, gpu_rank, name_scope);
-    DP_DeepTensor* new_dt = new DP_DeepTensor(dt);
-    return new_dt;
+DP_DeepTensor* DP_NewDeepTensorWithParam(const char* c_model,
+                                         const int gpu_rank,
+                                         const char* c_name_scope) {
+  std::string model(c_model);
+  std::string name_scope(c_name_scope);
+  deepmd::DeepTensor dt(model, gpu_rank, name_scope);
+  DP_DeepTensor* new_dt = new DP_DeepTensor(dt);
+  return new_dt;
 }
 
-DP_DipoleChargeModifier::DP_DipoleChargeModifier(deepmd::DipoleChargeModifier& dcm)
+DP_DipoleChargeModifier::DP_DipoleChargeModifier(
+    deepmd::DipoleChargeModifier& dcm)
     : dcm(dcm) {}
 
 DP_DipoleChargeModifier* DP_NewDipoleChargeModifier(const char* c_model) {
-    std::string model(c_model);
-    deepmd::DipoleChargeModifier dcm(model);
-    DP_DipoleChargeModifier* new_dcm = new DP_DipoleChargeModifier(dcm);
-    return new_dcm;
+  std::string model(c_model);
+  deepmd::DipoleChargeModifier dcm(model);
+  DP_DipoleChargeModifier* new_dcm = new DP_DipoleChargeModifier(dcm);
+  return new_dcm;
 }
 
 DP_DipoleChargeModifier* DP_NewDipoleChargeModifierWithParam(
-        const char* c_model, const int gpu_rank, const char* c_name_scope) {
-    std::string model(c_model);
-    std::string name_scope(c_name_scope);
-    deepmd::DipoleChargeModifier dcm(model, gpu_rank, name_scope);
-    DP_DipoleChargeModifier* new_dcm = new DP_DipoleChargeModifier(dcm);
-    return new_dcm;
+    const char* c_model, const int gpu_rank, const char* c_name_scope) {
+  std::string model(c_model);
+  std::string name_scope(c_name_scope);
+  deepmd::DipoleChargeModifier dcm(model, gpu_rank, name_scope);
+  DP_DipoleChargeModifier* new_dcm = new DP_DipoleChargeModifier(dcm);
+  return new_dcm;
 }
 
-} // extern "C"
+}  // extern "C"
 
 template <typename VALUETYPE>
-inline
-void DP_DeepPotCompute_variant (
-    DP_DeepPot* dp,
-    const int natoms,
-    const VALUETYPE* coord,
-    const int* atype,
-    const VALUETYPE* cell,
-    double* energy,
-    VALUETYPE* force,
-    VALUETYPE* virial,
-    VALUETYPE* atomic_energy,
-    VALUETYPE* atomic_virial
-    ) {
-    // init C++ vectors from C arrays
-    std::vector coord_(coord, coord+natoms*3);
-    std::vector atype_(atype, atype+natoms);
-    std::vector cell_;
-    if (cell) {
-        // pbc
-        cell_.assign(cell, cell+9);
-    }
-    double e;
-    std::vector f, v, ae, av;
-
-    dp->dp.compute(e, f, v, ae, av, coord_, atype_, cell_);
-    // copy from C++ vectors to C arrays, if not NULL pointer
-    if(energy) *energy = e;
-    if(force) std::copy(f.begin(), f.end(), force);
-    if(virial) std::copy(v.begin(), v.end(), virial);
-    if(atomic_energy) std::copy(ae.begin(), ae.end(), atomic_energy);
-    if(atomic_virial) std::copy(av.begin(), av.end(), atomic_virial);
-}
-
-template
-void DP_DeepPotCompute_variant  (
-    DP_DeepPot* dp,
-    const int natoms,
-    const double* coord,
-    const int* atype,
-    const double* cell,
-    double* energy,
-    double* force,
-    double* virial,
-    double* atomic_energy,
-    double* atomic_virial
-    );
-
-template
-void DP_DeepPotCompute_variant  (
-    DP_DeepPot* dp,
-    const int natoms,
-    const float* coord,
-    const int* atype,
-    const float* cell,
-    double* energy,
-    float* force,
-    float* virial,
-    float* atomic_energy,
-    float* atomic_virial
-    );
+inline void DP_DeepPotCompute_variant(DP_DeepPot* dp,
+                                      const int natoms,
+                                      const VALUETYPE* coord,
+                                      const int* atype,
+                                      const VALUETYPE* cell,
+                                      double* energy,
+                                      VALUETYPE* force,
+                                      VALUETYPE* virial,
+                                      VALUETYPE* atomic_energy,
+                                      VALUETYPE* atomic_virial) {
+  // init C++ vectors from C arrays
+  std::vector<VALUETYPE> coord_(coord, coord + natoms * 3);
+  std::vector<int> atype_(atype, atype + natoms);
+  std::vector<VALUETYPE> cell_;
+  if (cell) {
+    // pbc
+    cell_.assign(cell, cell + 9);
+  }
+  double e;
+  std::vector<VALUETYPE> f, v, ae, av;
+
+  dp->dp.compute(e, f, v, ae, av, coord_, atype_, cell_);
+  // copy from C++ vectors to C arrays, if not NULL pointer
+  if (energy) *energy = e;
+  if (force) std::copy(f.begin(), f.end(), force);
+  if (virial) std::copy(v.begin(), v.end(), virial);
+  if (atomic_energy) std::copy(ae.begin(), ae.end(), atomic_energy);
+  if (atomic_virial) std::copy(av.begin(), av.end(), atomic_virial);
+}
+
+template void DP_DeepPotCompute_variant<double>(DP_DeepPot* dp,
+                                                const int natoms,
+                                                const double* coord,
+                                                const int* atype,
+                                                const double* cell,
+                                                double* energy,
+                                                double* force,
+                                                double* virial,
+                                                double* atomic_energy,
+                                                double* atomic_virial);
+
+template void DP_DeepPotCompute_variant<float>(DP_DeepPot* dp,
+                                               const int natoms,
+                                               const float* coord,
+                                               const int* atype,
+                                               const float* cell,
+                                               double* energy,
+                                               float* force,
+                                               float* virial,
+                                               float* atomic_energy,
+                                               float* atomic_virial);
 
 template <typename VALUETYPE>
-inline
-void DP_DeepPotComputeNList_variant (
-    DP_DeepPot* dp,
-    const int natoms,
-    const VALUETYPE* coord,
-    const int* atype,
-    const VALUETYPE* cell,
-    const int nghost,
-    const DP_Nlist* nlist,
-    const int ago,
-    double* energy,
-    VALUETYPE* force,
-    VALUETYPE* virial,
-    VALUETYPE* atomic_energy,
-    VALUETYPE* atomic_virial
-    ) {
-    // init C++ vectors from C arrays
-    std::vector coord_(coord, coord+natoms*3);
-    std::vector atype_(atype, atype+natoms);
-    std::vector cell_;
-    if (cell) {
-        // pbc
-        cell_.assign(cell, cell+9);
-    }
-    double e;
-    std::vector f, v, ae, av;
-
-    dp->dp.compute(e, f, v, ae, av, coord_, atype_, cell_, nghost, nlist->nl, ago);
-    // copy from C++ vectors to C arrays, if not NULL pointer
-    if(energy) *energy = e;
-    if(force) std::copy(f.begin(), f.end(), force);
-    if(virial) std::copy(v.begin(), v.end(), virial);
-    if(atomic_energy) std::copy(ae.begin(), ae.end(), atomic_energy);
-    if(atomic_virial) std::copy(av.begin(), av.end(), atomic_virial);
-}
-
-template
-void DP_DeepPotComputeNList_variant  (
-    DP_DeepPot* dp,
-    const int natoms,
-    const double* coord,
-    const int* atype,
-    const double* cell,
-    const int nghost,
-    const DP_Nlist* nlist,
-    const int ago,
-    double* energy,
-    double* force,
-    double* virial,
-    double* atomic_energy,
-    double* atomic_virial
-    );
-
-template
-void DP_DeepPotComputeNList_variant  (
-    DP_DeepPot* dp,
-    const int natoms,
-    const float* coord,
-    const int* atype,
-    const float* cell,
-    const int nghost,
-    const DP_Nlist* nlist,
-    const int ago,
-    double* energy,
-    float* force,
-    float* virial,
-    float* atomic_energy,
-    float* atomic_virial
-    );
+inline void DP_DeepPotComputeNList_variant(DP_DeepPot* dp,
+                                           const int natoms,
+                                           const VALUETYPE* coord,
+                                           const int* atype,
+                                           const VALUETYPE* cell,
+                                           const int nghost,
+                                           const DP_Nlist* nlist,
+                                           const int ago,
+                                           double* energy,
+                                           VALUETYPE* force,
+                                           VALUETYPE* virial,
+                                           VALUETYPE* atomic_energy,
+                                           VALUETYPE* atomic_virial) {
+  // init C++ vectors from C arrays
+  std::vector<VALUETYPE> coord_(coord, coord + natoms * 3);
+  std::vector<int> atype_(atype, atype + natoms);
+  std::vector<VALUETYPE> cell_;
+  if (cell) {
+    // pbc
+    cell_.assign(cell, cell + 9);
+  }
+  double e;
+  std::vector<VALUETYPE> f, v, ae, av;
+
+  dp->dp.compute(e, f, v, ae, av, coord_, atype_, cell_, nghost, nlist->nl,
+                 ago);
+  // copy from C++ vectors to C arrays, if not NULL pointer
+  if (energy) *energy = e;
+  if (force) std::copy(f.begin(), f.end(), force);
+  if (virial) std::copy(v.begin(), v.end(), virial);
+  if (atomic_energy) std::copy(ae.begin(), ae.end(), atomic_energy);
+  if (atomic_virial) std::copy(av.begin(), av.end(), atomic_virial);
+}
+
+template void DP_DeepPotComputeNList_variant<double>(DP_DeepPot* dp,
+                                                     const int natoms,
+                                                     const double* coord,
+                                                     const int* atype,
+                                                     const double* cell,
+                                                     const int nghost,
+                                                     const DP_Nlist* nlist,
+                                                     const int ago,
+                                                     double* energy,
+                                                     double* force,
+                                                     double* virial,
+                                                     double* atomic_energy,
+                                                     double* atomic_virial);
+
+template void DP_DeepPotComputeNList_variant<float>(DP_DeepPot* dp,
+                                                    const int natoms,
+                                                    const float* coord,
+                                                    const int* atype,
+                                                    const float* cell,
+                                                    const int nghost,
+                                                    const DP_Nlist* nlist,
+                                                    const int ago,
+                                                    double* energy,
+                                                    float* force,
+                                                    float* virial,
+                                                    float* atomic_energy,
+                                                    float* atomic_virial);
 
 template <typename VALUETYPE>
-inline
-void flatten_vector(std::vector & onedv, const std::vector>& twodv) {
-    onedv.clear();
-    for (size_t ii = 0; ii < twodv.size(); ++ii) {
-        onedv.insert(onedv.end(), twodv[ii].begin(), twodv[ii].end());
-    }
+inline void flatten_vector(std::vector<VALUETYPE>& onedv,
+                           const std::vector<std::vector<VALUETYPE>>& twodv) {
+  onedv.clear();
+  for (size_t ii = 0; ii < twodv.size(); ++ii) {
+    onedv.insert(onedv.end(), twodv[ii].begin(), twodv[ii].end());
+  }
 }
 
-
 template <typename VALUETYPE>
-void DP_DeepPotModelDeviComputeNList_variant (
-    DP_DeepPotModelDevi* dp,
-    const int natoms,
-    const VALUETYPE* coord,
-    const int* atype,
-    const VALUETYPE* cell,
-    const int nghost,
-    const DP_Nlist* nlist,
-    const int ago,
-    double* energy,
-    VALUETYPE* force,
-    VALUETYPE* virial,
-    VALUETYPE* atomic_energy,
-    VALUETYPE* atomic_virial
-    ) {
-    // init C++ vectors from C arrays
-    std::vector coord_(coord, coord+natoms*3);
-    std::vector atype_(atype, atype+natoms);
-    std::vector cell_;
-    if (cell) {
-        // pbc
-        cell_.assign(cell, cell+9);
-    }
-    // different from DeepPot
-    std::vector e;
-    std::vector> f, v, ae, av;
-
-    dp->dp.compute(e, f, v, ae, av, coord_, atype_, cell_, nghost, nlist->nl, ago);
-    // 2D vector to 2D array, flatten first    
-    if(energy) {
-        std::copy(e.begin(), e.end(), energy);
-    }
-    if(force) {
-        std::vector f_flat;
-        flatten_vector(f_flat, f);
-        std::copy(f_flat.begin(), f_flat.end(), force);
-    }
-    if(virial) {
-        std::vector v_flat;
-        flatten_vector(v_flat, v);
-        std::copy(v_flat.begin(), v_flat.end(), virial);
-    }
-    if(atomic_energy) {
-        std::vector ae_flat;
-        flatten_vector(ae_flat, ae);
-        std::copy(ae_flat.begin(), ae_flat.end(), atomic_energy);
-    }
-    if(atomic_virial) {
-        std::vector av_flat;
-        flatten_vector(av_flat, av);
-        std::copy(av_flat.begin(), av_flat.end(), atomic_virial);
-    }
-}
-
-template
-void DP_DeepPotModelDeviComputeNList_variant  (
+void DP_DeepPotModelDeviComputeNList_variant(DP_DeepPotModelDevi* dp,
+                                             const int natoms,
+                                             const VALUETYPE* coord,
+                                             const int* atype,
+                                             const VALUETYPE* cell,
+                                             const int nghost,
+                                             const DP_Nlist* nlist,
+                                             const int ago,
+                                             double* energy,
+                                             VALUETYPE* force,
+                                             VALUETYPE* virial,
+                                             VALUETYPE* atomic_energy,
+                                             VALUETYPE* atomic_virial) {
+  // init C++ vectors from C arrays
+  std::vector<VALUETYPE> coord_(coord, coord + natoms * 3);
+  std::vector<int> atype_(atype, atype + natoms);
+  std::vector<VALUETYPE> cell_;
+  if (cell) {
+    // pbc
+    cell_.assign(cell, cell + 9);
+  }
+  // different from DeepPot
+  std::vector<double> e;
+  std::vector<std::vector<VALUETYPE>> f, v, ae, av;
+
+  dp->dp.compute(e, f, v, ae, av, coord_, atype_, cell_, nghost, nlist->nl,
+                 ago);
+  // 2D vector to 2D array, flatten first
+  if (energy) {
+    std::copy(e.begin(), e.end(), energy);
+  }
+  if (force) {
+    std::vector<VALUETYPE> f_flat;
+    flatten_vector(f_flat, f);
+    std::copy(f_flat.begin(), f_flat.end(), force);
+  }
+  if (virial) {
+    std::vector<VALUETYPE> v_flat;
+    flatten_vector(v_flat, v);
+    std::copy(v_flat.begin(), v_flat.end(), virial);
+  }
+  if (atomic_energy) {
+    std::vector<VALUETYPE> ae_flat;
+    flatten_vector(ae_flat, ae);
+    std::copy(ae_flat.begin(), ae_flat.end(), atomic_energy);
+  }
+  if (atomic_virial) {
+    std::vector<VALUETYPE> av_flat;
+    flatten_vector(av_flat, av);
+    std::copy(av_flat.begin(), av_flat.end(), atomic_virial);
+  }
+}
+
+template void DP_DeepPotModelDeviComputeNList_variant<double>(
     DP_DeepPotModelDevi* dp,
     const int natoms,
     const double* coord,
@@ -304,11 +286,9 @@ void DP_DeepPotModelDeviComputeNList_variant  (
     double* force,
     double* virial,
     double* atomic_energy,
-    double* atomic_virial
-    );
+    double* atomic_virial);
 
-template
-void DP_DeepPotModelDeviComputeNList_variant  (
+template void DP_DeepPotModelDeviComputeNList_variant<float>(
     DP_DeepPotModelDevi* dp,
     const int natoms,
     const float* coord,
@@ -321,91 +301,77 @@ void DP_DeepPotModelDeviComputeNList_variant  (
     float* force,
     float* virial,
     float* atomic_energy,
-    float* atomic_virial
-    );
+    float* atomic_virial);
 
 template <typename VALUETYPE>
-inline
-void DP_DeepTensorComputeTensor_variant (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const VALUETYPE* coord,
-    const int* atype,
-    const VALUETYPE* cell,
-    VALUETYPE** tensor,
-    int* size
-    ) {
-    // init C++ vectors from C arrays
-    std::vector coord_(coord, coord+natoms*3);
-    std::vector atype_(atype, atype+natoms);
-    std::vector cell_;
-    if (cell) {
-        // pbc
-        cell_.assign(cell, cell+9);
-    }
-    std::vector t;
-
-    dt->dt.compute(t, coord_, atype_, cell_);
-    // do not know the size of tensor in advance...
-    *tensor = new VALUETYPE[t.size()];
-    std::copy(t.begin(), t.end(), *tensor);
-    *size = t.size();
-}
-
-template
-void DP_DeepTensorComputeTensor_variant  (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const double* coord,
-    const int* atype,
-    const double* cell,
-    double** tensor,
-    int* size
-    );
-
-template
-void DP_DeepTensorComputeTensor_variant  (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const float* coord,
-    const int* atype,
-    const float* cell,
-    float** tensor,
-    int* size
-    );
+inline void DP_DeepTensorComputeTensor_variant(DP_DeepTensor* dt,
+                                               const int natoms,
+                                               const VALUETYPE* coord,
+                                               const int* atype,
+                                               const VALUETYPE* cell,
+                                               VALUETYPE** tensor,
+                                               int* size) {
+  // init C++ vectors from C arrays
+  std::vector<VALUETYPE> coord_(coord, coord + natoms * 3);
+  std::vector<int> atype_(atype, atype + natoms);
+  std::vector<VALUETYPE> cell_;
+  if (cell) {
+    // pbc
+    cell_.assign(cell, cell + 9);
+  }
+  std::vector<VALUETYPE> t;
+
+  dt->dt.compute(t, coord_, atype_, cell_);
+  // do not know the size of tensor in advance...
+  *tensor = new VALUETYPE[t.size()];
+  std::copy(t.begin(), t.end(), *tensor);
+  *size = t.size();
+}
+
+template void DP_DeepTensorComputeTensor_variant<double>(DP_DeepTensor* dt,
+                                                         const int natoms,
+                                                         const double* coord,
+                                                         const int* atype,
+                                                         const double* cell,
+                                                         double** tensor,
+                                                         int* size);
+
+template void DP_DeepTensorComputeTensor_variant<float>(DP_DeepTensor* dt,
+                                                        const int natoms,
+                                                        const float* coord,
+                                                        const int* atype,
+                                                        const float* cell,
+                                                        float** tensor,
+                                                        int* size);
 
 template <typename VALUETYPE>
-inline
-void DP_DeepTensorComputeTensorNList_variant (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const VALUETYPE* coord,
-    const int* atype,
-    const VALUETYPE* cell,
-    const int nghost,
-    const DP_Nlist* nlist,
-    VALUETYPE** tensor,
-    int* size
-    ) {
-    // init C++ vectors from C arrays
-    std::vector coord_(coord, coord+natoms*3);
-    std::vector atype_(atype, atype+natoms);
-    std::vector cell_;
-    if (cell) {
-        // pbc
-        cell_.assign(cell, cell+9);
-    }
-    std::vector t;
-
-    dt->dt.compute(t, coord_, atype_, cell_, nghost, nlist->nl);
-    // do not know the size of tensor in advance...
-    *tensor = new VALUETYPE[t.size()];
-    std::copy(t.begin(), t.end(), *tensor);
-    *size = t.size();
-}
-
-template
-void DP_DeepTensorComputeTensorNList_variant  (
+inline void DP_DeepTensorComputeTensorNList_variant(DP_DeepTensor* dt,
+                                                    const int natoms,
+                                                    const VALUETYPE* coord,
+                                                    const int* atype,
+                                                    const VALUETYPE* cell,
+                                                    const int nghost,
+                                                    const DP_Nlist* nlist,
+                                                    VALUETYPE** tensor,
+                                                    int* size) {
+  // init C++ vectors from C arrays
+  std::vector<VALUETYPE> coord_(coord, coord + natoms * 3);
+  std::vector<int> atype_(atype, atype + natoms);
+  std::vector<VALUETYPE> cell_;
+  if (cell) {
+    // pbc
+    cell_.assign(cell, cell + 9);
+  }
+  std::vector<VALUETYPE> t;
+
+  dt->dt.compute(t, coord_, atype_, cell_, nghost, nlist->nl);
+  // do not know the size of tensor in advance...
+  *tensor = new VALUETYPE[t.size()];
+  std::copy(t.begin(), t.end(), *tensor);
+  *size = t.size();
+}
+
+template void DP_DeepTensorComputeTensorNList_variant<double>(
     DP_DeepTensor* dt,
     const int natoms,
     const double* coord,
@@ -414,11 +380,9 @@ void DP_DeepTensorComputeTensorNList_variant  (
     const int nghost,
     const DP_Nlist* nlist,
     double** tensor,
-    int* size
-    );
+    int* size);
 
-template
-void DP_DeepTensorComputeTensorNList_variant  (
+template void DP_DeepTensorComputeTensorNList_variant<float>(
     DP_DeepTensor* dt,
     const int natoms,
     const float* coord,
@@ -427,588 +391,515 @@ void DP_DeepTensorComputeTensorNList_variant  (
     const int nghost,
     const DP_Nlist* nlist,
     float** tensor,
-    int* size
-    );
+    int* size);
 
 template <typename VALUETYPE>
-inline
-void DP_DeepTensorCompute_variant (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const VALUETYPE* coord,
-    const int* atype,
-    const VALUETYPE* cell,
-    VALUETYPE* global_tensor,
-    VALUETYPE* force,
-    VALUETYPE* virial,
-    VALUETYPE** atomic_tensor,
-    VALUETYPE* atomic_virial,
-    int* size_at
-    ) {
-    // init C++ vectors from C arrays
-    std::vector coord_(coord, coord+natoms*3);
-    std::vector atype_(atype, atype+natoms);
-    std::vector cell_;
-    if (cell) {
-        // pbc
-        cell_.assign(cell, cell+9);
-    }
-    std::vector t, f, v, at, av;
-
-    dt->dt.compute(t, f, v, at, av, coord_, atype_, cell_);
-    // copy from C++ vectors to C arrays, if not NULL pointer
-    if(global_tensor) std::copy(t.begin(), t.end(), global_tensor);
-    if(force) std::copy(f.begin(), f.end(), force);
-    if(virial) std::copy(v.begin(), v.end(), virial);
-    if(atomic_virial) std::copy(av.begin(), av.end(), atomic_virial);
-    // do not know the size of atomic tensor in advance...
-    if(atomic_tensor) {
-        *atomic_tensor = new VALUETYPE[at.size()];
-        std::copy(at.begin(), at.end(), *atomic_tensor);
-    }
-    if(size_at) *size_at = at.size();
-}
-
-template
-void DP_DeepTensorCompute_variant  (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const double* coord,
-    const int* atype,
-    const double* cell,
-    double* global_tensor,
-    double* force,
-    double* virial,
-    double** atomic_tensor,
-    double* atomic_virial,
-    int* size_at
-    );
+inline void DP_DeepTensorCompute_variant(DP_DeepTensor* dt,
+                                         const int natoms,
+                                         const VALUETYPE* coord,
+                                         const int* atype,
+                                         const VALUETYPE* cell,
+                                         VALUETYPE* global_tensor,
+                                         VALUETYPE* force,
+                                         VALUETYPE* virial,
+                                         VALUETYPE** atomic_tensor,
+                                         VALUETYPE* atomic_virial,
+                                         int* size_at) {
+  // init C++ vectors from C arrays
+  std::vector<VALUETYPE> coord_(coord, coord + natoms * 3);
+  std::vector<int> atype_(atype, atype + natoms);
+  std::vector<VALUETYPE> cell_;
+  if (cell) {
+    // pbc
+    cell_.assign(cell, cell + 9);
+  }
+  std::vector<VALUETYPE> t, f, v, at, av;
+
+  dt->dt.compute(t, f, v, at, av, coord_, atype_, cell_);
+  // copy from C++ vectors to C arrays, if not NULL pointer
+  if (global_tensor) std::copy(t.begin(), t.end(), global_tensor);
+  if (force) std::copy(f.begin(), f.end(), force);
+  if (virial) std::copy(v.begin(), v.end(), virial);
+  if (atomic_virial) std::copy(av.begin(), av.end(), atomic_virial);
+  // do not know the size of atomic tensor in advance...
+  if (atomic_tensor) {
+    *atomic_tensor = new VALUETYPE[at.size()];
+    std::copy(at.begin(), at.end(), *atomic_tensor);
+  }
+  if (size_at) *size_at = at.size();
+}
+
+template void DP_DeepTensorCompute_variant<double>(DP_DeepTensor* dt,
+                                                   const int natoms,
+                                                   const double* coord,
+                                                   const int* atype,
+                                                   const double* cell,
+                                                   double* global_tensor,
+                                                   double* force,
+                                                   double* virial,
+                                                   double** atomic_tensor,
+                                                   double* atomic_virial,
+                                                   int* size_at);
+
+template void DP_DeepTensorCompute_variant<float>(DP_DeepTensor* dt,
+                                                  const int natoms,
+                                                  const float* coord,
+                                                  const int* atype,
+                                                  const float* cell,
+                                                  float* global_tensor,
+                                                  float* force,
+                                                  float* virial,
+                                                  float** atomic_tensor,
+                                                  float* atomic_virial,
+                                                  int* size_at);
 
-template
-void DP_DeepTensorCompute_variant  (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const float* coord,
-    const int* atype,
-    const float* cell,
-    float* global_tensor,
-    float* force,
-    float* virial,
-    float** atomic_tensor,
-    float* atomic_virial,
-    int* size_at
-    );
+template <typename VALUETYPE>
+inline void DP_DeepTensorComputeNList_variant(DP_DeepTensor* dt,
+                                              const int natoms,
+                                              const VALUETYPE* coord,
+                                              const int* atype,
+                                              const VALUETYPE* cell,
+                                              const int nghost,
+                                              const DP_Nlist* nlist,
+                                              VALUETYPE* global_tensor,
+                                              VALUETYPE* force,
+                                              VALUETYPE* virial,
+                                              VALUETYPE** atomic_tensor,
+                                              VALUETYPE* atomic_virial,
+                                              int* size_at) {
+  // init C++ vectors from C arrays
+  std::vector<VALUETYPE> coord_(coord, coord + natoms * 3);
+  std::vector<int> atype_(atype, atype + natoms);
+  std::vector<VALUETYPE> cell_;
+  if (cell) {
+    // pbc
+    cell_.assign(cell, cell + 9);
+  }
+  std::vector<VALUETYPE> t, f, v, at, av;
+
+  dt->dt.compute(t, f, v, at, av, coord_, atype_, cell_, nghost, nlist->nl);
+  // copy from C++ vectors to C arrays, if not NULL pointer
+  if (global_tensor) std::copy(t.begin(), t.end(), global_tensor);
+  if (force) std::copy(f.begin(), f.end(), force);
+  if (virial) std::copy(v.begin(), v.end(), virial);
+  if (atomic_virial) std::copy(av.begin(), av.end(), atomic_virial);
+  // do not know the size of atomic tensor in advance...
+  if (atomic_tensor) {
+    *atomic_tensor = new VALUETYPE[at.size()];
+    std::copy(at.begin(), at.end(), *atomic_tensor);
+  }
+  if (size_at) *size_at = at.size();
+}
+
+template void DP_DeepTensorComputeNList_variant<double>(DP_DeepTensor* dt,
+                                                        const int natoms,
+                                                        const double* coord,
+                                                        const int* atype,
+                                                        const double* cell,
+                                                        const int nghost,
+                                                        const DP_Nlist* nlist,
+                                                        double* global_tensor,
+                                                        double* force,
+                                                        double* virial,
+                                                        double** atomic_tensor,
+                                                        double* atomic_virial,
+                                                        int* size_at);
+
+template void DP_DeepTensorComputeNList_variant<float>(DP_DeepTensor* dt,
+                                                       const int natoms,
+                                                       const float* coord,
+                                                       const int* atype,
+                                                       const float* cell,
+                                                       const int nghost,
+                                                       const DP_Nlist* nlist,
+                                                       float* global_tensor,
+                                                       float* force,
+                                                       float* virial,
+                                                       float** atomic_tensor,
+                                                       float* atomic_virial,
+                                                       int* size_at);
 
 template <class VALUETYPE>
-inline
-void DP_DeepTensorComputeNList_variant (
-    DP_DeepTensor* dt,
+inline void DP_DipoleChargeModifierComputeNList_variant(
+    DP_DipoleChargeModifier* dcm,
     const int natoms,
     const VALUETYPE* coord,
     const int* atype,
     const VALUETYPE* cell,
+    const int* pairs,
+    const int npairs,
+    const VALUETYPE* delef,
     const int nghost,
     const DP_Nlist* nlist,
-    VALUETYPE* global_tensor,
-    VALUETYPE* force,
-    VALUETYPE* virial,
-    VALUETYPE** atomic_tensor,
-    VALUETYPE* atomic_virial,
-    int* size_at
-    ) {
-    // init C++ vectors from C arrays
-    std::vector<VALUETYPE> coord_(coord, coord+natoms*3);
-    std::vector<int> atype_(atype, atype+natoms);
-    std::vector<VALUETYPE> cell_;
-    if (cell) {
-        // pbc
-        cell_.assign(cell, cell+9);
-    }
-    std::vector<VALUETYPE> t, f, v, at, av;
-
-    dt->dt.compute(t, f, v, at, av, coord_, atype_, cell_, nghost, nlist->nl);
-    // copy from C++ vectors to C arrays, if not NULL pointer
-    if(global_tensor) std::copy(t.begin(), t.end(), global_tensor);
-    if(force) std::copy(f.begin(), f.end(), force);
-    if(virial) std::copy(v.begin(), v.end(), virial);
-    if(atomic_virial) std::copy(av.begin(), av.end(), atomic_virial);
-    // do not know the size of atomic tensor in advance...
-    if(atomic_tensor) {
-        *atomic_tensor = new VALUETYPE[at.size()];
-        std::copy(at.begin(), at.end(), *atomic_tensor);
-    }
-    if(size_at) *size_at = at.size();
-}
-
-template
-void DP_DeepTensorComputeNList_variant <double> (
-    DP_DeepTensor* dt,
+    VALUETYPE* dfcorr_,
+    VALUETYPE* dvcorr_) {
+  // init C++ vectors from C arrays
+  std::vector<VALUETYPE> coord_(coord, coord + natoms * 3);
+  std::vector<int> atype_(atype, atype + natoms);
+  std::vector<VALUETYPE> cell_;
+  if (cell) {
+    // pbc
+    cell_.assign(cell, cell + 9);
+  }
+  // pairs
+  std::vector<std::pair<int, int>> pairs_;
+  for (int i = 0; i < npairs; i++) {
+    pairs_.push_back(std::make_pair(pairs[i * 2], pairs[i * 2 + 1]));
+  }
+  std::vector<VALUETYPE> delef_(delef, delef + natoms * 3);
+  std::vector<VALUETYPE> df, dv;
+
+  dcm->dcm.compute(df, dv, coord_, atype_, cell_, pairs_, delef_, nghost,
+                   nlist->nl);
+  // copy from C++ vectors to C arrays, if not NULL pointer
+  if (dfcorr_) std::copy(df.begin(), df.end(), dfcorr_);
+  if (dvcorr_) std::copy(dv.begin(), dv.end(), dvcorr_);
+}
+
+template void DP_DipoleChargeModifierComputeNList_variant<double>(
+    DP_DipoleChargeModifier* dcm,
     const int natoms,
     const double* coord,
     const int* atype,
     const double* cell,
+    const int* pairs,
+    const int npairs,
+    const double* delef,
     const int nghost,
     const DP_Nlist* nlist,
-    double* global_tensor,
-    double* force,
-    double* virial,
-    double** atomic_tensor,
-    double* atomic_virial,
-    int* size_at
-    );
+    double* dfcorr_,
+    double* dvcorr_);
 
-template
-void DP_DeepTensorComputeNList_variant <float> (
-    DP_DeepTensor* dt,
+template void DP_DipoleChargeModifierComputeNList_variant<float>(
+    DP_DipoleChargeModifier* dcm,
     const int natoms,
     const float* coord,
     const int* atype,
     const float* cell,
+    const int* pairs,
+    const int npairs,
+    const float* delef,
     const int nghost,
     const DP_Nlist* nlist,
-    float* global_tensor,
-    float* force,
-    float* virial,
-    float** atomic_tensor,
-    float* atomic_virial,
-    int* size_at
-    );
-
-template <class VALUETYPE>
-inline
-void DP_DipoleChargeModifierComputeNList_variant (
-  DP_DipoleChargeModifier* dcm,
-  const int natoms,
-  const VALUETYPE* coord,
-  const int* atype,
-  const VALUETYPE* cell,
-  const int* pairs,
-  const int npairs,
-  const VALUETYPE* delef,
-  const int nghost,
-  const DP_Nlist* nlist,
-  VALUETYPE* dfcorr_,
-  VALUETYPE* dvcorr_
-  ){
-    // init C++ vectors from C arrays
-    std::vector<VALUETYPE> coord_(coord, coord+natoms*3);
-    std::vector<int> atype_(atype, atype+natoms);
-    std::vector<VALUETYPE> cell_;
-    if (cell) {
-        // pbc
-        cell_.assign(cell, cell+9);
-    }
-    // pairs
-    std::vector<std::pair<int,int> > pairs_;
-    for (int i = 0; i < npairs; i++) {
-        pairs_.push_back(std::make_pair(pairs[i*2], pairs[i*2+1]));
-    }
-    std::vector<VALUETYPE> delef_(delef, delef+natoms*3);
-    std::vector<VALUETYPE> df, dv;
-
-    dcm->dcm.compute(df, dv, coord_, atype_, cell_, pairs_, delef_, nghost, nlist->nl);
-    // copy from C++ vectors to C arrays, if not NULL pointer
-    if(dfcorr_) std::copy(df.begin(), df.end(), dfcorr_);
-    if(dvcorr_) std::copy(dv.begin(), dv.end(), dvcorr_);
-}
-
-template
-void DP_DipoleChargeModifierComputeNList_variant <double> (
-  DP_DipoleChargeModifier* dcm,
-  const int natoms,
-  const double* coord,
-  const int* atype,
-  const double* cell,
-  const int* pairs,
-  const int npairs,
-  const double* delef,
-  const int nghost,
-  const DP_Nlist* nlist,
-  double* dfcorr_,
-  double* dvcorr_
-  );
-
-template
-void DP_DipoleChargeModifierComputeNList_variant <float> (
-  DP_DipoleChargeModifier* dcm,
-  const int natoms,
-  const float* coord,
-  const int* atype,
-  const float* cell,
-  const int* pairs,
-  const int npairs,
-  const float* delef,
-  const int nghost,
-  const DP_Nlist* nlist,
-  float* dfcorr_,
-  float* dvcorr_
-  );
-
+    float* dfcorr_,
+    float* dvcorr_);
 
 extern "C" {
 
-void DP_DeepPotCompute (
-    DP_DeepPot* dp,
-    const int natoms,
-    const double* coord,
-    const int* atype,
-    const double* cell,
-    double* energy,
-    double* force,
-    double* virial,
-    double* atomic_energy,
-    double* atomic_virial
-    ) {
-    DP_DeepPotCompute_variant<double>(dp, natoms, coord, atype, cell, energy, force, virial, atomic_energy, atomic_virial);
-}
-
-void DP_DeepPotComputef (
-    DP_DeepPot* dp,
-    const int natoms,
-    const float* coord,
-    const int* atype,
-    const float* cell,
-    double* energy,
-    float* force,
-    float* virial,
-    float* atomic_energy,
-    float* atomic_virial
-    ) {
-    DP_DeepPotCompute_variant<float>(dp, natoms, coord, atype, cell, energy, force, virial, atomic_energy, atomic_virial);
-}
-
-void DP_DeepPotComputeNList (
-    DP_DeepPot* dp,
-    const int natoms,
-    const double* coord,
-    const int* atype,
-    const double* cell,
-    const int nghost,
-    const DP_Nlist* nlist,
-    const int ago,
-    double* energy,
-    double* force,
-    double* virial,
-    double* atomic_energy,
-    double* atomic_virial
-    ) {
-    DP_DeepPotComputeNList_variant<double>(dp, natoms, coord, atype, cell, nghost, nlist, ago, energy, force, virial, atomic_energy, atomic_virial);
-}
-
-void DP_DeepPotComputeNListf (
-    DP_DeepPot* dp,
-    const int natoms,
-    const float* coord,
-    const int* atype,
-    const float* cell,
-    const int nghost,
-    const DP_Nlist* nlist,
-    const int ago,
-    double* energy,
-    float* force,
-    float* virial,
-    float* atomic_energy,
-    float* atomic_virial
-    ) {
-    DP_DeepPotComputeNList_variant<float>(dp, natoms, coord, atype, cell, nghost, nlist, ago, energy, force, virial, atomic_energy, atomic_virial);
-}
-
-const char* DP_DeepPotGetTypeMap(
-    DP_DeepPot* dp
-    ) {
-    std::string type_map;
-    dp->dp.get_type_map(type_map);
-    // copy from string to char*
-    const std::string::size_type size = type_map.size();
-    // +1 for '\0'
-    char *buffer = new char[size + 1];
-    std::copy(type_map.begin(), type_map.end(), buffer);
-    buffer[size] = '\0';
-    return buffer;
-}
-
-double DP_DeepPotGetCutoff(
-    DP_DeepPot* dp
-    ) {
-    return dp->dp.cutoff();
-}
-
-int DP_DeepPotGetNumbTypes(
-    DP_DeepPot* dp
-    ) {
-    return dp->dp.numb_types();
-}
-
-void DP_DeepPotModelDeviComputeNList (
-    DP_DeepPotModelDevi* dp,
-    const int natoms,
-    const double* coord,
-    const int* atype,
-    const double* cell,
-    const int nghost,
-    const DP_Nlist* nlist,
-    const int ago,
-    double* energy,
-    double* force,
-    double* virial,
-    double* atomic_energy,
-    double* atomic_virial
-    ) {
-    DP_DeepPotModelDeviComputeNList_variant<double>(dp, natoms, coord, atype, cell, nghost, nlist, ago, energy, force, virial, atomic_energy, atomic_virial);
-}
-
-void DP_DeepPotModelDeviComputeNListf (
-    DP_DeepPotModelDevi* dp,
-    const int natoms,
-    const float* coord,
-    const int* atype,
-    const float* cell,
-    const int nghost,
-    const DP_Nlist* nlist,
-    const int ago,
-    double* energy,
-    float* force,
-    float* virial,
-    float* atomic_energy,
-    float* atomic_virial
-    ) {
-    DP_DeepPotModelDeviComputeNList_variant<float>(dp, natoms, coord, atype, cell, nghost, nlist, ago, energy, force, virial, atomic_energy, atomic_virial);
-}
-
-double DP_DeepPotModelDeviGetCutoff(
-    DP_DeepPotModelDevi* dp
-    ) {
-    return dp->dp.cutoff();
-}
-
-int DP_DeepPotModelDeviGetNumbTypes(
-    DP_DeepPotModelDevi* dp
-    ) {
-    return dp->dp.numb_types();
-}
-
-void DP_DeepTensorComputeTensor (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const double* coord,
-    const int* atype,
-    const double* cell,
-    double** tensor,
-    int* size
-    ) {
-    DP_DeepTensorComputeTensor_variant<double>(dt, natoms, coord, atype, cell, tensor, size);
-}
-
-void DP_DeepTensorComputeTensorf (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const float* coord,
-    const int* atype,
-    const float* cell,
-    float** tensor,
-    int* size
-    ) {
-    DP_DeepTensorComputeTensor_variant<float>(dt, natoms, coord, atype, cell, tensor, size);
-}
-
-void DP_DeepTensorComputeTensorNList (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const double* coord,
-    const int* atype,
-    const double* cell,
-    const int nghost,
-    const DP_Nlist* nlist,
-    double** tensor,
-    int* size
-    ) {
-    DP_DeepTensorComputeTensorNList_variant<double>(dt, natoms, coord, atype, cell, nghost, nlist, tensor, size);
-}
-
-void DP_DeepTensorComputeTensorNListf (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const float* coord,
-    const int* atype,
-    const float* cell,
-    const int nghost,
-    const DP_Nlist* nlist,
-    float** tensor,
-    int* size
-    ) {
-    DP_DeepTensorComputeTensorNList_variant<float>(dt, natoms, coord, atype, cell, nghost, nlist, tensor, size);
-}
-
-void DP_DeepTensorCompute (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const double* coord,
-    const int* atype,
-    const double* cell,
-    double* global_tensor,
-    double* force,
-    double* virial,
-    double** atomic_tensor,
-    double* atomic_virial,
-    int* size_at
-    ) {
-    DP_DeepTensorCompute_variant<double>(dt, natoms, coord, atype, cell, global_tensor, force, virial, atomic_tensor, atomic_virial, size_at);
-}
-
-void DP_DeepTensorComputef (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const float* coord,
-    const int* atype,
-    const float* cell,
-    float* global_tensor,
-    float* force,
-    float* virial,
-    float** atomic_tensor,
-    float* atomic_virial,
-    int* size_at
-    ) {
-    DP_DeepTensorCompute_variant<float>(dt, natoms, coord, atype, cell, global_tensor, force, virial, atomic_tensor, atomic_virial, size_at);
-}
-
-void DP_DeepTensorComputeNList (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const double* coord,
-    const int* atype,
-    const double* cell,
-    const int nghost,
-    const DP_Nlist* nlist,
-    double* global_tensor,
-    double* force,
-    double* virial,
-    double** atomic_tensor,
-    double* atomic_virial,
-    int* size_at
-    ) {
-    DP_DeepTensorComputeNList_variant<double>(dt, natoms, coord, atype, cell, nghost, nlist, global_tensor, force, virial, atomic_tensor, atomic_virial, size_at);
-}
-
-void DP_DeepTensorComputeNListf (
-    DP_DeepTensor* dt,
-    const int natoms,
-    const float* coord,
-    const int* atype,
-    const float* cell,
-    const int nghost,
-    const DP_Nlist* nlist,
-    float* global_tensor,
-    float* force,
-    float* virial,
-    float** atomic_tensor,
-    float* atomic_virial,
-    int* size_at
-    ) {
-    DP_DeepTensorComputeNList_variant<float>(dt, natoms, coord, atype, cell, nghost, nlist, global_tensor, force, virial, atomic_tensor, atomic_virial, size_at);
-}
-
-double DP_DeepTensorGetCutoff(
-    DP_DeepTensor* dt
-    ) {
-    return dt->dt.cutoff();
-}
-
-int DP_DeepTensorGetNumbTypes(
-    DP_DeepTensor* dt
-    ) {
-    return dt->dt.numb_types();
-}
-
-int DP_DeepTensorGetOutputDim(
-    DP_DeepTensor* dt
-    ) {
-    return dt->dt.output_dim();
-}
-
-int* DP_DeepTensorGetSelTypes(
-    DP_DeepTensor* dt
-    ) {
-    return (int*) &(dt->dt.sel_types())[0];
-}
-
-int DP_DeepTensorGetNumbSelTypes(
-    DP_DeepTensor* dt
-    ) {
-    return dt->dt.sel_types().size();
-}
-
-void DP_DipoleChargeModifierComputeNList (
-  DP_DipoleChargeModifier* dcm,
-  const int natom,
-  const double* coord,
-  const int* atype,
-  const double* cell,
-  const int* pairs,
-  const int npairs,
-  const double* delef_,
-  const int nghost,
-  const DP_Nlist* nlist,
-  double* dfcorr_,
-  double* dvcorr_
-  ){
-    DP_DipoleChargeModifierComputeNList_variant<double>(dcm, natom, coord, atype, cell, pairs, npairs, delef_, nghost, nlist, dfcorr_, dvcorr_);
-}
-
-void DP_DipoleChargeModifierComputeNListf (
-  DP_DipoleChargeModifier* dcm,
-  const int natom,
-  const float* coord,
-  const int* atype,
-  const float* cell,
-  const int* pairs,
-  const int npairs,
-  const float* delef_,
-  const int nghost,
-  const DP_Nlist* nlist,
-  float* dfcorr_,
-  float* dvcorr_
-  ){
-    DP_DipoleChargeModifierComputeNList_variant<float>(dcm, natom, coord, atype, cell, pairs, npairs, delef_, nghost, nlist, dfcorr_, dvcorr_);
-}
-
-double DP_DipoleChargeModifierGetCutoff(
-    DP_DipoleChargeModifier* dcm
-    ) {
-    return dcm->dcm.cutoff();
-}
-
-int DP_DipoleChargeModifierGetNumbTypes(
-    DP_DipoleChargeModifier* dcm
-    ) {
-    return dcm->dcm.numb_types();
-}
-
-int* DP_DipoleChargeModifierGetSelTypes(
-    DP_DipoleChargeModifier* dcm
-    ) {
-    return (int*) &(dcm->dcm.sel_types())[0];
+void DP_DeepPotCompute(DP_DeepPot* dp,
+                       const int natoms,
+                       const double* coord,
+                       const int* atype,
+                       const double* cell,
+                       double* energy,
+                       double* force,
+                       double* virial,
+                       double* atomic_energy,
+                       double* atomic_virial) {
+  DP_DeepPotCompute_variant<double>(dp, natoms, coord, atype, cell, energy,
+                                    force, virial, atomic_energy,
+                                    atomic_virial);
+}
+
+void DP_DeepPotComputef(DP_DeepPot* dp,
+                        const int natoms,
+                        const float* coord,
+                        const int* atype,
+                        const float* cell,
+                        double* energy,
+                        float* force,
+                        float* virial,
+                        float* atomic_energy,
+                        float* atomic_virial) {
+  DP_DeepPotCompute_variant<float>(dp, natoms, coord, atype, cell, energy,
+                                   force, virial, atomic_energy, atomic_virial);
+}
+
+void DP_DeepPotComputeNList(DP_DeepPot* dp,
+                            const int natoms,
+                            const double* coord,
+                            const int* atype,
+                            const double* cell,
+                            const int nghost,
+                            const DP_Nlist* nlist,
+                            const int ago,
+                            double* energy,
+                            double* force,
+                            double* virial,
+                            double* atomic_energy,
+                            double* atomic_virial) {
+  DP_DeepPotComputeNList_variant<double>(dp, natoms, coord, atype, cell, nghost,
+                                         nlist, ago, energy, force, virial,
+                                         atomic_energy, atomic_virial);
+}
+
+void DP_DeepPotComputeNListf(DP_DeepPot* dp,
+                             const int natoms,
+                             const float* coord,
+                             const int* atype,
+                             const float* cell,
+                             const int nghost,
+                             const DP_Nlist* nlist,
+                             const int ago,
+                             double* energy,
+                             float* force,
+                             float* virial,
+                             float* atomic_energy,
+                             float* atomic_virial) {
+  DP_DeepPotComputeNList_variant<float>(dp, natoms, coord, atype, cell, nghost,
+                                        nlist, ago, energy, force, virial,
+                                        atomic_energy, atomic_virial);
+}
+
+const char* DP_DeepPotGetTypeMap(DP_DeepPot* dp) {
+  std::string type_map;
+  dp->dp.get_type_map(type_map);
+  // copy from string to char*
+  const std::string::size_type size = type_map.size();
+  // +1 for '\0'
+  char* buffer = new char[size + 1];
+  std::copy(type_map.begin(), type_map.end(), buffer);
+  buffer[size] = '\0';
+  return buffer;
+}
+
+double DP_DeepPotGetCutoff(DP_DeepPot* dp) { return dp->dp.cutoff(); }
+
+int DP_DeepPotGetNumbTypes(DP_DeepPot* dp) { return dp->dp.numb_types(); }
+
+void DP_DeepPotModelDeviComputeNList(DP_DeepPotModelDevi* dp,
+                                     const int natoms,
+                                     const double* coord,
+                                     const int* atype,
+                                     const double* cell,
+                                     const int nghost,
+                                     const DP_Nlist* nlist,
+                                     const int ago,
+                                     double* energy,
+                                     double* force,
+                                     double* virial,
+                                     double* atomic_energy,
+                                     double* atomic_virial) {
+  DP_DeepPotModelDeviComputeNList_variant<double>(
+      dp, natoms, coord, atype, cell, nghost, nlist, ago, energy, force, virial,
+      atomic_energy, atomic_virial);
+}
+
+void DP_DeepPotModelDeviComputeNListf(DP_DeepPotModelDevi* dp,
+                                      const int natoms,
+                                      const float* coord,
+                                      const int* atype,
+                                      const float* cell,
+                                      const int nghost,
+                                      const DP_Nlist* nlist,
+                                      const int ago,
+                                      double* energy,
+                                      float* force,
+                                      float* virial,
+                                      float* atomic_energy,
+                                      float* atomic_virial) {
+  DP_DeepPotModelDeviComputeNList_variant<float>(
+      dp, natoms, coord, atype, cell, nghost, nlist, ago, energy, force, virial,
+      atomic_energy, atomic_virial);
+}
+
+double DP_DeepPotModelDeviGetCutoff(DP_DeepPotModelDevi* dp) {
+  return dp->dp.cutoff();
+}
+
+int DP_DeepPotModelDeviGetNumbTypes(DP_DeepPotModelDevi* dp) {
+  return dp->dp.numb_types();
+}
+
+void DP_DeepTensorComputeTensor(DP_DeepTensor* dt,
+                                const int natoms,
+                                const double* coord,
+                                const int* atype,
+                                const double* cell,
+                                double** tensor,
+                                int* size) {
+  DP_DeepTensorComputeTensor_variant<double>(dt, natoms, coord, atype, cell,
+                                             tensor, size);
+}
+
+void DP_DeepTensorComputeTensorf(DP_DeepTensor* dt,
+                                 const int natoms,
+                                 const float* coord,
+                                 const int* atype,
+                                 const float* cell,
+                                 float** tensor,
+                                 int* size) {
+  DP_DeepTensorComputeTensor_variant<float>(dt, natoms, coord, atype, cell,
+                                            tensor, size);
+}
+
+void DP_DeepTensorComputeTensorNList(DP_DeepTensor* dt,
+                                     const int natoms,
+                                     const double* coord,
+                                     const int* atype,
+                                     const double* cell,
+                                     const int nghost,
+                                     const DP_Nlist* nlist,
+                                     double** tensor,
+                                     int* size) {
+  DP_DeepTensorComputeTensorNList_variant<double>(
+      dt, natoms, coord, atype, cell, nghost, nlist, tensor, size);
+}
+
+void DP_DeepTensorComputeTensorNListf(DP_DeepTensor* dt,
+                                      const int natoms,
+                                      const float* coord,
+                                      const int* atype,
+                                      const float* cell,
+                                      const int nghost,
+                                      const DP_Nlist* nlist,
+                                      float** tensor,
+                                      int* size) {
+  DP_DeepTensorComputeTensorNList_variant<float>(dt, natoms, coord, atype, cell,
+                                                 nghost, nlist, tensor, size);
+}
+
+void DP_DeepTensorCompute(DP_DeepTensor* dt,
+                          const int natoms,
+                          const double* coord,
+                          const int* atype,
+                          const double* cell,
+                          double* global_tensor,
+                          double* force,
+                          double* virial,
+                          double** atomic_tensor,
+                          double* atomic_virial,
+                          int* size_at) {
+  DP_DeepTensorCompute_variant<double>(dt, natoms, coord, atype, cell,
+                                       global_tensor, force, virial,
+                                       atomic_tensor, atomic_virial, size_at);
+}
+
+void DP_DeepTensorComputef(DP_DeepTensor* dt,
+                           const int natoms,
+                           const float* coord,
+                           const int* atype,
+                           const float* cell,
+                           float* global_tensor,
+                           float* force,
+                           float* virial,
+                           float** atomic_tensor,
+                           float* atomic_virial,
+                           int* size_at) {
+  DP_DeepTensorCompute_variant<float>(dt, natoms, coord, atype, cell,
+                                      global_tensor, force, virial,
+                                      atomic_tensor, atomic_virial, size_at);
+}
+
+void DP_DeepTensorComputeNList(DP_DeepTensor* dt,
+                               const int natoms,
+                               const double* coord,
+                               const int* atype,
+                               const double* cell,
+                               const int nghost,
+                               const DP_Nlist* nlist,
+                               double* global_tensor,
+                               double* force,
+                               double* virial,
+                               double** atomic_tensor,
+                               double* atomic_virial,
+                               int* size_at) {
+  DP_DeepTensorComputeNList_variant<double>(
+      dt, natoms, coord, atype, cell, nghost, nlist, global_tensor, force,
+      virial, atomic_tensor, atomic_virial, size_at);
+}
+
+void DP_DeepTensorComputeNListf(DP_DeepTensor* dt,
+                                const int natoms,
+                                const float* coord,
+                                const int* atype,
+                                const float* cell,
+                                const int nghost,
+                                const DP_Nlist* nlist,
+                                float* global_tensor,
+                                float* force,
+                                float* virial,
+                                float** atomic_tensor,
+                                float* atomic_virial,
+                                int* size_at) {
+  DP_DeepTensorComputeNList_variant<float>(
+      dt, natoms, coord, atype, cell, nghost, nlist, global_tensor, force,
+      virial, atomic_tensor, atomic_virial, size_at);
+}
+
+double DP_DeepTensorGetCutoff(DP_DeepTensor* dt) { return dt->dt.cutoff(); }
+
+int DP_DeepTensorGetNumbTypes(DP_DeepTensor* dt) { return dt->dt.numb_types(); }
+
+int DP_DeepTensorGetOutputDim(DP_DeepTensor* dt) { return dt->dt.output_dim(); }
+
+int* DP_DeepTensorGetSelTypes(DP_DeepTensor* dt) {
+  return (int*)&(dt->dt.sel_types())[0];
+}
+
+int DP_DeepTensorGetNumbSelTypes(DP_DeepTensor* dt) {
+  return dt->dt.sel_types().size();
+}
+
+void DP_DipoleChargeModifierComputeNList(DP_DipoleChargeModifier* dcm,
+                                         const int natom,
+                                         const double* coord,
+                                         const int* atype,
+                                         const double* cell,
+                                         const int* pairs,
+                                         const int npairs,
+                                         const double* delef_,
+                                         const int nghost,
+                                         const DP_Nlist* nlist,
+                                         double* dfcorr_,
+                                         double* dvcorr_) {
+  DP_DipoleChargeModifierComputeNList_variant<double>(
+      dcm, natom, coord, atype, cell, pairs, npairs, delef_, nghost, nlist,
+      dfcorr_, dvcorr_);
+}
+
+void DP_DipoleChargeModifierComputeNListf(DP_DipoleChargeModifier* dcm,
+                                          const int natom,
+                                          const float* coord,
+                                          const int* atype,
+                                          const float* cell,
+                                          const int* pairs,
+                                          const int npairs,
+                                          const float* delef_,
+                                          const int nghost,
+                                          const DP_Nlist* nlist,
+                                          float* dfcorr_,
+                                          float* dvcorr_) {
+  DP_DipoleChargeModifierComputeNList_variant<float>(
+      dcm, natom, coord, atype, cell, pairs, npairs, delef_, nghost, nlist,
+      dfcorr_, dvcorr_);
+}
+
+double DP_DipoleChargeModifierGetCutoff(DP_DipoleChargeModifier* dcm) {
+  return dcm->dcm.cutoff();
+}
+
+int DP_DipoleChargeModifierGetNumbTypes(DP_DipoleChargeModifier* dcm) {
+  return dcm->dcm.numb_types();
+}
+
+int* DP_DipoleChargeModifierGetSelTypes(DP_DipoleChargeModifier* dcm) {
+  return (int*)&(dcm->dcm.sel_types())[0];
 }
 
-int DP_DipoleChargeModifierGetNumbSelTypes(
-    DP_DipoleChargeModifier* dcm
-    ) {
-    return dcm->dcm.sel_types().size();
+int DP_DipoleChargeModifierGetNumbSelTypes(DP_DipoleChargeModifier* dcm) {
+  return dcm->dcm.sel_types().size();
 }
 
-void DP_ConvertPbtxtToPb(
-    const char* c_pbtxt,
-    const char* c_pb
-    ) {
-    std::string pbtxt(c_pbtxt);
-    std::string pb(c_pb);
-    deepmd::convert_pbtxt_to_pb(pbtxt, pb);
+void DP_ConvertPbtxtToPb(const char* c_pbtxt, const char* c_pb) {
+  std::string pbtxt(c_pbtxt);
+  std::string pb(c_pb);
+  deepmd::convert_pbtxt_to_pb(pbtxt, pb);
 }
 
-void DP_PrintSummary(
-    const char* c_pre
-    ) {
-    std::string pre(c_pre);
-    deepmd::print_summary(pre);
+void DP_PrintSummary(const char* c_pre) {
+  std::string pre(c_pre);
+  deepmd::print_summary(pre);
 }
 
-} // extern "C"
\ No newline at end of file
+}  // extern "C"
diff --git a/source/api_c/tests/CMakeLists.txt b/source/api_c/tests/CMakeLists.txt
index 5abbe8b6e6..a3559a4e19 100644
--- a/source/api_c/tests/CMakeLists.txt
+++ b/source/api_c/tests/CMakeLists.txt
@@ -4,20 +4,13 @@ project(deepmd_api_c_test)
 file(GLOB TEST_SRC test_*.cc)
 
 set_target_properties(
-  ${LIB_DEEPMD_C}
-  PROPERTIES 
-  INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}"
-)
+  ${LIB_DEEPMD_C} PROPERTIES INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}")
 
-add_executable( runUnitTests_c ${TEST_SRC} )
-target_link_libraries(runUnitTests_c PRIVATE GTest::gtest_main ${LIB_DEEPMD_C} rt coverage_config)
+add_executable(runUnitTests_c ${TEST_SRC})
+target_link_libraries(runUnitTests_c PRIVATE GTest::gtest_main ${LIB_DEEPMD_C}
+                                             rt coverage_config)
 target_link_libraries(runUnitTests_c PRIVATE ${LIB_DEEPMD} ${LIB_DEEPMD_CC})
 target_precompile_headers(runUnitTests_c PRIVATE test_utils.h [["deepmd.hpp"]])
-add_test( runUnitTests_c runUnitTests_c )
-set_target_properties(
-  runUnitTests_c
-  PROPERTIES 
-  INSTALL_RPATH "$ORIGIN/../lib"
-)
+add_test(runUnitTests_c runUnitTests_c)
+set_target_properties(runUnitTests_c PROPERTIES INSTALL_RPATH "$ORIGIN/../lib")
 install(TARGETS runUnitTests_c DESTINATION bin/)
-
diff --git a/source/api_c/tests/test_deepdipole_hpp.cc b/source/api_c/tests/test_deepdipole_hpp.cc
index 6d02e5ccdd..afd3a5eba0 100644
--- a/source/api_c/tests/test_deepdipole_hpp.cc
+++ b/source/api_c/tests/test_deepdipole_hpp.cc
@@ -1,58 +1,49 @@
 #include 
-#include 
+
 #include 
+#include 
 #include 
 #include 
+
 #include "deepmd.hpp"
 #include "test_utils.h"
 
 template 
-class TestInferDeepDipole : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepDipole : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector expected_d = {
-    -9.274180565967479195e-01,2.698028341272042496e+00,2.521268387140979117e-01,2.927260638453461628e+00,-8.571926301526779923e-01,1.667785136187720063e+00
-  };
+      -9.274180565967479195e-01, 2.698028341272042496e+00,
+      2.521268387140979117e-01,  2.927260638453461628e+00,
+      -8.571926301526779923e-01, 1.667785136187720063e+00};
   int natoms = 6;
 
   deepmd::hpp::DeepTensor dp;
 
   void SetUp() override {
-    deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deepdipole.pbtxt", "deepdipole.pb");
+    deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deepdipole.pbtxt",
+                                     "deepdipole.pb");
 
     dp.init("deepdipole.pb");
   };
 
-  void TearDown() override {
-    remove( "deepdipole.pb" ) ;
-  };
+  void TearDown() override { remove("deepdipole.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepDipole, ValueTypes);
 
-TYPED_TEST(TestInferDeepDipole, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepDipole, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_d = this -> expected_d;
-  int& natoms = this -> natoms;
-  deepmd::hpp::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_d = this->expected_d;
+  int& natoms = this->natoms;
+  deepmd::hpp::DeepTensor& dp = this->dp;
   EXPECT_EQ(dp.cutoff(), 4.);
   EXPECT_EQ(dp.numb_types(), 2);
   EXPECT_EQ(dp.output_dim(), 3);
@@ -64,71 +55,163 @@ TYPED_TEST(TestInferDeepDipole, cpu_build_nlist)
   dp.compute(value, coord, atype, box);
 
   EXPECT_EQ(value.size(), expected_d.size());
-  for(int ii = 0; ii < expected_d.size(); ++ii){
+  for (int ii = 0; ii < expected_d.size(); ++ii) {
     EXPECT_LT(fabs(value[ii] - expected_d[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepDipole, cpu_lmp_nlist)
-{
+TYPED_TEST(TestInferDeepDipole, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_d = this -> expected_d;
-  int& natoms = this -> natoms;
-  deepmd::hpp::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_d = this->expected_d;
+  int& natoms = this->natoms;
+  deepmd::hpp::DeepTensor& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
+  std::vector atype_cpy, mapping;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  std::vector > nlist_data;
+  std::vector > nlist_data;
   deepmd::hpp::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   std::vector value;
-  dp.compute(value, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  dp.compute(value, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   EXPECT_EQ(value.size(), expected_d.size());
-  for(int ii = 0; ii < expected_d.size(); ++ii){
+  for (int ii = 0; ii < expected_d.size(); ++ii) {
     EXPECT_LT(fabs(value[ii] - expected_d[ii]), EPSILON);
   }
 }
 
-
 template 
-class TestInferDeepDipoleNew : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepDipoleNew : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector expected_t = {
-    -1.128427726201255282e-01, 2.654103846999197880e-01, 2.625816377288122533e-02, 3.027556488877700680e-01, -7.475444785689989990e-02, 1.526291164572509684e-01
-  };
+      -1.128427726201255282e-01, 2.654103846999197880e-01,
+      2.625816377288122533e-02,  3.027556488877700680e-01,
+      -7.475444785689989990e-02, 1.526291164572509684e-01};
   std::vector expected_f = {
-    8.424897862241968738e-02, -3.823566783202275721e-02, 3.570797165027734810e-01, 6.102563129736437997e-02, -1.351209759852018133e-01, -2.438224487466488510e-01, -1.403204771681088869e-01, 1.719596545791735875e-01, -1.136584427103610045e-01, 2.761686212947551955e-02, -7.247860200915196005e-02, 6.208831127377397591e-02, -2.605870723577520809e-01, -4.504074577536486268e-02, 7.340240097998475266e-02, 2.280160774766013809e-01, 1.189163370225677641e-01, -1.350895372995223886e-01, -4.294311497114180337e-02, 1.524802094783661577e-01, 1.070451777645946290e-01, -1.259336332521076574e-01, -2.087610788959351760e-01, 9.447141346538817652e-02, 1.668125597515543457e-01, 5.487037060760904805e-02, -2.014994036104674757e-01, -7.411985441205551361e-02, 3.614456658821710300e-01, 2.901174891391154476e-01, -4.871926969937838414e-02, -1.252747945819455699e-01, -2.555459318266457558e-01, 1.249033125831290059e-01, -2.347603724902655176e-01, -3.458874493198500766e-02, 3.563990394229877290e-01, 1.052342031228763047e-01, 1.907268232932498031e-01, -2.432737821373903708e-01, 1.016781829972335099e-01, -7.707616437996064884e-02, -1.139199805053340564e-01, -2.068592154909300040e-01, -1.156337826476897951e-01, 6.583817133933017596e-02, 2.902207490750204344e-01, 9.945482314729316153e-02, 7.986986504051810098e-02, -2.549975565538568079e-01, 1.275343199697696051e-01, -1.449133131601115787e-01, -3.527636315034351350e-02, -2.250060193826620980e-01
-  };
+      8.424897862241968738e-02,  -3.823566783202275721e-02,
+      3.570797165027734810e-01,  6.102563129736437997e-02,
+      -1.351209759852018133e-01, -2.438224487466488510e-01,
+      -1.403204771681088869e-01, 1.719596545791735875e-01,
+      -1.136584427103610045e-01, 2.761686212947551955e-02,
+      -7.247860200915196005e-02, 6.208831127377397591e-02,
+      -2.605870723577520809e-01, -4.504074577536486268e-02,
+      7.340240097998475266e-02,  2.280160774766013809e-01,
+      1.189163370225677641e-01,  -1.350895372995223886e-01,
+      -4.294311497114180337e-02, 1.524802094783661577e-01,
+      1.070451777645946290e-01,  -1.259336332521076574e-01,
+      -2.087610788959351760e-01, 9.447141346538817652e-02,
+      1.668125597515543457e-01,  5.487037060760904805e-02,
+      -2.014994036104674757e-01, -7.411985441205551361e-02,
+      3.614456658821710300e-01,  2.901174891391154476e-01,
+      -4.871926969937838414e-02, -1.252747945819455699e-01,
+      -2.555459318266457558e-01, 1.249033125831290059e-01,
+      -2.347603724902655176e-01, -3.458874493198500766e-02,
+      3.563990394229877290e-01,  1.052342031228763047e-01,
+      1.907268232932498031e-01,  -2.432737821373903708e-01,
+      1.016781829972335099e-01,  -7.707616437996064884e-02,
+      -1.139199805053340564e-01, -2.068592154909300040e-01,
+      -1.156337826476897951e-01, 6.583817133933017596e-02,
+      2.902207490750204344e-01,  9.945482314729316153e-02,
+      7.986986504051810098e-02,  -2.549975565538568079e-01,
+      1.275343199697696051e-01,  -1.449133131601115787e-01,
+      -3.527636315034351350e-02, -2.250060193826620980e-01};
   std::vector expected_v = {
-    3.479789535931299138e-02, 4.337414719007849292e-03, -3.647371468256610082e-03, 8.053492919528318708e-03, 1.003834811499279773e-03, -8.441338187607602033e-04, -6.695998268698949256e-03, -8.346286793845711892e-04, 7.018468440279366279e-04, -4.515896716004976635e-02, 1.891794570218296306e-02, 3.417435352652402336e-02, 9.998952222904963771e-02, -4.188750255541257711e-02, -7.566774655171297492e-02, 1.804286120725206444e-01, -7.558495911146115298e-02, -1.365405712981232755e-01, -1.002593446510361419e-01, -1.117945222697993429e-01, 7.449172735713084637e-02, 7.770237313970995707e-02, 1.313723119887387492e-01, -8.655414676270002661e-02, -4.973937467461287537e-02, -8.663006083493235421e-02, 5.703914957966123994e-02, -3.382231967662072125e-02, -4.215813217482468345e-03, 3.545115660155720612e-03, -8.247565860499378454e-03, -1.028025206407854253e-03, 8.644757417520612143e-04, 6.761330949063471332e-03, 8.427721296283078580e-04, -7.086947453692606178e-04, -1.622698090933780493e-02, 1.305372051650728060e-01, -2.082599910094798112e-01, -7.109985131471197733e-03, 2.202585658101286273e-02, -3.554509763049529952e-02, 1.436400379134906459e-02, -3.554915857551419617e-02, 5.763638171798115412e-02, 2.074946305037073946e-01, 5.016353704485233822e-02, -5.700401936915034523e-02, 1.082138666905367308e-01, 2.616159414496492877e-02, -2.972908425564194101e-02, -1.229314789425654392e-01, -2.971969820589494271e-02, 3.377238432488059716e-02, 7.622024445219390681e-03, 9.500540384976005961e-04, -7.989090778275298932e-04, -2.952148931042387209e-02, -3.679732378636401541e-03, 3.094320409307891630e-03, -9.534268115386618486e-04, -1.188407357158671420e-04, 9.993425503379762414e-05, 9.319088860655992679e-02, -3.903942630815338682e-02, -7.052283462118023871e-02, 1.544831983829924038e-01, -6.471593445773991815e-02, -1.169062041817236081e-01, -6.990884596438741438e-02, 2.928613817427033750e-02, 5.290399154061733306e-02, 7.491400658274136037e-02, 1.273824184577304897e-01, -8.391492311946648075e-02, 
3.543872837542783732e-02, 4.324623973455964804e-02, -2.873418641045778418e-02, -8.444981234074398768e-02, -1.531171183141288306e-01, 1.007308415346981068e-01, -6.396885751015785743e-03, -7.973455327045167592e-04, 6.704951070469818575e-04, 2.915483242551994078e-02, 3.634030104030812076e-03, -3.055888951116827318e-03, 6.608747470375698129e-04, 8.237532257692081912e-05, -6.927015762150179410e-05, -6.099175331115514430e-03, 2.402310352789886402e-02, -3.861491558256636286e-02, -2.583867422346154685e-02, 6.050621302336450097e-02, -9.822840263095998503e-02, -3.827994718203701213e-02, 1.252239810257823327e-01, -2.018867305507059950e-01, 1.136620144506474833e-01, 2.747872876828840599e-02, -3.122582814578225147e-02, -2.136319389661417989e-01, -5.164728194785846160e-02, 5.869009312256637939e-02, -3.147575788810638014e-02, -7.609523885036708832e-03, 8.647186232996251914e-03, -5.990706138603461330e-03, -7.467169124604876177e-04, 6.279210400235934152e-04, -9.287887182821588476e-04, -1.157696985960763821e-04, 9.735179200124630735e-05, -2.966271471326579340e-02, -3.697335544996301071e-03, 3.109123071928715683e-03, 1.800225987816693740e-01, -7.541487246259104271e-02, -1.362333179969384966e-01, -7.524185541795300192e-02, 3.152023672914239238e-02, 5.693978247845072477e-02, 5.703636164117102669e-02, -2.389361095778780308e-02, -4.316265205277792366e-02, -4.915584336537091176e-02, -8.674240294138457763e-02, 5.709724154860432860e-02, -8.679070528401405804e-02, -1.572017650485294793e-01, 1.034201569997979520e-01, -3.557746655862283752e-02, -8.626268394893003844e-02, 5.645546718878535764e-02, 6.848075985139651621e-03, 8.535845420570665554e-04, -7.177870012752625602e-04, 8.266638576582277997e-04, 1.030402542123569647e-04, -8.664748649675494882e-05, 2.991751925173294011e-02, 3.729095884068693231e-03, -3.135830629785046203e-03, 1.523793442834292522e-02, -3.873020552543556677e-02, 6.275576045602117292e-02, -3.842536616563556329e-02, 1.249268983543572881e-01, -2.014296501045876875e-01, 
1.288704808602599873e-02, -6.326999354443738066e-02, 1.014064886873057153e-01, -1.318711149757016143e-01, -3.188092889522457091e-02, 3.622832829002789468e-02, -3.210149046681261276e-02, -7.760799893075580151e-03, 8.819090787585878374e-03, -2.047554776382226327e-01, -4.950132426418570042e-02, 5.625150484566552450e-02
-  };
+      3.479789535931299138e-02,  4.337414719007849292e-03,
+      -3.647371468256610082e-03, 8.053492919528318708e-03,
+      1.003834811499279773e-03,  -8.441338187607602033e-04,
+      -6.695998268698949256e-03, -8.346286793845711892e-04,
+      7.018468440279366279e-04,  -4.515896716004976635e-02,
+      1.891794570218296306e-02,  3.417435352652402336e-02,
+      9.998952222904963771e-02,  -4.188750255541257711e-02,
+      -7.566774655171297492e-02, 1.804286120725206444e-01,
+      -7.558495911146115298e-02, -1.365405712981232755e-01,
+      -1.002593446510361419e-01, -1.117945222697993429e-01,
+      7.449172735713084637e-02,  7.770237313970995707e-02,
+      1.313723119887387492e-01,  -8.655414676270002661e-02,
+      -4.973937467461287537e-02, -8.663006083493235421e-02,
+      5.703914957966123994e-02,  -3.382231967662072125e-02,
+      -4.215813217482468345e-03, 3.545115660155720612e-03,
+      -8.247565860499378454e-03, -1.028025206407854253e-03,
+      8.644757417520612143e-04,  6.761330949063471332e-03,
+      8.427721296283078580e-04,  -7.086947453692606178e-04,
+      -1.622698090933780493e-02, 1.305372051650728060e-01,
+      -2.082599910094798112e-01, -7.109985131471197733e-03,
+      2.202585658101286273e-02,  -3.554509763049529952e-02,
+      1.436400379134906459e-02,  -3.554915857551419617e-02,
+      5.763638171798115412e-02,  2.074946305037073946e-01,
+      5.016353704485233822e-02,  -5.700401936915034523e-02,
+      1.082138666905367308e-01,  2.616159414496492877e-02,
+      -2.972908425564194101e-02, -1.229314789425654392e-01,
+      -2.971969820589494271e-02, 3.377238432488059716e-02,
+      7.622024445219390681e-03,  9.500540384976005961e-04,
+      -7.989090778275298932e-04, -2.952148931042387209e-02,
+      -3.679732378636401541e-03, 3.094320409307891630e-03,
+      -9.534268115386618486e-04, -1.188407357158671420e-04,
+      9.993425503379762414e-05,  9.319088860655992679e-02,
+      -3.903942630815338682e-02, -7.052283462118023871e-02,
+      1.544831983829924038e-01,  -6.471593445773991815e-02,
+      -1.169062041817236081e-01, -6.990884596438741438e-02,
+      2.928613817427033750e-02,  5.290399154061733306e-02,
+      7.491400658274136037e-02,  1.273824184577304897e-01,
+      -8.391492311946648075e-02, 3.543872837542783732e-02,
+      4.324623973455964804e-02,  -2.873418641045778418e-02,
+      -8.444981234074398768e-02, -1.531171183141288306e-01,
+      1.007308415346981068e-01,  -6.396885751015785743e-03,
+      -7.973455327045167592e-04, 6.704951070469818575e-04,
+      2.915483242551994078e-02,  3.634030104030812076e-03,
+      -3.055888951116827318e-03, 6.608747470375698129e-04,
+      8.237532257692081912e-05,  -6.927015762150179410e-05,
+      -6.099175331115514430e-03, 2.402310352789886402e-02,
+      -3.861491558256636286e-02, -2.583867422346154685e-02,
+      6.050621302336450097e-02,  -9.822840263095998503e-02,
+      -3.827994718203701213e-02, 1.252239810257823327e-01,
+      -2.018867305507059950e-01, 1.136620144506474833e-01,
+      2.747872876828840599e-02,  -3.122582814578225147e-02,
+      -2.136319389661417989e-01, -5.164728194785846160e-02,
+      5.869009312256637939e-02,  -3.147575788810638014e-02,
+      -7.609523885036708832e-03, 8.647186232996251914e-03,
+      -5.990706138603461330e-03, -7.467169124604876177e-04,
+      6.279210400235934152e-04,  -9.287887182821588476e-04,
+      -1.157696985960763821e-04, 9.735179200124630735e-05,
+      -2.966271471326579340e-02, -3.697335544996301071e-03,
+      3.109123071928715683e-03,  1.800225987816693740e-01,
+      -7.541487246259104271e-02, -1.362333179969384966e-01,
+      -7.524185541795300192e-02, 3.152023672914239238e-02,
+      5.693978247845072477e-02,  5.703636164117102669e-02,
+      -2.389361095778780308e-02, -4.316265205277792366e-02,
+      -4.915584336537091176e-02, -8.674240294138457763e-02,
+      5.709724154860432860e-02,  -8.679070528401405804e-02,
+      -1.572017650485294793e-01, 1.034201569997979520e-01,
+      -3.557746655862283752e-02, -8.626268394893003844e-02,
+      5.645546718878535764e-02,  6.848075985139651621e-03,
+      8.535845420570665554e-04,  -7.177870012752625602e-04,
+      8.266638576582277997e-04,  1.030402542123569647e-04,
+      -8.664748649675494882e-05, 2.991751925173294011e-02,
+      3.729095884068693231e-03,  -3.135830629785046203e-03,
+      1.523793442834292522e-02,  -3.873020552543556677e-02,
+      6.275576045602117292e-02,  -3.842536616563556329e-02,
+      1.249268983543572881e-01,  -2.014296501045876875e-01,
+      1.288704808602599873e-02,  -6.326999354443738066e-02,
+      1.014064886873057153e-01,  -1.318711149757016143e-01,
+      -3.188092889522457091e-02, 3.622832829002789468e-02,
+      -3.210149046681261276e-02, -7.760799893075580151e-03,
+      8.819090787585878374e-03,  -2.047554776382226327e-01,
+      -4.950132426418570042e-02, 5.625150484566552450e-02};
   std::vector expected_gt;
   std::vector expected_gv;
   int natoms = 6;
@@ -138,49 +221,47 @@ class TestInferDeepDipoleNew : public ::testing::Test
 
   void SetUp() override {
     std::string file_name = "../../tests/infer/deepdipole_new.pbtxt";
-    deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deepdipole_new.pbtxt", "deepdipole_new.pb");
+    deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deepdipole_new.pbtxt",
+                                     "deepdipole_new.pb");
     dp.init("deepdipole_new.pb");
-    odim = dp.output_dim ();
+    odim = dp.output_dim();
 
     expected_gt.resize(odim);
-    for(int ii = 0; ii < nsel; ++ii){
-      for(int dd = 0; dd < odim; ++dd){
-	      expected_gt[dd] += expected_t[ii*odim+dd];
+    for (int ii = 0; ii < nsel; ++ii) {
+      for (int dd = 0; dd < odim; ++dd) {
+        expected_gt[dd] += expected_t[ii * odim + dd];
       }
     }
 
     expected_gv.resize(odim * 9);
-    for (int kk = 0; kk < odim; ++kk){
-      for(int ii = 0; ii < natoms; ++ii){
-        for(int dd = 0; dd < 9; ++dd){
-          expected_gv[kk*9 + dd] += expected_v[kk*natoms*9 + ii*9 + dd];
+    for (int kk = 0; kk < odim; ++kk) {
+      for (int ii = 0; ii < natoms; ++ii) {
+        for (int dd = 0; dd < 9; ++dd) {
+          expected_gv[kk * 9 + dd] += expected_v[kk * natoms * 9 + ii * 9 + dd];
         }
       }
     }
   };
 
-  void TearDown() override {
-    remove( "deepdipole_new.pb" ) ;
-  };
+  void TearDown() override { remove("deepdipole_new.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepDipoleNew, ValueTypes);
 
-TYPED_TEST(TestInferDeepDipoleNew, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepDipoleNew, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_t = this -> expected_t;
-  std::vector& expected_f = this -> expected_f;
-  std::vector& expected_v = this -> expected_v;
-  std::vector& expected_gt = this -> expected_gt;
-  std::vector& expected_gv = this -> expected_gv;
-  int& natoms = this -> natoms;
-  int& nsel = this -> nsel;
-  int& odim = this -> odim;
-  deepmd::hpp::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_t = this->expected_t;
+  std::vector& expected_f = this->expected_f;
+  std::vector& expected_v = this->expected_v;
+  std::vector& expected_gt = this->expected_gt;
+  std::vector& expected_gv = this->expected_gv;
+  int& natoms = this->natoms;
+  int& nsel = this->nsel;
+  int& odim = this->odim;
+  deepmd::hpp::DeepTensor& dp = this->dp;
   EXPECT_EQ(dp.cutoff(), 4.);
   EXPECT_EQ(dp.numb_types(), 2);
   EXPECT_EQ(dp.output_dim(), 3);
@@ -192,191 +273,185 @@ TYPED_TEST(TestInferDeepDipoleNew, cpu_build_nlist)
 
   dp.compute(at, coord, atype, box);
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
 
   dp.compute(gt, ff, vv, coord, atype, box);
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   EXPECT_EQ(ff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(ff[ii] - expected_f[ii]), EPSILON);
   }
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
 
   dp.compute(gt, ff, vv, at, av, coord, atype, box);
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   EXPECT_EQ(ff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(ff[ii] - expected_f[ii]), EPSILON);
   }
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
   EXPECT_EQ(av.size(), expected_v.size());
-  for(int ii = 0; ii < expected_v.size(); ++ii){
+  for (int ii = 0; ii < expected_v.size(); ++ii) {
     EXPECT_LT(fabs(av[ii] - expected_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepDipoleNew, cpu_lmp_nlist)
-{
+TYPED_TEST(TestInferDeepDipoleNew, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_t = this -> expected_t;
-  std::vector& expected_f = this -> expected_f;
-  std::vector& expected_v = this -> expected_v;
-  std::vector& expected_gt = this -> expected_gt;
-  std::vector& expected_gv = this -> expected_gv;
-  int& natoms = this -> natoms;
-  int& nsel = this -> nsel;
-  int& odim = this -> odim;
-  deepmd::hpp::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_t = this->expected_t;
+  std::vector& expected_f = this->expected_f;
+  std::vector& expected_v = this->expected_v;
+  std::vector& expected_gt = this->expected_gt;
+  std::vector& expected_gv = this->expected_gv;
+  int& natoms = this->natoms;
+  int& nsel = this->nsel;
+  int& odim = this->odim;
+  deepmd::hpp::DeepTensor& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
+  std::vector atype_cpy, mapping;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  std::vector > nlist_data;
+  std::vector > nlist_data;
   deepmd::hpp::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   std::vector gt, ff, vv, at, av;
 
-  dp.compute(at, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  dp.compute(at, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
 
-
-  dp.compute(gt, ff, vv, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  dp.compute(gt, ff, vv, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   // remove ghost atoms
-  std::vector rff (odim * nloc * 3);
-  for(int kk = 0; kk < odim; ++kk){
-    _fold_back(rff.begin() + kk * nloc * 3, ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
+  std::vector rff(odim * nloc * 3);
+  for (int kk = 0; kk < odim; ++kk) {
+    _fold_back(rff.begin() + kk * nloc * 3,
+                          ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
   }
   EXPECT_EQ(rff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(rff[ii] - expected_f[ii]), EPSILON);
   }
   // virial
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
 
+  dp.compute(gt, ff, vv, at, av, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist);
 
-  dp.compute(gt, ff, vv, at, av, coord_cpy, atype_cpy, box, nall-nloc, inlist);
-  
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   // remove ghost atoms
-  for(int kk = 0; kk < odim; ++kk){
-    _fold_back(rff.begin() + kk * nloc * 3, ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
+  for (int kk = 0; kk < odim; ++kk) {
+    _fold_back(rff.begin() + kk * nloc * 3,
+                          ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
   }
   EXPECT_EQ(rff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(rff[ii] - expected_f[ii]), EPSILON);
   }
   // virial
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
   // atom tensor
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
   // atom virial
-  std::vector rav (odim * nloc * 9);
-  for(int kk = 0; kk < odim; ++kk){
-    _fold_back(rav.begin() + kk * nloc * 9, av.begin() + kk * nall * 9, mapping, nloc, nall, 9);
+  std::vector rav(odim * nloc * 9);
+  for (int kk = 0; kk < odim; ++kk) {
+    _fold_back(rav.begin() + kk * nloc * 9,
+                          av.begin() + kk * nall * 9, mapping, nloc, nall, 9);
   }
   EXPECT_EQ(rav.size(), expected_v.size());
-  for(int ii = 0; ii < expected_v.size(); ++ii){
+  for (int ii = 0; ii < expected_v.size(); ++ii) {
     EXPECT_LT(fabs(rav[ii] - expected_v[ii]), EPSILON);
   }
 }
 
-
 template 
-class TestInferDeepDipoleFake : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepDipoleFake : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector expected_d = {
-    -3.186217894664857830e-01, 1.082220317383403296e+00, 5.646623185237639730e-02, 7.426508038929955369e-01, -3.115996324658170114e-01, -5.619108089573777720e-01, -4.181578166874897473e-01, -7.579762930974662805e-01, 4.980618433125854616e-01, 1.059635561913792712e+00, -2.641989315855929332e-01, 5.307984468104405273e-01, -1.484512535335152095e-01, 4.978588497891502374e-01, -8.022467807199461509e-01, -9.165936539882671985e-01, -2.238112120606238209e-01, 2.553133145814526217e-01
-  };
+      -3.186217894664857830e-01, 1.082220317383403296e+00,
+      5.646623185237639730e-02,  7.426508038929955369e-01,
+      -3.115996324658170114e-01, -5.619108089573777720e-01,
+      -4.181578166874897473e-01, -7.579762930974662805e-01,
+      4.980618433125854616e-01,  1.059635561913792712e+00,
+      -2.641989315855929332e-01, 5.307984468104405273e-01,
+      -1.484512535335152095e-01, 4.978588497891502374e-01,
+      -8.022467807199461509e-01, -9.165936539882671985e-01,
+      -2.238112120606238209e-01, 2.553133145814526217e-01};
   int natoms = 6;
 
   deepmd::hpp::DeepTensor dp;
 
   void SetUp() override {
-    deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deepdipole_fake.pbtxt", "deepdipole_fake.pb");
+    deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deepdipole_fake.pbtxt",
+                                     "deepdipole_fake.pb");
 
     dp.init("deepdipole_fake.pb");
   };
 
-  void TearDown() override {
-    remove( "deepdipole_fake.pb" ) ;
-  };
+  void TearDown() override { remove("deepdipole_fake.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepDipoleFake, ValueTypes);
 
-TYPED_TEST(TestInferDeepDipoleFake, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepDipoleFake, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_d = this -> expected_d;
-  int& natoms = this -> natoms;
-  deepmd::hpp::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_d = this->expected_d;
+  int& natoms = this->natoms;
+  deepmd::hpp::DeepTensor& dp = this->dp;
   EXPECT_EQ(dp.cutoff(), 2.);
   EXPECT_EQ(dp.numb_types(), 2);
   EXPECT_EQ(dp.output_dim(), 3);
@@ -389,40 +464,37 @@ TYPED_TEST(TestInferDeepDipoleFake, cpu_build_nlist)
   dp.compute(value, coord, atype, box);
 
   EXPECT_EQ(value.size(), expected_d.size());
-  for(int ii = 0; ii < expected_d.size(); ++ii){
+  for (int ii = 0; ii < expected_d.size(); ++ii) {
     EXPECT_LT(fabs(value[ii] - expected_d[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepDipoleFake, cpu_lmp_nlist)
-{
+TYPED_TEST(TestInferDeepDipoleFake, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_d = this -> expected_d;
-  int& natoms = this -> natoms;
-  deepmd::hpp::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_d = this->expected_d;
+  int& natoms = this->natoms;
+  deepmd::hpp::DeepTensor& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
+  std::vector atype_cpy, mapping;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  std::vector > nlist_data;
+  std::vector > nlist_data;
   deepmd::hpp::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   std::vector value;
-  dp.compute(value, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  dp.compute(value, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   EXPECT_EQ(value.size(), expected_d.size());
-  for(int ii = 0; ii < expected_d.size(); ++ii){
+  for (int ii = 0; ii < expected_d.size(); ++ii) {
     EXPECT_LT(fabs(value[ii] - expected_d[ii]), EPSILON);
   }
 }
-
diff --git a/source/api_c/tests/test_deeppolar_hpp.cc b/source/api_c/tests/test_deeppolar_hpp.cc
index f6417d9429..691200bc42 100644
--- a/source/api_c/tests/test_deeppolar_hpp.cc
+++ b/source/api_c/tests/test_deeppolar_hpp.cc
@@ -1,61 +1,58 @@
 #include 
-#include 
+
 #include 
+#include 
 #include 
 #include 
+
 #include "deepmd.hpp"
 #include "test_utils.h"
 
 template 
-class TestInferDeepPolar : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepPolar : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector expected_d = {
-    1.061407927405987051e-01,-3.569013342133873778e-01,-2.862108976089940138e-02,-3.569013342133875444e-01,1.304367268874677244e+00,1.037647501453442256e-01,-2.862108976089940138e-02,1.037647501453441284e-01,8.100521520762453409e-03,1.236797829492216616e+00,-3.717307430531632262e-01,7.371515676976750919e-01,-3.717307430531630041e-01,1.127222682121889058e-01,-2.239181552775717510e-01,7.371515676976746478e-01,-2.239181552775717787e-01,4.448255365635306879e-01
-  };
+      1.061407927405987051e-01,  -3.569013342133873778e-01,
+      -2.862108976089940138e-02, -3.569013342133875444e-01,
+      1.304367268874677244e+00,  1.037647501453442256e-01,
+      -2.862108976089940138e-02, 1.037647501453441284e-01,
+      8.100521520762453409e-03,  1.236797829492216616e+00,
+      -3.717307430531632262e-01, 7.371515676976750919e-01,
+      -3.717307430531630041e-01, 1.127222682121889058e-01,
+      -2.239181552775717510e-01, 7.371515676976746478e-01,
+      -2.239181552775717787e-01, 4.448255365635306879e-01};
   int natoms;
 
   deepmd::hpp::DeepTensor dp;
 
   void SetUp() override {
     std::string file_name = "../../tests/infer/deeppolar.pbtxt";
-    deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppolar.pbtxt", "deeppolar.pb");
+    deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppolar.pbtxt",
+                                     "deeppolar.pb");
 
     dp.init("deeppolar.pb");
 
     natoms = expected_d.size();
   };
 
-  void TearDown() override {
-    remove( "deeppolar.pb" ) ;
-  };
+  void TearDown() override { remove("deeppolar.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepPolar, ValueTypes);
 
-TYPED_TEST(TestInferDeepPolar, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepPolar, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_d = this -> expected_d;
-  int& natoms = this -> natoms;
-  deepmd::hpp::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_d = this->expected_d;
+  int& natoms = this->natoms;
+  deepmd::hpp::DeepTensor& dp = this->dp;
 
   EXPECT_EQ(dp.cutoff(), 6.);
   EXPECT_EQ(dp.numb_types(), 2);
@@ -68,71 +65,385 @@ TYPED_TEST(TestInferDeepPolar, cpu_build_nlist)
   dp.compute(value, coord, atype, box);
 
   EXPECT_EQ(value.size(), expected_d.size());
-  for(int ii = 0; ii < expected_d.size(); ++ii){
+  for (int ii = 0; ii < expected_d.size(); ++ii) {
     EXPECT_LT(fabs(value[ii] - expected_d[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPolar, cpu_lmp_nlist)
-{
+TYPED_TEST(TestInferDeepPolar, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_d = this -> expected_d;
-  int& natoms = this -> natoms;
-  deepmd::hpp::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_d = this->expected_d;
+  int& natoms = this->natoms;
+  deepmd::hpp::DeepTensor& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
+  std::vector atype_cpy, mapping;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  std::vector > nlist_data;
+  std::vector > nlist_data;
   deepmd::hpp::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   std::vector value;
-  dp.compute(value, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  dp.compute(value, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   EXPECT_EQ(value.size(), expected_d.size());
-  for(int ii = 0; ii < expected_d.size(); ++ii){
+  for (int ii = 0; ii < expected_d.size(); ++ii) {
     EXPECT_LT(fabs(value[ii] - expected_d[ii]), EPSILON);
   }
 }
 
-
 template 
-class TestInferDeepPolarNew : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepPolarNew : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector expected_t = {
-    1.936327241487292961e+00, 5.198696351735779264e-02, 3.888336625074450149e-03, 5.198696351735781346e-02, 1.764967784387830196e+00, -1.354658545697527347e-02, 3.888336625074451016e-03, -1.354658545697527000e-02, 1.939288409902199639e+00, 1.786740420980893029e+00, 4.868765294055640847e-02, -9.812132615180739481e-02, 4.868765294055640847e-02, 1.925999147066305373e+00, 2.895028407651457567e-02, -9.812132615180743644e-02, 2.895028407651457220e-02, 1.883109989034779996e+00
-  };
+      1.936327241487292961e+00, 5.198696351735779264e-02,
+      3.888336625074450149e-03, 5.198696351735781346e-02,
+      1.764967784387830196e+00, -1.354658545697527347e-02,
+      3.888336625074451016e-03, -1.354658545697527000e-02,
+      1.939288409902199639e+00, 1.786740420980893029e+00,
+      4.868765294055640847e-02, -9.812132615180739481e-02,
+      4.868765294055640847e-02, 1.925999147066305373e+00,
+      2.895028407651457567e-02, -9.812132615180743644e-02,
+      2.895028407651457220e-02, 1.883109989034779996e+00};
   std::vector expected_f = {
-    5.305178446980116092e-02, -1.127314829623577049e-02, 1.136493514861047216e-01, 5.598130220328862322e-05, -4.352126938892845326e-02, -7.700608888887500170e-02, -1.050015668789053697e-01, 5.882396336737016895e-02, -3.723875897544067642e-02, -7.850322286760008650e-02, 7.279117637753844405e-02, -6.178451060078461732e-02, 3.404361490778949895e-01, 5.447934529195214842e-02, -8.698375128815737101e-02, -2.100391251033939810e-01, -1.313000673516965255e-01, 1.493637582671529240e-01, -9.589318874236771317e-02, 6.285887854370801608e-02, -1.824395427630142175e-01, -3.264267092869802683e-02, 3.637498661083633789e-02, 1.524859582123189172e-01, 1.442484990808054202e-01, -8.957992476622803069e-02, 3.076469140583825215e-02, 4.909822745881124717e-02, -2.559151672032903835e-01, -1.522830913546814324e-01, -2.885480042033320910e-02, 7.730841025065784966e-02, 1.553301391955271560e-01, -3.595606644821771475e-02, 1.689528165643162105e-01, -3.858154695988691516e-03, 5.018843026262573281e-02, 1.756005154318779349e-02, 3.489323893614350303e-02, -4.020411124876955428e-02, 2.218648284685413238e-02, -8.086177159691650476e-03, -2.222392408702593067e-02, -3.825892777133557687e-02, -1.689393838770965675e-02, -5.465804822761769627e-02, -1.398775875506316768e-01, -1.165702490994514756e-01, 5.449067849718619572e-02, 1.588580450812354106e-01, -8.209560373418453572e-02, 1.240697480360127003e-02, -2.046806414931008622e-02, 1.887527294448937965e-01, -9.589318874236771317e-02, 6.285887854370801608e-02, -1.824395427630142175e-01, -3.264267092869802683e-02, 3.637498661083633789e-02, 1.524859582123189172e-01, 1.442484990808054202e-01, -8.957992476622803069e-02, 3.076469140583825215e-02, 4.909822745881124717e-02, -2.559151672032903835e-01, -1.522830913546814324e-01, -2.885480042033320910e-02, 7.730841025065784966e-02, 1.553301391955271560e-01, -3.595606644821771475e-02, 1.689528165643162105e-01, -3.858154695988691516e-03, 4.038746042068122599e-02, -2.549213597407858356e-01, -1.131801705114504619e-01, 
1.489732376295762606e-01, 2.734584831542113958e-01, -1.125511889088352951e-01, -1.908551011160136424e-01, -2.400995606986339528e-02, 2.255650484976146619e-01, -2.185213968874370055e-02, 1.475333123369945709e-01, 9.584417756169674729e-02, -1.576380405016522893e-02, -5.153693137796186430e-02, -8.489897831367294867e-02, 3.911034680466508873e-02, -9.052354830259493057e-02, -1.077888832535272776e-02, -1.970229486427777510e-01, -6.538978166042377915e-02, -1.570533119125729904e-01, 1.417940206277617798e-01, -4.684714285705613573e-02, 6.070882964241105378e-02, 5.715183445260185735e-02, 1.138024049318459713e-01, 9.374622673558237473e-02, 3.096927839536914306e-02, -9.232883741117139942e-02, -6.499836527010099951e-02, 2.839980861544661936e-02, 8.097497759757724123e-03, 1.006700103228213017e-01, -6.129199344840163821e-02, 8.266585923704758421e-02, -3.307338951814068478e-02, 5.018843026262574669e-02, 1.756005154318778308e-02, 3.489323893614350997e-02, -4.020411124876957509e-02, 2.218648284685414279e-02, -8.086177159691652211e-03, -2.222392408702593067e-02, -3.825892777133557687e-02, -1.689393838770965675e-02, -5.465804822761770321e-02, -1.398775875506316491e-01, -1.165702490994514756e-01, 5.449067849718619572e-02, 1.588580450812354106e-01, -8.209560373418453572e-02, 1.240697480360125615e-02, -2.046806414931009316e-02, 1.887527294448937965e-01, -1.970229486427777510e-01, -6.538978166042375140e-02, -1.570533119125729626e-01, 1.417940206277618076e-01, -4.684714285705613573e-02, 6.070882964241105378e-02, 5.715183445260184347e-02, 1.138024049318459713e-01, 9.374622673558236086e-02, 3.096927839536912919e-02, -9.232883741117139942e-02, -6.499836527010102727e-02, 2.839980861544661589e-02, 8.097497759757731062e-03, 1.006700103228213017e-01, -6.129199344840162433e-02, 8.266585923704758421e-02, -3.307338951814066397e-02, -3.078161564779093723e-02, -8.748776750553553111e-03, -2.162930108693108394e-02, 2.135313622214399243e-02, -8.845621737097757523e-03, 9.365293934359546560e-03, 
8.562579091543631032e-03, 1.772751551871581607e-02, 1.573655414890783033e-02, -3.649820158632081230e-02, -1.904914900326310223e-01, -1.076542087674599024e-01, -5.186655049718805199e-02, 1.686765146765009937e-01, -6.620206332305828001e-02, 8.923065241761217459e-02, 2.168185832506550753e-02, 1.703837250941818704e-01
-  };
+      5.305178446980116092e-02,  -1.127314829623577049e-02,
+      1.136493514861047216e-01,  5.598130220328862322e-05,
+      -4.352126938892845326e-02, -7.700608888887500170e-02,
+      -1.050015668789053697e-01, 5.882396336737016895e-02,
+      -3.723875897544067642e-02, -7.850322286760008650e-02,
+      7.279117637753844405e-02,  -6.178451060078461732e-02,
+      3.404361490778949895e-01,  5.447934529195214842e-02,
+      -8.698375128815737101e-02, -2.100391251033939810e-01,
+      -1.313000673516965255e-01, 1.493637582671529240e-01,
+      -9.589318874236771317e-02, 6.285887854370801608e-02,
+      -1.824395427630142175e-01, -3.264267092869802683e-02,
+      3.637498661083633789e-02,  1.524859582123189172e-01,
+      1.442484990808054202e-01,  -8.957992476622803069e-02,
+      3.076469140583825215e-02,  4.909822745881124717e-02,
+      -2.559151672032903835e-01, -1.522830913546814324e-01,
+      -2.885480042033320910e-02, 7.730841025065784966e-02,
+      1.553301391955271560e-01,  -3.595606644821771475e-02,
+      1.689528165643162105e-01,  -3.858154695988691516e-03,
+      5.018843026262573281e-02,  1.756005154318779349e-02,
+      3.489323893614350303e-02,  -4.020411124876955428e-02,
+      2.218648284685413238e-02,  -8.086177159691650476e-03,
+      -2.222392408702593067e-02, -3.825892777133557687e-02,
+      -1.689393838770965675e-02, -5.465804822761769627e-02,
+      -1.398775875506316768e-01, -1.165702490994514756e-01,
+      5.449067849718619572e-02,  1.588580450812354106e-01,
+      -8.209560373418453572e-02, 1.240697480360127003e-02,
+      -2.046806414931008622e-02, 1.887527294448937965e-01,
+      -9.589318874236771317e-02, 6.285887854370801608e-02,
+      -1.824395427630142175e-01, -3.264267092869802683e-02,
+      3.637498661083633789e-02,  1.524859582123189172e-01,
+      1.442484990808054202e-01,  -8.957992476622803069e-02,
+      3.076469140583825215e-02,  4.909822745881124717e-02,
+      -2.559151672032903835e-01, -1.522830913546814324e-01,
+      -2.885480042033320910e-02, 7.730841025065784966e-02,
+      1.553301391955271560e-01,  -3.595606644821771475e-02,
+      1.689528165643162105e-01,  -3.858154695988691516e-03,
+      4.038746042068122599e-02,  -2.549213597407858356e-01,
+      -1.131801705114504619e-01, 1.489732376295762606e-01,
+      2.734584831542113958e-01,  -1.125511889088352951e-01,
+      -1.908551011160136424e-01, -2.400995606986339528e-02,
+      2.255650484976146619e-01,  -2.185213968874370055e-02,
+      1.475333123369945709e-01,  9.584417756169674729e-02,
+      -1.576380405016522893e-02, -5.153693137796186430e-02,
+      -8.489897831367294867e-02, 3.911034680466508873e-02,
+      -9.052354830259493057e-02, -1.077888832535272776e-02,
+      -1.970229486427777510e-01, -6.538978166042377915e-02,
+      -1.570533119125729904e-01, 1.417940206277617798e-01,
+      -4.684714285705613573e-02, 6.070882964241105378e-02,
+      5.715183445260185735e-02,  1.138024049318459713e-01,
+      9.374622673558237473e-02,  3.096927839536914306e-02,
+      -9.232883741117139942e-02, -6.499836527010099951e-02,
+      2.839980861544661936e-02,  8.097497759757724123e-03,
+      1.006700103228213017e-01,  -6.129199344840163821e-02,
+      8.266585923704758421e-02,  -3.307338951814068478e-02,
+      5.018843026262574669e-02,  1.756005154318778308e-02,
+      3.489323893614350997e-02,  -4.020411124876957509e-02,
+      2.218648284685414279e-02,  -8.086177159691652211e-03,
+      -2.222392408702593067e-02, -3.825892777133557687e-02,
+      -1.689393838770965675e-02, -5.465804822761770321e-02,
+      -1.398775875506316491e-01, -1.165702490994514756e-01,
+      5.449067849718619572e-02,  1.588580450812354106e-01,
+      -8.209560373418453572e-02, 1.240697480360125615e-02,
+      -2.046806414931009316e-02, 1.887527294448937965e-01,
+      -1.970229486427777510e-01, -6.538978166042375140e-02,
+      -1.570533119125729626e-01, 1.417940206277618076e-01,
+      -4.684714285705613573e-02, 6.070882964241105378e-02,
+      5.715183445260184347e-02,  1.138024049318459713e-01,
+      9.374622673558236086e-02,  3.096927839536912919e-02,
+      -9.232883741117139942e-02, -6.499836527010102727e-02,
+      2.839980861544661589e-02,  8.097497759757731062e-03,
+      1.006700103228213017e-01,  -6.129199344840162433e-02,
+      8.266585923704758421e-02,  -3.307338951814066397e-02,
+      -3.078161564779093723e-02, -8.748776750553553111e-03,
+      -2.162930108693108394e-02, 2.135313622214399243e-02,
+      -8.845621737097757523e-03, 9.365293934359546560e-03,
+      8.562579091543631032e-03,  1.772751551871581607e-02,
+      1.573655414890783033e-02,  -3.649820158632081230e-02,
+      -1.904914900326310223e-01, -1.076542087674599024e-01,
+      -5.186655049718805199e-02, 1.686765146765009937e-01,
+      -6.620206332305828001e-02, 8.923065241761217459e-02,
+      2.168185832506550753e-02,  1.703837250941818704e-01};
   std::vector expected_v = {
-    -2.123013313652813774e-03, -2.646248889538913257e-04, 2.225254748021367093e-04, 9.843593195853941446e-04, 1.226963457840150472e-04, -1.031764725911038809e-04, -8.467513732241481721e-04, -1.055440805151912256e-04, 8.875297679686559459e-05, 1.829118379697145316e-02, 2.302438731350108913e-03, -1.890198823577125386e-03, 3.300229266409118040e-02, -1.339230641165423293e-02, -2.445540228188634868e-02, 5.127826101331301595e-02, -2.458314752619149279e-02, -4.252530480245884925e-02, 9.733043787604266084e-02, -6.217238566516904152e-02, 3.767656091618994812e-02, 6.674680725588777973e-03, 4.245867422406505304e-02, -2.752200660186601699e-02, -8.318636634138946995e-03, -2.738884420387305285e-02, 1.785195524121836741e-02, -3.151218435289559073e-03, -3.927864338604547816e-04, 3.302976830190196104e-04, 1.387198082848713948e-06, 1.729085429046553641e-07, -1.454003656243721975e-07, -4.056191292896940703e-05, -5.055875832506090064e-06, 4.251531950061960394e-06, 7.087482338961141604e-02, -1.643445525800983908e-01, 2.668682182870234509e-01, 7.752581706917366366e-03, -2.674714571946596939e-02, 4.308263417785011123e-02, -9.385640612496094423e-03, 4.307848167667025635e-02, -6.910099104451945806e-02, -1.822493611414978121e-01, -4.510097387143227610e-02, 5.157836206906134952e-02, -1.170389534066011428e-01, -2.858136680923874240e-02, 3.256883555835647648e-02, 1.336331160725280354e-01, 3.257484898923947853e-02, -3.710113093740719653e-02, 3.343993600586595179e-03, 4.168150663620683060e-04, -3.505035785317401481e-04, -4.312491363797464269e-03, -5.375343342977005178e-04, 4.520175083867039156e-04, -5.045304632809267465e-04, -6.288764981405317546e-05, 5.288279643454484632e-05, 2.176577726533836937e-02, -1.041710664445027849e-02, -1.802940684978692962e-02, -3.097121964369356495e-02, 1.077096511204005125e-02, 2.079488766754130843e-02, -1.120464690158002596e-01, 4.736950869652114399e-02, 8.530900293808066359e-02, 3.029112757823893692e-02, 1.058529311156591879e-01, -6.894903720238335088e-02, 
-5.089618157121258979e-02, -6.973511953466600410e-02, 4.618114280030299196e-02, 1.143309394598741001e-02, 2.319568285212985151e-02, -1.522637168466081138e-02, -1.535733649675188493e-03, -1.914228911776438445e-04, 1.609692493993826663e-04, -2.603290366421702733e-03, -3.244894507721100851e-04, 2.728661290583660171e-04, 6.938458118266074663e-04, 8.648503036932213837e-05, -7.272604826511198082e-05, -2.609239945314979423e-02, 1.142603664459106681e-02, -2.051406106454568487e-02, 5.779549344910496142e-03, -3.860615030463052100e-02, 6.168332781226748551e-02, 2.068839156841529789e-02, -7.643723474881176927e-02, 1.229844977392647865e-01, -3.554667688747349674e-02, -8.262665730398828859e-03, 9.285295046969522723e-03, 1.497274901467501862e-01, 3.666859638982037511e-02, -4.181688913175674732e-02, -3.257377626487627069e-03, -8.171909213273372040e-04, 9.379633299917983094e-04, 5.408910405506226968e-04, 6.741984641424190365e-05, -5.669396175743082354e-05, 4.696290607396237790e-04, 5.853733334998140626e-05, -4.922457577157541143e-05, -5.350269144276139158e-03, -6.668890718077903363e-04, 5.607930831110977251e-04, 3.013271000130106694e-02, -1.241570117891089425e-02, -2.255430712666738058e-02, -1.643158253499693577e-02, 6.876116339617440766e-03, 1.242585434168311936e-02, 2.120265775977717496e-03, -2.988284987993197143e-03, -4.123302560925387432e-03, 3.528008965720315360e-02, -1.132921329184741026e-02, 6.435692645130823564e-03, -2.115291124444698342e-02, -2.971050496327276927e-02, 1.966236467455729359e-02, -2.194244461519655187e-02, -1.469000955331024871e-02, 1.000316933044766328e-02, -2.208576023807404254e-03, -2.752899293131040766e-04, 2.314938041951108548e-04, -5.840262773118632192e-04, -7.279647649213021596e-05, 6.121521886838239123e-05, -1.263538670848133802e-03, -1.574949051482092536e-04, 1.324388975109944740e-04, 8.955566031735841259e-03, -2.660296383100100095e-02, 4.296567375352825652e-02, 2.380373596470350059e-02, -7.784355459714024927e-02, 1.255004729498893912e-01, 
-1.824501349606120690e-02, 3.948761180940744964e-02, -6.423389834199008663e-02, 1.038606825469970407e-02, 2.616819816765628484e-03, -3.006960935423359793e-03, -1.864007491704058883e-02, -4.504736174636920880e-03, 5.118497771104377897e-03, 1.680266347982039554e-01, 4.105963063126880086e-02, -4.679634408112137711e-02, 3.343993600586595179e-03, 4.168150663620683060e-04, -3.505035785317401481e-04, -4.312491363797464269e-03, -5.375343342977005178e-04, 4.520175083867039156e-04, -5.045304632809267465e-04, -6.288764981405317546e-05, 5.288279643454484632e-05, 2.176577726533836937e-02, -1.041710664445027849e-02, -1.802940684978692962e-02, -3.097121964369356495e-02, 1.077096511204005125e-02, 2.079488766754130843e-02, -1.120464690158002596e-01, 4.736950869652114399e-02, 8.530900293808066359e-02, 3.029112757823893692e-02, 1.058529311156591879e-01, -6.894903720238335088e-02, -5.089618157121258979e-02, -6.973511953466600410e-02, 4.618114280030299196e-02, 1.143309394598741001e-02, 2.319568285212985151e-02, -1.522637168466081138e-02, -1.535733649675188493e-03, -1.914228911776438445e-04, 1.609692493993826663e-04, -2.603290366421702733e-03, -3.244894507721100851e-04, 2.728661290583660171e-04, 6.938458118266074663e-04, 8.648503036932213837e-05, -7.272604826511198082e-05, -2.609239945314979423e-02, 1.142603664459106681e-02, -2.051406106454568487e-02, 5.779549344910496142e-03, -3.860615030463052100e-02, 6.168332781226748551e-02, 2.068839156841529789e-02, -7.643723474881176927e-02, 1.229844977392647865e-01, -3.554667688747349674e-02, -8.262665730398828859e-03, 9.285295046969522723e-03, 1.497274901467501862e-01, 3.666859638982037511e-02, -4.181688913175674732e-02, -3.257377626487627069e-03, -8.171909213273372040e-04, 9.379633299917983094e-04, 1.097257666720985849e-03, 1.367686610077148478e-04, -1.150100103928514269e-04, -3.252401295559594844e-03, -4.053984617694676175e-04, 3.409032519425078027e-04, -1.217154259382106555e-04, -1.517132787898375553e-05, 1.275770753460001047e-05, 
-1.104423096905816498e-01, 4.615651100464009809e-02, 8.344619780982527601e-02, -1.998235369855275168e-01, 8.508819942125579738e-02, 1.528709647298205909e-01, 8.333302476347614896e-02, -3.488524142655123617e-02, -6.303339769808283255e-02, -7.468341447282240975e-02, -1.443673498458480642e-01, 9.485360739696327426e-02, -2.685004652445167612e-04, -1.702408228533323561e-02, 1.097613894113106531e-02, 9.496752299747332482e-02, 1.714581306702349373e-01, -1.128066531362114239e-01, -2.109671824413435984e-03, -2.629619271223545066e-04, 2.211270750801623281e-04, 1.011694656468142307e-02, 1.261035832424879221e-03, -1.060416495448196581e-03, 2.326027531269699879e-04, 2.899297772687444119e-05, -2.438045854305356789e-05, -9.775618976121780001e-04, 7.897148922927013995e-03, -1.259878571596698138e-02, -5.534571406250721713e-03, 2.552681480358522451e-02, -4.094434810336724379e-02, -1.258721457759937913e-02, 4.161890111720080443e-02, -6.708566706120022705e-02, 3.521744971093632853e-02, 8.557787631933998912e-03, -9.738493960065902622e-03, -8.446926488038911107e-02, -2.017604402799078392e-02, 2.285024948138817888e-02, -9.755577915095828626e-03, -2.364722966186930900e-03, 2.689144780896026744e-03, 8.392348196279006065e-05, 1.046071729847805219e-05, -8.796512273720217211e-06, -2.967282659264359589e-03, -3.698595949224694123e-04, 3.110182957302592738e-04, -1.688223115474902841e-03, -2.104300767164184042e-04, 1.769525645115341121e-04, -1.040849854787611189e-01, 4.406117175034113265e-02, 7.931633477513304331e-02, 3.539829580561168476e-02, -1.443144702217136026e-02, -2.631106338063535569e-02, -4.383990895980735547e-02, 1.895493123709470276e-02, 3.388325869579450478e-02, 1.809448338386955915e-02, 4.269882582195522885e-02, -2.795653019460052346e-02, 4.363124777259473619e-02, 8.597058258914810902e-02, -5.646456449126337207e-02, 4.431189331687027805e-02, 7.186269332716928304e-02, -4.739074421553418626e-02, 7.807665162715203382e-05, 9.731933913865978996e-06, -8.183671700296416994e-06, 
2.525821455836478949e-03, 3.148332692827336839e-04, -2.647461582604813284e-04, 5.088778918832323993e-03, 6.342953893162101269e-04, -5.333847591977234877e-04, 1.765533347871811772e-03, -1.422682766506909793e-02, 2.269730547460076936e-02, 2.888222424864686153e-04, -4.083171371247279469e-03, 6.494062010930001794e-03, 1.594130471018519873e-02, -4.922350239779287734e-02, 7.944117864515577720e-02, -5.516443865142822006e-02, -1.340804559261108905e-02, 1.525892700429632917e-02, 7.450140187529649682e-02, 1.809617933997387934e-02, -2.059052256811338619e-02, -3.118940445306414219e-02, -7.412336287839308216e-03, 8.382871287998559101e-03, 5.408910405506207452e-04, 6.741984641424155129e-05, -5.669396175743063380e-05, 4.696290607396231285e-04, 5.853733334998132494e-05, -4.922457577157534367e-05, -5.350269144276134821e-03, -6.668890718077897942e-04, 5.607930831110975083e-04, 3.013271000130106694e-02, -1.241570117891090119e-02, -2.255430712666738752e-02, -1.643158253499694271e-02, 6.876116339617444236e-03, 1.242585434168312457e-02, 2.120265775977718363e-03, -2.988284987993198010e-03, -4.123302560925387432e-03, 3.528008965720314666e-02, -1.132921329184741026e-02, 6.435692645130823564e-03, -2.115291124444698342e-02, -2.971050496327276927e-02, 1.966236467455729012e-02, -2.194244461519655881e-02, -1.469000955331024871e-02, 1.000316933044766501e-02, -2.208576023807403820e-03, -2.752899293131040766e-04, 2.314938041951108548e-04, -5.840262773118632192e-04, -7.279647649213021596e-05, 6.121521886838239123e-05, -1.263538670848133802e-03, -1.574949051482092536e-04, 1.324388975109944740e-04, 8.955566031735841259e-03, -2.660296383100100095e-02, 4.296567375352825652e-02, 2.380373596470350059e-02, -7.784355459714024927e-02, 1.255004729498893912e-01, -1.824501349606121037e-02, 3.948761180940744964e-02, -6.423389834199008663e-02, 1.038606825469969019e-02, 2.616819816765625015e-03, -3.006960935423356324e-03, -1.864007491704059577e-02, -4.504736174636922615e-03, 5.118497771104379632e-03, 
1.680266347982039554e-01, 4.105963063126880086e-02, -4.679634408112137711e-02, 8.392348196278930170e-05, 1.046071729847797087e-05, -8.796512273720142672e-06, -2.967282659264356987e-03, -3.698595949224691413e-04, 3.110182957302590027e-04, -1.688223115474903708e-03, -2.104300767164184855e-04, 1.769525645115341934e-04, -1.040849854787611189e-01, 4.406117175034113265e-02, 7.931633477513304331e-02, 3.539829580561167782e-02, -1.443144702217136026e-02, -2.631106338063535569e-02, -4.383990895980735547e-02, 1.895493123709470276e-02, 3.388325869579450478e-02, 1.809448338386955221e-02, 4.269882582195521498e-02, -2.795653019460051653e-02, 4.363124777259472925e-02, 8.597058258914809514e-02, -5.646456449126335819e-02, 4.431189331687027111e-02, 7.186269332716926916e-02, -4.739074421553417932e-02, 7.807665162715246750e-05, 9.731933913866019654e-06, -8.183671700296457651e-06, 2.525821455836478515e-03, 3.148332692827336297e-04, -2.647461582604812742e-04, 5.088778918832324860e-03, 6.342953893162102353e-04, -5.333847591977235961e-04, 1.765533347871809603e-03, -1.422682766506909793e-02, 2.269730547460076589e-02, 2.888222424864694826e-04, -4.083171371247282938e-03, 6.494062010930008733e-03, 1.594130471018519873e-02, -4.922350239779287040e-02, 7.944117864515577720e-02, -5.516443865142821312e-02, -1.340804559261108558e-02, 1.525892700429632570e-02, 7.450140187529649682e-02, 1.809617933997387934e-02, -2.059052256811338966e-02, -3.118940445306412831e-02, -7.412336287839304746e-03, 8.382871287998553897e-03, -9.575909105642434974e-04, -1.193597735547498307e-04, 1.003707186710399045e-04, -9.520061199010912585e-05, -1.186636523389461756e-05, 9.978534401229592523e-06, -5.876800709203859434e-03, -7.325190685693192200e-04, 6.159819440242017292e-04, -1.659431774532551043e-02, 6.520628417529478540e-03, 1.204087494393247214e-02, 6.518824051016284399e-03, -2.745500204548994606e-03, -4.950724849051978994e-03, -5.340810191179472081e-03, 3.101366677982481286e-03, 5.077959020099345744e-03, 
7.727976016970144156e-03, 7.022558645366243878e-03, -4.714356496325102820e-03, 7.018017321145150929e-03, 1.341962078953426278e-02, -8.818944869050635710e-03, -2.755773236988961865e-03, 1.079245666846929096e-02, -6.886663303228377636e-03, 9.801230913130992879e-04, 1.221683173308112048e-04, -1.027324486645460452e-04, 1.233918620327190629e-04, 1.538028875195364422e-05, -1.293342463232469071e-05, 4.892751025155074075e-03, 6.098613175830685205e-04, -5.128379261493998297e-04, -7.792305682365031905e-03, 2.541307371885552502e-02, -4.097328323558844382e-02, 2.530143617608526449e-02, -8.265149730513186854e-02, 1.332544508945474881e-01, -1.184335640259520997e-02, 3.220055758982264676e-02, -5.209911236104310117e-02, 8.090761694886683397e-02, 1.959431243541279177e-02, -2.227702786419644143e-02, 1.968691296265078980e-02, 4.764576998712748319e-03, -5.415896903683155988e-03, 1.534638141861073557e-01, 3.728680895816388619e-02, -4.242975875503233324e-02
-  };
+      -2.123013313652813774e-03, -2.646248889538913257e-04,
+      2.225254748021367093e-04,  9.843593195853941446e-04,
+      1.226963457840150472e-04,  -1.031764725911038809e-04,
+      -8.467513732241481721e-04, -1.055440805151912256e-04,
+      8.875297679686559459e-05,  1.829118379697145316e-02,
+      2.302438731350108913e-03,  -1.890198823577125386e-03,
+      3.300229266409118040e-02,  -1.339230641165423293e-02,
+      -2.445540228188634868e-02, 5.127826101331301595e-02,
+      -2.458314752619149279e-02, -4.252530480245884925e-02,
+      9.733043787604266084e-02,  -6.217238566516904152e-02,
+      3.767656091618994812e-02,  6.674680725588777973e-03,
+      4.245867422406505304e-02,  -2.752200660186601699e-02,
+      -8.318636634138946995e-03, -2.738884420387305285e-02,
+      1.785195524121836741e-02,  -3.151218435289559073e-03,
+      -3.927864338604547816e-04, 3.302976830190196104e-04,
+      1.387198082848713948e-06,  1.729085429046553641e-07,
+      -1.454003656243721975e-07, -4.056191292896940703e-05,
+      -5.055875832506090064e-06, 4.251531950061960394e-06,
+      7.087482338961141604e-02,  -1.643445525800983908e-01,
+      2.668682182870234509e-01,  7.752581706917366366e-03,
+      -2.674714571946596939e-02, 4.308263417785011123e-02,
+      -9.385640612496094423e-03, 4.307848167667025635e-02,
+      -6.910099104451945806e-02, -1.822493611414978121e-01,
+      -4.510097387143227610e-02, 5.157836206906134952e-02,
+      -1.170389534066011428e-01, -2.858136680923874240e-02,
+      3.256883555835647648e-02,  1.336331160725280354e-01,
+      3.257484898923947853e-02,  -3.710113093740719653e-02,
+      3.343993600586595179e-03,  4.168150663620683060e-04,
+      -3.505035785317401481e-04, -4.312491363797464269e-03,
+      -5.375343342977005178e-04, 4.520175083867039156e-04,
+      -5.045304632809267465e-04, -6.288764981405317546e-05,
+      5.288279643454484632e-05,  2.176577726533836937e-02,
+      -1.041710664445027849e-02, -1.802940684978692962e-02,
+      -3.097121964369356495e-02, 1.077096511204005125e-02,
+      2.079488766754130843e-02,  -1.120464690158002596e-01,
+      4.736950869652114399e-02,  8.530900293808066359e-02,
+      3.029112757823893692e-02,  1.058529311156591879e-01,
+      -6.894903720238335088e-02, -5.089618157121258979e-02,
+      -6.973511953466600410e-02, 4.618114280030299196e-02,
+      1.143309394598741001e-02,  2.319568285212985151e-02,
+      -1.522637168466081138e-02, -1.535733649675188493e-03,
+      -1.914228911776438445e-04, 1.609692493993826663e-04,
+      -2.603290366421702733e-03, -3.244894507721100851e-04,
+      2.728661290583660171e-04,  6.938458118266074663e-04,
+      8.648503036932213837e-05,  -7.272604826511198082e-05,
+      -2.609239945314979423e-02, 1.142603664459106681e-02,
+      -2.051406106454568487e-02, 5.779549344910496142e-03,
+      -3.860615030463052100e-02, 6.168332781226748551e-02,
+      2.068839156841529789e-02,  -7.643723474881176927e-02,
+      1.229844977392647865e-01,  -3.554667688747349674e-02,
+      -8.262665730398828859e-03, 9.285295046969522723e-03,
+      1.497274901467501862e-01,  3.666859638982037511e-02,
+      -4.181688913175674732e-02, -3.257377626487627069e-03,
+      -8.171909213273372040e-04, 9.379633299917983094e-04,
+      5.408910405506226968e-04,  6.741984641424190365e-05,
+      -5.669396175743082354e-05, 4.696290607396237790e-04,
+      5.853733334998140626e-05,  -4.922457577157541143e-05,
+      -5.350269144276139158e-03, -6.668890718077903363e-04,
+      5.607930831110977251e-04,  3.013271000130106694e-02,
+      -1.241570117891089425e-02, -2.255430712666738058e-02,
+      -1.643158253499693577e-02, 6.876116339617440766e-03,
+      1.242585434168311936e-02,  2.120265775977717496e-03,
+      -2.988284987993197143e-03, -4.123302560925387432e-03,
+      3.528008965720315360e-02,  -1.132921329184741026e-02,
+      6.435692645130823564e-03,  -2.115291124444698342e-02,
+      -2.971050496327276927e-02, 1.966236467455729359e-02,
+      -2.194244461519655187e-02, -1.469000955331024871e-02,
+      1.000316933044766328e-02,  -2.208576023807404254e-03,
+      -2.752899293131040766e-04, 2.314938041951108548e-04,
+      -5.840262773118632192e-04, -7.279647649213021596e-05,
+      6.121521886838239123e-05,  -1.263538670848133802e-03,
+      -1.574949051482092536e-04, 1.324388975109944740e-04,
+      8.955566031735841259e-03,  -2.660296383100100095e-02,
+      4.296567375352825652e-02,  2.380373596470350059e-02,
+      -7.784355459714024927e-02, 1.255004729498893912e-01,
+      -1.824501349606120690e-02, 3.948761180940744964e-02,
+      -6.423389834199008663e-02, 1.038606825469970407e-02,
+      2.616819816765628484e-03,  -3.006960935423359793e-03,
+      -1.864007491704058883e-02, -4.504736174636920880e-03,
+      5.118497771104377897e-03,  1.680266347982039554e-01,
+      4.105963063126880086e-02,  -4.679634408112137711e-02,
+      3.343993600586595179e-03,  4.168150663620683060e-04,
+      -3.505035785317401481e-04, -4.312491363797464269e-03,
+      -5.375343342977005178e-04, 4.520175083867039156e-04,
+      -5.045304632809267465e-04, -6.288764981405317546e-05,
+      5.288279643454484632e-05,  2.176577726533836937e-02,
+      -1.041710664445027849e-02, -1.802940684978692962e-02,
+      -3.097121964369356495e-02, 1.077096511204005125e-02,
+      2.079488766754130843e-02,  -1.120464690158002596e-01,
+      4.736950869652114399e-02,  8.530900293808066359e-02,
+      3.029112757823893692e-02,  1.058529311156591879e-01,
+      -6.894903720238335088e-02, -5.089618157121258979e-02,
+      -6.973511953466600410e-02, 4.618114280030299196e-02,
+      1.143309394598741001e-02,  2.319568285212985151e-02,
+      -1.522637168466081138e-02, -1.535733649675188493e-03,
+      -1.914228911776438445e-04, 1.609692493993826663e-04,
+      -2.603290366421702733e-03, -3.244894507721100851e-04,
+      2.728661290583660171e-04,  6.938458118266074663e-04,
+      8.648503036932213837e-05,  -7.272604826511198082e-05,
+      -2.609239945314979423e-02, 1.142603664459106681e-02,
+      -2.051406106454568487e-02, 5.779549344910496142e-03,
+      -3.860615030463052100e-02, 6.168332781226748551e-02,
+      2.068839156841529789e-02,  -7.643723474881176927e-02,
+      1.229844977392647865e-01,  -3.554667688747349674e-02,
+      -8.262665730398828859e-03, 9.285295046969522723e-03,
+      1.497274901467501862e-01,  3.666859638982037511e-02,
+      -4.181688913175674732e-02, -3.257377626487627069e-03,
+      -8.171909213273372040e-04, 9.379633299917983094e-04,
+      1.097257666720985849e-03,  1.367686610077148478e-04,
+      -1.150100103928514269e-04, -3.252401295559594844e-03,
+      -4.053984617694676175e-04, 3.409032519425078027e-04,
+      -1.217154259382106555e-04, -1.517132787898375553e-05,
+      1.275770753460001047e-05,  -1.104423096905816498e-01,
+      4.615651100464009809e-02,  8.344619780982527601e-02,
+      -1.998235369855275168e-01, 8.508819942125579738e-02,
+      1.528709647298205909e-01,  8.333302476347614896e-02,
+      -3.488524142655123617e-02, -6.303339769808283255e-02,
+      -7.468341447282240975e-02, -1.443673498458480642e-01,
+      9.485360739696327426e-02,  -2.685004652445167612e-04,
+      -1.702408228533323561e-02, 1.097613894113106531e-02,
+      9.496752299747332482e-02,  1.714581306702349373e-01,
+      -1.128066531362114239e-01, -2.109671824413435984e-03,
+      -2.629619271223545066e-04, 2.211270750801623281e-04,
+      1.011694656468142307e-02,  1.261035832424879221e-03,
+      -1.060416495448196581e-03, 2.326027531269699879e-04,
+      2.899297772687444119e-05,  -2.438045854305356789e-05,
+      -9.775618976121780001e-04, 7.897148922927013995e-03,
+      -1.259878571596698138e-02, -5.534571406250721713e-03,
+      2.552681480358522451e-02,  -4.094434810336724379e-02,
+      -1.258721457759937913e-02, 4.161890111720080443e-02,
+      -6.708566706120022705e-02, 3.521744971093632853e-02,
+      8.557787631933998912e-03,  -9.738493960065902622e-03,
+      -8.446926488038911107e-02, -2.017604402799078392e-02,
+      2.285024948138817888e-02,  -9.755577915095828626e-03,
+      -2.364722966186930900e-03, 2.689144780896026744e-03,
+      8.392348196279006065e-05,  1.046071729847805219e-05,
+      -8.796512273720217211e-06, -2.967282659264359589e-03,
+      -3.698595949224694123e-04, 3.110182957302592738e-04,
+      -1.688223115474902841e-03, -2.104300767164184042e-04,
+      1.769525645115341121e-04,  -1.040849854787611189e-01,
+      4.406117175034113265e-02,  7.931633477513304331e-02,
+      3.539829580561168476e-02,  -1.443144702217136026e-02,
+      -2.631106338063535569e-02, -4.383990895980735547e-02,
+      1.895493123709470276e-02,  3.388325869579450478e-02,
+      1.809448338386955915e-02,  4.269882582195522885e-02,
+      -2.795653019460052346e-02, 4.363124777259473619e-02,
+      8.597058258914810902e-02,  -5.646456449126337207e-02,
+      4.431189331687027805e-02,  7.186269332716928304e-02,
+      -4.739074421553418626e-02, 7.807665162715203382e-05,
+      9.731933913865978996e-06,  -8.183671700296416994e-06,
+      2.525821455836478949e-03,  3.148332692827336839e-04,
+      -2.647461582604813284e-04, 5.088778918832323993e-03,
+      6.342953893162101269e-04,  -5.333847591977234877e-04,
+      1.765533347871811772e-03,  -1.422682766506909793e-02,
+      2.269730547460076936e-02,  2.888222424864686153e-04,
+      -4.083171371247279469e-03, 6.494062010930001794e-03,
+      1.594130471018519873e-02,  -4.922350239779287734e-02,
+      7.944117864515577720e-02,  -5.516443865142822006e-02,
+      -1.340804559261108905e-02, 1.525892700429632917e-02,
+      7.450140187529649682e-02,  1.809617933997387934e-02,
+      -2.059052256811338619e-02, -3.118940445306414219e-02,
+      -7.412336287839308216e-03, 8.382871287998559101e-03,
+      5.408910405506207452e-04,  6.741984641424155129e-05,
+      -5.669396175743063380e-05, 4.696290607396231285e-04,
+      5.853733334998132494e-05,  -4.922457577157534367e-05,
+      -5.350269144276134821e-03, -6.668890718077897942e-04,
+      5.607930831110975083e-04,  3.013271000130106694e-02,
+      -1.241570117891090119e-02, -2.255430712666738752e-02,
+      -1.643158253499694271e-02, 6.876116339617444236e-03,
+      1.242585434168312457e-02,  2.120265775977718363e-03,
+      -2.988284987993198010e-03, -4.123302560925387432e-03,
+      3.528008965720314666e-02,  -1.132921329184741026e-02,
+      6.435692645130823564e-03,  -2.115291124444698342e-02,
+      -2.971050496327276927e-02, 1.966236467455729012e-02,
+      -2.194244461519655881e-02, -1.469000955331024871e-02,
+      1.000316933044766501e-02,  -2.208576023807403820e-03,
+      -2.752899293131040766e-04, 2.314938041951108548e-04,
+      -5.840262773118632192e-04, -7.279647649213021596e-05,
+      6.121521886838239123e-05,  -1.263538670848133802e-03,
+      -1.574949051482092536e-04, 1.324388975109944740e-04,
+      8.955566031735841259e-03,  -2.660296383100100095e-02,
+      4.296567375352825652e-02,  2.380373596470350059e-02,
+      -7.784355459714024927e-02, 1.255004729498893912e-01,
+      -1.824501349606121037e-02, 3.948761180940744964e-02,
+      -6.423389834199008663e-02, 1.038606825469969019e-02,
+      2.616819816765625015e-03,  -3.006960935423356324e-03,
+      -1.864007491704059577e-02, -4.504736174636922615e-03,
+      5.118497771104379632e-03,  1.680266347982039554e-01,
+      4.105963063126880086e-02,  -4.679634408112137711e-02,
+      8.392348196278930170e-05,  1.046071729847797087e-05,
+      -8.796512273720142672e-06, -2.967282659264356987e-03,
+      -3.698595949224691413e-04, 3.110182957302590027e-04,
+      -1.688223115474903708e-03, -2.104300767164184855e-04,
+      1.769525645115341934e-04,  -1.040849854787611189e-01,
+      4.406117175034113265e-02,  7.931633477513304331e-02,
+      3.539829580561167782e-02,  -1.443144702217136026e-02,
+      -2.631106338063535569e-02, -4.383990895980735547e-02,
+      1.895493123709470276e-02,  3.388325869579450478e-02,
+      1.809448338386955221e-02,  4.269882582195521498e-02,
+      -2.795653019460051653e-02, 4.363124777259472925e-02,
+      8.597058258914809514e-02,  -5.646456449126335819e-02,
+      4.431189331687027111e-02,  7.186269332716926916e-02,
+      -4.739074421553417932e-02, 7.807665162715246750e-05,
+      9.731933913866019654e-06,  -8.183671700296457651e-06,
+      2.525821455836478515e-03,  3.148332692827336297e-04,
+      -2.647461582604812742e-04, 5.088778918832324860e-03,
+      6.342953893162102353e-04,  -5.333847591977235961e-04,
+      1.765533347871809603e-03,  -1.422682766506909793e-02,
+      2.269730547460076589e-02,  2.888222424864694826e-04,
+      -4.083171371247282938e-03, 6.494062010930008733e-03,
+      1.594130471018519873e-02,  -4.922350239779287040e-02,
+      7.944117864515577720e-02,  -5.516443865142821312e-02,
+      -1.340804559261108558e-02, 1.525892700429632570e-02,
+      7.450140187529649682e-02,  1.809617933997387934e-02,
+      -2.059052256811338966e-02, -3.118940445306412831e-02,
+      -7.412336287839304746e-03, 8.382871287998553897e-03,
+      -9.575909105642434974e-04, -1.193597735547498307e-04,
+      1.003707186710399045e-04,  -9.520061199010912585e-05,
+      -1.186636523389461756e-05, 9.978534401229592523e-06,
+      -5.876800709203859434e-03, -7.325190685693192200e-04,
+      6.159819440242017292e-04,  -1.659431774532551043e-02,
+      6.520628417529478540e-03,  1.204087494393247214e-02,
+      6.518824051016284399e-03,  -2.745500204548994606e-03,
+      -4.950724849051978994e-03, -5.340810191179472081e-03,
+      3.101366677982481286e-03,  5.077959020099345744e-03,
+      7.727976016970144156e-03,  7.022558645366243878e-03,
+      -4.714356496325102820e-03, 7.018017321145150929e-03,
+      1.341962078953426278e-02,  -8.818944869050635710e-03,
+      -2.755773236988961865e-03, 1.079245666846929096e-02,
+      -6.886663303228377636e-03, 9.801230913130992879e-04,
+      1.221683173308112048e-04,  -1.027324486645460452e-04,
+      1.233918620327190629e-04,  1.538028875195364422e-05,
+      -1.293342463232469071e-05, 4.892751025155074075e-03,
+      6.098613175830685205e-04,  -5.128379261493998297e-04,
+      -7.792305682365031905e-03, 2.541307371885552502e-02,
+      -4.097328323558844382e-02, 2.530143617608526449e-02,
+      -8.265149730513186854e-02, 1.332544508945474881e-01,
+      -1.184335640259520997e-02, 3.220055758982264676e-02,
+      -5.209911236104310117e-02, 8.090761694886683397e-02,
+      1.959431243541279177e-02,  -2.227702786419644143e-02,
+      1.968691296265078980e-02,  4.764576998712748319e-03,
+      -5.415896903683155988e-03, 1.534638141861073557e-01,
+      3.728680895816388619e-02,  -4.242975875503233324e-02};
   std::vector expected_gt;
   std::vector expected_gv;
   int natoms = 6;
@@ -142,36 +453,34 @@ class TestInferDeepPolarNew : public ::testing::Test
 
   void SetUp() override {
     std::string file_name = "../../tests/infer/deeppolar_new.pbtxt";
-    deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppolar_new.pbtxt", "deeppolar_new.pb");
+    deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppolar_new.pbtxt",
+                                     "deeppolar_new.pb");
     dp.init("deeppolar_new.pb");
-    odim = dp.output_dim ();
+    odim = dp.output_dim();
 
     expected_gt.resize(odim);
-    for(int ii = 0; ii < nsel; ++ii){
-      for(int dd = 0; dd < odim; ++dd){
-	      expected_gt[dd] += expected_t[ii*odim+dd];
+    for (int ii = 0; ii < nsel; ++ii) {
+      for (int dd = 0; dd < odim; ++dd) {
+        expected_gt[dd] += expected_t[ii * odim + dd];
       }
     }
 
     expected_gv.resize(odim * 9);
-    for (int kk = 0; kk < odim; ++kk){
-      for(int ii = 0; ii < natoms; ++ii){
-        for(int dd = 0; dd < 9; ++dd){
-          expected_gv[kk*9 + dd] += expected_v[kk*natoms*9 + ii*9 + dd];
+    for (int kk = 0; kk < odim; ++kk) {
+      for (int ii = 0; ii < natoms; ++ii) {
+        for (int dd = 0; dd < 9; ++dd) {
+          expected_gv[kk * 9 + dd] += expected_v[kk * natoms * 9 + ii * 9 + dd];
         }
       }
     }
   };
 
-  void TearDown() override {
-    remove( "deeppolar_new.pb" ) ;
-  };
+  void TearDown() override { remove("deeppolar_new.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepPolarNew, ValueTypes);
 
-TYPED_TEST(TestInferDeepPolarNew, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepPolarNew, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -196,50 +505,48 @@ TYPED_TEST(TestInferDeepPolarNew, cpu_build_nlist)
 
   dp.compute(at, coord, atype, box);
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
 
   dp.compute(gt, ff, vv, coord, atype, box);
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   EXPECT_EQ(ff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(ff[ii] - expected_f[ii]), EPSILON);
   }
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
 
   dp.compute(gt, ff, vv, at, av, coord, atype, box);
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   EXPECT_EQ(ff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(ff[ii] - expected_f[ii]), EPSILON);
   }
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
   EXPECT_EQ(av.size(), expected_v.size());
-  for(int ii = 0; ii < expected_v.size(); ++ii){
+  for (int ii = 0; ii < expected_v.size(); ++ii) {
     EXPECT_LT(fabs(av[ii] - expected_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPolarNew, cpu_lmp_nlist)
-{
+TYPED_TEST(TestInferDeepPolarNew, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -254,87 +561,88 @@ TYPED_TEST(TestInferDeepPolarNew, cpu_lmp_nlist)
   int& odim = this->odim;
   deepmd::hpp::DeepTensor& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
+  std::vector atype_cpy, mapping;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  std::vector > nlist_data;
+  std::vector > nlist_data;
   deepmd::hpp::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   std::vector gt, ff, vv, at, av;
 
-  dp.compute(at, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  dp.compute(at, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
 
-
-  dp.compute(gt, ff, vv, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  dp.compute(gt, ff, vv, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   // remove ghost atoms
-  std::vector rff (odim * nloc * 3);
-  for(int kk = 0; kk < odim; ++kk){
-    _fold_back(rff.begin() + kk * nloc * 3, ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
+  std::vector rff(odim * nloc * 3);
+  for (int kk = 0; kk < odim; ++kk) {
+    _fold_back(rff.begin() + kk * nloc * 3,
+                          ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
   }
   EXPECT_EQ(rff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(rff[ii] - expected_f[ii]), EPSILON);
   }
   // virial
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
 
+  dp.compute(gt, ff, vv, at, av, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist);
 
-  dp.compute(gt, ff, vv, at, av, coord_cpy, atype_cpy, box, nall-nloc, inlist);
-  
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   // remove ghost atoms
-  for(int kk = 0; kk < odim; ++kk){
-    _fold_back(rff.begin() + kk * nloc * 3, ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
+  for (int kk = 0; kk < odim; ++kk) {
+    _fold_back(rff.begin() + kk * nloc * 3,
+                          ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
   }
   EXPECT_EQ(rff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(rff[ii] - expected_f[ii]), EPSILON);
   }
   // virial
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
   // atom tensor
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
   // atom virial
-  std::vector rav (odim * nloc * 9);
-  for(int kk = 0; kk < odim; ++kk){
-    _fold_back(rav.begin() + kk * nloc * 9, av.begin() + kk * nall * 9, mapping, nloc, nall, 9);
+  std::vector rav(odim * nloc * 9);
+  for (int kk = 0; kk < odim; ++kk) {
+    _fold_back(rav.begin() + kk * nloc * 9,
+                          av.begin() + kk * nall * 9, mapping, nloc, nall, 9);
   }
   EXPECT_EQ(rav.size(), expected_v.size());
-  for(int ii = 0; ii < expected_v.size(); ++ii){
+  for (int ii = 0; ii < expected_v.size(); ++ii) {
     EXPECT_LT(fabs(rav[ii] - expected_v[ii]), EPSILON);
   }
 }
 
-TYPED_TEST(TestInferDeepPolarNew, print_summary)
-{
+TYPED_TEST(TestInferDeepPolarNew, print_summary) {
   deepmd::hpp::DeepTensor& dp = this->dp;
   dp.print_summary("");
 }
diff --git a/source/api_c/tests/test_deeppot_a.cc b/source/api_c/tests/test_deeppot_a.cc
index 52d252a92f..c51391c1f7 100644
--- a/source/api_c/tests/test_deeppot_a.cc
+++ b/source/api_c/tests/test_deeppot_a.cc
@@ -1,50 +1,66 @@
 #include 
+
 #include 
 #include 
+
 #include "c_api.h"
 
-class TestInferDeepPotA : public ::testing::Test
-{  
-protected:  
-  double coord[18] = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  float coordf[18] = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  int atype[6] = {
-    0, 1, 1, 0, 1, 1
-  };
-  double box[9] = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
-  float boxf[9] = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepPotA : public ::testing::Test {
+ protected:
+  double coord[18] = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74, 00.25, 3.32, 1.68,
+                      3.36,  3.00, 1.81, 3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  float coordf[18] = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74, 00.25, 3.32, 1.68,
+                      3.36,  3.00, 1.81, 3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  int atype[6] = {0, 1, 1, 0, 1, 1};
+  double box[9] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
+  float boxf[9] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector expected_e = {
-    -9.275780747115504710e+01,-1.863501786584258468e+02,-1.863392472863538103e+02,-9.279281325486221021e+01,-1.863671545232153903e+02,-1.863619822847602165e+02
-  };
+      -9.275780747115504710e+01, -1.863501786584258468e+02,
+      -1.863392472863538103e+02, -9.279281325486221021e+01,
+      -1.863671545232153903e+02, -1.863619822847602165e+02};
   std::vector expected_f = {
-    -3.034045420701179663e-01,8.405844663871177014e-01,7.696947487118485642e-02,7.662001266663505117e-01,-1.880601391333554251e-01,-6.183333871091722944e-01,-5.036172391059643427e-01,-6.529525836149027151e-01,5.432962643022043459e-01,6.382357912332115024e-01,-1.748518296794561167e-01,3.457363524891907125e-01,1.286482986991941552e-03,3.757251165286925043e-01,-5.972588700887541124e-01,-5.987006197104716154e-01,-2.004450304880958100e-01,2.495901655353461868e-01
-  };
+      -3.034045420701179663e-01, 8.405844663871177014e-01,
+      7.696947487118485642e-02,  7.662001266663505117e-01,
+      -1.880601391333554251e-01, -6.183333871091722944e-01,
+      -5.036172391059643427e-01, -6.529525836149027151e-01,
+      5.432962643022043459e-01,  6.382357912332115024e-01,
+      -1.748518296794561167e-01, 3.457363524891907125e-01,
+      1.286482986991941552e-03,  3.757251165286925043e-01,
+      -5.972588700887541124e-01, -5.987006197104716154e-01,
+      -2.004450304880958100e-01, 2.495901655353461868e-01};
   std::vector expected_v = {
-    -2.912234126853306959e-01,-3.800610846612756388e-02,2.776624987489437202e-01,-5.053761003913598976e-02,-3.152373041953385746e-01,1.060894290092162379e-01,2.826389131596073745e-01,1.039129970665329250e-01,-2.584378792325942586e-01,-3.121722367954994914e-01,8.483275876786681990e-02,2.524662342344257682e-01,4.142176771106586414e-02,-3.820285230785245428e-02,-2.727311173065460545e-02,2.668859789777112135e-01,-6.448243569420382404e-02,-2.121731470426218846e-01,-8.624335220278558922e-02,-1.809695356746038597e-01,1.529875294531883312e-01,-1.283658185172031341e-01,-1.992682279795223999e-01,1.409924999632362341e-01,1.398322735274434292e-01,1.804318474574856390e-01,-1.470309318999652726e-01,-2.593983661598450730e-01,-4.236536279233147489e-02,3.386387920184946720e-02,-4.174017537818433543e-02,-1.003500282164128260e-01,1.525690815194478966e-01,3.398976109910181037e-02,1.522253908435125536e-01,-2.349125581341701963e-01,9.515545977581392825e-04,-1.643218849228543846e-02,1.993234765412972564e-02,6.027265332209678569e-04,-9.563256398907417355e-02,1.510815124001868293e-01,-7.738094816888557714e-03,1.502832772532304295e-01,-2.380965783745832010e-01,-2.309456719810296654e-01,-6.666961081213038098e-02,7.955566551234216632e-02,-8.099093777937517447e-02,-3.386641099800401927e-02,4.447884755740908608e-02,1.008593228579038742e-01,4.556718179228393811e-02,-6.078081273849572641e-02
-  };
+      -2.912234126853306959e-01, -3.800610846612756388e-02,
+      2.776624987489437202e-01,  -5.053761003913598976e-02,
+      -3.152373041953385746e-01, 1.060894290092162379e-01,
+      2.826389131596073745e-01,  1.039129970665329250e-01,
+      -2.584378792325942586e-01, -3.121722367954994914e-01,
+      8.483275876786681990e-02,  2.524662342344257682e-01,
+      4.142176771106586414e-02,  -3.820285230785245428e-02,
+      -2.727311173065460545e-02, 2.668859789777112135e-01,
+      -6.448243569420382404e-02, -2.121731470426218846e-01,
+      -8.624335220278558922e-02, -1.809695356746038597e-01,
+      1.529875294531883312e-01,  -1.283658185172031341e-01,
+      -1.992682279795223999e-01, 1.409924999632362341e-01,
+      1.398322735274434292e-01,  1.804318474574856390e-01,
+      -1.470309318999652726e-01, -2.593983661598450730e-01,
+      -4.236536279233147489e-02, 3.386387920184946720e-02,
+      -4.174017537818433543e-02, -1.003500282164128260e-01,
+      1.525690815194478966e-01,  3.398976109910181037e-02,
+      1.522253908435125536e-01,  -2.349125581341701963e-01,
+      9.515545977581392825e-04,  -1.643218849228543846e-02,
+      1.993234765412972564e-02,  6.027265332209678569e-04,
+      -9.563256398907417355e-02, 1.510815124001868293e-01,
+      -7.738094816888557714e-03, 1.502832772532304295e-01,
+      -2.380965783745832010e-01, -2.309456719810296654e-01,
+      -6.666961081213038098e-02, 7.955566551234216632e-02,
+      -8.099093777937517447e-02, -3.386641099800401927e-02,
+      4.447884755740908608e-02,  1.008593228579038742e-01,
+      4.556718179228393811e-02,  -6.078081273849572641e-02};
   int natoms;
   double expected_tot_e;
-  std::vectorexpected_tot_v;
+  std::vector expected_tot_v;
 
-  DP_DeepPot* dp; 
+  DP_DeepPot* dp;
 
   void SetUp() override {
     const char* file_name = "../../tests/infer/deeppot.pbtxt";
@@ -59,61 +75,60 @@ class TestInferDeepPotA : public ::testing::Test
     expected_tot_e = 0.;
     expected_tot_v.resize(9);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
-    for(int ii = 0; ii < natoms; ++ii){
+    for (int ii = 0; ii < natoms; ++ii) {
       expected_tot_e += expected_e[ii];
     }
-    for(int ii = 0; ii < natoms; ++ii){
-      for(int dd = 0; dd < 9; ++dd){
-	      expected_tot_v[dd] += expected_v[ii*9+dd];
+    for (int ii = 0; ii < natoms; ++ii) {
+      for (int dd = 0; dd < 9; ++dd) {
+        expected_tot_v[dd] += expected_v[ii * 9 + dd];
       }
     }
   };
 
-  void TearDown() override {
-    remove( "deeppot.pb" ) ;
-  };
+  void TearDown() override { remove("deeppot.pb"); };
 };
 
-TEST_F(TestInferDeepPotA, double_infer)
-{
+TEST_F(TestInferDeepPotA, double_infer) {
   double* ener_ = new double;
   double* force_ = new double[natoms * 3];
   double* virial_ = new double[9];
   double* atomic_ener_ = new double[natoms];
   double* atomic_virial_ = new double[natoms * 9];
 
-  DP_DeepPotCompute (dp, natoms, coord, atype, box, ener_, force_, virial_, atomic_ener_, atomic_virial_);
+  DP_DeepPotCompute(dp, natoms, coord, atype, box, ener_, force_, virial_,
+                    atomic_ener_, atomic_virial_);
 
   double ener = *ener_;
   std::vector force(force_, force_ + natoms * 3);
   std::vector virial(virial_, virial_ + 9);
   std::vector atomic_ener(atomic_ener_, atomic_ener_ + natoms);
-  std::vector atomic_virial(atomic_virial_, atomic_virial_ + natoms * 9);
+  std::vector atomic_virial(atomic_virial_,
+                                    atomic_virial_ + natoms * 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), 1e-10);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), 1e-10);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), 1e-10);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), 1e-10);
   }
-  for(int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     EXPECT_LT(fabs(atomic_ener[ii] - expected_e[ii]), 1e-10);
   }
-  for(int ii = 0; ii < natoms * 9; ++ii){
+  for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atomic_virial[ii] - expected_v[ii]), 1e-10);
   }
 }
 
-TEST_F(TestInferDeepPotA, float_infer)
-{
+TEST_F(TestInferDeepPotA, float_infer) {
   double* ener_ = new double;
   float* force_ = new float[natoms * 3];
   float* virial_ = new float[9];
   float* atomic_ener_ = new float[natoms];
   float* atomic_virial_ = new float[natoms * 9];
 
-  DP_DeepPotComputef (dp, natoms, coordf, atype, boxf, ener_, force_, virial_, atomic_ener_, atomic_virial_);
+  DP_DeepPotComputef(dp, natoms, coordf, atype, boxf, ener_, force_, virial_,
+                     atomic_ener_, atomic_virial_);
 
   double ener = *ener_;
   std::vector force(force_, force_ + natoms * 3);
@@ -122,78 +137,91 @@ TEST_F(TestInferDeepPotA, float_infer)
   std::vector atomic_virial(atomic_virial_, atomic_virial_ + natoms * 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), 1e-6);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), 1e-6);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), 1e-6);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), 1e-6);
   }
 
-  for(int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     EXPECT_LT(fabs(atomic_ener[ii] - expected_e[ii]), 1e-5);
   }
-  for(int ii = 0; ii < natoms * 9; ++ii){
+  for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atomic_virial[ii] - expected_v[ii]), 1e-6);
   }
 }
 
-
-TEST_F(TestInferDeepPotA, cutoff)
-{
-  double cutoff = DP_DeepPotGetCutoff (dp);
+TEST_F(TestInferDeepPotA, cutoff) {
+  double cutoff = DP_DeepPotGetCutoff(dp);
   EXPECT_EQ(cutoff, 6.0);
 }
 
-TEST_F(TestInferDeepPotA, numb_types)
-{
-  int numb_types = DP_DeepPotGetNumbTypes (dp);
+TEST_F(TestInferDeepPotA, numb_types) {
+  int numb_types = DP_DeepPotGetNumbTypes(dp);
   EXPECT_EQ(numb_types, 2);
 }
 
-TEST_F(TestInferDeepPotA, type_map)
-{
-  const char* type_map = DP_DeepPotGetTypeMap (dp);
+TEST_F(TestInferDeepPotA, type_map) {
+  const char* type_map = DP_DeepPotGetTypeMap(dp);
   char expected_type_map[] = "O H";
   EXPECT_EQ(strcmp(type_map, expected_type_map), 0);
 }
 
-
-class TestInferDeepPotANoPBC : public ::testing::Test
-{  
-protected:  
-  double coord[18] = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  float coordf[18] = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  int atype[6] = {
-    0, 1, 1, 0, 1, 1
-  };
+class TestInferDeepPotANoPBC : public ::testing::Test {
+ protected:
+  double coord[18] = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74, 00.25, 3.32, 1.68,
+                      3.36,  3.00, 1.81, 3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  float coordf[18] = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74, 00.25, 3.32, 1.68,
+                      3.36,  3.00, 1.81, 3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  int atype[6] = {0, 1, 1, 0, 1, 1};
   std::vector expected_e = {
-    -9.255934839310273787e+01,-1.863253376736990106e+02,-1.857237299341402945e+02,-9.279308539717486326e+01,-1.863708105823244239e+02,-1.863635196514972563e+02
-  };
+      -9.255934839310273787e+01, -1.863253376736990106e+02,
+      -1.857237299341402945e+02, -9.279308539717486326e+01,
+      -1.863708105823244239e+02, -1.863635196514972563e+02};
   std::vector expected_f = {
-    -2.161037360255332107e+00,9.052994347015581589e-01,1.635379623977007979e+00,2.161037360255332107e+00,-9.052994347015581589e-01,-1.635379623977007979e+00,-1.167128117249453811e-02,1.371975700096064992e-03,-1.575265180249604477e-03,6.226508593971802341e-01,-1.816734122009256991e-01,3.561766019664774907e-01,-1.406075393906316626e-02,3.789140061530929526e-01,-6.018777878642909140e-01,-5.969188242856223736e-01,-1.986125696522633155e-01,2.472764510780630642e-01    
-  };
+      -2.161037360255332107e+00, 9.052994347015581589e-01,
+      1.635379623977007979e+00,  2.161037360255332107e+00,
+      -9.052994347015581589e-01, -1.635379623977007979e+00,
+      -1.167128117249453811e-02, 1.371975700096064992e-03,
+      -1.575265180249604477e-03, 6.226508593971802341e-01,
+      -1.816734122009256991e-01, 3.561766019664774907e-01,
+      -1.406075393906316626e-02, 3.789140061530929526e-01,
+      -6.018777878642909140e-01, -5.969188242856223736e-01,
+      -1.986125696522633155e-01, 2.472764510780630642e-01};
   std::vector expected_v = {
-    -7.042445481792056761e-01,2.950213647777754078e-01,5.329418202437231633e-01,2.950213647777752968e-01,-1.235900311906896754e-01,-2.232594111831812944e-01,5.329418202437232743e-01,-2.232594111831813499e-01,-4.033073234276823849e-01,-8.949230984097404917e-01,3.749002169013777030e-01,6.772391014992630298e-01,3.749002169013777586e-01,-1.570527935667933583e-01,-2.837082722496912512e-01,6.772391014992631408e-01,-2.837082722496912512e-01,-5.125052659994422388e-01,4.858210330291591605e-02,-6.902596153269104431e-03,6.682612642430500391e-03,-5.612247004554610057e-03,9.767795567660207592e-04,-9.773758942738038254e-04,5.638322117219018645e-03,-9.483806049779926932e-04,8.493873281881353637e-04,-2.941738570564985666e-01,-4.482529909499673171e-02,4.091569840186781021e-02,-4.509020615859140463e-02,-1.013919988807244071e-01,1.551440772665269030e-01,4.181857726606644232e-02,1.547200233064863484e-01,-2.398213304685777592e-01,-3.218625798524068354e-02,-1.012438450438508421e-02,1.271639330380921855e-02,3.072814938490859779e-03,-9.556241797915024372e-02,1.512251983492413077e-01,-8.277872384009607454e-03,1.505412040827929787e-01,-2.386150620881526407e-01,-2.312295470054945568e-01,-6.631490213524345034e-02,7.932427266386249398e-02,-8.053754366323923053e-02,-3.294595881137418747e-02,4.342495071150231922e-02,1.004599500126941436e-01,4.450400364869536163e-02,-5.951077548033092968e-02
-  };
+      -7.042445481792056761e-01, 2.950213647777754078e-01,
+      5.329418202437231633e-01,  2.950213647777752968e-01,
+      -1.235900311906896754e-01, -2.232594111831812944e-01,
+      5.329418202437232743e-01,  -2.232594111831813499e-01,
+      -4.033073234276823849e-01, -8.949230984097404917e-01,
+      3.749002169013777030e-01,  6.772391014992630298e-01,
+      3.749002169013777586e-01,  -1.570527935667933583e-01,
+      -2.837082722496912512e-01, 6.772391014992631408e-01,
+      -2.837082722496912512e-01, -5.125052659994422388e-01,
+      4.858210330291591605e-02,  -6.902596153269104431e-03,
+      6.682612642430500391e-03,  -5.612247004554610057e-03,
+      9.767795567660207592e-04,  -9.773758942738038254e-04,
+      5.638322117219018645e-03,  -9.483806049779926932e-04,
+      8.493873281881353637e-04,  -2.941738570564985666e-01,
+      -4.482529909499673171e-02, 4.091569840186781021e-02,
+      -4.509020615859140463e-02, -1.013919988807244071e-01,
+      1.551440772665269030e-01,  4.181857726606644232e-02,
+      1.547200233064863484e-01,  -2.398213304685777592e-01,
+      -3.218625798524068354e-02, -1.012438450438508421e-02,
+      1.271639330380921855e-02,  3.072814938490859779e-03,
+      -9.556241797915024372e-02, 1.512251983492413077e-01,
+      -8.277872384009607454e-03, 1.505412040827929787e-01,
+      -2.386150620881526407e-01, -2.312295470054945568e-01,
+      -6.631490213524345034e-02, 7.932427266386249398e-02,
+      -8.053754366323923053e-02, -3.294595881137418747e-02,
+      4.342495071150231922e-02,  1.004599500126941436e-01,
+      4.450400364869536163e-02,  -5.951077548033092968e-02};
   int natoms;
   double expected_tot_e;
-  std::vectorexpected_tot_v;
+  std::vector expected_tot_v;
 
-  DP_DeepPot* dp; 
+  DP_DeepPot* dp;
 
   void SetUp() override {
     const char* file_name = "../../tests/infer/deeppot.pbtxt";
@@ -208,61 +236,60 @@ class TestInferDeepPotANoPBC : public ::testing::Test
     expected_tot_e = 0.;
     expected_tot_v.resize(9);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
-    for(int ii = 0; ii < natoms; ++ii){
+    for (int ii = 0; ii < natoms; ++ii) {
       expected_tot_e += expected_e[ii];
     }
-    for(int ii = 0; ii < natoms; ++ii){
-      for(int dd = 0; dd < 9; ++dd){
-	      expected_tot_v[dd] += expected_v[ii*9+dd];
+    for (int ii = 0; ii < natoms; ++ii) {
+      for (int dd = 0; dd < 9; ++dd) {
+        expected_tot_v[dd] += expected_v[ii * 9 + dd];
       }
     }
   };
 
-  void TearDown() override {
-    remove( "deeppot.pb" ) ;
-  };
+  void TearDown() override { remove("deeppot.pb"); };
 };
 
-TEST_F(TestInferDeepPotANoPBC, double_infer)
-{
+TEST_F(TestInferDeepPotANoPBC, double_infer) {
   double* ener_ = new double;
   double* force_ = new double[natoms * 3];
   double* virial_ = new double[9];
   double* atomic_ener_ = new double[natoms];
   double* atomic_virial_ = new double[natoms * 9];
 
-  DP_DeepPotCompute (dp, natoms, coord, atype, nullptr, ener_, force_, virial_, atomic_ener_, atomic_virial_);
+  DP_DeepPotCompute(dp, natoms, coord, atype, nullptr, ener_, force_, virial_,
+                    atomic_ener_, atomic_virial_);
 
   double ener = *ener_;
   std::vector force(force_, force_ + natoms * 3);
   std::vector virial(virial_, virial_ + 9);
   std::vector atomic_ener(atomic_ener_, atomic_ener_ + natoms);
-  std::vector atomic_virial(atomic_virial_, atomic_virial_ + natoms * 9);
+  std::vector atomic_virial(atomic_virial_,
+                                    atomic_virial_ + natoms * 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), 1e-10);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), 1e-10);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), 1e-10);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), 1e-10);
   }
-  for(int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     EXPECT_LT(fabs(atomic_ener[ii] - expected_e[ii]), 1e-10);
   }
-  for(int ii = 0; ii < natoms * 9; ++ii){
+  for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atomic_virial[ii] - expected_v[ii]), 1e-10);
   }
 }
 
-TEST_F(TestInferDeepPotANoPBC, float_infer)
-{
+TEST_F(TestInferDeepPotANoPBC, float_infer) {
   double* ener_ = new double;
   float* force_ = new float[natoms * 3];
   float* virial_ = new float[9];
   float* atomic_ener_ = new float[natoms];
   float* atomic_virial_ = new float[natoms * 9];
 
-  DP_DeepPotComputef (dp, natoms, coordf, atype, nullptr, ener_, force_, virial_, atomic_ener_, atomic_virial_);
+  DP_DeepPotComputef(dp, natoms, coordf, atype, nullptr, ener_, force_, virial_,
+                     atomic_ener_, atomic_virial_);
 
   double ener = *ener_;
   std::vector force(force_, force_ + natoms * 3);
@@ -271,17 +298,17 @@ TEST_F(TestInferDeepPotANoPBC, float_infer)
   std::vector atomic_virial(atomic_virial_, atomic_virial_ + natoms * 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), 1e-6);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), 1e-6);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), 1e-6);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), 1e-6);
   }
 
-  for(int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     EXPECT_LT(fabs(atomic_ener[ii] - expected_e[ii]), 1e-5);
   }
-  for(int ii = 0; ii < natoms * 9; ++ii){
+  for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atomic_virial[ii] - expected_v[ii]), 1e-6);
   }
-}
\ No newline at end of file
+}
diff --git a/source/api_c/tests/test_deeppot_a_hpp.cc b/source/api_c/tests/test_deeppot_a_hpp.cc
index e08add9485..6371c51ef6 100644
--- a/source/api_c/tests/test_deeppot_a_hpp.cc
+++ b/source/api_c/tests/test_deeppot_a_hpp.cc
@@ -1,47 +1,73 @@
 #include 
-#include 
+
 #include 
+#include 
 #include 
 #include 
+
 #include "deepmd.hpp"
 #include "test_utils.h"
 
 template 
-class TestInferDeepPotAHPP : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepPotAHPP : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector expected_e = {
-    -9.275780747115504710e+01,-1.863501786584258468e+02,-1.863392472863538103e+02,-9.279281325486221021e+01,-1.863671545232153903e+02,-1.863619822847602165e+02
-  };
+      -9.275780747115504710e+01, -1.863501786584258468e+02,
+      -1.863392472863538103e+02, -9.279281325486221021e+01,
+      -1.863671545232153903e+02, -1.863619822847602165e+02};
   std::vector expected_f = {
-    -3.034045420701179663e-01,8.405844663871177014e-01,7.696947487118485642e-02,7.662001266663505117e-01,-1.880601391333554251e-01,-6.183333871091722944e-01,-5.036172391059643427e-01,-6.529525836149027151e-01,5.432962643022043459e-01,6.382357912332115024e-01,-1.748518296794561167e-01,3.457363524891907125e-01,1.286482986991941552e-03,3.757251165286925043e-01,-5.972588700887541124e-01,-5.987006197104716154e-01,-2.004450304880958100e-01,2.495901655353461868e-01
-  };
+      -3.034045420701179663e-01, 8.405844663871177014e-01,
+      7.696947487118485642e-02,  7.662001266663505117e-01,
+      -1.880601391333554251e-01, -6.183333871091722944e-01,
+      -5.036172391059643427e-01, -6.529525836149027151e-01,
+      5.432962643022043459e-01,  6.382357912332115024e-01,
+      -1.748518296794561167e-01, 3.457363524891907125e-01,
+      1.286482986991941552e-03,  3.757251165286925043e-01,
+      -5.972588700887541124e-01, -5.987006197104716154e-01,
+      -2.004450304880958100e-01, 2.495901655353461868e-01};
   std::vector expected_v = {
-    -2.912234126853306959e-01,-3.800610846612756388e-02,2.776624987489437202e-01,-5.053761003913598976e-02,-3.152373041953385746e-01,1.060894290092162379e-01,2.826389131596073745e-01,1.039129970665329250e-01,-2.584378792325942586e-01,-3.121722367954994914e-01,8.483275876786681990e-02,2.524662342344257682e-01,4.142176771106586414e-02,-3.820285230785245428e-02,-2.727311173065460545e-02,2.668859789777112135e-01,-6.448243569420382404e-02,-2.121731470426218846e-01,-8.624335220278558922e-02,-1.809695356746038597e-01,1.529875294531883312e-01,-1.283658185172031341e-01,-1.992682279795223999e-01,1.409924999632362341e-01,1.398322735274434292e-01,1.804318474574856390e-01,-1.470309318999652726e-01,-2.593983661598450730e-01,-4.236536279233147489e-02,3.386387920184946720e-02,-4.174017537818433543e-02,-1.003500282164128260e-01,1.525690815194478966e-01,3.398976109910181037e-02,1.522253908435125536e-01,-2.349125581341701963e-01,9.515545977581392825e-04,-1.643218849228543846e-02,1.993234765412972564e-02,6.027265332209678569e-04,-9.563256398907417355e-02,1.510815124001868293e-01,-7.738094816888557714e-03,1.502832772532304295e-01,-2.380965783745832010e-01,-2.309456719810296654e-01,-6.666961081213038098e-02,7.955566551234216632e-02,-8.099093777937517447e-02,-3.386641099800401927e-02,4.447884755740908608e-02,1.008593228579038742e-01,4.556718179228393811e-02,-6.078081273849572641e-02
-  };
+      -2.912234126853306959e-01, -3.800610846612756388e-02,
+      2.776624987489437202e-01,  -5.053761003913598976e-02,
+      -3.152373041953385746e-01, 1.060894290092162379e-01,
+      2.826389131596073745e-01,  1.039129970665329250e-01,
+      -2.584378792325942586e-01, -3.121722367954994914e-01,
+      8.483275876786681990e-02,  2.524662342344257682e-01,
+      4.142176771106586414e-02,  -3.820285230785245428e-02,
+      -2.727311173065460545e-02, 2.668859789777112135e-01,
+      -6.448243569420382404e-02, -2.121731470426218846e-01,
+      -8.624335220278558922e-02, -1.809695356746038597e-01,
+      1.529875294531883312e-01,  -1.283658185172031341e-01,
+      -1.992682279795223999e-01, 1.409924999632362341e-01,
+      1.398322735274434292e-01,  1.804318474574856390e-01,
+      -1.470309318999652726e-01, -2.593983661598450730e-01,
+      -4.236536279233147489e-02, 3.386387920184946720e-02,
+      -4.174017537818433543e-02, -1.003500282164128260e-01,
+      1.525690815194478966e-01,  3.398976109910181037e-02,
+      1.522253908435125536e-01,  -2.349125581341701963e-01,
+      9.515545977581392825e-04,  -1.643218849228543846e-02,
+      1.993234765412972564e-02,  6.027265332209678569e-04,
+      -9.563256398907417355e-02, 1.510815124001868293e-01,
+      -7.738094816888557714e-03, 1.502832772532304295e-01,
+      -2.380965783745832010e-01, -2.309456719810296654e-01,
+      -6.666961081213038098e-02, 7.955566551234216632e-02,
+      -8.099093777937517447e-02, -3.386641099800401927e-02,
+      4.447884755740908608e-02,  1.008593228579038742e-01,
+      4.556718179228393811e-02,  -6.078081273849572641e-02};
   unsigned int natoms;
   double expected_tot_e;
-  std::vectorexpected_tot_v;
+  std::vector expected_tot_v;
 
   deepmd::hpp::DeepPot dp;
 
   void SetUp() override {
     std::string file_name = "../../tests/infer/deeppot.pbtxt";
-    deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt", "deeppot.pb");
+    deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt",
+                                     "deeppot.pb");
 
     dp.init("deeppot.pb");
 
@@ -51,25 +77,22 @@ class TestInferDeepPotAHPP : public ::testing::Test
     expected_tot_e = 0.;
     expected_tot_v.resize(9);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
-    for(unsigned int ii = 0; ii < natoms; ++ii){
+    for (unsigned int ii = 0; ii < natoms; ++ii) {
       expected_tot_e += expected_e[ii];
     }
-    for(unsigned int ii = 0; ii < natoms; ++ii){
-      for(int dd = 0; dd < 9; ++dd){
-	expected_tot_v[dd] += expected_v[ii*9+dd];
+    for (unsigned int ii = 0; ii < natoms; ++ii) {
+      for (int dd = 0; dd < 9; ++dd) {
+        expected_tot_v[dd] += expected_v[ii * 9 + dd];
       }
     }
   };
 
-  void TearDown() override {
-    remove( "deeppot.pb" ) ;
-  };
+  void TearDown() override { remove("deeppot.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepPotAHPP, ValueTypes);
 
-TYPED_TEST(TestInferDeepPotAHPP, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepPotAHPP, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -79,27 +102,26 @@ TYPED_TEST(TestInferDeepPotAHPP, cpu_build_nlist)
   std::vector& expected_v = this->expected_v;
   unsigned int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::hpp::DeepPot& dp = this->dp;
   double ener;
   std::vector force, virial;
 
   dp.compute(ener, force, virial, coord, atype, box);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
 
-TYPED_TEST(TestInferDeepPotAHPP, cpu_build_nlist_numfv)
-{
+TYPED_TEST(TestInferDeepPotAHPP, cpu_build_nlist_numfv) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -109,23 +131,20 @@ TYPED_TEST(TestInferDeepPotAHPP, cpu_build_nlist_numfv)
   std::vector& expected_v = this->expected_v;
   unsigned int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::hpp::DeepPot& dp = this->dp;
-  class MyModel : public EnergyModelTest
-  {
-    deepmd::hpp::DeepPot & mydp;
-    const std::vector atype;
-public:
-    MyModel(
-	deepmd::hpp::DeepPot & dp_,
-	const std::vector & atype_
-	) : mydp(dp_), atype(atype_) {};
-    virtual void compute (
-	double & ener,
-	std::vector &	force,
-	std::vector &	virial,
-	const std::vector & coord,
-	const std::vector & box) {
+  class MyModel : public EnergyModelTest {
+    deepmd::hpp::DeepPot& mydp;
+    const std::vector atype;
+
+   public:
+    MyModel(deepmd::hpp::DeepPot& dp_, const std::vector& atype_)
+        : mydp(dp_), atype(atype_){};
+    virtual void compute(double& ener,
+                         std::vector& force,
+                         std::vector& virial,
+                         const std::vector& coord,
+                         const std::vector& box) {
       mydp.compute(ener, force, virial, coord, atype, box);
     }
   };
@@ -153,9 +172,7 @@ TYPED_TEST(TestInferDeepPotAHPP, cpu_build_nlist_numfv)
   model.test_v(coord, box_);
 }
 
-
-TYPED_TEST(TestInferDeepPotAHPP, cpu_build_nlist_atomic)
-{
+TYPED_TEST(TestInferDeepPotAHPP, cpu_build_nlist_atomic) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -165,35 +182,33 @@ TYPED_TEST(TestInferDeepPotAHPP, cpu_build_nlist_atomic)
   std::vector& expected_v = this->expected_v;
   unsigned int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::hpp::DeepPot& dp = this->dp;
   double ener;
   std::vector force, virial, atom_ener, atom_vir;
   dp.compute(ener, force, virial, atom_ener, atom_vir, coord, atype, box);
-  
-  EXPECT_EQ(force.size(), natoms*3);
+
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
   EXPECT_EQ(atom_ener.size(), natoms);
-  EXPECT_EQ(atom_vir.size(), natoms*9);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms*9; ++ii){
+  for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist)
-{
+TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -203,58 +218,58 @@ TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist)
   std::vector& expected_v = this->expected_v;
   unsigned int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::hpp::DeepPot& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::hpp::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  deepmd::hpp::convert_nlist(inlist, nlist_data);  
+  deepmd::hpp::convert_nlist(inlist, nlist_data);
   double ener;
   std::vector force_, virial;
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 0);
   std::vector force;
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 
   ener = 0.;
   std::fill(force_.begin(), force_.end(), 0.0);
   std::fill(virial.begin(), virial.end(), 0.0);
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 1);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 1);
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist_atomic)
-{
+TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist_atomic) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -264,45 +279,46 @@ TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist_atomic)
   std::vector& expected_v = this->expected_v;
   unsigned int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::hpp::DeepPot& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::hpp::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  deepmd::hpp::convert_nlist(inlist, nlist_data);  
-  
+  deepmd::hpp::convert_nlist(inlist, nlist_data);
+
   double ener;
   std::vector force_, atom_ener_, atom_vir_, virial;
   std::vector force, atom_ener, atom_vir;
-  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
+  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy,
+             box, nall - nloc, inlist, 0);
   _fold_back(force, force_, mapping, nloc, nall, 3);
   _fold_back(atom_ener, atom_ener_, mapping, nloc, nall, 1);
   _fold_back(atom_vir, atom_vir_, mapping, nloc, nall, 9);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
   EXPECT_EQ(atom_ener.size(), natoms);
-  EXPECT_EQ(atom_vir.size(), natoms*9);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms*9; ++ii){
+  for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
   }
 
@@ -310,35 +326,34 @@ TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist_atomic)
   std::fill(force_.begin(), force_.end(), 0.0);
   std::fill(virial.begin(), virial.end(), 0.0);
   std::fill(atom_ener_.begin(), atom_ener_.end(), 0.0);
-  std::fill(atom_vir_.begin(), atom_vir_.end(), 0.0);  
-  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy, box, nall-nloc, inlist, 1);
+  std::fill(atom_vir_.begin(), atom_vir_.end(), 0.0);
+  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy,
+             box, nall - nloc, inlist, 1);
   _fold_back(force, force_, mapping, nloc, nall, 3);
   _fold_back(atom_ener, atom_ener_, mapping, nloc, nall, 1);
   _fold_back(atom_vir, atom_vir_, mapping, nloc, nall, 9);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
   EXPECT_EQ(atom_ener.size(), natoms);
-  EXPECT_EQ(atom_vir.size(), natoms*9);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms*9; ++ii){
+  for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist_2rc)
-{
+TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist_2rc) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -348,59 +363,59 @@ TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist_2rc)
   std::vector& expected_v = this->expected_v;
   unsigned int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::hpp::DeepPot& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc*2);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc * 2);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::hpp::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  deepmd::hpp::convert_nlist(inlist, nlist_data);  
-  
+  deepmd::hpp::convert_nlist(inlist, nlist_data);
+
   double ener;
-  std::vector force_(nall*3, 0.0), virial(9, 0.0);
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
+  std::vector force_(nall * 3, 0.0), virial(9, 0.0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 0);
   std::vector force;
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 
   ener = 0.;
   std::fill(force_.begin(), force_.end(), 0.0);
   std::fill(virial.begin(), virial.end(), 0.0);
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 1);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 1);
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist_type_sel)
-{
+TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist_type_sel) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -410,30 +425,31 @@ TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist_type_sel)
   std::vector& expected_v = this->expected_v;
   unsigned int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::hpp::DeepPot& dp = this->dp;
   float rc = dp.cutoff();
 
   // add vir atoms
   int nvir = 2;
-  std::vector coord_vir(nvir*3);
+  std::vector coord_vir(nvir * 3);
   std::vector atype_vir(nvir, 2);
-  for(int ii = 0; ii < nvir; ++ii){
+  for (int ii = 0; ii < nvir; ++ii) {
     coord_vir[ii] = coord[ii];
-  }  
+  }
   coord.insert(coord.begin(), coord_vir.begin(), coord_vir.end());
   atype.insert(atype.begin(), atype_vir.begin(), atype_vir.end());
   natoms += nvir;
-  std::vector expected_f_vir(nvir*3, 0.0);
-  expected_f.insert(expected_f.begin(), expected_f_vir.begin(), expected_f_vir.end());
+  std::vector expected_f_vir(nvir * 3, 0.0);
+  expected_f.insert(expected_f.begin(), expected_f_vir.begin(),
+                    expected_f_vir.end());
 
   // build nlist
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
@@ -442,59 +458,83 @@ TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist_type_sel)
 
   // dp compute
   double ener;
-  std::vector force_(nall*3, 0.0), virial(9, 0.0);
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
+  std::vector force_(nall * 3, 0.0), virial(9, 0.0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 0);
   // fold back
   std::vector force;
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
 
-TYPED_TEST(TestInferDeepPotAHPP, print_summary)
-{
+TYPED_TEST(TestInferDeepPotAHPP, print_summary) {
   deepmd::hpp::DeepPot& dp = this->dp;
   dp.print_summary("");
 }
 
-
 template 
-class TestInferDeepPotANoPbcHPP : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
+class TestInferDeepPotANoPbcHPP : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
   std::vector box = {};
   std::vector expected_e = {
-    -9.255934839310273787e+01,-1.863253376736990106e+02,-1.857237299341402945e+02,-9.279308539717486326e+01,-1.863708105823244239e+02,-1.863635196514972563e+02
-  };
+      -9.255934839310273787e+01, -1.863253376736990106e+02,
+      -1.857237299341402945e+02, -9.279308539717486326e+01,
+      -1.863708105823244239e+02, -1.863635196514972563e+02};
   std::vector expected_f = {
-    -2.161037360255332107e+00,9.052994347015581589e-01,1.635379623977007979e+00,2.161037360255332107e+00,-9.052994347015581589e-01,-1.635379623977007979e+00,-1.167128117249453811e-02,1.371975700096064992e-03,-1.575265180249604477e-03,6.226508593971802341e-01,-1.816734122009256991e-01,3.561766019664774907e-01,-1.406075393906316626e-02,3.789140061530929526e-01,-6.018777878642909140e-01,-5.969188242856223736e-01,-1.986125696522633155e-01,2.472764510780630642e-01    
-  };
+      -2.161037360255332107e+00, 9.052994347015581589e-01,
+      1.635379623977007979e+00,  2.161037360255332107e+00,
+      -9.052994347015581589e-01, -1.635379623977007979e+00,
+      -1.167128117249453811e-02, 1.371975700096064992e-03,
+      -1.575265180249604477e-03, 6.226508593971802341e-01,
+      -1.816734122009256991e-01, 3.561766019664774907e-01,
+      -1.406075393906316626e-02, 3.789140061530929526e-01,
+      -6.018777878642909140e-01, -5.969188242856223736e-01,
+      -1.986125696522633155e-01, 2.472764510780630642e-01};
   std::vector expected_v = {
-    -7.042445481792056761e-01,2.950213647777754078e-01,5.329418202437231633e-01,2.950213647777752968e-01,-1.235900311906896754e-01,-2.232594111831812944e-01,5.329418202437232743e-01,-2.232594111831813499e-01,-4.033073234276823849e-01,-8.949230984097404917e-01,3.749002169013777030e-01,6.772391014992630298e-01,3.749002169013777586e-01,-1.570527935667933583e-01,-2.837082722496912512e-01,6.772391014992631408e-01,-2.837082722496912512e-01,-5.125052659994422388e-01,4.858210330291591605e-02,-6.902596153269104431e-03,6.682612642430500391e-03,-5.612247004554610057e-03,9.767795567660207592e-04,-9.773758942738038254e-04,5.638322117219018645e-03,-9.483806049779926932e-04,8.493873281881353637e-04,-2.941738570564985666e-01,-4.482529909499673171e-02,4.091569840186781021e-02,-4.509020615859140463e-02,-1.013919988807244071e-01,1.551440772665269030e-01,4.181857726606644232e-02,1.547200233064863484e-01,-2.398213304685777592e-01,-3.218625798524068354e-02,-1.012438450438508421e-02,1.271639330380921855e-02,3.072814938490859779e-03,-9.556241797915024372e-02,1.512251983492413077e-01,-8.277872384009607454e-03,1.505412040827929787e-01,-2.386150620881526407e-01,-2.312295470054945568e-01,-6.631490213524345034e-02,7.932427266386249398e-02,-8.053754366323923053e-02,-3.294595881137418747e-02,4.342495071150231922e-02,1.004599500126941436e-01,4.450400364869536163e-02,-5.951077548033092968e-02
-  };
+      -7.042445481792056761e-01, 2.950213647777754078e-01,
+      5.329418202437231633e-01,  2.950213647777752968e-01,
+      -1.235900311906896754e-01, -2.232594111831812944e-01,
+      5.329418202437232743e-01,  -2.232594111831813499e-01,
+      -4.033073234276823849e-01, -8.949230984097404917e-01,
+      3.749002169013777030e-01,  6.772391014992630298e-01,
+      3.749002169013777586e-01,  -1.570527935667933583e-01,
+      -2.837082722496912512e-01, 6.772391014992631408e-01,
+      -2.837082722496912512e-01, -5.125052659994422388e-01,
+      4.858210330291591605e-02,  -6.902596153269104431e-03,
+      6.682612642430500391e-03,  -5.612247004554610057e-03,
+      9.767795567660207592e-04,  -9.773758942738038254e-04,
+      5.638322117219018645e-03,  -9.483806049779926932e-04,
+      8.493873281881353637e-04,  -2.941738570564985666e-01,
+      -4.482529909499673171e-02, 4.091569840186781021e-02,
+      -4.509020615859140463e-02, -1.013919988807244071e-01,
+      1.551440772665269030e-01,  4.181857726606644232e-02,
+      1.547200233064863484e-01,  -2.398213304685777592e-01,
+      -3.218625798524068354e-02, -1.012438450438508421e-02,
+      1.271639330380921855e-02,  3.072814938490859779e-03,
+      -9.556241797915024372e-02, 1.512251983492413077e-01,
+      -8.277872384009607454e-03, 1.505412040827929787e-01,
+      -2.386150620881526407e-01, -2.312295470054945568e-01,
+      -6.631490213524345034e-02, 7.932427266386249398e-02,
+      -8.053754366323923053e-02, -3.294595881137418747e-02,
+      4.342495071150231922e-02,  1.004599500126941436e-01,
+      4.450400364869536163e-02,  -5.951077548033092968e-02};
   unsigned int natoms;
   double expected_tot_e;
-  std::vectorexpected_tot_v;
+  std::vector expected_tot_v;
 
   deepmd::hpp::DeepPot dp;
 
@@ -510,25 +550,22 @@ class TestInferDeepPotANoPbcHPP : public ::testing::Test
     expected_tot_e = 0.;
     expected_tot_v.resize(9);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
-    for(unsigned int ii = 0; ii < natoms; ++ii){
+    for (unsigned int ii = 0; ii < natoms; ++ii) {
       expected_tot_e += expected_e[ii];
     }
-    for(unsigned int ii = 0; ii < natoms; ++ii){
-      for(int dd = 0; dd < 9; ++dd){
-	expected_tot_v[dd] += expected_v[ii*9+dd];
+    for (unsigned int ii = 0; ii < natoms; ++ii) {
+      for (int dd = 0; dd < 9; ++dd) {
+        expected_tot_v[dd] += expected_v[ii * 9 + dd];
       }
     }
   };
 
-  void TearDown() override {
-    remove( "deeppot.pb" ) ;
-  };
+  void TearDown() override { remove("deeppot.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepPotANoPbcHPP, ValueTypes);
 
-TYPED_TEST(TestInferDeepPotANoPbcHPP, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepPotANoPbcHPP, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -538,20 +575,20 @@ TYPED_TEST(TestInferDeepPotANoPbcHPP, cpu_build_nlist)
   std::vector& expected_v = this->expected_v;
   unsigned int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::hpp::DeepPot& dp = this->dp;
   double ener;
   std::vector force, virial;
   dp.compute(ener, force, virial, coord, atype, box);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(unsigned int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (unsigned int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(unsigned int ii = 0; ii < 3*3; ++ii){
+  for (unsigned int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
diff --git a/source/api_c/tests/test_deeppot_model_devi_hpp.cc b/source/api_c/tests/test_deeppot_model_devi_hpp.cc
index 6a8232d838..8d846d02dc 100644
--- a/source/api_c/tests/test_deeppot_model_devi_hpp.cc
+++ b/source/api_c/tests/test_deeppot_model_devi_hpp.cc
@@ -1,29 +1,21 @@
 #include 
-#include 
+
 #include 
+#include 
 #include 
 #include 
+
 #include "deepmd.hpp"
 #include "test_utils.h"
 
 template 
-class TestInferDeepPotModeDevi : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepPotModeDevi : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   int natoms;
 
   deepmd::hpp::DeepPot dp0;
@@ -33,50 +25,48 @@ class TestInferDeepPotModeDevi : public ::testing::Test
   void SetUp() override {
     {
       std::string file_name = "../../tests/infer/deeppot.pbtxt";
-      deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt", "deeppot.pb");
+      deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt",
+                                       "deeppot.pb");
       dp0.init("deeppot.pb");
     }
     {
       std::string file_name = "../../tests/infer/deeppot-1.pbtxt";
-      deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppot-1.pbtxt", "deeppot-1.pb");
+      deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppot-1.pbtxt",
+                                       "deeppot-1.pb");
       dp1.init("deeppot-1.pb");
     }
     dp_md.init(std::vector({"deeppot.pb", "deeppot-1.pb"}));
   };
 
   void TearDown() override {
-    remove( "deeppot.pb" ) ;
-    remove( "deeppot-1.pb" ) ;
+    remove("deeppot.pb");
+    remove("deeppot-1.pb");
   };
 };
 
 TYPED_TEST_SUITE(TestInferDeepPotModeDevi, ValueTypes);
 
 template 
-class TestInferDeepPotModeDeviPython : public ::testing::Test
-{  
-protected:  
+class TestInferDeepPotModeDeviPython : public ::testing::Test {
+ protected:
   std::vector coord = {
-    4.170220047025740423e-02,7.203244934421580703e-02,1.000114374817344942e-01,
-    4.053881673400336005e+00,4.191945144032948461e-02,6.852195003967595510e-02,
-    1.130233257263184132e+00,1.467558908171130543e-02,1.092338594768797883e-01,
-    1.862602113776709242e-02,1.134556072704304919e+00,1.396767474230670159e-01,
-    5.120445224973151355e+00,8.781174363909455272e-02,2.738759319792616331e-03,
-    4.067046751017840300e+00,1.141730480236712753e+00,5.586898284457517128e-02,
-  };
-  std::vector atype = {
-    0, 0, 1, 1, 1, 1
-  };
-  std::vector box = {
-    20., 0., 0., 0., 20., 0., 0., 0., 20.
+      4.170220047025740423e-02, 7.203244934421580703e-02,
+      1.000114374817344942e-01, 4.053881673400336005e+00,
+      4.191945144032948461e-02, 6.852195003967595510e-02,
+      1.130233257263184132e+00, 1.467558908171130543e-02,
+      1.092338594768797883e-01, 1.862602113776709242e-02,
+      1.134556072704304919e+00, 1.396767474230670159e-01,
+      5.120445224973151355e+00, 8.781174363909455272e-02,
+      2.738759319792616331e-03, 4.067046751017840300e+00,
+      1.141730480236712753e+00, 5.586898284457517128e-02,
   };
+  std::vector atype = {0, 0, 1, 1, 1, 1};
+  std::vector box = {20., 0., 0., 0., 20., 0., 0., 0., 20.};
   int natoms;
-  std::vector expected_md_f = {
-    0.509504727653, 0.458424067748, 0.481978258466
-  }; // max min avg
-  std::vector expected_md_v = {
-    0.167004837423,0.00041822790564,0.0804864867641
-  }; // max min avg
+  std::vector expected_md_f = {0.509504727653, 0.458424067748,
+                                          0.481978258466};  // max min avg
+  std::vector expected_md_v = {0.167004837423, 0.00041822790564,
+                                          0.0804864867641};  // max min avg
 
   deepmd::hpp::DeepPot dp0;
   deepmd::hpp::DeepPot dp1;
@@ -85,27 +75,28 @@ class TestInferDeepPotModeDeviPython : public ::testing::Test
   void SetUp() override {
     {
       std::string file_name = "../../tests/infer/deeppot.pbtxt";
-      deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt", "deeppot.pb");
+      deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt",
+                                       "deeppot.pb");
       dp0.init("deeppot.pb");
     }
     {
       std::string file_name = "../../tests/infer/deeppot-1.pbtxt";
-      deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppot-1.pbtxt", "deeppot-1.pb");
+      deepmd::hpp::convert_pbtxt_to_pb("../../tests/infer/deeppot-1.pbtxt",
+                                       "deeppot-1.pb");
       dp1.init("deeppot-1.pb");
     }
     dp_md.init(std::vector({"deeppot.pb", "deeppot-1.pb"}));
   };
 
   void TearDown() override {
-    remove( "deeppot.pb" ) ;
-    remove( "deeppot-1.pb" ) ;
+    remove("deeppot.pb");
+    remove("deeppot-1.pb");
   };
 };
 
 TYPED_TEST_SUITE(TestInferDeepPotModeDeviPython, ValueTypes);
 
-TYPED_TEST(TestInferDeepPotModeDevi, attrs)
-{
+TYPED_TEST(TestInferDeepPotModeDevi, attrs) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -116,16 +107,15 @@ TYPED_TEST(TestInferDeepPotModeDevi, attrs)
   deepmd::hpp::DeepPotModelDevi& dp_md = this->dp_md;
   EXPECT_EQ(dp0.cutoff(), dp_md.cutoff());
   EXPECT_EQ(dp0.numb_types(), dp_md.numb_types());
-  //EXPECT_EQ(dp0.dim_fparam(), dp_md.dim_fparam());
-  //EXPECT_EQ(dp0.dim_aparam(), dp_md.dim_aparam());
+  // EXPECT_EQ(dp0.dim_fparam(), dp_md.dim_fparam());
+  // EXPECT_EQ(dp0.dim_aparam(), dp_md.dim_aparam());
   EXPECT_EQ(dp1.cutoff(), dp_md.cutoff());
   EXPECT_EQ(dp1.numb_types(), dp_md.numb_types());
-  //EXPECT_EQ(dp1.dim_fparam(), dp_md.dim_fparam());
-  //EXPECT_EQ(dp1.dim_aparam(), dp_md.dim_aparam());
+  // EXPECT_EQ(dp1.dim_fparam(), dp_md.dim_fparam());
+  // EXPECT_EQ(dp1.dim_aparam(), dp_md.dim_aparam());
 }
 
-TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list)
-{
+TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -135,50 +125,52 @@ TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list)
   deepmd::hpp::DeepPot& dp1 = this->dp1;
   deepmd::hpp::DeepPotModelDevi& dp_md = this->dp_md;
   float rc = dp_md.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::hpp::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  deepmd::hpp::convert_nlist(inlist, nlist_data);  
+  deepmd::hpp::convert_nlist(inlist, nlist_data);
 
   int nmodel = 2;
-  std::vector edir(nmodel), emd;
-  std::vector > fdir_(nmodel), fdir(nmodel), vdir(nmodel), fmd_, fmd(nmodel), vmd;
-  dp0.compute(edir[0], fdir_[0], vdir[0], coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  dp1.compute(edir[1], fdir_[1], vdir[1], coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  dp_md.compute(emd, fmd_, vmd, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  for(int kk = 0; kk < nmodel; ++kk){
+  std::vector edir(nmodel), emd;
+  std::vector > fdir_(nmodel), fdir(nmodel),
+      vdir(nmodel), fmd_, fmd(nmodel), vmd;
+  dp0.compute(edir[0], fdir_[0], vdir[0], coord_cpy, atype_cpy, box,
+              nall - nloc, inlist, 0);
+  dp1.compute(edir[1], fdir_[1], vdir[1], coord_cpy, atype_cpy, box,
+              nall - nloc, inlist, 0);
+  dp_md.compute(emd, fmd_, vmd, coord_cpy, atype_cpy, box, nall - nloc, inlist,
+                0);
+  for (int kk = 0; kk < nmodel; ++kk) {
     _fold_back(fdir[kk], fdir_[kk], mapping, nloc, nall, 3);
     _fold_back(fmd[kk], fmd_[kk], mapping, nloc, nall, 3);
-  }  
+  }
 
   EXPECT_EQ(edir.size(), emd.size());
   EXPECT_EQ(fdir.size(), fmd.size());
   EXPECT_EQ(vdir.size(), vmd.size());
-  for(int kk = 0; kk < nmodel; ++kk){
+  for (int kk = 0; kk < nmodel; ++kk) {
     EXPECT_EQ(fdir[kk].size(), fmd[kk].size());
     EXPECT_EQ(vdir[kk].size(), vmd[kk].size());
-  }  
-  for(int kk = 0; kk < nmodel; ++kk){
+  }
+  for (int kk = 0; kk < nmodel; ++kk) {
     EXPECT_LT(fabs(edir[kk] - emd[kk]), EPSILON);
-    for(int ii = 0; ii < fdir[0].size(); ++ii){
+    for (int ii = 0; ii < fdir[0].size(); ++ii) {
       EXPECT_LT(fabs(fdir[kk][ii] - fmd[kk][ii]), EPSILON);
     }
-    for(int ii = 0; ii < vdir[0].size(); ++ii){
+    for (int ii = 0; ii < vdir[0].size(); ++ii) {
       EXPECT_LT(fabs(vdir[kk][ii] - vmd[kk][ii]), EPSILON);
     }
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list_atomic)
-{
+TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list_atomic) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -188,76 +180,78 @@ TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list_atomic)
   deepmd::hpp::DeepPot& dp1 = this->dp1;
   deepmd::hpp::DeepPotModelDevi& dp_md = this->dp_md;
   float rc = dp_md.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::hpp::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  deepmd::hpp::convert_nlist(inlist, nlist_data);  
+  deepmd::hpp::convert_nlist(inlist, nlist_data);
 
   int nmodel = 2;
-  std::vector edir(nmodel), emd;
-  std::vector > fdir_(nmodel), fdir(nmodel), vdir(nmodel), fmd_, fmd(nmodel), vmd, aedir(nmodel), aemd, avdir(nmodel), avdir_(nmodel), avmd(nmodel), avmd_;
-  dp0.compute(edir[0], fdir_[0], vdir[0], aedir[0], avdir_[0], coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  dp1.compute(edir[1], fdir_[1], vdir[1], aedir[1], avdir_[1], coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  dp_md.compute(emd, fmd_, vmd, aemd, avmd_, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  for(int kk = 0; kk < nmodel; ++kk){
+  std::vector edir(nmodel), emd;
+  std::vector > fdir_(nmodel), fdir(nmodel),
+      vdir(nmodel), fmd_, fmd(nmodel), vmd, aedir(nmodel), aemd, avdir(nmodel),
+      avdir_(nmodel), avmd(nmodel), avmd_;
+  dp0.compute(edir[0], fdir_[0], vdir[0], aedir[0], avdir_[0], coord_cpy,
+              atype_cpy, box, nall - nloc, inlist, 0);
+  dp1.compute(edir[1], fdir_[1], vdir[1], aedir[1], avdir_[1], coord_cpy,
+              atype_cpy, box, nall - nloc, inlist, 0);
+  dp_md.compute(emd, fmd_, vmd, aemd, avmd_, coord_cpy, atype_cpy, box,
+                nall - nloc, inlist, 0);
+  for (int kk = 0; kk < nmodel; ++kk) {
     _fold_back(fdir[kk], fdir_[kk], mapping, nloc, nall, 3);
     _fold_back(fmd[kk], fmd_[kk], mapping, nloc, nall, 3);
     _fold_back(avdir[kk], avdir_[kk], mapping, nloc, nall, 9);
     _fold_back(avmd[kk], avmd_[kk], mapping, nloc, nall, 9);
-  }  
+  }
 
   EXPECT_EQ(edir.size(), emd.size());
   EXPECT_EQ(fdir.size(), fmd.size());
   EXPECT_EQ(vdir.size(), vmd.size());
   EXPECT_EQ(aedir.size(), aemd.size());
   EXPECT_EQ(avdir.size(), avmd.size());
-  for(int kk = 0; kk < nmodel; ++kk){
+  for (int kk = 0; kk < nmodel; ++kk) {
     EXPECT_EQ(fdir[kk].size(), fmd[kk].size());
     EXPECT_EQ(vdir[kk].size(), vmd[kk].size());
     EXPECT_EQ(aedir[kk].size(), aemd[kk].size());
     EXPECT_EQ(avdir[kk].size(), avmd[kk].size());
-  }  
-  for(int kk = 0; kk < nmodel; ++kk){
+  }
+  for (int kk = 0; kk < nmodel; ++kk) {
     EXPECT_LT(fabs(edir[kk] - emd[kk]), EPSILON);
-    for(int ii = 0; ii < fdir[0].size(); ++ii){
+    for (int ii = 0; ii < fdir[0].size(); ++ii) {
       EXPECT_LT(fabs(fdir[kk][ii] - fmd[kk][ii]), EPSILON);
     }
-    for(int ii = 0; ii < vdir[0].size(); ++ii){
+    for (int ii = 0; ii < vdir[0].size(); ++ii) {
       EXPECT_LT(fabs(vdir[kk][ii] - vmd[kk][ii]), EPSILON);
     }
-    for(int ii = 0; ii < aedir[0].size(); ++ii){
+    for (int ii = 0; ii < aedir[0].size(); ++ii) {
       EXPECT_LT(fabs(aedir[kk][ii] - aemd[kk][ii]), EPSILON);
     }
-    for(int ii = 0; ii < avdir[0].size(); ++ii){
+    for (int ii = 0; ii < avdir[0].size(); ++ii) {
       EXPECT_LT(fabs(avdir[kk][ii] - avmd[kk][ii]), EPSILON);
     }
   }
 }
 
-
 template 
-inline VALUETYPE mymax(const std::vector & xx)
-{
+inline VALUETYPE mymax(const std::vector& xx) {
   VALUETYPE ret = 0;
-  for (int ii = 0; ii < xx.size(); ++ii){
+  for (int ii = 0; ii < xx.size(); ++ii) {
     if (xx[ii] > ret) {
       ret = xx[ii];
     }
   }
   return ret;
-};  
+};
 template 
-inline VALUETYPE mymin(const std::vector & xx)
-{
+inline VALUETYPE mymin(const std::vector& xx) {
   VALUETYPE ret = 1e10;
-  for (int ii = 0; ii < xx.size(); ++ii){
+  for (int ii = 0; ii < xx.size(); ++ii) {
     if (xx[ii] < ret) {
       ret = xx[ii];
     }
@@ -265,21 +259,18 @@ inline VALUETYPE mymin(const std::vector & xx)
   return ret;
 };
 template 
-inline VALUETYPE myavg(const std::vector & xx)
-{
+inline VALUETYPE myavg(const std::vector& xx) {
   VALUETYPE ret = 0;
-  for (int ii = 0; ii < xx.size(); ++ii){
+  for (int ii = 0; ii < xx.size(); ++ii) {
     ret += xx[ii];
   }
   return (ret / xx.size());
 };
 template 
-inline VALUETYPE mystd(const std::vector & xx)
-{
+inline VALUETYPE mystd(const std::vector& xx) {
   VALUETYPE ret = 0;
-  for (int ii = 0; ii < xx.size(); ++ii){
+  for (int ii = 0; ii < xx.size(); ++ii) {
     ret += xx[ii] * xx[ii];
   }
   return sqrt(ret / xx.size());
 };
-
diff --git a/source/api_c/tests/test_dipolecharge.cc b/source/api_c/tests/test_dipolecharge.cc
index 77ec0a091c..9f6980be8c 100644
--- a/source/api_c/tests/test_dipolecharge.cc
+++ b/source/api_c/tests/test_dipolecharge.cc
@@ -1,51 +1,54 @@
 #include 
-#include 
+
 #include 
+#include 
 #include 
 #include 
-#include "deepmd.hpp"
-#include "test_utils.h"
+
 #include "common.h"
+#include "deepmd.hpp"
 #include "ewald.h"
 #include "region.h"
+#include "test_utils.h"
 
 template 
-class TestDipoleCharge : public ::testing::Test
-{  
-protected:  
+class TestDipoleCharge : public ::testing::Test {
+ protected:
   std::vector coord = {
-    4.6067455554,    8.8719311819,    6.3886531197,
-    4.0044515745,    4.2449530507,    7.7902855220,
-    2.6453069446,    0.8772647726,    1.2804446790,
-    1.1445332290,    0.0067366438,    1.8606485070,
-    7.1002867706,    5.0325506787,    3.1805888348,
-    4.5352891138,    7.7389683929,    9.4260970128,
-    2.1833238914,    9.0916071034,    7.2299906064,
-    4.1040157820,    1.0496745045,    5.4748315591,
-  };
-  std::vector atype = {
-    0,3,2,1,3,4,1,4
-  };
-  std::vector box = {
-    10., 0., 0., 0., 10., 0., 0., 0., 10.
-  };
-  std::vector expected_e = {
-    3.671081837126222158e+00
+      4.6067455554, 8.8719311819, 6.3886531197, 4.0044515745, 4.2449530507,
+      7.7902855220, 2.6453069446, 0.8772647726, 1.2804446790, 1.1445332290,
+      0.0067366438, 1.8606485070, 7.1002867706, 5.0325506787, 3.1805888348,
+      4.5352891138, 7.7389683929, 9.4260970128, 2.1833238914, 9.0916071034,
+      7.2299906064, 4.1040157820, 1.0496745045, 5.4748315591,
   };
+  std::vector atype = {0, 3, 2, 1, 3, 4, 1, 4};
+  std::vector box = {10., 0., 0., 0., 10., 0., 0., 0., 10.};
+  std::vector expected_e = {3.671081837126222158e+00};
   std::vector expected_f = {
-    8.786854427753210128e-01,-1.590752486903602159e-01,-2.709225006303785932e-01,-4.449513960033193438e-01,-1.564291540964127813e-01,2.139031741772115178e-02,1.219699614140521193e+00,-5.580358618499958734e-02,-3.878662478349682585e-01,-1.286685244990778854e+00,1.886475802950296488e-01,3.904450515493615437e-01,1.605017382138404849e-02,2.138016869742287995e-01,-2.617514921203008965e-02,2.877081057057793712e-01,-3.846449683844421763e-01,3.048855616906603894e-02,-9.075632811311897807e-01,-6.509653472431625731e-03,2.302010972126376787e-01,2.370565856822822726e-01,3.600133435593881881e-01,1.243887532859055609e-02
-  };
+      8.786854427753210128e-01,  -1.590752486903602159e-01,
+      -2.709225006303785932e-01, -4.449513960033193438e-01,
+      -1.564291540964127813e-01, 2.139031741772115178e-02,
+      1.219699614140521193e+00,  -5.580358618499958734e-02,
+      -3.878662478349682585e-01, -1.286685244990778854e+00,
+      1.886475802950296488e-01,  3.904450515493615437e-01,
+      1.605017382138404849e-02,  2.138016869742287995e-01,
+      -2.617514921203008965e-02, 2.877081057057793712e-01,
+      -3.846449683844421763e-01, 3.048855616906603894e-02,
+      -9.075632811311897807e-01, -6.509653472431625731e-03,
+      2.302010972126376787e-01,  2.370565856822822726e-01,
+      3.600133435593881881e-01,  1.243887532859055609e-02};
   std::vector expected_v = {
-    3.714071471995848417e-01,6.957130186032146613e-01,-1.158289779017217302e+00,6.957130186032139951e-01,-1.400130091653774933e+01,-3.631620234653316626e-01,-1.158289779017217302e+00,-3.631620234653316626e-01,3.805077486043773050e+00
-  };
-  std::vector charge_map = {
-    1., 1., 1., 1., 1., -1., -3.
-  };
+      3.714071471995848417e-01,  6.957130186032146613e-01,
+      -1.158289779017217302e+00, 6.957130186032139951e-01,
+      -1.400130091653774933e+01, -3.631620234653316626e-01,
+      -1.158289779017217302e+00, -3.631620234653316626e-01,
+      3.805077486043773050e+00};
+  std::vector charge_map = {1., 1., 1., 1., 1., -1., -3.};
   int natoms;
   int ntypes;
   std::vector type_asso;
   double expected_tot_e;
-  std::vectorexpected_tot_v;
+  std::vector expected_tot_v;
 
   deepmd::hpp::DeepTensor dp;
   deepmd::hpp::DipoleChargeModifier dm;
@@ -67,26 +70,20 @@ class TestDipoleCharge : public ::testing::Test
     EXPECT_EQ(9, expected_v.size());
   };
 
-  void TearDown() override {
-    remove( "dipolecharge_e.pb" ) ;
-  };
+  void TearDown() override { remove("dipolecharge_e.pb"); };
 };
 
-static bool
-_in_vec(const int & value,
-	const std::vector & vec)
-{
+static bool _in_vec(const int& value, const std::vector& vec) {
   // naive impl.
-  for(int ii = 0; ii < vec.size(); ++ii){
-    if(value == vec[ii]) return true;
+  for (int ii = 0; ii < vec.size(); ++ii) {
+    if (value == vec[ii]) return true;
   }
   return false;
 }
 
 TYPED_TEST_SUITE(TestDipoleCharge, ValueTypes);
 
-TYPED_TEST(TestDipoleCharge, cpu_lmp_nlist)
-{
+TYPED_TEST(TestDipoleCharge, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -99,80 +96,83 @@ TYPED_TEST(TestDipoleCharge, cpu_lmp_nlist)
   int& ntypes = this->ntypes;
   std::vector& type_asso = this->type_asso;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::hpp::DeepTensor& dp = this->dp;
   deepmd::hpp::DipoleChargeModifier& dm = this->dm;
   // build nlist
   // float rc = dp.cutoff();
   float rc = 4.0;
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-  	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector> nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   int nghost = nall - nloc;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::hpp::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   // evaluate dipole
-  std::vector dipole, dipole_recd(nloc*3, 0.0);
-  dp.compute(dipole, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  std::vector dipole, dipole_recd(nloc * 3, 0.0);
+  dp.compute(dipole, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   // add virtual atoms to the system
   // // a lot of mappings
   std::vector sel_types = dp.sel_types();
   std::vector sel_fwd, sel_bwd;
   int sel_nghost;
-  deepmd::select_by_type(sel_fwd, sel_bwd, sel_nghost, coord_cpy, atype_cpy, nghost, sel_types);
+  deepmd::select_by_type(sel_fwd, sel_bwd, sel_nghost, coord_cpy, atype_cpy,
+                         nghost, sel_types);
   int sel_nall = sel_bwd.size();
   int sel_nloc = sel_nall - sel_nghost;
   std::vector sel_atype(sel_bwd.size());
   deepmd::select_map(sel_atype, atype, sel_fwd, 1);
-  // Yixiao: because the deeptensor already return the correct order, the following map is no longer needed
-  // deepmd::AtomMap nnp_map(sel_atype.begin(), sel_atype.begin() + sel_nloc);
-  // const std::vector & sort_fwd_map(nnp_map.get_fwd_map());
+  // Yixiao: because the deeptensor already return the correct order, the
+  // following map is no longer needed deepmd::AtomMap
+  // nnp_map(sel_atype.begin(), sel_atype.begin() + sel_nloc); const
+  // std::vector & sort_fwd_map(nnp_map.get_fwd_map());
 
   // // add coords
-  std::vector add_coord;
-  std::vector add_atype;
-  std::vector> pairs;
-  for(int ii = 0; ii < nloc; ++ii){
-    if(_in_vec(atype[ii], sel_types)){
+  std::vector add_coord;
+  std::vector add_atype;
+  std::vector> pairs;
+  for (int ii = 0; ii < nloc; ++ii) {
+    if (_in_vec(atype[ii], sel_types)) {
       // Yixiao: the sort map is no longer needed
       // int res_idx = sort_fwd_map[sel_fwd[ii]];
       int res_idx = sel_fwd[ii];
-      std::vector tmp_coord(3);
-      for(int dd = 0; dd < 3; ++dd){
-	tmp_coord[dd] = coord[ii*3+dd] + dipole[res_idx*3+dd];
-	dipole_recd[ii*3+dd] = dipole[res_idx*3+dd];
+      std::vector tmp_coord(3);
+      for (int dd = 0; dd < 3; ++dd) {
+        tmp_coord[dd] = coord[ii * 3 + dd] + dipole[res_idx * 3 + dd];
+        dipole_recd[ii * 3 + dd] = dipole[res_idx * 3 + dd];
       }
-      pairs.push_back(std::pair(ii, add_atype.size()+atype.size()));
-      // std::cout << ii <<  " " 
-      // 		<< atype[ii] << " " 
-      // 		<< res_idx << " " 
-      // 		<< type_asso[atype[ii]] << " " 
-      // 		<< " pair "  
-      // 		<< pairs.back().first << " " << pairs.back().second << " "
+      pairs.push_back(std::pair(ii, add_atype.size() + atype.size()));
+      // std::cout << ii <<  " "
+      // 		<< atype[ii] << " "
+      // 		<< res_idx << " "
+      // 		<< type_asso[atype[ii]] << " "
+      // 		<< " pair "
+      // 		<< pairs.back().first << " " << pairs.back().second << "
+      // "
       // 		<< std::endl;
       add_coord.insert(add_coord.end(), tmp_coord.begin(), tmp_coord.end());
-      add_atype.push_back(type_asso[atype[ii]]);      
+      add_atype.push_back(type_asso[atype[ii]]);
     }
   }
   coord.insert(coord.end(), add_coord.begin(), add_coord.end());
   atype.insert(atype.end(), add_atype.begin(), add_atype.end());
   nloc = atype.size();
-  EXPECT_EQ(atype.size()*3, coord.size());
+  EXPECT_EQ(atype.size() * 3, coord.size());
 
   // get charge value
   std::vector charge(nloc);
-  for(int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     charge[ii] = charge_map[atype[ii]];
   }
-  
+
   // compute the recp part of the ele interaction
   VALUETYPE eener;
   std::vector eforce, evirial;
@@ -182,14 +182,14 @@ TYPED_TEST(TestDipoleCharge, cpu_lmp_nlist)
   eparam.beta = 0.2;
   eparam.spacing = 4;
   ewald_recp(eener, eforce, evirial, coord, charge, region, eparam);
-  
+
   EXPECT_LT(fabs(eener - expected_e[0]), 1e-6);
   EXPECT_EQ(eforce.size(), coord.size());
-  EXPECT_EQ(evirial.size(), 9);  
+  EXPECT_EQ(evirial.size(), 9);
 
   // extend the system with virtual atoms, and build nlist
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-  	       coord, atype, box, rc);
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   nall = coord_cpy.size() / 3;
   nghost = nall - nloc;
   ilist.resize(nloc);
@@ -202,8 +202,9 @@ TYPED_TEST(TestDipoleCharge, cpu_lmp_nlist)
   convert_nlist(inlist, nlist_data);
 
   // compute force and virial
-  std::vector force_, force, virial;
-  dm.compute(force_, virial, coord_cpy, atype_cpy, box, pairs, eforce, nghost, inlist);
+  std::vector force_, force, virial;
+  dm.compute(force_, virial, coord_cpy, atype_cpy, box, pairs, eforce, nghost,
+             inlist);
   // for(int ii = 0; ii < force_.size(); ++ii){
   //   std::cout << force_[ii] << " " ;
   // }
@@ -211,37 +212,37 @@ TYPED_TEST(TestDipoleCharge, cpu_lmp_nlist)
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
   // compare force
-  EXPECT_EQ(force.size(), nloc*3);
+  EXPECT_EQ(force.size(), nloc * 3);
   // note nloc > expected_f.size(), because nloc contains virtual atoms.
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(force[ii] - expected_f[ii]), 1e-6);
   }
 
   // add recp virial and viral corr to virial
   // virial = virial_recp + virial_dipolecharge + virial_corr
-  for (int dd0 = 0; dd0 < 3; ++dd0){
-    for (int dd1 = 0; dd1 < 3; ++dd1){
-      virial[dd0*3+dd1] += evirial[dd0*3+dd1];
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
+      virial[dd0 * 3 + dd1] += evirial[dd0 * 3 + dd1];
     }
-  }    
-  for(int ii = 0; ii < pairs.size(); ++ii){
+  }
+  for (int ii = 0; ii < pairs.size(); ++ii) {
     int idx0 = pairs[ii].first;
     int idx1 = pairs[ii].second;
-    for (int dd0 = 0; dd0 < 3; ++dd0){
-      for (int dd1 = 0; dd1 < 3; ++dd1){
-	virial[dd0*3+dd1] -= eforce[idx1*3+dd0] * dipole_recd[idx0*3+dd1];
+    for (int dd0 = 0; dd0 < 3; ++dd0) {
+      for (int dd1 = 0; dd1 < 3; ++dd1) {
+        virial[dd0 * 3 + dd1] -=
+            eforce[idx1 * 3 + dd0] * dipole_recd[idx0 * 3 + dd1];
       }
-    }    
+    }
   }
   // compare virial
-  EXPECT_EQ(virial.size(), 3*3);
-  for(int ii = 0; ii < expected_v.size(); ++ii){
+  EXPECT_EQ(virial.size(), 3 * 3);
+  for (int ii = 0; ii < expected_v.size(); ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_v[ii]), 1e-5);
   }
 }
 
-TYPED_TEST(TestDipoleCharge, print_summary)
-{
+TYPED_TEST(TestDipoleCharge, print_summary) {
   deepmd::hpp::DipoleChargeModifier& dm = this->dm;
   dm.print_summary("");
 }
diff --git a/source/api_c/tests/test_utils.h b/source/api_c/tests/test_utils.h
index 64d38ea60c..45d4f3e515 100644
--- a/source/api_c/tests/test_utils.h
+++ b/source/api_c/tests/test_utils.h
@@ -1,6 +1,8 @@
 #pragma once
 #include 
+
 #include 
+
 #include "coord.h"
 #include "neighbor_list.h"
 
@@ -8,100 +10,90 @@
 
 typedef testing::Types ValueTypes;
 
-template
-inline void 
-_fold_back(
-    typename std::vector::iterator out,
-    const typename std::vector::const_iterator in, 
-    const std::vector &mapping,
-    const int nloc,
-    const int nall,
-    const int ndim)
-{
+template 
+inline void _fold_back(typename std::vector::iterator out,
+                       const typename std::vector::const_iterator in,
+                       const std::vector &mapping,
+                       const int nloc,
+                       const int nall,
+                       const int ndim) {
   // out.resize(nloc*ndim);
-  std::copy(in, in + nloc*ndim, out);
-  for(int ii = nloc; ii < nall; ++ii){
+  std::copy(in, in + nloc * ndim, out);
+  for (int ii = nloc; ii < nall; ++ii) {
     int in_idx = ii;
     int out_idx = mapping[in_idx];
-    for(int dd = 0; dd < ndim; ++dd){
+    for (int dd = 0; dd < ndim; ++dd) {
       *(out + out_idx * ndim + dd) += *(in + in_idx * ndim + dd);
     }
   }
 }
 
-template
-inline void 
-_fold_back(
-    std::vector &out,
-    const std::vector &in,
-    const std::vector &mapping,
-    const int nloc,
-    const int nall,
-    const int ndim)
-{
-  out.resize(nloc*ndim);
+template 
+inline void _fold_back(std::vector &out,
+                       const std::vector &in,
+                       const std::vector &mapping,
+                       const int nloc,
+                       const int nall,
+                       const int ndim) {
+  out.resize(nloc * ndim);
   _fold_back(out.begin(), in.begin(), mapping, nloc, nall, ndim);
 }
 
-template
-inline void
-_build_nlist(
-    std::vector> &nlist_data,
-    std::vector & coord_cpy,
-    std::vector & atype_cpy,
-    std::vector & mapping,
-    const std::vector & coord,
-    const std::vector & atype,
-    const std::vector & box,
-    const float & rc)
-{
+template 
+inline void _build_nlist(std::vector> &nlist_data,
+                         std::vector &coord_cpy,
+                         std::vector &atype_cpy,
+                         std::vector &mapping,
+                         const std::vector &coord,
+                         const std::vector &atype,
+                         const std::vector &box,
+                         const float &rc) {
   // convert VALUETYPE to double, it looks like copy_coord only accepts double
   std::vector coord_cpy_;
   std::vector coord_(coord.begin(), coord.end());
   std::vector box_(box.begin(), box.end());
 
-  SimulationRegion region;
+  SimulationRegion region;
   region.reinitBox(&box_[0]);
   std::vector ncell, ngcell;
-  copy_coord(coord_cpy_, atype_cpy, mapping, ncell, ngcell, coord_, atype, rc, region);
+  copy_coord(coord_cpy_, atype_cpy, mapping, ncell, ngcell, coord_, atype, rc,
+             region);
   std::vector nat_stt, ext_stt, ext_end;
   nat_stt.resize(3);
   ext_stt.resize(3);
   ext_end.resize(3);
-  for (int dd = 0; dd < 3; ++dd){
+  for (int dd = 0; dd < 3; ++dd) {
     ext_stt[dd] = -ngcell[dd];
     ext_end[dd] = ncell[dd] + ngcell[dd];
   }
   int nloc = coord_.size() / 3;
   int nall = coord_cpy_.size() / 3;
   std::vector> nlist_r_cpy;
-  build_nlist(nlist_data, nlist_r_cpy, coord_cpy_, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+  build_nlist(nlist_data, nlist_r_cpy, coord_cpy_, nloc, rc, rc, nat_stt, ncell,
+              ext_stt, ext_end, region, ncell);
 
   // convert double to VALUETYPE
   coord_cpy.assign(coord_cpy_.begin(), coord_cpy_.end());
 }
 
-template
-class EnergyModelTest
-{
+template 
+class EnergyModelTest {
   double hh = std::is_same::value ? 1e-5 : 1e-2;
-  double level = std::is_same::value ? 1e-6 : 1e-2; // expected?
-public:
-  virtual void compute (
-      double & ener,
-      std::vector &	force,
-      std::vector &	virial,
-      const std::vector & coord,
-      const std::vector & box
-      ) = 0;
-  void test_f (
-      const std::vector & coord,
-      const std::vector & box) {
+  double level =
+      std::is_same::value ? 1e-6 : 1e-2;  // expected?
+ public:
+  virtual void compute(double &ener,
+                       std::vector &force,
+                       std::vector &virial,
+                       const std::vector &coord,
+                       const std::vector &box) = 0;
+  void test_f(const std::vector &coord,
+              const std::vector &box) {
     int ndof = coord.size();
     double ener;
     std::vector force, virial;
     compute(ener, force, virial, coord, box);
-    for(int ii = 0; ii < ndof; ++ii){
+    for (int ii = 0; ii < ndof; ++ii) {
       std::vector coord0(coord), coord1(coord);
       double ener0, ener1;
       std::vector forcet, virialt;
@@ -109,21 +101,20 @@ class EnergyModelTest
       coord1[ii] -= hh;
       compute(ener0, forcet, virialt, coord0, box);
       compute(ener1, forcet, virialt, coord1, box);
-      VALUETYPE num = - (ener0 - ener1) / (2.*hh);
+      VALUETYPE num = -(ener0 - ener1) / (2. * hh);
       VALUETYPE ana = force[ii];
       EXPECT_LT(fabs(num - ana), level);
     }
-  }  
-  void test_v(
-      const std::vector & coord,
-      const std::vector & box) {
+  }
+  void test_v(const std::vector &coord,
+              const std::vector &box) {
     std::vector num_diff(9);
     double ener;
     std::vector force, virial;
     compute(ener, force, virial, coord, box);
     deepmd::Region region;
     init_region_cpu(region, &box[0]);
-    for(int ii = 0; ii < 9; ++ii){
+    for (int ii = 0; ii < 9; ++ii) {
       std::vector box0(box), box1(box);
       box0[ii] += hh;
       box1[ii] -= hh;
@@ -132,35 +123,34 @@ class EnergyModelTest
       init_region_cpu(region1, &box1[0]);
       std::vector coord0(coord), coord1(coord);
       int natoms = coord.size() / 3;
-      for(int ii = 0; ii < natoms; ++ii){
-	VALUETYPE pi[3];
-	convert_to_inter_cpu(pi, region, &coord[ii*3]);
-	convert_to_phys_cpu(&coord0[ii*3], region0, pi);
+      for (int ii = 0; ii < natoms; ++ii) {
+        VALUETYPE pi[3];
+        convert_to_inter_cpu(pi, region, &coord[ii * 3]);
+        convert_to_phys_cpu(&coord0[ii * 3], region0, pi);
       }
-      for(int ii = 0; ii < natoms; ++ii){
-	VALUETYPE pi[3];
-	convert_to_inter_cpu(pi, region, &coord[ii*3]);
-	convert_to_phys_cpu(&coord1[ii*3], region1, pi);
+      for (int ii = 0; ii < natoms; ++ii) {
+        VALUETYPE pi[3];
+        convert_to_inter_cpu(pi, region, &coord[ii * 3]);
+        convert_to_phys_cpu(&coord1[ii * 3], region1, pi);
       }
       double ener0, ener1;
       std::vector forcet, virialt;
       compute(ener0, forcet, virialt, coord0, box0);
       compute(ener1, forcet, virialt, coord1, box1);
-      num_diff[ii] = - (ener0 - ener1) / (2.*hh);
+      num_diff[ii] = -(ener0 - ener1) / (2. * hh);
     }
     std::vector num_virial(9, 0);
-    for(int dd0 = 0; dd0 < 3; ++dd0){
-      for(int dd1 = 0; dd1 < 3; ++dd1){
-	for(int dd = 0; dd < 3; ++dd){
-	  num_virial[dd0*3+dd1] += num_diff[dd*3+dd0] * box[dd*3+dd1];
-	  // num_virial[dd0*3+dd1] += num_diff[dd0*3+dd] * box[dd1*3+dd];
-	}
+    for (int dd0 = 0; dd0 < 3; ++dd0) {
+      for (int dd1 = 0; dd1 < 3; ++dd1) {
+        for (int dd = 0; dd < 3; ++dd) {
+          num_virial[dd0 * 3 + dd1] +=
+              num_diff[dd * 3 + dd0] * box[dd * 3 + dd1];
+          // num_virial[dd0*3+dd1] += num_diff[dd0*3+dd] * box[dd1*3+dd];
+        }
       }
     }
-    for(int ii = 0; ii < 9; ++ii){
+    for (int ii = 0; ii < 9; ++ii) {
       EXPECT_LT(fabs(num_virial[ii] - virial[ii]), level);
     }
   }
 };
-
-
diff --git a/source/api_cc/CMakeLists.txt b/source/api_cc/CMakeLists.txt
index 1ee043a45b..7dc836a873 100644
--- a/source/api_cc/CMakeLists.txt
+++ b/source/api_cc/CMakeLists.txt
@@ -1,28 +1,23 @@
 # libmd
 
-configure_file(
-  ${CMAKE_CURRENT_SOURCE_DIR}/include/version.h.in
-  version.h
-  @ONLY
-)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/version.h.in version.h @ONLY)
 
 file(GLOB LIB_SRC src/*.cc src/*.cpp)
 file(GLOB INC_SRC include/*.h ${CMAKE_CURRENT_BINARY_DIR}/version.h)
 
-
-set (libname "${LIB_DEEPMD_CC}")
+set(libname "${LIB_DEEPMD_CC}")
 
 add_library(${libname} SHARED ${LIB_SRC})
 
 # link: libdeepmd libdeepmd_op libtensorflow_cc libtensorflow_framework
-target_link_libraries (${libname} PUBLIC ${LIB_DEEPMD})
-target_link_libraries (${libname} PRIVATE TensorFlow::tensorflow_cc TensorFlow::tensorflow_framework)
+target_link_libraries(${libname} PUBLIC ${LIB_DEEPMD})
+target_link_libraries(${libname} PRIVATE TensorFlow::tensorflow_cc
+                                         TensorFlow::tensorflow_framework)
 target_include_directories(
-  ${libname} PUBLIC
-  $
-  $
-  $
-  )
+  ${libname}
+  PUBLIC $
+         $
+         $)
 target_precompile_headers(${libname} PUBLIC [["common.h"]])
 
 if(Protobuf_LIBRARY)
@@ -30,44 +25,32 @@ if(Protobuf_LIBRARY)
 endif()
 
 set_target_properties(
-  ${libname} 
-  PROPERTIES 
-  INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}"
-)
-target_compile_definitions(${libname}
-  PRIVATE TF_PRIVATE
-)
-if (CMAKE_TESTING_ENABLED)
+  ${libname} PROPERTIES INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}")
+target_compile_definitions(${libname} PRIVATE TF_PRIVATE)
+if(CMAKE_TESTING_ENABLED)
   target_link_libraries(${libname} PRIVATE coverage_config)
 endif()
 target_compile_features(${libname} PUBLIC cxx_std_11)
 
 if(BUILD_PY_IF)
+  install(TARGETS ${libname} DESTINATION deepmd/op/)
+else(BUILD_PY_IF)
   install(
     TARGETS ${libname}
-    DESTINATION deepmd/op/
-  )
-else(BUILD_PY_IF)
-install(
-  TARGETS ${libname}
-  EXPORT ${CMAKE_PROJECT_NAME}Targets
-  DESTINATION lib/
-)
+    EXPORT ${CMAKE_PROJECT_NAME}Targets
+    DESTINATION lib/)
 
-install(
-  FILES		${INC_SRC}
-  DESTINATION	include/deepmd
-)
+  install(FILES ${INC_SRC} DESTINATION include/deepmd)
 
-# make a link to libdeepmd_cc_low.so for compatibility
-INSTALL(CODE "execute_process( \
+  # make a link to libdeepmd_cc_low.so for compatibility
+  install(
+    CODE "execute_process( \
 COMMAND ${CMAKE_COMMAND} -E create_symlink \
 ${CMAKE_SHARED_LIBRARY_PREFIX}${libname}${CMAKE_SHARED_LIBRARY_SUFFIX} \
 ${CMAKE_INSTALL_PREFIX}/lib/${CMAKE_SHARED_LIBRARY_PREFIX}${libname}${LOW_PREC_VARIANT}${CMAKE_SHARED_LIBRARY_SUFFIX}   \
-)"
-)
+)")
 
-if (CMAKE_TESTING_ENABLED)
-  add_subdirectory(tests)
-endif()
-endif(BUILD_PY_IF)
\ No newline at end of file
+  if(CMAKE_TESTING_ENABLED)
+    add_subdirectory(tests)
+  endif()
+endif(BUILD_PY_IF)
diff --git a/source/api_cc/include/AtomMap.h b/source/api_cc/include/AtomMap.h
index 1cc3b6b67e..db500ab4be 100644
--- a/source/api_cc/include/AtomMap.h
+++ b/source/api_cc/include/AtomMap.h
@@ -4,27 +4,27 @@
 
 // using namespace std;
 
-namespace deepmd{
-class AtomMap 
-{
-public:
+namespace deepmd {
+class AtomMap {
+ public:
   AtomMap();
-  AtomMap(const std::vector::const_iterator in_begin, 
-	     const std::vector::const_iterator in_end);
+  AtomMap(const std::vector::const_iterator in_begin,
+          const std::vector::const_iterator in_end);
   template 
-  void forward (typename std::vector::iterator out,
-		const typename std::vector::const_iterator in, 
-		const int stride = 1) const ;
+  void forward(typename std::vector::iterator out,
+               const typename std::vector::const_iterator in,
+               const int stride = 1) const;
   template 
-  void backward (typename std::vector::iterator out,
-		 const typename std::vector::const_iterator in, 
-		 const int stride = 1) const ;
-  const std::vector & get_type () const {return atype;}
-  const std::vector & get_fwd_map () const {return fwd_idx_map;}
-  const std::vector & get_bkw_map () const {return idx_map;}
-private:
+  void backward(typename std::vector::iterator out,
+                const typename std::vector::const_iterator in,
+                const int stride = 1) const;
+  const std::vector& get_type() const { return atype; }
+  const std::vector& get_fwd_map() const { return fwd_idx_map; }
+  const std::vector& get_bkw_map() const { return idx_map; }
+
+ private:
   std::vector idx_map;
   std::vector fwd_idx_map;
   std::vector atype;
 };
-}
+}  // namespace deepmd
diff --git a/source/api_cc/include/DataModifier.h b/source/api_cc/include/DataModifier.h
index 50cba7fb5d..cb2737b76f 100644
--- a/source/api_cc/include/DataModifier.h
+++ b/source/api_cc/include/DataModifier.h
@@ -2,80 +2,94 @@
 
 #include "DeepPot.h"
 
-namespace deepmd{
+namespace deepmd {
 /**
-* @brief Dipole charge modifier.
-**/
-class DipoleChargeModifier
-{
-public:
+ * @brief Dipole charge modifier.
+ **/
+class DipoleChargeModifier {
+ public:
   /**
-  * @brief Dipole charge modifier without initialization.
-  **/
+   * @brief Dipole charge modifier without initialization.
+   **/
   DipoleChargeModifier();
   /**
-  * @brief Dipole charge modifier without initialization.
-  * @param[in] model The name of the frozen model file.
-  * @param[in] gpu_rank The GPU rank. Default is 0.
-  * @param[in] name_scope The name scope.
-  **/
-  DipoleChargeModifier(const std::string & model, 
-	       const int & gpu_rank = 0, 
-	       const std::string & name_scope = "");
-  ~DipoleChargeModifier ();
+   * @brief Dipole charge modifier without initialization.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] name_scope The name scope.
+   **/
+  DipoleChargeModifier(const std::string& model,
+                       const int& gpu_rank = 0,
+                       const std::string& name_scope = "");
+  ~DipoleChargeModifier();
   /**
-  * @brief Initialize the dipole charge modifier.
-  * @param[in] model The name of the frozen model file.
-  * @param[in] gpu_rank The GPU rank. Default is 0.
-  * @param[in] name_scope The name scope.
-  **/
-  void init (const std::string & model, 
-	     const int & gpu_rank = 0, 
-	     const std::string & name_scope = "");
+   * @brief Initialize the dipole charge modifier.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] name_scope The name scope.
+   **/
+  void init(const std::string& model,
+            const int& gpu_rank = 0,
+            const std::string& name_scope = "");
   /**
-  * @brief Print the DP summary to the screen.
-  * @param[in] pre The prefix to each line.
-  **/
-  void print_summary(const std::string &pre) const;
-public:
+   * @brief Print the DP summary to the screen.
+   * @param[in] pre The prefix to each line.
+   **/
+  void print_summary(const std::string& pre) const;
+
+ public:
   /**
-  * @brief Evaluate the force and virial correction by using this dipole charge modifier.
-  * @param[out] dfcorr_ The force correction on each atom.
-  * @param[out] dvcorr_ The virial correction.
-  * @param[in] dcoord_ The coordinates of atoms. The array should be of size natoms x 3.
-  * @param[in] datype_ The atom types. The list should contain natoms ints.
-  * @param[in] dbox The cell of the region. The array should be of size 9.
-  * @param[in] pairs The pairs of atoms. The list should contain npairs pairs of ints.
-  * @param[in] delef_ The electric field on each atom. The array should be of size natoms x 3.
-  * @param[in] nghost The number of ghost atoms.
-  * @param[in] lmp_list The neighbor list.
-  **/
-  template
-  void compute (std::vector &		dfcorr_,
-		std::vector &		dvcorr_,
-		const std::vector &	dcoord_,
-		const std::vector &		datype_,
-		const std::vector &	dbox, 
-		const std::vector> &	pairs,
-		const std::vector &	delef_, 
-		const int			nghost,
-		const InputNlist &	lmp_list);
+   * @brief Evaluate the force and virial correction by using this dipole charge
+   *modifier.
+   * @param[out] dfcorr_ The force correction on each atom.
+   * @param[out] dvcorr_ The virial correction.
+   * @param[in] dcoord_ The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] datype_ The atom types. The list should contain natoms ints.
+   * @param[in] dbox The cell of the region. The array should be of size 9.
+   * @param[in] pairs The pairs of atoms. The list should contain npairs pairs
+   *of ints.
+   * @param[in] delef_ The electric field on each atom. The array should be of
+   *size natoms x 3.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] lmp_list The neighbor list.
+   **/
+  template 
+  void compute(std::vector& dfcorr_,
+               std::vector& dvcorr_,
+               const std::vector& dcoord_,
+               const std::vector& datype_,
+               const std::vector& dbox,
+               const std::vector>& pairs,
+               const std::vector& delef_,
+               const int nghost,
+               const InputNlist& lmp_list);
   /**
    * @brief Get cutoff radius.
    * @return double cutoff radius.
    */
-  double cutoff () const {assert(inited); return rcut;};
+  double cutoff() const {
+    assert(inited);
+    return rcut;
+  };
   /**
    * @brief Get the number of atom types.
    * @return int number of atom types.
    */
-  int numb_types () const {assert(inited); return ntypes;};
+  int numb_types() const {
+    assert(inited);
+    return ntypes;
+  };
   /**
    * @brief Get the list of sel types.
    * @return The list of sel types.
    */
-  std::vector sel_types () const {assert(inited); return sel_type;};
-private:
+  std::vector sel_types() const {
+    assert(inited);
+    return sel_type;
+  };
+
+ private:
   tensorflow::Session* session;
   std::string name_scope, name_prefix;
   int num_intra_nthreads, num_inter_nthreads;
@@ -87,15 +101,17 @@ class DipoleChargeModifier
   int ntypes;
   std::string model_type;
   std::vector sel_type;
-  template VT get_scalar(const std::string & name) const;
-  template void get_vector(std::vector & vec, const std::string & name) const;
-  template
-  void run_model (std::vector &		dforce,
-		  std::vector &		dvirial,
-		  tensorflow::Session *			session,
-		  const std::vector> & input_tensors,
-		  const AtomMap &	atommap,
-		  const int			nghost);
+  template 
+  VT get_scalar(const std::string& name) const;
+  template 
+  void get_vector(std::vector& vec, const std::string& name) const;
+  template 
+  void run_model(std::vector& dforce,
+                 std::vector& dvirial,
+                 tensorflow::Session* session,
+                 const std::vector>&
+                     input_tensors,
+                 const AtomMap& atommap,
+                 const int nghost);
 };
-}
-
+}  // namespace deepmd
diff --git a/source/api_cc/include/DeepPot.h b/source/api_cc/include/DeepPot.h
index 9b928c47f1..3f58aef89c 100644
--- a/source/api_cc/include/DeepPot.h
+++ b/source/api_cc/include/DeepPot.h
@@ -3,189 +3,227 @@
 #include "common.h"
 #include "neighbor_list.h"
 
-namespace deepmd{
+namespace deepmd {
 /**
-* @brief Deep Potential.
-**/
-class DeepPot 
-{
-public:
+ * @brief Deep Potential.
+ **/
+class DeepPot {
+ public:
   /**
-  * @brief DP constructor without initialization.
-  **/
-  DeepPot () ;
-  ~DeepPot() ;
+   * @brief DP constructor without initialization.
+   **/
+  DeepPot();
+  ~DeepPot();
   /**
-  * @brief DP constructor with initialization.
-  * @param[in] model The name of the frozen model file.
-  * @param[in] gpu_rank The GPU rank. Default is 0.
-  * @param[in] file_content The content of the model file. If it is not empty, DP will read from the string instead of the file.
-  **/
-  DeepPot  (const std::string & model, const int & gpu_rank = 0, const std::string & file_content = "");
+   * @brief DP constructor with initialization.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] file_content The content of the model file. If it is not empty,
+   *DP will read from the string instead of the file.
+   **/
+  DeepPot(const std::string& model,
+          const int& gpu_rank = 0,
+          const std::string& file_content = "");
   /**
-  * @brief Initialize the DP.
-  * @param[in] model The name of the frozen model file.
-  * @param[in] gpu_rank The GPU rank. Default is 0.
-  * @param[in] file_content The content of the model file. If it is not empty, DP will read from the string instead of the file.
-  **/
-  void init (const std::string & model, const int & gpu_rank = 0, const std::string & file_content = "");
+   * @brief Initialize the DP.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] file_content The content of the model file. If it is not empty,
+   *DP will read from the string instead of the file.
+   **/
+  void init(const std::string& model,
+            const int& gpu_rank = 0,
+            const std::string& file_content = "");
   /**
-  * @brief Print the DP summary to the screen.
-  * @param[in] pre The prefix to each line.
-  **/
-  void print_summary(const std::string &pre) const;
-public:
+   * @brief Print the DP summary to the screen.
+   * @param[in] pre The prefix to each line.
+   **/
+  void print_summary(const std::string& pre) const;
+
+ public:
   /**
-  * @brief Evaluate the energy, force and virial by using this DP.
-  * @param[out] ener The system energy.
-  * @param[out] force The force on each atom.
-  * @param[out] virial The virial.
-  * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-  * @param[in] atype The atom types. The list should contain natoms ints.
-  * @param[in] box The cell of the region. The array should be of size nframes x 9.
-  * @param[in] fparam The frame parameter. The array can be of size :
-      * nframes x dim_fparam.
-      * dim_fparam. Then all frames are assumed to be provided with the same fparam.
-  * @param[in] aparam The atomic parameter The array can be of size :
-      * nframes x natoms x dim_aparam.
-      * natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
-      * dim_aparam. Then all frames and atoms are provided with the same aparam.
-  **/
-  template
-  void compute (ENERGYTYPE &			ener,
-		std::vector &	force,
-		std::vector &	virial,
-		const std::vector &	coord,
-		const std::vector &	atype,
-		const std::vector &	box, 
-		const std::vector&	fparam = std::vector(),
-		const std::vector&	aparam = std::vector());
+   * @brief Evaluate the energy, force and virial by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam. dim_aparam. Then all frames and atoms are provided with the
+   *same aparam.
+   **/
+  template 
+  void compute(ENERGYTYPE& ener,
+               std::vector& force,
+               std::vector& virial,
+               const std::vector& coord,
+               const std::vector& atype,
+               const std::vector& box,
+               const std::vector& fparam = std::vector(),
+               const std::vector& aparam = std::vector());
   /**
-  * @brief Evaluate the energy, force and virial by using this DP.
-  * @param[out] ener The system energy.
-  * @param[out] force The force on each atom.
-  * @param[out] virial The virial.
-  * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-  * @param[in] atype The atom types. The list should contain natoms ints.
-  * @param[in] box The cell of the region. The array should be of size nframes x 9.
-  * @param[in] nghost The number of ghost atoms.
-  * @param[in] inlist The input neighbour list.
-  * @param[in] ago Update the internal neighbour list if ago is 0.
-  * @param[in] fparam The frame parameter. The array can be of size :
-      * nframes x dim_fparam.
-      * dim_fparam. Then all frames are assumed to be provided with the same fparam.
-  * @param[in] aparam The atomic parameter The array can be of size :
-      * nframes x natoms x dim_aparam.
-      * natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
-      * dim_aparam. Then all frames and atoms are provided with the same aparam.
-  **/
-  template
-  void compute (ENERGYTYPE &			ener,
-		std::vector &	force,
-		std::vector &	virial,
-		const std::vector &	coord,
-		const std::vector &	atype,
-		const std::vector &	box, 
-		const int			nghost,
-		const InputNlist &		inlist,
-		const int&			ago,
-		const std::vector&	fparam = std::vector(),
-		const std::vector&	aparam = std::vector());
+   * @brief Evaluate the energy, force and virial by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] inlist The input neighbour list.
+   * @param[in] ago Update the internal neighbour list if ago is 0.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam. dim_aparam. Then all frames and atoms are provided with the
+   *same aparam.
+   **/
+  template 
+  void compute(ENERGYTYPE& ener,
+               std::vector& force,
+               std::vector& virial,
+               const std::vector& coord,
+               const std::vector& atype,
+               const std::vector& box,
+               const int nghost,
+               const InputNlist& inlist,
+               const int& ago,
+               const std::vector& fparam = std::vector(),
+               const std::vector& aparam = std::vector());
   /**
-  * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial by using this DP.
-  * @param[out] ener The system energy.
-  * @param[out] force The force on each atom.
-  * @param[out] virial The virial.
-  * @param[out] atom_energy The atomic energy.
-  * @param[out] atom_virial The atomic virial.
-  * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-  * @param[in] atype The atom types. The list should contain natoms ints.
-  * @param[in] box The cell of the region. The array should be of size nframes x 9.
-  * @param[in] fparam The frame parameter. The array can be of size :
-      * nframes x dim_fparam.
-      * dim_fparam. Then all frames are assumed to be provided with the same fparam.
-  * @param[in] aparam The atomic parameter The array can be of size :
-      * nframes x natoms x dim_aparam.
-      * natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
-      * dim_aparam. Then all frames and atoms are provided with the same aparam.
-  **/
-  template
-  void compute (ENERGYTYPE &			ener,
-		std::vector &	force,
-		std::vector &	virial,
-		std::vector &	atom_energy,
-		std::vector &	atom_virial,
-		const std::vector &	coord,
-		const std::vector &	atype,
-		const std::vector &	box,
-		const std::vector&	fparam = std::vector(),
-		const std::vector&	aparam = std::vector());
+   * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_energy The atomic energy.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam. dim_aparam. Then all frames and atoms are provided with the
+   *same aparam.
+   **/
+  template 
+  void compute(ENERGYTYPE& ener,
+               std::vector& force,
+               std::vector& virial,
+               std::vector& atom_energy,
+               std::vector& atom_virial,
+               const std::vector& coord,
+               const std::vector& atype,
+               const std::vector& box,
+               const std::vector& fparam = std::vector(),
+               const std::vector& aparam = std::vector());
   /**
-  * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial by using this DP.
-  * @param[out] ener The system energy.
-  * @param[out] force The force on each atom.
-  * @param[out] virial The virial.
-  * @param[out] atom_energy The atomic energy.
-  * @param[out] atom_virial The atomic virial.
-  * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-  * @param[in] atype The atom types. The list should contain natoms ints.
-  * @param[in] box The cell of the region. The array should be of size nframes x 9.
-  * @param[in] nghost The number of ghost atoms.
-  * @param[in] lmp_list The input neighbour list.
-  * @param[in] ago Update the internal neighbour list if ago is 0.
-  * @param[in] fparam The frame parameter. The array can be of size :
-      * nframes x dim_fparam.
-      * dim_fparam. Then all frames are assumed to be provided with the same fparam.
-  * @param[in] aparam The atomic parameter The array can be of size :
-      * nframes x natoms x dim_aparam.
-      * natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
-      * dim_aparam. Then all frames and atoms are provided with the same aparam.
-  **/
-  template
-  void compute (ENERGYTYPE &			ener,
-		std::vector &	force,
-		std::vector &	virial,
-		std::vector &	atom_energy,
-		std::vector &	atom_virial,
-		const std::vector &	coord,
-		const std::vector &	atype,
-		const std::vector &	box, 
-		const int			nghost, 
-		const InputNlist &	lmp_list,
-		const int&			ago,
-		const std::vector&	fparam = std::vector(),
-		const std::vector&	aparam = std::vector());
+   * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
+   *by using this DP.
+   * @param[out] ener The system energy.
+   * @param[out] force The force on each atom.
+   * @param[out] virial The virial.
+   * @param[out] atom_energy The atomic energy.
+   * @param[out] atom_virial The atomic virial.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] lmp_list The input neighbour list.
+   * @param[in] ago Update the internal neighbour list if ago is 0.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam. dim_aparam. Then all frames and atoms are provided with the
+   *same aparam.
+   **/
+  template 
+  void compute(ENERGYTYPE& ener,
+               std::vector& force,
+               std::vector& virial,
+               std::vector& atom_energy,
+               std::vector& atom_virial,
+               const std::vector& coord,
+               const std::vector& atype,
+               const std::vector& box,
+               const int nghost,
+               const InputNlist& lmp_list,
+               const int& ago,
+               const std::vector& fparam = std::vector(),
+               const std::vector& aparam = std::vector());
   /**
-  * @brief Get the cutoff radius.
-  * @return The cutoff radius.
-  **/
-  double cutoff () const {assert(inited); return rcut;};
+   * @brief Get the cutoff radius.
+   * @return The cutoff radius.
+   **/
+  double cutoff() const {
+    assert(inited);
+    return rcut;
+  };
   /**
-  * @brief Get the number of types.
-  * @return The number of types.
-  **/
-  int numb_types () const {assert(inited); return ntypes;};
+   * @brief Get the number of types.
+   * @return The number of types.
+   **/
+  int numb_types() const {
+    assert(inited);
+    return ntypes;
+  };
   /**
-  * @brief Get the dimension of the frame parameter.
-  * @return The dimension of the frame parameter.
-  **/
-  int dim_fparam () const {assert(inited); return dfparam;};
+   * @brief Get the dimension of the frame parameter.
+   * @return The dimension of the frame parameter.
+   **/
+  int dim_fparam() const {
+    assert(inited);
+    return dfparam;
+  };
   /**
-  * @brief Get the dimension of the atomic parameter.
-  * @return The dimension of the atomic parameter.
-  **/
-  int dim_aparam () const {assert(inited); return daparam;};
+   * @brief Get the dimension of the atomic parameter.
+   * @return The dimension of the atomic parameter.
+   **/
+  int dim_aparam() const {
+    assert(inited);
+    return daparam;
+  };
   /**
-  * @brief Get the type map (element name of the atom types) of this model.
-  * @param[out] type_map The type map of this model.
-  **/
-  void get_type_map (std::string & type_map);
-private:
+   * @brief Get the type map (element name of the atom types) of this model.
+   * @param[out] type_map The type map of this model.
+   **/
+  void get_type_map(std::string& type_map);
+
+ private:
   tensorflow::Session* session;
   int num_intra_nthreads, num_inter_nthreads;
   tensorflow::GraphDef* graph_def;
   bool inited;
-  template VT get_scalar(const std::string & name) const;
+  template 
+  VT get_scalar(const std::string& name) const;
   // VALUETYPE get_rcut () const;
   // int get_ntypes () const;
   double rcut;
@@ -196,21 +234,22 @@ class DeepPot
   int ntypes;
   int dfparam;
   int daparam;
-  template
-  void validate_fparam_aparam(const int & nloc,
-			      const std::vector &fparam,
-			      const std::vector &aparam)const ;
-  template
-  void compute_inner (ENERGYTYPE &			ener,
-		      std::vector &		force,
-		      std::vector &		virial,
-		      const std::vector &	coord,
-		      const std::vector &		atype,
-		      const std::vector &	box, 
-		      const int				nghost,
-		      const int &			ago,
-		      const std::vector&	fparam = std::vector(),
-		      const std::vector&	aparam = std::vector());
+  template 
+  void validate_fparam_aparam(const int& nloc,
+                              const std::vector& fparam,
+                              const std::vector& aparam) const;
+  template 
+  void compute_inner(
+      ENERGYTYPE& ener,
+      std::vector& force,
+      std::vector& virial,
+      const std::vector& coord,
+      const std::vector& atype,
+      const std::vector& box,
+      const int nghost,
+      const int& ago,
+      const std::vector& fparam = std::vector(),
+      const std::vector& aparam = std::vector());
 
   // copy neighbor list info from host
   bool init_nbor;
@@ -223,194 +262,222 @@ class DeepPot
   std::vector get_sel_a() const;
 };
 
-class DeepPotModelDevi
-{
-public:
+class DeepPotModelDevi {
+ public:
   /**
-  * @brief DP model deviation constructor without initialization.
-  **/
-  DeepPotModelDevi () ;
-  ~DeepPotModelDevi() ;
+   * @brief DP model deviation constructor without initialization.
+   **/
+  DeepPotModelDevi();
+  ~DeepPotModelDevi();
   /**
-  * @brief DP model deviation constructor with initialization.
-  * @param[in] models The names of the frozen model files.
-  * @param[in] gpu_rank The GPU rank. Default is 0.
-  * @param[in] file_contents The contents of the model files. If it is not empty, DP will read from the strings instead of the files.
-  **/
-  DeepPotModelDevi  (const std::vector & models, const int & gpu_rank = 0, const std::vector & file_contents = std::vector());
+   * @brief DP model deviation constructor with initialization.
+   * @param[in] models The names of the frozen model files.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] file_contents The contents of the model files. If it is not
+   *empty, DP will read from the strings instead of the files.
+   **/
+  DeepPotModelDevi(const std::vector& models,
+                   const int& gpu_rank = 0,
+                   const std::vector& file_contents =
+                       std::vector());
   /**
-  * @brief Initialize the DP model deviation contrcutor.
-  * @param[in] models The names of the frozen model files.
-  * @param[in] gpu_rank The GPU rank. Default is 0.
-  * @param[in] file_contents The contents of the model files. If it is not empty, DP will read from the strings instead of the files.
-  **/
-  void init (const std::vector & models, const int & gpu_rank = 0, const std::vector & file_contents = std::vector());
-public:
+   * @brief Initialize the DP model deviation contrcutor.
+   * @param[in] models The names of the frozen model files.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] file_contents The contents of the model files. If it is not
+   *empty, DP will read from the strings instead of the files.
+   **/
+  void init(const std::vector& models,
+            const int& gpu_rank = 0,
+            const std::vector& file_contents =
+                std::vector());
+
+ public:
   /**
-  * @brief Evaluate the energy, force and virial by using these DP models.
-  * @param[out] all_ener The system energies of all models.
-  * @param[out] all_force The forces on each atom of all models.
-  * @param[out] all_virial The virials of all models.
-  * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-  * @param[in] atype The atom types. The list should contain natoms ints.
-  * @param[in] box The cell of the region. The array should be of size nframes x 9.
-  * @param[in] nghost The number of ghost atoms.
-  * @param[in] lmp_list The input neighbour list.
-  * @param[in] ago Update the internal neighbour list if ago is 0.
-  * @param[in] fparam The frame parameter. The array can be of size :
-      * nframes x dim_fparam.
-      * dim_fparam. Then all frames are assumed to be provided with the same fparam.
-  * @param[in] aparam The atomic parameter The array can be of size :
-      * nframes x natoms x dim_aparam.
-      * natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
-      * dim_aparam. Then all frames and atoms are provided with the same aparam.
-  **/
-  template
-  void compute (std::vector &		all_ener,
-		std::vector > &	all_force,
-		std::vector > &	all_virial,
-		const std::vector &	coord,
-		const std::vector &		atype,
-		const std::vector &	box,
-		const int			nghost,
-		const InputNlist &	lmp_list,
-		const int 				&   ago,
-		const std::vector	&	fparam = std::vector(),
-		const std::vector	&	aparam = std::vector());
+   * @brief Evaluate the energy, force and virial by using these DP models.
+   * @param[out] all_ener The system energies of all models.
+   * @param[out] all_force The forces on each atom of all models.
+   * @param[out] all_virial The virials of all models.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] lmp_list The input neighbour list.
+   * @param[in] ago Update the internal neighbour list if ago is 0.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam. dim_aparam. Then all frames and atoms are provided with the
+   *same aparam.
+   **/
+  template 
+  void compute(std::vector& all_ener,
+               std::vector >& all_force,
+               std::vector >& all_virial,
+               const std::vector& coord,
+               const std::vector& atype,
+               const std::vector& box,
+               const int nghost,
+               const InputNlist& lmp_list,
+               const int& ago,
+               const std::vector& fparam = std::vector(),
+               const std::vector& aparam = std::vector());
   /**
-  * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial by using these DP models.
-  * @param[out] all_ener The system energies of all models.
-  * @param[out] all_force The forces on each atom of all models.
-  * @param[out] all_virial The virials of all models.
-  * @param[out] all_atom_energy The atomic energies of all models.
-  * @param[out] all_atom_virial The atomic virials of all models.
-  * @param[in] coord The coordinates of atoms. The array should be of size nframes x natoms x 3.
-  * @param[in] atype The atom types. The list should contain natoms ints.
-  * @param[in] box The cell of the region. The array should be of size nframes x 9.
-  * @param[in] nghost The number of ghost atoms.
-  * @param[in] lmp_list The input neighbour list.
-  * @param[in] ago Update the internal neighbour list if ago is 0.
-  * @param[in] fparam The frame parameter. The array can be of size :
-      * nframes x dim_fparam.
-      * dim_fparam. Then all frames are assumed to be provided with the same fparam.
-  * @param[in] aparam The atomic parameter The array can be of size :
-      * nframes x natoms x dim_aparam.
-      * natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
-      * dim_aparam. Then all frames and atoms are provided with the same aparam.
-  **/
-  template
-  void compute (std::vector &		all_ener,
-		std::vector > &	all_force,
-		std::vector > &	all_virial,
-		std::vector > &	all_atom_energy,
-		std::vector > &	all_atom_virial,
-		const std::vector &	coord,
-		const std::vector &		atype,
-		const std::vector &	box,
-		const int			nghost,
-		const InputNlist &	lmp_list,
-		const int 				&   ago,
-		const std::vector	&	fparam = std::vector(),
-		const std::vector	&	aparam = std::vector());
+   * @brief Evaluate the energy, force, virial, atomic energy, and atomic virial
+   *by using these DP models.
+   * @param[out] all_ener The system energies of all models.
+   * @param[out] all_force The forces on each atom of all models.
+   * @param[out] all_virial The virials of all models.
+   * @param[out] all_atom_energy The atomic energies of all models.
+   * @param[out] all_atom_virial The atomic virials of all models.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *nframes x natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size nframes
+   *x 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] lmp_list The input neighbour list.
+   * @param[in] ago Update the internal neighbour list if ago is 0.
+   * @param[in] fparam The frame parameter. The array can be of size :
+   * nframes x dim_fparam.
+   * dim_fparam. Then all frames are assumed to be provided with the same
+   *fparam.
+   * @param[in] aparam The atomic parameter The array can be of size :
+   * nframes x natoms x dim_aparam.
+   * natoms x dim_aparam. Then all frames are assumed to be provided with the
+   *same aparam. dim_aparam. Then all frames and atoms are provided with the
+   *same aparam.
+   **/
+  template 
+  void compute(std::vector& all_ener,
+               std::vector >& all_force,
+               std::vector >& all_virial,
+               std::vector >& all_atom_energy,
+               std::vector >& all_atom_virial,
+               const std::vector& coord,
+               const std::vector& atype,
+               const std::vector& box,
+               const int nghost,
+               const InputNlist& lmp_list,
+               const int& ago,
+               const std::vector& fparam = std::vector(),
+               const std::vector& aparam = std::vector());
   /**
-  * @brief Get the cutoff radius.
-  * @return The cutoff radius.
-  **/
-  double cutoff () const {assert(inited); return rcut;};
+   * @brief Get the cutoff radius.
+   * @return The cutoff radius.
+   **/
+  double cutoff() const {
+    assert(inited);
+    return rcut;
+  };
   /**
-  * @brief Get the number of types.
-  * @return The number of types.
-  **/
-  int numb_types () const {assert(inited); return ntypes;};
+   * @brief Get the number of types.
+   * @return The number of types.
+   **/
+  int numb_types() const {
+    assert(inited);
+    return ntypes;
+  };
   /**
-  * @brief Get the dimension of the frame parameter.
-  * @return The dimension of the frame parameter.
-  **/
-  int dim_fparam () const {assert(inited); return dfparam;};
+   * @brief Get the dimension of the frame parameter.
+   * @return The dimension of the frame parameter.
+   **/
+  int dim_fparam() const {
+    assert(inited);
+    return dfparam;
+  };
   /**
-  * @brief Get the dimension of the atomic parameter.
-  * @return The dimension of the atomic parameter.
-  **/
-  int dim_aparam () const {assert(inited); return daparam;};
+   * @brief Get the dimension of the atomic parameter.
+   * @return The dimension of the atomic parameter.
+   **/
+  int dim_aparam() const {
+    assert(inited);
+    return daparam;
+  };
   /**
-  * @brief Compute the average energy.
-  * @param[out] dener The average energy.
-  * @param[in] all_energy The energies of all models.
-  **/
+   * @brief Compute the average energy.
+   * @param[out] dener The average energy.
+   * @param[in] all_energy The energies of all models.
+   **/
   template 
-  void compute_avg (VALUETYPE &			dener,
-		    const std::vector &	all_energy);
+  void compute_avg(VALUETYPE& dener, const std::vector& all_energy);
   /**
-  * @brief Compute the average of vectors.
-  * @param[out] avg The average of vectors.
-  * @param[in] xx The vectors of all models.
-  **/
+   * @brief Compute the average of vectors.
+   * @param[out] avg The average of vectors.
+   * @param[in] xx The vectors of all models.
+   **/
   template 
-  void compute_avg (std::vector &		avg,
-		    const std::vector > &	xx);
+  void compute_avg(std::vector& avg,
+                   const std::vector >& xx);
   /**
-  * @brief Compute the standard deviation of vectors.
-  * @param[out] std The standard deviation of vectors.
-  * @param[in] avg The average of vectors.
-  * @param[in] xx The vectors of all models.
-  * @param[in] stride The stride to compute the deviation.
-  **/
+   * @brief Compute the standard deviation of vectors.
+   * @param[out] std The standard deviation of vectors.
+   * @param[in] avg The average of vectors.
+   * @param[in] xx The vectors of all models.
+   * @param[in] stride The stride to compute the deviation.
+   **/
   template 
-  void compute_std (
-      std::vector & std,
-      const std::vector & avg,
-      const std::vector >& xx, 
-      const int & stride);
+  void compute_std(std::vector& std,
+                   const std::vector& avg,
+                   const std::vector >& xx,
+                   const int& stride);
   /**
-  * @brief Compute the relative standard deviation of vectors.
-  * @param[out] std The standard deviation of vectors.
-  * @param[in] avg The average of vectors.
-  * @param[in] eps The level parameter for computing the deviation.
-  * @param[in] stride The stride to compute the deviation.
-  **/
+   * @brief Compute the relative standard deviation of vectors.
+   * @param[out] std The standard deviation of vectors.
+   * @param[in] avg The average of vectors.
+   * @param[in] eps The level parameter for computing the deviation.
+   * @param[in] stride The stride to compute the deviation.
+   **/
   template 
-  void compute_relative_std (
-      std::vector & std,
-      const std::vector & avg,
-      const VALUETYPE eps, 
-      const int & stride);
+  void compute_relative_std(std::vector& std,
+                            const std::vector& avg,
+                            const VALUETYPE eps,
+                            const int& stride);
   /**
-  * @brief Compute the standard deviation of atomic energies.
-  * @param[out] std The standard deviation of atomic energies.
-  * @param[in] avg The average of atomic energies.
-  * @param[in] xx The vectors of all atomic energies.
-  **/
+   * @brief Compute the standard deviation of atomic energies.
+   * @param[out] std The standard deviation of atomic energies.
+   * @param[in] avg The average of atomic energies.
+   * @param[in] xx The vectors of all atomic energies.
+   **/
   template 
-  void compute_std_e (std::vector &		std,
-		      const std::vector &		avg,
-		      const std::vector >&	xx);
+  void compute_std_e(std::vector& std,
+                     const std::vector& avg,
+                     const std::vector >& xx);
   /**
-  * @brief Compute the standard deviation of forces.
-  * @param[out] std The standard deviation of forces.
-  * @param[in] avg The average of forces.
-  * @param[in] xx The vectors of all forces.
-  **/
+   * @brief Compute the standard deviation of forces.
+   * @param[out] std The standard deviation of forces.
+   * @param[in] avg The average of forces.
+   * @param[in] xx The vectors of all forces.
+   **/
   template 
-  void compute_std_f (std::vector &		std,
-		      const std::vector &		avg,
-		      const std::vector >& xx);
+  void compute_std_f(std::vector& std,
+                     const std::vector& avg,
+                     const std::vector >& xx);
   /**
-  * @brief Compute the relative standard deviation of forces.
-  * @param[out] std The relative standard deviation of forces.
-  * @param[in] avg The relative average of forces.
-  * @param[in] eps The level parameter for computing the deviation.
-  **/
+   * @brief Compute the relative standard deviation of forces.
+   * @param[out] std The relative standard deviation of forces.
+   * @param[in] avg The relative average of forces.
+   * @param[in] eps The level parameter for computing the deviation.
+   **/
   template 
-  void compute_relative_std_f (std::vector &		std,
-		      const std::vector &		avg,
-		      const VALUETYPE eps);
-private:
+  void compute_relative_std_f(std::vector& std,
+                              const std::vector& avg,
+                              const VALUETYPE eps);
+
+ private:
   unsigned numb_models;
   std::vector sessions;
   int num_intra_nthreads, num_inter_nthreads;
   std::vector graph_defs;
   bool inited;
-  template VT get_scalar(const std::string name) const;
+  template 
+  VT get_scalar(const std::string name) const;
   // VALUETYPE get_rcut () const;
   // int get_ntypes () const;
   double rcut;
@@ -422,9 +489,9 @@ class DeepPotModelDevi
   int dfparam;
   int daparam;
   template 
-  void validate_fparam_aparam(const int & nloc,
-			      const std::vector &fparam,
-			      const std::vector &aparam)const ;
+  void validate_fparam_aparam(const int& nloc,
+                              const std::vector& fparam,
+                              const std::vector& aparam) const;
 
   // copy neighbor list info from host
   bool init_nbor;
@@ -436,6 +503,4 @@ class DeepPotModelDevi
   // function used for nborlist copy
   std::vector > get_sel() const;
 };
-}
-
-
+}  // namespace deepmd
diff --git a/source/api_cc/include/DeepTensor.h b/source/api_cc/include/DeepTensor.h
index dc02f41bef..dcc9350c80 100644
--- a/source/api_cc/include/DeepTensor.h
+++ b/source/api_cc/include/DeepTensor.h
@@ -3,171 +3,202 @@
 #include "common.h"
 #include "neighbor_list.h"
 
-namespace deepmd{
+namespace deepmd {
 /**
-* @brief Deep Tensor.
-**/
-class DeepTensor
-{
-public:
+ * @brief Deep Tensor.
+ **/
+class DeepTensor {
+ public:
   /**
-  * @brief Deep Tensor constructor without initialization.
-  **/
+   * @brief Deep Tensor constructor without initialization.
+   **/
   DeepTensor();
   ~DeepTensor();
   /**
-  * @brief Deep Tensor constructor with initialization..
-  * @param[in] model The name of the frozen model file.
-  * @param[in] gpu_rank The GPU rank. Default is 0.
-  * @param[in] name_scope Name scopes of operations.
-  **/
-  DeepTensor(const std::string & model, 
-	     const int & gpu_rank = 0, 
-	     const std::string &name_scope = "");
+   * @brief Deep Tensor constructor with initialization..
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] name_scope Name scopes of operations.
+   **/
+  DeepTensor(const std::string& model,
+             const int& gpu_rank = 0,
+             const std::string& name_scope = "");
   /**
-  * @brief Initialize the Deep Tensor.
-  * @param[in] model The name of the frozen model file.
-  * @param[in] gpu_rank The GPU rank. Default is 0.
-  * @param[in] name_scope Name scopes of operations.
-  **/
-  void init (const std::string & model, 
-	     const int & gpu_rank = 0, 
-	     const std::string &name_scope = "");
+   * @brief Initialize the Deep Tensor.
+   * @param[in] model The name of the frozen model file.
+   * @param[in] gpu_rank The GPU rank. Default is 0.
+   * @param[in] name_scope Name scopes of operations.
+   **/
+  void init(const std::string& model,
+            const int& gpu_rank = 0,
+            const std::string& name_scope = "");
   /**
-  * @brief Print the DP summary to the screen.
-  * @param[in] pre The prefix to each line.
-  **/
-  void print_summary(const std::string &pre) const;
-public:
+   * @brief Print the DP summary to the screen.
+   * @param[in] pre The prefix to each line.
+   **/
+  void print_summary(const std::string& pre) const;
+
+ public:
   /**
-  * @brief Evaluate the value by using this model.
-  * @param[out] value The value to evalute, usually would be the atomic tensor.
-  * @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-  * @param[in] atype The atom types. The list should contain natoms ints.
-  * @param[in] box The cell of the region. The array should be of size 9.
-  **/
+   * @brief Evaluate the value by using this model.
+   * @param[out] value The value to evaluate, usually would be the atomic tensor.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   **/
   template 
-  void compute (std::vector &	value,
-		const std::vector &	coord,
-		const std::vector &	atype,
-		const std::vector &	box);
+  void compute(std::vector& value,
+               const std::vector& coord,
+               const std::vector& atype,
+               const std::vector& box);
   /**
-  * @brief Evaluate the value by using this model.
-  * @param[out] value The value to evalute, usually would be the atomic tensor.
-  * @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-  * @param[in] atype The atom types. The list should contain natoms ints.
-  * @param[in] box The cell of the region. The array should be of size 9.
-  * @param[in] nghost The number of ghost atoms.
-  * @param[in] inlist The input neighbour list.
-  **/
+   * @brief Evaluate the value by using this model.
+   * @param[out] value The value to evaluate, usually would be the atomic tensor.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] inlist The input neighbour list.
+   **/
   template 
-  void compute (std::vector &	value,
-		const std::vector &	coord,
-		const std::vector &	atype,
-		const std::vector &	box, 
-		const int			nghost,
-		const InputNlist &	inlist);
+  void compute(std::vector& value,
+               const std::vector& coord,
+               const std::vector& atype,
+               const std::vector& box,
+               const int nghost,
+               const InputNlist& inlist);
   /**
-  * @brief Evaluate the global tensor and component-wise force and virial.
-  * @param[out] global_tensor The global tensor to evalute.
-  * @param[out] force The component-wise force of the global tensor, size odim x natoms x 3.
-  * @param[out] virial The component-wise virial of the global tensor, size odim x 9.
-  * @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-  * @param[in] atype The atom types. The list should contain natoms ints.
-  * @param[in] box The cell of the region. The array should be of size 9.
-  **/
+   * @brief Evaluate the global tensor and component-wise force and virial.
+   * @param[out] global_tensor The global tensor to evaluate.
+   * @param[out] force The component-wise force of the global tensor, size odim
+   *x natoms x 3.
+   * @param[out] virial The component-wise virial of the global tensor, size
+   *odim x 9.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   **/
   template 
-  void compute (std::vector &	global_tensor,
-		std::vector &	force,
-		std::vector &	virial,
-		const std::vector &	coord,
-		const std::vector &	atype,
-		const std::vector &	box);
+  void compute(std::vector& global_tensor,
+               std::vector& force,
+               std::vector& virial,
+               const std::vector& coord,
+               const std::vector& atype,
+               const std::vector& box);
   /**
-  * @brief Evaluate the global tensor and component-wise force and virial.
-  * @param[out] global_tensor The global tensor to evalute.
-  * @param[out] force The component-wise force of the global tensor, size odim x natoms x 3.
-  * @param[out] virial The component-wise virial of the global tensor, size odim x 9.
-  * @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-  * @param[in] atype The atom types. The list should contain natoms ints.
-  * @param[in] box The cell of the region. The array should be of size 9.
-  * @param[in] nghost The number of ghost atoms.
-  * @param[in] inlist The input neighbour list.
-  **/
+   * @brief Evaluate the global tensor and component-wise force and virial.
+   * @param[out] global_tensor The global tensor to evaluate.
+   * @param[out] force The component-wise force of the global tensor, size odim
+   *x natoms x 3.
+   * @param[out] virial The component-wise virial of the global tensor, size
+   *odim x 9.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] inlist The input neighbour list.
+   **/
   template 
-  void compute (std::vector &	global_tensor,
-		std::vector &	force,
-		std::vector &	virial,
-		const std::vector &	coord,
-		const std::vector &	atype,
-		const std::vector &	box, 
-		const int			nghost,
-		const InputNlist &	inlist);
+  void compute(std::vector& global_tensor,
+               std::vector& force,
+               std::vector& virial,
+               const std::vector& coord,
+               const std::vector& atype,
+               const std::vector& box,
+               const int nghost,
+               const InputNlist& inlist);
   /**
-  * @brief Evaluate the global tensor and component-wise force and virial.
-  * @param[out] global_tensor The global tensor to evalute.
-  * @param[out] force The component-wise force of the global tensor, size odim x natoms x 3.
-  * @param[out] virial The component-wise virial of the global tensor, size odim x 9.
-  * @param[out] atom_tensor The atomic tensor value of the model, size natoms x odim.
-  * @param[out] atom_virial The component-wise atomic virial of the global tensor, size odim x natoms x 9.
-  * @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-  * @param[in] atype The atom types. The list should contain natoms ints.
-  * @param[in] box The cell of the region. The array should be of size 9.
-  **/
+   * @brief Evaluate the global tensor and component-wise force and virial.
+   * @param[out] global_tensor The global tensor to evaluate.
+   * @param[out] force The component-wise force of the global tensor, size odim
+   *x natoms x 3.
+   * @param[out] virial The component-wise virial of the global tensor, size
+   *odim x 9.
+   * @param[out] atom_tensor The atomic tensor value of the model, size natoms x
+   *odim.
+   * @param[out] atom_virial The component-wise atomic virial of the global
+   *tensor, size odim x natoms x 9.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   **/
   template 
-  void compute (std::vector &	global_tensor,
-		std::vector &	force,
-		std::vector &	virial,
-		std::vector &	atom_tensor,
-		std::vector &	atom_virial,
-		const std::vector &	coord,
-		const std::vector &	atype,
-		const std::vector &	box);
+  void compute(std::vector& global_tensor,
+               std::vector& force,
+               std::vector& virial,
+               std::vector& atom_tensor,
+               std::vector& atom_virial,
+               const std::vector& coord,
+               const std::vector& atype,
+               const std::vector& box);
   /**
-  * @brief Evaluate the global tensor and component-wise force and virial.
-  * @param[out] global_tensor The global tensor to evalute.
-  * @param[out] force The component-wise force of the global tensor, size odim x natoms x 3.
-  * @param[out] virial The component-wise virial of the global tensor, size odim x 9.
-  * @param[out] atom_tensor The atomic tensor value of the model, size natoms x odim.
-  * @param[out] atom_virial The component-wise atomic virial of the global tensor, size odim x natoms x 9.
-  * @param[in] coord The coordinates of atoms. The array should be of size natoms x 3.
-  * @param[in] atype The atom types. The list should contain natoms ints.
-  * @param[in] box The cell of the region. The array should be of size 9.
-  * @param[in] nghost The number of ghost atoms.
-  * @param[in] inlist The input neighbour list.
-  **/
+   * @brief Evaluate the global tensor and component-wise force and virial.
+   * @param[out] global_tensor The global tensor to evaluate.
+   * @param[out] force The component-wise force of the global tensor, size odim
+   *x natoms x 3.
+   * @param[out] virial The component-wise virial of the global tensor, size
+   *odim x 9.
+   * @param[out] atom_tensor The atomic tensor value of the model, size natoms x
+   *odim.
+   * @param[out] atom_virial The component-wise atomic virial of the global
+   *tensor, size odim x natoms x 9.
+   * @param[in] coord The coordinates of atoms. The array should be of size
+   *natoms x 3.
+   * @param[in] atype The atom types. The list should contain natoms ints.
+   * @param[in] box The cell of the region. The array should be of size 9.
+   * @param[in] nghost The number of ghost atoms.
+   * @param[in] inlist The input neighbour list.
+   **/
   template 
-  void compute (std::vector &	global_tensor,
-		std::vector &	force,
-		std::vector &	virial,
-		std::vector &	atom_tensor,
-		std::vector &	atom_virial,
-		const std::vector &	coord,
-		const std::vector &	atype,
-		const std::vector &	box, 
-		const int			nghost,
-		const InputNlist &	inlist);
+  void compute(std::vector& global_tensor,
+               std::vector& force,
+               std::vector& virial,
+               std::vector& atom_tensor,
+               std::vector& atom_virial,
+               const std::vector& coord,
+               const std::vector& atype,
+               const std::vector& box,
+               const int nghost,
+               const InputNlist& inlist);
   /**
-  * @brief Get the cutoff radius.
-  * @return The cutoff radius.
-  **/
-  double cutoff () const {assert(inited); return rcut;};
+   * @brief Get the cutoff radius.
+   * @return The cutoff radius.
+   **/
+  double cutoff() const {
+    assert(inited);
+    return rcut;
+  };
   /**
-  * @brief Get the number of types.
-  * @return The number of types.
-  **/
-  int numb_types () const {assert(inited); return ntypes;};
+   * @brief Get the number of types.
+   * @return The number of types.
+   **/
+  int numb_types() const {
+    assert(inited);
+    return ntypes;
+  };
   /**
-  * @brief Get the output dimension.
-  * @return The output dimension.
-  **/
-  int output_dim () const {assert(inited); return odim;};
+   * @brief Get the output dimension.
+   * @return The output dimension.
+   **/
+  int output_dim() const {
+    assert(inited);
+    return odim;
+  };
   /**
    * @brief Get the list of sel types.
    * @return The list of sel types.
    */
-  const std::vector & sel_types () const {assert(inited); return sel_type;};
-private:
+  const std::vector& sel_types() const {
+    assert(inited);
+    return sel_type;
+  };
+
+ private:
   tensorflow::Session* session;
   std::string name_scope;
   int num_intra_nthreads, num_inter_nthreads;
@@ -181,58 +212,61 @@ class DeepTensor
   std::string model_version;
   int odim;
   std::vector sel_type;
-  template VT get_scalar(const std::string & name) const;
-  template void get_vector (std::vector & vec, const std::string & name) const;
-  template
-  void run_model (std::vector &		d_tensor_,
-		  tensorflow::Session *			session, 
-		  const std::vector> & input_tensors,
-		  const AtomMap &		atommap, 
-		  const std::vector &		sel_fwd,
-		  const int				nghost = 0);
-  template
-  void run_model (std::vector &		dglobal_tensor_,
-		  std::vector &	dforce_,
-		  std::vector &	dvirial_,
-		  std::vector &	datom_tensor_,
-		  std::vector &	datom_virial_,
-		  tensorflow::Session *			session, 
-		  const std::vector> & input_tensors,
-		  const AtomMap &		atommap, 
-		  const std::vector &		sel_fwd,
-		  const int				nghost = 0);
-  template
-  void compute_inner (std::vector &		value,
-		      const std::vector &	coord,
-		      const std::vector &		atype,
-		      const std::vector &	box);
-  template
-  void compute_inner (std::vector &		value,
-		      const std::vector &	coord,
-		      const std::vector &		atype,
-		      const std::vector &	box, 
-		      const int				nghost,
-		      const InputNlist&			inlist);
-  template
-  void compute_inner (std::vector &		global_tensor,
-		      std::vector &	force,
-		      std::vector &	virial,
-		      std::vector &	atom_tensor,
-		      std::vector &	atom_virial,
-		      const std::vector &	coord,
-		      const std::vector &		atype,
-		      const std::vector &	box);
-  template
-  void compute_inner (std::vector &		global_tensor,
-		      std::vector &	force,
-		      std::vector &	virial,
-		      std::vector &	atom_tensor,
-		      std::vector &	atom_virial,
-		      const std::vector &	coord,
-		      const std::vector &		atype,
-		      const std::vector &	box, 
-		      const int				nghost,
-		      const InputNlist&			inlist);
+  template 
+  VT get_scalar(const std::string& name) const;
+  template 
+  void get_vector(std::vector& vec, const std::string& name) const;
+  template 
+  void run_model(std::vector& d_tensor_,
+                 tensorflow::Session* session,
+                 const std::vector>&
+                     input_tensors,
+                 const AtomMap& atommap,
+                 const std::vector& sel_fwd,
+                 const int nghost = 0);
+  template 
+  void run_model(std::vector& dglobal_tensor_,
+                 std::vector& dforce_,
+                 std::vector& dvirial_,
+                 std::vector& datom_tensor_,
+                 std::vector& datom_virial_,
+                 tensorflow::Session* session,
+                 const std::vector>&
+                     input_tensors,
+                 const AtomMap& atommap,
+                 const std::vector& sel_fwd,
+                 const int nghost = 0);
+  template 
+  void compute_inner(std::vector& value,
+                     const std::vector& coord,
+                     const std::vector& atype,
+                     const std::vector& box);
+  template 
+  void compute_inner(std::vector& value,
+                     const std::vector& coord,
+                     const std::vector& atype,
+                     const std::vector& box,
+                     const int nghost,
+                     const InputNlist& inlist);
+  template 
+  void compute_inner(std::vector& global_tensor,
+                     std::vector& force,
+                     std::vector& virial,
+                     std::vector& atom_tensor,
+                     std::vector& atom_virial,
+                     const std::vector& coord,
+                     const std::vector& atype,
+                     const std::vector& box);
+  template 
+  void compute_inner(std::vector& global_tensor,
+                     std::vector& force,
+                     std::vector& virial,
+                     std::vector& atom_tensor,
+                     std::vector& atom_virial,
+                     const std::vector& coord,
+                     const std::vector& atype,
+                     const std::vector& box,
+                     const int nghost,
+                     const InputNlist& inlist);
 };
-}
-
+}  // namespace deepmd
diff --git a/source/api_cc/include/common.h b/source/api_cc/include/common.h
index 3beb5b726f..770115b5d9 100644
--- a/source/api_cc/include/common.h
+++ b/source/api_cc/include/common.h
@@ -1,12 +1,13 @@
 #pragma once
 
-#include 
-#include 
 #include 
-#include "version.h"
-#include "neighbor_list.h"
+#include 
+#include 
+
 #include "AtomMap.h"
 #include "errors.h"
+#include "neighbor_list.h"
+#include "version.h"
 
 #ifdef TF_PRIVATE
 #include "tf_private.h"
@@ -14,247 +15,223 @@
 #include "tf_public.h"
 #endif
 
-
-namespace deepmd{
+namespace deepmd {
 
 typedef double ENERGYTYPE;
 
-struct NeighborListData 
-{
+struct NeighborListData {
   /// Array stores the core region atom's index
-  std::vector ilist;
+  std::vector ilist;
   /// Array stores the core region atom's neighbor index
-  std::vector > jlist;
+  std::vector> jlist;
   /// Array stores the number of neighbors of core region atoms
-  std::vector numneigh;
+  std::vector numneigh;
   /// Array stores the the location of the first neighbor of core region atoms
-  std::vector firstneigh;  
-public:
-  void copy_from_nlist(const InputNlist & inlist);
-  void shuffle(const std::vector & fwd_map);
-  void shuffle(const deepmd::AtomMap & map);
-  void shuffle_exclude_empty(const std::vector & fwd_map);
-  void make_inlist(InputNlist & inlist);
+  std::vector firstneigh;
+
+ public:
+  void copy_from_nlist(const InputNlist& inlist);
+  void shuffle(const std::vector& fwd_map);
+  void shuffle(const deepmd::AtomMap& map);
+  void shuffle_exclude_empty(const std::vector& fwd_map);
+  void make_inlist(InputNlist& inlist);
 };
 
-
 /**
-* @brief Check if the model version is supported.
-* @param[in] model_version The model version.
-* @return Whether the model is supported (true or false).
-**/
-bool
-model_compatable(
-    std::string & model_version);
-
-template
-void 
-select_by_type(std::vector & fwd_map,
-	       std::vector & bkw_map,
-	       int & nghost_real, 
-	       const std::vector & dcoord_, 
-	       const std::vector & datype_,
-	       const int & nghost,
-	       const std::vector & sel_type_);
-
-template
-void
-select_real_atoms(std::vector & fwd_map,
-		  std::vector & bkw_map,
-		  int & nghost_real,
-		  const std::vector & dcoord_, 
-		  const std::vector & datype_,
-		  const int & nghost,
-		  const int & ntypes);
-
-template
-void 
-select_map(std::vector & out,
-	   const std::vector & in,
-	   const std::vector & fwd_map, 
-	   const int & stride);
-
-template
-void 
-select_map(typename std::vector::iterator out,
-	   const typename std::vector::const_iterator in, 
-	   const std::vector & fwd_map, 
-	   const int & stride);
-
-template
-void 
-select_map_inv(std::vector & out,
-	   const std::vector & in,
-	   const std::vector & fwd_map, 
-	   const int & stride);
-
-template
-void 
-select_map_inv(typename std::vector::iterator out,
-	   const typename std::vector::const_iterator in, 
-	   const std::vector & fwd_map, 
-	   const int & stride);
+ * @brief Check if the model version is supported.
+ * @param[in] model_version The model version.
+ * @return Whether the model is supported (true or false).
+ **/
+bool model_compatable(std::string& model_version);
+
+template 
+void select_by_type(std::vector& fwd_map,
+                    std::vector& bkw_map,
+                    int& nghost_real,
+                    const std::vector& dcoord_,
+                    const std::vector& datype_,
+                    const int& nghost,
+                    const std::vector& sel_type_);
+
+template 
+void select_real_atoms(std::vector& fwd_map,
+                       std::vector& bkw_map,
+                       int& nghost_real,
+                       const std::vector& dcoord_,
+                       const std::vector& datype_,
+                       const int& nghost,
+                       const int& ntypes);
+
+template 
+void select_map(std::vector& out,
+                const std::vector& in,
+                const std::vector& fwd_map,
+                const int& stride);
+
+template 
+void select_map(typename std::vector::iterator out,
+                const typename std::vector::const_iterator in,
+                const std::vector& fwd_map,
+                const int& stride);
+
+template 
+void select_map_inv(std::vector& out,
+                    const std::vector& in,
+                    const std::vector& fwd_map,
+                    const int& stride);
+
+template 
+void select_map_inv(typename std::vector::iterator out,
+                    const typename std::vector::const_iterator in,
+                    const std::vector& fwd_map,
+                    const int& stride);
 
 /**
-* @brief Get the number of threads from the environment variable.
-* @details A warning will be thrown if environmental variables are not set.
-* @param[out] num_intra_nthreads The number of intra threads. Read from TF_INTRA_OP_PARALLELISM_THREADS.
-* @param[out] num_inter_nthreads The number of inter threads. Read from TF_INTER_OP_PARALLELISM_THREADS.
-**/
-void
-get_env_nthreads(int & num_intra_nthreads,
-		 int & num_inter_nthreads);
+ * @brief Get the number of threads from the environment variable.
+ * @details A warning will be thrown if environmental variables are not set.
+ * @param[out] num_intra_nthreads The number of intra threads. Read from
+ *TF_INTRA_OP_PARALLELISM_THREADS.
+ * @param[out] num_inter_nthreads The number of inter threads. Read from
+ *TF_INTER_OP_PARALLELISM_THREADS.
+ **/
+void get_env_nthreads(int& num_intra_nthreads, int& num_inter_nthreads);
 
 /**
- * @brief Dynamically load OP library. This should be called before loading graphs.
+ * @brief Dynamically load OP library. This should be called before loading
+ * graphs.
  */
-void
-load_op_library();
+void load_op_library();
 
 /** @struct deepmd::deepmd_exception
  **/
 
 /**
-* @brief Throw exception if TensorFlow doesn't work.
-**/
-struct
-tf_exception: public deepmd::deepmd_exception {
-public:
-	tf_exception(): deepmd::deepmd_exception("TensorFlow Error!") {};
-	tf_exception(const std::string& msg): deepmd::deepmd_exception(std::string("TensorFlow Error: ") + msg) {};
+ * @brief Throw exception if TensorFlow doesn't work.
+ **/
+struct tf_exception : public deepmd::deepmd_exception {
+ public:
+  tf_exception() : deepmd::deepmd_exception("TensorFlow Error!"){};
+  tf_exception(const std::string& msg)
+      : deepmd::deepmd_exception(std::string("TensorFlow Error: ") + msg){};
 };
 
 /**
-* @brief Check TensorFlow status. Exit if not OK.
-* @param[in] status TensorFlow status.
-**/
-void
-check_status(
-    const tensorflow::Status& status);
+ * @brief Check TensorFlow status. Exit if not OK.
+ * @param[in] status TensorFlow status.
+ **/
+void check_status(const tensorflow::Status& status);
 
-std::string 
-name_prefix(
-    const std::string & name_scope);
+std::string name_prefix(const std::string& name_scope);
 
 /**
-* @brief Get the value of a tensor.
-* @param[in] session TensorFlow session.
-* @param[in] name The name of the tensor.
-* @param[in] scope The scope of the tensor.
-* @return The value of the tensor.
-**/
-template
-VT
-session_get_scalar(
-    tensorflow::Session* session, 
-    const std::string name, 
-    const std::string scope = "");
+ * @brief Get the value of a tensor.
+ * @param[in] session TensorFlow session.
+ * @param[in] name The name of the tensor.
+ * @param[in] scope The scope of the tensor.
+ * @return The value of the tensor.
+ **/
+template 
+VT session_get_scalar(tensorflow::Session* session,
+                      const std::string name,
+                      const std::string scope = "");
 
 /**
-* @brief Get the vector of a tensor.
-* @param[out] o_vec The output vector.
-* @param[in] session TensorFlow session.
-* @param[in] name The name of the tensor.
-* @param[in] scope The scope of the tensor.
-**/
-template
-void
-session_get_vector(
-    std::vector & o_vec, 
-    tensorflow::Session* session, 
-    const std::string name_, 
-    const std::string scope = "");
+ * @brief Get the vector of a tensor.
+ * @param[out] o_vec The output vector.
+ * @param[in] session TensorFlow session.
+ * @param[in] name The name of the tensor.
+ * @param[in] scope The scope of the tensor.
+ **/
+template 
+void session_get_vector(std::vector& o_vec,
+                        tensorflow::Session* session,
+                        const std::string name_,
+                        const std::string scope = "");
 
 /**
-* @brief Get the type of a tensor.
-* @param[in] session TensorFlow session.
-* @param[in] name The name of the tensor.
-* @param[in] scope The scope of the tensor.
-* @return The type of the tensor as int.
-**/
-int
-session_get_dtype(
-	tensorflow::Session* session,
-	const std::string name,
-	const std::string scope = "");
+ * @brief Get the type of a tensor.
+ * @param[in] session TensorFlow session.
+ * @param[in] name The name of the tensor.
+ * @param[in] scope The scope of the tensor.
+ * @return The type of the tensor as int.
+ **/
+int session_get_dtype(tensorflow::Session* session,
+                      const std::string name,
+                      const std::string scope = "");
 
 /**
-* @brief Get input tensors.
-* @param[out] input_tensors Input tensors.
-* @param[in] dcoord_ Coordinates of atoms.
-* @param[in] ntypes Number of atom types.
-* @param[in] datype_ Atom types.
-* @param[in] dbox Box matrix.
-* @param[in] cell_size Cell size.
-* @param[in] fparam_ Frame parameters.
-* @param[in] aparam_ Atom parameters.
-* @param[in] atommap Atom map.
-* @param[in] scope The scope of the tensors.
-*/
+ * @brief Get input tensors.
+ * @param[out] input_tensors Input tensors.
+ * @param[in] dcoord_ Coordinates of atoms.
+ * @param[in] ntypes Number of atom types.
+ * @param[in] datype_ Atom types.
+ * @param[in] dbox Box matrix.
+ * @param[in] cell_size Cell size.
+ * @param[in] fparam_ Frame parameters.
+ * @param[in] aparam_ Atom parameters.
+ * @param[in] atommap Atom map.
+ * @param[in] scope The scope of the tensors.
+ */
 template 
-int
-session_input_tensors (std::vector> & input_tensors,
-		       const std::vector &	dcoord_,
-		       const int &			ntypes,
-		       const std::vector &		datype_,
-		       const std::vector &	dbox, 
-		       const double &		cell_size,
-		       const std::vector &	fparam_,
-		       const std::vector &	aparam_,
-		       const deepmd::AtomMap&atommap,
-		       const std::string		scope = "");
+int session_input_tensors(
+    std::vector>& input_tensors,
+    const std::vector& dcoord_,
+    const int& ntypes,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    const double& cell_size,
+    const std::vector& fparam_,
+    const std::vector& aparam_,
+    const deepmd::AtomMap& atommap,
+    const std::string scope = "");
 
 /**
-* @brief Get input tensors.
-* @param[out] input_tensors Input tensors.
-* @param[in] dcoord_ Coordinates of atoms.
-* @param[in] ntypes Number of atom types.
-* @param[in] datype_ Atom types.
-* @param[in] dlist Neighbor list.
-* @param[in] fparam_ Frame parameters.
-* @param[in] aparam_ Atom parameters.
-* @param[in] atommap Atom map.
-* @param[in] nghost Number of ghost atoms.
-* @param[in] ago Update the internal neighbour list if ago is 0.
-* @param[in] scope The scope of the tensors.
-*/
+ * @brief Get input tensors.
+ * @param[out] input_tensors Input tensors.
+ * @param[in] dcoord_ Coordinates of atoms.
+ * @param[in] ntypes Number of atom types.
+ * @param[in] datype_ Atom types.
+ * @param[in] dlist Neighbor list.
+ * @param[in] fparam_ Frame parameters.
+ * @param[in] aparam_ Atom parameters.
+ * @param[in] atommap Atom map.
+ * @param[in] nghost Number of ghost atoms.
+ * @param[in] ago Update the internal neighbour list if ago is 0.
+ * @param[in] scope The scope of the tensors.
+ */
 template 
-int
-session_input_tensors (std::vector> & input_tensors,
-		       const std::vector &	dcoord_,
-		       const int &			ntypes,
-		       const std::vector &		datype_,
-		       const std::vector &	dbox,		    
-		       InputNlist &		dlist, 
-		       const std::vector &	fparam_,
-		       const std::vector &	aparam_,
-		       const deepmd::AtomMap&atommap,
-		       const int			nghost,
-		       const int			ago,
-		       const std::string		scope = "");
+int session_input_tensors(
+    std::vector>& input_tensors,
+    const std::vector& dcoord_,
+    const int& ntypes,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    InputNlist& dlist,
+    const std::vector& fparam_,
+    const std::vector& aparam_,
+    const deepmd::AtomMap& atommap,
+    const int nghost,
+    const int ago,
+    const std::string scope = "");
 
 /**
-* @brief Read model file to a string.
-* @param[in] model Path to the model.
-* @param[out] file_content Content of the model file.
-**/
-void
-read_file_to_string(std::string model, std::string & file_content);
-
+ * @brief Read model file to a string.
+ * @param[in] model Path to the model.
+ * @param[out] file_content Content of the model file.
+ **/
+void read_file_to_string(std::string model, std::string& file_content);
 
 /**
-* @brief Convert pbtxt to pb.
-* @param[in] fn_pb_txt Filename of the pb txt file.
-* @param[in] fn_pb Filename of the pb file.
-**/
-void
-convert_pbtxt_to_pb(std::string fn_pb_txt, std::string fn_pb);
+ * @brief Convert pbtxt to pb.
+ * @param[in] fn_pb_txt Filename of the pb txt file.
+ * @param[in] fn_pb Filename of the pb file.
+ **/
+void convert_pbtxt_to_pb(std::string fn_pb_txt, std::string fn_pb);
 
 /**
- * @brief Print the summary of DeePMD-kit, including the version and the build information.
+ * @brief Print the summary of DeePMD-kit, including the version and the build
+ * information.
  * @param[in] pre The prefix to each line.
  */
-void
-print_summary(const std::string &pre);
-}
-
+void print_summary(const std::string& pre);
+}  // namespace deepmd
diff --git a/source/api_cc/include/tf_private.h b/source/api_cc/include/tf_private.h
index 12b7077139..1e6c2125f2 100644
--- a/source/api_cc/include/tf_private.h
+++ b/source/api_cc/include/tf_private.h
@@ -1,14 +1,15 @@
 /**
  * @file tf_private.h
  * @brief This file includes TensorFlow headers used for compilation.
- * 
+ *
  */
 
+#include 
+#include 
+
 #include "tensorflow/core/platform/env.h"
 #include "tensorflow/core/public/session.h"
 #include "tensorflow/core/public/version.h"
-#include 
-#include 
 
 namespace deepmd {
 #if TF_MAJOR_VERSION >= 2 && TF_MINOR_VERSION >= 2
@@ -16,4 +17,4 @@ typedef tensorflow::tstring STRINGTYPE;
 #else
 typedef std::string STRINGTYPE;
 #endif
-}
+}  // namespace deepmd
diff --git a/source/api_cc/include/tf_public.h b/source/api_cc/include/tf_public.h
index e766168c98..6e85644dd1 100644
--- a/source/api_cc/include/tf_public.h
+++ b/source/api_cc/include/tf_public.h
@@ -1,15 +1,16 @@
 /**
  * @file tf_public.h
- * @brief This file declares incompleted TensorFlow class used for public headers.
- * 
+ * @brief This file declares incompleted TensorFlow class used for public
+ * headers.
+ *
  */
 
 // skip if TF headers have been included
 #ifndef TF_MAJOR_VERSION
-namespace tensorflow{
-    class Session;
-    class Tensor;
-    class GraphDef;
-    class Status;
-}
+namespace tensorflow {
+class Session;
+class Tensor;
+class GraphDef;
+class Status;
+}  // namespace tensorflow
 #endif
diff --git a/source/api_cc/src/AtomMap.cc b/source/api_cc/src/AtomMap.cc
index 8aeb8bbd23..0eccfd7ffd 100644
--- a/source/api_cc/src/AtomMap.cc
+++ b/source/api_cc/src/AtomMap.cc
@@ -5,24 +5,21 @@
 
 using namespace deepmd;
 
-AtomMap::
-AtomMap() {}
+AtomMap::AtomMap() {}
 
-AtomMap::
-AtomMap(const std::vector::const_iterator in_begin, 
-	   const std::vector::const_iterator in_end)
-{
+AtomMap::AtomMap(const std::vector::const_iterator in_begin,
+                 const std::vector::const_iterator in_end) {
   int natoms = in_end - in_begin;
-  atype.resize (natoms);
-  std::vector > sorting (natoms);
-  std::vector::const_iterator iter = in_begin;
-  for (unsigned ii = 0; ii < sorting.size(); ++ii){
-    sorting[ii] = std::pair (*(iter++), ii);
+  atype.resize(natoms);
+  std::vector > sorting(natoms);
+  std::vector::const_iterator iter = in_begin;
+  for (unsigned ii = 0; ii < sorting.size(); ++ii) {
+    sorting[ii] = std::pair(*(iter++), ii);
   }
-  sort (sorting.begin(), sorting.end());
+  sort(sorting.begin(), sorting.end());
   idx_map.resize(natoms);
   fwd_idx_map.resize(natoms);
-  for (unsigned ii = 0; ii < idx_map.size(); ++ii){
+  for (unsigned ii = 0; ii < idx_map.size(); ++ii) {
     idx_map[ii] = sorting[ii].second;
     fwd_idx_map[sorting[ii].second] = ii;
     atype[ii] = sorting[ii].first;
@@ -30,63 +27,49 @@ AtomMap(const std::vector::const_iterator in_begin,
 }
 
 template 
-void
-AtomMap::
-forward (typename std::vector::iterator out,
-	 const typename std::vector::const_iterator in, 
-	 const int stride) const 
-{
+void AtomMap::forward(typename std::vector::iterator out,
+                      const typename std::vector::const_iterator in,
+                      const int stride) const {
   int natoms = idx_map.size();
-  for (int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     int gro_i = idx_map[ii];
-    for (int dd = 0; dd < stride; ++dd){
+    for (int dd = 0; dd < stride; ++dd) {
       // out[ii*stride+dd] = in[gro_i*stride+dd];
-      *(out + ii*stride + dd) = *(in + gro_i*stride + dd);
+      *(out + ii * stride + dd) = *(in + gro_i * stride + dd);
     }
   }
 }
 
 template 
-void
-AtomMap::
-backward (typename std::vector::iterator out,
-	  const typename std::vector::const_iterator in, 
-	  const int stride) const 
-{
+void AtomMap::backward(typename std::vector::iterator out,
+                       const typename std::vector::const_iterator in,
+                       const int stride) const {
   int natoms = idx_map.size();
-  for (int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     int gro_i = idx_map[ii];
-    for (int dd = 0; dd < stride; ++dd){
+    for (int dd = 0; dd < stride; ++dd) {
       // out[gro_i*stride+dd] = in[ii*stride+dd];
-      *(out + gro_i*stride + dd) = *(in + ii*stride + dd);
+      *(out + gro_i * stride + dd) = *(in + ii * stride + dd);
     }
   }
 }
 
-template
-void
-AtomMap::
-forward  (typename std::vector::iterator out,
-	 const typename std::vector::const_iterator in, 
-	 const int stride) const ;
+template void AtomMap::forward(
+    typename std::vector::iterator out,
+    const typename std::vector::const_iterator in,
+    const int stride) const;
 
-template
-void
-AtomMap::
-forward  (typename std::vector::iterator out,
-	 const typename std::vector::const_iterator in, 
-	 const int stride) const ;
+template void AtomMap::forward(
+    typename std::vector::iterator out,
+    const typename std::vector::const_iterator in,
+    const int stride) const;
 
-template
-void
-AtomMap::
-backward  (typename std::vector::iterator out,
-	  const typename std::vector::const_iterator in, 
-	  const int stride) const ;
+template void AtomMap::backward(
+    typename std::vector::iterator out,
+    const typename std::vector::const_iterator in,
+    const int stride) const;
 
-template
-void
-AtomMap::
-backward  (typename std::vector::iterator out,
-	  const typename std::vector::const_iterator in, 
-	  const int stride) const ;
+template void AtomMap::backward(
+    typename std::vector::iterator out,
+    const typename std::vector::const_iterator in,
+    const int stride) const;
diff --git a/source/api_cc/src/DataModifier.cc b/source/api_cc/src/DataModifier.cc
index 8035dfcc02..4827e593fa 100644
--- a/source/api_cc/src/DataModifier.cc
+++ b/source/api_cc/src/DataModifier.cc
@@ -3,37 +3,26 @@
 using namespace deepmd;
 using namespace tensorflow;
 
-DipoleChargeModifier::
-DipoleChargeModifier()
-    : inited (false),
-      graph_def(new GraphDef())
-{
-}
+DipoleChargeModifier::DipoleChargeModifier()
+    : inited(false), graph_def(new GraphDef()) {}
 
-DipoleChargeModifier::
-DipoleChargeModifier(const std::string & model, 
-	     const int & gpu_rank, 
-	     const std::string &name_scope_)
-    : inited (false), name_scope(name_scope_),
-      graph_def(new GraphDef())
-{
-  init(model, gpu_rank, name_scope_);  
+DipoleChargeModifier::DipoleChargeModifier(const std::string& model,
+                                           const int& gpu_rank,
+                                           const std::string& name_scope_)
+    : inited(false), name_scope(name_scope_), graph_def(new GraphDef()) {
+  init(model, gpu_rank, name_scope_);
 }
 
-DipoleChargeModifier::
-~DipoleChargeModifier () {
-  delete graph_def;
-};
+DipoleChargeModifier::~DipoleChargeModifier() { delete graph_def; };
 
-void
-DipoleChargeModifier::
-init (const std::string & model, 
-      const int & gpu_rank, 
-      const std::string &name_scope_)
-{  
-  if (inited){
-    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do nothing at the second call of initializer" << std::endl;
-    return ;
+void DipoleChargeModifier::init(const std::string& model,
+                                const int& gpu_rank,
+                                const std::string& name_scope_) {
+  if (inited) {
+    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do "
+                 "nothing at the second call of initializer"
+              << std::endl;
+    return;
   }
   name_scope = name_scope_;
   SessionOptions options;
@@ -43,7 +32,7 @@ init (const std::string & model,
   deepmd::load_op_library();
   deepmd::check_status(NewSession(options, &session));
   deepmd::check_status(ReadBinaryProto(Env::Default(), model, graph_def));
-  deepmd::check_status(session->Create(*graph_def));  
+  deepmd::check_status(session->Create(*graph_def));
   // int nnodes = graph_def.node_size();
   // for (int ii = 0; ii < nnodes; ++ii){
   //   cout << ii << " \t " << graph_def.node(ii).name() << endl;
@@ -62,32 +51,25 @@ init (const std::string & model,
   inited = true;
 }
 
-template
-VT
-DipoleChargeModifier::
-get_scalar (const std::string & name) const
-{
+template 
+VT DipoleChargeModifier::get_scalar(const std::string& name) const {
   return session_get_scalar(session, name, name_scope);
 }
 
-template
-void
-DipoleChargeModifier::
-get_vector (std::vector & vec, const std::string & name) const
-{
+template 
+void DipoleChargeModifier::get_vector(std::vector& vec,
+                                      const std::string& name) const {
   session_get_vector(vec, session, name, name_scope);
 }
 
 template 
-void 
-DipoleChargeModifier::
-run_model (std::vector &		dforce,
-	   std::vector &		dvirial,
-	   Session *				session, 
-	   const std::vector> & input_tensors,
-	   const AtomMap &	atommap, 
-	   const int				nghost)
-{
+void DipoleChargeModifier::run_model(
+    std::vector& dforce,
+    std::vector& dvirial,
+    Session* session,
+    const std::vector>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost) {
   unsigned nloc = atommap.get_type().size();
   unsigned nall = nloc + nghost;
   if (nloc == 0) {
@@ -97,101 +79,92 @@ run_model (std::vector &		dforce,
   }
 
   std::vector output_tensors;
-  deepmd::check_status (session->Run(input_tensors, 
-			    {"o_dm_force", "o_dm_virial", "o_dm_av"},
-			    {}, 
-			    &output_tensors));
+  deepmd::check_status(session->Run(input_tensors,
+                                    {"o_dm_force", "o_dm_virial", "o_dm_av"},
+                                    {}, &output_tensors));
   int cc = 0;
   Tensor output_f = output_tensors[cc++];
   Tensor output_v = output_tensors[cc++];
   Tensor output_av = output_tensors[cc++];
-  assert (output_f.dims() == 2), "dim of output tensor should be 2";
-  assert (output_v.dims() == 2), "dim of output tensor should be 2";
-  assert (output_av.dims() == 2), "dim of output tensor should be 2";
+  assert(output_f.dims() == 2), "dim of output tensor should be 2";
+  assert(output_v.dims() == 2), "dim of output tensor should be 2";
+  assert(output_av.dims() == 2), "dim of output tensor should be 2";
   int nframes = output_f.dim_size(0);
   int natoms = output_f.dim_size(1) / 3;
-  assert (output_f.dim_size(0) == 1), "nframes should match";
-  assert (natoms == nall), "natoms should be nall";
-  assert (output_v.dim_size(0) == nframes), "nframes should match";
-  assert (output_v.dim_size(1) == 9), "dof of virial should be 9";
-  assert (output_av.dim_size(0) == nframes), "nframes should match";
-  assert (output_av.dim_size(1) == natoms * 9), "dof of atom virial should be 9 * natoms";  
+  assert(output_f.dim_size(0) == 1), "nframes should match";
+  assert(natoms == nall), "natoms should be nall";
+  assert(output_v.dim_size(0) == nframes), "nframes should match";
+  assert(output_v.dim_size(1) == 9), "dof of virial should be 9";
+  assert(output_av.dim_size(0) == nframes), "nframes should match";
+  assert(output_av.dim_size(1) == natoms * 9),
+      "dof of atom virial should be 9 * natoms";
 
-  auto of = output_f.flat ();
-  auto ov = output_v.flat ();
+  auto of = output_f.flat();
+  auto ov = output_v.flat();
 
-  dforce.resize(nall*3);
+  dforce.resize(nall * 3);
   dvirial.resize(9);
-  for (int ii = 0; ii < nall * 3; ++ii){
+  for (int ii = 0; ii < nall * 3; ++ii) {
     dforce[ii] = of(ii);
   }
-  for (int ii = 0; ii < 9; ++ii){
+  for (int ii = 0; ii < 9; ++ii) {
     dvirial[ii] = ov(ii);
   }
 }
 
-template
-void 
-DipoleChargeModifier::
-run_model  (std::vector &		dforce,
-	   std::vector &		dvirial,
-	   Session *				session, 
-	   const std::vector> & input_tensors,
-	   const AtomMap &	atommap, 
-	   const int				nghost);
+template void DipoleChargeModifier::run_model(
+    std::vector& dforce,
+    std::vector& dvirial,
+    Session* session,
+    const std::vector>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost);
 
-template
-void 
-DipoleChargeModifier::
-run_model  (std::vector &		dforce,
-	   std::vector &		dvirial,
-	   Session *				session, 
-	   const std::vector> & input_tensors,
-	   const AtomMap &	atommap, 
-	   const int				nghost);
+template void DipoleChargeModifier::run_model(
+    std::vector& dforce,
+    std::vector& dvirial,
+    Session* session,
+    const std::vector>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost);
 
-template
-void 
-DipoleChargeModifier::
-run_model  (std::vector &		dforce,
-	   std::vector &		dvirial,
-	   Session *				session, 
-	   const std::vector> & input_tensors,
-	   const AtomMap &	atommap, 
-	   const int				nghost);
+template void DipoleChargeModifier::run_model(
+    std::vector& dforce,
+    std::vector& dvirial,
+    Session* session,
+    const std::vector>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost);
 
-template
-void 
-DipoleChargeModifier::
-run_model  (std::vector &		dforce,
-	   std::vector &		dvirial,
-	   Session *				session, 
-	   const std::vector> & input_tensors,
-	   const AtomMap &	atommap, 
-	   const int				nghost);
+template void DipoleChargeModifier::run_model(
+    std::vector& dforce,
+    std::vector& dvirial,
+    Session* session,
+    const std::vector>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost);
 
 template 
-void
-DipoleChargeModifier::
-compute (std::vector &		dfcorr_,
-	 std::vector &		dvcorr_,
-	 const std::vector &		dcoord_,
-	 const std::vector &		datype_,
-	 const std::vector &		dbox, 
-	 const std::vector>&	pairs,
-	 const std::vector &		delef_, 
-	 const int				nghost,
-	 const InputNlist &		lmp_list)
-{
+void DipoleChargeModifier::compute(
+    std::vector& dfcorr_,
+    std::vector& dvcorr_,
+    const std::vector& dcoord_,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    const std::vector>& pairs,
+    const std::vector& delef_,
+    const int nghost,
+    const InputNlist& lmp_list) {
   // firstly do selection
   int nall = datype_.size();
   int nloc = nall - nghost;
   int nghost_real;
-  std::vector real_fwd_map, real_bkw_map;
-  select_real_atoms(real_fwd_map, real_bkw_map, nghost_real, dcoord_, datype_, nghost, ntypes);  
+  std::vector real_fwd_map, real_bkw_map;
+  select_real_atoms(real_fwd_map, real_bkw_map, nghost_real, dcoord_, datype_,
+                    nghost, ntypes);
   int nall_real = real_bkw_map.size();
   int nloc_real = nall_real - nghost_real;
-  if (nloc_real == 0){
+  if (nloc_real == 0) {
     dfcorr_.resize(nall * 3);
     dvcorr_.resize(9);
     fill(dfcorr_.begin(), dfcorr_.end(), (VALUETYPE)0.0);
@@ -212,12 +185,12 @@ compute (std::vector &		dfcorr_,
   // internal nlist
   NeighborListData nlist_data;
   nlist_data.copy_from_nlist(lmp_list);
-  nlist_data.shuffle_exclude_empty(real_fwd_map);  
+  nlist_data.shuffle_exclude_empty(real_fwd_map);
   // sort atoms
-  AtomMap atommap (datype_real.begin(), datype_real.begin() + nloc_real);
-  assert (nloc_real == atommap.get_type().size());
-  const std::vector & sort_fwd_map(atommap.get_fwd_map());
-  const std::vector & sort_bkw_map(atommap.get_bkw_map());
+  AtomMap atommap(datype_real.begin(), datype_real.begin() + nloc_real);
+  assert(nloc_real == atommap.get_type().size());
+  const std::vector& sort_fwd_map(atommap.get_fwd_map());
+  const std::vector& sort_bkw_map(atommap.get_bkw_map());
   // shuffle nlist
   nlist_data.shuffle(atommap);
   InputNlist nlist;
@@ -226,81 +199,89 @@ compute (std::vector &		dfcorr_,
   std::vector> input_tensors;
   int ret;
   if (dtype == tensorflow::DT_DOUBLE) {
-    ret = session_input_tensors (input_tensors, dcoord_real, ntypes, datype_real, dbox, nlist, std::vector(), std::vector(), atommap, nghost_real, 0, name_scope);
+    ret = session_input_tensors(
+        input_tensors, dcoord_real, ntypes, datype_real, dbox, nlist,
+        std::vector(), std::vector(), atommap,
+        nghost_real, 0, name_scope);
   } else {
-    ret = session_input_tensors (input_tensors, dcoord_real, ntypes, datype_real, dbox, nlist, std::vector(), std::vector(), atommap, nghost_real, 0, name_scope);
+    ret = session_input_tensors(
+        input_tensors, dcoord_real, ntypes, datype_real, dbox, nlist,
+        std::vector(), std::vector(), atommap,
+        nghost_real, 0, name_scope);
   }
-  assert (nloc_real == ret);
+  assert(nloc_real == ret);
   // make bond idx map
-  std::vector bd_idx(nall, -1);
-  for (int ii = 0; ii < pairs.size(); ++ii){
+  std::vector bd_idx(nall, -1);
+  for (int ii = 0; ii < pairs.size(); ++ii) {
     bd_idx[pairs[ii].first] = pairs[ii].second;
   }
   // make extf by bond idx map
-  std::vector dtype_sort_loc = atommap.get_type();
+  std::vector dtype_sort_loc = atommap.get_type();
   std::vector dextf;
-  for(int ii = 0; ii < dtype_sort_loc.size(); ++ii){
-    if (binary_search(sel_type.begin(), sel_type.end(), dtype_sort_loc[ii])){
+  for (int ii = 0; ii < dtype_sort_loc.size(); ++ii) {
+    if (binary_search(sel_type.begin(), sel_type.end(), dtype_sort_loc[ii])) {
       // selected atom
       int first_idx = real_bkw_map[sort_bkw_map[ii]];
       int second_idx = bd_idx[first_idx];
       assert(second_idx >= 0);
-      dextf.push_back(delef_[second_idx*3+0]);
-      dextf.push_back(delef_[second_idx*3+1]);
-      dextf.push_back(delef_[second_idx*3+2]);
+      dextf.push_back(delef_[second_idx * 3 + 0]);
+      dextf.push_back(delef_[second_idx * 3 + 1]);
+      dextf.push_back(delef_[second_idx * 3 + 2]);
     }
   }
   // dextf should be loc and virtual
-  assert(dextf.size() == (nloc - nloc_real)*3);
+  assert(dextf.size() == (nloc - nloc_real) * 3);
   // make tensor for extf
   int nframes = 1;
-  TensorShape extf_shape ;
-  extf_shape.AddDim (nframes);
-  extf_shape.AddDim (dextf.size());
-  Tensor extf_tensor	((tensorflow::DataType) dtype, extf_shape);
+  TensorShape extf_shape;
+  extf_shape.AddDim(nframes);
+  extf_shape.AddDim(dextf.size());
+  Tensor extf_tensor((tensorflow::DataType)dtype, extf_shape);
   if (dtype == tensorflow::DT_DOUBLE) {
-    auto extf = extf_tensor.matrix ();
-    for (int ii = 0; ii < nframes; ++ii){
-      for (int jj = 0; jj < extf.size(); ++jj){
-        extf(ii,jj) = dextf[jj];
+    auto extf = extf_tensor.matrix();
+    for (int ii = 0; ii < nframes; ++ii) {
+      for (int jj = 0; jj < extf.size(); ++jj) {
+        extf(ii, jj) = dextf[jj];
       }
     }
   } else {
-    auto extf = extf_tensor.matrix ();
-    for (int ii = 0; ii < nframes; ++ii){
-      for (int jj = 0; jj < extf.size(); ++jj){
-        extf(ii,jj) = dextf[jj];
+    auto extf = extf_tensor.matrix();
+    for (int ii = 0; ii < nframes; ++ii) {
+      for (int jj = 0; jj < extf.size(); ++jj) {
+        extf(ii, jj) = dextf[jj];
       }
     }
   }
   // append extf to input tensor
-  input_tensors.push_back({"t_ef", extf_tensor});  
+  input_tensors.push_back({"t_ef", extf_tensor});
   // run model
   std::vector dfcorr, dvcorr;
   if (dtype == tensorflow::DT_DOUBLE) {
-    run_model  (dfcorr, dvcorr, session, input_tensors, atommap, nghost_real);
+    run_model(dfcorr, dvcorr, session, input_tensors, atommap,
+                      nghost_real);
   } else {
-    run_model  (dfcorr, dvcorr, session, input_tensors, atommap, nghost_real);
+    run_model(dfcorr, dvcorr, session, input_tensors, atommap,
+                     nghost_real);
   }
   assert(dfcorr.size() == nall_real * 3);
   // back map force
   std::vector dfcorr_1 = dfcorr;
-  atommap.backward (dfcorr_1.begin(), dfcorr.begin(), 3);
+  atommap.backward(dfcorr_1.begin(), dfcorr.begin(), 3);
   assert(dfcorr_1.size() == nall_real * 3);
   // resize to all and clear
-  std::vector dfcorr_2(nall*3);
+  std::vector dfcorr_2(nall * 3);
   fill(dfcorr_2.begin(), dfcorr_2.end(), (VALUETYPE)0.0);
   // back map to original position
-  for (int ii = 0; ii < nall_real; ++ii){
-    for (int dd = 0; dd < 3; ++dd){
-      dfcorr_2[real_bkw_map[ii]*3+dd] += dfcorr_1[ii*3+dd];
+  for (int ii = 0; ii < nall_real; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      dfcorr_2[real_bkw_map[ii] * 3 + dd] += dfcorr_1[ii * 3 + dd];
     }
   }
   // self correction of bonded force
-  for (int ii = 0; ii < pairs.size(); ++ii){
-    for (int dd = 0; dd < 3; ++dd){
-      dfcorr_2[pairs[ii].first*3+dd] += delef_[pairs[ii].second*3+dd];
-    }    
+  for (int ii = 0; ii < pairs.size(); ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      dfcorr_2[pairs[ii].first * 3 + dd] += delef_[pairs[ii].second * 3 + dd];
+    }
   }
   // add ele contrinution
   dfcorr_ = dfcorr_2;
@@ -308,45 +289,38 @@ compute (std::vector &		dfcorr_,
   //   for (int dd = 0; dd < 3; ++dd){
   //     dfcorr_[ii*3+dd] += delef_[ii*3+dd];
   //   }
-  // }  
-  for (int ii = 0; ii < nloc_real; ++ii){
+  // }
+  for (int ii = 0; ii < nloc_real; ++ii) {
     int oii = real_bkw_map[ii];
-    for (int dd = 0; dd < 3; ++dd){
-      dfcorr_[oii*3+dd] += delef_[oii*3+dd];
-    }    
+    for (int dd = 0; dd < 3; ++dd) {
+      dfcorr_[oii * 3 + dd] += delef_[oii * 3 + dd];
+    }
   }
   dvcorr_ = dvcorr;
 }
 
-template
-void
-DipoleChargeModifier::
-compute  (std::vector &		dfcorr_,
-	 std::vector &		dvcorr_,
-	 const std::vector &		dcoord_,
-	 const std::vector &		datype_,
-	 const std::vector &		dbox, 
-	 const std::vector>&	pairs,
-	 const std::vector &		delef_, 
-	 const int				nghost,
-	 const InputNlist &		lmp_list);
+template void DipoleChargeModifier::compute(
+    std::vector& dfcorr_,
+    std::vector& dvcorr_,
+    const std::vector& dcoord_,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    const std::vector>& pairs,
+    const std::vector& delef_,
+    const int nghost,
+    const InputNlist& lmp_list);
 
-template
-void
-DipoleChargeModifier::
-compute  (std::vector &		dfcorr_,
-	 std::vector &		dvcorr_,
-	 const std::vector &		dcoord_,
-	 const std::vector &		datype_,
-	 const std::vector &		dbox, 
-	 const std::vector>&	pairs,
-	 const std::vector &		delef_, 
-	 const int				nghost,
-	 const InputNlist &		lmp_list);
+template void DipoleChargeModifier::compute(
+    std::vector& dfcorr_,
+    std::vector& dvcorr_,
+    const std::vector& dcoord_,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    const std::vector>& pairs,
+    const std::vector& delef_,
+    const int nghost,
+    const InputNlist& lmp_list);
 
-void 
-DipoleChargeModifier::
-print_summary(const std::string &pre) const
-{
+void DipoleChargeModifier::print_summary(const std::string& pre) const {
   deepmd::print_summary(pre);
 }
diff --git a/source/api_cc/src/DeepPot.cc b/source/api_cc/src/DeepPot.cc
index 1459d744e3..e829eaeb74 100644
--- a/source/api_cc/src/DeepPot.cc
+++ b/source/api_cc/src/DeepPot.cc
@@ -1,33 +1,32 @@
 #include "DeepPot.h"
+
+#include 
+
 #include "AtomMap.h"
-#include 	
 #include "device.h"
 
 using namespace tensorflow;
 using namespace deepmd;
 
-static 
-std::vector cum_sum (const std::vector & n_sel) {
-    std::vector sec;
-    sec.resize (n_sel.size() + 1);
-    sec[0] = 0;
-    for (int ii = 1; ii < sec.size(); ++ii) {
-        sec[ii] = sec[ii-1] + n_sel[ii-1];
-    }
-    return sec;
+static std::vector cum_sum(const std::vector& n_sel) {
+  std::vector sec;
+  sec.resize(n_sel.size() + 1);
+  sec[0] = 0;
+  for (int ii = 1; ii < sec.size(); ++ii) {
+    sec[ii] = sec[ii - 1] + n_sel[ii - 1];
+  }
+  return sec;
 }
 
-
 template 
-static void 
-run_model (ENERGYTYPE &			dener,
-	   std::vector &	dforce_,
-	   std::vector &	dvirial,
-	   Session *			session, 
-	   const std::vector> & input_tensors,
-	   const AtomMap&	atommap, 
-	   const int			nghost = 0)
-{
+static void run_model(
+    ENERGYTYPE& dener,
+    std::vector& dforce_,
+    std::vector& dvirial,
+    Session* session,
+    const std::vector>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost = 0) {
   unsigned nloc = atommap.get_type().size();
   unsigned nall = nloc + nghost;
   if (nloc == 0) {
@@ -43,234 +42,222 @@ run_model (ENERGYTYPE &			dener,
   }
 
   std::vector output_tensors;
-  check_status (session->Run(input_tensors, 
-			    {"o_energy", "o_force", "o_atom_energy", "o_atom_virial"}, 
-			    {}, 
-			    &output_tensors));
-  
+  check_status(session->Run(
+      input_tensors, {"o_energy", "o_force", "o_atom_energy", "o_atom_virial"},
+      {}, &output_tensors));
+
   Tensor output_e = output_tensors[0];
   Tensor output_f = output_tensors[1];
   Tensor output_av = output_tensors[3];
 
-  auto oe = output_e.flat  ();
-  auto of = output_f.flat  ();
-  auto oav = output_av.flat  ();
+  auto oe = output_e.flat();
+  auto of = output_f.flat();
+  auto oav = output_av.flat();
 
   dener = oe(0);
-  std::vector dforce (3 * nall);
-  dvirial.resize (9);
-  for (unsigned ii = 0; ii < nall * 3; ++ii){
+  std::vector dforce(3 * nall);
+  dvirial.resize(9);
+  for (unsigned ii = 0; ii < nall * 3; ++ii) {
     dforce[ii] = of(ii);
   }
   // set dvirial to zero, prevent input vector is not zero (#1123)
   std::fill(dvirial.begin(), dvirial.end(), (VALUETYPE)0.);
   for (int ii = 0; ii < nall; ++ii) {
-    dvirial[0] += (VALUETYPE)1.0 * oav(9*ii+0);
-    dvirial[1] += (VALUETYPE)1.0 * oav(9*ii+1);
-    dvirial[2] += (VALUETYPE)1.0 * oav(9*ii+2);
-    dvirial[3] += (VALUETYPE)1.0 * oav(9*ii+3);
-    dvirial[4] += (VALUETYPE)1.0 * oav(9*ii+4);
-    dvirial[5] += (VALUETYPE)1.0 * oav(9*ii+5);
-    dvirial[6] += (VALUETYPE)1.0 * oav(9*ii+6);
-    dvirial[7] += (VALUETYPE)1.0 * oav(9*ii+7);
-    dvirial[8] += (VALUETYPE)1.0 * oav(9*ii+8);
+    dvirial[0] += (VALUETYPE)1.0 * oav(9 * ii + 0);
+    dvirial[1] += (VALUETYPE)1.0 * oav(9 * ii + 1);
+    dvirial[2] += (VALUETYPE)1.0 * oav(9 * ii + 2);
+    dvirial[3] += (VALUETYPE)1.0 * oav(9 * ii + 3);
+    dvirial[4] += (VALUETYPE)1.0 * oav(9 * ii + 4);
+    dvirial[5] += (VALUETYPE)1.0 * oav(9 * ii + 5);
+    dvirial[6] += (VALUETYPE)1.0 * oav(9 * ii + 6);
+    dvirial[7] += (VALUETYPE)1.0 * oav(9 * ii + 7);
+    dvirial[8] += (VALUETYPE)1.0 * oav(9 * ii + 8);
   }
   dforce_ = dforce;
-  atommap.backward (dforce_.begin(), dforce.begin(), 3);
+  atommap.backward(dforce_.begin(), dforce.begin(), 3);
 }
 
-template
-void
-run_model  (ENERGYTYPE &			dener,
-	   std::vector &	dforce_,
-	   std::vector &	dvirial,
-	   Session *			session, 
-	   const std::vector> & input_tensors,
-	   const AtomMap&	atommap, 
-	   const int			nghost);
-
-template
-void
-run_model  (ENERGYTYPE &			dener,
-     std::vector &	dforce_,
-     std::vector &	dvirial,
-     Session *			session, 
-     const std::vector> & input_tensors,
-     const AtomMap&	atommap, 
-     const int			nghost);
-
-template
-void
-run_model  (ENERGYTYPE &			dener,
-	   std::vector &	dforce_,
-	   std::vector &	dvirial,
-	   Session *			session, 
-	   const std::vector> & input_tensors,
-	   const AtomMap&	atommap, 
-	   const int			nghost);
-
-template
-void
-run_model  (ENERGYTYPE &			dener,
-     std::vector &	dforce_,
-     std::vector &	dvirial,
-     Session *			session, 
-     const std::vector> & input_tensors,
-     const AtomMap&	atommap, 
-     const int			nghost);
+template void run_model(
+    ENERGYTYPE& dener,
+    std::vector& dforce_,
+    std::vector& dvirial,
+    Session* session,
+    const std::vector>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost);
+
+template void run_model(
+    ENERGYTYPE& dener,
+    std::vector& dforce_,
+    std::vector& dvirial,
+    Session* session,
+    const std::vector>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost);
+
+template void run_model(
+    ENERGYTYPE& dener,
+    std::vector& dforce_,
+    std::vector& dvirial,
+    Session* session,
+    const std::vector>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost);
+
+template void run_model(
+    ENERGYTYPE& dener,
+    std::vector& dforce_,
+    std::vector& dvirial,
+    Session* session,
+    const std::vector>& input_tensors,
+    const AtomMap& atommap,
+    const int nghost);
 
 template 
-static void run_model (ENERGYTYPE   &		dener,
-		       std::vector&	dforce_,
-		       std::vector&	dvirial,	   
-		       std::vector&	datom_energy_,
-		       std::vector&	datom_virial_,
-		       Session*			session, 
-		       const std::vector> & input_tensors,
-		       const deepmd::AtomMap &   atommap, 
-		       const int&		nghost = 0)
-{
-    unsigned nloc = atommap.get_type().size();
-    unsigned nall = nloc + nghost;
-    if (nloc == 0) {
-        dener = 0;
-        // no backward map needed
-        // dforce of size nall * 3
-        dforce_.resize(nall * 3);
-        fill(dforce_.begin(), dforce_.end(), (VALUETYPE)0.0);
-        // dvirial of size 9
-        dvirial.resize(9);
-        fill(dvirial.begin(), dvirial.end(), (VALUETYPE)0.0);
-        // datom_energy_ of size nall
-        datom_energy_.resize(nall);
-        fill(datom_energy_.begin(), datom_energy_.end(), (VALUETYPE)0.0);
-        // datom_virial_ of size nall * 9
-        datom_virial_.resize(nall * 9);
-        fill(datom_virial_.begin(), datom_virial_.end(), (VALUETYPE)0.0);
-        return;
-    }
-    std::vector output_tensors;
-
-    check_status (session->Run(input_tensors, 
-			    {"o_energy", "o_force", "o_atom_energy", "o_atom_virial"}, 
-			    {},
-			    &output_tensors));
-
-    Tensor output_e = output_tensors[0];
-    Tensor output_f = output_tensors[1];
-    Tensor output_ae = output_tensors[2];
-    Tensor output_av = output_tensors[3];
-
-    auto oe = output_e.flat  ();
-    auto of = output_f.flat  ();
-    auto oae = output_ae.flat  ();
-    auto oav = output_av.flat  ();
-
-    dener = oe(0);
-    std::vector dforce (3 * nall);
-    std::vector datom_energy (nall, 0);
-    std::vector datom_virial (9 * nall);
-    dvirial.resize (9);
-    for (int ii = 0; ii < nall * 3; ++ii) {
-        dforce[ii] = of(ii);
-    }
-    for (int ii = 0; ii < nloc; ++ii) {
-        datom_energy[ii] = oae(ii);
-    }
-    for (int ii = 0; ii < nall * 9; ++ii) {
-        datom_virial[ii] = oav(ii);
-    }
-    // set dvirial to zero, prevent input vector is not zero (#1123)
-    std::fill(dvirial.begin(), dvirial.end(), (VALUETYPE)0.);
-    for (int ii = 0; ii < nall; ++ii) {
-        dvirial[0] += (VALUETYPE)1.0 * datom_virial[9*ii+0];
-        dvirial[1] += (VALUETYPE)1.0 * datom_virial[9*ii+1];
-        dvirial[2] += (VALUETYPE)1.0 * datom_virial[9*ii+2];
-        dvirial[3] += (VALUETYPE)1.0 * datom_virial[9*ii+3];
-        dvirial[4] += (VALUETYPE)1.0 * datom_virial[9*ii+4];
-        dvirial[5] += (VALUETYPE)1.0 * datom_virial[9*ii+5];
-        dvirial[6] += (VALUETYPE)1.0 * datom_virial[9*ii+6];
-        dvirial[7] += (VALUETYPE)1.0 * datom_virial[9*ii+7];
-        dvirial[8] += (VALUETYPE)1.0 * datom_virial[9*ii+8];
-	}
-    dforce_ = dforce;
-    datom_energy_ = datom_energy;
-    datom_virial_ = datom_virial;
-    atommap.backward (dforce_.begin(), dforce.begin(), 3);
-    atommap.backward (datom_energy_.begin(), datom_energy.begin(), 1);
-    atommap.backward (datom_virial_.begin(), datom_virial.begin(), 9);
-}
+static void run_model(
+    ENERGYTYPE& dener,
+    std::vector& dforce_,
+    std::vector& dvirial,
+    std::vector& datom_energy_,
+    std::vector& datom_virial_,
+    Session* session,
+    const std::vector>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nghost = 0) {
+  unsigned nloc = atommap.get_type().size();
+  unsigned nall = nloc + nghost;
+  if (nloc == 0) {
+    dener = 0;
+    // no backward map needed
+    // dforce of size nall * 3
+    dforce_.resize(nall * 3);
+    fill(dforce_.begin(), dforce_.end(), (VALUETYPE)0.0);
+    // dvirial of size 9
+    dvirial.resize(9);
+    fill(dvirial.begin(), dvirial.end(), (VALUETYPE)0.0);
+    // datom_energy_ of size nall
+    datom_energy_.resize(nall);
+    fill(datom_energy_.begin(), datom_energy_.end(), (VALUETYPE)0.0);
+    // datom_virial_ of size nall * 9
+    datom_virial_.resize(nall * 9);
+    fill(datom_virial_.begin(), datom_virial_.end(), (VALUETYPE)0.0);
+    return;
+  }
+  std::vector output_tensors;
 
-template
-void run_model  (ENERGYTYPE   &		dener,
-    std::vector&	dforce_,
-    std::vector&	dvirial,	   
-    std::vector&	datom_energy_,
-    std::vector&	datom_virial_,
-    Session*			session, 
-    const std::vector> & input_tensors,
-    const deepmd::AtomMap &   atommap, 
-    const int&		nghost);
-
-template
-void run_model  (ENERGYTYPE   &		dener,
-    std::vector&	dforce_,
-    std::vector&	dvirial,	   
-    std::vector&	datom_energy_,
-    std::vector&	datom_virial_,
-    Session*			session, 
-    const std::vector> & input_tensors,
-    const deepmd::AtomMap &   atommap, 
-    const int&		nghost);
-
-template
-void run_model  (ENERGYTYPE   &		dener,
-    std::vector&	dforce_,
-    std::vector&	dvirial,	   
-    std::vector&	datom_energy_,
-    std::vector&	datom_virial_,
-    Session*			session, 
-    const std::vector> & input_tensors,
-    const deepmd::AtomMap &   atommap, 
-    const int&		nghost);
-
-template
-void run_model  (ENERGYTYPE   &		dener,
-    std::vector&	dforce_,
-    std::vector&	dvirial,	   
-    std::vector&	datom_energy_,
-    std::vector&	datom_virial_,
-    Session*			session, 
-    const std::vector> & input_tensors,
-    const deepmd::AtomMap &   atommap, 
-    const int&		nghost);
-
-DeepPot::
-DeepPot ()
-    : inited (false), init_nbor (false),
-      graph_def(new GraphDef())
-{
-}
+  check_status(session->Run(
+      input_tensors, {"o_energy", "o_force", "o_atom_energy", "o_atom_virial"},
+      {}, &output_tensors));
 
-DeepPot::
-DeepPot (const std::string & model, const int & gpu_rank, const std::string & file_content)
-    : inited (false), init_nbor (false),
-      graph_def(new GraphDef())
-{
-  init(model, gpu_rank, file_content);  
+  Tensor output_e = output_tensors[0];
+  Tensor output_f = output_tensors[1];
+  Tensor output_ae = output_tensors[2];
+  Tensor output_av = output_tensors[3];
+
+  auto oe = output_e.flat();
+  auto of = output_f.flat();
+  auto oae = output_ae.flat();
+  auto oav = output_av.flat();
+
+  dener = oe(0);
+  std::vector dforce(3 * nall);
+  std::vector datom_energy(nall, 0);
+  std::vector datom_virial(9 * nall);
+  dvirial.resize(9);
+  for (int ii = 0; ii < nall * 3; ++ii) {
+    dforce[ii] = of(ii);
+  }
+  for (int ii = 0; ii < nloc; ++ii) {
+    datom_energy[ii] = oae(ii);
+  }
+  for (int ii = 0; ii < nall * 9; ++ii) {
+    datom_virial[ii] = oav(ii);
+  }
+  // set dvirial to zero, prevent input vector is not zero (#1123)
+  std::fill(dvirial.begin(), dvirial.end(), (VALUETYPE)0.);
+  for (int ii = 0; ii < nall; ++ii) {
+    dvirial[0] += (VALUETYPE)1.0 * datom_virial[9 * ii + 0];
+    dvirial[1] += (VALUETYPE)1.0 * datom_virial[9 * ii + 1];
+    dvirial[2] += (VALUETYPE)1.0 * datom_virial[9 * ii + 2];
+    dvirial[3] += (VALUETYPE)1.0 * datom_virial[9 * ii + 3];
+    dvirial[4] += (VALUETYPE)1.0 * datom_virial[9 * ii + 4];
+    dvirial[5] += (VALUETYPE)1.0 * datom_virial[9 * ii + 5];
+    dvirial[6] += (VALUETYPE)1.0 * datom_virial[9 * ii + 6];
+    dvirial[7] += (VALUETYPE)1.0 * datom_virial[9 * ii + 7];
+    dvirial[8] += (VALUETYPE)1.0 * datom_virial[9 * ii + 8];
+  }
+  dforce_ = dforce;
+  datom_energy_ = datom_energy;
+  datom_virial_ = datom_virial;
+  atommap.backward(dforce_.begin(), dforce.begin(), 3);
+  atommap.backward(datom_energy_.begin(), datom_energy.begin(), 1);
+  atommap.backward(datom_virial_.begin(), datom_virial.begin(), 9);
 }
 
-DeepPot::~DeepPot() {
-  delete graph_def;
+template void run_model(
+    ENERGYTYPE& dener,
+    std::vector& dforce_,
+    std::vector& dvirial,
+    std::vector& datom_energy_,
+    std::vector& datom_virial_,
+    Session* session,
+    const std::vector>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nghost);
+
+template void run_model(
+    ENERGYTYPE& dener,
+    std::vector& dforce_,
+    std::vector& dvirial,
+    std::vector& datom_energy_,
+    std::vector& datom_virial_,
+    Session* session,
+    const std::vector>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nghost);
+
+template void run_model(
+    ENERGYTYPE& dener,
+    std::vector& dforce_,
+    std::vector& dvirial,
+    std::vector& datom_energy_,
+    std::vector& datom_virial_,
+    Session* session,
+    const std::vector>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nghost);
+
+template void run_model(
+    ENERGYTYPE& dener,
+    std::vector& dforce_,
+    std::vector& dvirial,
+    std::vector& datom_energy_,
+    std::vector& datom_virial_,
+    Session* session,
+    const std::vector>& input_tensors,
+    const deepmd::AtomMap& atommap,
+    const int& nghost);
+
+DeepPot::DeepPot()
+    : inited(false), init_nbor(false), graph_def(new GraphDef()) {}
+
+DeepPot::DeepPot(const std::string& model,
+                 const int& gpu_rank,
+                 const std::string& file_content)
+    : inited(false), init_nbor(false), graph_def(new GraphDef()) {
+  init(model, gpu_rank, file_content);
 }
 
-void
-DeepPot::
-init (const std::string & model, const int & gpu_rank, const std::string & file_content)
-{
-  if (inited){
-    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do nothing at the second call of initializer" << std::endl;
-    return ;
+DeepPot::~DeepPot() { delete graph_def; }
+
+void DeepPot::init(const std::string& model,
+                   const int& gpu_rank,
+                   const std::string& file_content) {
+  if (inited) {
+    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do "
+                 "nothing at the second call of initializer"
+              << std::endl;
+    return;
   }
   SessionOptions options;
   get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
@@ -278,25 +265,26 @@ init (const std::string & model, const int & gpu_rank, const std::string & file_
   options.config.set_intra_op_parallelism_threads(num_intra_nthreads);
   deepmd::load_op_library();
 
-  if(file_content.size() == 0)
-    check_status (ReadBinaryProto(Env::Default(), model, graph_def));
+  if (file_content.size() == 0)
+    check_status(ReadBinaryProto(Env::Default(), model, graph_def));
   else
     (*graph_def).ParseFromString(file_content);
   int gpu_num = -1;
-  #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-  DPGetDeviceCount(gpu_num); // check current device environment
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  DPGetDeviceCount(gpu_num);  // check current device environment
   if (gpu_num > 0) {
     options.config.set_allow_soft_placement(true);
-    options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(0.9);
+    options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(
+        0.9);
     options.config.mutable_gpu_options()->set_allow_growth(true);
     DPErrcheck(DPSetDevice(gpu_rank % gpu_num));
     std::string str = "/gpu:";
     str += std::to_string(gpu_rank % gpu_num);
     graph::SetDefaultDevice(str, graph_def);
   }
-  #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-  check_status (NewSession(options, &session));
-  check_status (session->Create(*graph_def));
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  check_status(NewSession(options, &session));
+  check_status(session->Create(*graph_def));
   dtype = session_get_dtype(session, "descrpt_attr/rcut");
   if (dtype == tensorflow::DT_DOUBLE) {
     rcut = get_scalar("descrpt_attr/rcut");
@@ -310,186 +298,172 @@ init (const std::string & model, const int & gpu_rank, const std::string & file_
   if (dfparam < 0) dfparam = 0;
   if (daparam < 0) daparam = 0;
   model_type = get_scalar("model_attr/model_type");
-  try{
-  model_version = get_scalar("model_attr/model_version");
-  } catch (deepmd::tf_exception& e){
+  try {
+    model_version = get_scalar("model_attr/model_version");
+  } catch (deepmd::tf_exception& e) {
     // no model version defined in old models
     model_version = "0.0";
   }
-  if(! model_compatable(model_version)){
-    throw deepmd::deepmd_exception(
-	"incompatable model: version " + model_version 
-	+ " in graph, but version " + global_model_version 
-	+ " supported ");
+  if (!model_compatable(model_version)) {
+    throw deepmd::deepmd_exception("incompatable model: version " +
+                                   model_version + " in graph, but version " +
+                                   global_model_version + " supported ");
   }
   inited = true;
-  
+
   init_nbor = false;
 }
 
-void 
-DeepPot::
-print_summary(const std::string &pre) const
-{
+void DeepPot::print_summary(const std::string& pre) const {
   deepmd::print_summary(pre);
 }
 
-template
-VT
-DeepPot::
-get_scalar (const std::string & name) const
-{
+template 
+VT DeepPot::get_scalar(const std::string& name) const {
   return session_get_scalar(session, name);
 }
 
-std::string graph_info(const GraphDef & graph_def) {
-    // std::stringstream buffer;
-    // std::streambuf * old = std::cout.rdbuf(buffer.rdbuf());
-    std::string str = "";
-    for (int ii = 0; ii < graph_def.node_size(); ii++) {
-        if (graph_def.node(ii).name() == "DescrptSeA") {
-            // str = graph_def.node(ii).PrintDebugString();
-            str = graph_def.node(ii).DebugString();
-            return str;
-            // std::cout << str << std::endl;
-        }
-        if (graph_def.node(ii).name() == "DescrptSeR") {
-            // str = graph_def.node(ii).PrintDebugString();
-            str = graph_def.node(ii).DebugString();
-            return str;
-            // std::cout << str << std::endl;
-        }
+std::string graph_info(const GraphDef& graph_def) {
+  // std::stringstream buffer;
+  // std::streambuf * old = std::cout.rdbuf(buffer.rdbuf());
+  std::string str = "";
+  for (int ii = 0; ii < graph_def.node_size(); ii++) {
+    if (graph_def.node(ii).name() == "DescrptSeA") {
+      // str = graph_def.node(ii).PrintDebugString();
+      str = graph_def.node(ii).DebugString();
+      return str;
+      // std::cout << str << std::endl;
+    }
+    if (graph_def.node(ii).name() == "DescrptSeR") {
+      // str = graph_def.node(ii).PrintDebugString();
+      str = graph_def.node(ii).DebugString();
+      return str;
+      // std::cout << str << std::endl;
     }
-    return str;
+  }
+  return str;
 }
 
 // init the tmp array data
-std::vector DeepPot::get_sel_a () const {
-    std::vector sel_a;
-    std::istringstream is(graph_info(*graph_def));
-    std::string line = "";
-    while(is >> line) {
-        if (line.find("sel_a") != line.npos) {
-            while (std::getline(is, line) && line != "}") {
-                if (line.find("i:") != line.npos) {
-                    sel_a.push_back(atoi((line.substr(line.find("i:") + 2)).c_str()));
-                }
-            } break;
+std::vector DeepPot::get_sel_a() const {
+  std::vector sel_a;
+  std::istringstream is(graph_info(*graph_def));
+  std::string line = "";
+  while (is >> line) {
+    if (line.find("sel_a") != line.npos) {
+      while (std::getline(is, line) && line != "}") {
+        if (line.find("i:") != line.npos) {
+          sel_a.push_back(atoi((line.substr(line.find("i:") + 2)).c_str()));
         }
-        if (line.find("sel") != line.npos) {
-            while (std::getline(is, line) && line != "}") {
-                if (line.find("i:") != line.npos) {
-                    sel_a.push_back(atoi((line.substr(line.find("i:") + 2)).c_str()));
-                }
-            } break;
+      }
+      break;
+    }
+    if (line.find("sel") != line.npos) {
+      while (std::getline(is, line) && line != "}") {
+        if (line.find("i:") != line.npos) {
+          sel_a.push_back(atoi((line.substr(line.find("i:") + 2)).c_str()));
         }
+      }
+      break;
     }
-    return sel_a;
+  }
+  return sel_a;
 }
 
 template 
-void
-DeepPot::
-validate_fparam_aparam(const int & nloc,
-		       const std::vector &fparam,
-		       const std::vector &aparam)const 
-{
+void DeepPot::validate_fparam_aparam(
+    const int& nloc,
+    const std::vector& fparam,
+    const std::vector& aparam) const {
   if (fparam.size() != dfparam) {
-    throw deepmd::deepmd_exception("the dim of frame parameter provided is not consistent with what the model uses");
+    throw deepmd::deepmd_exception(
+        "the dim of frame parameter provided is not consistent with what the "
+        "model uses");
   }
   if (aparam.size() != daparam * nloc) {
-    throw deepmd::deepmd_exception("the dim of atom parameter provided is not consistent with what the model uses");
-  }  
+    throw deepmd::deepmd_exception(
+        "the dim of atom parameter provided is not consistent with what the "
+        "model uses");
+  }
 }
 
-template
-void
-DeepPot::
-validate_fparam_aparam(const int & nloc,
-           const std::vector &fparam,
-           const std::vector &aparam)const ;
+template void DeepPot::validate_fparam_aparam(
+    const int& nloc,
+    const std::vector& fparam,
+    const std::vector& aparam) const;
 
-template
-void
-DeepPot::
-validate_fparam_aparam(const int & nloc,
-           const std::vector &fparam,
-           const std::vector &aparam)const ;
+template void DeepPot::validate_fparam_aparam(
+    const int& nloc,
+    const std::vector& fparam,
+    const std::vector& aparam) const;
 
 template 
-void
-DeepPot::
-compute (ENERGYTYPE &			dener,
-	 std::vector &	dforce_,
-	 std::vector &	dvirial,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox, 
-	 const std::vector &	fparam,
-	 const std::vector &	aparam)
-{
+void DeepPot::compute(ENERGYTYPE& dener,
+                      std::vector& dforce_,
+                      std::vector& dvirial,
+                      const std::vector& dcoord_,
+                      const std::vector& datype_,
+                      const std::vector& dbox,
+                      const std::vector& fparam,
+                      const std::vector& aparam) {
   int nall = dcoord_.size() / 3;
   int nloc = nall;
-  atommap = deepmd::AtomMap (datype_.begin(), datype_.begin() + nloc);
-  assert (nloc == atommap.get_type().size());
+  atommap = deepmd::AtomMap(datype_.begin(), datype_.begin() + nloc);
+  assert(nloc == atommap.get_type().size());
   validate_fparam_aparam(nloc, fparam, aparam);
 
   std::vector> input_tensors;
 
   if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, fparam, aparam, atommap);
-    assert (ret == nloc);
-    run_model (dener, dforce_, dvirial, session, input_tensors, atommap);
+    int ret =
+        session_input_tensors(input_tensors, dcoord_, ntypes, datype_,
+                                      dbox, cell_size, fparam, aparam, atommap);
+    assert(ret == nloc);
+    run_model(dener, dforce_, dvirial, session, input_tensors, atommap);
   } else {
-    int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, fparam, aparam, atommap);
-    assert (ret == nloc);
-    run_model (dener, dforce_, dvirial, session, input_tensors, atommap);
+    int ret =
+        session_input_tensors(input_tensors, dcoord_, ntypes, datype_,
+                                     dbox, cell_size, fparam, aparam, atommap);
+    assert(ret == nloc);
+    run_model(dener, dforce_, dvirial, session, input_tensors, atommap);
   }
 }
 
-template
-void
-DeepPot::
-compute  (ENERGYTYPE &			dener,
-	 std::vector &	dforce_,
-	 std::vector &	dvirial,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox, 
-	 const std::vector &	fparam,
-	 const std::vector &	aparam);
-
-template
-void
-DeepPot::
-compute  (ENERGYTYPE &			dener,
-	 std::vector &	dforce_,
-	 std::vector &	dvirial,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox, 
-	 const std::vector &	fparam,
-	 const std::vector &	aparam);
+template void DeepPot::compute(ENERGYTYPE& dener,
+                                       std::vector& dforce_,
+                                       std::vector& dvirial,
+                                       const std::vector& dcoord_,
+                                       const std::vector& datype_,
+                                       const std::vector& dbox,
+                                       const std::vector& fparam,
+                                       const std::vector& aparam);
+
+template void DeepPot::compute(ENERGYTYPE& dener,
+                                      std::vector& dforce_,
+                                      std::vector& dvirial,
+                                      const std::vector& dcoord_,
+                                      const std::vector& datype_,
+                                      const std::vector& dbox,
+                                      const std::vector& fparam,
+                                      const std::vector& aparam);
 
 template 
-void
-DeepPot::
-compute (ENERGYTYPE &			dener,
-	 std::vector &	dforce_,
-	 std::vector &	dvirial,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox, 
-	 const int			nghost,
-	 const InputNlist &		lmp_list,
-	 const int&			ago,
-	 const std::vector &	fparam,
-	 const std::vector &	aparam_)
-{
+void DeepPot::compute(ENERGYTYPE& dener,
+                      std::vector& dforce_,
+                      std::vector& dvirial,
+                      const std::vector& dcoord_,
+                      const std::vector& datype_,
+                      const std::vector& dbox,
+                      const int nghost,
+                      const InputNlist& lmp_list,
+                      const int& ago,
+                      const std::vector& fparam,
+                      const std::vector& aparam_) {
   std::vector dcoord, dforce, aparam;
   std::vector datype, fwd_map, bkw_map;
   int nghost_real;
-  select_real_atoms(fwd_map, bkw_map, nghost_real, dcoord_, datype_, nghost, ntypes);
+  select_real_atoms(fwd_map, bkw_map, nghost_real, dcoord_, datype_, nghost,
+                    ntypes);
   // resize to nall_real
   dcoord.resize(bkw_map.size() * 3);
   datype.resize(bkw_map.size());
@@ -497,185 +471,175 @@ compute (ENERGYTYPE &			dener,
   select_map(dcoord, dcoord_, fwd_map, 3);
   select_map(datype, datype_, fwd_map, 1);
   // aparam
-  if (daparam > 0){
+  if (daparam > 0) {
     aparam.resize(bkw_map.size() - nghost_real);
     select_map(aparam, aparam_, fwd_map, daparam);
   }
   // internal nlist
-  if (ago == 0){
+  if (ago == 0) {
     nlist_data.copy_from_nlist(lmp_list);
-    nlist_data.shuffle_exclude_empty(fwd_map);  
+    nlist_data.shuffle_exclude_empty(fwd_map);
   }
-  compute_inner(dener, dforce, dvirial, dcoord, datype, dbox, nghost_real, ago, fparam, aparam);
+  compute_inner(dener, dforce, dvirial, dcoord, datype, dbox, nghost_real, ago,
+                fparam, aparam);
   // bkw map
   dforce_.resize(fwd_map.size() * 3);
   select_map(dforce_, dforce, bkw_map, 3);
 }
 
-template
-void
-DeepPot::
-compute  (ENERGYTYPE &			dener,
-   std::vector &	dforce_,
-   std::vector &	dvirial,
-   const std::vector &	dcoord_,
-   const std::vector &	datype_,
-   const std::vector &	dbox, 
-   const int			nghost,
-   const InputNlist &		lmp_list,
-   const int&			ago,
-   const std::vector &	fparam,
-   const std::vector &	aparam_);
-
-template
-void
-DeepPot::
-compute  (ENERGYTYPE &			dener,
-   std::vector &	dforce_,
-   std::vector &	dvirial,
-   const std::vector &	dcoord_,
-   const std::vector &	datype_,
-   const std::vector &	dbox, 
-   const int			nghost,
-   const InputNlist &		lmp_list,
-   const int&			ago,
-   const std::vector &	fparam,
-   const std::vector &	aparam_);
+template void DeepPot::compute(ENERGYTYPE& dener,
+                                       std::vector& dforce_,
+                                       std::vector& dvirial,
+                                       const std::vector& dcoord_,
+                                       const std::vector& datype_,
+                                       const std::vector& dbox,
+                                       const int nghost,
+                                       const InputNlist& lmp_list,
+                                       const int& ago,
+                                       const std::vector& fparam,
+                                       const std::vector& aparam_);
+
+template void DeepPot::compute(ENERGYTYPE& dener,
+                                      std::vector& dforce_,
+                                      std::vector& dvirial,
+                                      const std::vector& dcoord_,
+                                      const std::vector& datype_,
+                                      const std::vector& dbox,
+                                      const int nghost,
+                                      const InputNlist& lmp_list,
+                                      const int& ago,
+                                      const std::vector& fparam,
+                                      const std::vector& aparam_);
 
 template 
-void
-DeepPot::
-compute_inner (ENERGYTYPE &			dener,
-	       std::vector &		dforce_,
-	       std::vector &		dvirial,
-	       const std::vector &	dcoord_,
-	       const std::vector &		datype_,
-	       const std::vector &	dbox, 
-	       const int			nghost,
-	       const int&			ago,
-	       const std::vector &	fparam,
-	       const std::vector &	aparam)
-{
+void DeepPot::compute_inner(ENERGYTYPE& dener,
+                            std::vector& dforce_,
+                            std::vector& dvirial,
+                            const std::vector& dcoord_,
+                            const std::vector& datype_,
+                            const std::vector& dbox,
+                            const int nghost,
+                            const int& ago,
+                            const std::vector& fparam,
+                            const std::vector& aparam) {
   int nall = dcoord_.size() / 3;
   int nloc = nall - nghost;
 
-    validate_fparam_aparam(nloc, fparam, aparam);
-    std::vector> input_tensors;
+  validate_fparam_aparam(nloc, fparam, aparam);
+  std::vector> input_tensors;
 
-    // agp == 0 means that the LAMMPS nbor list has been updated
-    if (ago == 0) {
-      atommap = deepmd::AtomMap (datype_.begin(), datype_.begin() + nloc);
-      assert (nloc == atommap.get_type().size());
-      nlist_data.shuffle(atommap);
-      nlist_data.make_inlist(nlist);
-    }
-    if (dtype == tensorflow::DT_DOUBLE) {
-      int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, atommap, nghost, ago);
-      assert (nloc == ret);
-      run_model (dener, dforce_, dvirial, session, input_tensors, atommap, nghost);
-    } else {
-      int ret = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, atommap, nghost, ago);
-      assert (nloc == ret);
-      run_model (dener, dforce_, dvirial, session, input_tensors, atommap, nghost);
-    }
+  // agp == 0 means that the LAMMPS nbor list has been updated
+  if (ago == 0) {
+    atommap = deepmd::AtomMap(datype_.begin(), datype_.begin() + nloc);
+    assert(nloc == atommap.get_type().size());
+    nlist_data.shuffle(atommap);
+    nlist_data.make_inlist(nlist);
+  }
+  if (dtype == tensorflow::DT_DOUBLE) {
+    int ret = session_input_tensors(input_tensors, dcoord_, ntypes,
+                                            datype_, dbox, nlist, fparam,
+                                            aparam, atommap, nghost, ago);
+    assert(nloc == ret);
+    run_model(dener, dforce_, dvirial, session, input_tensors, atommap,
+                      nghost);
+  } else {
+    int ret = session_input_tensors(input_tensors, dcoord_, ntypes,
+                                           datype_, dbox, nlist, fparam, aparam,
+                                           atommap, nghost, ago);
+    assert(nloc == ret);
+    run_model(dener, dforce_, dvirial, session, input_tensors, atommap,
+                     nghost);
+  }
 }
 
-template
-void
-DeepPot::
-compute_inner  (ENERGYTYPE &			dener,
-   std::vector &	dforce_,
-   std::vector &	dvirial,
-   const std::vector &	dcoord_,
-   const std::vector &	datype_,
-   const std::vector &	dbox, 
-   const int			nghost,
-   const int&			ago,
-   const std::vector &	fparam,
-   const std::vector &	aparam);
+template void DeepPot::compute_inner(ENERGYTYPE& dener,
+                                             std::vector& dforce_,
+                                             std::vector& dvirial,
+                                             const std::vector& dcoord_,
+                                             const std::vector& datype_,
+                                             const std::vector& dbox,
+                                             const int nghost,
+                                             const int& ago,
+                                             const std::vector& fparam,
+                                             const std::vector& aparam);
 
 template 
-void
-DeepPot::
-compute (ENERGYTYPE &			dener,
-	 std::vector &	dforce_,
-	 std::vector &	dvirial,
-	 std::vector &	datom_energy_,
-	 std::vector &	datom_virial_,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox,
-	 const std::vector &	fparam,
-	 const std::vector &	aparam)
-{
-  atommap = deepmd::AtomMap (datype_.begin(), datype_.end());
+void DeepPot::compute(ENERGYTYPE& dener,
+                      std::vector& dforce_,
+                      std::vector& dvirial,
+                      std::vector& datom_energy_,
+                      std::vector& datom_virial_,
+                      const std::vector& dcoord_,
+                      const std::vector& datype_,
+                      const std::vector& dbox,
+                      const std::vector& fparam,
+                      const std::vector& aparam) {
+  atommap = deepmd::AtomMap(datype_.begin(), datype_.end());
   validate_fparam_aparam(atommap.get_type().size(), fparam, aparam);
 
   std::vector> input_tensors;
 
   if (dtype == tensorflow::DT_DOUBLE) {
-    int nloc = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, fparam, aparam, atommap);
-    run_model (dener, dforce_, dvirial, datom_energy_, datom_virial_, session, input_tensors, atommap);
+    int nloc =
+        session_input_tensors(input_tensors, dcoord_, ntypes, datype_,
+                                      dbox, cell_size, fparam, aparam, atommap);
+    run_model(dener, dforce_, dvirial, datom_energy_, datom_virial_,
+                      session, input_tensors, atommap);
   } else {
-    int nloc = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, fparam, aparam, atommap);
-    run_model (dener, dforce_, dvirial, datom_energy_, datom_virial_, session, input_tensors, atommap);
+    int nloc =
+        session_input_tensors(input_tensors, dcoord_, ntypes, datype_,
+                                     dbox, cell_size, fparam, aparam, atommap);
+    run_model(dener, dforce_, dvirial, datom_energy_, datom_virial_,
+                     session, input_tensors, atommap);
   }
 }
 
-template
-void
-DeepPot::
-compute  (ENERGYTYPE &			dener,
-   std::vector &	dforce_,
-   std::vector &	dvirial,
-   std::vector &	datom_energy_,
-   std::vector &	datom_virial_,
-   const std::vector &	dcoord_,
-   const std::vector &	datype_,
-   const std::vector &	dbox,
-   const std::vector &	fparam,
-   const std::vector &	aparam);
-
-template
-void
-DeepPot::
-compute  (ENERGYTYPE &			dener,
-   std::vector &	dforce_,
-   std::vector &	dvirial,
-   std::vector &	datom_energy_,
-   std::vector &	datom_virial_,
-   const std::vector &	dcoord_,
-   const std::vector &	datype_,
-   const std::vector &	dbox,
-   const std::vector &	fparam,
-   const std::vector &	aparam);
+template void DeepPot::compute(ENERGYTYPE& dener,
+                                       std::vector& dforce_,
+                                       std::vector& dvirial,
+                                       std::vector& datom_energy_,
+                                       std::vector& datom_virial_,
+                                       const std::vector& dcoord_,
+                                       const std::vector& datype_,
+                                       const std::vector& dbox,
+                                       const std::vector& fparam,
+                                       const std::vector& aparam);
+
+template void DeepPot::compute(ENERGYTYPE& dener,
+                                      std::vector& dforce_,
+                                      std::vector& dvirial,
+                                      std::vector& datom_energy_,
+                                      std::vector& datom_virial_,
+                                      const std::vector& dcoord_,
+                                      const std::vector& datype_,
+                                      const std::vector& dbox,
+                                      const std::vector& fparam,
+                                      const std::vector& aparam);
 
 template 
-void
-DeepPot::
-compute (ENERGYTYPE &			dener,
-	 std::vector &	dforce_,
-	 std::vector &	dvirial,
-	 std::vector &	datom_energy_,
-	 std::vector &	datom_virial_,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox, 
-	 const int			nghost, 
-	 const InputNlist &	lmp_list,
-	 const int               &	ago,
-	 const std::vector &	fparam,
-	 const std::vector &	aparam_)
-{
+void DeepPot::compute(ENERGYTYPE& dener,
+                      std::vector& dforce_,
+                      std::vector& dvirial,
+                      std::vector& datom_energy_,
+                      std::vector& datom_virial_,
+                      const std::vector& dcoord_,
+                      const std::vector& datype_,
+                      const std::vector& dbox,
+                      const int nghost,
+                      const InputNlist& lmp_list,
+                      const int& ago,
+                      const std::vector& fparam,
+                      const std::vector& aparam_) {
   int nall = dcoord_.size() / 3;
   int nloc = nall - nghost;
   validate_fparam_aparam(nloc, fparam, aparam_);
-    std::vector> input_tensors;
+  std::vector> input_tensors;
   // select real atoms
   std::vector dcoord, dforce, aparam, datom_energy, datom_virial;
   std::vector datype, fwd_map, bkw_map;
   int nghost_real;
-  select_real_atoms(fwd_map, bkw_map, nghost_real, dcoord_, datype_, nghost, ntypes);
+  select_real_atoms(fwd_map, bkw_map, nghost_real, dcoord_, datype_, nghost,
+                    ntypes);
   // resize to nall_real
   int nall_real = bkw_map.size();
   int nloc_real = nall_real - nghost_real;
@@ -685,28 +649,34 @@ compute (ENERGYTYPE &			dener,
   select_map(dcoord, dcoord_, fwd_map, 3);
   select_map(datype, datype_, fwd_map, 1);
   // aparam
-  if (daparam > 0){
+  if (daparam > 0) {
     aparam.resize(nloc_real);
     select_map(aparam, aparam_, fwd_map, daparam);
   }
-    if (ago == 0) {
-    atommap = deepmd::AtomMap (datype.begin(), datype.begin() + nloc_real);
-    assert (nloc_real == atommap.get_type().size());
-
-        nlist_data.copy_from_nlist(lmp_list);
-        nlist_data.shuffle_exclude_empty(fwd_map);
-        nlist_data.shuffle(atommap);
-	nlist_data.make_inlist(nlist);
-    }
+  if (ago == 0) {
+    atommap = deepmd::AtomMap(datype.begin(), datype.begin() + nloc_real);
+    assert(nloc_real == atommap.get_type().size());
+
+    nlist_data.copy_from_nlist(lmp_list);
+    nlist_data.shuffle_exclude_empty(fwd_map);
+    nlist_data.shuffle(atommap);
+    nlist_data.make_inlist(nlist);
+  }
 
   if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors (input_tensors, dcoord, ntypes, datype, dbox, nlist, fparam, aparam, atommap, nghost_real, ago);
-    assert (nloc_real == ret);
-    run_model (dener, dforce, dvirial, datom_energy, datom_virial, session, input_tensors, atommap, nghost_real);
+    int ret = session_input_tensors(input_tensors, dcoord, ntypes,
+                                            datype, dbox, nlist, fparam, aparam,
+                                            atommap, nghost_real, ago);
+    assert(nloc_real == ret);
+    run_model(dener, dforce, dvirial, datom_energy, datom_virial,
+                      session, input_tensors, atommap, nghost_real);
   } else {
-    int ret = session_input_tensors (input_tensors, dcoord, ntypes, datype, dbox, nlist, fparam, aparam, atommap, nghost_real, ago);
-    assert (nloc_real == ret);
-    run_model (dener, dforce, dvirial, datom_energy, datom_virial, session, input_tensors, atommap, nghost_real);
+    int ret = session_input_tensors(input_tensors, dcoord, ntypes,
+                                           datype, dbox, nlist, fparam, aparam,
+                                           atommap, nghost_real, ago);
+    assert(nloc_real == ret);
+    run_model(dener, dforce, dvirial, datom_energy, datom_virial,
+                     session, input_tensors, atommap, nghost_real);
   }
 
   // bkw map
@@ -718,116 +688,102 @@ compute (ENERGYTYPE &			dener,
   select_map(datom_virial_, datom_virial, bkw_map, 9);
 }
 
-template
-void
-DeepPot::
-compute  (ENERGYTYPE &			dener,
-   std::vector &	dforce_,
-   std::vector &	dvirial,
-   std::vector &	datom_energy_,
-   std::vector &	datom_virial_,
-   const std::vector &	dcoord_,
-   const std::vector &	datype_,
-   const std::vector &	dbox, 
-   const int			nghost, 
-   const InputNlist &	lmp_list,
-   const int               &	ago,
-   const std::vector &	fparam,
-   const std::vector &	aparam_);
-
-template
-void
-DeepPot::
-compute  (ENERGYTYPE &			dener,
-   std::vector &	dforce_,
-   std::vector &	dvirial,
-   std::vector &	datom_energy_,
-   std::vector &	datom_virial_,
-   const std::vector &	dcoord_,
-   const std::vector &	datype_,
-   const std::vector &	dbox, 
-   const int			nghost, 
-   const InputNlist &	lmp_list,
-   const int               &	ago,
-   const std::vector &	fparam,
-   const std::vector &	aparam_);
-
-void
-DeepPot::
-get_type_map(std::string & type_map){
-    type_map = get_scalar("model_attr/tmap");
+template void DeepPot::compute(ENERGYTYPE& dener,
+                                       std::vector& dforce_,
+                                       std::vector& dvirial,
+                                       std::vector& datom_energy_,
+                                       std::vector& datom_virial_,
+                                       const std::vector& dcoord_,
+                                       const std::vector& datype_,
+                                       const std::vector& dbox,
+                                       const int nghost,
+                                       const InputNlist& lmp_list,
+                                       const int& ago,
+                                       const std::vector& fparam,
+                                       const std::vector& aparam_);
+
+template void DeepPot::compute(ENERGYTYPE& dener,
+                                      std::vector& dforce_,
+                                      std::vector& dvirial,
+                                      std::vector& datom_energy_,
+                                      std::vector& datom_virial_,
+                                      const std::vector& dcoord_,
+                                      const std::vector& datype_,
+                                      const std::vector& dbox,
+                                      const int nghost,
+                                      const InputNlist& lmp_list,
+                                      const int& ago,
+                                      const std::vector& fparam,
+                                      const std::vector& aparam_);
+
+void DeepPot::get_type_map(std::string& type_map) {
+  type_map = get_scalar("model_attr/tmap");
 }
 
+DeepPotModelDevi::DeepPotModelDevi()
+    : inited(false), init_nbor(false), numb_models(0) {}
 
-
-DeepPotModelDevi::
-DeepPotModelDevi ()
-    : inited (false), 
-      init_nbor (false),
-      numb_models (0)
-{
-}
-
-DeepPotModelDevi::
-DeepPotModelDevi (const std::vector & models, const int & gpu_rank, const std::vector & file_contents)
-    : inited (false), 
-      init_nbor(false),
-      numb_models (0)
-{
+DeepPotModelDevi::DeepPotModelDevi(
+    const std::vector& models,
+    const int& gpu_rank,
+    const std::vector& file_contents)
+    : inited(false), init_nbor(false), numb_models(0) {
   init(models, gpu_rank, file_contents);
 }
 
 DeepPotModelDevi::~DeepPotModelDevi() {
-  for (unsigned ii = 0; ii < numb_models; ++ii){
+  for (unsigned ii = 0; ii < numb_models; ++ii) {
     delete graph_defs[ii];
   }
 }
 
-void
-DeepPotModelDevi::
-init (const std::vector & models, const int & gpu_rank, const std::vector & file_contents)
-{
-  if (inited){
-    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do nothing at the second call of initializer" << std::endl;
-    return ;
+void DeepPotModelDevi::init(const std::vector& models,
+                            const int& gpu_rank,
+                            const std::vector& file_contents) {
+  if (inited) {
+    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do "
+                 "nothing at the second call of initializer"
+              << std::endl;
+    return;
   }
   numb_models = models.size();
   sessions.resize(numb_models);
   graph_defs.resize(numb_models);
-  
+
   int gpu_num = -1;
-  #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
   DPGetDeviceCount(gpu_num);
-  #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
   SessionOptions options;
   get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
   options.config.set_inter_op_parallelism_threads(num_inter_nthreads);
   options.config.set_intra_op_parallelism_threads(num_intra_nthreads);
-  for (unsigned ii = 0; ii < numb_models; ++ii){
+  for (unsigned ii = 0; ii < numb_models; ++ii) {
     graph_defs[ii] = new GraphDef();
     if (file_contents.size() == 0)
-      check_status (ReadBinaryProto(Env::Default(), models[ii], graph_defs[ii]));
+      check_status(ReadBinaryProto(Env::Default(), models[ii], graph_defs[ii]));
     else
       (*graph_defs[ii]).ParseFromString(file_contents[ii]);
   }
-  #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
   if (gpu_num > 0) {
-      options.config.set_allow_soft_placement(true);
-      options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(0.9);
-      options.config.mutable_gpu_options()->set_allow_growth(true);
-      DPErrcheck(DPSetDevice(gpu_rank % gpu_num));
+    options.config.set_allow_soft_placement(true);
+    options.config.mutable_gpu_options()->set_per_process_gpu_memory_fraction(
+        0.9);
+    options.config.mutable_gpu_options()->set_allow_growth(true);
+    DPErrcheck(DPSetDevice(gpu_rank % gpu_num));
   }
-  #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-  
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+
   for (unsigned ii = 0; ii < numb_models; ++ii) {
     if (gpu_num > 0) {
       std::string str = "/gpu:";
       str += std::to_string(gpu_rank % gpu_num);
       graph::SetDefaultDevice(str, &(*graph_defs[ii]));
     }
-    check_status (NewSession(options, &(sessions[ii])));
-    check_status (sessions[ii]->Create(*graph_defs[ii]));
+    check_status(NewSession(options, &(sessions[ii])));
+    check_status(sessions[ii]->Create(*graph_defs[ii]));
   }
   dtype = session_get_dtype(sessions[0], "descrpt_attr/rcut");
   if (dtype == tensorflow::DT_DOUBLE) {
@@ -843,98 +799,89 @@ init (const std::vector & models, const int & gpu_rank, const std::
   if (daparam < 0) daparam = 0;
   model_type = get_scalar("model_attr/model_type");
   model_version = get_scalar("model_attr/model_version");
-  if(! model_compatable(model_version)){
-    throw deepmd::deepmd_exception(
-	"incompatable model: version " + model_version 
-	+ " in graph, but version " + global_model_version 
-	+ " supported ");
+  if (!model_compatable(model_version)) {
+    throw deepmd::deepmd_exception("incompatable model: version " +
+                                   model_version + " in graph, but version " +
+                                   global_model_version + " supported ");
   }
   // rcut = get_rcut();
   // cell_size = rcut;
   // ntypes = get_ntypes();
   inited = true;
-  
+
   init_nbor = false;
 }
 
-template
-VT
-DeepPotModelDevi::
-get_scalar(const std::string name) const 
-{
+template 
+VT DeepPotModelDevi::get_scalar(const std::string name) const {
   VT myrcut;
-  for (unsigned ii = 0; ii < numb_models; ++ii){
+  for (unsigned ii = 0; ii < numb_models; ++ii) {
     VT ret = session_get_scalar(sessions[ii], name);
-    if (ii == 0){
+    if (ii == 0) {
       myrcut = ret;
-    }
-    else {
-      assert (myrcut == ret);
+    } else {
+      assert(myrcut == ret);
     }
   }
   return myrcut;
 }
 
 // init the tmp array data
-std::vector > 
-DeepPotModelDevi::
-get_sel () const 
-{
-    std::vector > sec;
-    for (int ii = 0; ii < numb_models; ii++) {
-        std::vector sel;
-        std::istringstream is(graph_info(*graph_defs[ii]));
-        std::string line = "";
-        while(is >> line) {
-            if (line.find("sel") != line.npos) {
-                while (std::getline(is, line) && line != "}") {
-                    if (line.find("i:") != line.npos) {
-                        sel.push_back(atoi((line.substr(line.find("i:") + 2)).c_str()));
-                    }
-                } break;
-            }
-            if (line.find("sel_a") != line.npos) {
-                while (std::getline(is, line) && line != "}") {
-                    if (line.find("i:") != line.npos) {
-                        sel.push_back(atoi((line.substr(line.find("i:") + 2)).c_str()));
-                    }
-                } break;
-            }
+std::vector> DeepPotModelDevi::get_sel() const {
+  std::vector> sec;
+  for (int ii = 0; ii < numb_models; ii++) {
+    std::vector sel;
+    std::istringstream is(graph_info(*graph_defs[ii]));
+    std::string line = "";
+    while (is >> line) {
+      if (line.find("sel") != line.npos) {
+        while (std::getline(is, line) && line != "}") {
+          if (line.find("i:") != line.npos) {
+            sel.push_back(atoi((line.substr(line.find("i:") + 2)).c_str()));
+          }
+        }
+        break;
+      }
+      if (line.find("sel_a") != line.npos) {
+        while (std::getline(is, line) && line != "}") {
+          if (line.find("i:") != line.npos) {
+            sel.push_back(atoi((line.substr(line.find("i:") + 2)).c_str()));
+          }
         }
-        sec.push_back(sel);
+        break;
+      }
     }
-    return sec;
+    sec.push_back(sel);
+  }
+  return sec;
 }
 
-
 template 
-void
-DeepPotModelDevi::
-validate_fparam_aparam(const int & nloc,
-		       const std::vector &fparam,
-		       const std::vector &aparam)const 
-{
+void DeepPotModelDevi::validate_fparam_aparam(
+    const int& nloc,
+    const std::vector& fparam,
+    const std::vector& aparam) const {
   if (fparam.size() != dfparam) {
-    throw deepmd::deepmd_exception("the dim of frame parameter provided is not consistent with what the model uses");
+    throw deepmd::deepmd_exception(
+        "the dim of frame parameter provided is not consistent with what the "
+        "model uses");
   }
   if (aparam.size() != daparam * nloc) {
-    throw deepmd::deepmd_exception("the dim of atom parameter provided is not consistent with what the model uses");
-  }  
+    throw deepmd::deepmd_exception(
+        "the dim of atom parameter provided is not consistent with what the "
+        "model uses");
+  }
 }
 
-template
-void
-DeepPotModelDevi::
-validate_fparam_aparam(const int & nloc,
-            const std::vector &fparam,
-            const std::vector &aparam)const ;
+template void DeepPotModelDevi::validate_fparam_aparam(
+    const int& nloc,
+    const std::vector& fparam,
+    const std::vector& aparam) const;
 
-template
-void
-DeepPotModelDevi::
-validate_fparam_aparam(const int & nloc,
-            const std::vector &fparam,
-            const std::vector &aparam)const ;
+template void DeepPotModelDevi::validate_fparam_aparam(
+    const int& nloc,
+    const std::vector& fparam,
+    const std::vector& aparam) const;
 
 // void
 // DeepPotModelDevi::
@@ -954,14 +901,16 @@ validate_fparam_aparam(const int & nloc,
 //   validate_fparam_aparam(atommap.get_type().size(), fparam, aparam);
 
 //   std::vector> input_tensors;
-//   int nloc = session_input_tensors (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, fparam, aparam, atommap);
+//   int nloc = session_input_tensors (input_tensors, dcoord_, ntypes, datype_,
+//   dbox, cell_size, fparam, aparam, atommap);
 
 //   std::vector all_energy (numb_models);
 //   std::vector > all_force (numb_models);
 //   std::vector > all_virial (numb_models);
 
 //   for (unsigned ii = 0; ii < numb_models; ++ii){
-//     run_model (all_energy[ii], all_force[ii], all_virial[ii], sessions[ii], input_tensors, atommap);
+//     run_model (all_energy[ii], all_force[ii], all_virial[ii], sessions[ii],
+//     input_tensors, atommap);
 //   }
 
 //   dener = 0;
@@ -969,372 +918,328 @@ validate_fparam_aparam(const int & nloc,
 //     dener += all_energy[ii];
 //   }
 //   dener /= VALUETYPE(numb_models);
-//   compute_avg (dvirial, all_virial);  
+//   compute_avg (dvirial, all_virial);
 //   compute_avg (dforce_, all_force);
-  
+
 //   compute_std_f (model_devi, dforce_, all_force);
-  
+
 //   // for (unsigned ii = 0; ii < numb_models; ++ii){
-//   //   cout << all_force[ii][573] << " " << all_force[ii][574] << " " << all_force[ii][575] << endl;
+//   //   cout << all_force[ii][573] << " " << all_force[ii][574] << " " <<
+//   all_force[ii][575] << endl;
 //   // }
-//   // cout << dforce_[573] << " " 
-//   //      << dforce_[574] << " " 
-//   //      << dforce_[575] << " " 
+//   // cout << dforce_[573] << " "
+//   //      << dforce_[574] << " "
+//   //      << dforce_[575] << " "
 //   //      << model_devi[191] << endl;
 // }
 
 template 
-void
-DeepPotModelDevi::
-compute (std::vector &		all_energy,
-	 std::vector> &	all_force,
-	 std::vector> &	all_virial,
-	 const std::vector &		dcoord_,
-	 const std::vector &		datype_,
-	 const std::vector &		dbox,
-	 const int				nghost,
-	 const InputNlist &		lmp_list,
-	 const int                &		ago,
-	 const std::vector &		fparam,
-	 const std::vector &		aparam)
-{
+void DeepPotModelDevi::compute(std::vector& all_energy,
+                               std::vector>& all_force,
+                               std::vector>& all_virial,
+                               const std::vector& dcoord_,
+                               const std::vector& datype_,
+                               const std::vector& dbox,
+                               const int nghost,
+                               const InputNlist& lmp_list,
+                               const int& ago,
+                               const std::vector& fparam,
+                               const std::vector& aparam) {
   if (numb_models == 0) return;
   int nall = dcoord_.size() / 3;
   int nloc = nall - nghost;
   validate_fparam_aparam(nloc, fparam, aparam);
   std::vector> input_tensors;
 
-    // agp == 0 means that the LAMMPS nbor list has been updated
-    if (ago == 0) {
-        atommap = AtomMap (datype_.begin(), datype_.begin() + nloc);
-        assert (nloc == atommap.get_type().size());
+  // ago == 0 means that the LAMMPS nbor list has been updated
+  if (ago == 0) {
+    atommap = AtomMap(datype_.begin(), datype_.begin() + nloc);
+    assert(nloc == atommap.get_type().size());
 
-        nlist_data.copy_from_nlist(lmp_list);
-        nlist_data.shuffle(atommap);
-	nlist_data.make_inlist(nlist);
-    }
-    int ret;
+    nlist_data.copy_from_nlist(lmp_list);
+    nlist_data.shuffle(atommap);
+    nlist_data.make_inlist(nlist);
+  }
+  int ret;
+  if (dtype == tensorflow::DT_DOUBLE) {
+    ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_,
+                                        dbox, nlist, fparam, aparam, atommap,
+                                        nghost, ago);
+  } else {
+    ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_,
+                                       dbox, nlist, fparam, aparam, atommap,
+                                       nghost, ago);
+  }
+  all_energy.resize(numb_models);
+  all_force.resize(numb_models);
+  all_virial.resize(numb_models);
+  assert(nloc == ret);
+  for (unsigned ii = 0; ii < numb_models; ++ii) {
     if (dtype == tensorflow::DT_DOUBLE) {
-      ret = session_input_tensors  (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, atommap, nghost, ago);
+      run_model(all_energy[ii], all_force[ii], all_virial[ii],
+                        sessions[ii], input_tensors, atommap, nghost);
     } else {
-      ret = session_input_tensors  (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, atommap, nghost, ago);
-    }
-    all_energy.resize (numb_models);
-    all_force.resize (numb_models);
-    all_virial.resize (numb_models);
-    assert (nloc == ret);
-    for (unsigned ii = 0; ii < numb_models; ++ii) {
-      if (dtype == tensorflow::DT_DOUBLE) {
-        run_model (all_energy[ii], all_force[ii], all_virial[ii], sessions[ii], input_tensors, atommap, nghost);
-      } else {
-        run_model (all_energy[ii], all_force[ii], all_virial[ii], sessions[ii], input_tensors, atommap, nghost);
-      }
+      run_model(all_energy[ii], all_force[ii], all_virial[ii],
+                       sessions[ii], input_tensors, atommap, nghost);
     }
+  }
 }
 
-template
-void
-DeepPotModelDevi::
-compute  (std::vector &		all_energy,
-   std::vector> &	all_force,
-   std::vector> &	all_virial,
-   const std::vector &		dcoord_,
-   const std::vector &		datype_,
-   const std::vector &		dbox,
-   const int				nghost,
-   const InputNlist &		lmp_list,
-   const int                &		ago,
-   const std::vector &		fparam,
-   const std::vector &		aparam);
-
-template
-void
-DeepPotModelDevi::
-compute  (std::vector &		all_energy,
-   std::vector> &	all_force,
-   std::vector> &	all_virial,
-   const std::vector &		dcoord_,
-   const std::vector &		datype_,
-   const std::vector &		dbox,
-   const int				nghost,
-   const InputNlist &		lmp_list,
-   const int                &		ago,
-   const std::vector &		fparam,
-   const std::vector &		aparam);
+template void DeepPotModelDevi::compute(
+    std::vector& all_energy,
+    std::vector>& all_force,
+    std::vector>& all_virial,
+    const std::vector& dcoord_,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector& fparam,
+    const std::vector& aparam);
+
+template void DeepPotModelDevi::compute(
+    std::vector& all_energy,
+    std::vector>& all_force,
+    std::vector>& all_virial,
+    const std::vector& dcoord_,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector& fparam,
+    const std::vector& aparam);
 
 template 
-void
-DeepPotModelDevi::
-compute (std::vector &		all_energy,
-	 std::vector> &	all_force,
-	 std::vector> &	all_virial,
-	 std::vector> &	all_atom_energy,
-	 std::vector> &	all_atom_virial,
-	 const std::vector &		dcoord_,
-	 const std::vector &		datype_,
-	 const std::vector &		dbox,
-	 const int				nghost,
-	 const InputNlist &		lmp_list,
-	 const int	             &		ago,
-	 const std::vector &	 	fparam,
-	 const std::vector &	 	aparam)
-{
+void DeepPotModelDevi::compute(
+    std::vector& all_energy,
+    std::vector>& all_force,
+    std::vector>& all_virial,
+    std::vector>& all_atom_energy,
+    std::vector>& all_atom_virial,
+    const std::vector& dcoord_,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector& fparam,
+    const std::vector& aparam) {
   if (numb_models == 0) return;
   int nall = dcoord_.size() / 3;
   int nloc = nall - nghost;
   validate_fparam_aparam(nloc, fparam, aparam);
   std::vector> input_tensors;
 
-    // agp == 0 means that the LAMMPS nbor list has been updated
-    if (ago == 0) {
-        atommap = AtomMap (datype_.begin(), datype_.begin() + nloc);
-        assert (nloc == atommap.get_type().size());
+  // ago == 0 means that the LAMMPS nbor list has been updated
+  if (ago == 0) {
+    atommap = AtomMap(datype_.begin(), datype_.begin() + nloc);
+    assert(nloc == atommap.get_type().size());
 
-        nlist_data.copy_from_nlist(lmp_list);
-        nlist_data.shuffle(atommap);
-	nlist_data.make_inlist(nlist);
-    }
-    int ret;
+    nlist_data.copy_from_nlist(lmp_list);
+    nlist_data.shuffle(atommap);
+    nlist_data.make_inlist(nlist);
+  }
+  int ret;
+  if (dtype == tensorflow::DT_DOUBLE) {
+    ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_,
+                                        dbox, nlist, fparam, aparam, atommap,
+                                        nghost, ago);
+  } else {
+    ret = session_input_tensors(input_tensors, dcoord_, ntypes, datype_,
+                                       dbox, nlist, fparam, aparam, atommap,
+                                       nghost, ago);
+  }
+
+  all_energy.resize(numb_models);
+  all_force.resize(numb_models);
+  all_virial.resize(numb_models);
+  all_atom_energy.resize(numb_models);
+  all_atom_virial.resize(numb_models);
+  assert(nloc == ret);
+  for (unsigned ii = 0; ii < numb_models; ++ii) {
     if (dtype == tensorflow::DT_DOUBLE) {
-      ret = session_input_tensors  (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, atommap, nghost, ago);
+      run_model(all_energy[ii], all_force[ii], all_virial[ii],
+                        all_atom_energy[ii], all_atom_virial[ii], sessions[ii],
+                        input_tensors, atommap, nghost);
     } else {
-      ret = session_input_tensors  (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, fparam, aparam, atommap, nghost, ago);
-    }
-
-    all_energy.resize (numb_models);
-    all_force .resize (numb_models);
-    all_virial.resize (numb_models);
-    all_atom_energy.resize (numb_models);
-    all_atom_virial.resize (numb_models); 
-    assert (nloc == ret);
-    for (unsigned ii = 0; ii < numb_models; ++ii) {
-      if (dtype == tensorflow::DT_DOUBLE) {
-        run_model (all_energy[ii], all_force[ii], all_virial[ii], all_atom_energy[ii], all_atom_virial[ii], sessions[ii], input_tensors, atommap, nghost);
-      } else {
-        run_model (all_energy[ii], all_force[ii], all_virial[ii], all_atom_energy[ii], all_atom_virial[ii], sessions[ii], input_tensors, atommap, nghost);
-      }
+      run_model(all_energy[ii], all_force[ii], all_virial[ii],
+                       all_atom_energy[ii], all_atom_virial[ii], sessions[ii],
+                       input_tensors, atommap, nghost);
     }
+  }
 }
 
-template
-void
-DeepPotModelDevi::
-compute  (std::vector &		all_energy,
-   std::vector> &	all_force,
-   std::vector> &	all_virial,
-   std::vector> &	all_atom_energy,
-   std::vector> &	all_atom_virial,
-   const std::vector &		dcoord_,
-   const std::vector &		datype_,
-   const std::vector &		dbox,
-   const int				nghost,
-   const InputNlist &		lmp_list,
-   const int                &		ago,
-   const std::vector &		fparam,
-   const std::vector &		aparam);
-
-template
-void
-DeepPotModelDevi::
-compute  (std::vector &		all_energy,
-   std::vector> &	all_force,
-   std::vector> &	all_virial,
-   std::vector> &	all_atom_energy,
-   std::vector> &	all_atom_virial,
-   const std::vector &		dcoord_,
-   const std::vector &		datype_,
-   const std::vector &		dbox,
-   const int				nghost,
-   const InputNlist &		lmp_list,
-   const int                &		ago,
-   const std::vector &		fparam,
-   const std::vector &		aparam);
+template void DeepPotModelDevi::compute(
+    std::vector& all_energy,
+    std::vector>& all_force,
+    std::vector>& all_virial,
+    std::vector>& all_atom_energy,
+    std::vector>& all_atom_virial,
+    const std::vector& dcoord_,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector& fparam,
+    const std::vector& aparam);
+
+template void DeepPotModelDevi::compute(
+    std::vector& all_energy,
+    std::vector>& all_force,
+    std::vector>& all_virial,
+    std::vector>& all_atom_energy,
+    std::vector>& all_atom_virial,
+    const std::vector& dcoord_,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    const int nghost,
+    const InputNlist& lmp_list,
+    const int& ago,
+    const std::vector& fparam,
+    const std::vector& aparam);
 
 template 
-void
-DeepPotModelDevi::
-compute_avg (VALUETYPE &		dener, 
-	     const std::vector &	all_energy) 
-{
-  assert (all_energy.size() == numb_models);
+void DeepPotModelDevi::compute_avg(VALUETYPE& dener,
+                                   const std::vector& all_energy) {
+  assert(all_energy.size() == numb_models);
   if (numb_models == 0) return;
 
   dener = 0;
-  for (unsigned ii = 0; ii < numb_models; ++ii){
+  for (unsigned ii = 0; ii < numb_models; ++ii) {
     dener += all_energy[ii];
   }
-  dener /= (VALUETYPE)(numb_models);  
+  dener /= (VALUETYPE)(numb_models);
 }
 
-template
-void
-DeepPotModelDevi::
-compute_avg  (double &		dener, 
-       const std::vector &	all_energy);
+template void DeepPotModelDevi::compute_avg(
+    double& dener, const std::vector& all_energy);
 
-template
-void
-DeepPotModelDevi::
-compute_avg  (float &		dener, 
-       const std::vector &	all_energy);
+template void DeepPotModelDevi::compute_avg(
+    float& dener, const std::vector& all_energy);
 
 template 
-void
-DeepPotModelDevi::
-compute_avg (std::vector &		avg, 
-	     const std::vector > &	xx) 
-{
-  assert (xx.size() == numb_models);
+void DeepPotModelDevi::compute_avg(
+    std::vector& avg,
+    const std::vector>& xx) {
+  assert(xx.size() == numb_models);
   if (numb_models == 0) return;
-  
+
   avg.resize(xx[0].size());
-  fill (avg.begin(), avg.end(), VALUETYPE(0.));
-  
-  for (unsigned ii = 0; ii < numb_models; ++ii){
-    for (unsigned jj = 0; jj < avg.size(); ++jj){
+  fill(avg.begin(), avg.end(), VALUETYPE(0.));
+
+  for (unsigned ii = 0; ii < numb_models; ++ii) {
+    for (unsigned jj = 0; jj < avg.size(); ++jj) {
       avg[jj] += xx[ii][jj];
     }
   }
 
-  for (unsigned jj = 0; jj < avg.size(); ++jj){
+  for (unsigned jj = 0; jj < avg.size(); ++jj) {
     avg[jj] /= VALUETYPE(numb_models);
   }
 }
 
-template
-void
-DeepPotModelDevi::
-compute_avg  (std::vector &		avg, 
-       const std::vector > &	xx);
+template void DeepPotModelDevi::compute_avg(
+    std::vector& avg, const std::vector>& xx);
 
-template
-void
-DeepPotModelDevi::
-compute_avg  (std::vector &		avg, 
-       const std::vector > &	xx);
+template void DeepPotModelDevi::compute_avg(
+    std::vector& avg, const std::vector>& xx);
 
 template 
-void
-DeepPotModelDevi::
-compute_std (
-    std::vector &		std, 
-    const std::vector &	avg, 
-    const std::vector >&xx,
-    const int & stride)
-{
-  assert (xx.size() == numb_models);
+void DeepPotModelDevi::compute_std(
+    std::vector& std,
+    const std::vector& avg,
+    const std::vector>& xx,
+    const int& stride) {
+  assert(xx.size() == numb_models);
   if (numb_models == 0) return;
 
   unsigned ndof = avg.size();
   unsigned nloc = ndof / stride;
-  assert (nloc * stride == ndof);
-  
+  assert(nloc * stride == ndof);
+
   std.resize(nloc);
-  fill (std.begin(), std.end(), VALUETYPE(0.));
-  
+  fill(std.begin(), std.end(), VALUETYPE(0.));
+
   for (unsigned ii = 0; ii < numb_models; ++ii) {
-    for (unsigned jj = 0 ; jj < nloc; ++jj){
-      const VALUETYPE * tmp_f = &(xx[ii][jj*stride]);
-      const VALUETYPE * tmp_avg = &(avg[jj*stride]);
-      for (unsigned dd = 0; dd < stride; ++dd){
-	VALUETYPE vdiff = tmp_f[dd] - tmp_avg[dd];
-	std[jj] += vdiff * vdiff;
+    for (unsigned jj = 0; jj < nloc; ++jj) {
+      const VALUETYPE* tmp_f = &(xx[ii][jj * stride]);
+      const VALUETYPE* tmp_avg = &(avg[jj * stride]);
+      for (unsigned dd = 0; dd < stride; ++dd) {
+        VALUETYPE vdiff = tmp_f[dd] - tmp_avg[dd];
+        std[jj] += vdiff * vdiff;
       }
     }
   }
 
-  for (unsigned jj = 0; jj < nloc; ++jj){
+  for (unsigned jj = 0; jj < nloc; ++jj) {
     std[jj] = sqrt(std[jj] / VALUETYPE(numb_models));
   }
 }
 
-template
-void
-DeepPotModelDevi::
-compute_std  (
-    std::vector &		std, 
-    const std::vector &	avg, 
-    const std::vector >&xx,
-    const int & stride);
-
-template
-void
-DeepPotModelDevi::
-compute_std  (
-    std::vector &		std, 
-    const std::vector &	avg, 
-    const std::vector >&xx,
-    const int & stride);
+template void DeepPotModelDevi::compute_std(
+    std::vector& std,
+    const std::vector& avg,
+    const std::vector>& xx,
+    const int& stride);
+
+template void DeepPotModelDevi::compute_std(
+    std::vector& std,
+    const std::vector& avg,
+    const std::vector>& xx,
+    const int& stride);
 
 template 
-void
-DeepPotModelDevi::
-compute_std_e (std::vector &		std, 
-	       const std::vector &	avg, 
-	       const std::vector >&xx)  
-{
+void DeepPotModelDevi::compute_std_e(
+    std::vector& std,
+    const std::vector& avg,
+    const std::vector>& xx) {
   compute_std(std, avg, xx, 1);
 }
 
-template
-void
-DeepPotModelDevi::
-compute_std_e  (
-    std::vector &		std, 
-    const std::vector &	avg, 
-    const std::vector >&xx);
-
-template
-void
-DeepPotModelDevi::
-compute_std_e  (
-    std::vector &		std, 
-    const std::vector &	avg, 
-    const std::vector >&xx);
+template void DeepPotModelDevi::compute_std_e(
+    std::vector& std,
+    const std::vector& avg,
+    const std::vector>& xx);
+
+template void DeepPotModelDevi::compute_std_e(
+    std::vector& std,
+    const std::vector& avg,
+    const std::vector>& xx);
 
 template 
-void
-DeepPotModelDevi::
-compute_std_f (std::vector &		std, 
-	       const std::vector &	avg, 
-	       const std::vector >&xx)  
-{
+void DeepPotModelDevi::compute_std_f(
+    std::vector& std,
+    const std::vector& avg,
+    const std::vector>& xx) {
   compute_std(std, avg, xx, 3);
 }
 
-template
-void
-DeepPotModelDevi::
-compute_std_f  (
-    std::vector &		std, 
-    const std::vector &	avg, 
-    const std::vector >&xx);
-
-template
-void
-DeepPotModelDevi::
-compute_std_f  (
-    std::vector &		std, 
-    const std::vector &	avg, 
-    const std::vector >&xx);
+template void DeepPotModelDevi::compute_std_f(
+    std::vector& std,
+    const std::vector& avg,
+    const std::vector>& xx);
+
+template void DeepPotModelDevi::compute_std_f(
+    std::vector& std,
+    const std::vector& avg,
+    const std::vector>& xx);
 
 template 
-void
-DeepPotModelDevi::
-compute_relative_std (
-    std::vector &std,
-    const std::vector &avg,
-    const VALUETYPE eps, 
-    const int & stride)
-{
+void DeepPotModelDevi::compute_relative_std(std::vector& std,
+                                            const std::vector& avg,
+                                            const VALUETYPE eps,
+                                            const int& stride) {
   unsigned ndof = avg.size();
   unsigned nloc = std.size();
-  assert (nloc * stride == ndof);
+  assert(nloc * stride == ndof);
 
-  for (unsigned ii = 0; ii < nloc; ++ii){
-    const VALUETYPE * tmp_avg = &(avg[ii*stride]);
+  for (unsigned ii = 0; ii < nloc; ++ii) {
+    const VALUETYPE* tmp_avg = &(avg[ii * stride]);
     VALUETYPE f_norm = 0.0;
-    for (unsigned dd = 0; dd < stride; ++dd){
+    for (unsigned dd = 0; dd < stride; ++dd) {
       f_norm += tmp_avg[dd] * tmp_avg[dd];
     }
     f_norm = sqrt(f_norm);
@@ -1342,46 +1247,27 @@ compute_relative_std (
   }
 }
 
-template
-void
-DeepPotModelDevi::
-compute_relative_std  (
-    std::vector &std,
-    const std::vector &avg,
-    const double eps, 
-    const int & stride);
-
-template
-void
-DeepPotModelDevi::
-compute_relative_std  (
-    std::vector &std,
-    const std::vector &avg,
-    const float eps, 
-    const int & stride);
+template void DeepPotModelDevi::compute_relative_std(
+    std::vector& std,
+    const std::vector& avg,
+    const double eps,
+    const int& stride);
+
+template void DeepPotModelDevi::compute_relative_std(
+    std::vector& std,
+    const std::vector& avg,
+    const float eps,
+    const int& stride);
 
 template 
-void
-DeepPotModelDevi::
-compute_relative_std_f (std::vector &std,
-			const std::vector &avg,
-			const VALUETYPE eps)
-{
+void DeepPotModelDevi::compute_relative_std_f(std::vector& std,
+                                              const std::vector& avg,
+                                              const VALUETYPE eps) {
   compute_relative_std(std, avg, eps, 3);
 }
 
-template
-void
-DeepPotModelDevi::
-compute_relative_std_f  (
-    std::vector &std,
-    const std::vector &avg,
-    const double eps);
-
-template
-void
-DeepPotModelDevi::
-compute_relative_std_f  (
-    std::vector &std,
-    const std::vector &avg,
-    const float eps);
+template void DeepPotModelDevi::compute_relative_std_f(
+    std::vector& std, const std::vector& avg, const double eps);
+
+template void DeepPotModelDevi::compute_relative_std_f(
+    std::vector& std, const std::vector& avg, const float eps);
diff --git a/source/api_cc/src/DeepTensor.cc b/source/api_cc/src/DeepTensor.cc
index cb21a53752..89fceb1562 100644
--- a/source/api_cc/src/DeepTensor.cc
+++ b/source/api_cc/src/DeepTensor.cc
@@ -3,36 +3,25 @@
 using namespace deepmd;
 using namespace tensorflow;
 
-DeepTensor::
-DeepTensor()
-    : inited (false),
-      graph_def(new GraphDef())
-{
-}
+DeepTensor::DeepTensor() : inited(false), graph_def(new GraphDef()) {}
 
-DeepTensor::
-DeepTensor(const std::string & model, 
-	   const int & gpu_rank, 
-	   const std::string &name_scope_)
-    : inited (false), name_scope(name_scope_),
-      graph_def(new GraphDef())
-{
-  init(model, gpu_rank, name_scope_);  
+DeepTensor::DeepTensor(const std::string &model,
+                       const int &gpu_rank,
+                       const std::string &name_scope_)
+    : inited(false), name_scope(name_scope_), graph_def(new GraphDef()) {
+  init(model, gpu_rank, name_scope_);
 }
 
-DeepTensor::~DeepTensor() {
-  delete graph_def;
-}
+DeepTensor::~DeepTensor() { delete graph_def; }
 
-void
-DeepTensor::
-init (const std::string & model, 
-      const int & gpu_rank, 
-      const std::string &name_scope_)
-{
-  if (inited){
-    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do nothing at the second call of initializer" << std::endl;
-    return ;
+void DeepTensor::init(const std::string &model,
+                      const int &gpu_rank,
+                      const std::string &name_scope_) {
+  if (inited) {
+    std::cerr << "WARNING: deepmd-kit should not be initialized twice, do "
+                 "nothing at the second call of initializer"
+              << std::endl;
+    return;
   }
   name_scope = name_scope_;
   SessionOptions options;
@@ -40,9 +29,9 @@ init (const std::string & model,
   options.config.set_inter_op_parallelism_threads(num_inter_nthreads);
   options.config.set_intra_op_parallelism_threads(num_intra_nthreads);
   deepmd::load_op_library();
-  deepmd::check_status (NewSession(options, &session));
-  deepmd::check_status (ReadBinaryProto(Env::Default(), model, graph_def));
-  deepmd::check_status (session->Create(*graph_def));  
+  deepmd::check_status(NewSession(options, &session));
+  deepmd::check_status(ReadBinaryProto(Env::Default(), model, graph_def));
+  deepmd::check_status(session->Create(*graph_def));
   dtype = session_get_dtype(session, "descrpt_attr/rcut");
   if (dtype == tensorflow::DT_DOUBLE) {
     rcut = get_scalar("descrpt_attr/rcut");
@@ -55,48 +44,37 @@ init (const std::string & model,
   get_vector(sel_type, "model_attr/sel_type");
   model_type = get_scalar("model_attr/model_type");
   model_version = get_scalar("model_attr/model_version");
-  if(! model_compatable(model_version)){
-    throw deepmd::deepmd_exception(
-	"incompatable model: version " + model_version 
-	+ " in graph, but version " + global_model_version 
-	+ " supported ");
+  if (!model_compatable(model_version)) {
+    throw deepmd::deepmd_exception("incompatable model: version " +
+                                   model_version + " in graph, but version " +
+                                   global_model_version + " supported ");
   }
   inited = true;
 }
 
-void 
-DeepTensor::
-print_summary(const std::string &pre) const
-{
+void DeepTensor::print_summary(const std::string &pre) const {
   deepmd::print_summary(pre);
 }
 
-template
-VT
-DeepTensor::
-get_scalar (const std::string & name) const
-{
+template 
+VT DeepTensor::get_scalar(const std::string &name) const {
   return session_get_scalar(session, name, name_scope);
 }
 
-template
-void
-DeepTensor::
-get_vector (std::vector & vec, const std::string & name) const
-{
+template 
+void DeepTensor::get_vector(std::vector &vec,
+                            const std::string &name) const {
   session_get_vector(vec, session, name, name_scope);
 }
 
 template 
-void 
-DeepTensor::
-run_model (std::vector &	d_tensor_,
-		  Session *			session, 
-		  const std::vector> & input_tensors,
-		  const AtomMap &atommap, 
-		  const std::vector &	sel_fwd,
-		  const int			nghost)
-{
+void DeepTensor::run_model(
+    std::vector &d_tensor_,
+    Session *session,
+    const std::vector> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector &sel_fwd,
+    const int nghost) {
   unsigned nloc = atommap.get_type().size();
   unsigned nall = nloc + nghost;
   if (nloc == 0) {
@@ -106,20 +84,19 @@ run_model (std::vector &	d_tensor_,
   }
 
   std::vector output_tensors;
-  deepmd::check_status (session->Run(input_tensors, 
-			    {name_prefix(name_scope) + "o_" + model_type},
-			    {}, 
-			    &output_tensors));
-  
+  deepmd::check_status(
+      session->Run(input_tensors, {name_prefix(name_scope) + "o_" + model_type},
+                   {}, &output_tensors));
+
   Tensor output_t = output_tensors[0];
   // Yixiao: newer model may output rank 2 tensor [nframes x (natoms x noutdim)]
   // assert (output_t.dims() == 1), "dim of output tensor should be 1";
-  auto ot = output_t.flat ();
+  auto ot = output_t.flat();
   // this is an Eigen Tensor
   int o_size = ot.size();
 
-  std::vector d_tensor (o_size);
-  for (unsigned ii = 0; ii < o_size; ++ii){
+  std::vector d_tensor(o_size);
+  for (unsigned ii = 0; ii < o_size; ++ii) {
     d_tensor[ii] = ot(ii);
   }
   // now we map the type-sorted sel-atom tensor back to original order
@@ -133,57 +110,48 @@ run_model (std::vector &	d_tensor_,
   select_map(d_tensor_, d_tensor, sel_srt, odim);
 }
 
-template
-void
-DeepTensor::
-run_model (std::vector &	d_tensor_,
-		  Session *			session, 
-		  const std::vector> & input_tensors,
-		  const AtomMap &atommap, 
-		  const std::vector &	sel_fwd,
-		  const int			nghost);
-template
-void
-DeepTensor::
-run_model (std::vector &	d_tensor_,
-		  Session *			session, 
-		  const std::vector> & input_tensors,
-		  const AtomMap &atommap, 
-		  const std::vector &	sel_fwd,
-		  const int			nghost);
-template
-void
-DeepTensor::
-run_model (std::vector &	d_tensor_,
-		  Session *			session, 
-		  const std::vector> & input_tensors,
-		  const AtomMap &atommap, 
-		  const std::vector &	sel_fwd,
-		  const int			nghost);
-template
-void
-DeepTensor::
-run_model (std::vector &	d_tensor_,
-		  Session *			session, 
-		  const std::vector> & input_tensors,
-		  const AtomMap &atommap, 
-		  const std::vector &	sel_fwd,
-		  const int			nghost);
+template void DeepTensor::run_model(
+    std::vector &d_tensor_,
+    Session *session,
+    const std::vector> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector &sel_fwd,
+    const int nghost);
+template void DeepTensor::run_model(
+    std::vector &d_tensor_,
+    Session *session,
+    const std::vector> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector &sel_fwd,
+    const int nghost);
+template void DeepTensor::run_model(
+    std::vector &d_tensor_,
+    Session *session,
+    const std::vector> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector &sel_fwd,
+    const int nghost);
+template void DeepTensor::run_model(
+    std::vector &d_tensor_,
+    Session *session,
+    const std::vector> &input_tensors,
+    const AtomMap &atommap,
+    const std::vector &sel_fwd,
+    const int nghost);
 
 template 
-void
-DeepTensor::
-run_model (std::vector &		dglobal_tensor_,
-		  std::vector &	dforce_,
-		  std::vector &	dvirial_,
-		  std::vector &	datom_tensor_,
-		  std::vector &	datom_virial_,
-		  tensorflow::Session *			session, 
-		  const std::vector> & input_tensors,
-		  const AtomMap &		atommap, 
-		  const std::vector &		sel_fwd,
-		  const int				nghost)
-{
+void DeepTensor::run_model(
+    std::vector &dglobal_tensor_,
+    std::vector &dforce_,
+    std::vector &dvirial_,
+    std::vector &datom_tensor_,
+    std::vector &datom_virial_,
+    tensorflow::Session *session,
+    const std::vector>
+        &input_tensors,
+    const AtomMap &atommap,
+    const std::vector &sel_fwd,
+    const int nghost) {
   unsigned nloc = atommap.get_type().size();
   unsigned nall = nloc + nghost;
   unsigned nsel = nloc - std::count(sel_fwd.begin(), sel_fwd.end(), -1);
@@ -196,14 +164,14 @@ run_model (std::vector &		dglobal_tensor_,
   }
 
   std::vector output_tensors;
-  deepmd::check_status (session->Run(input_tensors, 
-			    {name_prefix(name_scope) + "o_global_" + model_type, 
-			     name_prefix(name_scope) + "o_force", 
-			     name_prefix(name_scope) + "o_virial", 
-			     name_prefix(name_scope) + "o_" + model_type,
-			     name_prefix(name_scope) + "o_atom_virial"},
-			    {}, 
-			    &output_tensors));
+  deepmd::check_status(
+      session->Run(input_tensors,
+                   {name_prefix(name_scope) + "o_global_" + model_type,
+                    name_prefix(name_scope) + "o_force",
+                    name_prefix(name_scope) + "o_virial",
+                    name_prefix(name_scope) + "o_" + model_type,
+                    name_prefix(name_scope) + "o_atom_virial"},
+                   {}, &output_tensors));
 
   Tensor output_gt = output_tensors[0];
   Tensor output_f = output_tensors[1];
@@ -211,54 +179,58 @@ run_model (std::vector &		dglobal_tensor_,
   Tensor output_at = output_tensors[3];
   Tensor output_av = output_tensors[4];
   // this is the new model, output has to be rank 2 tensor
-  assert (output_gt.dims() == 2), "dim of output tensor should be 2";
-  assert (output_f.dims() == 2), "dim of output tensor should be 2";
-  assert (output_v.dims() == 2), "dim of output tensor should be 2";
-  assert (output_at.dims() == 2), "dim of output tensor should be 2";
-  assert (output_av.dims() == 2), "dim of output tensor should be 2";
+  assert(output_gt.dims() == 2), "dim of output tensor should be 2";
+  assert(output_f.dims() == 2), "dim of output tensor should be 2";
+  assert(output_v.dims() == 2), "dim of output tensor should be 2";
+  assert(output_at.dims() == 2), "dim of output tensor should be 2";
+  assert(output_av.dims() == 2), "dim of output tensor should be 2";
   // also check the tensor shapes
-  assert (output_gt.dim_size(0) == 1), "nframes should match";
-  assert (output_gt.dim_size(1) == odim), "dof of global tensor should be odim";  
-  assert (output_f.dim_size(0) == 1), "nframes should match";
-  assert (output_f.dim_size(1) == odim * nall * 3), "dof of force should be odim * nall * 3";
-  assert (output_v.dim_size(0) == 1), "nframes should match";
-  assert (output_v.dim_size(1) == odim * 9), "dof of virial should be odim * 9";
-  assert (output_at.dim_size(0) == 1), "nframes should match";
-  assert (output_at.dim_size(1) == nsel * odim), "dof of atomic tensor should be nsel * odim";  
-  assert (output_av.dim_size(0) == 1), "nframes should match";
-  assert (output_av.dim_size(1) == odim * nall * 9), "dof of atomic virial should be odim * nall * 9";  
-
-  auto ogt = output_gt.flat  ();
-  auto of = output_f.flat  ();
-  auto ov = output_v.flat  ();
-  auto oat = output_at.flat ();
-  auto oav = output_av.flat ();
+  assert(output_gt.dim_size(0) == 1), "nframes should match";
+  assert(output_gt.dim_size(1) == odim), "dof of global tensor should be odim";
+  assert(output_f.dim_size(0) == 1), "nframes should match";
+  assert(output_f.dim_size(1) == odim * nall * 3),
+      "dof of force should be odim * nall * 3";
+  assert(output_v.dim_size(0) == 1), "nframes should match";
+  assert(output_v.dim_size(1) == odim * 9), "dof of virial should be odim * 9";
+  assert(output_at.dim_size(0) == 1), "nframes should match";
+  assert(output_at.dim_size(1) == nsel * odim),
+      "dof of atomic tensor should be nsel * odim";
+  assert(output_av.dim_size(0) == 1), "nframes should match";
+  assert(output_av.dim_size(1) == odim * nall * 9),
+      "dof of atomic virial should be odim * nall * 9";
+
+  auto ogt = output_gt.flat();
+  auto of = output_f.flat();
+  auto ov = output_v.flat();
+  auto oat = output_at.flat();
+  auto oav = output_av.flat();
 
   // global tensor
   dglobal_tensor_.resize(odim);
-  for (unsigned ii = 0; ii < odim; ++ii){
+  for (unsigned ii = 0; ii < odim; ++ii) {
     dglobal_tensor_[ii] = ogt(ii);
   }
 
   // component-wise force
-  std::vector dforce (3 * nall * odim);
-  for (unsigned ii = 0; ii < odim * nall * 3; ++ii){
+  std::vector dforce(3 * nall * odim);
+  for (unsigned ii = 0; ii < odim * nall * 3; ++ii) {
     dforce[ii] = of(ii);
   }
   dforce_ = dforce;
-  for (unsigned dd = 0; dd < odim; ++dd){
-    atommap.backward (dforce_.begin() + (dd * nall * 3), dforce.begin() + (dd * nall * 3), 3);
+  for (unsigned dd = 0; dd < odim; ++dd) {
+    atommap.backward(dforce_.begin() + (dd * nall * 3),
+                                dforce.begin() + (dd * nall * 3), 3);
   }
 
   // component-wise virial
   dvirial_.resize(odim * 9);
-  for (unsigned ii = 0; ii < odim * 9; ++ii){
+  for (unsigned ii = 0; ii < odim * 9; ++ii) {
     dvirial_[ii] = ov(ii);
   }
-  
+
   // atomic tensor
-  std::vector datom_tensor (nsel * odim);
-  for (unsigned ii = 0; ii < nsel * odim; ++ii){
+  std::vector datom_tensor(nsel * odim);
+  for (unsigned ii = 0; ii < nsel * odim; ++ii) {
     datom_tensor[ii] = oat(ii);
   }
   std::vector sel_srt = sel_fwd;
@@ -268,79 +240,73 @@ run_model (std::vector &		dglobal_tensor_,
   select_map(datom_tensor_, datom_tensor, sel_srt, odim);
 
   // component-wise atomic virial
-  std::vector datom_virial (9 * nall * odim);
-  for (unsigned ii = 0; ii < odim * nall * 9; ++ii){
+  std::vector datom_virial(9 * nall * odim);
+  for (unsigned ii = 0; ii < odim * nall * 9; ++ii) {
     datom_virial[ii] = oav(ii);
   }
   datom_virial_ = datom_virial;
-  for (unsigned dd = 0; dd < odim; ++dd){
-    atommap.backward (datom_virial_.begin() + (dd * nall * 9), datom_virial.begin() + (dd * nall * 9), 9);
+  for (unsigned dd = 0; dd < odim; ++dd) {
+    atommap.backward(datom_virial_.begin() + (dd * nall * 9),
+                                datom_virial.begin() + (dd * nall * 9), 9);
   }
 }
 
-template
-void
-DeepTensor::
-run_model (std::vector &		dglobal_tensor_,
-		  std::vector &	dforce_,
-		  std::vector &	dvirial_,
-		  std::vector &	datom_tensor_,
-		  std::vector &	datom_virial_,
-		  tensorflow::Session *			session, 
-		  const std::vector> & input_tensors,
-		  const AtomMap &		atommap, 
-		  const std::vector &		sel_fwd,
-		  const int				nghost);
-template
-void
-DeepTensor::
-run_model (std::vector &		dglobal_tensor_,
-		  std::vector &	dforce_,
-		  std::vector &	dvirial_,
-		  std::vector &	datom_tensor_,
-		  std::vector &	datom_virial_,
-		  tensorflow::Session *			session, 
-		  const std::vector> & input_tensors,
-		  const AtomMap &		atommap, 
-		  const std::vector &		sel_fwd,
-		  const int				nghost);
-
-template
-void
-DeepTensor::
-run_model (std::vector &		dglobal_tensor_,
-		  std::vector &	dforce_,
-		  std::vector &	dvirial_,
-		  std::vector &	datom_tensor_,
-		  std::vector &	datom_virial_,
-		  tensorflow::Session *			session, 
-		  const std::vector> & input_tensors,
-		  const AtomMap &		atommap, 
-		  const std::vector &		sel_fwd,
-		  const int				nghost);
-
-template
-void
-DeepTensor::
-run_model (std::vector &		dglobal_tensor_,
-		  std::vector &	dforce_,
-		  std::vector &	dvirial_,
-		  std::vector &	datom_tensor_,
-		  std::vector &	datom_virial_,
-		  tensorflow::Session *			session, 
-		  const std::vector> & input_tensors,
-		  const AtomMap &		atommap, 
-		  const std::vector &		sel_fwd,
-		  const int				nghost);
+template void DeepTensor::run_model(
+    std::vector &dglobal_tensor_,
+    std::vector &dforce_,
+    std::vector &dvirial_,
+    std::vector &datom_tensor_,
+    std::vector &datom_virial_,
+    tensorflow::Session *session,
+    const std::vector>
+        &input_tensors,
+    const AtomMap &atommap,
+    const std::vector &sel_fwd,
+    const int nghost);
+template void DeepTensor::run_model(
+    std::vector &dglobal_tensor_,
+    std::vector &dforce_,
+    std::vector &dvirial_,
+    std::vector &datom_tensor_,
+    std::vector &datom_virial_,
+    tensorflow::Session *session,
+    const std::vector>
+        &input_tensors,
+    const AtomMap &atommap,
+    const std::vector &sel_fwd,
+    const int nghost);
+
+template void DeepTensor::run_model(
+    std::vector &dglobal_tensor_,
+    std::vector &dforce_,
+    std::vector &dvirial_,
+    std::vector &datom_tensor_,
+    std::vector &datom_virial_,
+    tensorflow::Session *session,
+    const std::vector>
+        &input_tensors,
+    const AtomMap &atommap,
+    const std::vector &sel_fwd,
+    const int nghost);
+
+template void DeepTensor::run_model(
+    std::vector &dglobal_tensor_,
+    std::vector &dforce_,
+    std::vector &dvirial_,
+    std::vector &datom_tensor_,
+    std::vector &datom_virial_,
+    tensorflow::Session *session,
+    const std::vector>
+        &input_tensors,
+    const AtomMap &atommap,
+    const std::vector &sel_fwd,
+    const int nghost);
 
 template 
-void
-DeepTensor::
-compute (std::vector &	dtensor_,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox)
-{
+void DeepTensor::compute(std::vector &dtensor_,
+                         const std::vector &dcoord_,
+                         const std::vector &datype_,
+                         const std::vector &dbox) {
   std::vector dcoord;
   std::vector datype, fwd_map, bkw_map;
   int nghost_real;
@@ -355,36 +321,28 @@ compute (std::vector &	dtensor_,
   compute_inner(dtensor_, dcoord, datype, dbox);
 }
 
-template
-void
-DeepTensor::
-compute (std::vector &	dtensor_,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox);
-
-template
-void
-DeepTensor::
-compute (std::vector &	dtensor_,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox);
+template void DeepTensor::compute(std::vector &dtensor_,
+                                          const std::vector &dcoord_,
+                                          const std::vector &datype_,
+                                          const std::vector &dbox);
+
+template void DeepTensor::compute(std::vector &dtensor_,
+                                         const std::vector &dcoord_,
+                                         const std::vector &datype_,
+                                         const std::vector &dbox);
 
 template 
-void
-DeepTensor::
-compute (std::vector &	dtensor_,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox, 
-	 const int			nghost,
-	 const InputNlist &	lmp_list)
-{
+void DeepTensor::compute(std::vector &dtensor_,
+                         const std::vector &dcoord_,
+                         const std::vector &datype_,
+                         const std::vector &dbox,
+                         const int nghost,
+                         const InputNlist &lmp_list) {
   std::vector dcoord;
   std::vector datype, fwd_map, bkw_map;
   int nghost_real;
-  select_real_atoms(fwd_map, bkw_map, nghost_real, dcoord_, datype_, nghost, ntypes);
+  select_real_atoms(fwd_map, bkw_map, nghost_real, dcoord_, datype_, nghost,
+                    ntypes);
   // resize to nall_real
   dcoord.resize(bkw_map.size() * 3);
   datype.resize(bkw_map.size());
@@ -394,118 +352,93 @@ compute (std::vector &	dtensor_,
   // internal nlist
   NeighborListData nlist_data;
   nlist_data.copy_from_nlist(lmp_list);
-  nlist_data.shuffle_exclude_empty(fwd_map);  
+  nlist_data.shuffle_exclude_empty(fwd_map);
   InputNlist nlist;
   nlist_data.make_inlist(nlist);
   compute_inner(dtensor_, dcoord, datype, dbox, nghost_real, nlist);
 }
 
-template
-void
-DeepTensor::
-compute  (std::vector &	dtensor_,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox, 
-	 const int			nghost,
-	 const InputNlist &	lmp_list);
-
-template
-void
-DeepTensor::
-compute  (std::vector &	dtensor_,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox, 
-	 const int			nghost,
-	 const InputNlist &	lmp_list);
+template void DeepTensor::compute(std::vector &dtensor_,
+                                          const std::vector &dcoord_,
+                                          const std::vector &datype_,
+                                          const std::vector &dbox,
+                                          const int nghost,
+                                          const InputNlist &lmp_list);
+
+template void DeepTensor::compute(std::vector &dtensor_,
+                                         const std::vector &dcoord_,
+                                         const std::vector &datype_,
+                                         const std::vector &dbox,
+                                         const int nghost,
+                                         const InputNlist &lmp_list);
 
 template 
-void
-DeepTensor::
-compute (std::vector &	dglobal_tensor_,
-	 std::vector &	dforce_,
-	 std::vector &	dvirial_,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox)
-{
+void DeepTensor::compute(std::vector &dglobal_tensor_,
+                         std::vector &dforce_,
+                         std::vector &dvirial_,
+                         const std::vector &dcoord_,
+                         const std::vector &datype_,
+                         const std::vector &dbox) {
   std::vector tmp_at_, tmp_av_;
-  compute(dglobal_tensor_, dforce_, dvirial_, tmp_at_, tmp_av_, dcoord_, datype_, dbox);
+  compute(dglobal_tensor_, dforce_, dvirial_, tmp_at_, tmp_av_, dcoord_,
+          datype_, dbox);
 }
 
-template
-void
-DeepTensor::
-compute  (std::vector &	dglobal_tensor_,
-	 std::vector &	dforce_,
-	 std::vector &	dvirial_,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox);
-
-template
-void
-DeepTensor::
-compute  (std::vector &	dglobal_tensor_,
-	 std::vector &	dforce_,
-	 std::vector &	dvirial_,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox);
+template void DeepTensor::compute(std::vector &dglobal_tensor_,
+                                          std::vector &dforce_,
+                                          std::vector &dvirial_,
+                                          const std::vector &dcoord_,
+                                          const std::vector &datype_,
+                                          const std::vector &dbox);
+
+template void DeepTensor::compute(std::vector &dglobal_tensor_,
+                                         std::vector &dforce_,
+                                         std::vector &dvirial_,
+                                         const std::vector &dcoord_,
+                                         const std::vector &datype_,
+                                         const std::vector &dbox);
 
 template 
-void
-DeepTensor::
-compute (std::vector &	dglobal_tensor_,
-	 std::vector &	dforce_,
-	 std::vector &	dvirial_,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox, 
-	 const int			nghost,
-	 const InputNlist &	lmp_list)
-{
+void DeepTensor::compute(std::vector &dglobal_tensor_,
+                         std::vector &dforce_,
+                         std::vector &dvirial_,
+                         const std::vector &dcoord_,
+                         const std::vector &datype_,
+                         const std::vector &dbox,
+                         const int nghost,
+                         const InputNlist &lmp_list) {
   std::vector tmp_at_, tmp_av_;
-  compute(dglobal_tensor_, dforce_, dvirial_, tmp_at_, tmp_av_, dcoord_, datype_, dbox, nghost, lmp_list);
+  compute(dglobal_tensor_, dforce_, dvirial_, tmp_at_, tmp_av_, dcoord_,
+          datype_, dbox, nghost, lmp_list);
 }
 
-template
-void
-DeepTensor::
-compute  (std::vector &	dglobal_tensor_,
-   std::vector &	dforce_,
-   std::vector &	dvirial_,
-   const std::vector &	dcoord_,
-   const std::vector &	datype_,
-   const std::vector &	dbox, 
-   const int			nghost,
-   const InputNlist &	lmp_list);
-
-template
-void
-DeepTensor::
-compute  (std::vector &	dglobal_tensor_,
-   std::vector &	dforce_,
-   std::vector &	dvirial_,
-   const std::vector &	dcoord_,
-   const std::vector &	datype_,
-   const std::vector &	dbox, 
-   const int			nghost,
-   const InputNlist &	lmp_list);
+template void DeepTensor::compute(std::vector &dglobal_tensor_,
+                                          std::vector &dforce_,
+                                          std::vector &dvirial_,
+                                          const std::vector &dcoord_,
+                                          const std::vector &datype_,
+                                          const std::vector &dbox,
+                                          const int nghost,
+                                          const InputNlist &lmp_list);
+
+template void DeepTensor::compute(std::vector &dglobal_tensor_,
+                                         std::vector &dforce_,
+                                         std::vector &dvirial_,
+                                         const std::vector &dcoord_,
+                                         const std::vector &datype_,
+                                         const std::vector &dbox,
+                                         const int nghost,
+                                         const InputNlist &lmp_list);
 
 template 
-void
-DeepTensor::
-compute (std::vector &	dglobal_tensor_,
-	 std::vector &	dforce_,
-	 std::vector &	dvirial_,
-	 std::vector &	datom_tensor_,
-	 std::vector &	datom_virial_,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox)
-{
+void DeepTensor::compute(std::vector &dglobal_tensor_,
+                         std::vector &dforce_,
+                         std::vector &dvirial_,
+                         std::vector &datom_tensor_,
+                         std::vector &datom_virial_,
+                         const std::vector &dcoord_,
+                         const std::vector &datype_,
+                         const std::vector &dbox) {
   std::vector dcoord, dforce, datom_virial;
   std::vector datype, fwd_map, bkw_map;
   int nghost_real;
@@ -517,60 +450,56 @@ compute (std::vector &	dglobal_tensor_,
   // fwd map
   select_map(dcoord, dcoord_, fwd_map, 3);
   select_map(datype, datype_, fwd_map, 1);
-  compute_inner(dglobal_tensor_, dforce, dvirial_, datom_tensor_, datom_virial, dcoord, datype, dbox);
+  compute_inner(dglobal_tensor_, dforce, dvirial_, datom_tensor_, datom_virial,
+                dcoord, datype, dbox);
   // bkw map
   dforce_.resize(odim * fwd_map.size() * 3);
-  for(int kk = 0; kk < odim; ++kk){
-    select_map(dforce_.begin() + kk * fwd_map.size() * 3, dforce.begin() + kk * bkw_map.size() * 3, bkw_map, 3);
+  for (int kk = 0; kk < odim; ++kk) {
+    select_map(dforce_.begin() + kk * fwd_map.size() * 3,
+                          dforce.begin() + kk * bkw_map.size() * 3, bkw_map, 3);
   }
   datom_virial_.resize(odim * fwd_map.size() * 9);
-  for(int kk = 0; kk < odim; ++kk){
-    select_map(datom_virial_.begin() + kk * fwd_map.size() * 9, datom_virial.begin() + kk * bkw_map.size() * 9, bkw_map, 9);
+  for (int kk = 0; kk < odim; ++kk) {
+    select_map(datom_virial_.begin() + kk * fwd_map.size() * 9,
+                          datom_virial.begin() + kk * bkw_map.size() * 9,
+                          bkw_map, 9);
   }
 }
 
-template
-void
-DeepTensor::
-compute  (std::vector &	dglobal_tensor_,
-   std::vector &	dforce_,
-   std::vector &	dvirial_,
-   std::vector &	datom_tensor_,
-   std::vector &	datom_virial_,
-   const std::vector &	dcoord_,
-   const std::vector &	datype_,
-   const std::vector &	dbox);
-
-template
-void
-DeepTensor::
-compute  (std::vector &	dglobal_tensor_,
-   std::vector &	dforce_,
-   std::vector &	dvirial_,
-   std::vector &	datom_tensor_,
-   std::vector &	datom_virial_,
-   const std::vector &	dcoord_,
-   const std::vector &	datype_,
-   const std::vector &	dbox);
+template void DeepTensor::compute(std::vector &dglobal_tensor_,
+                                          std::vector &dforce_,
+                                          std::vector &dvirial_,
+                                          std::vector &datom_tensor_,
+                                          std::vector &datom_virial_,
+                                          const std::vector &dcoord_,
+                                          const std::vector &datype_,
+                                          const std::vector &dbox);
+
+template void DeepTensor::compute(std::vector &dglobal_tensor_,
+                                         std::vector &dforce_,
+                                         std::vector &dvirial_,
+                                         std::vector &datom_tensor_,
+                                         std::vector &datom_virial_,
+                                         const std::vector &dcoord_,
+                                         const std::vector &datype_,
+                                         const std::vector &dbox);
 
 template 
-void
-DeepTensor::
-compute (std::vector &	dglobal_tensor_,
-	 std::vector &	dforce_,
-	 std::vector &	dvirial_,
-	 std::vector &	datom_tensor_,
-	 std::vector &	datom_virial_,
-	 const std::vector &	dcoord_,
-	 const std::vector &	datype_,
-	 const std::vector &	dbox, 
-	 const int			nghost,
-	 const InputNlist &	lmp_list)
-{
+void DeepTensor::compute(std::vector &dglobal_tensor_,
+                         std::vector &dforce_,
+                         std::vector &dvirial_,
+                         std::vector &datom_tensor_,
+                         std::vector &datom_virial_,
+                         const std::vector &dcoord_,
+                         const std::vector &datype_,
+                         const std::vector &dbox,
+                         const int nghost,
+                         const InputNlist &lmp_list) {
   std::vector dcoord, dforce, datom_virial;
   std::vector datype, fwd_map, bkw_map;
   int nghost_real;
-  select_real_atoms(fwd_map, bkw_map, nghost_real, dcoord_, datype_, nghost, ntypes);
+  select_real_atoms(fwd_map, bkw_map, nghost_real, dcoord_, datype_, nghost,
+                    ntypes);
   // resize to nall_real
   dcoord.resize(bkw_map.size() * 3);
   datype.resize(bkw_map.size());
@@ -580,63 +509,57 @@ compute (std::vector &	dglobal_tensor_,
   // internal nlist
   NeighborListData nlist_data;
   nlist_data.copy_from_nlist(lmp_list);
-  nlist_data.shuffle_exclude_empty(fwd_map);  
+  nlist_data.shuffle_exclude_empty(fwd_map);
   InputNlist nlist;
   nlist_data.make_inlist(nlist);
-  compute_inner(dglobal_tensor_, dforce, dvirial_, datom_tensor_, datom_virial, dcoord, datype, dbox, nghost_real, nlist);
+  compute_inner(dglobal_tensor_, dforce, dvirial_, datom_tensor_, datom_virial,
+                dcoord, datype, dbox, nghost_real, nlist);
   // bkw map
   dforce_.resize(odim * fwd_map.size() * 3);
-  for(int kk = 0; kk < odim; ++kk){
-    select_map(dforce_.begin() + kk * fwd_map.size() * 3, dforce.begin() + kk * bkw_map.size() * 3, bkw_map, 3);
+  for (int kk = 0; kk < odim; ++kk) {
+    select_map(dforce_.begin() + kk * fwd_map.size() * 3,
+                          dforce.begin() + kk * bkw_map.size() * 3, bkw_map, 3);
   }
   datom_virial_.resize(odim * fwd_map.size() * 9);
-  for(int kk = 0; kk < odim; ++kk){
-    select_map(datom_virial_.begin() + kk * fwd_map.size() * 9, datom_virial.begin() + kk * bkw_map.size() * 9, bkw_map, 9);
+  for (int kk = 0; kk < odim; ++kk) {
+    select_map(datom_virial_.begin() + kk * fwd_map.size() * 9,
+                          datom_virial.begin() + kk * bkw_map.size() * 9,
+                          bkw_map, 9);
   }
 }
 
-template
-void
-DeepTensor::
-compute  (std::vector &	dglobal_tensor_,
-   std::vector &	dforce_,
-   std::vector &	dvirial_,
-   std::vector &	datom_tensor_,
-   std::vector &	datom_virial_,
-   const std::vector &	dcoord_,
-   const std::vector &	datype_,
-   const std::vector &	dbox, 
-   const int			nghost,
-   const InputNlist &	lmp_list);
-
-template
-void
-DeepTensor::
-compute  (std::vector &	dglobal_tensor_,
-   std::vector &	dforce_,
-   std::vector &	dvirial_,
-   std::vector &	datom_tensor_,
-   std::vector &	datom_virial_,
-   const std::vector &	dcoord_,
-   const std::vector &	datype_,
-   const std::vector &	dbox, 
-   const int			nghost,
-   const InputNlist &	lmp_list);
-
+template void DeepTensor::compute(std::vector &dglobal_tensor_,
+                                          std::vector &dforce_,
+                                          std::vector &dvirial_,
+                                          std::vector &datom_tensor_,
+                                          std::vector &datom_virial_,
+                                          const std::vector &dcoord_,
+                                          const std::vector &datype_,
+                                          const std::vector &dbox,
+                                          const int nghost,
+                                          const InputNlist &lmp_list);
+
+template void DeepTensor::compute(std::vector &dglobal_tensor_,
+                                         std::vector &dforce_,
+                                         std::vector &dvirial_,
+                                         std::vector &datom_tensor_,
+                                         std::vector &datom_virial_,
+                                         const std::vector &dcoord_,
+                                         const std::vector &datype_,
+                                         const std::vector &dbox,
+                                         const int nghost,
+                                         const InputNlist &lmp_list);
 
 template 
-void
-DeepTensor::
-compute_inner (std::vector &		dtensor_,
-	       const std::vector &	dcoord_,
-	       const std::vector &		datype_,
-	       const std::vector &	dbox)
-{
+void DeepTensor::compute_inner(std::vector &dtensor_,
+                               const std::vector &dcoord_,
+                               const std::vector &datype_,
+                               const std::vector &dbox) {
   int nall = dcoord_.size() / 3;
   int nloc = nall;
-  AtomMap atommap (datype_.begin(), datype_.begin() + nloc);
-  assert (nloc == atommap.get_type().size());
-  
+  AtomMap atommap(datype_.begin(), datype_.begin() + nloc);
+  assert(nloc == atommap.get_type().size());
+
   std::vector sel_fwd, sel_bkw;
   int nghost_sel;
   // this gives the raw selection map, will pass to run model
@@ -645,51 +568,51 @@ compute_inner (std::vector &		dtensor_,
   std::vector> input_tensors;
 
   if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors  (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, std::vector(), std::vector(), atommap, name_scope);
-    assert (ret == nloc);
-    run_model (dtensor_, session, input_tensors, atommap, sel_fwd);
+    int ret = session_input_tensors(
+        input_tensors, dcoord_, ntypes, datype_, dbox, cell_size,
+        std::vector(), std::vector(), atommap,
+        name_scope);
+    assert(ret == nloc);
+    run_model(dtensor_, session, input_tensors, atommap, sel_fwd);
   } else {
-    int ret = session_input_tensors  (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, std::vector(), std::vector(), atommap, name_scope);
-    assert (ret == nloc);
-    run_model (dtensor_, session, input_tensors, atommap, sel_fwd);
+    int ret = session_input_tensors(
+        input_tensors, dcoord_, ntypes, datype_, dbox, cell_size,
+        std::vector(), std::vector(), atommap,
+        name_scope);
+    assert(ret == nloc);
+    run_model(dtensor_, session, input_tensors, atommap, sel_fwd);
   }
 }
 
-template
-void
-DeepTensor::
-compute_inner  (std::vector &		dtensor_,
-      const std::vector &	dcoord_,
-      const std::vector &		datype_,
-      const std::vector &	dbox);
-
-template
-void
-DeepTensor::
-compute_inner  (std::vector &		dtensor_,
-      const std::vector &	dcoord_,
-      const std::vector &		datype_,
-      const std::vector &	dbox);
+template void DeepTensor::compute_inner(
+    std::vector &dtensor_,
+    const std::vector &dcoord_,
+    const std::vector &datype_,
+    const std::vector &dbox);
+
+template void DeepTensor::compute_inner(
+    std::vector &dtensor_,
+    const std::vector &dcoord_,
+    const std::vector &datype_,
+    const std::vector &dbox);
 
 template 
-void
-DeepTensor::
-compute_inner (std::vector &		dtensor_,
-	       const std::vector &	dcoord_,
-	       const std::vector &		datype_,
-	       const std::vector &	dbox, 
-	       const int			nghost,
-	       const InputNlist &	nlist_)
-{
+void DeepTensor::compute_inner(std::vector &dtensor_,
+                               const std::vector &dcoord_,
+                               const std::vector &datype_,
+                               const std::vector &dbox,
+                               const int nghost,
+                               const InputNlist &nlist_) {
   int nall = dcoord_.size() / 3;
   int nloc = nall - nghost;
-  AtomMap atommap (datype_.begin(), datype_.begin() + nloc);
-  assert (nloc == atommap.get_type().size());
+  AtomMap atommap(datype_.begin(), datype_.begin() + nloc);
+  assert(nloc == atommap.get_type().size());
 
   std::vector sel_fwd, sel_bkw;
   int nghost_sel;
   // this gives the raw selection map, will pass to run model
-  select_by_type(sel_fwd, sel_bkw, nghost_sel, dcoord_, datype_, nghost, sel_type);
+  select_by_type(sel_fwd, sel_bkw, nghost_sel, dcoord_, datype_, nghost,
+                 sel_type);
   sel_fwd.resize(nloc);
 
   NeighborListData nlist_data;
@@ -701,53 +624,54 @@ compute_inner (std::vector &		dtensor_,
   std::vector> input_tensors;
 
   if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors  (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, std::vector(), std::vector(), atommap, nghost, 0, name_scope);
-    assert (nloc == ret);
-    run_model (dtensor_, session, input_tensors, atommap, sel_fwd, nghost);
+    int ret = session_input_tensors(
+        input_tensors, dcoord_, ntypes, datype_, dbox, nlist,
+        std::vector(), std::vector(), atommap, nghost, 0,
+        name_scope);
+    assert(nloc == ret);
+    run_model(dtensor_, session, input_tensors, atommap, sel_fwd,
+                      nghost);
   } else {
-    int ret = session_input_tensors  (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, std::vector(), std::vector(), atommap, nghost, 0, name_scope);
-    assert (nloc == ret);
-    run_model (dtensor_, session, input_tensors, atommap, sel_fwd, nghost);
+    int ret = session_input_tensors(
+        input_tensors, dcoord_, ntypes, datype_, dbox, nlist,
+        std::vector(), std::vector(), atommap, nghost, 0,
+        name_scope);
+    assert(nloc == ret);
+    run_model(dtensor_, session, input_tensors, atommap, sel_fwd,
+                     nghost);
   }
 }
 
-template
-void
-DeepTensor::
-compute_inner  (std::vector &		dtensor_,
-      const std::vector &	dcoord_,
-      const std::vector &		datype_,
-      const std::vector &	dbox, 
-      const int			nghost,
-      const InputNlist &	nlist_);
-
-template
-void
-DeepTensor::
-compute_inner  (std::vector &		dtensor_,
-      const std::vector &	dcoord_,
-      const std::vector &		datype_,
-      const std::vector &	dbox, 
-      const int			nghost,
-      const InputNlist &	nlist_);
+template void DeepTensor::compute_inner(
+    std::vector &dtensor_,
+    const std::vector &dcoord_,
+    const std::vector &datype_,
+    const std::vector &dbox,
+    const int nghost,
+    const InputNlist &nlist_);
+
+template void DeepTensor::compute_inner(
+    std::vector &dtensor_,
+    const std::vector &dcoord_,
+    const std::vector &datype_,
+    const std::vector &dbox,
+    const int nghost,
+    const InputNlist &nlist_);
 
 template 
-void
-DeepTensor::
-compute_inner (std::vector &		dglobal_tensor_,
-	       std::vector &	dforce_,
-	       std::vector &	dvirial_,
-	       std::vector &	datom_tensor_,
-	       std::vector &	datom_virial_,
-	       const std::vector &	dcoord_,
-	       const std::vector &		datype_,
-	       const std::vector &	dbox)
-{
+void DeepTensor::compute_inner(std::vector &dglobal_tensor_,
+                               std::vector &dforce_,
+                               std::vector &dvirial_,
+                               std::vector &datom_tensor_,
+                               std::vector &datom_virial_,
+                               const std::vector &dcoord_,
+                               const std::vector &datype_,
+                               const std::vector &dbox) {
   int nall = dcoord_.size() / 3;
   int nloc = nall;
-  AtomMap atommap (datype_.begin(), datype_.begin() + nloc);
-  assert (nloc == atommap.get_type().size());
-  
+  AtomMap atommap(datype_.begin(), datype_.begin() + nloc);
+  assert(nloc == atommap.get_type().size());
+
   std::vector sel_fwd, sel_bkw;
   int nghost_sel;
   // this gives the raw selection map, will pass to run model
@@ -756,63 +680,65 @@ compute_inner (std::vector &		dglobal_tensor_,
   std::vector> input_tensors;
 
   if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors  (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, std::vector(), std::vector(), atommap, name_scope);
-    assert (ret == nloc);
-    run_model (dglobal_tensor_, dforce_, dvirial_, datom_tensor_, datom_virial_, session, input_tensors, atommap, sel_fwd);
+    int ret = session_input_tensors(
+        input_tensors, dcoord_, ntypes, datype_, dbox, cell_size,
+        std::vector(), std::vector(), atommap,
+        name_scope);
+    assert(ret == nloc);
+    run_model(dglobal_tensor_, dforce_, dvirial_, datom_tensor_,
+                      datom_virial_, session, input_tensors, atommap, sel_fwd);
   } else {
-    int ret = session_input_tensors  (input_tensors, dcoord_, ntypes, datype_, dbox, cell_size, std::vector(), std::vector(), atommap, name_scope);
-    assert (ret == nloc);
-    run_model (dglobal_tensor_, dforce_, dvirial_, datom_tensor_, datom_virial_, session, input_tensors, atommap, sel_fwd);
+    int ret = session_input_tensors(
+        input_tensors, dcoord_, ntypes, datype_, dbox, cell_size,
+        std::vector(), std::vector(), atommap,
+        name_scope);
+    assert(ret == nloc);
+    run_model(dglobal_tensor_, dforce_, dvirial_, datom_tensor_,
+                     datom_virial_, session, input_tensors, atommap, sel_fwd);
   }
 }
 
-template
-void
-DeepTensor::
-compute_inner  (std::vector &		dglobal_tensor_,
-      std::vector &	dforce_,
-      std::vector &	dvirial_,
-      std::vector &	datom_tensor_,
-      std::vector &	datom_virial_,
-      const std::vector &	dcoord_,
-      const std::vector &		datype_,
-      const std::vector &	dbox);
-
-template
-void
-DeepTensor::
-compute_inner  (std::vector &		dglobal_tensor_,
-      std::vector &	dforce_,
-      std::vector &	dvirial_,
-      std::vector &	datom_tensor_,
-      std::vector &	datom_virial_,
-      const std::vector &	dcoord_,
-      const std::vector &		datype_,
-      const std::vector &	dbox);
+template void DeepTensor::compute_inner(
+    std::vector &dglobal_tensor_,
+    std::vector &dforce_,
+    std::vector &dvirial_,
+    std::vector &datom_tensor_,
+    std::vector &datom_virial_,
+    const std::vector &dcoord_,
+    const std::vector &datype_,
+    const std::vector &dbox);
+
+template void DeepTensor::compute_inner(
+    std::vector &dglobal_tensor_,
+    std::vector &dforce_,
+    std::vector &dvirial_,
+    std::vector &datom_tensor_,
+    std::vector &datom_virial_,
+    const std::vector &dcoord_,
+    const std::vector &datype_,
+    const std::vector &dbox);
 
 template 
-void
-DeepTensor::
-compute_inner (std::vector &		dglobal_tensor_,
-	       std::vector &	dforce_,
-	       std::vector &	dvirial_,
-	       std::vector &	datom_tensor_,
-	       std::vector &	datom_virial_,
-	       const std::vector &	dcoord_,
-	       const std::vector &		datype_,
-	       const std::vector &	dbox, 
-	       const int			nghost,
-	       const InputNlist &	nlist_)
-{
+void DeepTensor::compute_inner(std::vector &dglobal_tensor_,
+                               std::vector &dforce_,
+                               std::vector &dvirial_,
+                               std::vector &datom_tensor_,
+                               std::vector &datom_virial_,
+                               const std::vector &dcoord_,
+                               const std::vector &datype_,
+                               const std::vector &dbox,
+                               const int nghost,
+                               const InputNlist &nlist_) {
   int nall = dcoord_.size() / 3;
   int nloc = nall - nghost;
-  AtomMap atommap (datype_.begin(), datype_.begin() + nloc);
-  assert (nloc == atommap.get_type().size());
+  AtomMap atommap(datype_.begin(), datype_.begin() + nloc);
+  assert(nloc == atommap.get_type().size());
 
   std::vector sel_fwd, sel_bkw;
   int nghost_sel;
   // this gives the raw selection map, will pass to run model
-  select_by_type(sel_fwd, sel_bkw, nghost_sel, dcoord_, datype_, nghost, sel_type);
+  select_by_type(sel_fwd, sel_bkw, nghost_sel, dcoord_, datype_, nghost,
+                 sel_type);
   sel_fwd.resize(nloc);
 
   NeighborListData nlist_data;
@@ -824,40 +750,46 @@ compute_inner (std::vector &		dglobal_tensor_,
   std::vector> input_tensors;
 
   if (dtype == tensorflow::DT_DOUBLE) {
-    int ret = session_input_tensors  (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, std::vector(), std::vector(), atommap, nghost, 0, name_scope);
-    assert (nloc == ret);
-    run_model (dglobal_tensor_, dforce_, dvirial_, datom_tensor_, datom_virial_, session, input_tensors, atommap, sel_fwd, nghost);
+    int ret = session_input_tensors(
+        input_tensors, dcoord_, ntypes, datype_, dbox, nlist,
+        std::vector(), std::vector(), atommap, nghost, 0,
+        name_scope);
+    assert(nloc == ret);
+    run_model(dglobal_tensor_, dforce_, dvirial_, datom_tensor_,
+                      datom_virial_, session, input_tensors, atommap, sel_fwd,
+                      nghost);
   } else {
-    int ret = session_input_tensors  (input_tensors, dcoord_, ntypes, datype_, dbox, nlist, std::vector(), std::vector(), atommap, nghost, 0, name_scope);
-    assert (nloc == ret);
-    run_model (dglobal_tensor_, dforce_, dvirial_, datom_tensor_, datom_virial_, session, input_tensors, atommap, sel_fwd, nghost);
+    int ret = session_input_tensors(
+        input_tensors, dcoord_, ntypes, datype_, dbox, nlist,
+        std::vector(), std::vector(), atommap, nghost, 0,
+        name_scope);
+    assert(nloc == ret);
+    run_model(dglobal_tensor_, dforce_, dvirial_, datom_tensor_,
+                     datom_virial_, session, input_tensors, atommap, sel_fwd,
+                     nghost);
   }
 }
 
-template
-void
-DeepTensor::
-compute_inner  (std::vector &		dglobal_tensor_,
-      std::vector &	dforce_,
-      std::vector &	dvirial_,
-      std::vector &	datom_tensor_,
-      std::vector &	datom_virial_,
-      const std::vector &	dcoord_,
-      const std::vector &		datype_,
-      const std::vector &	dbox, 
-      const int			nghost,
-      const InputNlist &	nlist_);
-
-template
-void
-DeepTensor::
-compute_inner  (std::vector &		dglobal_tensor_,
-      std::vector &	dforce_,
-      std::vector &	dvirial_,
-      std::vector &	datom_tensor_,
-      std::vector &	datom_virial_,
-      const std::vector &	dcoord_,
-      const std::vector &		datype_,
-      const std::vector &	dbox, 
-      const int			nghost,
-      const InputNlist &	nlist_);
+template void DeepTensor::compute_inner(
+    std::vector &dglobal_tensor_,
+    std::vector &dforce_,
+    std::vector &dvirial_,
+    std::vector &datom_tensor_,
+    std::vector &datom_virial_,
+    const std::vector &dcoord_,
+    const std::vector &datype_,
+    const std::vector &dbox,
+    const int nghost,
+    const InputNlist &nlist_);
+
+template void DeepTensor::compute_inner(
+    std::vector &dglobal_tensor_,
+    std::vector &dforce_,
+    std::vector &dvirial_,
+    std::vector &datom_tensor_,
+    std::vector &datom_virial_,
+    const std::vector &dcoord_,
+    const std::vector &datype_,
+    const std::vector &dbox,
+    const int nghost,
+    const InputNlist &nlist_);
diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc
index 597600a1cb..50ee12bc7d 100644
--- a/source/api_cc/src/common.cc
+++ b/source/api_cc/src/common.cc
@@ -1,7 +1,9 @@
 #include "common.h"
+
+#include 
+
 #include "AtomMap.h"
 #include "device.h"
-#include 
 #if defined(_WIN32)
 #if defined(_WIN32_WINNT)
 #undef _WIN32_WINNT
@@ -10,22 +12,20 @@
 // target Windows version is windows 7 and later
 #define _WIN32_WINNT _WIN32_WINNT_WIN7
 #define PSAPI_VERSION 2
-#include 
 #include 
+#include 
 #define O_RDONLY _O_RDONLY
 #else
 // not windows
 #include 
 #endif
-#include "google/protobuf/text_format.h"
 #include "google/protobuf/io/zero_copy_stream_impl.h"
+#include "google/protobuf/text_format.h"
 
 using namespace tensorflow;
 
-static std::vector
-split(const std::string &input_, 
-      const std::string &delimiter)
-{
+static std::vector split(const std::string& input_,
+                                      const std::string& delimiter) {
   std::string input = input_;
   size_t pos = 0;
   std::vector res;
@@ -37,201 +37,170 @@ split(const std::string &input_,
   return res;
 }
 
-bool
-deepmd::
-model_compatable(
-    std::string & model_version)
-{
+bool deepmd::model_compatable(std::string& model_version) {
   std::vector words_mv = split(model_version, ".");
   std::vector words_gmv = split(global_model_version, ".");
-  if(words_mv.size() != 2){
-    throw deepmd::deepmd_exception("invalid graph model version string " + model_version);
+  if (words_mv.size() != 2) {
+    throw deepmd::deepmd_exception("invalid graph model version string " +
+                                   model_version);
   }
-  if(words_gmv.size() != 2){
-    throw deepmd::deepmd_exception("invalid supported model version string " + global_model_version);
+  if (words_gmv.size() != 2) {
+    throw deepmd::deepmd_exception("invalid supported model version string " +
+                                   global_model_version);
   }
   int model_version_major = atoi(words_mv[0].c_str());
   int model_version_minor = atoi(words_mv[1].c_str());
   int MODEL_VERSION_MAJOR = atoi(words_gmv[0].c_str());
   int MODEL_VERSION_MINOR = atoi(words_gmv[1].c_str());
-  if(model_version_major != MODEL_VERSION_MAJOR ||
-     model_version_minor >  MODEL_VERSION_MINOR){
+  if (model_version_major != MODEL_VERSION_MAJOR ||
+      model_version_minor > MODEL_VERSION_MINOR) {
     return false;
-  }
-  else{
+  } else {
     return true;
   }
 }
 
 template 
-void 
-deepmd::
-select_by_type(std::vector & fwd_map,
-	       std::vector & bkw_map,
-	       int & nghost_real, 
-	       const std::vector & dcoord_, 
-	       const std::vector & datype_,
-	       const int & nghost,
-	       const std::vector & sel_type_)
-{
-  std::vector sel_type (sel_type_);
-  sort(sel_type.begin(), sel_type.end());  
+void deepmd::select_by_type(std::vector& fwd_map,
+                            std::vector& bkw_map,
+                            int& nghost_real,
+                            const std::vector& dcoord_,
+                            const std::vector& datype_,
+                            const int& nghost,
+                            const std::vector& sel_type_) {
+  std::vector sel_type(sel_type_);
+  sort(sel_type.begin(), sel_type.end());
   int nall = dcoord_.size() / 3;
   int nloc = nall - nghost;
   int nloc_real = 0;
   nghost_real = 0;
   fwd_map.resize(nall);
   bkw_map.clear();
-  bkw_map.reserve(nall);  
+  bkw_map.reserve(nall);
   int cc = 0;
-  for (int ii = 0; ii < nall; ++ii){
+  for (int ii = 0; ii < nall; ++ii) {
     // exclude virtual sites
     // select the type with id < ntypes
-    if (binary_search(sel_type.begin(), sel_type.end(), datype_[ii])){
+    if (binary_search(sel_type.begin(), sel_type.end(), datype_[ii])) {
       bkw_map.push_back(ii);
       if (ii < nloc) {
-	nloc_real += 1;
-      }
-      else{
-	nghost_real += 1;
+        nloc_real += 1;
+      } else {
+        nghost_real += 1;
       }
       fwd_map[ii] = cc;
-      cc ++;
-    }
-    else{
+      cc++;
+    } else {
       fwd_map[ii] = -1;
     }
-  }  
-  assert((nloc_real+nghost_real) == bkw_map.size());  
-}	       
-
-template
-void 
-deepmd::
-select_by_type (std::vector & fwd_map,
-	       std::vector & bkw_map,
-	       int & nghost_real, 
-	       const std::vector & dcoord_, 
-	       const std::vector & datype_,
-	       const int & nghost,
-	       const std::vector & sel_type_);
-
-template
-void 
-deepmd::
-select_by_type (std::vector & fwd_map,
-	       std::vector & bkw_map,
-	       int & nghost_real, 
-	       const std::vector & dcoord_, 
-	       const std::vector & datype_,
-	       const int & nghost,
-	       const std::vector & sel_type_);
+  }
+  assert((nloc_real + nghost_real) == bkw_map.size());
+}
+
+template void deepmd::select_by_type(std::vector& fwd_map,
+                                             std::vector& bkw_map,
+                                             int& nghost_real,
+                                             const std::vector& dcoord_,
+                                             const std::vector& datype_,
+                                             const int& nghost,
+                                             const std::vector& sel_type_);
+
+template void deepmd::select_by_type(std::vector& fwd_map,
+                                            std::vector& bkw_map,
+                                            int& nghost_real,
+                                            const std::vector& dcoord_,
+                                            const std::vector& datype_,
+                                            const int& nghost,
+                                            const std::vector& sel_type_);
 
 template 
-void
-deepmd::
-select_real_atoms(std::vector & fwd_map,
-		  std::vector & bkw_map,
-		  int & nghost_real,
-		  const std::vector & dcoord_, 
-		  const std::vector & datype_,
-		  const int & nghost,
-		  const int & ntypes)
-{
-  std::vector sel_type;
-  for (int ii = 0; ii < ntypes; ++ii){
+void deepmd::select_real_atoms(std::vector& fwd_map,
+                               std::vector& bkw_map,
+                               int& nghost_real,
+                               const std::vector& dcoord_,
+                               const std::vector& datype_,
+                               const int& nghost,
+                               const int& ntypes) {
+  std::vector sel_type;
+  for (int ii = 0; ii < ntypes; ++ii) {
     sel_type.push_back(ii);
   }
-  deepmd::select_by_type(fwd_map, bkw_map, nghost_real, dcoord_, datype_, nghost, sel_type);
+  deepmd::select_by_type(fwd_map, bkw_map, nghost_real, dcoord_, datype_,
+                         nghost, sel_type);
 }
 
-template
-void
-deepmd::
-select_real_atoms  (std::vector & fwd_map,
-		  std::vector & bkw_map,
-		  int & nghost_real,
-		  const std::vector & dcoord_, 
-		  const std::vector & datype_,
-		  const int & nghost,
-		  const int & ntypes);
-
-template
-void
-deepmd::
-select_real_atoms  (std::vector & fwd_map,
-		  std::vector & bkw_map,
-		  int & nghost_real,
-		  const std::vector & dcoord_, 
-		  const std::vector & datype_,
-		  const int & nghost,
-		  const int & ntypes);
-
-void
-deepmd::NeighborListData::
-copy_from_nlist(const InputNlist & inlist)
-{
+template void deepmd::select_real_atoms(
+    std::vector& fwd_map,
+    std::vector& bkw_map,
+    int& nghost_real,
+    const std::vector& dcoord_,
+    const std::vector& datype_,
+    const int& nghost,
+    const int& ntypes);
+
+template void deepmd::select_real_atoms(
+    std::vector& fwd_map,
+    std::vector& bkw_map,
+    int& nghost_real,
+    const std::vector& dcoord_,
+    const std::vector& datype_,
+    const int& nghost,
+    const int& ntypes);
+
+void deepmd::NeighborListData::copy_from_nlist(const InputNlist& inlist) {
   int inum = inlist.inum;
   ilist.resize(inum);
   jlist.resize(inum);
-  memcpy(&ilist[0], inlist.ilist, inum*sizeof(int));
-  for(int ii = 0; ii < inum; ++ii){
+  memcpy(&ilist[0], inlist.ilist, inum * sizeof(int));
+  for (int ii = 0; ii < inum; ++ii) {
     int jnum = inlist.numneigh[ii];
     jlist[ii].resize(jnum);
-    memcpy(&jlist[ii][0], inlist.firstneigh[ii], jnum*sizeof(int));
+    memcpy(&jlist[ii][0], inlist.firstneigh[ii], jnum * sizeof(int));
   }
 }
 
-void
-deepmd::NeighborListData::
-shuffle(const AtomMap & map)
-{
-  const std::vector & fwd_map = map.get_fwd_map();
+void deepmd::NeighborListData::shuffle(const AtomMap& map) {
+  const std::vector& fwd_map = map.get_fwd_map();
   shuffle(fwd_map);
 }
 
-void
-deepmd::NeighborListData::
-shuffle(const std::vector & fwd_map)
-{
+void deepmd::NeighborListData::shuffle(const std::vector& fwd_map) {
   int nloc = fwd_map.size();
-  for(unsigned ii = 0; ii < ilist.size(); ++ii){
-    if(ilist[ii] < nloc){
+  for (unsigned ii = 0; ii < ilist.size(); ++ii) {
+    if (ilist[ii] < nloc) {
       ilist[ii] = fwd_map[ilist[ii]];
     }
   }
-  for(unsigned ii = 0; ii < jlist.size(); ++ii){
-    for(unsigned jj = 0; jj < jlist[ii].size(); ++jj){
-      if(jlist[ii][jj] < nloc){
-	jlist[ii][jj] = fwd_map[jlist[ii][jj]];
+  for (unsigned ii = 0; ii < jlist.size(); ++ii) {
+    for (unsigned jj = 0; jj < jlist[ii].size(); ++jj) {
+      if (jlist[ii][jj] < nloc) {
+        jlist[ii][jj] = fwd_map[jlist[ii][jj]];
       }
     }
   }
 }
 
-void
-deepmd::NeighborListData::
-shuffle_exclude_empty (const std::vector & fwd_map)
-{
+void deepmd::NeighborListData::shuffle_exclude_empty(
+    const std::vector& fwd_map) {
   shuffle(fwd_map);
-  std::vector new_ilist;
-  std::vector > new_jlist;
+  std::vector new_ilist;
+  std::vector> new_jlist;
   new_ilist.reserve(ilist.size());
   new_jlist.reserve(jlist.size());
-  for(int ii = 0; ii < ilist.size(); ++ii){
-    if(ilist[ii] >= 0){
+  for (int ii = 0; ii < ilist.size(); ++ii) {
+    if (ilist[ii] >= 0) {
       new_ilist.push_back(ilist[ii]);
     }
   }
   int new_inum = new_ilist.size();
-  for(int ii = 0; ii < jlist.size(); ++ii){
-    if(ilist[ii] >= 0){
+  for (int ii = 0; ii < jlist.size(); ++ii) {
+    if (ilist[ii] >= 0) {
       std::vector tmp_jlist;
       tmp_jlist.reserve(jlist[ii].size());
-      for(int jj = 0; jj < jlist[ii].size(); ++jj){
-	if(jlist[ii][jj] >= 0){
-	  tmp_jlist.push_back(jlist[ii][jj]);
-	}
+      for (int jj = 0; jj < jlist[ii].size(); ++jj) {
+        if (jlist[ii][jj] >= 0) {
+          tmp_jlist.push_back(jlist[ii][jj]);
+        }
       }
       new_jlist.push_back(tmp_jlist);
     }
@@ -240,14 +209,11 @@ shuffle_exclude_empty (const std::vector & fwd_map)
   jlist = new_jlist;
 }
 
-void 
-deepmd::NeighborListData::
-make_inlist(InputNlist & inlist)
-{
+void deepmd::NeighborListData::make_inlist(InputNlist& inlist) {
   int nloc = ilist.size();
   numneigh.resize(nloc);
   firstneigh.resize(nloc);
-  for(int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     numneigh[ii] = jlist[ii].size();
     firstneigh[ii] = &jlist[ii][0];
   }
@@ -257,61 +223,49 @@ make_inlist(InputNlist & inlist)
   inlist.firstneigh = &firstneigh[0];
 }
 
-void
-deepmd::
-check_status(const tensorflow::Status& status) {
+void deepmd::check_status(const tensorflow::Status& status) {
   if (!status.ok()) {
     std::cout << status.ToString() << std::endl;
     throw deepmd::tf_exception(status.ToString());
   }
 }
 
-void
-throw_env_not_set_warning(std::string env_name)
-{
-  std::cerr << "DeePMD-kit WARNING: Environmental variable " << env_name << " is not set. "
-    << "Tune " << env_name << " for the best performance."
-    << std::endl;
+void throw_env_not_set_warning(std::string env_name) {
+  std::cerr << "DeePMD-kit WARNING: Environmental variable " << env_name
+            << " is not set. "
+            << "Tune " << env_name << " for the best performance." << std::endl;
 }
 
-void
-deepmd::
-get_env_nthreads(int & num_intra_nthreads,
-		 int & num_inter_nthreads)
-{
+void deepmd::get_env_nthreads(int& num_intra_nthreads,
+                              int& num_inter_nthreads) {
   num_intra_nthreads = 0;
   num_inter_nthreads = 0;
-  const char* env_intra_nthreads = std::getenv("TF_INTRA_OP_PARALLELISM_THREADS");
-  const char* env_inter_nthreads = std::getenv("TF_INTER_OP_PARALLELISM_THREADS");
+  const char* env_intra_nthreads =
+      std::getenv("TF_INTRA_OP_PARALLELISM_THREADS");
+  const char* env_inter_nthreads =
+      std::getenv("TF_INTER_OP_PARALLELISM_THREADS");
   const char* env_omp_nthreads = std::getenv("OMP_NUM_THREADS");
-  if (env_intra_nthreads && 
-      std::string(env_intra_nthreads) != std::string("") && 
-      atoi(env_intra_nthreads) >= 0
-      ) {
+  if (env_intra_nthreads &&
+      std::string(env_intra_nthreads) != std::string("") &&
+      atoi(env_intra_nthreads) >= 0) {
     num_intra_nthreads = atoi(env_intra_nthreads);
   } else {
     throw_env_not_set_warning("TF_INTRA_OP_PARALLELISM_THREADS");
   }
-  if (env_inter_nthreads && 
+  if (env_inter_nthreads &&
       std::string(env_inter_nthreads) != std::string("") &&
-      atoi(env_inter_nthreads) >= 0
-      ) {
+      atoi(env_inter_nthreads) >= 0) {
     num_inter_nthreads = atoi(env_inter_nthreads);
   } else {
     throw_env_not_set_warning("TF_INTER_OP_PARALLELISM_THREADS");
   }
-  if (!(env_omp_nthreads && 
-      std::string(env_omp_nthreads) != std::string("") &&
-      atoi(env_omp_nthreads) >= 0
-      )) {
+  if (!(env_omp_nthreads && std::string(env_omp_nthreads) != std::string("") &&
+        atoi(env_omp_nthreads) >= 0)) {
     throw_env_not_set_warning("OMP_NUM_THREADS");
   }
 }
 
-void
-deepmd::
-load_op_library()
-{
+void deepmd::load_op_library() {
   tensorflow::Env* env = tensorflow::Env::Default();
 #if defined(_WIN32)
   std::string dso_path = "deepmd_op.dll";
@@ -321,777 +275,657 @@ load_op_library()
   void* dso_handle = dlopen(dso_path.c_str(), RTLD_NOW | RTLD_LOCAL);
 #endif
   if (!dso_handle) {
-    throw deepmd::deepmd_exception(dso_path + " is not found! You can add the library directory to LD_LIBRARY_PATH");
+    throw deepmd::deepmd_exception(
+        dso_path +
+        " is not found! You can add the library directory to LD_LIBRARY_PATH");
   }
 }
 
-std::string
-deepmd::
-name_prefix(const std::string & scope)
-{
+std::string deepmd::name_prefix(const std::string& scope) {
   std::string prefix = "";
-  if (scope != ""){
+  if (scope != "") {
     prefix = scope + "/";
   }
   return prefix;
 }
 
 template 
-int
-deepmd::
-session_input_tensors (
-    std::vector> & input_tensors,
-    const std::vector &	dcoord_,
-    const int &					ntypes,
-    const std::vector &			datype_,
-    const std::vector &	dbox, 
-    const double &			cell_size,
-    const std::vector &	fparam_,
-    const std::vector &	aparam_,
-    const deepmd::AtomMap&	atommap,
-    const std::string				scope)
-{
+int deepmd::session_input_tensors(
+    std::vector>& input_tensors,
+    const std::vector& dcoord_,
+    const int& ntypes,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    const double& cell_size,
+    const std::vector& fparam_,
+    const std::vector& aparam_,
+    const deepmd::AtomMap& atommap,
+    const std::string scope) {
   bool b_pbc = (dbox.size() == 9);
 
   int nframes = 1;
   int nall = dcoord_.size() / 3;
   int nloc = nall;
-  assert (nall == datype_.size());
-
-  std::vector datype = atommap.get_type();
-  std::vector type_count (ntypes, 0);
-  for (unsigned ii = 0; ii < datype.size(); ++ii){
-    type_count[datype[ii]] ++;
-  }
-  datype.insert (datype.end(), datype_.begin() + nloc, datype_.end());
-
-  TensorShape coord_shape ;
-  coord_shape.AddDim (nframes);
-  coord_shape.AddDim (nall * 3);
-  TensorShape type_shape ;
-  type_shape.AddDim (nframes);
-  type_shape.AddDim (nall);
-  TensorShape box_shape ;
-  box_shape.AddDim (nframes);
-  box_shape.AddDim (9);
-  TensorShape mesh_shape ;
-  if (b_pbc){
+  assert(nall == datype_.size());
+
+  std::vector datype = atommap.get_type();
+  std::vector type_count(ntypes, 0);
+  for (unsigned ii = 0; ii < datype.size(); ++ii) {
+    type_count[datype[ii]]++;
+  }
+  datype.insert(datype.end(), datype_.begin() + nloc, datype_.end());
+
+  TensorShape coord_shape;
+  coord_shape.AddDim(nframes);
+  coord_shape.AddDim(nall * 3);
+  TensorShape type_shape;
+  type_shape.AddDim(nframes);
+  type_shape.AddDim(nall);
+  TensorShape box_shape;
+  box_shape.AddDim(nframes);
+  box_shape.AddDim(9);
+  TensorShape mesh_shape;
+  if (b_pbc) {
     mesh_shape.AddDim(6);
-  }
-  else {
+  } else {
     mesh_shape.AddDim(0);
   }
-  TensorShape natoms_shape ;
-  natoms_shape.AddDim (2 + ntypes);
-  TensorShape fparam_shape ;
-  fparam_shape.AddDim (nframes);
-  fparam_shape.AddDim (fparam_.size());
-  TensorShape aparam_shape ;
-  aparam_shape.AddDim (nframes);
-  aparam_shape.AddDim (aparam_.size());
-  
+  TensorShape natoms_shape;
+  natoms_shape.AddDim(2 + ntypes);
+  TensorShape fparam_shape;
+  fparam_shape.AddDim(nframes);
+  fparam_shape.AddDim(fparam_.size());
+  TensorShape aparam_shape;
+  aparam_shape.AddDim(nframes);
+  aparam_shape.AddDim(aparam_.size());
+
   tensorflow::DataType model_type;
-  if(std::is_same::value){
+  if (std::is_same::value) {
     model_type = tensorflow::DT_DOUBLE;
-  }
-  else if(std::is_same::value){
+  } else if (std::is_same::value) {
     model_type = tensorflow::DT_FLOAT;
-  }
-  else{
+  } else {
     throw deepmd::deepmd_exception("unsupported data type");
   }
-  Tensor coord_tensor	(model_type, coord_shape);
-  Tensor box_tensor	(model_type, box_shape);
-  Tensor fparam_tensor  (model_type, fparam_shape);
-  Tensor aparam_tensor  (model_type, aparam_shape);
-
-  Tensor type_tensor	(DT_INT32, type_shape);
-  Tensor mesh_tensor	(DT_INT32, mesh_shape);
-  Tensor natoms_tensor	(DT_INT32, natoms_shape);
-
-  auto coord = coord_tensor.matrix ();
-  auto type = type_tensor.matrix ();
-  auto box = box_tensor.matrix ();
-  auto mesh = mesh_tensor.flat ();
-  auto natoms = natoms_tensor.flat ();  
-  auto fparam = fparam_tensor.matrix ();
-  auto aparam = aparam_tensor.matrix ();
-
-  std::vector dcoord (dcoord_);
-  atommap.forward (dcoord.begin(), dcoord_.begin(), 3);
-  
-  for (int ii = 0; ii < nframes; ++ii){
-    for (int jj = 0; jj < nall * 3; ++jj){
+  Tensor coord_tensor(model_type, coord_shape);
+  Tensor box_tensor(model_type, box_shape);
+  Tensor fparam_tensor(model_type, fparam_shape);
+  Tensor aparam_tensor(model_type, aparam_shape);
+
+  Tensor type_tensor(DT_INT32, type_shape);
+  Tensor mesh_tensor(DT_INT32, mesh_shape);
+  Tensor natoms_tensor(DT_INT32, natoms_shape);
+
+  auto coord = coord_tensor.matrix();
+  auto type = type_tensor.matrix();
+  auto box = box_tensor.matrix();
+  auto mesh = mesh_tensor.flat();
+  auto natoms = natoms_tensor.flat();
+  auto fparam = fparam_tensor.matrix();
+  auto aparam = aparam_tensor.matrix();
+
+  std::vector dcoord(dcoord_);
+  atommap.forward(dcoord.begin(), dcoord_.begin(), 3);
+
+  for (int ii = 0; ii < nframes; ++ii) {
+    for (int jj = 0; jj < nall * 3; ++jj) {
       coord(ii, jj) = dcoord[jj];
     }
-    if(b_pbc){
-      for (int jj = 0; jj < 9; ++jj){
-	box(ii, jj) = dbox[jj];
+    if (b_pbc) {
+      for (int jj = 0; jj < 9; ++jj) {
+        box(ii, jj) = dbox[jj];
       }
-    }
-    else{
-      for (int jj = 0; jj < 9; ++jj){
-	box(ii, jj) = 0.;
+    } else {
+      for (int jj = 0; jj < 9; ++jj) {
+        box(ii, jj) = 0.;
       }
     }
-    for (int jj = 0; jj < nall; ++jj){
+    for (int jj = 0; jj < nall; ++jj) {
       type(ii, jj) = datype[jj];
     }
-    for (int jj = 0; jj < fparam_.size(); ++jj){
+    for (int jj = 0; jj < fparam_.size(); ++jj) {
       fparam(ii, jj) = fparam_[jj];
     }
-    for (int jj = 0; jj < aparam_.size(); ++jj){
+    for (int jj = 0; jj < aparam_.size(); ++jj) {
       aparam(ii, jj) = aparam_[jj];
     }
   }
-  if (b_pbc){
-    mesh (1-1) = 0;
-    mesh (2-1) = 0;
-    mesh (3-1) = 0;
-    mesh (4-1) = 0;
-    mesh (5-1) = 0;
-    mesh (6-1) = 0;
+  if (b_pbc) {
+    mesh(1 - 1) = 0;
+    mesh(2 - 1) = 0;
+    mesh(3 - 1) = 0;
+    mesh(4 - 1) = 0;
+    mesh(5 - 1) = 0;
+    mesh(6 - 1) = 0;
   }
-  natoms (0) = nloc;
-  natoms (1) = nall;
-  for (int ii = 0; ii < ntypes; ++ii) natoms(ii+2) = type_count[ii];
+  natoms(0) = nloc;
+  natoms(1) = nall;
+  for (int ii = 0; ii < ntypes; ++ii) natoms(ii + 2) = type_count[ii];
 
   std::string prefix = "";
-  if (scope != ""){
+  if (scope != "") {
     prefix = scope + "/";
   }
   input_tensors = {
-    {prefix+"t_coord",	coord_tensor}, 
-    {prefix+"t_type",	type_tensor},
-    {prefix+"t_box",	box_tensor},
-    {prefix+"t_mesh",	mesh_tensor},
-    {prefix+"t_natoms",	natoms_tensor},
-  };  
+      {prefix + "t_coord", coord_tensor},   {prefix + "t_type", type_tensor},
+      {prefix + "t_box", box_tensor},       {prefix + "t_mesh", mesh_tensor},
+      {prefix + "t_natoms", natoms_tensor},
+  };
   if (fparam_.size() > 0) {
-    input_tensors.push_back({prefix+"t_fparam", fparam_tensor});
+    input_tensors.push_back({prefix + "t_fparam", fparam_tensor});
   }
   if (aparam_.size() > 0) {
-    input_tensors.push_back({prefix+"t_aparam", aparam_tensor});
+    input_tensors.push_back({prefix + "t_aparam", aparam_tensor});
   }
   return nloc;
 }
 
 template 
-int
-deepmd::
-session_input_tensors (
-    std::vector> & input_tensors,
-    const std::vector &	dcoord_,
-    const int &					ntypes,
-    const std::vector &			datype_,
-    const std::vector &	dbox,		    
-    InputNlist &				dlist, 
-    const std::vector &	fparam_,
-    const std::vector &	aparam_,
-    const deepmd::AtomMap&	atommap,
-    const int					nghost,
-    const int					ago,
-    const std::string				scope)
-{
-  assert (dbox.size() == 9);
+int deepmd::session_input_tensors(
+    std::vector>& input_tensors,
+    const std::vector& dcoord_,
+    const int& ntypes,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    InputNlist& dlist,
+    const std::vector& fparam_,
+    const std::vector& aparam_,
+    const deepmd::AtomMap& atommap,
+    const int nghost,
+    const int ago,
+    const std::string scope) {
+  assert(dbox.size() == 9);
 
   int nframes = 1;
   int nall = dcoord_.size() / 3;
   int nloc = nall - nghost;
-  assert (nall == datype_.size());  
+  assert(nall == datype_.size());
+
+  std::vector datype = atommap.get_type();
+  std::vector type_count(ntypes, 0);
+  for (unsigned ii = 0; ii < datype.size(); ++ii) {
+    type_count[datype[ii]]++;
+  }
+  datype.insert(datype.end(), datype_.begin() + nloc, datype_.end());
+
+  TensorShape coord_shape;
+  coord_shape.AddDim(nframes);
+  coord_shape.AddDim(nall * 3);
+  TensorShape type_shape;
+  type_shape.AddDim(nframes);
+  type_shape.AddDim(nall);
+  TensorShape box_shape;
+  box_shape.AddDim(nframes);
+  box_shape.AddDim(9);
+  TensorShape mesh_shape;
+  mesh_shape.AddDim(16);
+  TensorShape natoms_shape;
+  natoms_shape.AddDim(2 + ntypes);
+  TensorShape fparam_shape;
+  fparam_shape.AddDim(nframes);
+  fparam_shape.AddDim(fparam_.size());
+  TensorShape aparam_shape;
+  aparam_shape.AddDim(nframes);
+  aparam_shape.AddDim(aparam_.size());
 
-  std::vector datype = atommap.get_type();
-  std::vector type_count (ntypes, 0);
-  for (unsigned ii = 0; ii < datype.size(); ++ii){
-    type_count[datype[ii]] ++;
-  }
-  datype.insert (datype.end(), datype_.begin() + nloc, datype_.end());
-
-  TensorShape coord_shape ;
-  coord_shape.AddDim (nframes);
-  coord_shape.AddDim (nall * 3);
-  TensorShape type_shape ;
-  type_shape.AddDim (nframes);
-  type_shape.AddDim (nall);
-  TensorShape box_shape ;
-  box_shape.AddDim (nframes);
-  box_shape.AddDim (9);
-  TensorShape mesh_shape ;
-  mesh_shape.AddDim (16);
-  TensorShape natoms_shape ;
-  natoms_shape.AddDim (2 + ntypes);
-  TensorShape fparam_shape ;
-  fparam_shape.AddDim (nframes);
-  fparam_shape.AddDim (fparam_.size());
-  TensorShape aparam_shape ;
-  aparam_shape.AddDim (nframes);
-  aparam_shape.AddDim (aparam_.size());
-  
   tensorflow::DataType model_type;
-  if(std::is_same::value){
+  if (std::is_same::value) {
     model_type = tensorflow::DT_DOUBLE;
-  }
-  else if(std::is_same::value){
+  } else if (std::is_same::value) {
     model_type = tensorflow::DT_FLOAT;
-  }
-  else{
+  } else {
     throw deepmd::deepmd_exception("unsupported data type");
   }
-  Tensor coord_tensor	(model_type, coord_shape);
-  Tensor box_tensor	(model_type, box_shape);
-  Tensor fparam_tensor  (model_type, fparam_shape);
-  Tensor aparam_tensor  (model_type, aparam_shape);
-
-  Tensor type_tensor	(DT_INT32, type_shape);
-  Tensor mesh_tensor	(DT_INT32, mesh_shape);
-  Tensor natoms_tensor	(DT_INT32, natoms_shape);
-
-  auto coord = coord_tensor.matrix ();
-  auto type = type_tensor.matrix ();
-  auto box = box_tensor.matrix ();
-  auto mesh = mesh_tensor.flat ();
-  auto natoms = natoms_tensor.flat ();
-  auto fparam = fparam_tensor.matrix ();
-  auto aparam = aparam_tensor.matrix ();
-
-  std::vector dcoord (dcoord_);
-  atommap.forward (dcoord.begin(), dcoord_.begin(), 3);
-  
-  for (int ii = 0; ii < nframes; ++ii){
-    for (int jj = 0; jj < nall * 3; ++jj){
+  Tensor coord_tensor(model_type, coord_shape);
+  Tensor box_tensor(model_type, box_shape);
+  Tensor fparam_tensor(model_type, fparam_shape);
+  Tensor aparam_tensor(model_type, aparam_shape);
+
+  Tensor type_tensor(DT_INT32, type_shape);
+  Tensor mesh_tensor(DT_INT32, mesh_shape);
+  Tensor natoms_tensor(DT_INT32, natoms_shape);
+
+  auto coord = coord_tensor.matrix();
+  auto type = type_tensor.matrix();
+  auto box = box_tensor.matrix();
+  auto mesh = mesh_tensor.flat();
+  auto natoms = natoms_tensor.flat();
+  auto fparam = fparam_tensor.matrix();
+  auto aparam = aparam_tensor.matrix();
+
+  std::vector dcoord(dcoord_);
+  atommap.forward(dcoord.begin(), dcoord_.begin(), 3);
+
+  for (int ii = 0; ii < nframes; ++ii) {
+    for (int jj = 0; jj < nall * 3; ++jj) {
       coord(ii, jj) = dcoord[jj];
     }
-    for (int jj = 0; jj < 9; ++jj){
+    for (int jj = 0; jj < 9; ++jj) {
       box(ii, jj) = dbox[jj];
     }
-    for (int jj = 0; jj < nall; ++jj){
+    for (int jj = 0; jj < nall; ++jj) {
       type(ii, jj) = datype[jj];
     }
-    for (int jj = 0; jj < fparam_.size(); ++jj){
+    for (int jj = 0; jj < fparam_.size(); ++jj) {
       fparam(ii, jj) = fparam_[jj];
     }
-    for (int jj = 0; jj < aparam_.size(); ++jj){
+    for (int jj = 0; jj < aparam_.size(); ++jj) {
       aparam(ii, jj) = aparam_[jj];
     }
   }
-  
+
   for (int ii = 0; ii < 16; ++ii) mesh(ii) = 0;
-  
-  const int stride = sizeof(int *) / sizeof(int);
-  assert (stride * sizeof(int) == sizeof(int *));
-  assert (stride <= 4);
-  mesh (0) = ago;
-  mesh (1) = dlist.inum;
-  mesh (2) = 0;
-  mesh (3) = 0;
-  memcpy (&mesh(4),  &(dlist.ilist), sizeof(int *));
-  memcpy (&mesh(8),  &(dlist.numneigh), sizeof(int *));
-  memcpy (&mesh(12), &(dlist.firstneigh), sizeof(int **));
-
-  natoms (0) = nloc;
-  natoms (1) = nall;
-  for (int ii = 0; ii < ntypes; ++ii) natoms(ii+2) = type_count[ii];
+
+  const int stride = sizeof(int*) / sizeof(int);
+  assert(stride * sizeof(int) == sizeof(int*));
+  assert(stride <= 4);
+  mesh(0) = ago;
+  mesh(1) = dlist.inum;
+  mesh(2) = 0;
+  mesh(3) = 0;
+  memcpy(&mesh(4), &(dlist.ilist), sizeof(int*));
+  memcpy(&mesh(8), &(dlist.numneigh), sizeof(int*));
+  memcpy(&mesh(12), &(dlist.firstneigh), sizeof(int**));
+
+  natoms(0) = nloc;
+  natoms(1) = nall;
+  for (int ii = 0; ii < ntypes; ++ii) natoms(ii + 2) = type_count[ii];
 
   std::string prefix = "";
-  if (scope != ""){
+  if (scope != "") {
     prefix = scope + "/";
   }
   input_tensors = {
-    {prefix+"t_coord",	coord_tensor}, 
-    {prefix+"t_type",	type_tensor},
-    {prefix+"t_box",	box_tensor},
-    {prefix+"t_mesh",	mesh_tensor},
-    {prefix+"t_natoms",natoms_tensor},
-  };  
+      {prefix + "t_coord", coord_tensor},   {prefix + "t_type", type_tensor},
+      {prefix + "t_box", box_tensor},       {prefix + "t_mesh", mesh_tensor},
+      {prefix + "t_natoms", natoms_tensor},
+  };
   if (fparam_.size() > 0) {
-    input_tensors.push_back({prefix+"t_fparam", fparam_tensor});
+    input_tensors.push_back({prefix + "t_fparam", fparam_tensor});
   }
   if (aparam_.size() > 0) {
-    input_tensors.push_back({prefix+"t_aparam", aparam_tensor});
+    input_tensors.push_back({prefix + "t_aparam", aparam_tensor});
   }
   return nloc;
 }
 
-template
-VT
-deepmd::
-session_get_scalar(Session* session, const std::string name_, const std::string scope) 
-{
+template 
+VT deepmd::session_get_scalar(Session* session,
+                              const std::string name_,
+                              const std::string scope) {
   std::string name = name_;
   if (scope != "") {
     name = scope + "/" + name;
   }
   std::vector output_tensors;
-  deepmd::check_status (session->Run(std::vector> ({}), 
-			    {name.c_str()}, 
-			    {}, 
-			    &output_tensors));
+  deepmd::check_status(
+      session->Run(std::vector>({}),
+                   {name.c_str()}, {}, &output_tensors));
   Tensor output_rc = output_tensors[0];
-  auto orc = output_rc.flat  ();
+  auto orc = output_rc.flat();
   return orc(0);
 }
 
-template
-void
-deepmd::
-session_get_vector(std::vector & o_vec, Session* session, const std::string name_, const std::string scope) 
-{
+template 
+void deepmd::session_get_vector(std::vector& o_vec,
+                                Session* session,
+                                const std::string name_,
+                                const std::string scope) {
   std::string name = name_;
   if (scope != "") {
     name = scope + "/" + name;
   }
   std::vector output_tensors;
-  deepmd::check_status (session->Run(std::vector> ({}), 
-			    {name.c_str()}, 
-			    {}, 
-			    &output_tensors));
+  deepmd::check_status(
+      session->Run(std::vector>({}),
+                   {name.c_str()}, {}, &output_tensors));
   Tensor output_rc = output_tensors[0];
   assert(1 == output_rc.shape().dims());
   int dof = output_rc.shape().dim_size(0);
   o_vec.resize(dof);
-  auto orc = output_rc.flat  ();
-  for (int ii = 0; ii < dof; ++ii){
+  auto orc = output_rc.flat();
+  for (int ii = 0; ii < dof; ++ii) {
     o_vec[ii] = orc(ii);
-  }  
+  }
 }
 
-
-int
-deepmd::
-session_get_dtype(tensorflow::Session* session, const std::string name_, const std::string scope) 
-{
+int deepmd::session_get_dtype(tensorflow::Session* session,
+                              const std::string name_,
+                              const std::string scope) {
   std::string name = name_;
   if (scope != "") {
     name = scope + "/" + name;
   }
   std::vector output_tensors;
-  deepmd::check_status (session->Run(std::vector> ({}), 
-			    {name.c_str()}, 
-			    {}, 
-			    &output_tensors));
+  deepmd::check_status(
+      session->Run(std::vector>({}),
+                   {name.c_str()}, {}, &output_tensors));
   Tensor output_rc = output_tensors[0];
   // cast enum to int
   return (int)output_rc.dtype();
 }
 
-
-template
-void 
-deepmd::
-select_map(std::vector & out,
-	   const std::vector & in,
-	   const std::vector & idx_map, 
-	   const int & stride)
-{
+template 
+void deepmd::select_map(std::vector& out,
+                        const std::vector& in,
+                        const std::vector& idx_map,
+                        const int& stride) {
 #ifdef DEBUG
-  assert(in.size() / stride * stride == in.size()), "in size should be multiples of stride"
+  assert(in.size() / stride * stride == in.size()),
+      "in size should be multiples of stride"
 #endif
-  for (int ii = 0; ii < in.size() / stride; ++ii){
+      for (int ii = 0; ii < in.size() / stride; ++ii) {
 #ifdef DEBUG
     assert(ii < idx_map.size()), "idx goes over the idx map size";
     assert(idx_map[ii] < out.size()), "mappped idx goes over the out size";
 #endif
     if (idx_map[ii] >= 0) {
       int to_ii = idx_map[ii];
-      for (int dd = 0; dd < stride; ++dd){
-	out[to_ii * stride + dd] = in[ii * stride + dd];
+      for (int dd = 0; dd < stride; ++dd) {
+        out[to_ii * stride + dd] = in[ii * stride + dd];
       }
     }
   }
 }
 
-template
-void 
-deepmd::
-select_map(typename std::vector::iterator out,
-	   const typename std::vector::const_iterator in, 
-	   const std::vector & idx_map, 
-	   const int & stride)
-{
-  for (int ii = 0; ii < idx_map.size(); ++ii){
+template 
+void deepmd::select_map(typename std::vector::iterator out,
+                        const typename std::vector::const_iterator in,
+                        const std::vector& idx_map,
+                        const int& stride) {
+  for (int ii = 0; ii < idx_map.size(); ++ii) {
     if (idx_map[ii] >= 0) {
       int to_ii = idx_map[ii];
-      for (int dd = 0; dd < stride; ++dd){
-	*(out + to_ii * stride + dd) = *(in + ii * stride + dd);
+      for (int dd = 0; dd < stride; ++dd) {
+        *(out + to_ii * stride + dd) = *(in + ii * stride + dd);
       }
     }
   }
 }
 
 // sel_map(_,_,fwd_map,_) == sel_map_inv(_,_,bkw_map,_)
-template
-void 
-deepmd::
-select_map_inv(std::vector & out,
-	   const std::vector & in,
-	   const std::vector & idx_map, 
-	   const int & stride)
-{
+template 
+void deepmd::select_map_inv(std::vector& out,
+                            const std::vector& in,
+                            const std::vector& idx_map,
+                            const int& stride) {
 #ifdef DEBUG
-  assert(in.size() / stride * stride == in.size()), "in size should be multiples of stride"
+  assert(in.size() / stride * stride == in.size()),
+      "in size should be multiples of stride"
 #endif
-  for (int ii = 0; ii < out.size() / stride; ++ii){
+      for (int ii = 0; ii < out.size() / stride; ++ii) {
 #ifdef DEBUG
     assert(ii < idx_map.size()), "idx goes over the idx map size";
     assert(idx_map[ii] < in.size()), "from idx goes over the in size";
 #endif
     if (idx_map[ii] >= 0) {
       int from_ii = idx_map[ii];
-      for (int dd = 0; dd < stride; ++dd){
-	out[ii * stride + dd] = in[from_ii * stride + dd];
+      for (int dd = 0; dd < stride; ++dd) {
+        out[ii * stride + dd] = in[from_ii * stride + dd];
       }
     }
   }
 }
 
-template
-void 
-deepmd::
-select_map_inv(typename std::vector::iterator out,
-	   const typename std::vector::const_iterator in, 
-	   const std::vector & idx_map, 
-	   const int & stride)
-{
-  for (int ii = 0; ii < idx_map.size(); ++ii){
+template 
+void deepmd::select_map_inv(typename std::vector::iterator out,
+                            const typename std::vector::const_iterator in,
+                            const std::vector& idx_map,
+                            const int& stride) {
+  for (int ii = 0; ii < idx_map.size(); ++ii) {
     if (idx_map[ii] >= 0) {
       int from_ii = idx_map[ii];
-      for (int dd = 0; dd < stride; ++dd){
-	*(out + ii * stride + dd) = *(in + from_ii * stride + dd);
+      for (int dd = 0; dd < stride; ++dd) {
+        *(out + ii * stride + dd) = *(in + from_ii * stride + dd);
       }
     }
   }
 }
 
-
-template
-int
-deepmd::
-session_get_scalar(Session*, const std::string, const std::string);
-
-template
-void
-deepmd::
-session_get_vector(std::vector &, Session*, const std::string, const std::string);
-
-template
-void 
-deepmd::
-select_map(
-    std::vector & out,
-    const std::vector & in,
-    const std::vector & idx_map, 
-    const int & stride);
-
-template
-void 
-deepmd::
-select_map(
-    typename std::vector::iterator out,
-    const typename std::vector::const_iterator in, 
-    const std::vector & idx_map, 
-    const int & stride);
-
-template
-void 
-deepmd::
-select_map_inv(
-    std::vector & out,
-    const std::vector & in,
-    const std::vector & idx_map, 
-    const int & stride);
-
-template
-void 
-deepmd::
-select_map_inv(
-    typename std::vector::iterator out,
-    const typename std::vector::const_iterator in, 
-    const std::vector & idx_map, 
-    const int & stride);
-
-
-template
-float
-deepmd::
-session_get_scalar(Session*, const std::string, const std::string);
-
-template
-void
-deepmd::
-session_get_vector(std::vector &, Session*, const std::string, const std::string);
-
-template
-void 
-deepmd::
-select_map(
-    std::vector & out,
-    const std::vector & in,
-    const std::vector & idx_map, 
-    const int & stride);
-
-template
-void 
-deepmd::
-select_map(
-    typename std::vector::iterator out,
-    const typename std::vector::const_iterator in, 
-    const std::vector & idx_map, 
-    const int & stride);
-
-template
-void 
-deepmd::
-select_map_inv(
-    std::vector & out,
-    const std::vector & in,
-    const std::vector & idx_map, 
-    const int & stride);
-
-template
-void 
-deepmd::
-select_map_inv(
-    typename std::vector::iterator out,
-    const typename std::vector::const_iterator in, 
-    const std::vector & idx_map, 
-    const int & stride);
-
-
-template
-double
-deepmd::
-session_get_scalar(Session*, const std::string, const std::string);
-
-template
-void
-deepmd::
-session_get_vector(std::vector &, Session*, const std::string, const std::string);
-
-template
-void 
-deepmd::
-select_map(
-    std::vector & out,
-    const std::vector & in,
-    const std::vector & idx_map, 
-    const int & stride);
-
-template
-void 
-deepmd::
-select_map(
-    typename std::vector::iterator out,
-    const typename std::vector::const_iterator in, 
-    const std::vector & idx_map, 
-    const int & stride);
-
-template
-void 
-deepmd::
-select_map_inv(
-    std::vector & out,
-    const std::vector & in,
-    const std::vector & idx_map, 
-    const int & stride);
-
-template
-void 
-deepmd::
-select_map_inv(
-    typename std::vector::iterator out,
-    const typename std::vector::const_iterator in, 
-    const std::vector & idx_map, 
-    const int & stride);
-
-
-template
-deepmd::STRINGTYPE
-deepmd::
-session_get_scalar(Session*, const std::string, const std::string);
-
-template
-void
-deepmd::
-session_get_vector(std::vector &, Session*, const std::string, const std::string);
-
-template
-void 
-deepmd::
-select_map(
-    std::vector & out,
-    const std::vector & in,
-    const std::vector & idx_map, 
-    const int & stride);
-
-template
-void 
-deepmd::
-select_map(
-    typename std::vector::iterator out,
-    const typename std::vector::const_iterator in, 
-    const std::vector & idx_map, 
-    const int & stride);
-
-template
-void 
-deepmd::
-select_map_inv(
-    std::vector & out,
-    const std::vector & in,
-    const std::vector & idx_map, 
-    const int & stride);
-
-template
-void 
-deepmd::
-select_map_inv(
-    typename std::vector::iterator out,
-    const typename std::vector::const_iterator in, 
-    const std::vector & idx_map, 
-    const int & stride);
-
-
-void
-deepmd::
-read_file_to_string(std::string model, std::string & file_content)
-{
-  deepmd::check_status(tensorflow::ReadFileToString(tensorflow::Env::Default(), model, &file_content));
+template int deepmd::session_get_scalar(Session*,
+                                             const std::string,
+                                             const std::string);
+
+template void deepmd::session_get_vector(std::vector&,
+                                              Session*,
+                                              const std::string,
+                                              const std::string);
+
+template void deepmd::select_map(std::vector& out,
+                                      const std::vector& in,
+                                      const std::vector& idx_map,
+                                      const int& stride);
+
+template void deepmd::select_map(
+    typename std::vector::iterator out,
+    const typename std::vector::const_iterator in,
+    const std::vector& idx_map,
+    const int& stride);
+
+template void deepmd::select_map_inv(std::vector& out,
+                                          const std::vector& in,
+                                          const std::vector& idx_map,
+                                          const int& stride);
+
+template void deepmd::select_map_inv(
+    typename std::vector::iterator out,
+    const typename std::vector::const_iterator in,
+    const std::vector& idx_map,
+    const int& stride);
+
+template float deepmd::session_get_scalar(Session*,
+                                                 const std::string,
+                                                 const std::string);
+
+template void deepmd::session_get_vector(std::vector&,
+                                                Session*,
+                                                const std::string,
+                                                const std::string);
+
+template void deepmd::select_map(std::vector& out,
+                                        const std::vector& in,
+                                        const std::vector& idx_map,
+                                        const int& stride);
+
+template void deepmd::select_map(
+    typename std::vector::iterator out,
+    const typename std::vector::const_iterator in,
+    const std::vector& idx_map,
+    const int& stride);
+
+template void deepmd::select_map_inv(std::vector& out,
+                                            const std::vector& in,
+                                            const std::vector& idx_map,
+                                            const int& stride);
+
+template void deepmd::select_map_inv(
+    typename std::vector::iterator out,
+    const typename std::vector::const_iterator in,
+    const std::vector& idx_map,
+    const int& stride);
+
+template double deepmd::session_get_scalar(Session*,
+                                                   const std::string,
+                                                   const std::string);
+
+template void deepmd::session_get_vector(std::vector&,
+                                                 Session*,
+                                                 const std::string,
+                                                 const std::string);
+
+template void deepmd::select_map(std::vector& out,
+                                         const std::vector& in,
+                                         const std::vector& idx_map,
+                                         const int& stride);
+
+template void deepmd::select_map(
+    typename std::vector::iterator out,
+    const typename std::vector::const_iterator in,
+    const std::vector& idx_map,
+    const int& stride);
+
+template void deepmd::select_map_inv(std::vector& out,
+                                             const std::vector& in,
+                                             const std::vector& idx_map,
+                                             const int& stride);
+
+template void deepmd::select_map_inv(
+    typename std::vector::iterator out,
+    const typename std::vector::const_iterator in,
+    const std::vector& idx_map,
+    const int& stride);
+
+template deepmd::STRINGTYPE deepmd::session_get_scalar(
+    Session*, const std::string, const std::string);
+
+template void deepmd::session_get_vector(
+    std::vector&,
+    Session*,
+    const std::string,
+    const std::string);
+
+template void deepmd::select_map(
+    std::vector& out,
+    const std::vector& in,
+    const std::vector& idx_map,
+    const int& stride);
+
+template void deepmd::select_map(
+    typename std::vector::iterator out,
+    const typename std::vector::const_iterator in,
+    const std::vector& idx_map,
+    const int& stride);
+
+template void deepmd::select_map_inv(
+    std::vector& out,
+    const std::vector& in,
+    const std::vector& idx_map,
+    const int& stride);
+
+template void deepmd::select_map_inv(
+    typename std::vector::iterator out,
+    const typename std::vector::const_iterator in,
+    const std::vector& idx_map,
+    const int& stride);
+
+void deepmd::read_file_to_string(std::string model, std::string& file_content) {
+  deepmd::check_status(tensorflow::ReadFileToString(tensorflow::Env::Default(),
+                                                    model, &file_content));
 }
 
-
-void
-deepmd::
-convert_pbtxt_to_pb(std::string fn_pb_txt, std::string fn_pb)
-{
-    int fd = open(fn_pb_txt.c_str(), O_RDONLY);
-    tensorflow::protobuf::io::ZeroCopyInputStream* input = new tensorflow::protobuf::io::FileInputStream(fd);
-    tensorflow::GraphDef graph_def;
-    tensorflow::protobuf::TextFormat::Parse(input, &graph_def);
-    delete input;
-    std::fstream output(fn_pb, std::ios::out | std::ios::trunc | std::ios::binary);
-    graph_def.SerializeToOstream(&output);
+void deepmd::convert_pbtxt_to_pb(std::string fn_pb_txt, std::string fn_pb) {
+  int fd = open(fn_pb_txt.c_str(), O_RDONLY);
+  tensorflow::protobuf::io::ZeroCopyInputStream* input =
+      new tensorflow::protobuf::io::FileInputStream(fd);
+  tensorflow::GraphDef graph_def;
+  tensorflow::protobuf::TextFormat::Parse(input, &graph_def);
+  delete input;
+  std::fstream output(fn_pb,
+                      std::ios::out | std::ios::trunc | std::ios::binary);
+  graph_def.SerializeToOstream(&output);
 }
 
-template
-int
-deepmd::
-session_input_tensors (std::vector> & input_tensors,
-		       const std::vector &	dcoord_,
-		       const int &			ntypes,
-		       const std::vector &		datype_,
-		       const std::vector &	dbox, 
-		       const double &		cell_size,
-		       const std::vector &	fparam_,
-		       const std::vector &	aparam_,
-		       const deepmd::AtomMap&atommap,
-		       const std::string		scope);
-template
-int
-deepmd::
-session_input_tensors (std::vector> & input_tensors,
-		       const std::vector &	dcoord_,
-		       const int &			ntypes,
-		       const std::vector &		datype_,
-		       const std::vector &	dbox, 
-		       const double &		cell_size,
-		       const std::vector &	fparam_,
-		       const std::vector &	aparam_,
-		       const deepmd::AtomMap&atommap,
-		       const std::string		scope);
-
-template
-int
-deepmd::
-session_input_tensors (std::vector> & input_tensors,
-		       const std::vector &	dcoord_,
-		       const int &			ntypes,
-		       const std::vector &		datype_,
-		       const std::vector &	dbox, 
-		       const double &		cell_size,
-		       const std::vector &	fparam_,
-		       const std::vector &	aparam_,
-		       const deepmd::AtomMap&atommap,
-		       const std::string		scope);
-template
-int
-deepmd::
-session_input_tensors (std::vector> & input_tensors,
-		       const std::vector &	dcoord_,
-		       const int &			ntypes,
-		       const std::vector &		datype_,
-		       const std::vector &	dbox, 
-		       const double &		cell_size,
-		       const std::vector &	fparam_,
-		       const std::vector &	aparam_,
-		       const deepmd::AtomMap&atommap,
-		       const std::string		scope);
-
-template
-int
-deepmd::
-session_input_tensors (std::vector> & input_tensors,
-		       const std::vector &	dcoord_,
-		       const int &			ntypes,
-		       const std::vector &		datype_,
-		       const std::vector &	dbox,		    
-		       InputNlist &		dlist, 
-		       const std::vector &	fparam_,
-		       const std::vector &	aparam_,
-		       const deepmd::AtomMap&atommap,
-		       const int			nghost,
-		       const int			ago,
-		       const std::string		scope);
-template
-int
-deepmd::
-session_input_tensors (std::vector> & input_tensors,
-		       const std::vector &	dcoord_,
-		       const int &			ntypes,
-		       const std::vector &		datype_,
-		       const std::vector &	dbox,		    
-		       InputNlist &		dlist, 
-		       const std::vector &	fparam_,
-		       const std::vector &	aparam_,
-		       const deepmd::AtomMap&atommap,
-		       const int			nghost,
-		       const int			ago,
-		       const std::string		scope);
-
-template
-int
-deepmd::
-session_input_tensors (std::vector> & input_tensors,
-		       const std::vector &	dcoord_,
-		       const int &			ntypes,
-		       const std::vector &		datype_,
-		       const std::vector &	dbox,		    
-		       InputNlist &		dlist, 
-		       const std::vector &	fparam_,
-		       const std::vector &	aparam_,
-		       const deepmd::AtomMap&atommap,
-		       const int			nghost,
-		       const int			ago,
-		       const std::string		scope);
-template
-int
-deepmd::
-session_input_tensors (std::vector> & input_tensors,
-		       const std::vector &	dcoord_,
-		       const int &			ntypes,
-		       const std::vector &		datype_,
-		       const std::vector &	dbox,		    
-		       InputNlist &		dlist, 
-		       const std::vector &	fparam_,
-		       const std::vector &	aparam_,
-		       const deepmd::AtomMap&atommap,
-		       const int			nghost,
-		       const int			ago,
-		       const std::string		scope);
-
-void
-deepmd::
-print_summary(const std::string &pre)
-{
+template int deepmd::session_input_tensors(
+    std::vector>& input_tensors,
+    const std::vector& dcoord_,
+    const int& ntypes,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    const double& cell_size,
+    const std::vector& fparam_,
+    const std::vector& aparam_,
+    const deepmd::AtomMap& atommap,
+    const std::string scope);
+template int deepmd::session_input_tensors(
+    std::vector>& input_tensors,
+    const std::vector& dcoord_,
+    const int& ntypes,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    const double& cell_size,
+    const std::vector& fparam_,
+    const std::vector& aparam_,
+    const deepmd::AtomMap& atommap,
+    const std::string scope);
+
+template int deepmd::session_input_tensors(
+    std::vector>& input_tensors,
+    const std::vector& dcoord_,
+    const int& ntypes,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    const double& cell_size,
+    const std::vector& fparam_,
+    const std::vector& aparam_,
+    const deepmd::AtomMap& atommap,
+    const std::string scope);
+template int deepmd::session_input_tensors(
+    std::vector>& input_tensors,
+    const std::vector& dcoord_,
+    const int& ntypes,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    const double& cell_size,
+    const std::vector& fparam_,
+    const std::vector& aparam_,
+    const deepmd::AtomMap& atommap,
+    const std::string scope);
+
+template int deepmd::session_input_tensors(
+    std::vector>& input_tensors,
+    const std::vector& dcoord_,
+    const int& ntypes,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    InputNlist& dlist,
+    const std::vector& fparam_,
+    const std::vector& aparam_,
+    const deepmd::AtomMap& atommap,
+    const int nghost,
+    const int ago,
+    const std::string scope);
+template int deepmd::session_input_tensors(
+    std::vector>& input_tensors,
+    const std::vector& dcoord_,
+    const int& ntypes,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    InputNlist& dlist,
+    const std::vector& fparam_,
+    const std::vector& aparam_,
+    const deepmd::AtomMap& atommap,
+    const int nghost,
+    const int ago,
+    const std::string scope);
+
+template int deepmd::session_input_tensors(
+    std::vector>& input_tensors,
+    const std::vector& dcoord_,
+    const int& ntypes,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    InputNlist& dlist,
+    const std::vector& fparam_,
+    const std::vector& aparam_,
+    const deepmd::AtomMap& atommap,
+    const int nghost,
+    const int ago,
+    const std::string scope);
+template int deepmd::session_input_tensors(
+    std::vector>& input_tensors,
+    const std::vector& dcoord_,
+    const int& ntypes,
+    const std::vector& datype_,
+    const std::vector& dbox,
+    InputNlist& dlist,
+    const std::vector& fparam_,
+    const std::vector& aparam_,
+    const deepmd::AtomMap& atommap,
+    const int nghost,
+    const int ago,
+    const std::string scope);
+
+void deepmd::print_summary(const std::string& pre) {
   int num_intra_nthreads, num_inter_nthreads;
   deepmd::get_env_nthreads(num_intra_nthreads, num_inter_nthreads);
   std::cout << pre << "installed to:       " + global_install_prefix << "\n";
@@ -1101,14 +935,21 @@ print_summary(const std::string &pre)
   std::cout << pre << "source commit at:   " + global_git_date << "\n";
   std::cout << pre << "surpport model ver.:" + global_model_version << "\n";
 #if defined(GOOGLE_CUDA)
-  std::cout << pre << "build variant:      cuda" << "\n";
+  std::cout << pre << "build variant:      cuda"
+            << "\n";
 #elif defined(TENSORFLOW_USE_ROCM)
-  std::cout << pre << "build variant:      rocm" << "\n";
+  std::cout << pre << "build variant:      rocm"
+            << "\n";
 #else
-  std::cout << pre << "build variant:      cpu" << "\n";
+  std::cout << pre << "build variant:      cpu"
+            << "\n";
 #endif
   std::cout << pre << "build with tf inc:  " + global_tf_include_dir << "\n";
   std::cout << pre << "build with tf lib:  " + global_tf_lib << "\n";
-  std::cout << pre << "set tf intra_op_parallelism_threads: " <<  num_intra_nthreads << "\n";
-  std::cout << pre << "set tf inter_op_parallelism_threads: " <<  num_inter_nthreads << std::endl;
+  std::cout << pre
+            << "set tf intra_op_parallelism_threads: " << num_intra_nthreads
+            << "\n";
+  std::cout << pre
+            << "set tf inter_op_parallelism_threads: " << num_inter_nthreads
+            << std::endl;
 }
diff --git a/source/api_cc/tests/CMakeLists.txt b/source/api_cc/tests/CMakeLists.txt
index e8e8e585a2..ff6e432abd 100644
--- a/source/api_cc/tests/CMakeLists.txt
+++ b/source/api_cc/tests/CMakeLists.txt
@@ -3,17 +3,11 @@ project(deepmd_api_test)
 
 file(GLOB TEST_SRC test_*.cc)
 
-add_executable( runUnitTests_cc ${TEST_SRC} )
-target_link_libraries(runUnitTests_cc GTest::gtest_main ${LIB_DEEPMD_CC} rt coverage_config)
+add_executable(runUnitTests_cc ${TEST_SRC})
+target_link_libraries(runUnitTests_cc GTest::gtest_main ${LIB_DEEPMD_CC} rt
+                      coverage_config)
 target_precompile_headers(runUnitTests_cc PRIVATE test_utils.h)
-add_test( runUnitTest_cc runUnitTests_cc )
-set_target_properties(
-  runUnitTests_cc
-  PROPERTIES 
-  INSTALL_RPATH "$ORIGIN/../lib"
-)
-target_compile_definitions(
-  runUnitTests_cc
-  PUBLIC ${prec_def}
-)
+add_test(runUnitTest_cc runUnitTests_cc)
+set_target_properties(runUnitTests_cc PROPERTIES INSTALL_RPATH "$ORIGIN/../lib")
+target_compile_definitions(runUnitTests_cc PUBLIC ${prec_def})
 install(TARGETS runUnitTests_cc DESTINATION bin/)
diff --git a/source/api_cc/tests/test_deepdipole.cc b/source/api_cc/tests/test_deepdipole.cc
index f08825f21b..c788fa2c99 100644
--- a/source/api_cc/tests/test_deepdipole.cc
+++ b/source/api_cc/tests/test_deepdipole.cc
@@ -1,63 +1,53 @@
+#include 
 #include 
-#include 
+#include 
+#include 
+
 #include 
+#include 
 #include 
 #include 
+
 #include "DeepTensor.h"
 #include "neighbor_list.h"
 #include "test_utils.h"
 
-#include 
-#include 
-#include   
-
 template 
-class TestInferDeepDipole : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepDipole : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector expected_d = {
-    -9.274180565967479195e-01,2.698028341272042496e+00,2.521268387140979117e-01,2.927260638453461628e+00,-8.571926301526779923e-01,1.667785136187720063e+00
-  };
+      -9.274180565967479195e-01, 2.698028341272042496e+00,
+      2.521268387140979117e-01,  2.927260638453461628e+00,
+      -8.571926301526779923e-01, 1.667785136187720063e+00};
   int natoms = 6;
 
   deepmd::DeepTensor dp;
 
   void SetUp() override {
-    deepmd::convert_pbtxt_to_pb("../../tests/infer/deepdipole.pbtxt", "deepdipole.pb");
+    deepmd::convert_pbtxt_to_pb("../../tests/infer/deepdipole.pbtxt",
+                                "deepdipole.pb");
 
     dp.init("deepdipole.pb");
   };
 
-  void TearDown() override {
-    remove( "deepdipole.pb" ) ;
-  };
+  void TearDown() override { remove("deepdipole.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepDipole, ValueTypes);
 
-TYPED_TEST(TestInferDeepDipole, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepDipole, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_d = this -> expected_d;
-  int& natoms = this -> natoms;
-  deepmd::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_d = this->expected_d;
+  int& natoms = this->natoms;
+  deepmd::DeepTensor& dp = this->dp;
   EXPECT_EQ(dp.cutoff(), 4.);
   EXPECT_EQ(dp.numb_types(), 2);
   EXPECT_EQ(dp.output_dim(), 3);
@@ -69,71 +59,163 @@ TYPED_TEST(TestInferDeepDipole, cpu_build_nlist)
   dp.compute(value, coord, atype, box);
 
   EXPECT_EQ(value.size(), expected_d.size());
-  for(int ii = 0; ii < expected_d.size(); ++ii){
+  for (int ii = 0; ii < expected_d.size(); ++ii) {
     EXPECT_LT(fabs(value[ii] - expected_d[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepDipole, cpu_lmp_nlist)
-{
+TYPED_TEST(TestInferDeepDipole, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_d = this -> expected_d;
-  int& natoms = this -> natoms;
-  deepmd::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_d = this->expected_d;
+  int& natoms = this->natoms;
+  deepmd::DeepTensor& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
+  std::vector atype_cpy, mapping;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  std::vector > nlist_data;
+  std::vector > nlist_data;
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   std::vector value;
-  dp.compute(value, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  dp.compute(value, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   EXPECT_EQ(value.size(), expected_d.size());
-  for(int ii = 0; ii < expected_d.size(); ++ii){
+  for (int ii = 0; ii < expected_d.size(); ++ii) {
     EXPECT_LT(fabs(value[ii] - expected_d[ii]), EPSILON);
   }
 }
 
-
 template 
-class TestInferDeepDipoleNew : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepDipoleNew : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector expected_t = {
-    -1.128427726201255282e-01, 2.654103846999197880e-01, 2.625816377288122533e-02, 3.027556488877700680e-01, -7.475444785689989990e-02, 1.526291164572509684e-01
-  };
+      -1.128427726201255282e-01, 2.654103846999197880e-01,
+      2.625816377288122533e-02,  3.027556488877700680e-01,
+      -7.475444785689989990e-02, 1.526291164572509684e-01};
   std::vector expected_f = {
-    8.424897862241968738e-02, -3.823566783202275721e-02, 3.570797165027734810e-01, 6.102563129736437997e-02, -1.351209759852018133e-01, -2.438224487466488510e-01, -1.403204771681088869e-01, 1.719596545791735875e-01, -1.136584427103610045e-01, 2.761686212947551955e-02, -7.247860200915196005e-02, 6.208831127377397591e-02, -2.605870723577520809e-01, -4.504074577536486268e-02, 7.340240097998475266e-02, 2.280160774766013809e-01, 1.189163370225677641e-01, -1.350895372995223886e-01, -4.294311497114180337e-02, 1.524802094783661577e-01, 1.070451777645946290e-01, -1.259336332521076574e-01, -2.087610788959351760e-01, 9.447141346538817652e-02, 1.668125597515543457e-01, 5.487037060760904805e-02, -2.014994036104674757e-01, -7.411985441205551361e-02, 3.614456658821710300e-01, 2.901174891391154476e-01, -4.871926969937838414e-02, -1.252747945819455699e-01, -2.555459318266457558e-01, 1.249033125831290059e-01, -2.347603724902655176e-01, -3.458874493198500766e-02, 3.563990394229877290e-01, 1.052342031228763047e-01, 1.907268232932498031e-01, -2.432737821373903708e-01, 1.016781829972335099e-01, -7.707616437996064884e-02, -1.139199805053340564e-01, -2.068592154909300040e-01, -1.156337826476897951e-01, 6.583817133933017596e-02, 2.902207490750204344e-01, 9.945482314729316153e-02, 7.986986504051810098e-02, -2.549975565538568079e-01, 1.275343199697696051e-01, -1.449133131601115787e-01, -3.527636315034351350e-02, -2.250060193826620980e-01
-  };
+      8.424897862241968738e-02,  -3.823566783202275721e-02,
+      3.570797165027734810e-01,  6.102563129736437997e-02,
+      -1.351209759852018133e-01, -2.438224487466488510e-01,
+      -1.403204771681088869e-01, 1.719596545791735875e-01,
+      -1.136584427103610045e-01, 2.761686212947551955e-02,
+      -7.247860200915196005e-02, 6.208831127377397591e-02,
+      -2.605870723577520809e-01, -4.504074577536486268e-02,
+      7.340240097998475266e-02,  2.280160774766013809e-01,
+      1.189163370225677641e-01,  -1.350895372995223886e-01,
+      -4.294311497114180337e-02, 1.524802094783661577e-01,
+      1.070451777645946290e-01,  -1.259336332521076574e-01,
+      -2.087610788959351760e-01, 9.447141346538817652e-02,
+      1.668125597515543457e-01,  5.487037060760904805e-02,
+      -2.014994036104674757e-01, -7.411985441205551361e-02,
+      3.614456658821710300e-01,  2.901174891391154476e-01,
+      -4.871926969937838414e-02, -1.252747945819455699e-01,
+      -2.555459318266457558e-01, 1.249033125831290059e-01,
+      -2.347603724902655176e-01, -3.458874493198500766e-02,
+      3.563990394229877290e-01,  1.052342031228763047e-01,
+      1.907268232932498031e-01,  -2.432737821373903708e-01,
+      1.016781829972335099e-01,  -7.707616437996064884e-02,
+      -1.139199805053340564e-01, -2.068592154909300040e-01,
+      -1.156337826476897951e-01, 6.583817133933017596e-02,
+      2.902207490750204344e-01,  9.945482314729316153e-02,
+      7.986986504051810098e-02,  -2.549975565538568079e-01,
+      1.275343199697696051e-01,  -1.449133131601115787e-01,
+      -3.527636315034351350e-02, -2.250060193826620980e-01};
   std::vector expected_v = {
-    3.479789535931299138e-02, 4.337414719007849292e-03, -3.647371468256610082e-03, 8.053492919528318708e-03, 1.003834811499279773e-03, -8.441338187607602033e-04, -6.695998268698949256e-03, -8.346286793845711892e-04, 7.018468440279366279e-04, -4.515896716004976635e-02, 1.891794570218296306e-02, 3.417435352652402336e-02, 9.998952222904963771e-02, -4.188750255541257711e-02, -7.566774655171297492e-02, 1.804286120725206444e-01, -7.558495911146115298e-02, -1.365405712981232755e-01, -1.002593446510361419e-01, -1.117945222697993429e-01, 7.449172735713084637e-02, 7.770237313970995707e-02, 1.313723119887387492e-01, -8.655414676270002661e-02, -4.973937467461287537e-02, -8.663006083493235421e-02, 5.703914957966123994e-02, -3.382231967662072125e-02, -4.215813217482468345e-03, 3.545115660155720612e-03, -8.247565860499378454e-03, -1.028025206407854253e-03, 8.644757417520612143e-04, 6.761330949063471332e-03, 8.427721296283078580e-04, -7.086947453692606178e-04, -1.622698090933780493e-02, 1.305372051650728060e-01, -2.082599910094798112e-01, -7.109985131471197733e-03, 2.202585658101286273e-02, -3.554509763049529952e-02, 1.436400379134906459e-02, -3.554915857551419617e-02, 5.763638171798115412e-02, 2.074946305037073946e-01, 5.016353704485233822e-02, -5.700401936915034523e-02, 1.082138666905367308e-01, 2.616159414496492877e-02, -2.972908425564194101e-02, -1.229314789425654392e-01, -2.971969820589494271e-02, 3.377238432488059716e-02, 7.622024445219390681e-03, 9.500540384976005961e-04, -7.989090778275298932e-04, -2.952148931042387209e-02, -3.679732378636401541e-03, 3.094320409307891630e-03, -9.534268115386618486e-04, -1.188407357158671420e-04, 9.993425503379762414e-05, 9.319088860655992679e-02, -3.903942630815338682e-02, -7.052283462118023871e-02, 1.544831983829924038e-01, -6.471593445773991815e-02, -1.169062041817236081e-01, -6.990884596438741438e-02, 2.928613817427033750e-02, 5.290399154061733306e-02, 7.491400658274136037e-02, 1.273824184577304897e-01, -8.391492311946648075e-02, 
3.543872837542783732e-02, 4.324623973455964804e-02, -2.873418641045778418e-02, -8.444981234074398768e-02, -1.531171183141288306e-01, 1.007308415346981068e-01, -6.396885751015785743e-03, -7.973455327045167592e-04, 6.704951070469818575e-04, 2.915483242551994078e-02, 3.634030104030812076e-03, -3.055888951116827318e-03, 6.608747470375698129e-04, 8.237532257692081912e-05, -6.927015762150179410e-05, -6.099175331115514430e-03, 2.402310352789886402e-02, -3.861491558256636286e-02, -2.583867422346154685e-02, 6.050621302336450097e-02, -9.822840263095998503e-02, -3.827994718203701213e-02, 1.252239810257823327e-01, -2.018867305507059950e-01, 1.136620144506474833e-01, 2.747872876828840599e-02, -3.122582814578225147e-02, -2.136319389661417989e-01, -5.164728194785846160e-02, 5.869009312256637939e-02, -3.147575788810638014e-02, -7.609523885036708832e-03, 8.647186232996251914e-03, -5.990706138603461330e-03, -7.467169124604876177e-04, 6.279210400235934152e-04, -9.287887182821588476e-04, -1.157696985960763821e-04, 9.735179200124630735e-05, -2.966271471326579340e-02, -3.697335544996301071e-03, 3.109123071928715683e-03, 1.800225987816693740e-01, -7.541487246259104271e-02, -1.362333179969384966e-01, -7.524185541795300192e-02, 3.152023672914239238e-02, 5.693978247845072477e-02, 5.703636164117102669e-02, -2.389361095778780308e-02, -4.316265205277792366e-02, -4.915584336537091176e-02, -8.674240294138457763e-02, 5.709724154860432860e-02, -8.679070528401405804e-02, -1.572017650485294793e-01, 1.034201569997979520e-01, -3.557746655862283752e-02, -8.626268394893003844e-02, 5.645546718878535764e-02, 6.848075985139651621e-03, 8.535845420570665554e-04, -7.177870012752625602e-04, 8.266638576582277997e-04, 1.030402542123569647e-04, -8.664748649675494882e-05, 2.991751925173294011e-02, 3.729095884068693231e-03, -3.135830629785046203e-03, 1.523793442834292522e-02, -3.873020552543556677e-02, 6.275576045602117292e-02, -3.842536616563556329e-02, 1.249268983543572881e-01, -2.014296501045876875e-01, 
1.288704808602599873e-02, -6.326999354443738066e-02, 1.014064886873057153e-01, -1.318711149757016143e-01, -3.188092889522457091e-02, 3.622832829002789468e-02, -3.210149046681261276e-02, -7.760799893075580151e-03, 8.819090787585878374e-03, -2.047554776382226327e-01, -4.950132426418570042e-02, 5.625150484566552450e-02
-  };
+      3.479789535931299138e-02,  4.337414719007849292e-03,
+      -3.647371468256610082e-03, 8.053492919528318708e-03,
+      1.003834811499279773e-03,  -8.441338187607602033e-04,
+      -6.695998268698949256e-03, -8.346286793845711892e-04,
+      7.018468440279366279e-04,  -4.515896716004976635e-02,
+      1.891794570218296306e-02,  3.417435352652402336e-02,
+      9.998952222904963771e-02,  -4.188750255541257711e-02,
+      -7.566774655171297492e-02, 1.804286120725206444e-01,
+      -7.558495911146115298e-02, -1.365405712981232755e-01,
+      -1.002593446510361419e-01, -1.117945222697993429e-01,
+      7.449172735713084637e-02,  7.770237313970995707e-02,
+      1.313723119887387492e-01,  -8.655414676270002661e-02,
+      -4.973937467461287537e-02, -8.663006083493235421e-02,
+      5.703914957966123994e-02,  -3.382231967662072125e-02,
+      -4.215813217482468345e-03, 3.545115660155720612e-03,
+      -8.247565860499378454e-03, -1.028025206407854253e-03,
+      8.644757417520612143e-04,  6.761330949063471332e-03,
+      8.427721296283078580e-04,  -7.086947453692606178e-04,
+      -1.622698090933780493e-02, 1.305372051650728060e-01,
+      -2.082599910094798112e-01, -7.109985131471197733e-03,
+      2.202585658101286273e-02,  -3.554509763049529952e-02,
+      1.436400379134906459e-02,  -3.554915857551419617e-02,
+      5.763638171798115412e-02,  2.074946305037073946e-01,
+      5.016353704485233822e-02,  -5.700401936915034523e-02,
+      1.082138666905367308e-01,  2.616159414496492877e-02,
+      -2.972908425564194101e-02, -1.229314789425654392e-01,
+      -2.971969820589494271e-02, 3.377238432488059716e-02,
+      7.622024445219390681e-03,  9.500540384976005961e-04,
+      -7.989090778275298932e-04, -2.952148931042387209e-02,
+      -3.679732378636401541e-03, 3.094320409307891630e-03,
+      -9.534268115386618486e-04, -1.188407357158671420e-04,
+      9.993425503379762414e-05,  9.319088860655992679e-02,
+      -3.903942630815338682e-02, -7.052283462118023871e-02,
+      1.544831983829924038e-01,  -6.471593445773991815e-02,
+      -1.169062041817236081e-01, -6.990884596438741438e-02,
+      2.928613817427033750e-02,  5.290399154061733306e-02,
+      7.491400658274136037e-02,  1.273824184577304897e-01,
+      -8.391492311946648075e-02, 3.543872837542783732e-02,
+      4.324623973455964804e-02,  -2.873418641045778418e-02,
+      -8.444981234074398768e-02, -1.531171183141288306e-01,
+      1.007308415346981068e-01,  -6.396885751015785743e-03,
+      -7.973455327045167592e-04, 6.704951070469818575e-04,
+      2.915483242551994078e-02,  3.634030104030812076e-03,
+      -3.055888951116827318e-03, 6.608747470375698129e-04,
+      8.237532257692081912e-05,  -6.927015762150179410e-05,
+      -6.099175331115514430e-03, 2.402310352789886402e-02,
+      -3.861491558256636286e-02, -2.583867422346154685e-02,
+      6.050621302336450097e-02,  -9.822840263095998503e-02,
+      -3.827994718203701213e-02, 1.252239810257823327e-01,
+      -2.018867305507059950e-01, 1.136620144506474833e-01,
+      2.747872876828840599e-02,  -3.122582814578225147e-02,
+      -2.136319389661417989e-01, -5.164728194785846160e-02,
+      5.869009312256637939e-02,  -3.147575788810638014e-02,
+      -7.609523885036708832e-03, 8.647186232996251914e-03,
+      -5.990706138603461330e-03, -7.467169124604876177e-04,
+      6.279210400235934152e-04,  -9.287887182821588476e-04,
+      -1.157696985960763821e-04, 9.735179200124630735e-05,
+      -2.966271471326579340e-02, -3.697335544996301071e-03,
+      3.109123071928715683e-03,  1.800225987816693740e-01,
+      -7.541487246259104271e-02, -1.362333179969384966e-01,
+      -7.524185541795300192e-02, 3.152023672914239238e-02,
+      5.693978247845072477e-02,  5.703636164117102669e-02,
+      -2.389361095778780308e-02, -4.316265205277792366e-02,
+      -4.915584336537091176e-02, -8.674240294138457763e-02,
+      5.709724154860432860e-02,  -8.679070528401405804e-02,
+      -1.572017650485294793e-01, 1.034201569997979520e-01,
+      -3.557746655862283752e-02, -8.626268394893003844e-02,
+      5.645546718878535764e-02,  6.848075985139651621e-03,
+      8.535845420570665554e-04,  -7.177870012752625602e-04,
+      8.266638576582277997e-04,  1.030402542123569647e-04,
+      -8.664748649675494882e-05, 2.991751925173294011e-02,
+      3.729095884068693231e-03,  -3.135830629785046203e-03,
+      1.523793442834292522e-02,  -3.873020552543556677e-02,
+      6.275576045602117292e-02,  -3.842536616563556329e-02,
+      1.249268983543572881e-01,  -2.014296501045876875e-01,
+      1.288704808602599873e-02,  -6.326999354443738066e-02,
+      1.014064886873057153e-01,  -1.318711149757016143e-01,
+      -3.188092889522457091e-02, 3.622832829002789468e-02,
+      -3.210149046681261276e-02, -7.760799893075580151e-03,
+      8.819090787585878374e-03,  -2.047554776382226327e-01,
+      -4.950132426418570042e-02, 5.625150484566552450e-02};
   std::vector expected_gt;
   std::vector expected_gv;
   int natoms = 6;
@@ -143,49 +225,47 @@ class TestInferDeepDipoleNew : public ::testing::Test
 
   void SetUp() override {
     std::string file_name = "../../tests/infer/deepdipole_new.pbtxt";
-    deepmd::convert_pbtxt_to_pb("../../tests/infer/deepdipole_new.pbtxt", "deepdipole_new.pb");
+    deepmd::convert_pbtxt_to_pb("../../tests/infer/deepdipole_new.pbtxt",
+                                "deepdipole_new.pb");
     dp.init("deepdipole_new.pb");
-    odim = dp.output_dim ();
+    odim = dp.output_dim();
 
     expected_gt.resize(odim);
-    for(int ii = 0; ii < nsel; ++ii){
-      for(int dd = 0; dd < odim; ++dd){
-	      expected_gt[dd] += expected_t[ii*odim+dd];
+    for (int ii = 0; ii < nsel; ++ii) {
+      for (int dd = 0; dd < odim; ++dd) {
+        expected_gt[dd] += expected_t[ii * odim + dd];
       }
     }
 
     expected_gv.resize(odim * 9);
-    for (int kk = 0; kk < odim; ++kk){
-      for(int ii = 0; ii < natoms; ++ii){
-        for(int dd = 0; dd < 9; ++dd){
-          expected_gv[kk*9 + dd] += expected_v[kk*natoms*9 + ii*9 + dd];
+    for (int kk = 0; kk < odim; ++kk) {
+      for (int ii = 0; ii < natoms; ++ii) {
+        for (int dd = 0; dd < 9; ++dd) {
+          expected_gv[kk * 9 + dd] += expected_v[kk * natoms * 9 + ii * 9 + dd];
         }
       }
     }
   };
 
-  void TearDown() override {
-    remove( "deepdipole_new.pb" ) ;
-  };
+  void TearDown() override { remove("deepdipole_new.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepDipoleNew, ValueTypes);
 
-TYPED_TEST(TestInferDeepDipoleNew, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepDipoleNew, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_t = this -> expected_t;
-  std::vector& expected_f = this -> expected_f;
-  std::vector& expected_v = this -> expected_v;
-  std::vector& expected_gt = this -> expected_gt;
-  std::vector& expected_gv = this -> expected_gv;
-  int& natoms = this -> natoms;
-  int& nsel = this -> nsel;
-  int& odim = this -> odim;
-  deepmd::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_t = this->expected_t;
+  std::vector& expected_f = this->expected_f;
+  std::vector& expected_v = this->expected_v;
+  std::vector& expected_gt = this->expected_gt;
+  std::vector& expected_gv = this->expected_gv;
+  int& natoms = this->natoms;
+  int& nsel = this->nsel;
+  int& odim = this->odim;
+  deepmd::DeepTensor& dp = this->dp;
   EXPECT_EQ(dp.cutoff(), 4.);
   EXPECT_EQ(dp.numb_types(), 2);
   EXPECT_EQ(dp.output_dim(), 3);
@@ -197,191 +277,185 @@ TYPED_TEST(TestInferDeepDipoleNew, cpu_build_nlist)
 
   dp.compute(at, coord, atype, box);
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
 
   dp.compute(gt, ff, vv, coord, atype, box);
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   EXPECT_EQ(ff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(ff[ii] - expected_f[ii]), EPSILON);
   }
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
 
   dp.compute(gt, ff, vv, at, av, coord, atype, box);
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   EXPECT_EQ(ff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(ff[ii] - expected_f[ii]), EPSILON);
   }
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
   EXPECT_EQ(av.size(), expected_v.size());
-  for(int ii = 0; ii < expected_v.size(); ++ii){
+  for (int ii = 0; ii < expected_v.size(); ++ii) {
     EXPECT_LT(fabs(av[ii] - expected_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepDipoleNew, cpu_lmp_nlist)
-{
+TYPED_TEST(TestInferDeepDipoleNew, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_t = this -> expected_t;
-  std::vector& expected_f = this -> expected_f;
-  std::vector& expected_v = this -> expected_v;
-  std::vector& expected_gt = this -> expected_gt;
-  std::vector& expected_gv = this -> expected_gv;
-  int& natoms = this -> natoms;
-  int& nsel = this -> nsel;
-  int& odim = this -> odim;
-  deepmd::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_t = this->expected_t;
+  std::vector& expected_f = this->expected_f;
+  std::vector& expected_v = this->expected_v;
+  std::vector& expected_gt = this->expected_gt;
+  std::vector& expected_gv = this->expected_gv;
+  int& natoms = this->natoms;
+  int& nsel = this->nsel;
+  int& odim = this->odim;
+  deepmd::DeepTensor& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
+  std::vector atype_cpy, mapping;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  std::vector > nlist_data;
+  std::vector > nlist_data;
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   std::vector gt, ff, vv, at, av;
 
-  dp.compute(at, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  dp.compute(at, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
 
-
-  dp.compute(gt, ff, vv, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  dp.compute(gt, ff, vv, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   // remove ghost atoms
-  std::vector rff (odim * nloc * 3);
-  for(int kk = 0; kk < odim; ++kk){
-    _fold_back(rff.begin() + kk * nloc * 3, ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
+  std::vector rff(odim * nloc * 3);
+  for (int kk = 0; kk < odim; ++kk) {
+    _fold_back(rff.begin() + kk * nloc * 3,
+                          ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
   }
   EXPECT_EQ(rff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(rff[ii] - expected_f[ii]), EPSILON);
   }
   // virial
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
 
+  dp.compute(gt, ff, vv, at, av, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist);
 
-  dp.compute(gt, ff, vv, at, av, coord_cpy, atype_cpy, box, nall-nloc, inlist);
-  
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   // remove ghost atoms
-  for(int kk = 0; kk < odim; ++kk){
-    _fold_back(rff.begin() + kk * nloc * 3, ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
+  for (int kk = 0; kk < odim; ++kk) {
+    _fold_back(rff.begin() + kk * nloc * 3,
+                          ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
   }
   EXPECT_EQ(rff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(rff[ii] - expected_f[ii]), EPSILON);
   }
   // virial
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
   // atom tensor
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
   // atom virial
-  std::vector rav (odim * nloc * 9);
-  for(int kk = 0; kk < odim; ++kk){
-    _fold_back(rav.begin() + kk * nloc * 9, av.begin() + kk * nall * 9, mapping, nloc, nall, 9);
+  std::vector rav(odim * nloc * 9);
+  for (int kk = 0; kk < odim; ++kk) {
+    _fold_back(rav.begin() + kk * nloc * 9,
+                          av.begin() + kk * nall * 9, mapping, nloc, nall, 9);
   }
   EXPECT_EQ(rav.size(), expected_v.size());
-  for(int ii = 0; ii < expected_v.size(); ++ii){
+  for (int ii = 0; ii < expected_v.size(); ++ii) {
     EXPECT_LT(fabs(rav[ii] - expected_v[ii]), EPSILON);
   }
 }
 
-
 template 
-class TestInferDeepDipoleFake : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepDipoleFake : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector expected_d = {
-    -3.186217894664857830e-01, 1.082220317383403296e+00, 5.646623185237639730e-02, 7.426508038929955369e-01, -3.115996324658170114e-01, -5.619108089573777720e-01, -4.181578166874897473e-01, -7.579762930974662805e-01, 4.980618433125854616e-01, 1.059635561913792712e+00, -2.641989315855929332e-01, 5.307984468104405273e-01, -1.484512535335152095e-01, 4.978588497891502374e-01, -8.022467807199461509e-01, -9.165936539882671985e-01, -2.238112120606238209e-01, 2.553133145814526217e-01
-  };
+      -3.186217894664857830e-01, 1.082220317383403296e+00,
+      5.646623185237639730e-02,  7.426508038929955369e-01,
+      -3.115996324658170114e-01, -5.619108089573777720e-01,
+      -4.181578166874897473e-01, -7.579762930974662805e-01,
+      4.980618433125854616e-01,  1.059635561913792712e+00,
+      -2.641989315855929332e-01, 5.307984468104405273e-01,
+      -1.484512535335152095e-01, 4.978588497891502374e-01,
+      -8.022467807199461509e-01, -9.165936539882671985e-01,
+      -2.238112120606238209e-01, 2.553133145814526217e-01};
   int natoms = 6;
 
   deepmd::DeepTensor dp;
 
   void SetUp() override {
-    deepmd::convert_pbtxt_to_pb("../../tests/infer/deepdipole_fake.pbtxt", "deepdipole_fake.pb");
+    deepmd::convert_pbtxt_to_pb("../../tests/infer/deepdipole_fake.pbtxt",
+                                "deepdipole_fake.pb");
 
     dp.init("deepdipole_fake.pb");
   };
 
-  void TearDown() override {
-    remove( "deepdipole_fake.pb" ) ;
-  };
+  void TearDown() override { remove("deepdipole_fake.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepDipoleFake, ValueTypes);
 
-TYPED_TEST(TestInferDeepDipoleFake, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepDipoleFake, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_d = this -> expected_d;
-  int& natoms = this -> natoms;
-  deepmd::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_d = this->expected_d;
+  int& natoms = this->natoms;
+  deepmd::DeepTensor& dp = this->dp;
   EXPECT_EQ(dp.cutoff(), 2.);
   EXPECT_EQ(dp.numb_types(), 2);
   EXPECT_EQ(dp.output_dim(), 3);
@@ -394,40 +468,37 @@ TYPED_TEST(TestInferDeepDipoleFake, cpu_build_nlist)
   dp.compute(value, coord, atype, box);
 
   EXPECT_EQ(value.size(), expected_d.size());
-  for(int ii = 0; ii < expected_d.size(); ++ii){
+  for (int ii = 0; ii < expected_d.size(); ++ii) {
     EXPECT_LT(fabs(value[ii] - expected_d[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepDipoleFake, cpu_lmp_nlist)
-{
+TYPED_TEST(TestInferDeepDipoleFake, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_d = this -> expected_d;
-  int& natoms = this -> natoms;
-  deepmd::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_d = this->expected_d;
+  int& natoms = this->natoms;
+  deepmd::DeepTensor& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
+  std::vector atype_cpy, mapping;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  std::vector > nlist_data;
+  std::vector > nlist_data;
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   std::vector value;
-  dp.compute(value, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  dp.compute(value, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   EXPECT_EQ(value.size(), expected_d.size());
-  for(int ii = 0; ii < expected_d.size(); ++ii){
+  for (int ii = 0; ii < expected_d.size(); ++ii) {
     EXPECT_LT(fabs(value[ii] - expected_d[ii]), EPSILON);
   }
 }
-
diff --git a/source/api_cc/tests/test_deeppolar.cc b/source/api_cc/tests/test_deeppolar.cc
index 8f11ecfd81..f179893212 100644
--- a/source/api_cc/tests/test_deeppolar.cc
+++ b/source/api_cc/tests/test_deeppolar.cc
@@ -1,66 +1,62 @@
+#include 
 #include 
-#include 
+#include 
+#include 
+
 #include 
+#include 
 #include 
 #include 
+
 #include "DeepTensor.h"
 #include "neighbor_list.h"
 #include "test_utils.h"
 
-#include 
-#include 
-#include   
-
 template 
-class TestInferDeepPolar : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepPolar : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector expected_d = {
-    1.061407927405987051e-01,-3.569013342133873778e-01,-2.862108976089940138e-02,-3.569013342133875444e-01,1.304367268874677244e+00,1.037647501453442256e-01,-2.862108976089940138e-02,1.037647501453441284e-01,8.100521520762453409e-03,1.236797829492216616e+00,-3.717307430531632262e-01,7.371515676976750919e-01,-3.717307430531630041e-01,1.127222682121889058e-01,-2.239181552775717510e-01,7.371515676976746478e-01,-2.239181552775717787e-01,4.448255365635306879e-01
-  };
+      1.061407927405987051e-01,  -3.569013342133873778e-01,
+      -2.862108976089940138e-02, -3.569013342133875444e-01,
+      1.304367268874677244e+00,  1.037647501453442256e-01,
+      -2.862108976089940138e-02, 1.037647501453441284e-01,
+      8.100521520762453409e-03,  1.236797829492216616e+00,
+      -3.717307430531632262e-01, 7.371515676976750919e-01,
+      -3.717307430531630041e-01, 1.127222682121889058e-01,
+      -2.239181552775717510e-01, 7.371515676976746478e-01,
+      -2.239181552775717787e-01, 4.448255365635306879e-01};
   int natoms;
 
   deepmd::DeepTensor dp;
 
   void SetUp() override {
     std::string file_name = "../../tests/infer/deeppolar.pbtxt";
-    deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppolar.pbtxt", "deeppolar.pb");
+    deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppolar.pbtxt",
+                                "deeppolar.pb");
 
     dp.init("deeppolar.pb");
 
     natoms = expected_d.size();
   };
 
-  void TearDown() override {
-    remove( "deeppolar.pb" ) ;
-  };
+  void TearDown() override { remove("deeppolar.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepPolar, ValueTypes);
 
-TYPED_TEST(TestInferDeepPolar, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepPolar, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_d = this -> expected_d;
-  int& natoms = this -> natoms;
-  deepmd::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_d = this->expected_d;
+  int& natoms = this->natoms;
+  deepmd::DeepTensor& dp = this->dp;
 
   EXPECT_EQ(dp.cutoff(), 6.);
   EXPECT_EQ(dp.numb_types(), 2);
@@ -73,71 +69,385 @@ TYPED_TEST(TestInferDeepPolar, cpu_build_nlist)
   dp.compute(value, coord, atype, box);
 
   EXPECT_EQ(value.size(), expected_d.size());
-  for(int ii = 0; ii < expected_d.size(); ++ii){
+  for (int ii = 0; ii < expected_d.size(); ++ii) {
     EXPECT_LT(fabs(value[ii] - expected_d[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPolar, cpu_lmp_nlist)
-{
+TYPED_TEST(TestInferDeepPolar, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& atype = this -> atype;
-  std::vector& box = this -> box;
-  std::vector& expected_d = this -> expected_d;
-  int& natoms = this -> natoms;
-  deepmd::DeepTensor& dp = this -> dp;
+  std::vector& coord = this->coord;
+  std::vector& atype = this->atype;
+  std::vector& box = this->box;
+  std::vector& expected_d = this->expected_d;
+  int& natoms = this->natoms;
+  deepmd::DeepTensor& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
+  std::vector atype_cpy, mapping;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  std::vector > nlist_data;
+  std::vector > nlist_data;
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   std::vector value;
-  dp.compute(value, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  dp.compute(value, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   EXPECT_EQ(value.size(), expected_d.size());
-  for(int ii = 0; ii < expected_d.size(); ++ii){
+  for (int ii = 0; ii < expected_d.size(); ++ii) {
     EXPECT_LT(fabs(value[ii] - expected_d[ii]), EPSILON);
   }
 }
 
-
 template 
-class TestInferDeepPolarNew : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepPolarNew : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector expected_t = {
-    1.936327241487292961e+00, 5.198696351735779264e-02, 3.888336625074450149e-03, 5.198696351735781346e-02, 1.764967784387830196e+00, -1.354658545697527347e-02, 3.888336625074451016e-03, -1.354658545697527000e-02, 1.939288409902199639e+00, 1.786740420980893029e+00, 4.868765294055640847e-02, -9.812132615180739481e-02, 4.868765294055640847e-02, 1.925999147066305373e+00, 2.895028407651457567e-02, -9.812132615180743644e-02, 2.895028407651457220e-02, 1.883109989034779996e+00
-  };
+      1.936327241487292961e+00, 5.198696351735779264e-02,
+      3.888336625074450149e-03, 5.198696351735781346e-02,
+      1.764967784387830196e+00, -1.354658545697527347e-02,
+      3.888336625074451016e-03, -1.354658545697527000e-02,
+      1.939288409902199639e+00, 1.786740420980893029e+00,
+      4.868765294055640847e-02, -9.812132615180739481e-02,
+      4.868765294055640847e-02, 1.925999147066305373e+00,
+      2.895028407651457567e-02, -9.812132615180743644e-02,
+      2.895028407651457220e-02, 1.883109989034779996e+00};
   std::vector expected_f = {
-    5.305178446980116092e-02, -1.127314829623577049e-02, 1.136493514861047216e-01, 5.598130220328862322e-05, -4.352126938892845326e-02, -7.700608888887500170e-02, -1.050015668789053697e-01, 5.882396336737016895e-02, -3.723875897544067642e-02, -7.850322286760008650e-02, 7.279117637753844405e-02, -6.178451060078461732e-02, 3.404361490778949895e-01, 5.447934529195214842e-02, -8.698375128815737101e-02, -2.100391251033939810e-01, -1.313000673516965255e-01, 1.493637582671529240e-01, -9.589318874236771317e-02, 6.285887854370801608e-02, -1.824395427630142175e-01, -3.264267092869802683e-02, 3.637498661083633789e-02, 1.524859582123189172e-01, 1.442484990808054202e-01, -8.957992476622803069e-02, 3.076469140583825215e-02, 4.909822745881124717e-02, -2.559151672032903835e-01, -1.522830913546814324e-01, -2.885480042033320910e-02, 7.730841025065784966e-02, 1.553301391955271560e-01, -3.595606644821771475e-02, 1.689528165643162105e-01, -3.858154695988691516e-03, 5.018843026262573281e-02, 1.756005154318779349e-02, 3.489323893614350303e-02, -4.020411124876955428e-02, 2.218648284685413238e-02, -8.086177159691650476e-03, -2.222392408702593067e-02, -3.825892777133557687e-02, -1.689393838770965675e-02, -5.465804822761769627e-02, -1.398775875506316768e-01, -1.165702490994514756e-01, 5.449067849718619572e-02, 1.588580450812354106e-01, -8.209560373418453572e-02, 1.240697480360127003e-02, -2.046806414931008622e-02, 1.887527294448937965e-01, -9.589318874236771317e-02, 6.285887854370801608e-02, -1.824395427630142175e-01, -3.264267092869802683e-02, 3.637498661083633789e-02, 1.524859582123189172e-01, 1.442484990808054202e-01, -8.957992476622803069e-02, 3.076469140583825215e-02, 4.909822745881124717e-02, -2.559151672032903835e-01, -1.522830913546814324e-01, -2.885480042033320910e-02, 7.730841025065784966e-02, 1.553301391955271560e-01, -3.595606644821771475e-02, 1.689528165643162105e-01, -3.858154695988691516e-03, 4.038746042068122599e-02, -2.549213597407858356e-01, -1.131801705114504619e-01, 
1.489732376295762606e-01, 2.734584831542113958e-01, -1.125511889088352951e-01, -1.908551011160136424e-01, -2.400995606986339528e-02, 2.255650484976146619e-01, -2.185213968874370055e-02, 1.475333123369945709e-01, 9.584417756169674729e-02, -1.576380405016522893e-02, -5.153693137796186430e-02, -8.489897831367294867e-02, 3.911034680466508873e-02, -9.052354830259493057e-02, -1.077888832535272776e-02, -1.970229486427777510e-01, -6.538978166042377915e-02, -1.570533119125729904e-01, 1.417940206277617798e-01, -4.684714285705613573e-02, 6.070882964241105378e-02, 5.715183445260185735e-02, 1.138024049318459713e-01, 9.374622673558237473e-02, 3.096927839536914306e-02, -9.232883741117139942e-02, -6.499836527010099951e-02, 2.839980861544661936e-02, 8.097497759757724123e-03, 1.006700103228213017e-01, -6.129199344840163821e-02, 8.266585923704758421e-02, -3.307338951814068478e-02, 5.018843026262574669e-02, 1.756005154318778308e-02, 3.489323893614350997e-02, -4.020411124876957509e-02, 2.218648284685414279e-02, -8.086177159691652211e-03, -2.222392408702593067e-02, -3.825892777133557687e-02, -1.689393838770965675e-02, -5.465804822761770321e-02, -1.398775875506316491e-01, -1.165702490994514756e-01, 5.449067849718619572e-02, 1.588580450812354106e-01, -8.209560373418453572e-02, 1.240697480360125615e-02, -2.046806414931009316e-02, 1.887527294448937965e-01, -1.970229486427777510e-01, -6.538978166042375140e-02, -1.570533119125729626e-01, 1.417940206277618076e-01, -4.684714285705613573e-02, 6.070882964241105378e-02, 5.715183445260184347e-02, 1.138024049318459713e-01, 9.374622673558236086e-02, 3.096927839536912919e-02, -9.232883741117139942e-02, -6.499836527010102727e-02, 2.839980861544661589e-02, 8.097497759757731062e-03, 1.006700103228213017e-01, -6.129199344840162433e-02, 8.266585923704758421e-02, -3.307338951814066397e-02, -3.078161564779093723e-02, -8.748776750553553111e-03, -2.162930108693108394e-02, 2.135313622214399243e-02, -8.845621737097757523e-03, 9.365293934359546560e-03, 
8.562579091543631032e-03, 1.772751551871581607e-02, 1.573655414890783033e-02, -3.649820158632081230e-02, -1.904914900326310223e-01, -1.076542087674599024e-01, -5.186655049718805199e-02, 1.686765146765009937e-01, -6.620206332305828001e-02, 8.923065241761217459e-02, 2.168185832506550753e-02, 1.703837250941818704e-01
-  };
+      5.305178446980116092e-02,  -1.127314829623577049e-02,
+      1.136493514861047216e-01,  5.598130220328862322e-05,
+      -4.352126938892845326e-02, -7.700608888887500170e-02,
+      -1.050015668789053697e-01, 5.882396336737016895e-02,
+      -3.723875897544067642e-02, -7.850322286760008650e-02,
+      7.279117637753844405e-02,  -6.178451060078461732e-02,
+      3.404361490778949895e-01,  5.447934529195214842e-02,
+      -8.698375128815737101e-02, -2.100391251033939810e-01,
+      -1.313000673516965255e-01, 1.493637582671529240e-01,
+      -9.589318874236771317e-02, 6.285887854370801608e-02,
+      -1.824395427630142175e-01, -3.264267092869802683e-02,
+      3.637498661083633789e-02,  1.524859582123189172e-01,
+      1.442484990808054202e-01,  -8.957992476622803069e-02,
+      3.076469140583825215e-02,  4.909822745881124717e-02,
+      -2.559151672032903835e-01, -1.522830913546814324e-01,
+      -2.885480042033320910e-02, 7.730841025065784966e-02,
+      1.553301391955271560e-01,  -3.595606644821771475e-02,
+      1.689528165643162105e-01,  -3.858154695988691516e-03,
+      5.018843026262573281e-02,  1.756005154318779349e-02,
+      3.489323893614350303e-02,  -4.020411124876955428e-02,
+      2.218648284685413238e-02,  -8.086177159691650476e-03,
+      -2.222392408702593067e-02, -3.825892777133557687e-02,
+      -1.689393838770965675e-02, -5.465804822761769627e-02,
+      -1.398775875506316768e-01, -1.165702490994514756e-01,
+      5.449067849718619572e-02,  1.588580450812354106e-01,
+      -8.209560373418453572e-02, 1.240697480360127003e-02,
+      -2.046806414931008622e-02, 1.887527294448937965e-01,
+      -9.589318874236771317e-02, 6.285887854370801608e-02,
+      -1.824395427630142175e-01, -3.264267092869802683e-02,
+      3.637498661083633789e-02,  1.524859582123189172e-01,
+      1.442484990808054202e-01,  -8.957992476622803069e-02,
+      3.076469140583825215e-02,  4.909822745881124717e-02,
+      -2.559151672032903835e-01, -1.522830913546814324e-01,
+      -2.885480042033320910e-02, 7.730841025065784966e-02,
+      1.553301391955271560e-01,  -3.595606644821771475e-02,
+      1.689528165643162105e-01,  -3.858154695988691516e-03,
+      4.038746042068122599e-02,  -2.549213597407858356e-01,
+      -1.131801705114504619e-01, 1.489732376295762606e-01,
+      2.734584831542113958e-01,  -1.125511889088352951e-01,
+      -1.908551011160136424e-01, -2.400995606986339528e-02,
+      2.255650484976146619e-01,  -2.185213968874370055e-02,
+      1.475333123369945709e-01,  9.584417756169674729e-02,
+      -1.576380405016522893e-02, -5.153693137796186430e-02,
+      -8.489897831367294867e-02, 3.911034680466508873e-02,
+      -9.052354830259493057e-02, -1.077888832535272776e-02,
+      -1.970229486427777510e-01, -6.538978166042377915e-02,
+      -1.570533119125729904e-01, 1.417940206277617798e-01,
+      -4.684714285705613573e-02, 6.070882964241105378e-02,
+      5.715183445260185735e-02,  1.138024049318459713e-01,
+      9.374622673558237473e-02,  3.096927839536914306e-02,
+      -9.232883741117139942e-02, -6.499836527010099951e-02,
+      2.839980861544661936e-02,  8.097497759757724123e-03,
+      1.006700103228213017e-01,  -6.129199344840163821e-02,
+      8.266585923704758421e-02,  -3.307338951814068478e-02,
+      5.018843026262574669e-02,  1.756005154318778308e-02,
+      3.489323893614350997e-02,  -4.020411124876957509e-02,
+      2.218648284685414279e-02,  -8.086177159691652211e-03,
+      -2.222392408702593067e-02, -3.825892777133557687e-02,
+      -1.689393838770965675e-02, -5.465804822761770321e-02,
+      -1.398775875506316491e-01, -1.165702490994514756e-01,
+      5.449067849718619572e-02,  1.588580450812354106e-01,
+      -8.209560373418453572e-02, 1.240697480360125615e-02,
+      -2.046806414931009316e-02, 1.887527294448937965e-01,
+      -1.970229486427777510e-01, -6.538978166042375140e-02,
+      -1.570533119125729626e-01, 1.417940206277618076e-01,
+      -4.684714285705613573e-02, 6.070882964241105378e-02,
+      5.715183445260184347e-02,  1.138024049318459713e-01,
+      9.374622673558236086e-02,  3.096927839536912919e-02,
+      -9.232883741117139942e-02, -6.499836527010102727e-02,
+      2.839980861544661589e-02,  8.097497759757731062e-03,
+      1.006700103228213017e-01,  -6.129199344840162433e-02,
+      8.266585923704758421e-02,  -3.307338951814066397e-02,
+      -3.078161564779093723e-02, -8.748776750553553111e-03,
+      -2.162930108693108394e-02, 2.135313622214399243e-02,
+      -8.845621737097757523e-03, 9.365293934359546560e-03,
+      8.562579091543631032e-03,  1.772751551871581607e-02,
+      1.573655414890783033e-02,  -3.649820158632081230e-02,
+      -1.904914900326310223e-01, -1.076542087674599024e-01,
+      -5.186655049718805199e-02, 1.686765146765009937e-01,
+      -6.620206332305828001e-02, 8.923065241761217459e-02,
+      2.168185832506550753e-02,  1.703837250941818704e-01};
   std::vector expected_v = {
-    -2.123013313652813774e-03, -2.646248889538913257e-04, 2.225254748021367093e-04, 9.843593195853941446e-04, 1.226963457840150472e-04, -1.031764725911038809e-04, -8.467513732241481721e-04, -1.055440805151912256e-04, 8.875297679686559459e-05, 1.829118379697145316e-02, 2.302438731350108913e-03, -1.890198823577125386e-03, 3.300229266409118040e-02, -1.339230641165423293e-02, -2.445540228188634868e-02, 5.127826101331301595e-02, -2.458314752619149279e-02, -4.252530480245884925e-02, 9.733043787604266084e-02, -6.217238566516904152e-02, 3.767656091618994812e-02, 6.674680725588777973e-03, 4.245867422406505304e-02, -2.752200660186601699e-02, -8.318636634138946995e-03, -2.738884420387305285e-02, 1.785195524121836741e-02, -3.151218435289559073e-03, -3.927864338604547816e-04, 3.302976830190196104e-04, 1.387198082848713948e-06, 1.729085429046553641e-07, -1.454003656243721975e-07, -4.056191292896940703e-05, -5.055875832506090064e-06, 4.251531950061960394e-06, 7.087482338961141604e-02, -1.643445525800983908e-01, 2.668682182870234509e-01, 7.752581706917366366e-03, -2.674714571946596939e-02, 4.308263417785011123e-02, -9.385640612496094423e-03, 4.307848167667025635e-02, -6.910099104451945806e-02, -1.822493611414978121e-01, -4.510097387143227610e-02, 5.157836206906134952e-02, -1.170389534066011428e-01, -2.858136680923874240e-02, 3.256883555835647648e-02, 1.336331160725280354e-01, 3.257484898923947853e-02, -3.710113093740719653e-02, 3.343993600586595179e-03, 4.168150663620683060e-04, -3.505035785317401481e-04, -4.312491363797464269e-03, -5.375343342977005178e-04, 4.520175083867039156e-04, -5.045304632809267465e-04, -6.288764981405317546e-05, 5.288279643454484632e-05, 2.176577726533836937e-02, -1.041710664445027849e-02, -1.802940684978692962e-02, -3.097121964369356495e-02, 1.077096511204005125e-02, 2.079488766754130843e-02, -1.120464690158002596e-01, 4.736950869652114399e-02, 8.530900293808066359e-02, 3.029112757823893692e-02, 1.058529311156591879e-01, -6.894903720238335088e-02, 
-5.089618157121258979e-02, -6.973511953466600410e-02, 4.618114280030299196e-02, 1.143309394598741001e-02, 2.319568285212985151e-02, -1.522637168466081138e-02, -1.535733649675188493e-03, -1.914228911776438445e-04, 1.609692493993826663e-04, -2.603290366421702733e-03, -3.244894507721100851e-04, 2.728661290583660171e-04, 6.938458118266074663e-04, 8.648503036932213837e-05, -7.272604826511198082e-05, -2.609239945314979423e-02, 1.142603664459106681e-02, -2.051406106454568487e-02, 5.779549344910496142e-03, -3.860615030463052100e-02, 6.168332781226748551e-02, 2.068839156841529789e-02, -7.643723474881176927e-02, 1.229844977392647865e-01, -3.554667688747349674e-02, -8.262665730398828859e-03, 9.285295046969522723e-03, 1.497274901467501862e-01, 3.666859638982037511e-02, -4.181688913175674732e-02, -3.257377626487627069e-03, -8.171909213273372040e-04, 9.379633299917983094e-04, 5.408910405506226968e-04, 6.741984641424190365e-05, -5.669396175743082354e-05, 4.696290607396237790e-04, 5.853733334998140626e-05, -4.922457577157541143e-05, -5.350269144276139158e-03, -6.668890718077903363e-04, 5.607930831110977251e-04, 3.013271000130106694e-02, -1.241570117891089425e-02, -2.255430712666738058e-02, -1.643158253499693577e-02, 6.876116339617440766e-03, 1.242585434168311936e-02, 2.120265775977717496e-03, -2.988284987993197143e-03, -4.123302560925387432e-03, 3.528008965720315360e-02, -1.132921329184741026e-02, 6.435692645130823564e-03, -2.115291124444698342e-02, -2.971050496327276927e-02, 1.966236467455729359e-02, -2.194244461519655187e-02, -1.469000955331024871e-02, 1.000316933044766328e-02, -2.208576023807404254e-03, -2.752899293131040766e-04, 2.314938041951108548e-04, -5.840262773118632192e-04, -7.279647649213021596e-05, 6.121521886838239123e-05, -1.263538670848133802e-03, -1.574949051482092536e-04, 1.324388975109944740e-04, 8.955566031735841259e-03, -2.660296383100100095e-02, 4.296567375352825652e-02, 2.380373596470350059e-02, -7.784355459714024927e-02, 1.255004729498893912e-01, 
-1.824501349606120690e-02, 3.948761180940744964e-02, -6.423389834199008663e-02, 1.038606825469970407e-02, 2.616819816765628484e-03, -3.006960935423359793e-03, -1.864007491704058883e-02, -4.504736174636920880e-03, 5.118497771104377897e-03, 1.680266347982039554e-01, 4.105963063126880086e-02, -4.679634408112137711e-02, 3.343993600586595179e-03, 4.168150663620683060e-04, -3.505035785317401481e-04, -4.312491363797464269e-03, -5.375343342977005178e-04, 4.520175083867039156e-04, -5.045304632809267465e-04, -6.288764981405317546e-05, 5.288279643454484632e-05, 2.176577726533836937e-02, -1.041710664445027849e-02, -1.802940684978692962e-02, -3.097121964369356495e-02, 1.077096511204005125e-02, 2.079488766754130843e-02, -1.120464690158002596e-01, 4.736950869652114399e-02, 8.530900293808066359e-02, 3.029112757823893692e-02, 1.058529311156591879e-01, -6.894903720238335088e-02, -5.089618157121258979e-02, -6.973511953466600410e-02, 4.618114280030299196e-02, 1.143309394598741001e-02, 2.319568285212985151e-02, -1.522637168466081138e-02, -1.535733649675188493e-03, -1.914228911776438445e-04, 1.609692493993826663e-04, -2.603290366421702733e-03, -3.244894507721100851e-04, 2.728661290583660171e-04, 6.938458118266074663e-04, 8.648503036932213837e-05, -7.272604826511198082e-05, -2.609239945314979423e-02, 1.142603664459106681e-02, -2.051406106454568487e-02, 5.779549344910496142e-03, -3.860615030463052100e-02, 6.168332781226748551e-02, 2.068839156841529789e-02, -7.643723474881176927e-02, 1.229844977392647865e-01, -3.554667688747349674e-02, -8.262665730398828859e-03, 9.285295046969522723e-03, 1.497274901467501862e-01, 3.666859638982037511e-02, -4.181688913175674732e-02, -3.257377626487627069e-03, -8.171909213273372040e-04, 9.379633299917983094e-04, 1.097257666720985849e-03, 1.367686610077148478e-04, -1.150100103928514269e-04, -3.252401295559594844e-03, -4.053984617694676175e-04, 3.409032519425078027e-04, -1.217154259382106555e-04, -1.517132787898375553e-05, 1.275770753460001047e-05, 
-1.104423096905816498e-01, 4.615651100464009809e-02, 8.344619780982527601e-02, -1.998235369855275168e-01, 8.508819942125579738e-02, 1.528709647298205909e-01, 8.333302476347614896e-02, -3.488524142655123617e-02, -6.303339769808283255e-02, -7.468341447282240975e-02, -1.443673498458480642e-01, 9.485360739696327426e-02, -2.685004652445167612e-04, -1.702408228533323561e-02, 1.097613894113106531e-02, 9.496752299747332482e-02, 1.714581306702349373e-01, -1.128066531362114239e-01, -2.109671824413435984e-03, -2.629619271223545066e-04, 2.211270750801623281e-04, 1.011694656468142307e-02, 1.261035832424879221e-03, -1.060416495448196581e-03, 2.326027531269699879e-04, 2.899297772687444119e-05, -2.438045854305356789e-05, -9.775618976121780001e-04, 7.897148922927013995e-03, -1.259878571596698138e-02, -5.534571406250721713e-03, 2.552681480358522451e-02, -4.094434810336724379e-02, -1.258721457759937913e-02, 4.161890111720080443e-02, -6.708566706120022705e-02, 3.521744971093632853e-02, 8.557787631933998912e-03, -9.738493960065902622e-03, -8.446926488038911107e-02, -2.017604402799078392e-02, 2.285024948138817888e-02, -9.755577915095828626e-03, -2.364722966186930900e-03, 2.689144780896026744e-03, 8.392348196279006065e-05, 1.046071729847805219e-05, -8.796512273720217211e-06, -2.967282659264359589e-03, -3.698595949224694123e-04, 3.110182957302592738e-04, -1.688223115474902841e-03, -2.104300767164184042e-04, 1.769525645115341121e-04, -1.040849854787611189e-01, 4.406117175034113265e-02, 7.931633477513304331e-02, 3.539829580561168476e-02, -1.443144702217136026e-02, -2.631106338063535569e-02, -4.383990895980735547e-02, 1.895493123709470276e-02, 3.388325869579450478e-02, 1.809448338386955915e-02, 4.269882582195522885e-02, -2.795653019460052346e-02, 4.363124777259473619e-02, 8.597058258914810902e-02, -5.646456449126337207e-02, 4.431189331687027805e-02, 7.186269332716928304e-02, -4.739074421553418626e-02, 7.807665162715203382e-05, 9.731933913865978996e-06, -8.183671700296416994e-06, 
2.525821455836478949e-03, 3.148332692827336839e-04, -2.647461582604813284e-04, 5.088778918832323993e-03, 6.342953893162101269e-04, -5.333847591977234877e-04, 1.765533347871811772e-03, -1.422682766506909793e-02, 2.269730547460076936e-02, 2.888222424864686153e-04, -4.083171371247279469e-03, 6.494062010930001794e-03, 1.594130471018519873e-02, -4.922350239779287734e-02, 7.944117864515577720e-02, -5.516443865142822006e-02, -1.340804559261108905e-02, 1.525892700429632917e-02, 7.450140187529649682e-02, 1.809617933997387934e-02, -2.059052256811338619e-02, -3.118940445306414219e-02, -7.412336287839308216e-03, 8.382871287998559101e-03, 5.408910405506207452e-04, 6.741984641424155129e-05, -5.669396175743063380e-05, 4.696290607396231285e-04, 5.853733334998132494e-05, -4.922457577157534367e-05, -5.350269144276134821e-03, -6.668890718077897942e-04, 5.607930831110975083e-04, 3.013271000130106694e-02, -1.241570117891090119e-02, -2.255430712666738752e-02, -1.643158253499694271e-02, 6.876116339617444236e-03, 1.242585434168312457e-02, 2.120265775977718363e-03, -2.988284987993198010e-03, -4.123302560925387432e-03, 3.528008965720314666e-02, -1.132921329184741026e-02, 6.435692645130823564e-03, -2.115291124444698342e-02, -2.971050496327276927e-02, 1.966236467455729012e-02, -2.194244461519655881e-02, -1.469000955331024871e-02, 1.000316933044766501e-02, -2.208576023807403820e-03, -2.752899293131040766e-04, 2.314938041951108548e-04, -5.840262773118632192e-04, -7.279647649213021596e-05, 6.121521886838239123e-05, -1.263538670848133802e-03, -1.574949051482092536e-04, 1.324388975109944740e-04, 8.955566031735841259e-03, -2.660296383100100095e-02, 4.296567375352825652e-02, 2.380373596470350059e-02, -7.784355459714024927e-02, 1.255004729498893912e-01, -1.824501349606121037e-02, 3.948761180940744964e-02, -6.423389834199008663e-02, 1.038606825469969019e-02, 2.616819816765625015e-03, -3.006960935423356324e-03, -1.864007491704059577e-02, -4.504736174636922615e-03, 5.118497771104379632e-03, 
1.680266347982039554e-01, 4.105963063126880086e-02, -4.679634408112137711e-02, 8.392348196278930170e-05, 1.046071729847797087e-05, -8.796512273720142672e-06, -2.967282659264356987e-03, -3.698595949224691413e-04, 3.110182957302590027e-04, -1.688223115474903708e-03, -2.104300767164184855e-04, 1.769525645115341934e-04, -1.040849854787611189e-01, 4.406117175034113265e-02, 7.931633477513304331e-02, 3.539829580561167782e-02, -1.443144702217136026e-02, -2.631106338063535569e-02, -4.383990895980735547e-02, 1.895493123709470276e-02, 3.388325869579450478e-02, 1.809448338386955221e-02, 4.269882582195521498e-02, -2.795653019460051653e-02, 4.363124777259472925e-02, 8.597058258914809514e-02, -5.646456449126335819e-02, 4.431189331687027111e-02, 7.186269332716926916e-02, -4.739074421553417932e-02, 7.807665162715246750e-05, 9.731933913866019654e-06, -8.183671700296457651e-06, 2.525821455836478515e-03, 3.148332692827336297e-04, -2.647461582604812742e-04, 5.088778918832324860e-03, 6.342953893162102353e-04, -5.333847591977235961e-04, 1.765533347871809603e-03, -1.422682766506909793e-02, 2.269730547460076589e-02, 2.888222424864694826e-04, -4.083171371247282938e-03, 6.494062010930008733e-03, 1.594130471018519873e-02, -4.922350239779287040e-02, 7.944117864515577720e-02, -5.516443865142821312e-02, -1.340804559261108558e-02, 1.525892700429632570e-02, 7.450140187529649682e-02, 1.809617933997387934e-02, -2.059052256811338966e-02, -3.118940445306412831e-02, -7.412336287839304746e-03, 8.382871287998553897e-03, -9.575909105642434974e-04, -1.193597735547498307e-04, 1.003707186710399045e-04, -9.520061199010912585e-05, -1.186636523389461756e-05, 9.978534401229592523e-06, -5.876800709203859434e-03, -7.325190685693192200e-04, 6.159819440242017292e-04, -1.659431774532551043e-02, 6.520628417529478540e-03, 1.204087494393247214e-02, 6.518824051016284399e-03, -2.745500204548994606e-03, -4.950724849051978994e-03, -5.340810191179472081e-03, 3.101366677982481286e-03, 5.077959020099345744e-03, 
7.727976016970144156e-03, 7.022558645366243878e-03, -4.714356496325102820e-03, 7.018017321145150929e-03, 1.341962078953426278e-02, -8.818944869050635710e-03, -2.755773236988961865e-03, 1.079245666846929096e-02, -6.886663303228377636e-03, 9.801230913130992879e-04, 1.221683173308112048e-04, -1.027324486645460452e-04, 1.233918620327190629e-04, 1.538028875195364422e-05, -1.293342463232469071e-05, 4.892751025155074075e-03, 6.098613175830685205e-04, -5.128379261493998297e-04, -7.792305682365031905e-03, 2.541307371885552502e-02, -4.097328323558844382e-02, 2.530143617608526449e-02, -8.265149730513186854e-02, 1.332544508945474881e-01, -1.184335640259520997e-02, 3.220055758982264676e-02, -5.209911236104310117e-02, 8.090761694886683397e-02, 1.959431243541279177e-02, -2.227702786419644143e-02, 1.968691296265078980e-02, 4.764576998712748319e-03, -5.415896903683155988e-03, 1.534638141861073557e-01, 3.728680895816388619e-02, -4.242975875503233324e-02
-  };
+      -2.123013313652813774e-03, -2.646248889538913257e-04,
+      2.225254748021367093e-04,  9.843593195853941446e-04,
+      1.226963457840150472e-04,  -1.031764725911038809e-04,
+      -8.467513732241481721e-04, -1.055440805151912256e-04,
+      8.875297679686559459e-05,  1.829118379697145316e-02,
+      2.302438731350108913e-03,  -1.890198823577125386e-03,
+      3.300229266409118040e-02,  -1.339230641165423293e-02,
+      -2.445540228188634868e-02, 5.127826101331301595e-02,
+      -2.458314752619149279e-02, -4.252530480245884925e-02,
+      9.733043787604266084e-02,  -6.217238566516904152e-02,
+      3.767656091618994812e-02,  6.674680725588777973e-03,
+      4.245867422406505304e-02,  -2.752200660186601699e-02,
+      -8.318636634138946995e-03, -2.738884420387305285e-02,
+      1.785195524121836741e-02,  -3.151218435289559073e-03,
+      -3.927864338604547816e-04, 3.302976830190196104e-04,
+      1.387198082848713948e-06,  1.729085429046553641e-07,
+      -1.454003656243721975e-07, -4.056191292896940703e-05,
+      -5.055875832506090064e-06, 4.251531950061960394e-06,
+      7.087482338961141604e-02,  -1.643445525800983908e-01,
+      2.668682182870234509e-01,  7.752581706917366366e-03,
+      -2.674714571946596939e-02, 4.308263417785011123e-02,
+      -9.385640612496094423e-03, 4.307848167667025635e-02,
+      -6.910099104451945806e-02, -1.822493611414978121e-01,
+      -4.510097387143227610e-02, 5.157836206906134952e-02,
+      -1.170389534066011428e-01, -2.858136680923874240e-02,
+      3.256883555835647648e-02,  1.336331160725280354e-01,
+      3.257484898923947853e-02,  -3.710113093740719653e-02,
+      3.343993600586595179e-03,  4.168150663620683060e-04,
+      -3.505035785317401481e-04, -4.312491363797464269e-03,
+      -5.375343342977005178e-04, 4.520175083867039156e-04,
+      -5.045304632809267465e-04, -6.288764981405317546e-05,
+      5.288279643454484632e-05,  2.176577726533836937e-02,
+      -1.041710664445027849e-02, -1.802940684978692962e-02,
+      -3.097121964369356495e-02, 1.077096511204005125e-02,
+      2.079488766754130843e-02,  -1.120464690158002596e-01,
+      4.736950869652114399e-02,  8.530900293808066359e-02,
+      3.029112757823893692e-02,  1.058529311156591879e-01,
+      -6.894903720238335088e-02, -5.089618157121258979e-02,
+      -6.973511953466600410e-02, 4.618114280030299196e-02,
+      1.143309394598741001e-02,  2.319568285212985151e-02,
+      -1.522637168466081138e-02, -1.535733649675188493e-03,
+      -1.914228911776438445e-04, 1.609692493993826663e-04,
+      -2.603290366421702733e-03, -3.244894507721100851e-04,
+      2.728661290583660171e-04,  6.938458118266074663e-04,
+      8.648503036932213837e-05,  -7.272604826511198082e-05,
+      -2.609239945314979423e-02, 1.142603664459106681e-02,
+      -2.051406106454568487e-02, 5.779549344910496142e-03,
+      -3.860615030463052100e-02, 6.168332781226748551e-02,
+      2.068839156841529789e-02,  -7.643723474881176927e-02,
+      1.229844977392647865e-01,  -3.554667688747349674e-02,
+      -8.262665730398828859e-03, 9.285295046969522723e-03,
+      1.497274901467501862e-01,  3.666859638982037511e-02,
+      -4.181688913175674732e-02, -3.257377626487627069e-03,
+      -8.171909213273372040e-04, 9.379633299917983094e-04,
+      5.408910405506226968e-04,  6.741984641424190365e-05,
+      -5.669396175743082354e-05, 4.696290607396237790e-04,
+      5.853733334998140626e-05,  -4.922457577157541143e-05,
+      -5.350269144276139158e-03, -6.668890718077903363e-04,
+      5.607930831110977251e-04,  3.013271000130106694e-02,
+      -1.241570117891089425e-02, -2.255430712666738058e-02,
+      -1.643158253499693577e-02, 6.876116339617440766e-03,
+      1.242585434168311936e-02,  2.120265775977717496e-03,
+      -2.988284987993197143e-03, -4.123302560925387432e-03,
+      3.528008965720315360e-02,  -1.132921329184741026e-02,
+      6.435692645130823564e-03,  -2.115291124444698342e-02,
+      -2.971050496327276927e-02, 1.966236467455729359e-02,
+      -2.194244461519655187e-02, -1.469000955331024871e-02,
+      1.000316933044766328e-02,  -2.208576023807404254e-03,
+      -2.752899293131040766e-04, 2.314938041951108548e-04,
+      -5.840262773118632192e-04, -7.279647649213021596e-05,
+      6.121521886838239123e-05,  -1.263538670848133802e-03,
+      -1.574949051482092536e-04, 1.324388975109944740e-04,
+      8.955566031735841259e-03,  -2.660296383100100095e-02,
+      4.296567375352825652e-02,  2.380373596470350059e-02,
+      -7.784355459714024927e-02, 1.255004729498893912e-01,
+      -1.824501349606120690e-02, 3.948761180940744964e-02,
+      -6.423389834199008663e-02, 1.038606825469970407e-02,
+      2.616819816765628484e-03,  -3.006960935423359793e-03,
+      -1.864007491704058883e-02, -4.504736174636920880e-03,
+      5.118497771104377897e-03,  1.680266347982039554e-01,
+      4.105963063126880086e-02,  -4.679634408112137711e-02,
+      3.343993600586595179e-03,  4.168150663620683060e-04,
+      -3.505035785317401481e-04, -4.312491363797464269e-03,
+      -5.375343342977005178e-04, 4.520175083867039156e-04,
+      -5.045304632809267465e-04, -6.288764981405317546e-05,
+      5.288279643454484632e-05,  2.176577726533836937e-02,
+      -1.041710664445027849e-02, -1.802940684978692962e-02,
+      -3.097121964369356495e-02, 1.077096511204005125e-02,
+      2.079488766754130843e-02,  -1.120464690158002596e-01,
+      4.736950869652114399e-02,  8.530900293808066359e-02,
+      3.029112757823893692e-02,  1.058529311156591879e-01,
+      -6.894903720238335088e-02, -5.089618157121258979e-02,
+      -6.973511953466600410e-02, 4.618114280030299196e-02,
+      1.143309394598741001e-02,  2.319568285212985151e-02,
+      -1.522637168466081138e-02, -1.535733649675188493e-03,
+      -1.914228911776438445e-04, 1.609692493993826663e-04,
+      -2.603290366421702733e-03, -3.244894507721100851e-04,
+      2.728661290583660171e-04,  6.938458118266074663e-04,
+      8.648503036932213837e-05,  -7.272604826511198082e-05,
+      -2.609239945314979423e-02, 1.142603664459106681e-02,
+      -2.051406106454568487e-02, 5.779549344910496142e-03,
+      -3.860615030463052100e-02, 6.168332781226748551e-02,
+      2.068839156841529789e-02,  -7.643723474881176927e-02,
+      1.229844977392647865e-01,  -3.554667688747349674e-02,
+      -8.262665730398828859e-03, 9.285295046969522723e-03,
+      1.497274901467501862e-01,  3.666859638982037511e-02,
+      -4.181688913175674732e-02, -3.257377626487627069e-03,
+      -8.171909213273372040e-04, 9.379633299917983094e-04,
+      1.097257666720985849e-03,  1.367686610077148478e-04,
+      -1.150100103928514269e-04, -3.252401295559594844e-03,
+      -4.053984617694676175e-04, 3.409032519425078027e-04,
+      -1.217154259382106555e-04, -1.517132787898375553e-05,
+      1.275770753460001047e-05,  -1.104423096905816498e-01,
+      4.615651100464009809e-02,  8.344619780982527601e-02,
+      -1.998235369855275168e-01, 8.508819942125579738e-02,
+      1.528709647298205909e-01,  8.333302476347614896e-02,
+      -3.488524142655123617e-02, -6.303339769808283255e-02,
+      -7.468341447282240975e-02, -1.443673498458480642e-01,
+      9.485360739696327426e-02,  -2.685004652445167612e-04,
+      -1.702408228533323561e-02, 1.097613894113106531e-02,
+      9.496752299747332482e-02,  1.714581306702349373e-01,
+      -1.128066531362114239e-01, -2.109671824413435984e-03,
+      -2.629619271223545066e-04, 2.211270750801623281e-04,
+      1.011694656468142307e-02,  1.261035832424879221e-03,
+      -1.060416495448196581e-03, 2.326027531269699879e-04,
+      2.899297772687444119e-05,  -2.438045854305356789e-05,
+      -9.775618976121780001e-04, 7.897148922927013995e-03,
+      -1.259878571596698138e-02, -5.534571406250721713e-03,
+      2.552681480358522451e-02,  -4.094434810336724379e-02,
+      -1.258721457759937913e-02, 4.161890111720080443e-02,
+      -6.708566706120022705e-02, 3.521744971093632853e-02,
+      8.557787631933998912e-03,  -9.738493960065902622e-03,
+      -8.446926488038911107e-02, -2.017604402799078392e-02,
+      2.285024948138817888e-02,  -9.755577915095828626e-03,
+      -2.364722966186930900e-03, 2.689144780896026744e-03,
+      8.392348196279006065e-05,  1.046071729847805219e-05,
+      -8.796512273720217211e-06, -2.967282659264359589e-03,
+      -3.698595949224694123e-04, 3.110182957302592738e-04,
+      -1.688223115474902841e-03, -2.104300767164184042e-04,
+      1.769525645115341121e-04,  -1.040849854787611189e-01,
+      4.406117175034113265e-02,  7.931633477513304331e-02,
+      3.539829580561168476e-02,  -1.443144702217136026e-02,
+      -2.631106338063535569e-02, -4.383990895980735547e-02,
+      1.895493123709470276e-02,  3.388325869579450478e-02,
+      1.809448338386955915e-02,  4.269882582195522885e-02,
+      -2.795653019460052346e-02, 4.363124777259473619e-02,
+      8.597058258914810902e-02,  -5.646456449126337207e-02,
+      4.431189331687027805e-02,  7.186269332716928304e-02,
+      -4.739074421553418626e-02, 7.807665162715203382e-05,
+      9.731933913865978996e-06,  -8.183671700296416994e-06,
+      2.525821455836478949e-03,  3.148332692827336839e-04,
+      -2.647461582604813284e-04, 5.088778918832323993e-03,
+      6.342953893162101269e-04,  -5.333847591977234877e-04,
+      1.765533347871811772e-03,  -1.422682766506909793e-02,
+      2.269730547460076936e-02,  2.888222424864686153e-04,
+      -4.083171371247279469e-03, 6.494062010930001794e-03,
+      1.594130471018519873e-02,  -4.922350239779287734e-02,
+      7.944117864515577720e-02,  -5.516443865142822006e-02,
+      -1.340804559261108905e-02, 1.525892700429632917e-02,
+      7.450140187529649682e-02,  1.809617933997387934e-02,
+      -2.059052256811338619e-02, -3.118940445306414219e-02,
+      -7.412336287839308216e-03, 8.382871287998559101e-03,
+      5.408910405506207452e-04,  6.741984641424155129e-05,
+      -5.669396175743063380e-05, 4.696290607396231285e-04,
+      5.853733334998132494e-05,  -4.922457577157534367e-05,
+      -5.350269144276134821e-03, -6.668890718077897942e-04,
+      5.607930831110975083e-04,  3.013271000130106694e-02,
+      -1.241570117891090119e-02, -2.255430712666738752e-02,
+      -1.643158253499694271e-02, 6.876116339617444236e-03,
+      1.242585434168312457e-02,  2.120265775977718363e-03,
+      -2.988284987993198010e-03, -4.123302560925387432e-03,
+      3.528008965720314666e-02,  -1.132921329184741026e-02,
+      6.435692645130823564e-03,  -2.115291124444698342e-02,
+      -2.971050496327276927e-02, 1.966236467455729012e-02,
+      -2.194244461519655881e-02, -1.469000955331024871e-02,
+      1.000316933044766501e-02,  -2.208576023807403820e-03,
+      -2.752899293131040766e-04, 2.314938041951108548e-04,
+      -5.840262773118632192e-04, -7.279647649213021596e-05,
+      6.121521886838239123e-05,  -1.263538670848133802e-03,
+      -1.574949051482092536e-04, 1.324388975109944740e-04,
+      8.955566031735841259e-03,  -2.660296383100100095e-02,
+      4.296567375352825652e-02,  2.380373596470350059e-02,
+      -7.784355459714024927e-02, 1.255004729498893912e-01,
+      -1.824501349606121037e-02, 3.948761180940744964e-02,
+      -6.423389834199008663e-02, 1.038606825469969019e-02,
+      2.616819816765625015e-03,  -3.006960935423356324e-03,
+      -1.864007491704059577e-02, -4.504736174636922615e-03,
+      5.118497771104379632e-03,  1.680266347982039554e-01,
+      4.105963063126880086e-02,  -4.679634408112137711e-02,
+      8.392348196278930170e-05,  1.046071729847797087e-05,
+      -8.796512273720142672e-06, -2.967282659264356987e-03,
+      -3.698595949224691413e-04, 3.110182957302590027e-04,
+      -1.688223115474903708e-03, -2.104300767164184855e-04,
+      1.769525645115341934e-04,  -1.040849854787611189e-01,
+      4.406117175034113265e-02,  7.931633477513304331e-02,
+      3.539829580561167782e-02,  -1.443144702217136026e-02,
+      -2.631106338063535569e-02, -4.383990895980735547e-02,
+      1.895493123709470276e-02,  3.388325869579450478e-02,
+      1.809448338386955221e-02,  4.269882582195521498e-02,
+      -2.795653019460051653e-02, 4.363124777259472925e-02,
+      8.597058258914809514e-02,  -5.646456449126335819e-02,
+      4.431189331687027111e-02,  7.186269332716926916e-02,
+      -4.739074421553417932e-02, 7.807665162715246750e-05,
+      9.731933913866019654e-06,  -8.183671700296457651e-06,
+      2.525821455836478515e-03,  3.148332692827336297e-04,
+      -2.647461582604812742e-04, 5.088778918832324860e-03,
+      6.342953893162102353e-04,  -5.333847591977235961e-04,
+      1.765533347871809603e-03,  -1.422682766506909793e-02,
+      2.269730547460076589e-02,  2.888222424864694826e-04,
+      -4.083171371247282938e-03, 6.494062010930008733e-03,
+      1.594130471018519873e-02,  -4.922350239779287040e-02,
+      7.944117864515577720e-02,  -5.516443865142821312e-02,
+      -1.340804559261108558e-02, 1.525892700429632570e-02,
+      7.450140187529649682e-02,  1.809617933997387934e-02,
+      -2.059052256811338966e-02, -3.118940445306412831e-02,
+      -7.412336287839304746e-03, 8.382871287998553897e-03,
+      -9.575909105642434974e-04, -1.193597735547498307e-04,
+      1.003707186710399045e-04,  -9.520061199010912585e-05,
+      -1.186636523389461756e-05, 9.978534401229592523e-06,
+      -5.876800709203859434e-03, -7.325190685693192200e-04,
+      6.159819440242017292e-04,  -1.659431774532551043e-02,
+      6.520628417529478540e-03,  1.204087494393247214e-02,
+      6.518824051016284399e-03,  -2.745500204548994606e-03,
+      -4.950724849051978994e-03, -5.340810191179472081e-03,
+      3.101366677982481286e-03,  5.077959020099345744e-03,
+      7.727976016970144156e-03,  7.022558645366243878e-03,
+      -4.714356496325102820e-03, 7.018017321145150929e-03,
+      1.341962078953426278e-02,  -8.818944869050635710e-03,
+      -2.755773236988961865e-03, 1.079245666846929096e-02,
+      -6.886663303228377636e-03, 9.801230913130992879e-04,
+      1.221683173308112048e-04,  -1.027324486645460452e-04,
+      1.233918620327190629e-04,  1.538028875195364422e-05,
+      -1.293342463232469071e-05, 4.892751025155074075e-03,
+      6.098613175830685205e-04,  -5.128379261493998297e-04,
+      -7.792305682365031905e-03, 2.541307371885552502e-02,
+      -4.097328323558844382e-02, 2.530143617608526449e-02,
+      -8.265149730513186854e-02, 1.332544508945474881e-01,
+      -1.184335640259520997e-02, 3.220055758982264676e-02,
+      -5.209911236104310117e-02, 8.090761694886683397e-02,
+      1.959431243541279177e-02,  -2.227702786419644143e-02,
+      1.968691296265078980e-02,  4.764576998712748319e-03,
+      -5.415896903683155988e-03, 1.534638141861073557e-01,
+      3.728680895816388619e-02,  -4.242975875503233324e-02};
   std::vector expected_gt;
   std::vector expected_gv;
   int natoms = 6;
@@ -147,36 +457,34 @@ class TestInferDeepPolarNew : public ::testing::Test
 
   void SetUp() override {
     std::string file_name = "../../tests/infer/deeppolar_new.pbtxt";
-    deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppolar_new.pbtxt", "deeppolar_new.pb");
+    deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppolar_new.pbtxt",
+                                "deeppolar_new.pb");
     dp.init("deeppolar_new.pb");
-    odim = dp.output_dim ();
+    odim = dp.output_dim();
 
     expected_gt.resize(odim);
-    for(int ii = 0; ii < nsel; ++ii){
-      for(int dd = 0; dd < odim; ++dd){
-	      expected_gt[dd] += expected_t[ii*odim+dd];
+    for (int ii = 0; ii < nsel; ++ii) {
+      for (int dd = 0; dd < odim; ++dd) {
+        expected_gt[dd] += expected_t[ii * odim + dd];
       }
     }
 
     expected_gv.resize(odim * 9);
-    for (int kk = 0; kk < odim; ++kk){
-      for(int ii = 0; ii < natoms; ++ii){
-        for(int dd = 0; dd < 9; ++dd){
-          expected_gv[kk*9 + dd] += expected_v[kk*natoms*9 + ii*9 + dd];
+    for (int kk = 0; kk < odim; ++kk) {
+      for (int ii = 0; ii < natoms; ++ii) {
+        for (int dd = 0; dd < 9; ++dd) {
+          expected_gv[kk * 9 + dd] += expected_v[kk * natoms * 9 + ii * 9 + dd];
         }
       }
     }
   };
 
-  void TearDown() override {
-    remove( "deeppolar_new.pb" ) ;
-  };
+  void TearDown() override { remove("deeppolar_new.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepPolarNew, ValueTypes);
 
-TYPED_TEST(TestInferDeepPolarNew, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepPolarNew, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -201,50 +509,48 @@ TYPED_TEST(TestInferDeepPolarNew, cpu_build_nlist)
 
   dp.compute(at, coord, atype, box);
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
 
   dp.compute(gt, ff, vv, coord, atype, box);
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   EXPECT_EQ(ff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(ff[ii] - expected_f[ii]), EPSILON);
   }
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
 
   dp.compute(gt, ff, vv, at, av, coord, atype, box);
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   EXPECT_EQ(ff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(ff[ii] - expected_f[ii]), EPSILON);
   }
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
   EXPECT_EQ(av.size(), expected_v.size());
-  for(int ii = 0; ii < expected_v.size(); ++ii){
+  for (int ii = 0; ii < expected_v.size(); ++ii) {
     EXPECT_LT(fabs(av[ii] - expected_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPolarNew, cpu_lmp_nlist)
-{
+TYPED_TEST(TestInferDeepPolarNew, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -259,87 +565,88 @@ TYPED_TEST(TestInferDeepPolarNew, cpu_lmp_nlist)
   int& odim = this->odim;
   deepmd::DeepTensor& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
+  std::vector atype_cpy, mapping;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  std::vector > nlist_data;
+  std::vector > nlist_data;
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   std::vector gt, ff, vv, at, av;
 
-  dp.compute(at, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  dp.compute(at, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
 
-
-  dp.compute(gt, ff, vv, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  dp.compute(gt, ff, vv, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   // remove ghost atoms
-  std::vector rff (odim * nloc * 3);
-  for(int kk = 0; kk < odim; ++kk){
-    _fold_back(rff.begin() + kk * nloc * 3, ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
+  std::vector rff(odim * nloc * 3);
+  for (int kk = 0; kk < odim; ++kk) {
+    _fold_back(rff.begin() + kk * nloc * 3,
+                          ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
   }
   EXPECT_EQ(rff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(rff[ii] - expected_f[ii]), EPSILON);
   }
   // virial
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
 
+  dp.compute(gt, ff, vv, at, av, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist);
 
-  dp.compute(gt, ff, vv, at, av, coord_cpy, atype_cpy, box, nall-nloc, inlist);
-  
   EXPECT_EQ(gt.size(), expected_gt.size());
-  for(int ii = 0; ii < expected_gt.size(); ++ii){
+  for (int ii = 0; ii < expected_gt.size(); ++ii) {
     EXPECT_LT(fabs(gt[ii] - expected_gt[ii]), EPSILON);
   }
   // remove ghost atoms
-  for(int kk = 0; kk < odim; ++kk){
-    _fold_back(rff.begin() + kk * nloc * 3, ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
+  for (int kk = 0; kk < odim; ++kk) {
+    _fold_back(rff.begin() + kk * nloc * 3,
+                          ff.begin() + kk * nall * 3, mapping, nloc, nall, 3);
   }
   EXPECT_EQ(rff.size(), expected_f.size());
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(rff[ii] - expected_f[ii]), EPSILON);
   }
   // virial
   EXPECT_EQ(vv.size(), expected_gv.size());
-  for(int ii = 0; ii < expected_gv.size(); ++ii){
+  for (int ii = 0; ii < expected_gv.size(); ++ii) {
     EXPECT_LT(fabs(vv[ii] - expected_gv[ii]), EPSILON);
   }
   // atom tensor
   EXPECT_EQ(at.size(), expected_t.size());
-  for(int ii = 0; ii < expected_t.size(); ++ii){
+  for (int ii = 0; ii < expected_t.size(); ++ii) {
     EXPECT_LT(fabs(at[ii] - expected_t[ii]), EPSILON);
   }
   // atom virial
-  std::vector rav (odim * nloc * 9);
-  for(int kk = 0; kk < odim; ++kk){
-    _fold_back(rav.begin() + kk * nloc * 9, av.begin() + kk * nall * 9, mapping, nloc, nall, 9);
+  std::vector rav(odim * nloc * 9);
+  for (int kk = 0; kk < odim; ++kk) {
+    _fold_back(rav.begin() + kk * nloc * 9,
+                          av.begin() + kk * nall * 9, mapping, nloc, nall, 9);
   }
   EXPECT_EQ(rav.size(), expected_v.size());
-  for(int ii = 0; ii < expected_v.size(); ++ii){
+  for (int ii = 0; ii < expected_v.size(); ++ii) {
     EXPECT_LT(fabs(rav[ii] - expected_v[ii]), EPSILON);
   }
 }
 
-TYPED_TEST(TestInferDeepPolarNew, print_summary)
-{
+TYPED_TEST(TestInferDeepPolarNew, print_summary) {
   deepmd::DeepTensor& dp = this->dp;
   dp.print_summary("");
 }
diff --git a/source/api_cc/tests/test_deeppot_a.cc b/source/api_cc/tests/test_deeppot_a.cc
index 54428e2bbc..df66ea4eaa 100644
--- a/source/api_cc/tests/test_deeppot_a.cc
+++ b/source/api_cc/tests/test_deeppot_a.cc
@@ -1,52 +1,77 @@
+#include 
 #include 
-#include 
+#include 
+#include 
+
 #include 
+#include 
 #include 
 #include 
+
 #include "DeepPot.h"
 #include "neighbor_list.h"
 #include "test_utils.h"
 
-#include 
-#include 
-#include   
-
 template 
-class TestInferDeepPotA : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepPotA : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector expected_e = {
-    -9.275780747115504710e+01,-1.863501786584258468e+02,-1.863392472863538103e+02,-9.279281325486221021e+01,-1.863671545232153903e+02,-1.863619822847602165e+02
-  };
+      -9.275780747115504710e+01, -1.863501786584258468e+02,
+      -1.863392472863538103e+02, -9.279281325486221021e+01,
+      -1.863671545232153903e+02, -1.863619822847602165e+02};
   std::vector expected_f = {
-    -3.034045420701179663e-01,8.405844663871177014e-01,7.696947487118485642e-02,7.662001266663505117e-01,-1.880601391333554251e-01,-6.183333871091722944e-01,-5.036172391059643427e-01,-6.529525836149027151e-01,5.432962643022043459e-01,6.382357912332115024e-01,-1.748518296794561167e-01,3.457363524891907125e-01,1.286482986991941552e-03,3.757251165286925043e-01,-5.972588700887541124e-01,-5.987006197104716154e-01,-2.004450304880958100e-01,2.495901655353461868e-01
-  };
+      -3.034045420701179663e-01, 8.405844663871177014e-01,
+      7.696947487118485642e-02,  7.662001266663505117e-01,
+      -1.880601391333554251e-01, -6.183333871091722944e-01,
+      -5.036172391059643427e-01, -6.529525836149027151e-01,
+      5.432962643022043459e-01,  6.382357912332115024e-01,
+      -1.748518296794561167e-01, 3.457363524891907125e-01,
+      1.286482986991941552e-03,  3.757251165286925043e-01,
+      -5.972588700887541124e-01, -5.987006197104716154e-01,
+      -2.004450304880958100e-01, 2.495901655353461868e-01};
   std::vector expected_v = {
-    -2.912234126853306959e-01,-3.800610846612756388e-02,2.776624987489437202e-01,-5.053761003913598976e-02,-3.152373041953385746e-01,1.060894290092162379e-01,2.826389131596073745e-01,1.039129970665329250e-01,-2.584378792325942586e-01,-3.121722367954994914e-01,8.483275876786681990e-02,2.524662342344257682e-01,4.142176771106586414e-02,-3.820285230785245428e-02,-2.727311173065460545e-02,2.668859789777112135e-01,-6.448243569420382404e-02,-2.121731470426218846e-01,-8.624335220278558922e-02,-1.809695356746038597e-01,1.529875294531883312e-01,-1.283658185172031341e-01,-1.992682279795223999e-01,1.409924999632362341e-01,1.398322735274434292e-01,1.804318474574856390e-01,-1.470309318999652726e-01,-2.593983661598450730e-01,-4.236536279233147489e-02,3.386387920184946720e-02,-4.174017537818433543e-02,-1.003500282164128260e-01,1.525690815194478966e-01,3.398976109910181037e-02,1.522253908435125536e-01,-2.349125581341701963e-01,9.515545977581392825e-04,-1.643218849228543846e-02,1.993234765412972564e-02,6.027265332209678569e-04,-9.563256398907417355e-02,1.510815124001868293e-01,-7.738094816888557714e-03,1.502832772532304295e-01,-2.380965783745832010e-01,-2.309456719810296654e-01,-6.666961081213038098e-02,7.955566551234216632e-02,-8.099093777937517447e-02,-3.386641099800401927e-02,4.447884755740908608e-02,1.008593228579038742e-01,4.556718179228393811e-02,-6.078081273849572641e-02
-  };
+      -2.912234126853306959e-01, -3.800610846612756388e-02,
+      2.776624987489437202e-01,  -5.053761003913598976e-02,
+      -3.152373041953385746e-01, 1.060894290092162379e-01,
+      2.826389131596073745e-01,  1.039129970665329250e-01,
+      -2.584378792325942586e-01, -3.121722367954994914e-01,
+      8.483275876786681990e-02,  2.524662342344257682e-01,
+      4.142176771106586414e-02,  -3.820285230785245428e-02,
+      -2.727311173065460545e-02, 2.668859789777112135e-01,
+      -6.448243569420382404e-02, -2.121731470426218846e-01,
+      -8.624335220278558922e-02, -1.809695356746038597e-01,
+      1.529875294531883312e-01,  -1.283658185172031341e-01,
+      -1.992682279795223999e-01, 1.409924999632362341e-01,
+      1.398322735274434292e-01,  1.804318474574856390e-01,
+      -1.470309318999652726e-01, -2.593983661598450730e-01,
+      -4.236536279233147489e-02, 3.386387920184946720e-02,
+      -4.174017537818433543e-02, -1.003500282164128260e-01,
+      1.525690815194478966e-01,  3.398976109910181037e-02,
+      1.522253908435125536e-01,  -2.349125581341701963e-01,
+      9.515545977581392825e-04,  -1.643218849228543846e-02,
+      1.993234765412972564e-02,  6.027265332209678569e-04,
+      -9.563256398907417355e-02, 1.510815124001868293e-01,
+      -7.738094816888557714e-03, 1.502832772532304295e-01,
+      -2.380965783745832010e-01, -2.309456719810296654e-01,
+      -6.666961081213038098e-02, 7.955566551234216632e-02,
+      -8.099093777937517447e-02, -3.386641099800401927e-02,
+      4.447884755740908608e-02,  1.008593228579038742e-01,
+      4.556718179228393811e-02,  -6.078081273849572641e-02};
   int natoms;
   double expected_tot_e;
-  std::vectorexpected_tot_v;
+  std::vector expected_tot_v;
 
   deepmd::DeepPot dp;
 
   void SetUp() override {
     std::string file_name = "../../tests/infer/deeppot.pbtxt";
-    deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt", "deeppot.pb");
+    deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt",
+                                "deeppot.pb");
 
     dp.init("deeppot.pb");
 
@@ -56,25 +81,22 @@ class TestInferDeepPotA : public ::testing::Test
     expected_tot_e = 0.;
     expected_tot_v.resize(9);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
-    for(int ii = 0; ii < natoms; ++ii){
+    for (int ii = 0; ii < natoms; ++ii) {
       expected_tot_e += expected_e[ii];
     }
-    for(int ii = 0; ii < natoms; ++ii){
-      for(int dd = 0; dd < 9; ++dd){
-	expected_tot_v[dd] += expected_v[ii*9+dd];
+    for (int ii = 0; ii < natoms; ++ii) {
+      for (int dd = 0; dd < 9; ++dd) {
+        expected_tot_v[dd] += expected_v[ii * 9 + dd];
       }
     }
   };
 
-  void TearDown() override {
-    remove( "deeppot.pb" ) ;
-  };
+  void TearDown() override { remove("deeppot.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepPotA, ValueTypes);
 
-TYPED_TEST(TestInferDeepPotA, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepPotA, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -84,26 +106,25 @@ TYPED_TEST(TestInferDeepPotA, cpu_build_nlist)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   double ener;
   std::vector force, virial;
   dp.compute(ener, force, virial, coord, atype, box);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
 
-TYPED_TEST(TestInferDeepPotA, cpu_build_nlist_numfv)
-{
+TYPED_TEST(TestInferDeepPotA, cpu_build_nlist_numfv) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -113,23 +134,20 @@ TYPED_TEST(TestInferDeepPotA, cpu_build_nlist_numfv)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
-  class MyModel : public EnergyModelTest
-  {
-    deepmd::DeepPot & mydp;
-    const std::vector atype;
-public:
-    MyModel(
-	deepmd::DeepPot & dp_,
-	const std::vector & atype_
-	) : mydp(dp_), atype(atype_) {};
-    virtual void compute (
-	double & ener,
-	std::vector &	force,
-	std::vector &	virial,
-	const std::vector & coord,
-	const std::vector & box) {
+  class MyModel : public EnergyModelTest {
+    deepmd::DeepPot& mydp;
+    const std::vector atype;
+
+   public:
+    MyModel(deepmd::DeepPot& dp_, const std::vector& atype_)
+        : mydp(dp_), atype(atype_){};
+    virtual void compute(double& ener,
+                         std::vector& force,
+                         std::vector& virial,
+                         const std::vector& coord,
+                         const std::vector& box) {
       mydp.compute(ener, force, virial, coord, atype, box);
     }
   };
@@ -157,9 +175,7 @@ TYPED_TEST(TestInferDeepPotA, cpu_build_nlist_numfv)
   model.test_v(coord, box_);
 }
 
-
-TYPED_TEST(TestInferDeepPotA, cpu_build_nlist_atomic)
-{
+TYPED_TEST(TestInferDeepPotA, cpu_build_nlist_atomic) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -169,35 +185,33 @@ TYPED_TEST(TestInferDeepPotA, cpu_build_nlist_atomic)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   double ener;
   std::vector force, virial, atom_ener, atom_vir;
   dp.compute(ener, force, virial, atom_ener, atom_vir, coord, atype, box);
-  
-  EXPECT_EQ(force.size(), natoms*3);
+
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
   EXPECT_EQ(atom_ener.size(), natoms);
-  EXPECT_EQ(atom_vir.size(), natoms*9);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms*9; ++ii){
+  for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist)
-{
+TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -207,59 +221,59 @@ TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
-  
+  convert_nlist(inlist, nlist_data);
+
   double ener;
   std::vector force_, virial;
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 0);
   std::vector force;
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 
   ener = 0.;
   std::fill(force_.begin(), force_.end(), 0.0);
   std::fill(virial.begin(), virial.end(), 0.0);
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 1);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 1);
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist_atomic)
-{
+TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist_atomic) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -269,45 +283,46 @@ TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist_atomic)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
-  
+  convert_nlist(inlist, nlist_data);
+
   double ener;
   std::vector force_, atom_ener_, atom_vir_, virial;
   std::vector force, atom_ener, atom_vir;
-  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
+  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy,
+             box, nall - nloc, inlist, 0);
   _fold_back(force, force_, mapping, nloc, nall, 3);
   _fold_back(atom_ener, atom_ener_, mapping, nloc, nall, 1);
   _fold_back(atom_vir, atom_vir_, mapping, nloc, nall, 9);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
   EXPECT_EQ(atom_ener.size(), natoms);
-  EXPECT_EQ(atom_vir.size(), natoms*9);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms*9; ++ii){
+  for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
   }
 
@@ -315,35 +330,34 @@ TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist_atomic)
   std::fill(force_.begin(), force_.end(), 0.0);
   std::fill(virial.begin(), virial.end(), 0.0);
   std::fill(atom_ener_.begin(), atom_ener_.end(), 0.0);
-  std::fill(atom_vir_.begin(), atom_vir_.end(), 0.0);  
-  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy, box, nall-nloc, inlist, 1);
+  std::fill(atom_vir_.begin(), atom_vir_.end(), 0.0);
+  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy,
+             box, nall - nloc, inlist, 1);
   _fold_back(force, force_, mapping, nloc, nall, 3);
   _fold_back(atom_ener, atom_ener_, mapping, nloc, nall, 1);
   _fold_back(atom_vir, atom_vir_, mapping, nloc, nall, 9);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
   EXPECT_EQ(atom_ener.size(), natoms);
-  EXPECT_EQ(atom_vir.size(), natoms*9);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms*9; ++ii){
+  for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist_2rc)
-{
+TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist_2rc) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -353,59 +367,59 @@ TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist_2rc)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc*2);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc * 2);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
-  
+  convert_nlist(inlist, nlist_data);
+
   double ener;
-  std::vector force_(nall*3, 0.0), virial(9, 0.0);
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
+  std::vector force_(nall * 3, 0.0), virial(9, 0.0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 0);
   std::vector force;
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 
   ener = 0.;
   std::fill(force_.begin(), force_.end(), 0.0);
   std::fill(virial.begin(), virial.end(), 0.0);
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 1);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 1);
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist_type_sel)
-{
+TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist_type_sel) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -415,59 +429,59 @@ TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist_type_sel)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   float rc = dp.cutoff();
 
   // add vir atoms
   int nvir = 2;
-  std::vector coord_vir(nvir*3);
+  std::vector coord_vir(nvir * 3);
   std::vector atype_vir(nvir, 2);
-  for(int ii = 0; ii < nvir; ++ii){
+  for (int ii = 0; ii < nvir; ++ii) {
     coord_vir[ii] = coord[ii];
-  }  
+  }
   coord.insert(coord.begin(), coord_vir.begin(), coord_vir.end());
   atype.insert(atype.begin(), atype_vir.begin(), atype_vir.end());
   natoms += nvir;
-  std::vector expected_f_vir(nvir*3, 0.0);
-  expected_f.insert(expected_f.begin(), expected_f_vir.begin(), expected_f_vir.end());
+  std::vector expected_f_vir(nvir * 3, 0.0);
+  expected_f.insert(expected_f.begin(), expected_f_vir.begin(),
+                    expected_f_vir.end());
 
   // build nlist
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   // dp compute
   double ener;
-  std::vector force_(nall*3, 0.0), virial(9, 0.0);
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
+  std::vector force_(nall * 3, 0.0), virial(9, 0.0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 0);
   // fold back
   std::vector force;
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist_type_sel_atomic)
-{
+TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist_type_sel_atomic) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -477,91 +491,117 @@ TYPED_TEST(TestInferDeepPotA, cpu_lmp_nlist_type_sel_atomic)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   float rc = dp.cutoff();
 
   // add vir atoms
   int nvir = 2;
-  std::vector coord_vir(nvir*3);
+  std::vector coord_vir(nvir * 3);
   std::vector atype_vir(nvir, 2);
-  for(int ii = 0; ii < nvir; ++ii){
+  for (int ii = 0; ii < nvir; ++ii) {
     coord_vir[ii] = coord[ii];
-  }  
+  }
   coord.insert(coord.begin(), coord_vir.begin(), coord_vir.end());
   atype.insert(atype.begin(), atype_vir.begin(), atype_vir.end());
   natoms += nvir;
-  std::vector expected_f_vir(nvir*3, 0.0);
-  expected_f.insert(expected_f.begin(), expected_f_vir.begin(), expected_f_vir.end());
+  std::vector expected_f_vir(nvir * 3, 0.0);
+  expected_f.insert(expected_f.begin(), expected_f_vir.begin(),
+                    expected_f_vir.end());
 
   // build nlist
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   // dp compute
   double ener;
-  std::vector force_(nall*3, 0.0), virial(9, 0.0), atomic_energy, atomic_virial;
-  dp.compute(ener, force_, virial, atomic_energy, atomic_virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
+  std::vector force_(nall * 3, 0.0), virial(9, 0.0), atomic_energy,
+      atomic_virial;
+  dp.compute(ener, force_, virial, atomic_energy, atomic_virial, coord_cpy,
+             atype_cpy, box, nall - nloc, inlist, 0);
   // fold back
   std::vector force;
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
 
-TYPED_TEST(TestInferDeepPotA, print_summary)
-{
+TYPED_TEST(TestInferDeepPotA, print_summary) {
   deepmd::DeepPot& dp = this->dp;
   dp.print_summary("");
 }
 
-
 template 
-class TestInferDeepPotANoPbc : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
+class TestInferDeepPotANoPbc : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
   std::vector box = {};
   std::vector expected_e = {
-    -9.255934839310273787e+01,-1.863253376736990106e+02,-1.857237299341402945e+02,-9.279308539717486326e+01,-1.863708105823244239e+02,-1.863635196514972563e+02
-  };
+      -9.255934839310273787e+01, -1.863253376736990106e+02,
+      -1.857237299341402945e+02, -9.279308539717486326e+01,
+      -1.863708105823244239e+02, -1.863635196514972563e+02};
   std::vector expected_f = {
-    -2.161037360255332107e+00,9.052994347015581589e-01,1.635379623977007979e+00,2.161037360255332107e+00,-9.052994347015581589e-01,-1.635379623977007979e+00,-1.167128117249453811e-02,1.371975700096064992e-03,-1.575265180249604477e-03,6.226508593971802341e-01,-1.816734122009256991e-01,3.561766019664774907e-01,-1.406075393906316626e-02,3.789140061530929526e-01,-6.018777878642909140e-01,-5.969188242856223736e-01,-1.986125696522633155e-01,2.472764510780630642e-01    
-  };
+      -2.161037360255332107e+00, 9.052994347015581589e-01,
+      1.635379623977007979e+00,  2.161037360255332107e+00,
+      -9.052994347015581589e-01, -1.635379623977007979e+00,
+      -1.167128117249453811e-02, 1.371975700096064992e-03,
+      -1.575265180249604477e-03, 6.226508593971802341e-01,
+      -1.816734122009256991e-01, 3.561766019664774907e-01,
+      -1.406075393906316626e-02, 3.789140061530929526e-01,
+      -6.018777878642909140e-01, -5.969188242856223736e-01,
+      -1.986125696522633155e-01, 2.472764510780630642e-01};
   std::vector expected_v = {
-    -7.042445481792056761e-01,2.950213647777754078e-01,5.329418202437231633e-01,2.950213647777752968e-01,-1.235900311906896754e-01,-2.232594111831812944e-01,5.329418202437232743e-01,-2.232594111831813499e-01,-4.033073234276823849e-01,-8.949230984097404917e-01,3.749002169013777030e-01,6.772391014992630298e-01,3.749002169013777586e-01,-1.570527935667933583e-01,-2.837082722496912512e-01,6.772391014992631408e-01,-2.837082722496912512e-01,-5.125052659994422388e-01,4.858210330291591605e-02,-6.902596153269104431e-03,6.682612642430500391e-03,-5.612247004554610057e-03,9.767795567660207592e-04,-9.773758942738038254e-04,5.638322117219018645e-03,-9.483806049779926932e-04,8.493873281881353637e-04,-2.941738570564985666e-01,-4.482529909499673171e-02,4.091569840186781021e-02,-4.509020615859140463e-02,-1.013919988807244071e-01,1.551440772665269030e-01,4.181857726606644232e-02,1.547200233064863484e-01,-2.398213304685777592e-01,-3.218625798524068354e-02,-1.012438450438508421e-02,1.271639330380921855e-02,3.072814938490859779e-03,-9.556241797915024372e-02,1.512251983492413077e-01,-8.277872384009607454e-03,1.505412040827929787e-01,-2.386150620881526407e-01,-2.312295470054945568e-01,-6.631490213524345034e-02,7.932427266386249398e-02,-8.053754366323923053e-02,-3.294595881137418747e-02,4.342495071150231922e-02,1.004599500126941436e-01,4.450400364869536163e-02,-5.951077548033092968e-02
-  };
+      -7.042445481792056761e-01, 2.950213647777754078e-01,
+      5.329418202437231633e-01,  2.950213647777752968e-01,
+      -1.235900311906896754e-01, -2.232594111831812944e-01,
+      5.329418202437232743e-01,  -2.232594111831813499e-01,
+      -4.033073234276823849e-01, -8.949230984097404917e-01,
+      3.749002169013777030e-01,  6.772391014992630298e-01,
+      3.749002169013777586e-01,  -1.570527935667933583e-01,
+      -2.837082722496912512e-01, 6.772391014992631408e-01,
+      -2.837082722496912512e-01, -5.125052659994422388e-01,
+      4.858210330291591605e-02,  -6.902596153269104431e-03,
+      6.682612642430500391e-03,  -5.612247004554610057e-03,
+      9.767795567660207592e-04,  -9.773758942738038254e-04,
+      5.638322117219018645e-03,  -9.483806049779926932e-04,
+      8.493873281881353637e-04,  -2.941738570564985666e-01,
+      -4.482529909499673171e-02, 4.091569840186781021e-02,
+      -4.509020615859140463e-02, -1.013919988807244071e-01,
+      1.551440772665269030e-01,  4.181857726606644232e-02,
+      1.547200233064863484e-01,  -2.398213304685777592e-01,
+      -3.218625798524068354e-02, -1.012438450438508421e-02,
+      1.271639330380921855e-02,  3.072814938490859779e-03,
+      -9.556241797915024372e-02, 1.512251983492413077e-01,
+      -8.277872384009607454e-03, 1.505412040827929787e-01,
+      -2.386150620881526407e-01, -2.312295470054945568e-01,
+      -6.631490213524345034e-02, 7.932427266386249398e-02,
+      -8.053754366323923053e-02, -3.294595881137418747e-02,
+      4.342495071150231922e-02,  1.004599500126941436e-01,
+      4.450400364869536163e-02,  -5.951077548033092968e-02};
   int natoms;
   double expected_tot_e;
-  std::vectorexpected_tot_v;
+  std::vector expected_tot_v;
 
   deepmd::DeepPot dp;
 
@@ -577,25 +617,22 @@ class TestInferDeepPotANoPbc : public ::testing::Test
     expected_tot_e = 0.;
     expected_tot_v.resize(9);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
-    for(int ii = 0; ii < natoms; ++ii){
+    for (int ii = 0; ii < natoms; ++ii) {
       expected_tot_e += expected_e[ii];
     }
-    for(int ii = 0; ii < natoms; ++ii){
-      for(int dd = 0; dd < 9; ++dd){
-	expected_tot_v[dd] += expected_v[ii*9+dd];
+    for (int ii = 0; ii < natoms; ++ii) {
+      for (int dd = 0; dd < 9; ++dd) {
+        expected_tot_v[dd] += expected_v[ii * 9 + dd];
       }
     }
   };
 
-  void TearDown() override {
-    remove( "deeppot.pb" ) ;
-  };
+  void TearDown() override { remove("deeppot.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepPotANoPbc, ValueTypes);
 
-TYPED_TEST(TestInferDeepPotANoPbc, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepPotANoPbc, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -605,20 +642,20 @@ TYPED_TEST(TestInferDeepPotANoPbc, cpu_build_nlist)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   double ener;
   std::vector force, virial;
   dp.compute(ener, force, virial, coord, atype, box);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
diff --git a/source/api_cc/tests/test_deeppot_model_devi.cc b/source/api_cc/tests/test_deeppot_model_devi.cc
index ed33f8ae05..847545b808 100644
--- a/source/api_cc/tests/test_deeppot_model_devi.cc
+++ b/source/api_cc/tests/test_deeppot_model_devi.cc
@@ -1,34 +1,25 @@
+#include 
 #include 
-#include 
+#include 
+#include 
+
 #include 
+#include 
 #include 
 #include 
+
 #include "DeepPot.h"
 #include "neighbor_list.h"
 #include "test_utils.h"
 
-#include 
-#include 
-#include   
-
 template 
-class TestInferDeepPotModeDevi : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepPotModeDevi : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   int natoms;
 
   deepmd::DeepPot dp0;
@@ -38,50 +29,48 @@ class TestInferDeepPotModeDevi : public ::testing::Test
   void SetUp() override {
     {
       std::string file_name = "../../tests/infer/deeppot.pbtxt";
-      deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt", "deeppot.pb");
+      deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt",
+                                  "deeppot.pb");
       dp0.init("deeppot.pb");
     }
     {
       std::string file_name = "../../tests/infer/deeppot-1.pbtxt";
-      deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot-1.pbtxt", "deeppot-1.pb");
+      deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot-1.pbtxt",
+                                  "deeppot-1.pb");
       dp1.init("deeppot-1.pb");
     }
     dp_md.init(std::vector({"deeppot.pb", "deeppot-1.pb"}));
   };
 
   void TearDown() override {
-    remove( "deeppot.pb" ) ;
-    remove( "deeppot-1.pb" ) ;
+    remove("deeppot.pb");
+    remove("deeppot-1.pb");
   };
 };
 
 TYPED_TEST_SUITE(TestInferDeepPotModeDevi, ValueTypes);
 
 template 
-class TestInferDeepPotModeDeviPython : public ::testing::Test
-{  
-protected:  
+class TestInferDeepPotModeDeviPython : public ::testing::Test {
+ protected:
   std::vector coord = {
-    4.170220047025740423e-02,7.203244934421580703e-02,1.000114374817344942e-01,
-    4.053881673400336005e+00,4.191945144032948461e-02,6.852195003967595510e-02,
-    1.130233257263184132e+00,1.467558908171130543e-02,1.092338594768797883e-01,
-    1.862602113776709242e-02,1.134556072704304919e+00,1.396767474230670159e-01,
-    5.120445224973151355e+00,8.781174363909455272e-02,2.738759319792616331e-03,
-    4.067046751017840300e+00,1.141730480236712753e+00,5.586898284457517128e-02,
-  };
-  std::vector atype = {
-    0, 0, 1, 1, 1, 1
-  };
-  std::vector box = {
-    20., 0., 0., 0., 20., 0., 0., 0., 20.
+      4.170220047025740423e-02, 7.203244934421580703e-02,
+      1.000114374817344942e-01, 4.053881673400336005e+00,
+      4.191945144032948461e-02, 6.852195003967595510e-02,
+      1.130233257263184132e+00, 1.467558908171130543e-02,
+      1.092338594768797883e-01, 1.862602113776709242e-02,
+      1.134556072704304919e+00, 1.396767474230670159e-01,
+      5.120445224973151355e+00, 8.781174363909455272e-02,
+      2.738759319792616331e-03, 4.067046751017840300e+00,
+      1.141730480236712753e+00, 5.586898284457517128e-02,
   };
+  std::vector atype = {0, 0, 1, 1, 1, 1};
+  std::vector box = {20., 0., 0., 0., 20., 0., 0., 0., 20.};
   int natoms;
-  std::vector expected_md_f = {
-    0.509504727653, 0.458424067748, 0.481978258466
-  }; // max min avg
-  std::vector expected_md_v = {
-    0.167004837423,0.00041822790564,0.0804864867641
-  }; // max min avg
+  std::vector expected_md_f = {0.509504727653, 0.458424067748,
+                                          0.481978258466};  // max min avg
+  std::vector expected_md_v = {0.167004837423, 0.00041822790564,
+                                          0.0804864867641};  // max min avg
 
   deepmd::DeepPot dp0;
   deepmd::DeepPot dp1;
@@ -90,28 +79,28 @@ class TestInferDeepPotModeDeviPython : public ::testing::Test
   void SetUp() override {
     {
       std::string file_name = "../../tests/infer/deeppot.pbtxt";
-      deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt", "deeppot.pb");
+      deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot.pbtxt",
+                                  "deeppot.pb");
       dp0.init("deeppot.pb");
     }
     {
       std::string file_name = "../../tests/infer/deeppot-1.pbtxt";
-      deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot-1.pbtxt", "deeppot-1.pb");
+      deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot-1.pbtxt",
+                                  "deeppot-1.pb");
       dp1.init("deeppot-1.pb");
     }
     dp_md.init(std::vector({"deeppot.pb", "deeppot-1.pb"}));
   };
 
   void TearDown() override {
-    remove( "deeppot.pb" ) ;
-    remove( "deeppot-1.pb" ) ;
+    remove("deeppot.pb");
+    remove("deeppot-1.pb");
   };
 };
 
 TYPED_TEST_SUITE(TestInferDeepPotModeDeviPython, ValueTypes);
 
-
-TYPED_TEST(TestInferDeepPotModeDevi, attrs)
-{
+TYPED_TEST(TestInferDeepPotModeDevi, attrs) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -130,8 +119,7 @@ TYPED_TEST(TestInferDeepPotModeDevi, attrs)
   EXPECT_EQ(dp1.dim_aparam(), dp_md.dim_aparam());
 }
 
-TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list)
-{
+TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -141,50 +129,52 @@ TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list)
   deepmd::DeepPot& dp1 = this->dp1;
   deepmd::DeepPotModelDevi& dp_md = this->dp_md;
   float rc = dp_md.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   int nmodel = 2;
-  std::vector edir(nmodel), emd;
-  std::vector > fdir_(nmodel), fdir(nmodel), vdir(nmodel), fmd_, fmd(nmodel), vmd;
-  dp0.compute(edir[0], fdir_[0], vdir[0], coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  dp1.compute(edir[1], fdir_[1], vdir[1], coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  dp_md.compute(emd, fmd_, vmd, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  for(int kk = 0; kk < nmodel; ++kk){
+  std::vector edir(nmodel), emd;
+  std::vector > fdir_(nmodel), fdir(nmodel),
+      vdir(nmodel), fmd_, fmd(nmodel), vmd;
+  dp0.compute(edir[0], fdir_[0], vdir[0], coord_cpy, atype_cpy, box,
+              nall - nloc, inlist, 0);
+  dp1.compute(edir[1], fdir_[1], vdir[1], coord_cpy, atype_cpy, box,
+              nall - nloc, inlist, 0);
+  dp_md.compute(emd, fmd_, vmd, coord_cpy, atype_cpy, box, nall - nloc, inlist,
+                0);
+  for (int kk = 0; kk < nmodel; ++kk) {
     _fold_back(fdir[kk], fdir_[kk], mapping, nloc, nall, 3);
     _fold_back(fmd[kk], fmd_[kk], mapping, nloc, nall, 3);
-  }  
+  }
 
   EXPECT_EQ(edir.size(), emd.size());
   EXPECT_EQ(fdir.size(), fmd.size());
   EXPECT_EQ(vdir.size(), vmd.size());
-  for(int kk = 0; kk < nmodel; ++kk){
+  for (int kk = 0; kk < nmodel; ++kk) {
     EXPECT_EQ(fdir[kk].size(), fmd[kk].size());
     EXPECT_EQ(vdir[kk].size(), vmd[kk].size());
-  }  
-  for(int kk = 0; kk < nmodel; ++kk){
+  }
+  for (int kk = 0; kk < nmodel; ++kk) {
     EXPECT_LT(fabs(edir[kk] - emd[kk]), EPSILON);
-    for(int ii = 0; ii < fdir[0].size(); ++ii){
+    for (int ii = 0; ii < fdir[0].size(); ++ii) {
       EXPECT_LT(fabs(fdir[kk][ii] - fmd[kk][ii]), EPSILON);
     }
-    for(int ii = 0; ii < vdir[0].size(); ++ii){
+    for (int ii = 0; ii < vdir[0].size(); ++ii) {
       EXPECT_LT(fabs(vdir[kk][ii] - vmd[kk][ii]), EPSILON);
     }
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list_atomic)
-{
+TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list_atomic) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -194,62 +184,65 @@ TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list_atomic)
   deepmd::DeepPot& dp1 = this->dp1;
   deepmd::DeepPotModelDevi& dp_md = this->dp_md;
   float rc = dp_md.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   int nmodel = 2;
-  std::vector edir(nmodel), emd;
-  std::vector > fdir_(nmodel), fdir(nmodel), vdir(nmodel), fmd_, fmd(nmodel), vmd, aedir(nmodel), aemd, avdir(nmodel), avdir_(nmodel), avmd(nmodel), avmd_;
-  dp0.compute(edir[0], fdir_[0], vdir[0], aedir[0], avdir_[0], coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  dp1.compute(edir[1], fdir_[1], vdir[1], aedir[1], avdir_[1], coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  dp_md.compute(emd, fmd_, vmd, aemd, avmd_, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  for(int kk = 0; kk < nmodel; ++kk){
+  std::vector edir(nmodel), emd;
+  std::vector > fdir_(nmodel), fdir(nmodel),
+      vdir(nmodel), fmd_, fmd(nmodel), vmd, aedir(nmodel), aemd, avdir(nmodel),
+      avdir_(nmodel), avmd(nmodel), avmd_;
+  dp0.compute(edir[0], fdir_[0], vdir[0], aedir[0], avdir_[0], coord_cpy,
+              atype_cpy, box, nall - nloc, inlist, 0);
+  dp1.compute(edir[1], fdir_[1], vdir[1], aedir[1], avdir_[1], coord_cpy,
+              atype_cpy, box, nall - nloc, inlist, 0);
+  dp_md.compute(emd, fmd_, vmd, aemd, avmd_, coord_cpy, atype_cpy, box,
+                nall - nloc, inlist, 0);
+  for (int kk = 0; kk < nmodel; ++kk) {
     _fold_back(fdir[kk], fdir_[kk], mapping, nloc, nall, 3);
     _fold_back(fmd[kk], fmd_[kk], mapping, nloc, nall, 3);
     _fold_back(avdir[kk], avdir_[kk], mapping, nloc, nall, 9);
     _fold_back(avmd[kk], avmd_[kk], mapping, nloc, nall, 9);
-  }  
+  }
 
   EXPECT_EQ(edir.size(), emd.size());
   EXPECT_EQ(fdir.size(), fmd.size());
   EXPECT_EQ(vdir.size(), vmd.size());
   EXPECT_EQ(aedir.size(), aemd.size());
   EXPECT_EQ(avdir.size(), avmd.size());
-  for(int kk = 0; kk < nmodel; ++kk){
+  for (int kk = 0; kk < nmodel; ++kk) {
     EXPECT_EQ(fdir[kk].size(), fmd[kk].size());
     EXPECT_EQ(vdir[kk].size(), vmd[kk].size());
     EXPECT_EQ(aedir[kk].size(), aemd[kk].size());
     EXPECT_EQ(avdir[kk].size(), avmd[kk].size());
-  }  
-  for(int kk = 0; kk < nmodel; ++kk){
+  }
+  for (int kk = 0; kk < nmodel; ++kk) {
     EXPECT_LT(fabs(edir[kk] - emd[kk]), EPSILON);
-    for(int ii = 0; ii < fdir[0].size(); ++ii){
+    for (int ii = 0; ii < fdir[0].size(); ++ii) {
       EXPECT_LT(fabs(fdir[kk][ii] - fmd[kk][ii]), EPSILON);
     }
-    for(int ii = 0; ii < vdir[0].size(); ++ii){
+    for (int ii = 0; ii < vdir[0].size(); ++ii) {
       EXPECT_LT(fabs(vdir[kk][ii] - vmd[kk][ii]), EPSILON);
     }
-    for(int ii = 0; ii < aedir[0].size(); ++ii){
+    for (int ii = 0; ii < aedir[0].size(); ++ii) {
       EXPECT_LT(fabs(aedir[kk][ii] - aemd[kk][ii]), EPSILON);
     }
-    for(int ii = 0; ii < avdir[0].size(); ++ii){
+    for (int ii = 0; ii < avdir[0].size(); ++ii) {
       EXPECT_LT(fabs(avdir[kk][ii] - avmd[kk][ii]), EPSILON);
     }
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list_std)
-{
+TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list_std) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -259,90 +252,94 @@ TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list_std)
   deepmd::DeepPot& dp1 = this->dp1;
   deepmd::DeepPotModelDevi& dp_md = this->dp_md;
   float rc = dp_md.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   int nmodel = 2;
-  std::vector edir(nmodel), emd;
-  std::vector > fdir_(nmodel), fdir(nmodel), vdir(nmodel), fmd_, fmd(nmodel), vmd;
+  std::vector edir(nmodel), emd;
+  std::vector > fdir_(nmodel), fdir(nmodel),
+      vdir(nmodel), fmd_, fmd(nmodel), vmd;
   std::vector > aemd(nmodel), aemd_, avmd(nmodel), avmd_;
-  dp0.compute(edir[0], fdir_[0], vdir[0], coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  dp1.compute(edir[1], fdir_[1], vdir[1], coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  dp_md.compute(emd, fmd_, vmd, aemd_, avmd_, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  for(int kk = 0; kk < nmodel; ++kk){
+  dp0.compute(edir[0], fdir_[0], vdir[0], coord_cpy, atype_cpy, box,
+              nall - nloc, inlist, 0);
+  dp1.compute(edir[1], fdir_[1], vdir[1], coord_cpy, atype_cpy, box,
+              nall - nloc, inlist, 0);
+  dp_md.compute(emd, fmd_, vmd, aemd_, avmd_, coord_cpy, atype_cpy, box,
+                nall - nloc, inlist, 0);
+  for (int kk = 0; kk < nmodel; ++kk) {
     _fold_back(fdir[kk], fdir_[kk], mapping, nloc, nall, 3);
     _fold_back(fmd[kk], fmd_[kk], mapping, nloc, nall, 3);
     _fold_back(avmd[kk], avmd_[kk], mapping, nloc, nall, 9);
     aemd[kk].resize(nloc);
-    for(int ii = 0; ii < nloc; ++ii){
+    for (int ii = 0; ii < nloc; ++ii) {
       aemd[kk][ii] = aemd_[kk][ii];
     }
-  }  
+  }
 
   // dp compute std e
-  std::vector avg_e, std_e;
+  std::vector avg_e, std_e;
   dp_md.compute_avg(avg_e, aemd);
   dp_md.compute_std_e(std_e, avg_e, aemd);
 
   // manual compute std e
-  std::vector manual_avg_e(nloc);
-  std::vector manual_std_e(nloc);
-  for(int ii = 0; ii < nloc; ++ii){
+  std::vector manual_avg_e(nloc);
+  std::vector manual_std_e(nloc);
+  for (int ii = 0; ii < nloc; ++ii) {
     double avg_e(0.0);
-    for(int kk = 0; kk < nmodel; ++kk){
+    for (int kk = 0; kk < nmodel; ++kk) {
       avg_e += aemd[kk][ii];
     }
     avg_e /= nmodel;
     manual_avg_e[ii] = avg_e;
     double std = 0;
-    for (int kk = 0; kk < nmodel; ++kk){
+    for (int kk = 0; kk < nmodel; ++kk) {
       std += (aemd[kk][ii] - avg_e) * (aemd[kk][ii] - avg_e);
     }
     std = sqrt(std / nmodel);
     manual_std_e[ii] = std;
   }
   EXPECT_EQ(manual_std_e.size(), std_e.size());
-  for(int ii = 0; ii < std_e.size(); ++ii){
+  for (int ii = 0; ii < std_e.size(); ++ii) {
     EXPECT_LT(fabs(manual_avg_e[ii] - avg_e[ii]), EPSILON);
     EXPECT_LT(fabs(manual_std_e[ii] - std_e[ii]), EPSILON);
   }
-  
+
   // dp compute std f
-  std::vector avg_f, std_f;
+  std::vector avg_f, std_f;
   dp_md.compute_avg(avg_f, fmd);
   dp_md.compute_std_f(std_f, avg_f, fmd);
 
   // manual compute std f
-  std::vector manual_std_f(nloc);
-  std::vector manual_rel_std_f(nloc);
+  std::vector manual_std_f(nloc);
+  std::vector manual_rel_std_f(nloc);
   VALUETYPE eps = 0.2;
   EXPECT_EQ(fmd[0].size(), nloc * 3);
-  for(int ii = 0; ii < nloc; ++ii){
-    std::vector avg_f(3, 0.0);
-    for(int dd = 0; dd < 3; ++dd){
-      for(int kk = 0; kk < nmodel; ++kk){
-	avg_f[dd] += fmd[kk][ii*3+dd];
+  for (int ii = 0; ii < nloc; ++ii) {
+    std::vector avg_f(3, 0.0);
+    for (int dd = 0; dd < 3; ++dd) {
+      for (int kk = 0; kk < nmodel; ++kk) {
+        avg_f[dd] += fmd[kk][ii * 3 + dd];
       }
-      avg_f[dd] /= (nmodel) * 1.0;
+      avg_f[dd] /= (nmodel)*1.0;
     }
     VALUETYPE std = 0.;
-    for(int kk = 0; kk < nmodel; ++kk){
-      for(int dd = 0; dd < 3; ++dd){
-	VALUETYPE tmp = fmd[kk][ii*3+dd] - avg_f[dd];
-	std += tmp * tmp;
+    for (int kk = 0; kk < nmodel; ++kk) {
+      for (int dd = 0; dd < 3; ++dd) {
+        VALUETYPE tmp = fmd[kk][ii * 3 + dd] - avg_f[dd];
+        std += tmp * tmp;
       }
     }
     VALUETYPE f_norm = 0;
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       f_norm += avg_f[dd] * avg_f[dd];
     }
     f_norm = sqrt(f_norm);
@@ -352,32 +349,30 @@ TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list_std)
   }
 
   EXPECT_EQ(manual_std_f.size(), std_f.size());
-  for(int ii = 0; ii < std_f.size(); ++ii){
+  for (int ii = 0; ii < std_f.size(); ++ii) {
     EXPECT_LT(fabs(manual_std_f[ii] - std_f[ii]), EPSILON);
   }
   dp_md.compute_relative_std_f(std_f, avg_f, eps);
   EXPECT_EQ(manual_std_f.size(), std_f.size());
-  for(int ii = 0; ii < std_f.size(); ++ii){  
+  for (int ii = 0; ii < std_f.size(); ++ii) {
     EXPECT_LT(fabs(manual_rel_std_f[ii] - std_f[ii]), EPSILON);
   }
 }
 
 template 
-inline VALUETYPE mymax(const std::vector & xx)
-{
+inline VALUETYPE mymax(const std::vector& xx) {
   VALUETYPE ret = 0;
-  for (int ii = 0; ii < xx.size(); ++ii){
+  for (int ii = 0; ii < xx.size(); ++ii) {
     if (xx[ii] > ret) {
       ret = xx[ii];
     }
   }
   return ret;
-};  
+};
 template 
-inline VALUETYPE mymin(const std::vector & xx)
-{
+inline VALUETYPE mymin(const std::vector& xx) {
   VALUETYPE ret = 1e10;
-  for (int ii = 0; ii < xx.size(); ++ii){
+  for (int ii = 0; ii < xx.size(); ++ii) {
     if (xx[ii] < ret) {
       ret = xx[ii];
     }
@@ -385,26 +380,23 @@ inline VALUETYPE mymin(const std::vector & xx)
   return ret;
 };
 template 
-inline VALUETYPE myavg(const std::vector & xx)
-{
+inline VALUETYPE myavg(const std::vector& xx) {
   VALUETYPE ret = 0;
-  for (int ii = 0; ii < xx.size(); ++ii){
+  for (int ii = 0; ii < xx.size(); ++ii) {
     ret += xx[ii];
   }
   return (ret / xx.size());
 };
 template 
-inline VALUETYPE mystd(const std::vector & xx)
-{
+inline VALUETYPE mystd(const std::vector& xx) {
   VALUETYPE ret = 0;
-  for (int ii = 0; ii < xx.size(); ++ii){
+  for (int ii = 0; ii < xx.size(); ++ii) {
     ret += xx[ii] * xx[ii];
   }
   return sqrt(ret / xx.size());
 };
 
-TYPED_TEST(TestInferDeepPotModeDeviPython, cpu_lmp_list_std)
-{
+TYPED_TEST(TestInferDeepPotModeDeviPython, cpu_lmp_list_std) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -416,42 +408,46 @@ TYPED_TEST(TestInferDeepPotModeDeviPython, cpu_lmp_list_std)
   std::vector& expected_md_f = this->expected_md_f;
   std::vector& expected_md_v = this->expected_md_v;
   float rc = dp_md.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   int nmodel = 2;
-  std::vector edir(nmodel), emd;
-  std::vector > fdir_(nmodel), fdir(nmodel), vdir(nmodel), fmd_, fmd(nmodel), vmd;
+  std::vector edir(nmodel), emd;
+  std::vector > fdir_(nmodel), fdir(nmodel),
+      vdir(nmodel), fmd_, fmd(nmodel), vmd;
   std::vector > aemd(nmodel), aemd_, avmd(nmodel), avmd_;
-  dp0.compute(edir[0], fdir_[0], vdir[0], coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  dp1.compute(edir[1], fdir_[1], vdir[1], coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  dp_md.compute(emd, fmd_, vmd, aemd_, avmd_, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
-  for(int kk = 0; kk < nmodel; ++kk){
+  dp0.compute(edir[0], fdir_[0], vdir[0], coord_cpy, atype_cpy, box,
+              nall - nloc, inlist, 0);
+  dp1.compute(edir[1], fdir_[1], vdir[1], coord_cpy, atype_cpy, box,
+              nall - nloc, inlist, 0);
+  dp_md.compute(emd, fmd_, vmd, aemd_, avmd_, coord_cpy, atype_cpy, box,
+                nall - nloc, inlist, 0);
+  for (int kk = 0; kk < nmodel; ++kk) {
     _fold_back(fdir[kk], fdir_[kk], mapping, nloc, nall, 3);
     _fold_back(fmd[kk], fmd_[kk], mapping, nloc, nall, 3);
     _fold_back(avmd[kk], avmd_[kk], mapping, nloc, nall, 9);
     aemd[kk].resize(nloc);
-    for(int ii = 0; ii < nloc; ++ii){
+    for (int ii = 0; ii < nloc; ++ii) {
       aemd[kk][ii] = aemd_[kk][ii];
     }
-  }  
+  }
 
   // dp compute std e
-  std::vector avg_e, std_e;
+  std::vector avg_e, std_e;
   dp_md.compute_avg(avg_e, aemd);
-  dp_md.compute_std_e(std_e, avg_e, aemd);  
-  
+  dp_md.compute_std_e(std_e, avg_e, aemd);
+
   // dp compute std f
-  std::vector avg_f, std_f;
+  std::vector avg_f, std_f;
   dp_md.compute_avg(avg_f, fmd);
   dp_md.compute_std_f(std_f, avg_f, fmd);
   EXPECT_LT(fabs(mymax(std_f) - expected_md_f[0]), EPSILON);
@@ -460,12 +456,12 @@ TYPED_TEST(TestInferDeepPotModeDeviPython, cpu_lmp_list_std)
 
   // dp compute std v
   // we normalize v by number of atoms
-  for (int ii = 0; ii < vmd.size(); ++ii){
-    for(int jj = 0; jj < vmd[ii].size(); ++jj){
+  for (int ii = 0; ii < vmd.size(); ++ii) {
+    for (int jj = 0; jj < vmd[ii].size(); ++jj) {
       vmd[ii][jj] /= VALUETYPE(atype.size());
     }
   }
-  std::vector avg_v, std_v;  
+  std::vector avg_v, std_v;
   dp_md.compute_avg(avg_v, vmd);
   dp_md.compute_std(std_v, avg_v, vmd, 1);
   EXPECT_LT(fabs(mymax(std_v) - expected_md_v[0]), EPSILON);
diff --git a/source/api_cc/tests/test_deeppot_r.cc b/source/api_cc/tests/test_deeppot_r.cc
index 55189f4701..b90aa82831 100644
--- a/source/api_cc/tests/test_deeppot_r.cc
+++ b/source/api_cc/tests/test_deeppot_r.cc
@@ -1,52 +1,77 @@
+#include 
 #include 
-#include 
+#include 
+#include 
+
 #include 
+#include 
 #include 
 #include 
+
 #include "DeepPot.h"
 #include "neighbor_list.h"
 #include "test_utils.h"
 
-#include 
-#include 
-#include   
-
 template 
-class TestInferDeepPotR : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
+class TestInferDeepPotR : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector expected_e = {
-    -9.320909762801588272e+01,-1.868020345400987878e+02,-1.868011172371355997e+02,-9.320868430396934912e+01,-1.868010398844378415e+02,-1.868016706555875999e+02
-  };
+      -9.320909762801588272e+01, -1.868020345400987878e+02,
+      -1.868011172371355997e+02, -9.320868430396934912e+01,
+      -1.868010398844378415e+02, -1.868016706555875999e+02};
   std::vector expected_f = {
-    6.385312846474267391e-04,-6.460452911141417731e-03,-5.652405655332678417e-04,-7.516468794343579736e-03,1.128804614240160216e-03,5.531937784564192051e-03,1.914138124904981664e-03,5.601819906021693503e-03,-5.131359585752605541e-03,-4.847104424804288617e-03,1.992071550328819614e-03,-4.028159855157302516e-03,1.236340684486603517e-03,-5.373955841338794344e-03,8.312829460571366513e-03,8.574563125108854156e-03,3.111712681889538742e-03,-4.120007238692381148e-03
-  };
+      6.385312846474267391e-04,  -6.460452911141417731e-03,
+      -5.652405655332678417e-04, -7.516468794343579736e-03,
+      1.128804614240160216e-03,  5.531937784564192051e-03,
+      1.914138124904981664e-03,  5.601819906021693503e-03,
+      -5.131359585752605541e-03, -4.847104424804288617e-03,
+      1.992071550328819614e-03,  -4.028159855157302516e-03,
+      1.236340684486603517e-03,  -5.373955841338794344e-03,
+      8.312829460571366513e-03,  8.574563125108854156e-03,
+      3.111712681889538742e-03,  -4.120007238692381148e-03};
   std::vector expected_v = {
-    5.844056241889131371e-03,4.663973497239899614e-04,-2.268382127762904633e-03,4.663973497239897988e-04,2.349338784202595950e-03,-6.908546513234039253e-04,-2.268382127762904633e-03,-6.908546513234039253e-04,2.040499248150800561e-03,4.238130266437327605e-03,-1.539867187443782223e-04,-2.393101333240631613e-03,-1.539867187443782223e-04,4.410341945447907377e-04,9.544239698119633068e-06,-2.393101333240631613e-03,9.544239698119578858e-06,1.877785959095269654e-03,5.798992562057291543e-03,6.943392552230453693e-04,-1.180376879311998773e-03,6.943392552230453693e-04,1.686725132156275536e-03,-1.461632060145726542e-03,-1.180376879311998556e-03,-1.461632060145726325e-03,1.749543733794208444e-03,7.173915604192910439e-03,3.903218041111061569e-04,-5.747400467123527524e-04,3.903218041111061569e-04,1.208289706621179949e-03,-1.826828914132010932e-03,-5.747400467123527524e-04,-1.826828914132011148e-03,2.856960586657185906e-03,4.067553030177322240e-03,-3.267469855253819430e-05,-6.980667859103454904e-05,-3.267469855253830272e-05,1.387653029234650918e-03,-2.096820720698671855e-03,-6.980667859103444062e-05,-2.096820720698671855e-03,3.218305506720191278e-03,4.753992590355240674e-03,1.224911338353675992e-03,-1.683421934571502484e-03,1.224911338353676209e-03,7.332113564901583539e-04,-1.025577052190138451e-03,-1.683421934571502484e-03,-1.025577052190138234e-03,1.456681925652047018e-03
-  };
+      5.844056241889131371e-03,  4.663973497239899614e-04,
+      -2.268382127762904633e-03, 4.663973497239897988e-04,
+      2.349338784202595950e-03,  -6.908546513234039253e-04,
+      -2.268382127762904633e-03, -6.908546513234039253e-04,
+      2.040499248150800561e-03,  4.238130266437327605e-03,
+      -1.539867187443782223e-04, -2.393101333240631613e-03,
+      -1.539867187443782223e-04, 4.410341945447907377e-04,
+      9.544239698119633068e-06,  -2.393101333240631613e-03,
+      9.544239698119578858e-06,  1.877785959095269654e-03,
+      5.798992562057291543e-03,  6.943392552230453693e-04,
+      -1.180376879311998773e-03, 6.943392552230453693e-04,
+      1.686725132156275536e-03,  -1.461632060145726542e-03,
+      -1.180376879311998556e-03, -1.461632060145726325e-03,
+      1.749543733794208444e-03,  7.173915604192910439e-03,
+      3.903218041111061569e-04,  -5.747400467123527524e-04,
+      3.903218041111061569e-04,  1.208289706621179949e-03,
+      -1.826828914132010932e-03, -5.747400467123527524e-04,
+      -1.826828914132011148e-03, 2.856960586657185906e-03,
+      4.067553030177322240e-03,  -3.267469855253819430e-05,
+      -6.980667859103454904e-05, -3.267469855253830272e-05,
+      1.387653029234650918e-03,  -2.096820720698671855e-03,
+      -6.980667859103444062e-05, -2.096820720698671855e-03,
+      3.218305506720191278e-03,  4.753992590355240674e-03,
+      1.224911338353675992e-03,  -1.683421934571502484e-03,
+      1.224911338353676209e-03,  7.332113564901583539e-04,
+      -1.025577052190138451e-03, -1.683421934571502484e-03,
+      -1.025577052190138234e-03, 1.456681925652047018e-03};
   int natoms;
   double expected_tot_e;
-  std::vectorexpected_tot_v;
+  std::vector expected_tot_v;
 
   deepmd::DeepPot dp;
 
   void SetUp() override {
     std::string file_name = "../../tests/infer/deeppot-r.pbtxt";
-    deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot-r.pbtxt", "deeppot.pb");
+    deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot-r.pbtxt",
+                                "deeppot.pb");
 
     dp.init("deeppot.pb");
 
@@ -56,25 +81,22 @@ class TestInferDeepPotR : public ::testing::Test
     expected_tot_e = 0.;
     expected_tot_v.resize(9);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
-    for(int ii = 0; ii < natoms; ++ii){
+    for (int ii = 0; ii < natoms; ++ii) {
       expected_tot_e += expected_e[ii];
     }
-    for(int ii = 0; ii < natoms; ++ii){
-      for(int dd = 0; dd < 9; ++dd){
-	expected_tot_v[dd] += expected_v[ii*9+dd];
+    for (int ii = 0; ii < natoms; ++ii) {
+      for (int dd = 0; dd < 9; ++dd) {
+        expected_tot_v[dd] += expected_v[ii * 9 + dd];
       }
     }
   };
 
-  void TearDown() override {
-    remove( "deeppot.pb" ) ;
-  };
+  void TearDown() override { remove("deeppot.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepPotR, ValueTypes);
 
-TYPED_TEST(TestInferDeepPotR, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepPotR, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -84,26 +106,25 @@ TYPED_TEST(TestInferDeepPotR, cpu_build_nlist)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   double ener;
   std::vector force, virial;
   dp.compute(ener, force, virial, coord, atype, box);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
 
-TYPED_TEST(TestInferDeepPotR, cpu_build_nlist_numfv)
-{
+TYPED_TEST(TestInferDeepPotR, cpu_build_nlist_numfv) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -113,23 +134,20 @@ TYPED_TEST(TestInferDeepPotR, cpu_build_nlist_numfv)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
-  class MyModel : public EnergyModelTest
-  {
-    deepmd::DeepPot & mydp;
-    const std::vector & atype;
-public:
-    MyModel(
-	deepmd::DeepPot & dp_,
-	const std::vector & atype_
-	) : mydp(dp_), atype(atype_) {};
-    virtual void compute (
-	double & ener,
-	std::vector &	force,
-	std::vector &	virial,
-	const std::vector & coord,
-	const std::vector & box) {
+  class MyModel : public EnergyModelTest {
+    deepmd::DeepPot& mydp;
+    const std::vector& atype;
+
+   public:
+    MyModel(deepmd::DeepPot& dp_, const std::vector& atype_)
+        : mydp(dp_), atype(atype_){};
+    virtual void compute(double& ener,
+                         std::vector& force,
+                         std::vector& virial,
+                         const std::vector& coord,
+                         const std::vector& box) {
       mydp.compute(ener, force, virial, coord, atype, box);
     }
   };
@@ -157,8 +175,7 @@ TYPED_TEST(TestInferDeepPotR, cpu_build_nlist_numfv)
   model.test_v(coord, box_);
 }
 
-TYPED_TEST(TestInferDeepPotR, cpu_build_nlist_atomic)
-{
+TYPED_TEST(TestInferDeepPotR, cpu_build_nlist_atomic) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -168,35 +185,33 @@ TYPED_TEST(TestInferDeepPotR, cpu_build_nlist_atomic)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   double ener;
   std::vector force, virial, atom_ener, atom_vir;
   dp.compute(ener, force, virial, atom_ener, atom_vir, coord, atype, box);
-  
-  EXPECT_EQ(force.size(), natoms*3);
+
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
   EXPECT_EQ(atom_ener.size(), natoms);
-  EXPECT_EQ(atom_vir.size(), natoms*9);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms*9; ++ii){
+  for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotR, cpu_lmp_nlist)
-{
+TYPED_TEST(TestInferDeepPotR, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -206,59 +221,59 @@ TYPED_TEST(TestInferDeepPotR, cpu_lmp_nlist)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
-  
+  convert_nlist(inlist, nlist_data);
+
   double ener;
   std::vector force_, virial;
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 0);
   std::vector force;
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 
   ener = 0.;
   std::fill(force_.begin(), force_.end(), 0.0);
   std::fill(virial.begin(), virial.end(), 0.0);
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 1);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 1);
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotR, cpu_lmp_nlist_atomic)
-{
+TYPED_TEST(TestInferDeepPotR, cpu_lmp_nlist_atomic) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -268,45 +283,46 @@ TYPED_TEST(TestInferDeepPotR, cpu_lmp_nlist_atomic)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
-  
+  convert_nlist(inlist, nlist_data);
+
   double ener;
   std::vector force_, atom_ener_, atom_vir_, virial;
   std::vector force, atom_ener, atom_vir;
-  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
+  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy,
+             box, nall - nloc, inlist, 0);
   _fold_back(force, force_, mapping, nloc, nall, 3);
   _fold_back(atom_ener, atom_ener_, mapping, nloc, nall, 1);
   _fold_back(atom_vir, atom_vir_, mapping, nloc, nall, 9);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
   EXPECT_EQ(atom_ener.size(), natoms);
-  EXPECT_EQ(atom_vir.size(), natoms*9);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms*9; ++ii){
+  for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
   }
 
@@ -314,35 +330,34 @@ TYPED_TEST(TestInferDeepPotR, cpu_lmp_nlist_atomic)
   std::fill(force_.begin(), force_.end(), 0.0);
   std::fill(virial.begin(), virial.end(), 0.0);
   std::fill(atom_ener_.begin(), atom_ener_.end(), 0.0);
-  std::fill(atom_vir_.begin(), atom_vir_.end(), 0.0);  
-  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy, box, nall-nloc, inlist, 1);
+  std::fill(atom_vir_.begin(), atom_vir_.end(), 0.0);
+  dp.compute(ener, force_, virial, atom_ener_, atom_vir_, coord_cpy, atype_cpy,
+             box, nall - nloc, inlist, 1);
   _fold_back(force, force_, mapping, nloc, nall, 3);
   _fold_back(atom_ener, atom_ener_, mapping, nloc, nall, 1);
   _fold_back(atom_vir, atom_vir_, mapping, nloc, nall, 9);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
   EXPECT_EQ(atom_ener.size(), natoms);
-  EXPECT_EQ(atom_vir.size(), natoms*9);
+  EXPECT_EQ(atom_vir.size(), natoms * 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     EXPECT_LT(fabs(atom_ener[ii] - expected_e[ii]), EPSILON);
   }
-  for(int ii = 0; ii < natoms*9; ++ii){
+  for (int ii = 0; ii < natoms * 9; ++ii) {
     EXPECT_LT(fabs(atom_vir[ii] - expected_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotR, cpu_lmp_nlist_2rc)
-{
+TYPED_TEST(TestInferDeepPotR, cpu_lmp_nlist_2rc) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -352,59 +367,59 @@ TYPED_TEST(TestInferDeepPotR, cpu_lmp_nlist_2rc)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   float rc = dp.cutoff();
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc*2);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc * 2);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
-  
+  convert_nlist(inlist, nlist_data);
+
   double ener;
-  std::vector force_(nall*3, 0.0), virial(9, 0.0);
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
+  std::vector force_(nall * 3, 0.0), virial(9, 0.0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 0);
   std::vector force;
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 
   ener = 0.;
   std::fill(force_.begin(), force_.end(), 0.0);
   std::fill(virial.begin(), virial.end(), 0.0);
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 1);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 1);
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
 
-
-TYPED_TEST(TestInferDeepPotR, cpu_lmp_nlist_type_sel)
-{
+TYPED_TEST(TestInferDeepPotR, cpu_lmp_nlist_type_sel) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -414,91 +429,118 @@ TYPED_TEST(TestInferDeepPotR, cpu_lmp_nlist_type_sel)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   float rc = dp.cutoff();
 
   // add vir atoms
   int nvir = 2;
-  std::vector coord_vir(nvir*3);
+  std::vector coord_vir(nvir * 3);
   std::vector atype_vir(nvir, 2);
-  for(int ii = 0; ii < nvir; ++ii){
+  for (int ii = 0; ii < nvir; ++ii) {
     coord_vir[ii] = coord[ii];
-  }  
+  }
   coord.insert(coord.begin(), coord_vir.begin(), coord_vir.end());
   atype.insert(atype.begin(), atype_vir.begin(), atype_vir.end());
   natoms += nvir;
-  std::vector expected_f_vir(nvir*3, 0.0);
-  expected_f.insert(expected_f.begin(), expected_f_vir.begin(), expected_f_vir.end());
+  std::vector expected_f_vir(nvir * 3, 0.0);
+  expected_f.insert(expected_f.begin(), expected_f_vir.begin(),
+                    expected_f_vir.end());
 
   // build nlist
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector > nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   // dp compute
   double ener;
-  std::vector force_(nall*3, 0.0), virial(9, 0.0);
-  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall-nloc, inlist, 0);
+  std::vector force_(nall * 3, 0.0), virial(9, 0.0);
+  dp.compute(ener, force_, virial, coord_cpy, atype_cpy, box, nall - nloc,
+             inlist, 0);
   // fold back
   std::vector force;
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
 
-
 template 
-class TestInferDeepPotRNoPbc : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector atype = {
-    0, 1, 1, 0, 1, 1
-  };
+class TestInferDeepPotRNoPbc : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
   std::vector box = {};
   std::vector expected_e = {
-    -9.321213823508108476e+01,-1.868044102481340758e+02,-1.868067983858651075e+02,-9.320899631301440991e+01,-1.868014559732615112e+02,-1.868017660713088617e+02
-  };
+      -9.321213823508108476e+01, -1.868044102481340758e+02,
+      -1.868067983858651075e+02, -9.320899631301440991e+01,
+      -1.868014559732615112e+02, -1.868017660713088617e+02};
   std::vector expected_f = {
-    4.578151103701261042e-03,-1.917874111009987628e-03,-3.464546781179331930e-03,-4.578151103701261042e-03,1.917874111009987628e-03,3.464546781179331930e-03,-2.624402581721222913e-03,3.566275128489623933e-04,-2.859315986763691776e-04,-5.767787273464367384e-03,1.907053583551196647e-03,-3.889064429673861831e-03,1.786820066350549132e-04,-5.327197473636275694e-03,8.236236182834734409e-03,8.213507848550535492e-03,3.063516377236116545e-03,-4.061240154484504865e-03
-  };
+      4.578151103701261042e-03,  -1.917874111009987628e-03,
+      -3.464546781179331930e-03, -4.578151103701261042e-03,
+      1.917874111009987628e-03,  3.464546781179331930e-03,
+      -2.624402581721222913e-03, 3.566275128489623933e-04,
+      -2.859315986763691776e-04, -5.767787273464367384e-03,
+      1.907053583551196647e-03,  -3.889064429673861831e-03,
+      1.786820066350549132e-04,  -5.327197473636275694e-03,
+      8.236236182834734409e-03,  8.213507848550535492e-03,
+      3.063516377236116545e-03,  -4.061240154484504865e-03};
   std::vector expected_v = {
-            1.984979026299632174e-03,-8.315452677741701822e-04,-1.502146290172694243e-03,-8.315452677741700738e-04,3.483500446080982317e-04,6.292774999372096039e-04,-1.502146290172694243e-03,6.292774999372097123e-04,1.136759354725281907e-03,1.402852790439301908e-03,-5.876815743732210226e-04,-1.061618327900012114e-03,-5.876815743732211311e-04,2.461909298049979960e-04,4.447320022283834766e-04,-1.061618327900012331e-03,4.447320022283834766e-04,8.033868427351443728e-04,4.143606961846296385e-03,-5.511382161123719835e-04,4.465413399437045397e-04,-5.511382161123719835e-04,1.082271054025323839e-04,-1.097918001262628728e-04,4.465413399437046481e-04,-1.097918001262628728e-04,1.220966982358671871e-04,5.263952004497593831e-03,2.395243710938091842e-04,-2.830378939414603329e-04,2.395243710938094010e-04,1.189969706598244898e-03,-1.805627331015851201e-03,-2.830378939414602245e-04,-1.805627331015851635e-03,2.801996513751836820e-03,2.208413501170402270e-03,5.331756287635716889e-05,-1.664423506603235218e-04,5.331756287635695205e-05,1.379626072862918072e-03,-2.094132943741625064e-03,-1.664423506603234133e-04,-2.094132943741625064e-03,3.199787996743366607e-03,4.047014004814953811e-03,1.137904999421357000e-03,-1.568106936614101698e-03,1.137904999421357217e-03,7.205982843216952307e-04,-1.011174600268313238e-03,-1.568106936614101698e-03,-1.011174600268313238e-03,1.435226522157425754e-03
-  };
+      1.984979026299632174e-03,  -8.315452677741701822e-04,
+      -1.502146290172694243e-03, -8.315452677741700738e-04,
+      3.483500446080982317e-04,  6.292774999372096039e-04,
+      -1.502146290172694243e-03, 6.292774999372097123e-04,
+      1.136759354725281907e-03,  1.402852790439301908e-03,
+      -5.876815743732210226e-04, -1.061618327900012114e-03,
+      -5.876815743732211311e-04, 2.461909298049979960e-04,
+      4.447320022283834766e-04,  -1.061618327900012331e-03,
+      4.447320022283834766e-04,  8.033868427351443728e-04,
+      4.143606961846296385e-03,  -5.511382161123719835e-04,
+      4.465413399437045397e-04,  -5.511382161123719835e-04,
+      1.082271054025323839e-04,  -1.097918001262628728e-04,
+      4.465413399437046481e-04,  -1.097918001262628728e-04,
+      1.220966982358671871e-04,  5.263952004497593831e-03,
+      2.395243710938091842e-04,  -2.830378939414603329e-04,
+      2.395243710938094010e-04,  1.189969706598244898e-03,
+      -1.805627331015851201e-03, -2.830378939414602245e-04,
+      -1.805627331015851635e-03, 2.801996513751836820e-03,
+      2.208413501170402270e-03,  5.331756287635716889e-05,
+      -1.664423506603235218e-04, 5.331756287635695205e-05,
+      1.379626072862918072e-03,  -2.094132943741625064e-03,
+      -1.664423506603234133e-04, -2.094132943741625064e-03,
+      3.199787996743366607e-03,  4.047014004814953811e-03,
+      1.137904999421357000e-03,  -1.568106936614101698e-03,
+      1.137904999421357217e-03,  7.205982843216952307e-04,
+      -1.011174600268313238e-03, -1.568106936614101698e-03,
+      -1.011174600268313238e-03, 1.435226522157425754e-03};
   int natoms;
   double expected_tot_e;
-  std::vectorexpected_tot_v;
+  std::vector expected_tot_v;
 
   deepmd::DeepPot dp;
 
   void SetUp() override {
     std::string file_name = "../../tests/infer/deeppot-r.pbtxt";
-    deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot-r.pbtxt", "deeppot.pb");
+    deepmd::convert_pbtxt_to_pb("../../tests/infer/deeppot-r.pbtxt",
+                                "deeppot.pb");
 
     dp.init("deeppot.pb");
 
@@ -508,25 +550,22 @@ class TestInferDeepPotRNoPbc : public ::testing::Test
     expected_tot_e = 0.;
     expected_tot_v.resize(9);
     std::fill(expected_tot_v.begin(), expected_tot_v.end(), 0.);
-    for(int ii = 0; ii < natoms; ++ii){
+    for (int ii = 0; ii < natoms; ++ii) {
       expected_tot_e += expected_e[ii];
     }
-    for(int ii = 0; ii < natoms; ++ii){
-      for(int dd = 0; dd < 9; ++dd){
-	expected_tot_v[dd] += expected_v[ii*9+dd];
+    for (int ii = 0; ii < natoms; ++ii) {
+      for (int dd = 0; dd < 9; ++dd) {
+        expected_tot_v[dd] += expected_v[ii * 9 + dd];
       }
     }
   };
 
-  void TearDown() override {
-    remove( "deeppot.pb" ) ;
-  };
+  void TearDown() override { remove("deeppot.pb"); };
 };
 
 TYPED_TEST_SUITE(TestInferDeepPotRNoPbc, ValueTypes);
 
-TYPED_TEST(TestInferDeepPotRNoPbc, cpu_build_nlist)
-{
+TYPED_TEST(TestInferDeepPotRNoPbc, cpu_build_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -536,20 +575,20 @@ TYPED_TEST(TestInferDeepPotRNoPbc, cpu_build_nlist)
   std::vector& expected_v = this->expected_v;
   int& natoms = this->natoms;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepPot& dp = this->dp;
   double ener;
   std::vector force, virial;
   dp.compute(ener, force, virial, coord, atype, box);
 
-  EXPECT_EQ(force.size(), natoms*3);
+  EXPECT_EQ(force.size(), natoms * 3);
   EXPECT_EQ(virial.size(), 9);
 
   EXPECT_LT(fabs(ener - expected_tot_e), EPSILON);
-  for(int ii = 0; ii < natoms*3; ++ii){
-    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);    
+  for (int ii = 0; ii < natoms * 3; ++ii) {
+    EXPECT_LT(fabs(force[ii] - expected_f[ii]), EPSILON);
   }
-  for(int ii = 0; ii < 3*3; ++ii){
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_tot_v[ii]), EPSILON);
   }
 }
diff --git a/source/api_cc/tests/test_dipolecharge.cc b/source/api_cc/tests/test_dipolecharge.cc
index b5703bb4fe..2d568a0aa7 100644
--- a/source/api_cc/tests/test_dipolecharge.cc
+++ b/source/api_cc/tests/test_dipolecharge.cc
@@ -1,56 +1,58 @@
+#include 
 #include 
-#include 
+#include 
+#include 
+
 #include 
+#include 
 #include 
 #include 
-#include "DeepTensor.h"
+
 #include "DataModifier.h"
+#include "DeepTensor.h"
 #include "SimulationRegion.h"
 #include "ewald.h"
 #include "neighbor_list.h"
 #include "test_utils.h"
 
-#include 
-#include 
-#include   
-
 template 
-class TestDipoleCharge : public ::testing::Test
-{  
-protected:  
+class TestDipoleCharge : public ::testing::Test {
+ protected:
   std::vector coord = {
-    4.6067455554,    8.8719311819,    6.3886531197,
-    4.0044515745,    4.2449530507,    7.7902855220,
-    2.6453069446,    0.8772647726,    1.2804446790,
-    1.1445332290,    0.0067366438,    1.8606485070,
-    7.1002867706,    5.0325506787,    3.1805888348,
-    4.5352891138,    7.7389683929,    9.4260970128,
-    2.1833238914,    9.0916071034,    7.2299906064,
-    4.1040157820,    1.0496745045,    5.4748315591,
-  };
-  std::vector atype = {
-    0,3,2,1,3,4,1,4
-  };
-  std::vector box = {
-    10., 0., 0., 0., 10., 0., 0., 0., 10.
-  };
-  std::vector expected_e = {
-    3.671081837126222158e+00
+      4.6067455554, 8.8719311819, 6.3886531197, 4.0044515745, 4.2449530507,
+      7.7902855220, 2.6453069446, 0.8772647726, 1.2804446790, 1.1445332290,
+      0.0067366438, 1.8606485070, 7.1002867706, 5.0325506787, 3.1805888348,
+      4.5352891138, 7.7389683929, 9.4260970128, 2.1833238914, 9.0916071034,
+      7.2299906064, 4.1040157820, 1.0496745045, 5.4748315591,
   };
+  std::vector atype = {0, 3, 2, 1, 3, 4, 1, 4};
+  std::vector box = {10., 0., 0., 0., 10., 0., 0., 0., 10.};
+  std::vector expected_e = {3.671081837126222158e+00};
   std::vector expected_f = {
-    8.786854427753210128e-01,-1.590752486903602159e-01,-2.709225006303785932e-01,-4.449513960033193438e-01,-1.564291540964127813e-01,2.139031741772115178e-02,1.219699614140521193e+00,-5.580358618499958734e-02,-3.878662478349682585e-01,-1.286685244990778854e+00,1.886475802950296488e-01,3.904450515493615437e-01,1.605017382138404849e-02,2.138016869742287995e-01,-2.617514921203008965e-02,2.877081057057793712e-01,-3.846449683844421763e-01,3.048855616906603894e-02,-9.075632811311897807e-01,-6.509653472431625731e-03,2.302010972126376787e-01,2.370565856822822726e-01,3.600133435593881881e-01,1.243887532859055609e-02
-  };
+      8.786854427753210128e-01,  -1.590752486903602159e-01,
+      -2.709225006303785932e-01, -4.449513960033193438e-01,
+      -1.564291540964127813e-01, 2.139031741772115178e-02,
+      1.219699614140521193e+00,  -5.580358618499958734e-02,
+      -3.878662478349682585e-01, -1.286685244990778854e+00,
+      1.886475802950296488e-01,  3.904450515493615437e-01,
+      1.605017382138404849e-02,  2.138016869742287995e-01,
+      -2.617514921203008965e-02, 2.877081057057793712e-01,
+      -3.846449683844421763e-01, 3.048855616906603894e-02,
+      -9.075632811311897807e-01, -6.509653472431625731e-03,
+      2.302010972126376787e-01,  2.370565856822822726e-01,
+      3.600133435593881881e-01,  1.243887532859055609e-02};
   std::vector expected_v = {
-    3.714071471995848417e-01,6.957130186032146613e-01,-1.158289779017217302e+00,6.957130186032139951e-01,-1.400130091653774933e+01,-3.631620234653316626e-01,-1.158289779017217302e+00,-3.631620234653316626e-01,3.805077486043773050e+00
-  };
-  std::vector charge_map = {
-    1., 1., 1., 1., 1., -1., -3.
-  };
+      3.714071471995848417e-01,  6.957130186032146613e-01,
+      -1.158289779017217302e+00, 6.957130186032139951e-01,
+      -1.400130091653774933e+01, -3.631620234653316626e-01,
+      -1.158289779017217302e+00, -3.631620234653316626e-01,
+      3.805077486043773050e+00};
+  std::vector charge_map = {1., 1., 1., 1., 1., -1., -3.};
   int natoms;
   int ntypes;
   std::vector type_asso;
   double expected_tot_e;
-  std::vectorexpected_tot_v;
+  std::vector expected_tot_v;
 
   deepmd::DeepTensor dp;
   deepmd::DipoleChargeModifier dm;
@@ -72,26 +74,20 @@ class TestDipoleCharge : public ::testing::Test
     EXPECT_EQ(9, expected_v.size());
   };
 
-  void TearDown() override {
-    remove( "dipolecharge_e.pb" ) ;
-  };
+  void TearDown() override { remove("dipolecharge_e.pb"); };
 };
 
-static bool
-_in_vec(const int & value,
-	const std::vector & vec)
-{
+static bool _in_vec(const int& value, const std::vector& vec) {
   // naive impl.
-  for(int ii = 0; ii < vec.size(); ++ii){
-    if(value == vec[ii]) return true;
+  for (int ii = 0; ii < vec.size(); ++ii) {
+    if (value == vec[ii]) return true;
   }
   return false;
 }
 
 TYPED_TEST_SUITE(TestDipoleCharge, ValueTypes);
 
-TYPED_TEST(TestDipoleCharge, cpu_lmp_nlist)
-{
+TYPED_TEST(TestDipoleCharge, cpu_lmp_nlist) {
   using VALUETYPE = TypeParam;
   std::vector& coord = this->coord;
   std::vector& atype = this->atype;
@@ -104,80 +100,83 @@ TYPED_TEST(TestDipoleCharge, cpu_lmp_nlist)
   int& ntypes = this->ntypes;
   std::vector& type_asso = this->type_asso;
   double& expected_tot_e = this->expected_tot_e;
-  std::vector&expected_tot_v = this->expected_tot_v;
+  std::vector& expected_tot_v = this->expected_tot_v;
   deepmd::DeepTensor& dp = this->dp;
   deepmd::DipoleChargeModifier& dm = this->dm;
   // build nlist
   // float rc = dp.cutoff();
   float rc = 4.0;
-  int nloc = coord.size() / 3;  
+  int nloc = coord.size() / 3;
   std::vector coord_cpy;
-  std::vector atype_cpy, mapping;  
-  std::vector > nlist_data;
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-  	       coord, atype, box, rc);
+  std::vector atype_cpy, mapping;
+  std::vector> nlist_data;
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   int nall = coord_cpy.size() / 3;
   int nghost = nall - nloc;
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(inlist, nlist_data);  
+  convert_nlist(inlist, nlist_data);
 
   // evaluate dipole
-  std::vector dipole, dipole_recd(nloc*3, 0.0);
-  dp.compute(dipole, coord_cpy, atype_cpy, box, nall-nloc, inlist);
+  std::vector dipole, dipole_recd(nloc * 3, 0.0);
+  dp.compute(dipole, coord_cpy, atype_cpy, box, nall - nloc, inlist);
 
   // add virtual atoms to the system
   // // a lot of mappings
   std::vector sel_types = dp.sel_types();
   std::vector sel_fwd, sel_bwd;
   int sel_nghost;
-  deepmd::select_by_type(sel_fwd, sel_bwd, sel_nghost, coord_cpy, atype_cpy, nghost, sel_types);
+  deepmd::select_by_type(sel_fwd, sel_bwd, sel_nghost, coord_cpy, atype_cpy,
+                         nghost, sel_types);
   int sel_nall = sel_bwd.size();
   int sel_nloc = sel_nall - sel_nghost;
   std::vector sel_atype(sel_bwd.size());
   deepmd::select_map(sel_atype, atype, sel_fwd, 1);
-  // Yixiao: because the deeptensor already return the correct order, the following map is no longer needed
-  // deepmd::AtomMap nnp_map(sel_atype.begin(), sel_atype.begin() + sel_nloc);
-  // const std::vector & sort_fwd_map(nnp_map.get_fwd_map());
+  // Yixiao: because the deeptensor already return the correct order, the
+  // following map is no longer needed deepmd::AtomMap
+  // nnp_map(sel_atype.begin(), sel_atype.begin() + sel_nloc); const
+  // std::vector & sort_fwd_map(nnp_map.get_fwd_map());
 
   // // add coords
-  std::vector add_coord;
-  std::vector add_atype;
-  std::vector> pairs;
-  for(int ii = 0; ii < nloc; ++ii){
-    if(_in_vec(atype[ii], sel_types)){
+  std::vector add_coord;
+  std::vector add_atype;
+  std::vector> pairs;
+  for (int ii = 0; ii < nloc; ++ii) {
+    if (_in_vec(atype[ii], sel_types)) {
       // Yixiao: the sort map is no longer needed
       // int res_idx = sort_fwd_map[sel_fwd[ii]];
       int res_idx = sel_fwd[ii];
-      std::vector tmp_coord(3);
-      for(int dd = 0; dd < 3; ++dd){
-	tmp_coord[dd] = coord[ii*3+dd] + dipole[res_idx*3+dd];
-	dipole_recd[ii*3+dd] = dipole[res_idx*3+dd];
+      std::vector tmp_coord(3);
+      for (int dd = 0; dd < 3; ++dd) {
+        tmp_coord[dd] = coord[ii * 3 + dd] + dipole[res_idx * 3 + dd];
+        dipole_recd[ii * 3 + dd] = dipole[res_idx * 3 + dd];
       }
-      pairs.push_back(std::pair(ii, add_atype.size()+atype.size()));
-      // std::cout << ii <<  " " 
-      // 		<< atype[ii] << " " 
-      // 		<< res_idx << " " 
-      // 		<< type_asso[atype[ii]] << " " 
-      // 		<< " pair "  
-      // 		<< pairs.back().first << " " << pairs.back().second << " "
+      pairs.push_back(std::pair(ii, add_atype.size() + atype.size()));
+      // std::cout << ii <<  " "
+      // 		<< atype[ii] << " "
+      // 		<< res_idx << " "
+      // 		<< type_asso[atype[ii]] << " "
+      // 		<< " pair "
+      // 		<< pairs.back().first << " " << pairs.back().second << "
+      // "
       // 		<< std::endl;
       add_coord.insert(add_coord.end(), tmp_coord.begin(), tmp_coord.end());
-      add_atype.push_back(type_asso[atype[ii]]);      
+      add_atype.push_back(type_asso[atype[ii]]);
     }
   }
   coord.insert(coord.end(), add_coord.begin(), add_coord.end());
   atype.insert(atype.end(), add_atype.begin(), add_atype.end());
   nloc = atype.size();
-  EXPECT_EQ(atype.size()*3, coord.size());
+  EXPECT_EQ(atype.size() * 3, coord.size());
 
   // get charge value
   std::vector charge(nloc);
-  for(int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     charge[ii] = charge_map[atype[ii]];
   }
-  
+
   // compute the recp part of the ele interaction
   VALUETYPE eener;
   std::vector eforce, evirial;
@@ -187,14 +186,14 @@ TYPED_TEST(TestDipoleCharge, cpu_lmp_nlist)
   eparam.beta = 0.2;
   eparam.spacing = 4;
   ewald_recp(eener, eforce, evirial, coord, charge, region, eparam);
-  
+
   EXPECT_LT(fabs(eener - expected_e[0]), 1e-6);
   EXPECT_EQ(eforce.size(), coord.size());
-  EXPECT_EQ(evirial.size(), 9);  
+  EXPECT_EQ(evirial.size(), 9);
 
   // extend the system with virtual atoms, and build nlist
-  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping,
-  	       coord, atype, box, rc);
+  _build_nlist(nlist_data, coord_cpy, atype_cpy, mapping, coord,
+                          atype, box, rc);
   nall = coord_cpy.size() / 3;
   nghost = nall - nloc;
   ilist.resize(nloc);
@@ -207,8 +206,9 @@ TYPED_TEST(TestDipoleCharge, cpu_lmp_nlist)
   convert_nlist(inlist, nlist_data);
 
   // compute force and virial
-  std::vector force_, force, virial;
-  dm.compute(force_, virial, coord_cpy, atype_cpy, box, pairs, eforce, nghost, inlist);
+  std::vector force_, force, virial;
+  dm.compute(force_, virial, coord_cpy, atype_cpy, box, pairs, eforce, nghost,
+             inlist);
   // for(int ii = 0; ii < force_.size(); ++ii){
   //   std::cout << force_[ii] << " " ;
   // }
@@ -216,37 +216,37 @@ TYPED_TEST(TestDipoleCharge, cpu_lmp_nlist)
   _fold_back(force, force_, mapping, nloc, nall, 3);
 
   // compare force
-  EXPECT_EQ(force.size(), nloc*3);
+  EXPECT_EQ(force.size(), nloc * 3);
   // note nloc > expected_f.size(), because nloc contains virtual atoms.
-  for(int ii = 0; ii < expected_f.size(); ++ii){
+  for (int ii = 0; ii < expected_f.size(); ++ii) {
     EXPECT_LT(fabs(force[ii] - expected_f[ii]), 1e-6);
   }
 
   // add recp virial and viral corr to virial
   // virial = virial_recp + virial_dipolecharge + virial_corr
-  for (int dd0 = 0; dd0 < 3; ++dd0){
-    for (int dd1 = 0; dd1 < 3; ++dd1){
-      virial[dd0*3+dd1] += evirial[dd0*3+dd1];
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
+      virial[dd0 * 3 + dd1] += evirial[dd0 * 3 + dd1];
     }
-  }    
-  for(int ii = 0; ii < pairs.size(); ++ii){
+  }
+  for (int ii = 0; ii < pairs.size(); ++ii) {
     int idx0 = pairs[ii].first;
     int idx1 = pairs[ii].second;
-    for (int dd0 = 0; dd0 < 3; ++dd0){
-      for (int dd1 = 0; dd1 < 3; ++dd1){
-	virial[dd0*3+dd1] -= eforce[idx1*3+dd0] * dipole_recd[idx0*3+dd1];
+    for (int dd0 = 0; dd0 < 3; ++dd0) {
+      for (int dd1 = 0; dd1 < 3; ++dd1) {
+        virial[dd0 * 3 + dd1] -=
+            eforce[idx1 * 3 + dd0] * dipole_recd[idx0 * 3 + dd1];
       }
-    }    
+    }
   }
   // compare virial
-  EXPECT_EQ(virial.size(), 3*3);
-  for(int ii = 0; ii < expected_v.size(); ++ii){
+  EXPECT_EQ(virial.size(), 3 * 3);
+  for (int ii = 0; ii < expected_v.size(); ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_v[ii]), 1e-5);
   }
 }
 
-TYPED_TEST(TestDipoleCharge, print_summary)
-{
+TYPED_TEST(TestDipoleCharge, print_summary) {
   deepmd::DipoleChargeModifier& dm = this->dm;
   dm.print_summary("");
 }
diff --git a/source/api_cc/tests/test_ewald.cc b/source/api_cc/tests/test_ewald.cc
index 83b190586a..be9a6d9eaf 100644
--- a/source/api_cc/tests/test_ewald.cc
+++ b/source/api_cc/tests/test_ewald.cc
@@ -1,60 +1,46 @@
 #include 
-#include 
+
 #include 
+#include 
 #include 
 #include 
+
+#include "ewald.h"
 #include "neighbor_list.h"
 #include "test_utils.h"
-#include "ewald.h"
 
 template 
-class TestInferEwald : public ::testing::Test
-{  
-protected:  
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector charge = {
-    -2, 1, 1, -2, 1, 1
-  };
-  std::vector box = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
-  };
-  void SetUp() override {
-  };
-  void TearDown() override {
-  };
+class TestInferEwald : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                                  00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                                  3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector charge = {-2, 1, 1, -2, 1, 1};
+  std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
+  void SetUp() override{};
+  void TearDown() override{};
 };
 
 TYPED_TEST_SUITE(TestInferEwald, ValueTypes);
 
-TYPED_TEST(TestInferEwald, cpu_numfv)
-{
+TYPED_TEST(TestInferEwald, cpu_numfv) {
   using VALUETYPE = TypeParam;
-  std::vector& coord = this -> coord;
-  std::vector& charge = this -> charge;
-  std::vector& box = this -> box;
-  class MyModel : public EnergyModelTest
-  {
-    const std::vector & charge;
-    deepmd::EwaldParameters eparam;    
-public:
-    MyModel(
-	const std::vector & charge_
-	) : charge(charge_) {
+  std::vector& coord = this->coord;
+  std::vector& charge = this->charge;
+  std::vector& box = this->box;
+  class MyModel : public EnergyModelTest {
+    const std::vector& charge;
+    deepmd::EwaldParameters eparam;
+
+   public:
+    MyModel(const std::vector& charge_) : charge(charge_) {
       eparam.beta = 0.4;
     };
-    virtual void compute (
-	double & ener,
-	std::vector &	force,
-	std::vector &	virial,
-	const std::vector & coord,
-	const std::vector & box) {
+    virtual void compute(double& ener,
+                         std::vector& force,
+                         std::vector& virial,
+                         const std::vector& coord,
+                         const std::vector& box) {
       deepmd::Region region;
       init_region_cpu(region, &box[0]);
       VALUETYPE ener_;
diff --git a/source/api_cc/tests/test_utils.h b/source/api_cc/tests/test_utils.h
index b6759514e5..e1feacd228 100644
--- a/source/api_cc/tests/test_utils.h
+++ b/source/api_cc/tests/test_utils.h
@@ -1,108 +1,99 @@
 #pragma once
 #include 
-#include "gtest/gtest.h"
+
 #include "SimulationRegion.h"
-#include "region.h"
+#include "gtest/gtest.h"
 #include "neighbor_list.h"
+#include "region.h"
 
 #define EPSILON (std::is_same::value ? 1e-10 : 1e-4)
 
 typedef testing::Types ValueTypes;
 
-template
-inline void 
-_fold_back(
-    typename std::vector::iterator out,
-    const typename std::vector::const_iterator in, 
-    const std::vector &mapping,
-    const int nloc,
-    const int nall,
-    const int ndim)
-{
+template 
+inline void _fold_back(typename std::vector::iterator out,
+                       const typename std::vector::const_iterator in,
+                       const std::vector &mapping,
+                       const int nloc,
+                       const int nall,
+                       const int ndim) {
   // out.resize(nloc*ndim);
-  std::copy(in, in + nloc*ndim, out);
-  for(int ii = nloc; ii < nall; ++ii){
+  std::copy(in, in + nloc * ndim, out);
+  for (int ii = nloc; ii < nall; ++ii) {
     int in_idx = ii;
     int out_idx = mapping[in_idx];
-    for(int dd = 0; dd < ndim; ++dd){
+    for (int dd = 0; dd < ndim; ++dd) {
       *(out + out_idx * ndim + dd) += *(in + in_idx * ndim + dd);
     }
   }
 }
 
-template
-inline void 
-_fold_back(
-    std::vector &out,
-    const std::vector &in,
-    const std::vector &mapping,
-    const int nloc,
-    const int nall,
-    const int ndim)
-{
-  out.resize(nloc*ndim);
+template 
+inline void _fold_back(std::vector &out,
+                       const std::vector &in,
+                       const std::vector &mapping,
+                       const int nloc,
+                       const int nall,
+                       const int ndim) {
+  out.resize(nloc * ndim);
   _fold_back(out.begin(), in.begin(), mapping, nloc, nall, ndim);
 }
 
-template
-inline void
-_build_nlist(
-    std::vector> &nlist_data,
-    std::vector & coord_cpy,
-    std::vector & atype_cpy,
-    std::vector & mapping,
-    const std::vector & coord,
-    const std::vector & atype,
-    const std::vector & box,
-    const float & rc)
-{
+template 
+inline void _build_nlist(std::vector> &nlist_data,
+                         std::vector &coord_cpy,
+                         std::vector &atype_cpy,
+                         std::vector &mapping,
+                         const std::vector &coord,
+                         const std::vector &atype,
+                         const std::vector &box,
+                         const float &rc) {
   // convert VALUETYPE to double, it looks like copy_coord only accepts double
   std::vector coord_cpy_;
   std::vector coord_(coord.begin(), coord.end());
   std::vector box_(box.begin(), box.end());
 
-  SimulationRegion region;
+  SimulationRegion region;
   region.reinitBox(&box_[0]);
   std::vector ncell, ngcell;
-  copy_coord(coord_cpy_, atype_cpy, mapping, ncell, ngcell, coord_, atype, rc, region);
+  copy_coord(coord_cpy_, atype_cpy, mapping, ncell, ngcell, coord_, atype, rc,
+             region);
   std::vector nat_stt, ext_stt, ext_end;
   nat_stt.resize(3);
   ext_stt.resize(3);
   ext_end.resize(3);
-  for (int dd = 0; dd < 3; ++dd){
+  for (int dd = 0; dd < 3; ++dd) {
     ext_stt[dd] = -ngcell[dd];
     ext_end[dd] = ncell[dd] + ngcell[dd];
   }
   int nloc = coord_.size() / 3;
   int nall = coord_cpy_.size() / 3;
   std::vector> nlist_r_cpy;
-  build_nlist(nlist_data, nlist_r_cpy, coord_cpy_, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+  build_nlist(nlist_data, nlist_r_cpy, coord_cpy_, nloc, rc, rc, nat_stt, ncell,
+              ext_stt, ext_end, region, ncell);
 
   // convert double to VALUETYPE
   coord_cpy.assign(coord_cpy_.begin(), coord_cpy_.end());
 }
 
-template
-class EnergyModelTest
-{
+template 
+class EnergyModelTest {
   double hh = std::is_same::value ? 1e-5 : 1e-2;
-  double level = std::is_same::value ? 1e-6 : 1e-2; // expected?
-public:
-  virtual void compute (
-      double & ener,
-      std::vector &	force,
-      std::vector &	virial,
-      const std::vector & coord,
-      const std::vector & box
-      ) = 0;
-  void test_f (
-      const std::vector & coord,
-      const std::vector & box) {
+  double level =
+      std::is_same::value ? 1e-6 : 1e-2;  // expected?
+ public:
+  virtual void compute(double &ener,
+                       std::vector &force,
+                       std::vector &virial,
+                       const std::vector &coord,
+                       const std::vector &box) = 0;
+  void test_f(const std::vector &coord,
+              const std::vector &box) {
     int ndof = coord.size();
     double ener;
     std::vector force, virial;
     compute(ener, force, virial, coord, box);
-    for(int ii = 0; ii < ndof; ++ii){
+    for (int ii = 0; ii < ndof; ++ii) {
       std::vector coord0(coord), coord1(coord);
       double ener0, ener1;
       std::vector forcet, virialt;
@@ -110,21 +101,20 @@ class EnergyModelTest
       coord1[ii] -= hh;
       compute(ener0, forcet, virialt, coord0, box);
       compute(ener1, forcet, virialt, coord1, box);
-      VALUETYPE num = - (ener0 - ener1) / (2.*hh);
+      VALUETYPE num = -(ener0 - ener1) / (2. * hh);
       VALUETYPE ana = force[ii];
       EXPECT_LT(fabs(num - ana), level);
     }
-  }  
-  void test_v(
-      const std::vector & coord,
-      const std::vector & box) {
+  }
+  void test_v(const std::vector &coord,
+              const std::vector &box) {
     std::vector num_diff(9);
     double ener;
     std::vector force, virial;
     compute(ener, force, virial, coord, box);
     deepmd::Region region;
     init_region_cpu(region, &box[0]);
-    for(int ii = 0; ii < 9; ++ii){
+    for (int ii = 0; ii < 9; ++ii) {
       std::vector box0(box), box1(box);
       box0[ii] += hh;
       box1[ii] -= hh;
@@ -133,35 +123,34 @@ class EnergyModelTest
       init_region_cpu(region1, &box1[0]);
       std::vector coord0(coord), coord1(coord);
       int natoms = coord.size() / 3;
-      for(int ii = 0; ii < natoms; ++ii){
-	VALUETYPE pi[3];
-	convert_to_inter_cpu(pi, region, &coord[ii*3]);
-	convert_to_phys_cpu(&coord0[ii*3], region0, pi);
+      for (int ii = 0; ii < natoms; ++ii) {
+        VALUETYPE pi[3];
+        convert_to_inter_cpu(pi, region, &coord[ii * 3]);
+        convert_to_phys_cpu(&coord0[ii * 3], region0, pi);
       }
-      for(int ii = 0; ii < natoms; ++ii){
-	VALUETYPE pi[3];
-	convert_to_inter_cpu(pi, region, &coord[ii*3]);
-	convert_to_phys_cpu(&coord1[ii*3], region1, pi);
+      for (int ii = 0; ii < natoms; ++ii) {
+        VALUETYPE pi[3];
+        convert_to_inter_cpu(pi, region, &coord[ii * 3]);
+        convert_to_phys_cpu(&coord1[ii * 3], region1, pi);
       }
       double ener0, ener1;
       std::vector forcet, virialt;
       compute(ener0, forcet, virialt, coord0, box0);
       compute(ener1, forcet, virialt, coord1, box1);
-      num_diff[ii] = - (ener0 - ener1) / (2.*hh);
+      num_diff[ii] = -(ener0 - ener1) / (2. * hh);
     }
     std::vector num_virial(9, 0);
-    for(int dd0 = 0; dd0 < 3; ++dd0){
-      for(int dd1 = 0; dd1 < 3; ++dd1){
-	for(int dd = 0; dd < 3; ++dd){
-	  num_virial[dd0*3+dd1] += num_diff[dd*3+dd0] * box[dd*3+dd1];
-	  // num_virial[dd0*3+dd1] += num_diff[dd0*3+dd] * box[dd1*3+dd];
-	}
+    for (int dd0 = 0; dd0 < 3; ++dd0) {
+      for (int dd1 = 0; dd1 < 3; ++dd1) {
+        for (int dd = 0; dd < 3; ++dd) {
+          num_virial[dd0 * 3 + dd1] +=
+              num_diff[dd * 3 + dd0] * box[dd * 3 + dd1];
+          // num_virial[dd0*3+dd1] += num_diff[dd0*3+dd] * box[dd1*3+dd];
+        }
       }
     }
-    for(int ii = 0; ii < 9; ++ii){
+    for (int ii = 0; ii < 9; ++ii) {
       EXPECT_LT(fabs(num_virial[ii] - virial[ii]), level);
     }
   }
 };
-
-
diff --git a/source/cmake/FindROCM.cmake b/source/cmake/FindROCM.cmake
index fa8ee2c20f..6ef575ae39 100644
--- a/source/cmake/FindROCM.cmake
+++ b/source/cmake/FindROCM.cmake
@@ -1,10 +1,6 @@
-# Input:
-# ROCM_ROOT 
+# Input: ROCM_ROOT
 #
-# Output:
-# ROCM_FOUND        
-# ROCM_INCLUDE_DIRS 
-# ROCM_LIBRARIES
+# Output: ROCM_FOUND ROCM_INCLUDE_DIRS ROCM_LIBRARIES
 
 # define the search path
 cmake_minimum_required(VERSION 3.21)
@@ -13,74 +9,76 @@ set(ROCM_PATH ${CMAKE_HIP_COMPILER_ROCM_ROOT})
 set(ROCM_search_PATHS ${CMAKE_HIP_COMPILER_ROCM_ROOT})
 
 # includes
-find_path (ROCM_INCLUDE_DIRS
-  NAMES 
-  hip/hip_runtime.h
-  rocprim/rocprim.hpp
-  hipcub/hipcub.hpp
-  PATHS ${ROCM_search_PATHS} 
+find_path(
+  ROCM_INCLUDE_DIRS
+  NAMES hip/hip_runtime.h rocprim/rocprim.hpp hipcub/hipcub.hpp
+  PATHS ${ROCM_search_PATHS}
   PATH_SUFFIXES "include"
-  NO_DEFAULT_PATH
-  )
-if (NOT ROCM_INCLUDE_DIRS AND ROCM_FIND_REQUIRED)
-  message(FATAL_ERROR 
-    "Not found 'hip' or 'rocprim' or 'hipcub' directory in path '${ROCM_search_PATHS}' "
-    "You can manually set the ROCM install path by -DROCM_ROOT ")
-endif ()
+  NO_DEFAULT_PATH)
+if(NOT ROCM_INCLUDE_DIRS AND ROCM_FIND_REQUIRED)
+  message(
+    FATAL_ERROR
+      "Not found 'hip' or 'rocprim' or 'hipcub' directory in path '${ROCM_search_PATHS}' "
+      "You can manually set the ROCM install path by -DROCM_ROOT ")
+endif()
 
 # FindHIP.cmake
-find_path (HIP_CMAKE
-  NAMES 
-  FindHIP.cmake
-  PATHS ${ROCM_search_PATHS} 
+find_path(
+  HIP_CMAKE
+  NAMES FindHIP.cmake
+  PATHS ${ROCM_search_PATHS}
   PATH_SUFFIXES "hip/cmake"
-  NO_DEFAULT_PATH
-  )
+  NO_DEFAULT_PATH)
 
-if (NOT HIP_CMAKE AND ROCM_FIND_REQUIRED)
-  message(FATAL_ERROR 
-    "Not found 'FindHIP.cmake' file in path '${ROCM_search_PATHS}' "
-    "You can manually set the ROCM install path by -DROCM_ROOT ")
-endif ()
+if(NOT HIP_CMAKE AND ROCM_FIND_REQUIRED)
+  message(
+    FATAL_ERROR "Not found 'FindHIP.cmake' file in path '${ROCM_search_PATHS}' "
+                "You can manually set the ROCM install path by -DROCM_ROOT ")
+endif()
 
-list (APPEND CMAKE_MODULE_PATH ${HIP_CMAKE})
-find_package(HIP) 
+list(APPEND CMAKE_MODULE_PATH ${HIP_CMAKE})
+find_package(HIP)
 
 # define the libs to find
-if (NOT ROCM_FIND_COMPONENTS)
-  if (HIP_VERSION VERSION_GREATER_EQUAL 3.5.1)
+if(NOT ROCM_FIND_COMPONENTS)
+  if(HIP_VERSION VERSION_GREATER_EQUAL 3.5.1)
     set(ROCM_FIND_COMPONENTS amd_comgr amdhip64)
   else()
     set(ROCM_FIND_COMPONENTS hip-hcc hiprtc)
   endif()
-endif ()
+endif()
 
 # libs
-foreach (module ${ROCM_FIND_COMPONENTS})
-  find_library(ROCM_LIBRARIES_${module}
+foreach(module ${ROCM_FIND_COMPONENTS})
+  find_library(
+    ROCM_LIBRARIES_${module}
     NAMES ${module}
-    PATHS ${ROCM_search_PATHS} PATH_SUFFIXES "lib" NO_DEFAULT_PATH
-    )
-  if (ROCM_LIBRARIES_${module})
+    PATHS ${ROCM_search_PATHS}
+    PATH_SUFFIXES "lib"
+    NO_DEFAULT_PATH)
+  if(ROCM_LIBRARIES_${module})
     list(APPEND ROCM_LIBRARIES ${ROCM_LIBRARIES_${module}})
-  elseif (ROCM_FIND_REQUIRED)
-    message(FATAL_ERROR 
-      "Not found lib/'${module}' in '${ROCM_search_PATHS}' "
-      "You can manually set the ROCM install path by -DROCM_ROOT ")
-  endif ()
-endforeach ()
+  elseif(ROCM_FIND_REQUIRED)
+    message(
+      FATAL_ERROR "Not found lib/'${module}' in '${ROCM_search_PATHS}' "
+                  "You can manually set the ROCM install path by -DROCM_ROOT ")
+  endif()
+endforeach()
 
 # define the output variable
-if (ROCM_INCLUDE_DIRS AND ROCM_LIBRARIES AND HIP_CMAKE)
+if(ROCM_INCLUDE_DIRS
+   AND ROCM_LIBRARIES
+   AND HIP_CMAKE)
   set(ROCM_FOUND TRUE)
-else ()
+else()
   set(ROCM_FOUND FALSE)
-endif ()
+endif()
 
 # print message
-if (NOT ROCM_FIND_QUIETLY)
-  message(STATUS "Found ROCM: ${ROCM_INCLUDE_DIRS}, ${ROCM_LIBRARIES}, ${HIP_CMAKE}"
-    " in ${ROCM_search_PATHS}, build AMD GPU support")
-endif ()
+if(NOT ROCM_FIND_QUIETLY)
+  message(
+    STATUS "Found ROCM: ${ROCM_INCLUDE_DIRS}, ${ROCM_LIBRARIES}, ${HIP_CMAKE}"
+           " in ${ROCM_search_PATHS}, build AMD GPU support")
+endif()
 
 unset(ROCM_search_PATHS)
diff --git a/source/cmake/Findtensorflow.cmake b/source/cmake/Findtensorflow.cmake
index 35137db664..01092ca79a 100644
--- a/source/cmake/Findtensorflow.cmake
+++ b/source/cmake/Findtensorflow.cmake
@@ -1,51 +1,49 @@
-# Input:
-# TENSORFLOW_ROOT 
-# BUILD_CPP_IF
+# Input: TENSORFLOW_ROOT BUILD_CPP_IF
 #
-# Output:
-# TensorFlow_FOUND        
-# TensorFlow_INCLUDE_DIRS 
-# TensorFlow_LIBRARY    
-# TensorFlow_LIBRARY_PATH
-# TensorFlowFramework_LIBRARY    
+# Output: TensorFlow_FOUND TensorFlow_INCLUDE_DIRS TensorFlow_LIBRARY
+# TensorFlow_LIBRARY_PATH TensorFlowFramework_LIBRARY
 # TensorFlowFramework_LIBRARY_PATH
 #
-# Target:
-# TensorFlow::tensorflow_framework
-# TensorFlow::tensorflow_cc
+# Target: TensorFlow::tensorflow_framework TensorFlow::tensorflow_cc
 
-if (BUILD_CPP_IF AND INSTALL_TENSORFLOW)
+if(BUILD_CPP_IF AND INSTALL_TENSORFLOW)
   # Here we try to install libtensorflow_cc using conda install.
 
-  if (USE_CUDA_TOOLKIT)
-    set (VARIANT cuda)
-  else ()
-    set (VARIANT cpu)
-  endif ()
+  if(USE_CUDA_TOOLKIT)
+    set(VARIANT cuda)
+  else()
+    set(VARIANT cpu)
+  endif()
 
-  if (NOT DEFINED TENSORFLOW_ROOT)
-    set (TENSORFLOW_ROOT ${CMAKE_INSTALL_PREFIX})
-  endif ()
+  if(NOT DEFINED TENSORFLOW_ROOT)
+    set(TENSORFLOW_ROOT ${CMAKE_INSTALL_PREFIX})
+  endif()
   # execute conda install
-  execute_process(
-	  COMMAND conda create libtensorflow_cc=*=${VARIANT}* -c deepmodeling -y -p ${TENSORFLOW_ROOT}
-	  )
-endif ()
+  execute_process(COMMAND conda create libtensorflow_cc=*=${VARIANT}* -c
+                          deepmodeling -y -p ${TENSORFLOW_ROOT})
+endif()
 
-if (BUILD_CPP_IF AND USE_TF_PYTHON_LIBS AND NOT SKBUILD)
-  # Here we try to install libtensorflow_cc.so as well as libtensorflow_framework.so using libs within the python site-package tensorflow folder.
+if(BUILD_CPP_IF
+   AND USE_TF_PYTHON_LIBS
+   AND NOT SKBUILD)
+  # Here we try to install libtensorflow_cc.so as well as
+  # libtensorflow_framework.so using libs within the python site-package
+  # tensorflow folder.
   execute_process(
-    COMMAND ${Python_EXECUTABLE} -c "import tensorflow; print(tensorflow.sysconfig.get_lib())"
+    COMMAND ${Python_EXECUTABLE} -c
+            "import tensorflow; print(tensorflow.sysconfig.get_lib())"
     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
     OUTPUT_VARIABLE TENSORFLOW_ROOT
     RESULT_VARIABLE TENSORFLOW_ROOT_RESULT_VAR
     ERROR_VARIABLE TENSORFLOW_ROOT_ERROR_VAR
-    OUTPUT_STRIP_TRAILING_WHITESPACE
-  )
-  if (NOT ${TENSORFLOW_ROOT_RESULT_VAR} EQUAL 0)
-    message(FATAL_ERROR "Cannot determine tensorflow root, error code: ${TENSORFLOW_ROOT_RESULT_VAR}, error message: ${TENSORFLOW_ROOT_ERROR_VAR}")
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+  if(NOT ${TENSORFLOW_ROOT_RESULT_VAR} EQUAL 0)
+    message(
+      FATAL_ERROR
+        "Cannot determine tensorflow root, error code: ${TENSORFLOW_ROOT_RESULT_VAR}, error message: ${TENSORFLOW_ROOT_ERROR_VAR}"
+    )
   endif()
-endif ()
+endif()
 
 if(DEFINED TENSORFLOW_ROOT)
   string(REPLACE "lib64" "lib" TENSORFLOW_ROOT_NO64 ${TENSORFLOW_ROOT})
@@ -54,160 +52,184 @@ endif(DEFINED TENSORFLOW_ROOT)
 # define the search path
 list(APPEND TensorFlow_search_PATHS ${TENSORFLOW_ROOT})
 if(BUILD_CPP_IF AND NOT USE_TF_PYTHON_LIBS)
-list(APPEND TensorFlow_search_PATHS ${TENSORFLOW_ROOT_NO64})
-list(APPEND TensorFlow_search_PATHS "/usr/")
-list(APPEND TensorFlow_search_PATHS "/usr/local/")
+  list(APPEND TensorFlow_search_PATHS ${TENSORFLOW_ROOT_NO64})
+  list(APPEND TensorFlow_search_PATHS "/usr/")
+  list(APPEND TensorFlow_search_PATHS "/usr/local/")
 endif()
 if(BUILD_PY_IF OR USE_TF_PYTHON_LIBS)
-	# here TENSORFLOW_ROOT is path to site-packages/tensorflow
-	# for conda libraries, append extra paths
-	list(APPEND TensorFlow_search_PATHS "${TENSORFLOW_ROOT}/../tensorflow_core")
+  # here TENSORFLOW_ROOT is path to site-packages/tensorflow for conda
+  # libraries, append extra paths
+  list(APPEND TensorFlow_search_PATHS "${TENSORFLOW_ROOT}/../tensorflow_core")
 endif()
 
 # includes
-find_path(TensorFlow_INCLUDE_DIRS
-  NAMES 
-  tensorflow/core/public/session.h
-  tensorflow/core/platform/env.h
-  tensorflow/core/framework/op.h
-  tensorflow/core/framework/op_kernel.h
-  tensorflow/core/framework/shape_inference.h
-  PATHS ${TensorFlow_search_PATHS} 
+find_path(
+  TensorFlow_INCLUDE_DIRS
+  NAMES tensorflow/core/public/session.h tensorflow/core/platform/env.h
+        tensorflow/core/framework/op.h tensorflow/core/framework/op_kernel.h
+        tensorflow/core/framework/shape_inference.h
+  PATHS ${TensorFlow_search_PATHS}
   PATH_SUFFIXES "/include"
-  NO_DEFAULT_PATH
-  )
-  
-if (NOT TensorFlow_INCLUDE_DIRS AND tensorflow_FIND_REQUIRED)
-  message(FATAL_ERROR 
-    "Not found 'include/tensorflow/core/public/session.h' directory or other header files in path '${TensorFlow_search_PATHS}' "
-    "You can manually set the tensorflow install path by -DTENSORFLOW_ROOT ")
-endif ()
+  NO_DEFAULT_PATH)
+
+if(NOT TensorFlow_INCLUDE_DIRS AND tensorflow_FIND_REQUIRED)
+  message(
+    FATAL_ERROR
+      "Not found 'include/tensorflow/core/public/session.h' directory or other header files in path '${TensorFlow_search_PATHS}' "
+      "You can manually set the tensorflow install path by -DTENSORFLOW_ROOT ")
+endif()
 
-if (BUILD_CPP_IF AND NOT USE_TF_PYTHON_LIBS)
-  message (STATUS "Enabled cpp interface build, looking for tensorflow_cc and tensorflow_framework")
+if(BUILD_CPP_IF AND NOT USE_TF_PYTHON_LIBS)
+  message(
+    STATUS
+      "Enabled cpp interface build, looking for tensorflow_cc and tensorflow_framework"
+  )
   # tensorflow_cc and tensorflow_framework
-  if (NOT TensorFlow_FIND_COMPONENTS)
+  if(NOT TensorFlow_FIND_COMPONENTS)
     set(TensorFlow_FIND_COMPONENTS tensorflow_cc tensorflow_framework)
-  endif ()
+  endif()
   # the lib
   list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.1)
   list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.2)
-  set (TensorFlow_LIBRARY_PATH "")
-  foreach (module ${TensorFlow_FIND_COMPONENTS})
-    find_library(TensorFlow_LIBRARY_${module}
+  set(TensorFlow_LIBRARY_PATH "")
+  foreach(module ${TensorFlow_FIND_COMPONENTS})
+    find_library(
+      TensorFlow_LIBRARY_${module}
       NAMES ${module}
-      PATHS ${TensorFlow_search_PATHS} PATH_SUFFIXES lib NO_DEFAULT_PATH
-      )
-    if (TensorFlow_LIBRARY_${module})
+      PATHS ${TensorFlow_search_PATHS}
+      PATH_SUFFIXES lib
+      NO_DEFAULT_PATH)
+    if(TensorFlow_LIBRARY_${module})
       list(APPEND TensorFlow_LIBRARY ${TensorFlow_LIBRARY_${module}})
-      get_filename_component(TensorFlow_LIBRARY_PATH_${module} ${TensorFlow_LIBRARY_${module}} PATH)
+      get_filename_component(TensorFlow_LIBRARY_PATH_${module}
+                             ${TensorFlow_LIBRARY_${module}} PATH)
       list(APPEND TensorFlow_LIBRARY_PATH ${TensorFlow_LIBRARY_PATH_${module}})
-    elseif (tensorflow_FIND_REQUIRED)
-      message(FATAL_ERROR 
-	"Not found lib/'${module}' in '${TensorFlow_search_PATHS}' "
-	"You can manually set the tensorflow install path by -DTENSORFLOW_ROOT ")
-    endif ()
-  endforeach ()
-else (BUILD_CPP_IF AND NOT USE_TF_PYTHON_LIBS)
-  message (STATUS "Disabled cpp interface build, looking for tensorflow_framework")
-endif (BUILD_CPP_IF AND NOT USE_TF_PYTHON_LIBS)
-
+    elseif(tensorflow_FIND_REQUIRED)
+      message(
+        FATAL_ERROR
+          "Not found lib/'${module}' in '${TensorFlow_search_PATHS}' "
+          "You can manually set the tensorflow install path by -DTENSORFLOW_ROOT "
+      )
+    endif()
+  endforeach()
+else(BUILD_CPP_IF AND NOT USE_TF_PYTHON_LIBS)
+  message(
+    STATUS "Disabled cpp interface build, looking for tensorflow_framework")
+endif(BUILD_CPP_IF AND NOT USE_TF_PYTHON_LIBS)
 
 # tensorflow_framework
-if (NOT TensorFlowFramework_FIND_COMPONENTS)
-  if (WIN32)
+if(NOT TensorFlowFramework_FIND_COMPONENTS)
+  if(WIN32)
     set(TensorFlowFramework_FIND_COMPONENTS _pywrap_tensorflow_internal)
     set(TF_SUFFIX "")
-  else ()
-  set(TensorFlowFramework_FIND_COMPONENTS tensorflow_framework)
+  else()
+    set(TensorFlowFramework_FIND_COMPONENTS tensorflow_framework)
     set(TF_SUFFIX lib)
-  endif ()
-endif ()
+  endif()
+endif()
 # the lib
-if (WIN32)
+if(WIN32)
   list(APPEND TensorFlow_search_PATHS ${TENSORFLOW_ROOT}/python)
-else ()
-list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.1)
-list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.2)
+else()
+  list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.1)
+  list(APPEND CMAKE_FIND_LIBRARY_SUFFIXES .so.2)
 endif()
-set (TensorFlowFramework_LIBRARY_PATH "")
-foreach (module ${TensorFlowFramework_FIND_COMPONENTS})
-  find_library(TensorFlowFramework_LIBRARY_${module}
+set(TensorFlowFramework_LIBRARY_PATH "")
+foreach(module ${TensorFlowFramework_FIND_COMPONENTS})
+  find_library(
+    TensorFlowFramework_LIBRARY_${module}
     NAMES ${module}
-    PATHS ${TensorFlow_search_PATHS} PATH_SUFFIXES ${TF_SUFFIX} NO_DEFAULT_PATH
+    PATHS ${TensorFlow_search_PATHS}
+    PATH_SUFFIXES ${TF_SUFFIX}
+    NO_DEFAULT_PATH)
+  if(TensorFlowFramework_LIBRARY_${module})
+    list(APPEND TensorFlowFramework_LIBRARY
+         ${TensorFlowFramework_LIBRARY_${module}})
+    get_filename_component(TensorFlowFramework_LIBRARY_PATH_${module}
+                           ${TensorFlowFramework_LIBRARY_${module}} PATH)
+    list(APPEND TensorFlowFramework_LIBRARY_PATH
+         ${TensorFlowFramework_LIBRARY_PATH_${module}})
+  elseif(tensorflow_FIND_REQUIRED)
+    message(
+      FATAL_ERROR
+        "Not found ${TF_SUFFIX}/${module} in '${TensorFlow_search_PATHS}' "
+        "You can manually set the tensorflow install path by -DTENSORFLOW_ROOT "
     )
-  if (TensorFlowFramework_LIBRARY_${module})
-    list(APPEND TensorFlowFramework_LIBRARY ${TensorFlowFramework_LIBRARY_${module}})
-    get_filename_component(TensorFlowFramework_LIBRARY_PATH_${module} ${TensorFlowFramework_LIBRARY_${module}} PATH)
-    list(APPEND TensorFlowFramework_LIBRARY_PATH ${TensorFlowFramework_LIBRARY_PATH_${module}})
-  elseif (tensorflow_FIND_REQUIRED)
-    message(FATAL_ERROR 
-      "Not found ${TF_SUFFIX}/${module} in '${TensorFlow_search_PATHS}' "
-      "You can manually set the tensorflow install path by -DTENSORFLOW_ROOT ")
-  endif ()
-endforeach ()
+  endif()
+endforeach()
 
 # find _pywrap_tensorflow_internal and set it as tensorflow_cc
-if (BUILD_CPP_IF AND USE_TF_PYTHON_LIBS)
+if(BUILD_CPP_IF AND USE_TF_PYTHON_LIBS)
   set(TF_SUFFIX python)
   if(WIN32)
     set(TensorFlow_FIND_COMPONENTS _pywrap_tensorflow_internal.lib)
-  else ()
-    set(TensorFlow_FIND_COMPONENTS _pywrap_tensorflow_internal${CMAKE_SHARED_MODULE_SUFFIX})
+  else()
+    set(TensorFlow_FIND_COMPONENTS
+        _pywrap_tensorflow_internal${CMAKE_SHARED_MODULE_SUFFIX})
   endif()
-  foreach (module ${TensorFlow_FIND_COMPONENTS})
-    find_library(TensorFlow_LIBRARY_${module}
+  foreach(module ${TensorFlow_FIND_COMPONENTS})
+    find_library(
+      TensorFlow_LIBRARY_${module}
       NAMES ${module}
-      PATHS ${TensorFlow_search_PATHS} PATH_SUFFIXES ${TF_SUFFIX} NO_DEFAULT_PATH
-      )
-    if (TensorFlow_LIBRARY_${module})
+      PATHS ${TensorFlow_search_PATHS}
+      PATH_SUFFIXES ${TF_SUFFIX}
+      NO_DEFAULT_PATH)
+    if(TensorFlow_LIBRARY_${module})
       list(APPEND TensorFlow_LIBRARY ${TensorFlow_LIBRARY_${module}})
-      get_filename_component(TensorFlow_LIBRARY_PATH_${module} ${TensorFlow_LIBRARY_${module}} PATH)
+      get_filename_component(TensorFlow_LIBRARY_PATH_${module}
+                             ${TensorFlow_LIBRARY_${module}} PATH)
       list(APPEND TensorFlow_LIBRARY_PATH ${TensorFlow_LIBRARY_PATH_${module}})
-      set (TensorFlow_LIBRARY_tensorflow_cc ${TensorFlow_LIBRARY_${module}})
-    elseif (tensorflow_FIND_REQUIRED)
-      message(FATAL_ERROR 
-        "Not found ${TF_SUFFIX}/${module} in '${TensorFlow_search_PATHS}' ")
-    endif ()
-  endforeach ()
+      set(TensorFlow_LIBRARY_tensorflow_cc ${TensorFlow_LIBRARY_${module}})
+    elseif(tensorflow_FIND_REQUIRED)
+      message(
+        FATAL_ERROR
+          "Not found ${TF_SUFFIX}/${module} in '${TensorFlow_search_PATHS}' ")
+    endif()
+  endforeach()
 endif()
 
-
 # find protobuf header
-find_path(TensorFlow_INCLUDE_DIRS_GOOGLE
-  NAMES 
-  google/protobuf/type.pb.h
-  PATHS ${TensorFlow_search_PATHS} 
+find_path(
+  TensorFlow_INCLUDE_DIRS_GOOGLE
+  NAMES google/protobuf/type.pb.h
+  PATHS ${TensorFlow_search_PATHS}
   PATH_SUFFIXES "/include"
-  NO_DEFAULT_PATH
-  )
-  # try to find from ldd list of TF library
-  # a warning is threw here, just ignore it
-  # https://stackoverflow.com/a/49738486/9567349
-  if ($ENV{LD_LIBRARY_PATH})
-    string(REPLACE ":" ";" _LD_LIBRARY_DIRS $ENV{LD_LIBRARY_PATH})
+  NO_DEFAULT_PATH)
+# try to find from ldd list of TF library a warning is threw here, just ignore
+# it https://stackoverflow.com/a/49738486/9567349
+if($ENV{LD_LIBRARY_PATH})
+  string(REPLACE ":" ";" _LD_LIBRARY_DIRS $ENV{LD_LIBRARY_PATH})
+endif()
+file(
+  GET_RUNTIME_DEPENDENCIES
+  RESOLVED_DEPENDENCIES_VAR
+  TensorFlow_LINKED_LIBRARIES
+  UNRESOLVED_DEPENDENCIES_VAR
+  TensorFlow_LINKED_LIBRARIES_UNRESOLVED
+  LIBRARIES
+  ${TensorFlowFramework_LIBRARY}
+  POST_INCLUDE_REGEXES
+  "^.+protobuf\..+$"
+  DIRECTORIES
+  "${_LD_LIBRARY_DIRS}")
+# search protobuf from linked libraries
+foreach(_lib ${TensorFlow_LINKED_LIBRARIES})
+  string(REGEX MATCH "^.+protobuf\..+$" _protobuf_lib ${_lib})
+  if(_protobuf_lib)
+    set(Protobuf_LIBRARY ${_protobuf_lib})
+    break()
   endif()
-  file(GET_RUNTIME_DEPENDENCIES
-    RESOLVED_DEPENDENCIES_VAR TensorFlow_LINKED_LIBRARIES
-    UNRESOLVED_DEPENDENCIES_VAR TensorFlow_LINKED_LIBRARIES_UNRESOLVED
-    LIBRARIES ${TensorFlowFramework_LIBRARY}
-    POST_INCLUDE_REGEXES "^.+protobuf\..+$"
-    DIRECTORIES "${_LD_LIBRARY_DIRS}"
+endforeach()
+if(NOT TensorFlow_INCLUDE_DIRS_GOOGLE)
+  message(
+    STATUS
+      "Protobuf headers are not found in the directory of TensorFlow, assuming external protobuf was used to build TensorFlow"
   )
-  # search protobuf from linked libraries
-  foreach(_lib ${TensorFlow_LINKED_LIBRARIES})
-      string(REGEX MATCH "^.+protobuf\..+$" _protobuf_lib ${_lib})
-      if (_protobuf_lib)
-        set(Protobuf_LIBRARY ${_protobuf_lib})
-        break()
-      endif()
-  endforeach()
-if (NOT TensorFlow_INCLUDE_DIRS_GOOGLE)
-  message(STATUS "Protobuf headers are not found in the directory of TensorFlow, assuming external protobuf was used to build TensorFlow")
-  if (NOT Protobuf_LIBRARY)
+  if(NOT Protobuf_LIBRARY)
     message(FATAL_ERROR "TensorFlow is not linked to protobuf")
   endif()
-  get_filename_component(Protobuf_LIBRARY_DIRECTORY ${Protobuf_LIBRARY} DIRECTORY)
+  get_filename_component(Protobuf_LIBRARY_DIRECTORY ${Protobuf_LIBRARY}
+                         DIRECTORY)
   # assume the include directory is ../include
   set(Protobuf_INCLUDE_DIR ${Protobuf_LIBRARY_DIRECTORY}/../include)
   find_package(Protobuf REQUIRED)
@@ -215,20 +237,22 @@ if (NOT TensorFlow_INCLUDE_DIRS_GOOGLE)
 endif()
 list(APPEND TensorFlow_INCLUDE_DIRS ${TensorFlow_INCLUDE_DIRS_GOOGLE})
 
-if (BUILD_CPP_IF)
+if(BUILD_CPP_IF)
   # define the output variable
-  if (TensorFlow_INCLUDE_DIRS AND TensorFlow_LIBRARY AND TensorFlowFramework_LIBRARY)
+  if(TensorFlow_INCLUDE_DIRS
+     AND TensorFlow_LIBRARY
+     AND TensorFlowFramework_LIBRARY)
     set(TensorFlow_FOUND TRUE)
-  else ()
+  else()
     set(TensorFlow_FOUND FALSE)
-  endif ()
-else (BUILD_CPP_IF)
-  if (TensorFlow_INCLUDE_DIRS AND TensorFlowFramework_LIBRARY)
+  endif()
+else(BUILD_CPP_IF)
+  if(TensorFlow_INCLUDE_DIRS AND TensorFlowFramework_LIBRARY)
     set(TensorFlow_FOUND TRUE)
-  else ()
+  else()
     set(TensorFlow_FOUND FALSE)
-  endif ()
-endif (BUILD_CPP_IF)
+  endif()
+endif(BUILD_CPP_IF)
 
 # detect TensorFlow version
 try_run(
@@ -237,37 +261,42 @@ try_run(
   "${CMAKE_CURRENT_LIST_DIR}/tf_version.cpp"
   CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${TensorFlow_INCLUDE_DIRS}"
   RUN_OUTPUT_VARIABLE TENSORFLOW_VERSION
-  COMPILE_OUTPUT_VARIABLE TENSORFLOW_VERSION_COMPILE_OUTPUT_VAR
-)
-if (NOT ${TENSORFLOW_VERSION_COMPILE_RESULT_VAR})
-  message(FATAL_ERROR "Failed to compile: \n ${TENSORFLOW_VERSION_COMPILE_OUTPUT_VAR}" )
+  COMPILE_OUTPUT_VARIABLE TENSORFLOW_VERSION_COMPILE_OUTPUT_VAR)
+if(NOT ${TENSORFLOW_VERSION_COMPILE_RESULT_VAR})
+  message(
+    FATAL_ERROR "Failed to compile: \n ${TENSORFLOW_VERSION_COMPILE_OUTPUT_VAR}"
+  )
 endif()
-if (NOT ${TENSORFLOW_VERSION_RUN_RESULT_VAR} EQUAL "0")
-  message(FATAL_ERROR "Failed to run, return code: ${TENSORFLOW_VERSION}" )
+if(NOT ${TENSORFLOW_VERSION_RUN_RESULT_VAR} EQUAL "0")
+  message(FATAL_ERROR "Failed to run, return code: ${TENSORFLOW_VERSION}")
 endif()
 
 # print message
-if (NOT TensorFlow_FIND_QUIETLY)
-  message(STATUS "Found TensorFlow: ${TensorFlow_INCLUDE_DIRS}, ${TensorFlow_LIBRARY}, ${TensorFlowFramework_LIBRARY} "
-    " in ${TensorFlow_search_PATHS} (found version \"${TENSORFLOW_VERSION}\")")
-endif ()
+if(NOT TensorFlow_FIND_QUIETLY)
+  message(
+    STATUS
+      "Found TensorFlow: ${TensorFlow_INCLUDE_DIRS}, ${TensorFlow_LIBRARY}, ${TensorFlowFramework_LIBRARY} "
+      " in ${TensorFlow_search_PATHS} (found version \"${TENSORFLOW_VERSION}\")"
+  )
+endif()
 
 unset(TensorFlow_search_PATHS)
 
-if (TENSORFLOW_VERSION VERSION_GREATER_EQUAL 2.10)
-  set (CMAKE_CXX_STANDARD 17)
-elseif (TENSORFLOW_VERSION VERSION_GREATER_EQUAL 2.7)
-  set (CMAKE_CXX_STANDARD 14)
+if(TENSORFLOW_VERSION VERSION_GREATER_EQUAL 2.10)
+  set(CMAKE_CXX_STANDARD 17)
+elseif(TENSORFLOW_VERSION VERSION_GREATER_EQUAL 2.7)
+  set(CMAKE_CXX_STANDARD 14)
 else()
-  set (CMAKE_CXX_STANDARD 11)
+  set(CMAKE_CXX_STANDARD 11)
 endif()
 
-if (MSVC)
+if(MSVC)
   # see TF .bazelrc
-  add_compile_options(/W0 /Zc:__cplusplus /D_USE_MATH_DEFINES /d2ReducedOptimizeHugeFunctions)
+  add_compile_options(/W0 /Zc:__cplusplus /D_USE_MATH_DEFINES
+                      /d2ReducedOptimizeHugeFunctions)
   set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
 endif()
-if (TENSORFLOW_VERSION VERSION_GREATER_EQUAL 2.4 AND MSVC)
+if(TENSORFLOW_VERSION VERSION_GREATER_EQUAL 2.4 AND MSVC)
   # see TF 2.4 release notes
   add_compile_options(/Zc:preprocessor)
 endif()
@@ -276,62 +305,62 @@ endif()
 if(MSVC OR APPLE)
   # skip on windows or osx
   set(OP_CXX_ABI 0)
-elseif (NOT DEFINED OP_CXX_ABI)
-  if (TENSORFLOW_VERSION VERSION_GREATER_EQUAL 2.9)
-    # TF 2.9 removes the tf_cxx11_abi_flag function, which is really bad...
-    # try compiling with both 0 and 1, and see which one works
+elseif(NOT DEFINED OP_CXX_ABI)
+  if(TENSORFLOW_VERSION VERSION_GREATER_EQUAL 2.9)
+    # TF 2.9 removes the tf_cxx11_abi_flag function, which is really bad... try
+    # compiling with both 0 and 1, and see which one works
     try_compile(
-      CPP_CXX_ABI_COMPILE_RESULT_VAR0
-      ${CMAKE_CURRENT_BINARY_DIR}/tf_cxx_abi0
+      CPP_CXX_ABI_COMPILE_RESULT_VAR0 ${CMAKE_CURRENT_BINARY_DIR}/tf_cxx_abi0
       "${CMAKE_CURRENT_LIST_DIR}/test_cxx_abi.cpp"
       LINK_LIBRARIES ${TensorFlowFramework_LIBRARY}
       CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${TensorFlow_INCLUDE_DIRS}"
-      COMPILE_DEFINITIONS -D_GLIBCXX_USE_CXX11_ABI=0
-      )
+      COMPILE_DEFINITIONS -D_GLIBCXX_USE_CXX11_ABI=0)
     try_compile(
-      CPP_CXX_ABI_COMPILE_RESULT_VAR1
-      ${CMAKE_CURRENT_BINARY_DIR}/tf_cxx_abi1
+      CPP_CXX_ABI_COMPILE_RESULT_VAR1 ${CMAKE_CURRENT_BINARY_DIR}/tf_cxx_abi1
       "${CMAKE_CURRENT_LIST_DIR}/test_cxx_abi.cpp"
       LINK_LIBRARIES ${TensorFlowFramework_LIBRARY}
       CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${TensorFlow_INCLUDE_DIRS}"
-      COMPILE_DEFINITIONS -D_GLIBCXX_USE_CXX11_ABI=1
-      )
-    if (NOT ${CPP_CXX_ABI_COMPILE_RESULT_VAR0} AND ${CPP_CXX_ABI_COMPILE_RESULT_VAR1})
+      COMPILE_DEFINITIONS -D_GLIBCXX_USE_CXX11_ABI=1)
+    if(NOT ${CPP_CXX_ABI_COMPILE_RESULT_VAR0}
+       AND ${CPP_CXX_ABI_COMPILE_RESULT_VAR1})
       set(OP_CXX_ABI 1)
-    elseif(${CPP_CXX_ABI_COMPILE_RESULT_VAR0} AND NOT ${CPP_CXX_ABI_COMPILE_RESULT_VAR1})
+    elseif(${CPP_CXX_ABI_COMPILE_RESULT_VAR0}
+           AND NOT ${CPP_CXX_ABI_COMPILE_RESULT_VAR1})
       set(OP_CXX_ABI 0)
     else()
       message(FATAL_ERROR "Failed to detect OP_CXX_ABI, please set it manually")
     endif()
   else()
     try_run(
-      CPP_CXX_ABI_RUN_RESULT_VAR CPP_CXX_ABI_COMPILE_RESULT_VAR
+      CPP_CXX_ABI_RUN_RESULT_VAR
+      CPP_CXX_ABI_COMPILE_RESULT_VAR
       ${CMAKE_CURRENT_BINARY_DIR}/tf_cxx_abi
       "${CMAKE_CURRENT_LIST_DIR}/tf_cxx_abi.cpp"
-      LINK_LIBRARIES ${TensorFlowFramework_LIBRARY}
+      LINK_LIBRARIES
+      ${TensorFlowFramework_LIBRARY}
       CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${TensorFlow_INCLUDE_DIRS}"
       RUN_OUTPUT_VARIABLE CPP_CXX_ABI
-      COMPILE_OUTPUT_VARIABLE CPP_CXX_ABI_COMPILE_OUTPUT_VAR
-      )
-    if (NOT ${CPP_CXX_ABI_COMPILE_RESULT_VAR})
-      message(FATAL_ERROR "Failed to compile: \n ${CPP_CXX_ABI_COMPILE_OUTPUT_VAR}" )
+      COMPILE_OUTPUT_VARIABLE CPP_CXX_ABI_COMPILE_OUTPUT_VAR)
+    if(NOT ${CPP_CXX_ABI_COMPILE_RESULT_VAR})
+      message(
+        FATAL_ERROR "Failed to compile: \n ${CPP_CXX_ABI_COMPILE_OUTPUT_VAR}")
     endif()
-    if (NOT ${CPP_CXX_ABI_RUN_RESULT_VAR} EQUAL "0")
-      message(FATAL_ERROR "Failed to run, return code: ${CPP_CXX_ABI}" )
+    if(NOT ${CPP_CXX_ABI_RUN_RESULT_VAR} EQUAL "0")
+      message(FATAL_ERROR "Failed to run, return code: ${CPP_CXX_ABI}")
     endif()
     set(OP_CXX_ABI ${CPP_CXX_ABI})
   endif()
-  message (STATUS "Automatically determined OP_CXX_ABI=${OP_CXX_ABI} ")
+  message(STATUS "Automatically determined OP_CXX_ABI=${OP_CXX_ABI} ")
 else()
-  message (STATUS "User set OP_CXX_ABI=${OP_CXX_ABI} ")  
-endif()    
+  message(STATUS "User set OP_CXX_ABI=${OP_CXX_ABI} ")
+endif()
 # message the cxx_abi used during compiling
-if (${OP_CXX_ABI} EQUAL 0) 
-  message (STATUS "Set GLIBCXX_USE_CXX_ABI=0")
-else ()
-  set (OP_CXX_ABI 1)
-  message (STATUS "Set GLIBCXX_USE_CXX_ABI=1")
-endif ()
+if(${OP_CXX_ABI} EQUAL 0)
+  message(STATUS "Set GLIBCXX_USE_CXX_ABI=0")
+else()
+  set(OP_CXX_ABI 1)
+  message(STATUS "Set GLIBCXX_USE_CXX_ABI=1")
+endif()
 
 # set _GLIBCXX_USE_CXX11_ABI flag globally
 add_definitions(-D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI})
@@ -343,46 +372,55 @@ add_definitions(-D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI})
 add_library(TensorFlow::tensorflow_framework SHARED IMPORTED GLOBAL)
 if(WIN32)
   if(USE_TF_PYTHON_LIBS)
-    string(REGEX REPLACE "[.]lib" ".pyd" _DLL_FILE ${TensorFlowFramework_LIBRARY})
+    string(REGEX REPLACE "[.]lib" ".pyd" _DLL_FILE
+                         ${TensorFlowFramework_LIBRARY})
   else()
-    string(REGEX REPLACE "[.]lib" ".dll" _DLL_FILE ${TensorFlowFramework_LIBRARY})
+    string(REGEX REPLACE "[.]lib" ".dll" _DLL_FILE
+                         ${TensorFlowFramework_LIBRARY})
   endif()
-  set_target_properties(TensorFlow::tensorflow_framework PROPERTIES
-               IMPORTED_IMPLIB ${TensorFlowFramework_LIBRARY}
-               IMPORTED_LOCATION ${_DLL_FILE})
+  set_target_properties(
+    TensorFlow::tensorflow_framework
+    PROPERTIES IMPORTED_IMPLIB ${TensorFlowFramework_LIBRARY} IMPORTED_LOCATION
+                                                              ${_DLL_FILE})
 else()
-set_property(TARGET TensorFlow::tensorflow_framework PROPERTY
-             IMPORTED_LOCATION ${TensorFlowFramework_LIBRARY})
+  set_property(TARGET TensorFlow::tensorflow_framework
+               PROPERTY IMPORTED_LOCATION ${TensorFlowFramework_LIBRARY})
 endif()
-set_property(TARGET TensorFlow::tensorflow_framework PROPERTY
-             CXX_STANDARD ${CMAKE_CXX_STANDARD})
-target_include_directories(TensorFlow::tensorflow_framework INTERFACE ${TensorFlow_INCLUDE_DIRS})
-target_compile_definitions(TensorFlow::tensorflow_framework INTERFACE
-                           -D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI})
+set_property(TARGET TensorFlow::tensorflow_framework
+             PROPERTY CXX_STANDARD ${CMAKE_CXX_STANDARD})
+target_include_directories(TensorFlow::tensorflow_framework
+                           INTERFACE ${TensorFlow_INCLUDE_DIRS})
+target_compile_definitions(TensorFlow::tensorflow_framework
+                           INTERFACE -D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI})
 
 # TensorFlow::tensorflow_cc
 if(BUILD_CPP_IF)
   add_library(TensorFlow::tensorflow_cc SHARED IMPORTED GLOBAL)
   if(WIN32)
     if(USE_TF_PYTHON_LIBS)
-      string(REGEX REPLACE "[.]lib" ".pyd" _DLL_FILE ${TensorFlow_LIBRARY_tensorflow_cc})
+      string(REGEX REPLACE "[.]lib" ".pyd" _DLL_FILE
+                           ${TensorFlow_LIBRARY_tensorflow_cc})
     else()
-      string(REGEX REPLACE "[.]lib" ".dll" _DLL_FILE ${TensorFlow_LIBRARY_tensorflow_cc})
+      string(REGEX REPLACE "[.]lib" ".dll" _DLL_FILE
+                           ${TensorFlow_LIBRARY_tensorflow_cc})
     endif()
-    set_target_properties(TensorFlow::tensorflow_cc PROPERTIES
-                 IMPORTED_IMPLIB ${TensorFlow_LIBRARY_tensorflow_cc}
+    set_target_properties(
+      TensorFlow::tensorflow_cc
+      PROPERTIES IMPORTED_IMPLIB ${TensorFlow_LIBRARY_tensorflow_cc}
                  IMPORTED_LOCATION ${_DLL_FILE})
   else()
-    set_property(TARGET TensorFlow::tensorflow_cc PROPERTY
-                IMPORTED_LOCATION ${TensorFlow_LIBRARY_tensorflow_cc})
+    set_property(TARGET TensorFlow::tensorflow_cc
+                 PROPERTY IMPORTED_LOCATION ${TensorFlow_LIBRARY_tensorflow_cc})
   endif()
-  set_property(TARGET TensorFlow::tensorflow_cc PROPERTY
-              CXX_STANDARD ${CMAKE_CXX_STANDARD})
-  target_include_directories(TensorFlow::tensorflow_cc INTERFACE ${TensorFlow_INCLUDE_DIRS})
-  target_compile_definitions(TensorFlow::tensorflow_cc INTERFACE
-                            -D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI})
-  if (USE_TF_PYTHON_LIBS)
+  set_property(TARGET TensorFlow::tensorflow_cc PROPERTY CXX_STANDARD
+                                                         ${CMAKE_CXX_STANDARD})
+  target_include_directories(TensorFlow::tensorflow_cc
+                             INTERFACE ${TensorFlow_INCLUDE_DIRS})
+  target_compile_definitions(TensorFlow::tensorflow_cc
+                             INTERFACE -D_GLIBCXX_USE_CXX11_ABI=${OP_CXX_ABI})
+  if(USE_TF_PYTHON_LIBS)
     # link: libpython3.x.so
-    target_link_libraries (TensorFlow::tensorflow_cc INTERFACE ${Python_LIBRARIES})
+    target_link_libraries(TensorFlow::tensorflow_cc
+                          INTERFACE ${Python_LIBRARIES})
   endif()
 endif()
diff --git a/source/cmake/Findxdrfile.cmake b/source/cmake/Findxdrfile.cmake
index 787b4164fa..edea06e61f 100644
--- a/source/cmake/Findxdrfile.cmake
+++ b/source/cmake/Findxdrfile.cmake
@@ -1,10 +1,6 @@
-# Input:
-# XDRFILE_ROOT 
+# Input: XDRFILE_ROOT
 #
-# Output:
-# XDRFILE_FOUND        
-# XDRFILE_INCLUDE_DIRS 
-# XDRFILE_LIBRARIES    
+# Output: XDRFILE_FOUND XDRFILE_INCLUDE_DIRS XDRFILE_LIBRARIES
 
 # define the search path
 list(APPEND XDRFILE_search_PATHS ${XDRFILE_ROOT})
@@ -12,52 +8,53 @@ list(APPEND XDRFILE_search_PATHS "/usr/")
 list(APPEND XDRFILE_search_PATHS "/usr/local/")
 
 # define the libs to find
-if (NOT XDRFILE_FIND_COMPONENTS)
+if(NOT XDRFILE_FIND_COMPONENTS)
   set(XDRFILE_FIND_COMPONENTS xdrfile)
-endif ()
+endif()
 
 # includes
-find_path (XDRFILE_INCLUDE_DIRS
-  NAMES 
-  xdrfile/xdrfile.h
-  xdrfile/xdrfile_xtc.h
-  xdrfile/xdrfile_trr.h
-  PATHS ${XDRFILE_search_PATHS} 
+find_path(
+  XDRFILE_INCLUDE_DIRS
+  NAMES xdrfile/xdrfile.h xdrfile/xdrfile_xtc.h xdrfile/xdrfile_trr.h
+  PATHS ${XDRFILE_search_PATHS}
   PATH_SUFFIXES "/include"
-  NO_DEFAULT_PATH
-  )
-if (NOT XDRFILE_INCLUDE_DIRS AND xdrfile_FIND_REQUIRED)
-  message(FATAL_ERROR 
-    "Not found 'include/xdrfile/xdrfile.h' directory in path '${XDRFILE_search_PATHS}' "
-    "You can manually set the xdrfile install path by -DXDRFILE_ROOT ")
-endif ()
+  NO_DEFAULT_PATH)
+if(NOT XDRFILE_INCLUDE_DIRS AND xdrfile_FIND_REQUIRED)
+  message(
+    FATAL_ERROR
+      "Not found 'include/xdrfile/xdrfile.h' directory in path '${XDRFILE_search_PATHS}' "
+      "You can manually set the xdrfile install path by -DXDRFILE_ROOT ")
+endif()
 
 # libs
-foreach (module ${XDRFILE_FIND_COMPONENTS})
-  find_library(XDRFILE_LIBRARIES_${module}
+foreach(module ${XDRFILE_FIND_COMPONENTS})
+  find_library(
+    XDRFILE_LIBRARIES_${module}
     NAMES ${module}
-    PATHS ${XDRFILE_search_PATHS} PATH_SUFFIXES lib NO_DEFAULT_PATH
-    )
-  if (XDRFILE_LIBRARIES_${module})
+    PATHS ${XDRFILE_search_PATHS}
+    PATH_SUFFIXES lib
+    NO_DEFAULT_PATH)
+  if(XDRFILE_LIBRARIES_${module})
     list(APPEND XDRFILE_LIBRARIES ${XDRFILE_LIBRARIES_${module}})
-  elseif (xdrfile_FIND_REQUIRED)
-    message(FATAL_ERROR 
-      "Not found lib/'${module}' in '${XDRFILE_search_PATHS}' "
-      "You can manually set the xdrfile install path by -DXDRFILE_ROOT ")
-  endif ()
-endforeach ()
+  elseif(xdrfile_FIND_REQUIRED)
+    message(
+      FATAL_ERROR
+        "Not found lib/'${module}' in '${XDRFILE_search_PATHS}' "
+        "You can manually set the xdrfile install path by -DXDRFILE_ROOT ")
+  endif()
+endforeach()
 
 # define the output variable
-if (XDRFILE_INCLUDE_DIRS AND XDRFILE_LIBRARIES)
+if(XDRFILE_INCLUDE_DIRS AND XDRFILE_LIBRARIES)
   set(XDRFILE_FOUND TRUE)
-else ()
+else()
   set(XDRFILE_FOUND FALSE)
-endif ()
+endif()
 
 # print message
-if (NOT XDRFILE_FIND_QUIETLY)
+if(NOT XDRFILE_FIND_QUIETLY)
   message(STATUS "Found XDRFILE: ${XDRFILE_INCLUDE_DIRS}, ${XDRFILE_LIBRARIES}"
-    " in ${XDRFILE_search_PATHS}")
-endif ()
+                 " in ${XDRFILE_search_PATHS}")
+endif()
 
 unset(XDRFILE_search_PATHS)
diff --git a/source/cmake/cmake_lammps.cmake.in b/source/cmake/cmake_lammps.cmake.in
index 44ad206d9c..7db7ec71c0 100644
--- a/source/cmake/cmake_lammps.cmake.in
+++ b/source/cmake/cmake_lammps.cmake.in
@@ -4,25 +4,25 @@ string(REGEX REPLACE "\n" "" files "${files}")
 
 foreach (cur_file ${files})
   file (
-    INSTALL DESTINATION "${LMP_INSTALL_PREFIX}" 
+    INSTALL DESTINATION "${LMP_INSTALL_PREFIX}"
     TYPE FILE
     FILES "${cur_file}"
     )
 endforeach ()
 
 file (
-  INSTALL DESTINATION "${LMP_INSTALL_PREFIX}" 
-  TYPE FILE 
+  INSTALL DESTINATION "${LMP_INSTALL_PREFIX}"
+  TYPE FILE
   FILES "@CMAKE_BINARY_DIR@/lmp/env@HIGH_PREC_VARIANT@.sh"
 )
 file (
-  INSTALL DESTINATION "${LMP_INSTALL_PREFIX}" 
-  TYPE FILE 
+  INSTALL DESTINATION "${LMP_INSTALL_PREFIX}"
+  TYPE FILE
   FILES "@CMAKE_BINARY_DIR@/lmp/env@LOW_PREC_VARIANT@.sh"
 )
 
 file (
-  INSTALL DESTINATION "${LMP_INSTALL_PREFIX}" 
-  TYPE FILE 
+  INSTALL DESTINATION "${LMP_INSTALL_PREFIX}"
+  TYPE FILE
   FILES "@CMAKE_BINARY_DIR@/lmp/pair_deepmd.h"
 )
diff --git a/source/cmake/coverage_config/CMakeLists.txt b/source/cmake/coverage_config/CMakeLists.txt
index 90fc13522d..2522bf9ff1 100644
--- a/source/cmake/coverage_config/CMakeLists.txt
+++ b/source/cmake/coverage_config/CMakeLists.txt
@@ -1,12 +1,13 @@
-# include this directory before other modules
-add_library(coverage_config INTERFACE)
-target_compile_options(coverage_config INTERFACE
-  -O0        # no optimization
-  -g         # generate debug info
-  --coverage # sets all required flags
-)
-if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.13)
-  target_link_options(coverage_config INTERFACE --coverage)
-else()
-  target_link_libraries(coverage_config INTERFACE --coverage)
-endif()
\ No newline at end of file
+# include this directory before other modules
+add_library(coverage_config INTERFACE)
+target_compile_options(
+  coverage_config
+  INTERFACE -O0 # no optimization
+            -g # generate debug info
+            --coverage # sets all required flags
+)
+if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.13)
+  target_link_options(coverage_config INTERFACE --coverage)
+else()
+  target_link_libraries(coverage_config INTERFACE --coverage)
+endif()
diff --git a/source/cmake/test_cxx_abi.cpp b/source/cmake/test_cxx_abi.cpp
index 0e70533aa0..69cf50c767 100644
--- a/source/cmake/test_cxx_abi.cpp
+++ b/source/cmake/test_cxx_abi.cpp
@@ -1,6 +1,7 @@
 #include 
+
 #include "tensorflow/core/framework/shape_inference.h"
 int main() {
-    auto ignore = tensorflow::strings::StrCat("a", "b");
-    return 0;
+  auto ignore = tensorflow::strings::StrCat("a", "b");
+  return 0;
 }
diff --git a/source/cmake/tf_cxx_abi.cpp b/source/cmake/tf_cxx_abi.cpp
index c25565c568..6a1426ae99 100644
--- a/source/cmake/tf_cxx_abi.cpp
+++ b/source/cmake/tf_cxx_abi.cpp
@@ -1,8 +1,8 @@
 #include 
+
 #include "tensorflow/core/public/version.h"
-int main(int argc, char * argv[])
-{
-#if (TF_MAJOR_VERSION == 2 && TF_MINOR_VERSION>=9) || TF_MAJOR_VERSION > 2 
+int main(int argc, char* argv[]) {
+#if (TF_MAJOR_VERSION == 2 && TF_MINOR_VERSION >= 9) || TF_MAJOR_VERSION > 2
 #error "TF>=2.9 should not execute this file..."
 #else
   std::cout << tf_cxx11_abi_flag();
diff --git a/source/cmake/tf_version.cpp b/source/cmake/tf_version.cpp
index 9d129aefb8..45b8002092 100644
--- a/source/cmake/tf_version.cpp
+++ b/source/cmake/tf_version.cpp
@@ -1,9 +1,10 @@
 #include 
+
 #include "tensorflow/core/public/version.h"
 
-int main(int argc, char * argv[])
-{
-  // See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h
+int main(int argc, char* argv[]) {
+  // See
+  // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h
   // TF_VERSION_STRING has been avaiable since TensorFlow v0.6
   std::cout << TF_VERSION_STRING;
   return 0;
diff --git a/source/config/CMakeLists.txt b/source/config/CMakeLists.txt
index 52834ebb1e..eb0bbc8bf4 100644
--- a/source/config/CMakeLists.txt
+++ b/source/config/CMakeLists.txt
@@ -1,8 +1,6 @@
 # config
 
-configure_file("run_config.ini" "${CMAKE_CURRENT_BINARY_DIR}/run_config.ini" @ONLY)
+configure_file("run_config.ini" "${CMAKE_CURRENT_BINARY_DIR}/run_config.ini"
+               @ONLY)
 
-install(
-  FILES		${CMAKE_CURRENT_BINARY_DIR}/run_config.ini
-  DESTINATION	deepmd
-)
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/run_config.ini DESTINATION deepmd)
diff --git a/source/gmx/.gitignore b/source/gmx/.gitignore
index ed2f43cf54..a160cc9947 100644
--- a/source/gmx/.gitignore
+++ b/source/gmx/.gitignore
@@ -1 +1 @@
-CMakeLists.txt.patch
\ No newline at end of file
+CMakeLists.txt.patch
diff --git a/source/gmx/CMakeLists.txt b/source/gmx/CMakeLists.txt
index e176b2b209..8ebd1450e2 100644
--- a/source/gmx/CMakeLists.txt
+++ b/source/gmx/CMakeLists.txt
@@ -1,13 +1,10 @@
 message(STATUS "Build GROMACS plugin")
 
 file(GLOB PATCH_VERSIONS patches/*)
-FOREACH(PATCH_VERSION ${PATCH_VERSIONS})
-  configure_file(
-    "${PATCH_VERSION}/CMakeLists.txt.patch.in"
-    "${PATCH_VERSION}/CMakeLists.txt.patch"
-    @ONLY
-  )
-ENDFOREACH(PATCH_VERSION)
+foreach(PATCH_VERSION ${PATCH_VERSIONS})
+  configure_file("${PATCH_VERSION}/CMakeLists.txt.patch.in"
+                 "${PATCH_VERSION}/CMakeLists.txt.patch" @ONLY)
+endforeach(PATCH_VERSION)
 
 set(libgmxname ${LIB_DEEPMD_GROMACS})
 file(GLOB LIB_SRC src/*.cpp)
@@ -15,33 +12,31 @@ file(GLOB INC_SRC include/*.h)
 
 add_library(${libgmxname} SHARED ${LIB_SRC})
 target_link_libraries(${libgmxname} PUBLIC ${LIB_DEEPMD_CC})
-target_include_directories(${libgmxname} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
-target_include_directories(${libgmxname} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../3rdparty/)
+target_include_directories(${libgmxname}
+                           PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_include_directories(${libgmxname}
+                           PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../3rdparty/)
 
 set_target_properties(
-  ${libgmxname} 
-  PROPERTIES 
-  INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}"
-)
+  ${libgmxname} PROPERTIES INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}")
 
-install (
+install(
   FILES dp_gmx_patch
   DESTINATION bin
-  PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
-)
+  PERMISSIONS
+    OWNER_READ
+    OWNER_WRITE
+    OWNER_EXECUTE
+    GROUP_READ
+    GROUP_EXECUTE
+    WORLD_READ
+    WORLD_EXECUTE)
 
-install (
+install(
   DIRECTORY patches/
   DESTINATION share/deepmd_gromacs_patches
-  PATTERN */CMakeLists.txt.patch.in EXCLUDE
-)
+  PATTERN */CMakeLists.txt.patch.in EXCLUDE)
 
-install (
-  FILES ${INC_SRC}
-  DESTINATION include/deepmd
-)
+install(FILES ${INC_SRC} DESTINATION include/deepmd)
 
-install(
-  TARGETS	${libgmxname}
-  DESTINATION	lib/
-)
\ No newline at end of file
+install(TARGETS ${libgmxname} DESTINATION lib/)
diff --git a/source/gmx/include/gmx_plugin.h b/source/gmx/include/gmx_plugin.h
index 78786fc37f..b360abf3b4 100644
--- a/source/gmx/include/gmx_plugin.h
+++ b/source/gmx/include/gmx_plugin.h
@@ -2,27 +2,25 @@
 #define _GMX_PLUGIN_H_
 #include "DeepPot.h"
 
-namespace deepmd
-{
+namespace deepmd {
 
-class DeepmdPlugin
-{
-    public:
-        DeepmdPlugin();
-        DeepmdPlugin(char*);
-        ~DeepmdPlugin();  
-        void              init_from_json(char*);
-        deepmd::DeepPot*  nnp;
-        std::vector dtype;
-        std::vector dindex;
-        bool              pbc;
-        float             lmd;
-        int               natom;
+class DeepmdPlugin {
+ public:
+  DeepmdPlugin();
+  DeepmdPlugin(char*);
+  ~DeepmdPlugin();
+  void init_from_json(char*);
+  deepmd::DeepPot* nnp;
+  std::vector dtype;
+  std::vector dindex;
+  bool pbc;
+  float lmd;
+  int natom;
 };
 
-}
+}  // namespace deepmd
 
 const float c_dp2gmx = 0.1;
 const float e_dp2gmx = 96.48533132;
 const float f_dp2gmx = 964.8533132;
-#endif
\ No newline at end of file
+#endif
diff --git a/source/gmx/patches/2020.2/CMakeLists.txt.patch.in b/source/gmx/patches/2020.2/CMakeLists.txt.patch.in
index 93f11bbffe..fb1115fd36 100644
--- a/source/gmx/patches/2020.2/CMakeLists.txt.patch.in
+++ b/source/gmx/patches/2020.2/CMakeLists.txt.patch.in
@@ -3,7 +3,7 @@
 @@ -134,6 +134,26 @@
  #    (i.e., something that is exposed in installed headers).
  set(GMX_PUBLIC_LIBRARIES "")
- 
+
 +# DeepMD
 +message(STATUS "Compling with DeepMD...")
 +add_definitions(-w) # close warning
diff --git a/source/gmx/patches/2020.2/src/gromacs/mdlib/forcerec.cpp.patch b/source/gmx/patches/2020.2/src/gromacs/mdlib/forcerec.cpp.patch
index cee4604797..b4133aa4c0 100644
--- a/source/gmx/patches/2020.2/src/gromacs/mdlib/forcerec.cpp.patch
+++ b/source/gmx/patches/2020.2/src/gromacs/mdlib/forcerec.cpp.patch
@@ -3,7 +3,7 @@
 @@ -98,6 +98,11 @@
  #include "gromacs/utility/smalloc.h"
  #include "gromacs/utility/strconvert.h"
- 
+
 +// Deepmd
 +// #include "deepmd/gmx_plugin.h"
 +deepmd::DeepmdPlugin* deepmdPlugin;
@@ -29,5 +29,5 @@
 +        useDeepmd = true;
 +    }
  }
- 
+
  t_forcerec::t_forcerec() = default;
diff --git a/source/gmx/patches/2020.2/src/gromacs/mdlib/forcerec.h.patch b/source/gmx/patches/2020.2/src/gromacs/mdlib/forcerec.h.patch
index 03661975af..7ef9ce18d7 100644
--- a/source/gmx/patches/2020.2/src/gromacs/mdlib/forcerec.h.patch
+++ b/source/gmx/patches/2020.2/src/gromacs/mdlib/forcerec.h.patch
@@ -3,7 +3,7 @@
 @@ -44,6 +44,10 @@
  #include "gromacs/timing/wallcycle.h"
  #include "gromacs/utility/arrayref.h"
- 
+
 +#include "deepmd/gmx_plugin.h"
 +extern deepmd::DeepmdPlugin* deepmdPlugin;
 +extern bool                  useDeepmd;
diff --git a/source/gmx/patches/2020.2/src/gromacs/mdlib/sim_util.cpp.patch b/source/gmx/patches/2020.2/src/gromacs/mdlib/sim_util.cpp.patch
index bfbb82b1fb..bb8468b8de 100644
--- a/source/gmx/patches/2020.2/src/gromacs/mdlib/sim_util.cpp.patch
+++ b/source/gmx/patches/2020.2/src/gromacs/mdlib/sim_util.cpp.patch
@@ -7,12 +7,12 @@
 +
 +#include 
  #include "gmxpre.h"
- 
+
  #include "config.h"
 @@ -114,6 +116,8 @@
  #include "gromacs/utility/strconvert.h"
  #include "gromacs/utility/sysinfo.h"
- 
+
 +#include "deepmd/gmx_plugin.h"
 +
  using gmx::AtomLocality;
@@ -21,7 +21,7 @@
 @@ -1838,6 +1842,64 @@
                                 simulationWork.useGpuPmePpCommunication, false, wcycle);
      }
- 
+
 +    /* DeepMD */
 +    double               dener;
 +    std::vector dforce;
@@ -91,7 +91,7 @@
 +        {
 +            enerd->term[F_EPOT] += dener * e_dp2gmx * deepmdPlugin->lmd;
 +        }
- 
+
          if (!EI_TPI(inputrec->eI))
          {
              checkPotentialEnergyValidity(step, *enerd, *inputrec);
diff --git a/source/gmx/src/gmx_plugin.cpp b/source/gmx/src/gmx_plugin.cpp
index 65ddc14f02..af462f6f49 100644
--- a/source/gmx/src/gmx_plugin.cpp
+++ b/source/gmx/src/gmx_plugin.cpp
@@ -1,129 +1,108 @@
 #include "gmx_plugin.h"
-#include "json.hpp"
-#include 
+
 #include 
+#include 
 #include 
 
-using namespace deepmd;
+#include "json.hpp"
 
-DeepmdPlugin::DeepmdPlugin () 
-{
-    nnp = new deepmd::DeepPot;
-}
+using namespace deepmd;
 
-DeepmdPlugin::DeepmdPlugin (char* json_file)
-{
-    nnp = new deepmd::DeepPot;
-    DeepmdPlugin::init_from_json(json_file);
-}
+DeepmdPlugin::DeepmdPlugin() { nnp = new deepmd::DeepPot; }
 
-DeepmdPlugin::~DeepmdPlugin()
-{
-    delete nnp;
+DeepmdPlugin::DeepmdPlugin(char* json_file) {
+  nnp = new deepmd::DeepPot;
+  DeepmdPlugin::init_from_json(json_file);
 }
 
-void DeepmdPlugin::init_from_json(char* json_file)
-{
-    std::ifstream fp (json_file);
-    if (fp.is_open())
-    {
-        std::cout << "Init deepmd plugin from: " << json_file << std::endl;
-        nlohmann::json jdata;
-        fp >> jdata;
-        std::string graph_file = jdata["graph_file"];
-        std::string type_file  = jdata["type_file"];
-        std::string index_file = jdata["index_file"];
+DeepmdPlugin::~DeepmdPlugin() { delete nnp; }
 
-        /* lambda */
-        if (jdata.contains("lambda"))
-        {
-            DeepmdPlugin::lmd = jdata["lambda"];
-        }
-        else
-        {
-            DeepmdPlugin::lmd = 1.0;
-        }
-        std::cout << "Setting lambda: " << DeepmdPlugin::lmd << std::endl;
-        /* lambda */
+void DeepmdPlugin::init_from_json(char* json_file) {
+  std::ifstream fp(json_file);
+  if (fp.is_open()) {
+    std::cout << "Init deepmd plugin from: " << json_file << std::endl;
+    nlohmann::json jdata;
+    fp >> jdata;
+    std::string graph_file = jdata["graph_file"];
+    std::string type_file = jdata["type_file"];
+    std::string index_file = jdata["index_file"];
 
-        /* pbc */
-        if (jdata.contains("pbc"))
-        {
-            DeepmdPlugin::pbc = jdata["pbc"];
-        }
-        else
-        {
-            DeepmdPlugin::pbc = true;
-        }
-        std::cout << "Setting pbc: " << DeepmdPlugin::pbc << std::endl;
-        /* pbc */
-
-        std::string              line;
-        std::istringstream       iss;
-        int                      val;
-
-        /* read type file */
-        std::ifstream            ft(type_file);
-        if (ft.is_open())
-        {
-            getline(ft, line);
-            iss.clear();
-            iss.str(line);
-            while (iss >> val)
-            {
-                DeepmdPlugin::dtype.push_back(val);
-            }
-            DeepmdPlugin::natom = DeepmdPlugin::dtype.size();
-            std::cout << "Number of atoms: " << DeepmdPlugin::natom << std::endl;
-        }
-        else
-        {
-            std::cerr << "Not found type file: " << type_file << std::endl;
-            exit(1); 
-        }
-        /* read type file */
+    /* lambda */
+    if (jdata.contains("lambda")) {
+      DeepmdPlugin::lmd = jdata["lambda"];
+    } else {
+      DeepmdPlugin::lmd = 1.0;
+    }
+    std::cout << "Setting lambda: " << DeepmdPlugin::lmd << std::endl;
+    /* lambda */
 
-        /* read index file */
-        std::ifstream  fi(index_file);
-        if (fi.is_open())
-        {
-            getline(fi, line);
-            iss.clear();
-            iss.str(line);
-            while (iss >> val)
-            {
-                DeepmdPlugin::dindex.push_back(val);
-            }
-            if (DeepmdPlugin::dindex.size() != DeepmdPlugin::natom)
-            {
-                std::cerr << "Number of atoms in index file (" << DeepmdPlugin::dindex.size() << ") does not match type file (" << DeepmdPlugin::natom << ")!" << std::endl;
-                exit(1);
-            }
-        }
-        else
-        {
-            std::cerr << "Not found index file: " << index_file << std::endl;
-            exit(1);
-        }
-        /* read index file */
+    /* pbc */
+    if (jdata.contains("pbc")) {
+      DeepmdPlugin::pbc = jdata["pbc"];
+    } else {
+      DeepmdPlugin::pbc = true;
+    }
+    std::cout << "Setting pbc: " << DeepmdPlugin::pbc << std::endl;
+    /* pbc */
 
-        /* init model */
-        std::cout << "Begin Init Model: " << graph_file << std::endl;
-        DeepmdPlugin::nnp->init(graph_file);
-        std::cout << "Successfully load model!" << std::endl;
-        std::string summary;
-        DeepmdPlugin::nnp->print_summary(summary);
-        std::cout << "Summary: " << std::endl << summary << std::endl;
-        std::string map;
-        DeepmdPlugin::nnp->get_type_map(map);
-        std::cout << "Atom map: " << map << std::endl;
-        /* init model */
+    std::string line;
+    std::istringstream iss;
+    int val;
 
-        std::cout << "Successfully init plugin!" << std::endl;
+    /* read type file */
+    std::ifstream ft(type_file);
+    if (ft.is_open()) {
+      getline(ft, line);
+      iss.clear();
+      iss.str(line);
+      while (iss >> val) {
+        DeepmdPlugin::dtype.push_back(val);
+      }
+      DeepmdPlugin::natom = DeepmdPlugin::dtype.size();
+      std::cout << "Number of atoms: " << DeepmdPlugin::natom << std::endl;
+    } else {
+      std::cerr << "Not found type file: " << type_file << std::endl;
+      exit(1);
     }
-    else
-    {
-        std::cerr << "Invaild json file: " << json_file << std::endl;
+    /* read type file */
+
+    /* read index file */
+    std::ifstream fi(index_file);
+    if (fi.is_open()) {
+      getline(fi, line);
+      iss.clear();
+      iss.str(line);
+      while (iss >> val) {
+        DeepmdPlugin::dindex.push_back(val);
+      }
+      if (DeepmdPlugin::dindex.size() != DeepmdPlugin::natom) {
+        std::cerr << "Number of atoms in index file ("
+                  << DeepmdPlugin::dindex.size()
+                  << ") does not match type file (" << DeepmdPlugin::natom
+                  << ")!" << std::endl;
         exit(1);
+      }
+    } else {
+      std::cerr << "Not found index file: " << index_file << std::endl;
+      exit(1);
     }
-}
\ No newline at end of file
+    /* read index file */
+
+    /* init model */
+    std::cout << "Begin Init Model: " << graph_file << std::endl;
+    DeepmdPlugin::nnp->init(graph_file);
+    std::cout << "Successfully load model!" << std::endl;
+    std::string summary;
+    DeepmdPlugin::nnp->print_summary(summary);
+    std::cout << "Summary: " << std::endl << summary << std::endl;
+    std::string map;
+    DeepmdPlugin::nnp->get_type_map(map);
+    std::cout << "Atom map: " << map << std::endl;
+    /* init model */
+
+    std::cout << "Successfully init plugin!" << std::endl;
+  } else {
+    std::cerr << "Invaild json file: " << json_file << std::endl;
+    exit(1);
+  }
+}
diff --git a/source/install/build_cc.sh b/source/install/build_cc.sh
index 2804c33161..c1f29ed01d 100755
--- a/source/install/build_cc.sh
+++ b/source/install/build_cc.sh
@@ -1,22 +1,18 @@
 set -e
 
-if [ "$DP_VARIANT" = "cuda" ]
-then
-  CUDA_ARGS="-DUSE_CUDA_TOOLKIT=TRUE"
-elif [ "$DP_VARIANT" = "rocm" ]
-then
-  CUDA_ARGS="-DUSE_ROCM_TOOLKIT=TRUE"
+if [ "$DP_VARIANT" = "cuda" ]; then
+	CUDA_ARGS="-DUSE_CUDA_TOOLKIT=TRUE"
+elif [ "$DP_VARIANT" = "rocm" ]; then
+	CUDA_ARGS="-DUSE_ROCM_TOOLKIT=TRUE"
 fi
 #------------------
 
 SCRIPT_PATH=$(dirname $(realpath -s $0))
-if [ -z "$INSTALL_PREFIX" ]
-then
-  INSTALL_PREFIX=$(realpath -s ${SCRIPT_PATH}/../../dp)
+if [ -z "$INSTALL_PREFIX" ]; then
+	INSTALL_PREFIX=$(realpath -s ${SCRIPT_PATH}/../../dp)
 fi
-if [ -z "$DOWNLOAD_TENSORFLOW" ]
-then
-  DOWNLOAD_TENSORFLOW=TRUE
+if [ -z "$DOWNLOAD_TENSORFLOW" ]; then
+	DOWNLOAD_TENSORFLOW=TRUE
 fi
 mkdir -p ${INSTALL_PREFIX}
 echo "Installing DeePMD-kit to ${INSTALL_PREFIX}"
diff --git a/source/install/build_lammps.sh b/source/install/build_lammps.sh
index 96963bea08..22968bba12 100755
--- a/source/install/build_lammps.sh
+++ b/source/install/build_lammps.sh
@@ -1,9 +1,8 @@
 set -e
 
 SCRIPT_PATH=$(dirname $(realpath -s $0))
-if [ -z "$INSTALL_PREFIX" ]
-then
-  INSTALL_PREFIX=$(realpath -s ${SCRIPT_PATH}/../../dp)
+if [ -z "$INSTALL_PREFIX" ]; then
+	INSTALL_PREFIX=$(realpath -s ${SCRIPT_PATH}/../../dp)
 fi
 mkdir -p ${INSTALL_PREFIX}
 echo "Installing LAMMPS to ${INSTALL_PREFIX}"
@@ -16,8 +15,7 @@ mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
 # download LAMMMPS
 LAMMPS_VERSION=stable_23Jun2022_update2
-if [ ! -d "lammps-${LAMMPS_VERSION}" ]
-then
+if [ ! -d "lammps-${LAMMPS_VERSION}" ]; then
 	curl -L -o lammps.tar.gz https://github.com/lammps/lammps/archive/refs/tags/${LAMMPS_VERSION}.tar.gz
 	tar vxzf lammps.tar.gz
 fi
@@ -33,4 +31,3 @@ make install-python
 
 #------------------
 echo "Congratulations! LAMMPS has been installed at ${INSTALL_PREFIX}"
-
diff --git a/source/install/build_tf.py b/source/install/build_tf.py
index 0ede567442..ff2ed23094 100755
--- a/source/install/build_tf.py
+++ b/source/install/build_tf.py
@@ -14,31 +14,51 @@
 # make sure Python 3 is used
 # https://stackoverflow.com/a/41901923/9567349
 import sys
+
 if sys.version_info[0] < 3:
     raise Exception("Python 3 or a more recent version is required.")
 
 # The script should only rely on the stardard Python libraries.
 
-from contextlib import contextmanager
 import argparse
+import hashlib
 import importlib.util
+import logging
 import os
 import re
+import shutil
 import stat
 import subprocess as sp
-import hashlib
-import logging
-import urllib.request
 import tarfile
-import shutil
 import tempfile
-from pathlib import Path
-from typing import List, Dict, Optional
-from abc import ABCMeta, abstractmethod, abstractproperty
-from functools import lru_cache
-from shutil import copytree, ignore_patterns, copy2
-from fnmatch import filter
-
+import urllib.request
+from abc import (
+    ABCMeta,
+    abstractmethod,
+    abstractproperty,
+)
+from contextlib import (
+    contextmanager,
+)
+from fnmatch import (
+    filter,
+)
+from functools import (
+    lru_cache,
+)
+from pathlib import (
+    Path,
+)
+from shutil import (
+    copy2,
+    copytree,
+    ignore_patterns,
+)
+from typing import (
+    Dict,
+    List,
+    Optional,
+)
 
 # default config
 FILE = Path(__file__).parent.absolute()
@@ -64,14 +84,14 @@
 dlog.setLevel(logging.INFO)
 handler = logging.StreamHandler(sys.stdout)
 handler.setLevel(logging.DEBUG)
-formatter = logging.Formatter(
-    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
 handler.setFormatter(formatter)
 dlog.addHandler(handler)
 
 
 # Common utils
 
+
 def download_file(url: str, filename: str):
     """Download files from remote URL.
 
@@ -88,7 +108,7 @@ def download_file(url: str, filename: str):
         raises for HTTP error
     """
     dlog.info("Download %s from %s" % (filename, url))
-    with urllib.request.urlopen(url) as response, open(filename, 'wb') as out_file:
+    with urllib.request.urlopen(url) as response, open(filename, "wb") as out_file:
         shutil.copyfileobj(response, out_file)
 
 
@@ -109,13 +129,14 @@ class OnlineResource:
         if not None, decompress to a directory
     """
 
-    def __init__(self,
-                 filename: str,
-                 url: str,
-                 sha256: str = None,
-                 executable: bool = False,
-                 gzip: str = None,
-                 ) -> None:
+    def __init__(
+        self,
+        filename: str,
+        url: str,
+        sha256: str = None,
+        executable: bool = False,
+        gzip: str = None,
+    ) -> None:
         self.filename = filename
         self.url = url
         self.reference_sha256 = sha256
@@ -130,9 +151,8 @@ def __call__(self):
                 raise RuntimeError(
                     "Download {} from {} failed! "
                     "You can manually download it to {} and "
-                    "retry the script.".format(
-                        self.filename, self.url, str(self.path)
-                    ))
+                    "retry the script.".format(self.filename, self.url, str(self.path))
+                )
         self.post_process()
 
     def post_process(self):
@@ -140,25 +160,25 @@ def post_process(self):
             self.path.chmod(self.path.stat().st_mode | stat.S_IEXEC)
         if self.gzip is not None:
             with tarfile.open(self.path) as tar:
+
                 def is_within_directory(directory, target):
-                    
+
                     abs_directory = os.path.abspath(directory)
                     abs_target = os.path.abspath(target)
-                
+
                     prefix = os.path.commonprefix([abs_directory, abs_target])
-                    
+
                     return prefix == abs_directory
-                
+
                 def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
-                
+
                     for member in tar.getmembers():
                         member_path = os.path.join(path, member.name)
                         if not is_within_directory(path, member_path):
                             raise Exception("Attempted Path Traversal in Tar File")
-                
-                    tar.extractall(path, members, numeric_owner=numeric_owner) 
-                    
-                
+
+                    tar.extractall(path, members, numeric_owner=numeric_owner)
+
                 safe_extract(tar, path=self.gzip_path)
 
     def download(self):
@@ -192,9 +212,9 @@ def sha256(self) -> str:
         """
         h = hashlib.sha256()
         # buffer size: 128 kB
-        b = bytearray(128*1024)
+        b = bytearray(128 * 1024)
         mv = memoryview(b)
-        with open(self.path, 'rb', buffering=0) as f:
+        with open(self.path, "rb", buffering=0) as f:
             for n in iter(lambda: f.readinto(mv), 0):
                 h.update(mv[:n])
         return h.hexdigest()
@@ -202,11 +222,14 @@ def sha256(self) -> str:
     @property
     def exists(self) -> bool:
         """Check if target file exists."""
-        return self.path.exists() and (self.sha256 == self.reference_sha256 or self.reference_sha256 is None)
+        return self.path.exists() and (
+            self.sha256 == self.reference_sha256 or self.reference_sha256 is None
+        )
 
 
 class Build(metaclass=ABCMeta):
     """Build process."""
+
     @abstractproperty
     def resources(self) -> Dict[str, OnlineResource]:
         """Required resources."""
@@ -239,7 +262,10 @@ def __call__(self):
                 if not dd.built:
                     dd()
                 else:
-                    dlog.info("Skip installing %s, which has been already installed" % dd.__class__.__name__)
+                    dlog.info(
+                        "Skip installing %s, which has been already installed"
+                        % dd.__class__.__name__
+                    )
             dlog.info("Start installing %s..." % self.__class__.__name__)
             with tempfile.TemporaryDirectory() as tmpdirname:
                 self._prefix = Path(tmpdirname)
@@ -286,18 +312,18 @@ def set_directory(path: Path):
 
 
 def list2env(l: list) -> str:
-    return ':'.join(map(str, l))
+    return ":".join(map(str, l))
 
 
 def get_shlib_ext():
     """Return the shared library extension."""
     plat = sys.platform
-    if plat.startswith('win'):
-        return '.dll'
-    elif plat in ['osx', 'darwin']:
-        return '.dylib'
-    elif plat.startswith('linux'):
-        return '.so'
+    if plat.startswith("win"):
+        return ".dll"
+    elif plat in ["osx", "darwin"]:
+        return ".dylib"
+    elif plat.startswith("linux"):
+        return ".so"
     else:
         raise NotImplementedError(plat)
 
@@ -313,14 +339,16 @@ def copytree2(src: Path, dst: Path, *args, **kwargs):
         # hack to support override
         tmpdst = Path(td) / "dst"
         copytree(str(src), str(tmpdst), *args, **kwargs)
-        call([
-            "/bin/cp",
-            # archieve, recursive, force, do not create one inside
-            # https://stackoverflow.com/a/24486142/9567349
-            "-arfT",
-            str(tmpdst),
-            str(dst),
-        ])
+        call(
+            [
+                "/bin/cp",
+                # archieve, recursive, force, do not create one inside
+                # https://stackoverflow.com/a/24486142/9567349
+                "-arfT",
+                str(tmpdst),
+                str(dst),
+            ]
+        )
 
 
 def include_patterns(*include_patterns):
@@ -328,13 +356,20 @@ def include_patterns(*include_patterns):
 
     Remove directory starts with _.
     """
+
     def _ignore_patterns(path, names):
-        keep = set(name for pattern in include_patterns
-                   for name in filter(names, pattern))
+        keep = set(
+            name for pattern in include_patterns for name in filter(names, pattern)
+        )
         removed_dir = any([x.startswith("_") for x in path.split(os.path.sep)])
-        ignore = set(name for name in names
-                     if (name not in keep or removed_dir) and not os.path.isdir(os.path.join(path, name)))
+        ignore = set(
+            name
+            for name in names
+            if (name not in keep or removed_dir)
+            and not os.path.isdir(os.path.join(path, name))
+        )
         return ignore
+
     return _ignore_patterns
 
 
@@ -346,13 +381,16 @@ def call(commands: List[str], env={}, **kwargs):
     RuntimeError
         returned code is not zero
     """
-    with sp.Popen(commands, stdout=sys.stdout, stderr=sys.stderr, env=env, **kwargs) as p:
+    with sp.Popen(
+        commands, stdout=sys.stdout, stderr=sys.stderr, env=env, **kwargs
+    ) as p:
         p.communicate()
         exit_code = p.wait()
 
         if exit_code:
-            raise RuntimeError("Run %s failed, return code: %d" %
-                               (" ".join(commands), exit_code))
+            raise RuntimeError(
+                "Run %s failed, return code: %d" % (" ".join(commands), exit_code)
+            )
 
 
 # the detailed step to build DeePMD-kit
@@ -399,7 +437,7 @@ def dependencies(self) -> Dict[str, Build]:
         return {}
 
     def build(self):
-        bazel_res = self.resources['bazelisk']
+        bazel_res = self.resources["bazelisk"]
         bin_dst = self.prefix / "bin"
         bin_dst.mkdir(exist_ok=True)
         copy3(bazel_res.path, bin_dst / "bazelisk")
@@ -411,6 +449,7 @@ def built(self):
 
 class BuildNumpy(Build):
     """Build NumPy"""
+
     @property
     @lru_cache()
     def resources(self) -> Dict[str, OnlineResource]:
@@ -427,19 +466,22 @@ def built(self) -> bool:
 
     def build(self):
         try:
-            call([
-                sys.executable,
-                "-m",
-                "pip",
-                "install",
-                "numpy",
-            ])
+            call(
+                [
+                    sys.executable,
+                    "-m",
+                    "pip",
+                    "install",
+                    "numpy",
+                ]
+            )
         except RuntimeError as e:
             raise RuntimeError("Please manually install numpy!") from e
 
 
 class BuildCUDA(Build):
     """Find CUDA."""
+
     @property
     @lru_cache()
     def resources(self) -> Dict[str, OnlineResource]:
@@ -455,7 +497,8 @@ def build(self):
             "NVCC is not found. Please manually install CUDA"
             "Toolkit and cuDNN!\n"
             "CUDA Toolkit: https://developer.nvidia.com/cuda-toolkit-archive\n"
-            "cuDNN: https://developer.nvidia.com/rdp/cudnn-archive")
+            "cuDNN: https://developer.nvidia.com/rdp/cudnn-archive"
+        )
 
     @property
     def built(self):
@@ -463,9 +506,11 @@ def built(self):
 
     @property
     def cuda_version(self):
-        nvcc_bin = CUDA_PATH / 'bin' / 'nvcc'
-        output = sp.check_output([str(nvcc_bin), '--version'], env={}, encoding='utf8').split('\n')
-        pattern = re.compile('V[0-9]*\\.[0-9]*\\.[0-9]*')
+        nvcc_bin = CUDA_PATH / "bin" / "nvcc"
+        output = sp.check_output(
+            [str(nvcc_bin), "--version"], env={}, encoding="utf8"
+        ).split("\n")
+        pattern = re.compile("V[0-9]*\\.[0-9]*\\.[0-9]*")
         for x in output:
             search = pattern.search(x)
             if search is not None:
@@ -477,12 +522,12 @@ def cuda_version(self):
 
     @property
     def cudnn_version(self):
-        cudnn_header = CUDNN_PATH / 'include' / 'cudnn.h'
+        cudnn_header = CUDNN_PATH / "include" / "cudnn.h"
         with open(cudnn_header) as f:
             for line in f:
                 if line.startswith("#define CUDNN_MAJOR "):
                     return line.split()[-1]
-        cudnn_header = CUDNN_PATH / 'include' / 'cudnn_version.h'
+        cudnn_header = CUDNN_PATH / "include" / "cudnn_version.h"
         with open(cudnn_header) as f:
             for line in f:
                 if line.startswith("#define CUDNN_MAJOR "):
@@ -490,7 +535,7 @@ def cudnn_version(self):
         raise RuntimeError(
             "cuDNN version is not found!\n"
             "Download from: https://developer.nvidia.com/rdp/cudnn-archive"
-            )
+        )
 
     @property
     @lru_cache()
@@ -509,6 +554,7 @@ def cuda_compute_capabilities(self):
 
 class BuildROCM(Build):
     """Find ROCm."""
+
     @property
     @lru_cache()
     def resources(self) -> Dict[str, OnlineResource]:
@@ -542,7 +588,13 @@ class BuildTensorFlow(Build):
         Enable ROCm build
     """
 
-    def __init__(self, version: str ="2.9.1", enable_mkl: bool=True, enable_cuda: bool=False, enable_rocm: bool = False) -> None:
+    def __init__(
+        self,
+        version: str = "2.9.1",
+        enable_mkl: bool = True,
+        enable_cuda: bool = False,
+        enable_rocm: bool = False,
+    ) -> None:
         self.version = version
         self.enable_mkl = enable_mkl
         self.enable_cuda = enable_cuda
@@ -560,9 +612,9 @@ def resources(self) -> Dict[str, OnlineResource]:
     def dependencies(self) -> Dict[str, Build]:
         optional_dep = {}
         if self.enable_cuda:
-            optional_dep['cuda'] = BuildCUDA()
+            optional_dep["cuda"] = BuildCUDA()
         if self.enable_rocm:
-            optional_dep['rocm'] = BuildROCM()
+            optional_dep["rocm"] = BuildROCM()
         return {
             "bazelisk": BuildBazelisk(),
             "numpy": BuildNumpy(),
@@ -570,31 +622,37 @@ def dependencies(self) -> Dict[str, Build]:
         }
 
     def build(self):
-        tf_res = self.resources['tensorflow']
+        tf_res = self.resources["tensorflow"]
         src = tf_res.gzip_path / ("tensorflow-%s" % self.version)
         with set_directory(src):
             # configure -- need bazelisk in PATH
-            call([str(src / "configure")], env={
-                "PATH": list2env([PREFIX / "bin", "/usr/bin", "/bin"]),
-                "LD_LIBRARY_PATH": os.environ.get("LD_LIBRARY_PATH", ""),
-                **self._environments,
-            })
+            call(
+                [str(src / "configure")],
+                env={
+                    "PATH": list2env([PREFIX / "bin", "/usr/bin", "/bin"]),
+                    "LD_LIBRARY_PATH": os.environ.get("LD_LIBRARY_PATH", ""),
+                    **self._environments,
+                },
+            )
             # bazel build
-            call([
-                str(PREFIX / "bin" / "bazelisk"),
-                *self._bazel_opts,
-                "build",
-                *self._build_opts,
-                *self._build_targets,
-            ], env={
-                "PATH": list2env(["/usr/bin", "/bin"]),
-                "HOME": os.environ.get("HOME"),
-                "TEST_TMPDIR": str(PACKAGE_DIR / "bazelcache"),
-                # for libstdc++
-                "LD_LIBRARY_PATH": os.environ.get("LD_LIBRARY_PATH", ""),
-                "CC": str(Path(GCC).resolve()),
-                "CXX": str(Path(GXX).resolve()),
-            })
+            call(
+                [
+                    str(PREFIX / "bin" / "bazelisk"),
+                    *self._bazel_opts,
+                    "build",
+                    *self._build_opts,
+                    *self._build_targets,
+                ],
+                env={
+                    "PATH": list2env(["/usr/bin", "/bin"]),
+                    "HOME": os.environ.get("HOME"),
+                    "TEST_TMPDIR": str(PACKAGE_DIR / "bazelcache"),
+                    # for libstdc++
+                    "LD_LIBRARY_PATH": os.environ.get("LD_LIBRARY_PATH", ""),
+                    "CC": str(Path(GCC).resolve()),
+                    "CXX": str(Path(GXX).resolve()),
+                },
+            )
 
         # copy libraries and directories
         ext = get_shlib_ext()
@@ -605,30 +663,56 @@ def build(self):
 
         # 1. copy headers
         (include_dst / "tensorflow").mkdir(exist_ok=True)
-        copytree2(src / "tensorflow" / "cc", include_dst /
-                  "tensorflow" / "cc", ignore=include_patterns('*.h', '*.inc'))
-        copytree2(src / "tensorflow" / "core", include_dst /
-                  "tensorflow" / "core", ignore=include_patterns('*.h', '*.inc'))
+        copytree2(
+            src / "tensorflow" / "cc",
+            include_dst / "tensorflow" / "cc",
+            ignore=include_patterns("*.h", "*.inc"),
+        )
+        copytree2(
+            src / "tensorflow" / "core",
+            include_dst / "tensorflow" / "core",
+            ignore=include_patterns("*.h", "*.inc"),
+        )
         # bazel-bin includes generated headers like version, pb.h, ..
-        copytree2(src / "bazel-bin", include_dst,
-                  ignore=include_patterns('*.h', '*.inc'))
-
-        copytree2(src / "third_party", include_dst /
-                  "third_party", ignore=ignore_patterns('*.cc'))
+        copytree2(
+            src / "bazel-bin", include_dst, ignore=include_patterns("*.h", "*.inc")
+        )
+
+        copytree2(
+            src / "third_party",
+            include_dst / "third_party",
+            ignore=ignore_patterns("*.cc"),
+        )
         bazel_tensorflow = src / ("bazel-" + src.name)
-        copytree2(bazel_tensorflow / "external" /
-                  "eigen_archive" / "Eigen", include_dst / "Eigen")
-        copytree2(bazel_tensorflow / "external" / "eigen_archive" /
-                  "unsupported", include_dst / "unsupported")
-        copytree2(bazel_tensorflow / "external" / "com_google_protobuf" /
-                  "src" / "google", include_dst / "google")
-        copytree2(bazel_tensorflow / "external" /
-                  "com_google_absl" / "absl", include_dst / "absl")
+        copytree2(
+            bazel_tensorflow / "external" / "eigen_archive" / "Eigen",
+            include_dst / "Eigen",
+        )
+        copytree2(
+            bazel_tensorflow / "external" / "eigen_archive" / "unsupported",
+            include_dst / "unsupported",
+        )
+        copytree2(
+            bazel_tensorflow / "external" / "com_google_protobuf" / "src" / "google",
+            include_dst / "google",
+        )
+        copytree2(
+            bazel_tensorflow / "external" / "com_google_absl" / "absl",
+            include_dst / "absl",
+        )
 
         # 2. copy libraries
         if self.enable_mkl:
-            copy3(src / "bazel-out" / "k8-opt" / "bin" / "external" /
-                "llvm_openmp" / ("libiomp5" + ext), lib_dst)
+            copy3(
+                src
+                / "bazel-out"
+                / "k8-opt"
+                / "bin"
+                / "external"
+                / "llvm_openmp"
+                / ("libiomp5" + ext),
+                lib_dst,
+            )
         lib_src = src / "bazel-bin" / "tensorflow"
         self.copy_lib("libtensorflow_framework" + ext, lib_src, lib_dst)
         self.copy_lib("libtensorflow_cc" + ext, lib_src, lib_dst)
@@ -637,8 +721,7 @@ def copy_lib(self, libname, src, dst):
         """Copy library and make symlink."""
         copy3(src / (libname + "." + self.version), dst)
         libname_v = libname + "." + self.version
-        (dst / (libname + "." + self.version.split(".")
-         [0])).symlink_to(libname_v)
+        (dst / (libname + "." + self.version.split(".")[0])).symlink_to(libname_v)
         (dst / libname).symlink_to(libname_v)
 
     @property
@@ -648,10 +731,12 @@ def _environments(self) -> dict:
                 "TF_NEED_CUDA": "1",
                 # /usr is path to driver
                 "TF_CUDA_PATHS": ",".join((str(CUDA_PATH), str(CUDNN_PATH), "/usr")),
-                "TF_CUDA_VERSION": str(self.dependencies['cuda'].cuda_version),
-                "TF_CUDNN_VERSION": str(self.dependencies['cuda'].cudnn_version),
+                "TF_CUDA_VERSION": str(self.dependencies["cuda"].cuda_version),
+                "TF_CUDNN_VERSION": str(self.dependencies["cuda"].cudnn_version),
                 "TF_NCCL_VERSION": "",
-                "TF_CUDA_COMPUTE_CAPABILITIES": self.dependencies['cuda'].cuda_compute_capabilities,
+                "TF_CUDA_COMPUTE_CAPABILITIES": self.dependencies[
+                    "cuda"
+                ].cuda_compute_capabilities,
                 "GCC_HOST_COMPILER_PATH": str(Path(GCC).resolve()),
                 "GCC_HOST_COMPILER_PREFIX": str(Path(GCC).resolve().parent.parent),
             }
@@ -685,8 +770,8 @@ def _environments(self) -> dict:
             "TF_DOWNLOAD_CLANG": "0",
             "TF_SET_ANDROID_WORKSPACE": "0",
             "TF_CONFIGURE_IOS": "0",
-            ** cuda_env,
-            ** rocm_env,
+            **cuda_env,
+            **rocm_env,
         }
 
     @property
@@ -715,7 +800,9 @@ def _bazel_opts(self) -> List[str]:
 
     @property
     def built(self):
-        return (PREFIX / "lib" / ("libtensorflow_cc%s.%s" % (get_shlib_ext(), self.version))).exists()
+        return (
+            PREFIX / "lib" / ("libtensorflow_cc%s.%s" % (get_shlib_ext(), self.version))
+        ).exists()
 
 
 def clean_package():
@@ -735,6 +822,7 @@ def clean_package():
 
 # interface
 
+
 def env() -> Dict[str, str]:
     return {
         "Python": sys.executable,
@@ -749,11 +837,18 @@ def env() -> Dict[str, str]:
 
 
 def pretty_print_env() -> str:
-    return ("Build configs:\n" +
-            "\n".join(["%s:%s%s" % (kk, " "*(19-len(kk)), vv) for kk, vv in env().items() if vv is not None]))
+    return "Build configs:\n" + "\n".join(
+        [
+            "%s:%s%s" % (kk, " " * (19 - len(kk)), vv)
+            for kk, vv in env().items()
+            if vv is not None
+        ]
+    )
 
 
-class RawTextArgumentDefaultsHelpFormatter(argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter):
+class RawTextArgumentDefaultsHelpFormatter(
+    argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter
+):
     pass
 
 
@@ -785,7 +880,7 @@ def parse_args(args: Optional[List[str]] = None):
     parser_variant = parser.add_mutually_exclusive_group()
     parser_variant.add_argument(
         "--cuda",
-        action='store_true',
+        action="store_true",
         help="Enable CUDA for TensorFlow",
     )
     parser.add_argument(
@@ -802,7 +897,7 @@ def parse_args(args: Optional[List[str]] = None):
     )
     parser_variant.add_argument(
         "--rocm",
-        action='store_true',
+        action="store_true",
         help="Enable ROCm for TensorFlow",
     )
     parser.add_argument(
@@ -830,9 +925,9 @@ def parse_args(args: Optional[List[str]] = None):
         help="Number of CPU cores used to build.",
     )
     parser.add_argument(
-        '--clean',
-        action='store_true',
-        help='Clean files after build.',
+        "--clean",
+        action="store_true",
+        help="Clean files after build.",
     )
     parsed_args = parser.parse_args(args=args)
 
@@ -872,4 +967,3 @@ def str_to_path_if_not_none(x: str) -> Path:
     # clean
     if args.clean:
         clean_package()
-
diff --git a/source/install/docker_package_c.sh b/source/install/docker_package_c.sh
index d56268700b..95291dfbd5 100755
--- a/source/install/docker_package_c.sh
+++ b/source/install/docker_package_c.sh
@@ -3,7 +3,7 @@ set -e
 SCRIPT_PATH=$(dirname $(realpath -s $0))
 
 docker run --rm -v ${SCRIPT_PATH}/../..:/root/deepmd-kit -w /root/deepmd-kit \
-       ghcr.io/deepmodeling/libtensorflow_cc:2.9.2_cuda11.6_centos7_cmake \
-       /bin/sh -c "source /opt/rh/devtoolset-10/enable \
+	ghcr.io/deepmodeling/libtensorflow_cc:2.9.2_cuda11.6_centos7_cmake \
+	/bin/sh -c "source /opt/rh/devtoolset-10/enable \
             && cd /root/deepmd-kit/source/install \
             && /bin/sh package_c.sh"
diff --git a/source/install/docker_test_package_c.sh b/source/install/docker_test_package_c.sh
index 1c1719545b..ada1799953 100755
--- a/source/install/docker_test_package_c.sh
+++ b/source/install/docker_test_package_c.sh
@@ -8,8 +8,8 @@ SCRIPT_PATH=$(dirname $(realpath -s $0))
 wget "https://drive.google.com/uc?export=download&id=1xldLhzm4uSkq6iPohSycNWAsWqKAenKX" -O ${SCRIPT_PATH}/../../examples/infer_water/"graph.pb"
 
 docker run --rm -v ${SCRIPT_PATH}/../..:/root/deepmd-kit -w /root/deepmd-kit \
-       gcc:4.9 \
-       /bin/sh -c "tar vxzf libdeepmd_c.tar.gz \
+	gcc:4.9 \
+	/bin/sh -c "tar vxzf libdeepmd_c.tar.gz \
             && cd examples/infer_water \
             && gcc infer_water.c -std=c99 -L ../../libdeepmd_c/lib -I ../../libdeepmd_c/include -Wl,--no-as-needed -ldeepmd_c -Wl,-rpath=../../libdeepmd_c/lib -o infer_water \
             && ./infer_water"
diff --git a/source/install/install_tf.sh b/source/install/install_tf.sh
index 9dd9db6727..f7932d9583 100755
--- a/source/install/install_tf.sh
+++ b/source/install/install_tf.sh
@@ -1,31 +1,31 @@
 set -e
 
 SCRIPT_PATH=$(dirname $(realpath -s $0))
-if [ ! $# -eq 2  ];then
-  echo "${SCRIPT_PATH}: Params error, installation of tensorflow libraries failed!"
-  exit 1
+if [ ! $# -eq 2 ]; then
+	echo "${SCRIPT_PATH}: Params error, installation of tensorflow libraries failed!"
+	exit 1
 fi
 
 PYTHON_SITE_PACKAGE_PATH=$(realpath -s $1)
 TENSORFLOW_ROOT=$(realpath -s $2)
 TF_INSTALL_PATH=${PYTHON_SITE_PACKAGE_PATH}/tensorflow
 
-if [ ! -d ${TF_INSTALL_PATH}  ];then
-  echo "${SCRIPT_PATH}: ${TF_INSTALL_PATH}, TensorFlow not found!"
-  exit 1
+if [ ! -d ${TF_INSTALL_PATH} ]; then
+	echo "${SCRIPT_PATH}: ${TF_INSTALL_PATH}, TensorFlow not found!"
+	exit 1
 fi
 
 #----------------------------------------
 # check if the installation folders exist
 #----------------------------------------
-if [ ! -d ${TENSORFLOW_ROOT}  ];then
-  mkdir ${TENSORFLOW_ROOT}
+if [ ! -d ${TENSORFLOW_ROOT} ]; then
+	mkdir ${TENSORFLOW_ROOT}
 fi
-if [ ! -d ${TENSORFLOW_ROOT}/include  ];then
-  mkdir ${TENSORFLOW_ROOT}/include
+if [ ! -d ${TENSORFLOW_ROOT}/include ]; then
+	mkdir ${TENSORFLOW_ROOT}/include
 fi
-if [ ! -d ${TENSORFLOW_ROOT}/lib  ];then
-  mkdir ${TENSORFLOW_ROOT}/lib
+if [ ! -d ${TENSORFLOW_ROOT}/lib ]; then
+	mkdir ${TENSORFLOW_ROOT}/lib
 fi
 
 #----------------------------------------
@@ -36,4 +36,3 @@ cp ${TF_INSTALL_PATH}/libtensorflow_framework.so* ${TENSORFLOW_ROOT}/lib
 cp ${TF_INSTALL_PATH}/python/_pywrap_tensorflow_internal.so ${TENSORFLOW_ROOT}/lib
 ln -s ${TENSORFLOW_ROOT}/lib/libtensorflow_framework.so* ${TENSORFLOW_ROOT}/lib/libtensorflow_framework.so
 ln -s ${TENSORFLOW_ROOT}/lib/_pywrap_tensorflow_internal.so ${TENSORFLOW_ROOT}/lib/libtensorflow_cc.so
-
diff --git a/source/install/package_c.sh b/source/install/package_c.sh
index 1f0186a724..ce188425c9 100755
--- a/source/install/package_c.sh
+++ b/source/install/package_c.sh
@@ -3,9 +3,8 @@
 set -e
 
 SCRIPT_PATH=$(dirname $(realpath -s $0))
-if [ -z "$INSTALL_PREFIX" ]
-then
-  INSTALL_PREFIX=$(realpath -s ${SCRIPT_PATH}/../../dp_c)
+if [ -z "$INSTALL_PREFIX" ]; then
+	INSTALL_PREFIX=$(realpath -s ${SCRIPT_PATH}/../../dp_c)
 fi
 mkdir -p ${INSTALL_PREFIX}
 echo "Installing DeePMD-kit to ${INSTALL_PREFIX}"
@@ -17,10 +16,10 @@ BUILD_TMP_DIR=${SCRIPT_PATH}/../build_c
 mkdir -p ${BUILD_TMP_DIR}
 cd ${BUILD_TMP_DIR}
 cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-      -DUSE_CUDA_TOOLKIT=TRUE \
-      -DOP_CXX_ABI=0 \
-      -DPACKAGE_C=TRUE \
-      ..
+	-DUSE_CUDA_TOOLKIT=TRUE \
+	-DOP_CXX_ABI=0 \
+	-DPACKAGE_C=TRUE \
+	..
 make -j${NPROC}
 make install
 
diff --git a/source/ipi/CMakeLists.txt b/source/ipi/CMakeLists.txt
index e31e5ddce0..af291c5451 100644
--- a/source/ipi/CMakeLists.txt
+++ b/source/ipi/CMakeLists.txt
@@ -1,57 +1,42 @@
 # md
 
-list (APPEND MD_INCLUDE_PATH "include")
+list(APPEND MD_INCLUDE_PATH "include")
 
 file(GLOB IN_SRC src/*.cc src/*.c)
 
 function(_add_ipi_variant variant_name prec_def)
-set (ipiname "dp_ipi${variant_name}")
-set (libipiname "${LIB_DEEPMD_IPI}${variant_name}")
-add_library(${libipiname} SHARED ${IN_SRC})
-target_include_directories(${libipiname} PUBLIC ${MD_INCLUDE_PATH})
+  set(ipiname "dp_ipi${variant_name}")
+  set(libipiname "${LIB_DEEPMD_IPI}${variant_name}")
+  add_library(${libipiname} SHARED ${IN_SRC})
+  target_include_directories(${libipiname} PUBLIC ${MD_INCLUDE_PATH})
 
-set(DRIVER_SOURCE_FILES driver.cc)
-add_executable(${ipiname} ${DRIVER_SOURCE_FILES})
-# link: libdeepmd_cc
-target_link_libraries(${ipiname} PRIVATE ${libipiname} ${LIB_DEEPMD_CC})
-target_include_directories(${ipiname} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../3rdparty/)
+  set(DRIVER_SOURCE_FILES driver.cc)
+  add_executable(${ipiname} ${DRIVER_SOURCE_FILES})
+  # link: libdeepmd_cc
+  target_link_libraries(${ipiname} PRIVATE ${libipiname} ${LIB_DEEPMD_CC})
+  target_include_directories(${ipiname}
+                             PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../3rdparty/)
 
-if (APPLE)
-  set_target_properties(
-    ${ipiname}
-    PROPERTIES
-    INSTALL_RPATH "@loader_path/../lib:${TensorFlow_LIBRARY_PATH}"
-    COMPILE_DEFINITIONS ${prec_def}
-  )
-else()
-set_target_properties(
-  ${ipiname}
-  PROPERTIES
-  LINK_FLAGS "-Wl,-rpath,'$ORIGIN'/../lib -Wl,-z,defs"
-  INSTALL_RPATH "$ORIGIN/../lib:${TensorFlow_LIBRARY_PATH}"
-  COMPILE_DEFINITIONS ${prec_def}
-)
-endif()
+  if(APPLE)
+    set_target_properties(
+      ${ipiname}
+      PROPERTIES INSTALL_RPATH "@loader_path/../lib:${TensorFlow_LIBRARY_PATH}"
+                 COMPILE_DEFINITIONS ${prec_def})
+  else()
+    set_target_properties(
+      ${ipiname}
+      PROPERTIES LINK_FLAGS "-Wl,-rpath,'$ORIGIN'/../lib -Wl,-z,defs"
+                 INSTALL_RPATH "$ORIGIN/../lib:${TensorFlow_LIBRARY_PATH}"
+                 COMPILE_DEFINITIONS ${prec_def})
+  endif()
 
-if(BUILD_PY_IF)
-install(
-  TARGETS	${libipiname}
-  DESTINATION	lib/
-)
-install(
-  TARGETS	${ipiname}
-  DESTINATION	bin/
-)
-else(BUILD_PY_IF)
-  install(
-    TARGETS	${libipiname}
-    DESTINATION	deepmd/op/
-  )
-  install(
-    TARGETS	${ipiname}
-    DESTINATION	deepmd/op/
-  )
-endif(BUILD_PY_IF)
+  if(BUILD_PY_IF)
+    install(TARGETS ${libipiname} DESTINATION lib/)
+    install(TARGETS ${ipiname} DESTINATION bin/)
+  else(BUILD_PY_IF)
+    install(TARGETS ${libipiname} DESTINATION deepmd/op/)
+    install(TARGETS ${ipiname} DESTINATION deepmd/op/)
+  endif(BUILD_PY_IF)
 endfunction()
 _add_ipi_variant("${HIGH_PREC_VARIANT}" "${HIGH_PREC_DEF}")
-_add_ipi_variant("${LOW_PREC_VARIANT}" "${LOW_PREC_DEF}")
\ No newline at end of file
+_add_ipi_variant("${LOW_PREC_VARIANT}" "${LOW_PREC_DEF}")
diff --git a/source/ipi/driver.cc b/source/ipi/driver.cc
index d147f286ff..f7c3f1e13e 100644
--- a/source/ipi/driver.cc
+++ b/source/ipi/driver.cc
@@ -1,71 +1,68 @@
-#include 
-#include 
-#include 
 #include 
-#include "sockets.h"
-#include "DeepPot.h"
+#include 
+#include 
+#include 
+
 #include "Convert.h"
-#include "XyzFileManager.h"
+#include "DeepPot.h"
 #include "SimulationRegion.h"
-
+#include "XyzFileManager.h"
 #include "json.hpp"
+#include "sockets.h"
 using json = nlohmann::json;
 
-
 // using namespace std;
 
 // bohr -> angstrom
-const double cvt_len  = 0.52917721;
-const double icvt_len = 1./cvt_len;
+const double cvt_len = 0.52917721;
+const double icvt_len = 1. / cvt_len;
 // hatree -> eV
-const double cvt_ener  = 27.21138602;
-const double icvt_ener = 1./cvt_ener;
+const double cvt_ener = 27.21138602;
+const double icvt_ener = 1. / cvt_ener;
 // hatree/Bohr -> eV / angstrom
-const double cvt_f  = cvt_ener / cvt_len;
-const double icvt_f = 1./cvt_f;
+const double cvt_f = cvt_ener / cvt_len;
+const double icvt_f = 1. / cvt_f;
 
-char *trimwhitespace(char *str)
-{
+char *trimwhitespace(char *str) {
   char *end;
   // Trim leading space
-  while(isspace((unsigned char)*str)) str++;
-  if(*str == 0)  // All spaces?
+  while (isspace((unsigned char)*str)) str++;
+  if (*str == 0)  // All spaces?
     return str;
   // Trim trailing space
   end = str + strlen(str) - 1;
-  while(end > str && isspace((unsigned char)*end)) end--;
+  while (end > str && isspace((unsigned char)*end)) end--;
   // Write new null terminator
-  *(end+1) = 0;
+  *(end + 1) = 0;
   return str;
 }
 
-void 
-normalize_coord (std::vector & coord,
-		 const SimulationRegion & region)
-{
+void normalize_coord(std::vector &coord,
+                     const SimulationRegion ®ion) {
   int natoms = coord.size() / 3;
 
-  for (int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     double inter[3];
-    region.phys2Inter (inter, &coord[3*ii]);
-    for (int dd = 0; dd < 3; ++dd){
+    region.phys2Inter(inter, &coord[3 * ii]);
+    for (int dd = 0; dd < 3; ++dd) {
       inter[dd] -= int(floor(inter[dd]));
-      if      (inter[dd] < 0 ) inter[dd] += 1.;
-      else if (inter[dd] >= 1) inter[dd] -= 1.;
+      if (inter[dd] < 0)
+        inter[dd] += 1.;
+      else if (inter[dd] >= 1)
+        inter[dd] -= 1.;
     }
-    region.inter2Phys (&coord[3*ii], inter);
+    region.inter2Phys(&coord[3 * ii], inter);
   }
 }
 
-int main(int argc, char * argv[])
-{
+int main(int argc, char *argv[]) {
   if (argc == 1) {
     std::cerr << "usage " << std::endl;
     std::cerr << argv[0] << " input_script " << std::endl;
     return 1;
   }
 
-  std::ifstream fp (argv[1]);
+  std::ifstream fp(argv[1]);
   json jdata;
   fp >> jdata;
   std::cout << "# using data base" << std::endl;
@@ -78,115 +75,120 @@ int main(int argc, char * argv[])
   }
   int port = jdata["port"];
   std::string host_str = jdata["host"];
-  const char * host = host_str.c_str();
+  const char *host = host_str.c_str();
   std::string graph_file = jdata["graph_file"];
   std::string coord_file = jdata["coord_file"];
   std::map name_type_map = jdata["atom_type"];
   bool b_verb = jdata["verbose"];
-  
-  std::vector atom_name;  
+
+  std::vector atom_name;
   {
-    std::vector >  posi;
-    std::vector >  velo;
-    std::vector >  forc;
-    XyzFileManager::read (coord_file, atom_name, posi, velo, forc);
+    std::vector > posi;
+    std::vector > velo;
+    std::vector > forc;
+    XyzFileManager::read(coord_file, atom_name, posi, velo, forc);
   }
 
-  Convert cvt (atom_name, name_type_map);
-  deepmd::DeepPot nnp_inter (graph_file);
-  
+  Convert cvt(atom_name, name_type_map);
+  deepmd::DeepPot nnp_inter(graph_file);
+
   enum { _MSGLEN = 12 };
   int MSGLEN = _MSGLEN;
-  char header [_MSGLEN+1] = {'\0'};
+  char header[_MSGLEN + 1] = {'\0'};
   bool hasdata = false;
   int32_t cbuf = 0;
   char initbuffer[2048];
-  double cell_h [9];
+  double cell_h[9];
   double cell_ih[9];
   int32_t natoms = -1;
-  double dener (0);
-  std::vector dforce;
-  std::vector dforce_tmp;
-  std::vector dvirial (9, 0);
-  std::vector dcoord ;
-  std::vector dcoord_tmp ;
-  std::vector dtype = cvt.get_type();
-  std::vector dbox (9, 0) ;
-  SimulationRegion region;
-  double * msg_buff = NULL;
+  double dener(0);
+  std::vector dforce;
+  std::vector dforce_tmp;
+  std::vector dvirial(9, 0);
+  std::vector dcoord;
+  std::vector dcoord_tmp;
+  std::vector dtype = cvt.get_type();
+  std::vector dbox(9, 0);
+  SimulationRegion region;
+  double *msg_buff = NULL;
   double ener;
   double virial[9];
-  char msg_needinit[]	= "NEEDINIT    ";
-  char msg_havedata[]	= "HAVEDATA    ";
-  char msg_ready[]	= "READY       ";
+  char msg_needinit[] = "NEEDINIT    ";
+  char msg_havedata[] = "HAVEDATA    ";
+  char msg_ready[] = "READY       ";
   char msg_forceready[] = "FORCEREADY  ";
-  char msg_nothing[]	= "nothing";
-  
-  open_socket_ (&socket, &inet, &port, host);
-  
+  char msg_nothing[] = "nothing";
+
+  open_socket_(&socket, &inet, &port, host);
+
   bool isinit = true;
 
   while (true) {
-    readbuffer_ (&socket, header, MSGLEN);
-    std::string header_str (trimwhitespace(header));
+    readbuffer_(&socket, header, MSGLEN);
+    std::string header_str(trimwhitespace(header));
     if (b_verb) std::cout << "# get header " << header_str << std::endl;
 
-    if (header_str == "STATUS"){
-      if (! isinit) {
-	writebuffer_ (&socket, msg_needinit, MSGLEN);
-	if (b_verb) std::cout << "# send back  " << "NEEDINIT" << std::endl;
+    if (header_str == "STATUS") {
+      if (!isinit) {
+        writebuffer_(&socket, msg_needinit, MSGLEN);
+        if (b_verb)
+          std::cout << "# send back  "
+                    << "NEEDINIT" << std::endl;
+      } else if (hasdata) {
+        writebuffer_(&socket, msg_havedata, MSGLEN);
+        if (b_verb)
+          std::cout << "# send back  "
+                    << "HAVEDATA" << std::endl;
+      } else {
+        writebuffer_(&socket, msg_ready, MSGLEN);
+        if (b_verb)
+          std::cout << "# send back  "
+                    << "READY" << std::endl;
       }
-      else if (hasdata) {
-	writebuffer_ (&socket, msg_havedata, MSGLEN);
-	if (b_verb) std::cout << "# send back  " << "HAVEDATA" << std::endl;
-      }
-      else {
-	writebuffer_ (&socket, msg_ready, MSGLEN);
-	if (b_verb) std::cout << "# send back  " << "READY" << std::endl;
-      }
-    }
-    else if (header_str == "INIT") {
-      assert (4 == sizeof(int32_t));
-      readbuffer_ (&socket, (char *)(&cbuf), sizeof(int32_t));
-      readbuffer_ (&socket, initbuffer, cbuf);
-      if (b_verb) std::cout << "Init sys from wrapper, using " << initbuffer << std::endl;
-    }
-    else if (header_str == "POSDATA"){
-      assert (8 == sizeof(double));
-      
+    } else if (header_str == "INIT") {
+      assert(4 == sizeof(int32_t));
+      readbuffer_(&socket, (char *)(&cbuf), sizeof(int32_t));
+      readbuffer_(&socket, initbuffer, cbuf);
+      if (b_verb)
+        std::cout << "Init sys from wrapper, using " << initbuffer << std::endl;
+    } else if (header_str == "POSDATA") {
+      assert(8 == sizeof(double));
+
       // get box
-      readbuffer_ (&socket, (char *)(cell_h),  9*sizeof(double));
-      readbuffer_ (&socket, (char *)(cell_ih), 9*sizeof(double));
-      for (int dd = 0; dd < 9; ++dd){
-	dbox[dd] = cell_h[(dd%3)*3+(dd/3)] * cvt_len;
+      readbuffer_(&socket, (char *)(cell_h), 9 * sizeof(double));
+      readbuffer_(&socket, (char *)(cell_ih), 9 * sizeof(double));
+      for (int dd = 0; dd < 9; ++dd) {
+        dbox[dd] = cell_h[(dd % 3) * 3 + (dd / 3)] * cvt_len;
       }
-      region.reinitBox (&dbox[0]);
-      
+      region.reinitBox(&dbox[0]);
+
       // get number of atoms
-      readbuffer_ (&socket, (char *)(&cbuf), sizeof(int32_t));
+      readbuffer_(&socket, (char *)(&cbuf), sizeof(int32_t));
       if (natoms < 0) {
-	natoms = cbuf;
-	if (b_verb) std::cout << "# get number of atoms in system: " << natoms << std::endl;
-	
-	dcoord.resize (3 * natoms);
-	dforce.resize (3 * natoms, 0);
-	dcoord_tmp.resize (3 * natoms);
-	dforce_tmp.resize (3 * natoms, 0);
-	msg_buff = new double [3 * natoms];
+        natoms = cbuf;
+        if (b_verb)
+          std::cout << "# get number of atoms in system: " << natoms
+                    << std::endl;
+
+        dcoord.resize(3 * natoms);
+        dforce.resize(3 * natoms, 0);
+        dcoord_tmp.resize(3 * natoms);
+        dforce_tmp.resize(3 * natoms, 0);
+        msg_buff = new double[3 * natoms];
       }
-      
+
       // get coord
-      readbuffer_ (&socket, (char *)(msg_buff), natoms * 3 * sizeof(double));
-      for (int ii = 0; ii < natoms * 3; ++ii){
-	dcoord_tmp[ii] = msg_buff[ii] * cvt_len;
+      readbuffer_(&socket, (char *)(msg_buff), natoms * 3 * sizeof(double));
+      for (int ii = 0; ii < natoms * 3; ++ii) {
+        dcoord_tmp[ii] = msg_buff[ii] * cvt_len;
       }
-      cvt.forward (dcoord, dcoord_tmp, 3);
-      normalize_coord (dcoord, region);
+      cvt.forward(dcoord, dcoord_tmp, 3);
+      normalize_coord(dcoord, region);
 
       // nnp over writes ener, force and virial
 #ifdef HIGH_PREC
-      nnp_inter.compute (dener, dforce_tmp, dvirial, dcoord, dtype, dbox);   
-#else 
+      nnp_inter.compute(dener, dforce_tmp, dvirial, dcoord, dtype, dbox);
+#else
       // model in float prec
       std::vector dcoord_(dcoord.size());
       std::vector dbox_(dbox.size());
@@ -195,40 +197,42 @@ int main(int argc, char * argv[])
       std::vector dforce_(dforce.size(), 0);
       std::vector dvirial_(dvirial.size(), 0);
       double dener_ = 0;
-      nnp_inter.compute (dener_, dforce_, dvirial_, dcoord_, dtype, dbox_);   
-      for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce_tmp[dd] = dforce_[dd];	
-      for (unsigned dd = 0; dd < dvirial.size(); ++dd) dvirial[dd] = dvirial_[dd];	
-      dener = dener_;      
+      nnp_inter.compute(dener_, dforce_, dvirial_, dcoord_, dtype, dbox_);
+      for (unsigned dd = 0; dd < dforce.size(); ++dd)
+        dforce_tmp[dd] = dforce_[dd];
+      for (unsigned dd = 0; dd < dvirial.size(); ++dd)
+        dvirial[dd] = dvirial_[dd];
+      dener = dener_;
 #endif
-      cvt.backward (dforce, dforce_tmp, 3);
+      cvt.backward(dforce, dforce_tmp, 3);
       hasdata = true;
-    }
-    else if (header_str == "GETFORCE"){
+    } else if (header_str == "GETFORCE") {
       ener = dener * icvt_ener;
-      for (int ii = 0; ii < natoms * 3; ++ii){
-	msg_buff[ii] = dforce[ii] * icvt_f;
+      for (int ii = 0; ii < natoms * 3; ++ii) {
+        msg_buff[ii] = dforce[ii] * icvt_f;
       }
-      for (int ii = 0; ii < 9; ++ii){
-	virial[ii] = dvirial[(ii%3)*3+(ii/3)] * icvt_ener * (1.0);
+      for (int ii = 0; ii < 9; ++ii) {
+        virial[ii] = dvirial[(ii % 3) * 3 + (ii / 3)] * icvt_ener * (1.0);
       }
-      if (b_verb) std::cout << "# energy of sys. : " << std::scientific << std::setprecision(10) << dener << std::endl;
-      writebuffer_ (&socket, msg_forceready, MSGLEN);
-      writebuffer_ (&socket, (char *)(&ener), sizeof(double));
-      writebuffer_ (&socket, (char *)(&natoms), sizeof(int32_t));
-      writebuffer_ (&socket, (char *)(msg_buff), 3 * natoms * sizeof(double));
-      writebuffer_ (&socket, (char *)(virial), 9 * sizeof(double));
+      if (b_verb)
+        std::cout << "# energy of sys. : " << std::scientific
+                  << std::setprecision(10) << dener << std::endl;
+      writebuffer_(&socket, msg_forceready, MSGLEN);
+      writebuffer_(&socket, (char *)(&ener), sizeof(double));
+      writebuffer_(&socket, (char *)(&natoms), sizeof(int32_t));
+      writebuffer_(&socket, (char *)(msg_buff), 3 * natoms * sizeof(double));
+      writebuffer_(&socket, (char *)(virial), 9 * sizeof(double));
       cbuf = 7;
-      writebuffer_ (&socket, (char *)(&cbuf), sizeof(int32_t));
-      writebuffer_ (&socket, msg_nothing, 7);
+      writebuffer_(&socket, (char *)(&cbuf), sizeof(int32_t));
+      writebuffer_(&socket, msg_nothing, 7);
       hasdata = false;
-    }
-    else {
+    } else {
       std::cerr << "unexpected header " << std::endl;
       return 1;
     }
   }
 
-  if (msg_buff != NULL){
-    delete [] msg_buff;
+  if (msg_buff != NULL) {
+    delete[] msg_buff;
   }
 }
diff --git a/source/ipi/include/Convert.h b/source/ipi/include/Convert.h
index 7440aea6d2..f116cc61e9 100644
--- a/source/ipi/include/Convert.h
+++ b/source/ipi/include/Convert.h
@@ -1,27 +1,25 @@
 #pragma once
 
-#include 
-#include 
 #include 
+#include 
+#include 
 
 // using namespace std;
 
 template 
-class Convert 
-{
-public:
-  Convert(const std::vector &  atomname,
-	  std::map & name_type_map);
-  void forward (
-      std::vector & out,
-      const std::vector & in, 
-      const int stride = 1) const ;
-  void backward (
-      std::vector & out,
-      const std::vector & in,
-      const int stride = 1) const ;
-  const std::vector & get_type () const {return atype;}
-private:
+class Convert {
+ public:
+  Convert(const std::vector& atomname,
+          std::map& name_type_map);
+  void forward(std::vector& out,
+               const std::vector& in,
+               const int stride = 1) const;
+  void backward(std::vector& out,
+                const std::vector& in,
+                const int stride = 1) const;
+  const std::vector& get_type() const { return atype; }
+
+ private:
   std::vector idx_map;
   std::vector atype;
 };
diff --git a/source/ipi/include/StringSplit.h b/source/ipi/include/StringSplit.h
index 6e270ba891..af80f11447 100644
--- a/source/ipi/include/StringSplit.h
+++ b/source/ipi/include/StringSplit.h
@@ -1,40 +1,34 @@
 #ifndef __StringSplit_h_wanghan__
 #define __StringSplit_h_wanghan__
 
+#include 
+#include 
+#include 
 #include 
 #include 
-#include 
-#include 
-#include 
 
-namespace StringOperation{
-  void split (const std::string & in,
-	      std::vector & out);
+namespace StringOperation {
+void split(const std::string& in, std::vector& out);
 }
 
-
-void StringOperation::
-split (const std::string & in,
-       std::vector & out)
-{
+void StringOperation::split(const std::string& in,
+                            std::vector& out) {
   std::istringstream iss(in);
   out.clear();
-  
+
   do {
     std::string sub;
     iss >> sub;
-    out.push_back (sub);
-  // std::vector tokens;
-  // tokens.push_back (" ");
-  // tokens.push_back ("\t");
-  // std::copy(std::istream_iterator(iss),
-  // 	    std::istream_iterator(),
-  // 	    std::back_inserter >(tokens));
+    out.push_back(sub);
+    // std::vector tokens;
+    // tokens.push_back (" ");
+    // tokens.push_back ("\t");
+    // std::copy(std::istream_iterator(iss),
+    // 	    std::istream_iterator(),
+    // 	    std::back_inserter >(tokens));
   } while (iss);
 
   out.pop_back();
 }
 
-
-
 #endif
diff --git a/source/ipi/include/XyzFileManager.h b/source/ipi/include/XyzFileManager.h
index cc48624554..aa50e76461 100644
--- a/source/ipi/include/XyzFileManager.h
+++ b/source/ipi/include/XyzFileManager.h
@@ -5,15 +5,14 @@
 #include 
 // using namespace std;
 
-namespace XyzFileManager{
+namespace XyzFileManager {
+
+void read(const std::string& file,
+          std::vector& atom_name,
+          std::vector >& posi,
+          std::vector >& velo,
+          std::vector >& forc);
 
-  void
-  read (const std::string & file,
-	std::vector & atom_name,
-	std::vector > & posi,
-	std::vector > & velo,
-	std::vector > & forc);
-  
 };
 
 #endif
diff --git a/source/ipi/include/sockets.h b/source/ipi/include/sockets.h
index d4a465ab17..f54ce500ac 100644
--- a/source/ipi/include/sockets.h
+++ b/source/ipi/include/sockets.h
@@ -1,20 +1,20 @@
 #pragma once
 
+#include 
+#include 
 #include 
 #include 
-#include 
 #include 
-#include 
 #include 
-#include 
+#include 
 #include 
-#include 
+#include 
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-    void error (const char *msg);
+void error(const char *msg);
 
 /* Opens a socket.
    Note that fortran passes an extra argument for the string length, but this is
@@ -28,7 +28,7 @@ extern "C" {
       recommended.
    host: The name of the host server.
 */
-    void open_socket_(int *psockfd, int* inet, int* port, const char* host);
+void open_socket_(int *psockfd, int *inet, int *port, const char *host);
 
 /* Writes to a socket.
    Args:
@@ -36,7 +36,7 @@ extern "C" {
    data: The data to be written to the socket.
    plen: The length of the data in bytes.
 */
-    void writebuffer_(int *psockfd, char *data, int len);    
+void writebuffer_(int *psockfd, char *data, int len);
 
 /* Reads from a socket.
    Args:
@@ -44,8 +44,8 @@ extern "C" {
    data: The storage array for data read from the socket.
    plen: The length of the data in bytes.
 */
-    void readbuffer_(int *psockfd, char *data, int len);    
-    
+void readbuffer_(int *psockfd, char *data, int len);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/source/ipi/src/Convert.cc b/source/ipi/src/Convert.cc
index 8019181efd..56d03db659 100644
--- a/source/ipi/src/Convert.cc
+++ b/source/ipi/src/Convert.cc
@@ -4,63 +4,54 @@
 #include 
 
 template 
-Convert::
-Convert(const std::vector &  atomname,
-	std::map & name_type_map)
-{
+Convert::Convert(const std::vector& atomname,
+                            std::map& name_type_map) {
   int natoms = atomname.size();
-  atype.resize (natoms);
-  for (unsigned ii = 0; ii < atype.size(); ++ii){
+  atype.resize(natoms);
+  for (unsigned ii = 0; ii < atype.size(); ++ii) {
     atype[ii] = name_type_map[atomname[ii]];
   }
-  std::vector > sorting (natoms);
-  for (unsigned ii = 0; ii < sorting.size(); ++ii){
-    sorting[ii] = std::pair (atype[ii], ii);
+  std::vector > sorting(natoms);
+  for (unsigned ii = 0; ii < sorting.size(); ++ii) {
+    sorting[ii] = std::pair(atype[ii], ii);
   }
   // sort (sorting.begin(), sorting.end());
   idx_map.resize(natoms);
-  for (unsigned ii = 0; ii < idx_map.size(); ++ii){
+  for (unsigned ii = 0; ii < idx_map.size(); ++ii) {
     idx_map[ii] = sorting[ii].second;
     atype[ii] = sorting[ii].first;
   }
 }
 
 template 
-void
-Convert::
-forward (std::vector & out,
-	 const std::vector & in, 
-	 const int stride) const 
-{
-  assert (in.size() == stride * idx_map.size());
+void Convert::forward(std::vector& out,
+                                 const std::vector& in,
+                                 const int stride) const {
+  assert(in.size() == stride * idx_map.size());
   int natoms = idx_map.size();
-  out.resize (stride * natoms);
-  for (int ii = 0; ii < natoms; ++ii){
+  out.resize(stride * natoms);
+  for (int ii = 0; ii < natoms; ++ii) {
     int gro_i = idx_map[ii];
-    for (int dd = 0; dd < stride; ++dd){
-      out[ii*stride+dd] = in[gro_i*stride+dd];
+    for (int dd = 0; dd < stride; ++dd) {
+      out[ii * stride + dd] = in[gro_i * stride + dd];
     }
   }
 }
 
 template 
-void
-Convert::
-backward (std::vector & out,
-	  const std::vector & in,
-	  const int stride) const 
-{
+void Convert::backward(std::vector& out,
+                                  const std::vector& in,
+                                  const int stride) const {
   int natoms = idx_map.size();
-  assert (in.size() == stride * idx_map.size());
+  assert(in.size() == stride * idx_map.size());
   out.resize(stride * natoms);
-  for (int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     int gro_i = idx_map[ii];
-    for (int dd = 0; dd < stride; ++dd){
-      out[gro_i*stride+dd] = in[ii*stride+dd];
+    for (int dd = 0; dd < stride; ++dd) {
+      out[gro_i * stride + dd] = in[ii * stride + dd];
     }
   }
 }
 
 template class Convert;
 template class Convert;
-
diff --git a/source/ipi/src/XyzFileManager.cc b/source/ipi/src/XyzFileManager.cc
index a90b853fae..8529acbe7d 100644
--- a/source/ipi/src/XyzFileManager.cc
+++ b/source/ipi/src/XyzFileManager.cc
@@ -1,82 +1,77 @@
-#include "StringSplit.h"
 #include "XyzFileManager.h"
 
-#include 
-#include 
-#include 
 #include 
 
-void
-XyzFileManager::
-read (const std::string & file,
-      std::vector & atom_name,
-      std::vector > & posi,
-      std::vector > & velo,
-      std::vector > & forc)
-{
+#include 
+#include 
+#include 
+
+#include "StringSplit.h"
+
+void XyzFileManager::read(const std::string& file,
+                          std::vector& atom_name,
+                          std::vector >& posi,
+                          std::vector >& velo,
+                          std::vector >& forc) {
   // getBoxSize (file, boxsize);
-  
+
   posi.clear();
   velo.clear();
 
-  std::ifstream data0 (file.c_str());
+  std::ifstream data0(file.c_str());
   if (!data0.is_open()) {
-    std::cerr <<  "cannot open file " << file << std::endl;
+    std::cerr << "cannot open file " << file << std::endl;
     exit(1);
   }
-  
+
   std::string valueline;
   std::vector words;
-  words.reserve (10);
+  words.reserve(10);
   std::string tmpname;
-  std::vector tmpp(3);
-  std::vector tmpv(3);
-  std::vector tmpf(3);
+  std::vector tmpp(3);
+  std::vector tmpv(3);
+  std::vector tmpf(3);
   std::getline(data0, valueline);
-  long long int numb_atom = atoll (valueline.c_str());
+  long long int numb_atom = atoll(valueline.c_str());
   std::getline(data0, valueline);
-  
-  for (long long int ii = 0; ii< numb_atom; ++ii) {
+
+  for (long long int ii = 0; ii < numb_atom; ++ii) {
     std::getline(data0, valueline);
-    StringOperation::split (std::string(valueline), words);
-    if (words.size() == 10){
-      tmpp[0] = atof (words[1+0].c_str());
-      tmpp[1] = atof (words[1+1].c_str());
-      tmpp[2] = atof (words[1+2].c_str());
-      tmpv[0] = atof (words[1+3].c_str());
-      tmpv[1] = atof (words[1+4].c_str());
-      tmpv[2] = atof (words[1+5].c_str());
-      tmpf[0] = atof (words[1+6].c_str());
-      tmpf[1] = atof (words[1+7].c_str());
-      tmpf[2] = atof (words[1+8].c_str());
-      posi.push_back (tmpp);
-      velo.push_back (tmpv);
-      forc.push_back (tmpf);
-      atom_name.push_back (words[0]);
-    }
-    else if (words.size() == 7){
-      tmpp[0] = atof (words[1+0].c_str());
-      tmpp[1] = atof (words[1+1].c_str());
-      tmpp[2] = atof (words[1+2].c_str());
-      tmpv[0] = atof (words[1+3].c_str());
-      tmpv[1] = atof (words[1+4].c_str());
-      tmpv[2] = atof (words[1+5].c_str());
-      posi.push_back (tmpp);
-      velo.push_back (tmpv);
-      atom_name.push_back (words[0]);
-    }
-    else if (words.size() == 4){
-      tmpp[0] = atof (words[1+0].c_str());
-      tmpp[1] = atof (words[1+1].c_str());
-      tmpp[2] = atof (words[1+2].c_str());
-      posi.push_back (tmpp);
-      atom_name.push_back (words[0]);
-    }
-    else {
-      std::cerr << "XyzFileManager::read: wrong format, line has "<< words.size() << " words" << std::endl;
-      exit (1);
+    StringOperation::split(std::string(valueline), words);
+    if (words.size() == 10) {
+      tmpp[0] = atof(words[1 + 0].c_str());
+      tmpp[1] = atof(words[1 + 1].c_str());
+      tmpp[2] = atof(words[1 + 2].c_str());
+      tmpv[0] = atof(words[1 + 3].c_str());
+      tmpv[1] = atof(words[1 + 4].c_str());
+      tmpv[2] = atof(words[1 + 5].c_str());
+      tmpf[0] = atof(words[1 + 6].c_str());
+      tmpf[1] = atof(words[1 + 7].c_str());
+      tmpf[2] = atof(words[1 + 8].c_str());
+      posi.push_back(tmpp);
+      velo.push_back(tmpv);
+      forc.push_back(tmpf);
+      atom_name.push_back(words[0]);
+    } else if (words.size() == 7) {
+      tmpp[0] = atof(words[1 + 0].c_str());
+      tmpp[1] = atof(words[1 + 1].c_str());
+      tmpp[2] = atof(words[1 + 2].c_str());
+      tmpv[0] = atof(words[1 + 3].c_str());
+      tmpv[1] = atof(words[1 + 4].c_str());
+      tmpv[2] = atof(words[1 + 5].c_str());
+      posi.push_back(tmpp);
+      velo.push_back(tmpv);
+      atom_name.push_back(words[0]);
+    } else if (words.size() == 4) {
+      tmpp[0] = atof(words[1 + 0].c_str());
+      tmpp[1] = atof(words[1 + 1].c_str());
+      tmpp[2] = atof(words[1 + 2].c_str());
+      posi.push_back(tmpp);
+      atom_name.push_back(words[0]);
+    } else {
+      std::cerr << "XyzFileManager::read: wrong format, line has "
+                << words.size() << " words" << std::endl;
+      exit(1);
     }
   }
 }
-
-
diff --git a/source/ipi/src/sockets.c b/source/ipi/src/sockets.c
index 624ee1dd7f..497f77fba4 100644
--- a/source/ipi/src/sockets.c
+++ b/source/ipi/src/sockets.c
@@ -34,21 +34,24 @@ Can be linked to a FORTRAN code that does not support sockets natively.
    read_buffer_: Reads data from the socket.
 */
 
+#include 
+#include 
 #include 
 #include 
-#include 
 #include 
-#include 
 #include 
-#include 
+#include 
 #include 
-#include 
+#include 
 
 void error(const char *msg)
 // Prints an error message and then exits.
-{   perror(msg);  exit(-1);   }
+{
+  perror(msg);
+  exit(-1);
+}
 
-void open_socket_(int *psockfd, int* inet, int* port, const char* host)
+void open_socket_(int *psockfd, int *inet, int *port, const char *host)
 /* Opens a socket.
 
 Note that fortran passes an extra argument for the string length, but this is
@@ -65,42 +68,46 @@ ignored here for C compatibility.
 */
 
 {
-   int sockfd, portno, n;
-   struct hostent *server;
-
-   struct sockaddr * psock; int ssock;
-
-   if (*inet>0)
-   {  // creates an internet socket
-      struct sockaddr_in serv_addr;      psock=(struct sockaddr *)&serv_addr;     ssock=sizeof(serv_addr);
-      sockfd = socket(AF_INET, SOCK_STREAM, 0);
-      if (sockfd < 0)  error("Error opening socket");
-
-      server = gethostbyname(host);
-      if (server == NULL)
-      {
-         fprintf(stderr, "Error opening socket: no such host %s \n", host);
-         exit(-1);
-      }
-
-      bzero((char *) &serv_addr, sizeof(serv_addr));
-      serv_addr.sin_family = AF_INET;
-      bcopy((char *)server->h_addr, (char *)&serv_addr.sin_addr.s_addr, server->h_length);
-      serv_addr.sin_port = htons(*port);
-      if (connect(sockfd, psock, ssock) < 0) error("Error opening socket: wrong host address, or broken connection");
-   }
-   else
-   {  // creates a unix socket
-      struct sockaddr_un serv_addr;      psock=(struct sockaddr *)&serv_addr;     ssock=sizeof(serv_addr);
-      sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
-      bzero((char *) &serv_addr, sizeof(serv_addr));
-      serv_addr.sun_family = AF_UNIX;
-      strcpy(serv_addr.sun_path, "/tmp/ipi_");
-      strcpy(serv_addr.sun_path+9, host);
-      if (connect(sockfd, psock, ssock) < 0) error("Error opening socket: wrong host address, or broken connection");
-   }
-
-   *psockfd=sockfd;
+  int sockfd, portno, n;
+  struct hostent *server;
+
+  struct sockaddr *psock;
+  int ssock;
+
+  if (*inet > 0) {  // creates an internet socket
+    struct sockaddr_in serv_addr;
+    psock = (struct sockaddr *)&serv_addr;
+    ssock = sizeof(serv_addr);
+    sockfd = socket(AF_INET, SOCK_STREAM, 0);
+    if (sockfd < 0) error("Error opening socket");
+
+    server = gethostbyname(host);
+    if (server == NULL) {
+      fprintf(stderr, "Error opening socket: no such host %s \n", host);
+      exit(-1);
+    }
+
+    bzero((char *)&serv_addr, sizeof(serv_addr));
+    serv_addr.sin_family = AF_INET;
+    bcopy((char *)server->h_addr, (char *)&serv_addr.sin_addr.s_addr,
+          server->h_length);
+    serv_addr.sin_port = htons(*port);
+    if (connect(sockfd, psock, ssock) < 0)
+      error("Error opening socket: wrong host address, or broken connection");
+  } else {  // creates a unix socket
+    struct sockaddr_un serv_addr;
+    psock = (struct sockaddr *)&serv_addr;
+    ssock = sizeof(serv_addr);
+    sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+    bzero((char *)&serv_addr, sizeof(serv_addr));
+    serv_addr.sun_family = AF_UNIX;
+    strcpy(serv_addr.sun_path, "/tmp/ipi_");
+    strcpy(serv_addr.sun_path + 9, host);
+    if (connect(sockfd, psock, ssock) < 0)
+      error("Error opening socket: wrong host address, or broken connection");
+  }
+
+  *psockfd = sockfd;
 }
 
 void writebuffer_(int *psockfd, char *data, int len)
@@ -113,14 +120,14 @@ void writebuffer_(int *psockfd, char *data, int len)
 */
 
 {
-   int n;
-   int sockfd=*psockfd;
+  int n;
+  int sockfd = *psockfd;
 
-   n = write(sockfd,data,len);
-   if (n < 0) error("Error writing to socket: server has quit or connection broke");
+  n = write(sockfd, data, len);
+  if (n < 0)
+    error("Error writing to socket: server has quit or connection broke");
 }
 
-
 void readbuffer_(int *psockfd, char *data, int len)
 /* Reads from a socket.
 
@@ -131,15 +138,16 @@ void readbuffer_(int *psockfd, char *data, int len)
 */
 
 {
-   int n, nr;
-   int sockfd=*psockfd;
+  int n, nr;
+  int sockfd = *psockfd;
 
-   n = nr = read(sockfd,data,len);
+  n = nr = read(sockfd, data, len);
 
-   while (nr>0 && n 0 && n < len) {
+    nr = read(sockfd, &data[n], len - n);
+    n += nr;
+  }
 
-   if (n == 0) error("Error reading from socket: server has quit or connection broke");
+  if (n == 0)
+    error("Error reading from socket: server has quit or connection broke");
 }
-
-
diff --git a/source/lib/CMakeLists.txt b/source/lib/CMakeLists.txt
index 5902b78a61..af88cb5ae6 100644
--- a/source/lib/CMakeLists.txt
+++ b/source/lib/CMakeLists.txt
@@ -1,45 +1,39 @@
 # libmd
-set (libname ${LIB_DEEPMD})
+set(libname ${LIB_DEEPMD})
 
 file(GLOB LIB_SRC src/*.cc src/*.cpp)
 file(GLOB INC_SRC include/*.h ${CMAKE_CURRENT_BINARY_DIR}/version.h)
 
 add_library(${libname} SHARED ${LIB_SRC})
 target_include_directories(
-  ${libname} PUBLIC
-  $
-  $
-)
+  ${libname} PUBLIC $
+                    $)
 
-if (USE_CUDA_TOOLKIT)
+if(USE_CUDA_TOOLKIT)
   add_definitions("-DGOOGLE_CUDA")
   add_subdirectory(src/cuda)
-  set (EXTRA_LIBS ${EXTRA_LIBS} deepmd_op_cuda)
-  target_link_libraries (${libname} INTERFACE deepmd_dyn_cudart ${EXTRA_LIBS})
+  set(EXTRA_LIBS ${EXTRA_LIBS} deepmd_op_cuda)
+  target_link_libraries(${libname} INTERFACE deepmd_dyn_cudart ${EXTRA_LIBS})
   # gpu_cuda.h
   target_include_directories(
-    ${libname} PUBLIC
-    $
-    $
-  )
+    ${libname} PUBLIC $
+                      $)
 endif()
 
-if (USE_ROCM_TOOLKIT)
+if(USE_ROCM_TOOLKIT)
   add_definitions("-DTENSORFLOW_USE_ROCM")
   add_subdirectory(src/rocm)
-  set (EXTRA_LIBS ${EXTRA_LIBS} deepmd_op_rocm)
-  target_link_libraries (${libname} INTERFACE ${ROCM_LIBRARIES} ${EXTRA_LIBS})
+  set(EXTRA_LIBS ${EXTRA_LIBS} deepmd_op_rocm)
+  target_link_libraries(${libname} INTERFACE ${ROCM_LIBRARIES} ${EXTRA_LIBS})
   # gpu_rocm.h
   target_include_directories(
-    ${libname} PUBLIC
-    $
-    $
-  )
+    ${libname} PUBLIC $
+                      $)
 endif()
 
 set_target_properties(${libname} PROPERTIES INSTALL_RPATH $ORIGIN)
 
-if (CMAKE_TESTING_ENABLED)
+if(CMAKE_TESTING_ENABLED)
   target_link_libraries(${libname} PRIVATE coverage_config)
 endif()
 
@@ -47,16 +41,12 @@ if(BUILD_PY_IF)
   install(TARGETS ${libname} DESTINATION deepmd/op/)
 else(BUILD_PY_IF)
   install(
-    TARGETS
-    ${libname}
+    TARGETS ${libname}
     EXPORT ${CMAKE_PROJECT_NAME}Targets
     DESTINATION lib/)
-  install(
-    FILES	${INC_SRC}
-    DESTINATION	include/deepmd
-    )
+  install(FILES ${INC_SRC} DESTINATION include/deepmd)
 endif(BUILD_PY_IF)
 
-if (BUILD_CPP_IF AND CMAKE_TESTING_ENABLED)
+if(BUILD_CPP_IF AND CMAKE_TESTING_ENABLED)
   add_subdirectory(tests)
 endif()
diff --git a/source/lib/include/ComputeDescriptor.h b/source/lib/include/ComputeDescriptor.h
index 8bc246881a..cde9c0eb92 100644
--- a/source/lib/include/ComputeDescriptor.h
+++ b/source/lib/include/ComputeDescriptor.h
@@ -1,350 +1,339 @@
 #pragma once
 
 #include 
-#include 
 #include 
+#include 
 
 #include "SimulationRegion.h"
-#include "utilities.h"
 #include "switcher.h"
+#include "utilities.h"
 
+inline void compute_descriptor(std::vector &descrpt_a,
+                               std::vector &descrpt_r,
+                               std::vector &rot_mat,
+                               const std::vector &posi,
+                               const int &ntypes,
+                               const std::vector &type,
+                               const SimulationRegion ®ion,
+                               const bool &b_pbc,
+                               const int &i_idx,
+                               const std::vector &fmt_nlist_a,
+                               const std::vector &fmt_nlist_r,
+                               const std::vector &sec_a,
+                               const std::vector &sec_r,
+                               const int axis0_type,
+                               const int axis0_idx,
+                               const int axis1_type,
+                               const int axis1_idx);
 
-inline
-void compute_descriptor (std::vector &			descrpt_a,
-			 std::vector &			descrpt_r,
-			 std::vector &			rot_mat,
-			 const std::vector &		posi,
-			 const int &				ntypes,
-			 const std::vector &		type,
-			 const SimulationRegion &	region,
-			 const bool &				b_pbc,
-			 const int &				i_idx,
-			 const std::vector &		fmt_nlist_a,
-			 const std::vector &		fmt_nlist_r,
-			 const std::vector &		sec_a,
-			 const std::vector &		sec_r,
-			 const int				axis0_type,
-			 const int				axis0_idx,
-			 const int				axis1_type,
-			 const int				axis1_idx);
-
-inline
-void compute_descriptor (std::vector &			descrpt_a,
-			 std::vector &			descrpt_a_deriv,
-			 std::vector &			descrpt_r,
-			 std::vector &			descrpt_r_deriv,
-			 std::vector &			rij_a,
-			 std::vector &			rij_r,
-			 std::vector &			rot_mat,
-			 const std::vector &		posi,
-			 const int &				ntypes,
-			 const std::vector &		type,
-			 const SimulationRegion &	region,
-			 const bool &				b_pbc,
-			 const int &				i_idx,
-			 const std::vector &		fmt_nlist_a,
-			 const std::vector &		fmt_nlist_r,
-			 const std::vector &		sec_a,
-			 const std::vector &		sec_r,
-			 const int				axis0_type,
-			 const int				axis0_idx,
-			 const int				axis1_type,
-			 const int				axis1_idx);
-
+inline void compute_descriptor(std::vector &descrpt_a,
+                               std::vector &descrpt_a_deriv,
+                               std::vector &descrpt_r,
+                               std::vector &descrpt_r_deriv,
+                               std::vector &rij_a,
+                               std::vector &rij_r,
+                               std::vector &rot_mat,
+                               const std::vector &posi,
+                               const int &ntypes,
+                               const std::vector &type,
+                               const SimulationRegion ®ion,
+                               const bool &b_pbc,
+                               const int &i_idx,
+                               const std::vector &fmt_nlist_a,
+                               const std::vector &fmt_nlist_r,
+                               const std::vector &sec_a,
+                               const std::vector &sec_r,
+                               const int axis0_type,
+                               const int axis0_idx,
+                               const int axis1_type,
+                               const int axis1_idx);
 
-inline
-void compute_descriptor_se_a_extf (std::vector &	descrpt_a,
-				   std::vector &	descrpt_a_deriv,
-				   std::vector &	rij_a,
-				   const std::vector &	posi,
-				   const int &			ntypes,
-				   const std::vector &	type,
-				   const SimulationRegion &	region,
-				   const bool &			b_pbc,
-				   const std::vector &	efield,
-				   const int &			i_idx,
-				   const std::vector &	fmt_nlist_a,
-				   const std::vector &	sec_a, 
-				   const double &		rmin, 
-				   const double &		rmax);
-inline
-void compute_descriptor_se_a_ef_para (std::vector &			descrpt_a,
-				      std::vector &			descrpt_a_deriv,
-				      std::vector &			rij_a,
-				      const std::vector &		posi,
-				      const int &				ntypes,
-				      const std::vector &			type,
-				      const SimulationRegion &		region,
-				      const bool &				b_pbc,
-				      const std::vector &		efield,
-				      const int &				i_idx,
-				      const std::vector &			fmt_nlist_a,
-				      const std::vector &			sec_a, 
-				      const double &				rmin, 
-				      const double &				rmax);
-inline
-void compute_descriptor_se_a_ef_vert (std::vector &			descrpt_a,
-				      std::vector &			descrpt_a_deriv,
-				      std::vector &			rij_a,
-				      const std::vector &		posi,
-				      const int &				ntypes,
-				      const std::vector &			type,
-				      const SimulationRegion &		region,
-				      const bool &				b_pbc,
-				      const std::vector &		efield,
-				      const int &				i_idx,
-				      const std::vector &			fmt_nlist_a,
-				      const std::vector &			sec_a, 
-				      const double &				rmin, 
-				      const double &				rmax);
+inline void compute_descriptor_se_a_extf(std::vector &descrpt_a,
+                                         std::vector &descrpt_a_deriv,
+                                         std::vector &rij_a,
+                                         const std::vector &posi,
+                                         const int &ntypes,
+                                         const std::vector &type,
+                                         const SimulationRegion ®ion,
+                                         const bool &b_pbc,
+                                         const std::vector &efield,
+                                         const int &i_idx,
+                                         const std::vector &fmt_nlist_a,
+                                         const std::vector &sec_a,
+                                         const double &rmin,
+                                         const double &rmax);
+inline void compute_descriptor_se_a_ef_para(
+    std::vector &descrpt_a,
+    std::vector &descrpt_a_deriv,
+    std::vector &rij_a,
+    const std::vector &posi,
+    const int &ntypes,
+    const std::vector &type,
+    const SimulationRegion ®ion,
+    const bool &b_pbc,
+    const std::vector &efield,
+    const int &i_idx,
+    const std::vector &fmt_nlist_a,
+    const std::vector &sec_a,
+    const double &rmin,
+    const double &rmax);
+inline void compute_descriptor_se_a_ef_vert(
+    std::vector &descrpt_a,
+    std::vector &descrpt_a_deriv,
+    std::vector &rij_a,
+    const std::vector &posi,
+    const int &ntypes,
+    const std::vector &type,
+    const SimulationRegion ®ion,
+    const bool &b_pbc,
+    const std::vector &efield,
+    const int &i_idx,
+    const std::vector &fmt_nlist_a,
+    const std::vector &sec_a,
+    const double &rmin,
+    const double &rmax);
 
-static void 
-compute_dRdT (double (* dRdT)[9], 
-	      const double * r1, 
-	      const double * r2, 
-	      const double * rot)
-{
-  double * dRdT0 = dRdT[0];
-  double * dRdT1 = dRdT[1];
-  double * dRdT2 = dRdT[2];
+static void compute_dRdT(double (*dRdT)[9],
+                         const double *r1,
+                         const double *r2,
+                         const double *rot) {
+  double *dRdT0 = dRdT[0];
+  double *dRdT1 = dRdT[1];
+  double *dRdT2 = dRdT[2];
   const double *xx = rot;
-  const double *yy = rot+3;
+  const double *yy = rot + 3;
 
   double nr1 = sqrt(deepmd::dot3(r1, r1));
-  double nr12 = nr1  * nr1;
-  double nr13 = nr1  * nr12;
+  double nr12 = nr1 * nr1;
+  double nr13 = nr1 * nr12;
   double nr14 = nr12 * nr12;
   double r1dr2 = deepmd::dot3(r1, r2);
 
   // dRdT0
-  for (int ii = 0; ii < 3; ++ii){
-    for (int jj = 0; jj < 3; ++jj){
-      dRdT0[ii*3+jj] = r1[ii] * r1[jj] / nr13;
-      if (ii == jj) dRdT0[ii*3+jj] -= 1./nr1;
+  for (int ii = 0; ii < 3; ++ii) {
+    for (int jj = 0; jj < 3; ++jj) {
+      dRdT0[ii * 3 + jj] = r1[ii] * r1[jj] / nr13;
+      if (ii == jj) dRdT0[ii * 3 + jj] -= 1. / nr1;
     }
   }
-  
-  // dRdT1  
+
+  // dRdT1
   double dRdy[9];
-  for (int ii = 0; ii < 3; ++ii){
-    for (int jj = 0; jj < 3; ++jj){
-      dRdy[ii*3+jj] = (- 2 * r1dr2 / nr14 * r1[ii] * r1[jj] 
-		       + (r1[ii] + r2[ii]) * r1[jj] / nr12 );
+  for (int ii = 0; ii < 3; ++ii) {
+    for (int jj = 0; jj < 3; ++jj) {
+      dRdy[ii * 3 + jj] = (-2 * r1dr2 / nr14 * r1[ii] * r1[jj] +
+                           (r1[ii] + r2[ii]) * r1[jj] / nr12);
       if (ii == jj) {
-	dRdy[ii*3+jj] += r1dr2 / nr12 - 1.;
+        dRdy[ii * 3 + jj] += r1dr2 / nr12 - 1.;
       }
     }
   }
   double tmpy[3];
   for (int dd = 0; dd < 3; ++dd) tmpy[dd] = r2[dd] - r1dr2 / nr12 * r1[dd];
   double ntmpy = sqrt(deepmd::dot3(tmpy, tmpy));
-  double ydRdy [3] = {0};
-  for (int ii = 0; ii < 3; ++ii){
-    for (int jj = 0; jj < 3; ++jj){
-      ydRdy[ii] += tmpy[jj] * dRdy[ii*3 + jj];
+  double ydRdy[3] = {0};
+  for (int ii = 0; ii < 3; ++ii) {
+    for (int jj = 0; jj < 3; ++jj) {
+      ydRdy[ii] += tmpy[jj] * dRdy[ii * 3 + jj];
     }
   }
-  for (int ii = 0; ii < 3; ++ii){
-    for (int jj = 0; jj < 3; ++jj){
-      dRdT1[ii*3+jj] = (- ydRdy[ii] * tmpy[jj] / (ntmpy * ntmpy * ntmpy) 
-			+ dRdy[3*ii+jj] / ntmpy );
+  for (int ii = 0; ii < 3; ++ii) {
+    for (int jj = 0; jj < 3; ++jj) {
+      dRdT1[ii * 3 + jj] = (-ydRdy[ii] * tmpy[jj] / (ntmpy * ntmpy * ntmpy) +
+                            dRdy[3 * ii + jj] / ntmpy);
     }
   }
   // dRdT2
-  for (int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     double res[3];
-    deepmd::cprod(dRdT0 + ii*3, yy, dRdT2 + ii*3);
-    deepmd::cprod(xx, dRdT1 + ii*3, res);
-    for (int dd = 0; dd < 3; ++dd) dRdT2[ii*3+dd] += res[dd];
+    deepmd::cprod(dRdT0 + ii * 3, yy, dRdT2 + ii * 3);
+    deepmd::cprod(xx, dRdT1 + ii * 3, res);
+    for (int dd = 0; dd < 3; ++dd) dRdT2[ii * 3 + dd] += res[dd];
   }
 }
 
-static void 
-compute_dRdT_1 (double (* dRdT)[9], 
-		const double * r1, 
-		const double * r2, 
-		const double * rot)
-{
-  double * dRdT0 = dRdT[0];
-  double * dRdT1 = dRdT[1];
-  double * dRdT2 = dRdT[2];
+static void compute_dRdT_1(double (*dRdT)[9],
+                           const double *r1,
+                           const double *r2,
+                           const double *rot) {
+  double *dRdT0 = dRdT[0];
+  double *dRdT1 = dRdT[1];
+  double *dRdT2 = dRdT[2];
   const double *xx = rot;
-  const double *yy = rot+3;
+  const double *yy = rot + 3;
 
   double nr1 = sqrt(deepmd::dot3(r1, r1));
-  double nr12 = nr1  * nr1;
-  double nr13 = nr1  * nr12;
+  double nr12 = nr1 * nr1;
+  double nr13 = nr1 * nr12;
   double nr14 = nr12 * nr12;
   double r1dr2 = deepmd::dot3(r1, r2);
 
   // dRdT0
-  for (int ii = 0; ii < 3; ++ii){
-    for (int jj = 0; jj < 3; ++jj){
-      dRdT0[ii*3+jj] = -r1[ii] * r1[jj] / nr13;
-      if (ii == jj) dRdT0[ii*3+jj] += 1./nr1;
+  for (int ii = 0; ii < 3; ++ii) {
+    for (int jj = 0; jj < 3; ++jj) {
+      dRdT0[ii * 3 + jj] = -r1[ii] * r1[jj] / nr13;
+      if (ii == jj) dRdT0[ii * 3 + jj] += 1. / nr1;
     }
   }
-  
-  // dRdT1  
+
+  // dRdT1
   double dRdy[9];
-  for (int ii = 0; ii < 3; ++ii){
-    for (int jj = 0; jj < 3; ++jj){
-      dRdy[ii*3+jj] = (+ 2 * r1dr2 / nr14 * r1[ii] * r1[jj] 
-		       - r2[ii] * r1[jj] / nr12 );
+  for (int ii = 0; ii < 3; ++ii) {
+    for (int jj = 0; jj < 3; ++jj) {
+      dRdy[ii * 3 + jj] =
+          (+2 * r1dr2 / nr14 * r1[ii] * r1[jj] - r2[ii] * r1[jj] / nr12);
       if (ii == jj) {
-	dRdy[ii*3+jj] -= r1dr2 / nr12;
+        dRdy[ii * 3 + jj] -= r1dr2 / nr12;
       }
     }
   }
   double tmpy[3];
   for (int dd = 0; dd < 3; ++dd) tmpy[dd] = r2[dd] - r1dr2 / nr12 * r1[dd];
   double ntmpy = sqrt(deepmd::dot3(tmpy, tmpy));
-  double ydRdy [3] = {0};
-  for (int ii = 0; ii < 3; ++ii){
-    for (int jj = 0; jj < 3; ++jj){
-      ydRdy[ii] += tmpy[jj] * dRdy[ii*3 + jj];
+  double ydRdy[3] = {0};
+  for (int ii = 0; ii < 3; ++ii) {
+    for (int jj = 0; jj < 3; ++jj) {
+      ydRdy[ii] += tmpy[jj] * dRdy[ii * 3 + jj];
     }
   }
-  for (int ii = 0; ii < 3; ++ii){
-    for (int jj = 0; jj < 3; ++jj){
-      dRdT1[ii*3+jj] = (- ydRdy[ii] * tmpy[jj] / (ntmpy * ntmpy * ntmpy) 
-			+ dRdy[3*ii+jj] / ntmpy );
+  for (int ii = 0; ii < 3; ++ii) {
+    for (int jj = 0; jj < 3; ++jj) {
+      dRdT1[ii * 3 + jj] = (-ydRdy[ii] * tmpy[jj] / (ntmpy * ntmpy * ntmpy) +
+                            dRdy[3 * ii + jj] / ntmpy);
     }
   }
   // dRdT2
-  for (int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     double res[3];
-    deepmd::cprod(dRdT0 + ii*3, yy, dRdT2 + ii*3);
-    deepmd::cprod(xx, dRdT1 + ii*3, res);
-    for (int dd = 0; dd < 3; ++dd) dRdT2[ii*3+dd] += res[dd];
+    deepmd::cprod(dRdT0 + ii * 3, yy, dRdT2 + ii * 3);
+    deepmd::cprod(xx, dRdT1 + ii * 3, res);
+    for (int dd = 0; dd < 3; ++dd) dRdT2[ii * 3 + dd] += res[dd];
   }
 }
 
-
-static void 
-compute_dRdT_2 (double (* dRdT)[9], 
-		const double * r1, 
-		const double * r2, 
-		const double * rot)
-{
-  double * dRdT0 = dRdT[0];
-  double * dRdT1 = dRdT[1];
-  double * dRdT2 = dRdT[2];
+static void compute_dRdT_2(double (*dRdT)[9],
+                           const double *r1,
+                           const double *r2,
+                           const double *rot) {
+  double *dRdT0 = dRdT[0];
+  double *dRdT1 = dRdT[1];
+  double *dRdT2 = dRdT[2];
   const double *xx = rot;
-  const double *yy = rot+3;
+  const double *yy = rot + 3;
 
   double nr1 = sqrt(deepmd::dot3(r1, r1));
-  double nr12 = nr1  * nr1;
+  double nr12 = nr1 * nr1;
   double r1dr2 = deepmd::dot3(r1, r2);
 
   // dRdT0
-  for (int ii = 0; ii < 3; ++ii){
-    for (int jj = 0; jj < 3; ++jj){
-      dRdT0[ii*3+jj] = 0.;
+  for (int ii = 0; ii < 3; ++ii) {
+    for (int jj = 0; jj < 3; ++jj) {
+      dRdT0[ii * 3 + jj] = 0.;
     }
   }
-  
-  // dRdT1  
+
+  // dRdT1
   double dRdy[9];
-  for (int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     for (int jj = 0; jj < 3; ++jj) {
-      dRdy[ii*3+jj] = - r1[ii] * r1[jj] / nr12;
+      dRdy[ii * 3 + jj] = -r1[ii] * r1[jj] / nr12;
       if (ii == jj) {
-	dRdy[ii*3+jj] += 1;
+        dRdy[ii * 3 + jj] += 1;
       }
     }
   }
   double tmpy[3];
   for (int dd = 0; dd < 3; ++dd) tmpy[dd] = r2[dd] - r1dr2 / nr12 * r1[dd];
   double ntmpy = sqrt(deepmd::dot3(tmpy, tmpy));
-  double ydRdy [3] = {0};
-  for (int ii = 0; ii < 3; ++ii){
-    for (int jj = 0; jj < 3; ++jj){
-      ydRdy[ii] += tmpy[jj] * dRdy[ii*3 + jj];
+  double ydRdy[3] = {0};
+  for (int ii = 0; ii < 3; ++ii) {
+    for (int jj = 0; jj < 3; ++jj) {
+      ydRdy[ii] += tmpy[jj] * dRdy[ii * 3 + jj];
     }
   }
-  for (int ii = 0; ii < 3; ++ii){
-    for (int jj = 0; jj < 3; ++jj){
-      dRdT1[ii*3+jj] = (- ydRdy[ii] * tmpy[jj] / (ntmpy * ntmpy * ntmpy) 
-			+ dRdy[3*ii+jj] / ntmpy );
+  for (int ii = 0; ii < 3; ++ii) {
+    for (int jj = 0; jj < 3; ++jj) {
+      dRdT1[ii * 3 + jj] = (-ydRdy[ii] * tmpy[jj] / (ntmpy * ntmpy * ntmpy) +
+                            dRdy[3 * ii + jj] / ntmpy);
     }
   }
   // dRdT2
-  for (int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     double res[3];
-    deepmd::cprod(dRdT0 + ii*3, yy, dRdT2 + ii*3);
-    deepmd::cprod(xx, dRdT1 + ii*3, res);
-    for (int dd = 0; dd < 3; ++dd) dRdT2[ii*3+dd] += res[dd];
+    deepmd::cprod(dRdT0 + ii * 3, yy, dRdT2 + ii * 3);
+    deepmd::cprod(xx, dRdT1 + ii * 3, res);
+    for (int dd = 0; dd < 3; ++dd) dRdT2[ii * 3 + dd] += res[dd];
   }
 }
 
-
-
-// output deriv size: n_sel_a_nei x 4 x 12				    + n_sel_r_nei x 12
-//		      (1./rr, cos_theta, cos_phi, sin_phi)  x 4 x (x, y, z) + (1./rr) x 4 x (x, y, z)
-void compute_descriptor (std::vector &			descrpt_a,
-			 std::vector &			descrpt_a_deriv,
-			 std::vector &			descrpt_r,
-			 std::vector &			descrpt_r_deriv,
-			 std::vector &			rij_a,
-			 std::vector &			rij_r,
-			 std::vector &			rot_mat,
-			 const std::vector &		posi,
-			 const int &				ntypes,
-			 const std::vector &		type,
-			 const SimulationRegion &	region,
-			 const bool &				b_pbc,
-			 const int &				i_idx,
-			 const std::vector &		fmt_nlist_a,
-			 const std::vector &		fmt_nlist_r,
-			 const std::vector &		sec_a,
-			 const std::vector &		sec_r,
-			 const int				axis0_type,
-			 const int				axis0_idx,
-			 const int				axis1_type,
-			 const int				axis1_idx)
-{  
+// output deriv size: n_sel_a_nei x 4 x 12				    + n_sel_r_nei
+// x 12
+//		      (1./rr, cos_theta, cos_phi, sin_phi)  x 4 x (x, y, z) +
+//(1./rr) x 4 x (x, y, z)
+void compute_descriptor(std::vector &descrpt_a,
+                        std::vector &descrpt_a_deriv,
+                        std::vector &descrpt_r,
+                        std::vector &descrpt_r_deriv,
+                        std::vector &rij_a,
+                        std::vector &rij_r,
+                        std::vector &rot_mat,
+                        const std::vector &posi,
+                        const int &ntypes,
+                        const std::vector &type,
+                        const SimulationRegion ®ion,
+                        const bool &b_pbc,
+                        const int &i_idx,
+                        const std::vector &fmt_nlist_a,
+                        const std::vector &fmt_nlist_r,
+                        const std::vector &sec_a,
+                        const std::vector &sec_r,
+                        const int axis0_type,
+                        const int axis0_idx,
+                        const int axis1_type,
+                        const int axis1_idx) {
   // compute the diff of the neighbors
-  std::vector > sel_a_diff (sec_a.back());
-  rij_a.resize (sec_a.back() * 3);
-  fill (rij_a.begin(), rij_a.end(), 0.0);
-  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii){
-    for (int jj = sec_a[ii]; jj < sec_a[ii+1]; ++jj){
+  std::vector > sel_a_diff(sec_a.back());
+  rij_a.resize(sec_a.back() * 3);
+  fill(rij_a.begin(), rij_a.end(), 0.0);
+  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii) {
+    for (int jj = sec_a[ii]; jj < sec_a[ii + 1]; ++jj) {
       if (fmt_nlist_a[jj] < 0) break;
       sel_a_diff[jj].resize(3);
-      const int & j_idx = fmt_nlist_a[jj];
-      if (b_pbc){
-	region.diffNearestNeighbor (posi[j_idx*3+0], posi[j_idx*3+1], posi[j_idx*3+2], 
-				    posi[i_idx*3+0], posi[i_idx*3+1], posi[i_idx*3+2], 
-				    sel_a_diff[jj][0], sel_a_diff[jj][1], sel_a_diff[jj][2]);
-      }
-      else {
-	for (int dd = 0; dd < 3; ++dd) sel_a_diff[jj][dd] = posi[j_idx*3+dd] - posi[i_idx*3+dd];
+      const int &j_idx = fmt_nlist_a[jj];
+      if (b_pbc) {
+        region.diffNearestNeighbor(
+            posi[j_idx * 3 + 0], posi[j_idx * 3 + 1], posi[j_idx * 3 + 2],
+            posi[i_idx * 3 + 0], posi[i_idx * 3 + 1], posi[i_idx * 3 + 2],
+            sel_a_diff[jj][0], sel_a_diff[jj][1], sel_a_diff[jj][2]);
+      } else {
+        for (int dd = 0; dd < 3; ++dd)
+          sel_a_diff[jj][dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
       }
-      for (int dd = 0; dd < 3; ++dd) rij_a[jj*3+dd] = sel_a_diff[jj][dd];
+      for (int dd = 0; dd < 3; ++dd) rij_a[jj * 3 + dd] = sel_a_diff[jj][dd];
     }
   }
 
-  std::vector > sel_r_diff (sec_r.back());
-  rij_r.resize (sec_r.back() * 3);
-  fill (rij_r.begin(), rij_r.end(), 0.0);
-  for (int ii = 0; ii < int(sec_r.size()) - 1; ++ii){
-    for (int jj = sec_r[ii]; jj < sec_r[ii+1]; ++jj){
+  std::vector > sel_r_diff(sec_r.back());
+  rij_r.resize(sec_r.back() * 3);
+  fill(rij_r.begin(), rij_r.end(), 0.0);
+  for (int ii = 0; ii < int(sec_r.size()) - 1; ++ii) {
+    for (int jj = sec_r[ii]; jj < sec_r[ii + 1]; ++jj) {
       if (fmt_nlist_r[jj] < 0) break;
       sel_r_diff[jj].resize(3);
-      const int & j_idx = fmt_nlist_r[jj];
-      if (b_pbc){
-	region.diffNearestNeighbor (posi[j_idx*3+0], posi[j_idx*3+1], posi[j_idx*3+2], 
-				    posi[i_idx*3+0], posi[i_idx*3+1], posi[i_idx*3+2], 
-				    sel_r_diff[jj][0], sel_r_diff[jj][1], sel_r_diff[jj][2]);
+      const int &j_idx = fmt_nlist_r[jj];
+      if (b_pbc) {
+        region.diffNearestNeighbor(
+            posi[j_idx * 3 + 0], posi[j_idx * 3 + 1], posi[j_idx * 3 + 2],
+            posi[i_idx * 3 + 0], posi[i_idx * 3 + 1], posi[i_idx * 3 + 2],
+            sel_r_diff[jj][0], sel_r_diff[jj][1], sel_r_diff[jj][2]);
+      } else {
+        for (int dd = 0; dd < 3; ++dd)
+          sel_r_diff[jj][dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
       }
-      else {
-	for (int dd = 0; dd < 3; ++dd) sel_r_diff[jj][dd] = posi[j_idx*3+dd] - posi[i_idx*3+dd];
-      }
-      for (int dd = 0; dd < 3; ++dd) rij_r[jj*3+dd] = sel_r_diff[jj][dd];
+      for (int dd = 0; dd < 3; ++dd) rij_r[jj * 3 + dd] = sel_r_diff[jj][dd];
     }
   }
-  
+
   // if (i_idx == 0){
   //   for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii){
   //     for (int jj = sec_a[ii]; jj < sec_a[ii+1]; ++jj){
@@ -353,7 +342,8 @@ void compute_descriptor (std::vector &			descrpt_a,
   // 	cout << jj << "\t  jidx " << j_idx;
   // 	if (j_idx >= 0){
   // 	  cout << "\t type " << type[j_idx];
-  // 	  cout << "\t " << sqrt(deepmd::dot3(&sel_a_diff[jj][0], &sel_a_diff[jj][0]));
+  // 	  cout << "\t " << sqrt(deepmd::dot3(&sel_a_diff[jj][0],
+  // &sel_a_diff[jj][0]));
   // 	}
   // 	cout << endl;
   //     }
@@ -365,7 +355,8 @@ void compute_descriptor (std::vector &			descrpt_a,
   // 	cout << jj << "\t  jidx " << j_idx;
   // 	if (j_idx >= 0){
   // 	  cout << "\t type " << type[j_idx];
-  // 	  cout << "\t " << sqrt(deepmd::dot3(&sel_r_diff[jj][0], &sel_r_diff[jj][0]));
+  // 	  cout << "\t " << sqrt(deepmd::dot3(&sel_r_diff[jj][0],
+  // &sel_r_diff[jj][0]));
   // 	}
   // 	cout << endl;
   //     }
@@ -374,31 +365,29 @@ void compute_descriptor (std::vector &			descrpt_a,
 
   // record axis vectors
   double r1[3], r2[3];
-  for (unsigned dd = 0; dd < 3; ++dd){
-    if (axis0_type == 0){
-      assert  (sel_a_diff[axis0_idx].size() == 3);
+  for (unsigned dd = 0; dd < 3; ++dd) {
+    if (axis0_type == 0) {
+      assert(sel_a_diff[axis0_idx].size() == 3);
       r1[dd] = sel_a_diff[axis0_idx][dd];
-    }
-    else {
-      assert  (sel_r_diff[axis0_idx].size() == 3);
+    } else {
+      assert(sel_r_diff[axis0_idx].size() == 3);
       r1[dd] = sel_r_diff[axis0_idx][dd];
     }
-    if (axis1_type == 0){
-      assert  (sel_a_diff[axis1_idx].size() == 3);
+    if (axis1_type == 0) {
+      assert(sel_a_diff[axis1_idx].size() == 3);
       r2[dd] = sel_a_diff[axis1_idx][dd];
-    }
-    else {
-      assert  (sel_r_diff[axis1_idx].size() == 3);
+    } else {
+      assert(sel_r_diff[axis1_idx].size() == 3);
       r2[dd] = sel_r_diff[axis1_idx][dd];
     }
-  }  
+  }
 
   // rotation matrix
-  double rot [9];
+  double rot[9];
   double *xx = rot;
-  double *yy = rot+3;
-  double *zz = rot+6;
-  for (unsigned dd = 0; dd < 3; ++dd){
+  double *yy = rot + 3;
+  double *zz = rot + 6;
+  for (unsigned dd = 0; dd < 3; ++dd) {
     xx[dd] = r1[dd];
     yy[dd] = r2[dd];
   }
@@ -408,17 +397,17 @@ void compute_descriptor (std::vector &			descrpt_a,
   for (unsigned dd = 0; dd < 3; ++dd) yy[dd] -= dxy * xx[dd];
   double norm_yy = sqrt(deepmd::dot3(yy, yy));
   for (unsigned dd = 0; dd < 3; ++dd) yy[dd] /= norm_yy;
-  deepmd::cprod(xx, yy, zz);  
-  rot_mat.resize (9);
+  deepmd::cprod(xx, yy, zz);
+  rot_mat.resize(9);
   for (int dd = 0; dd < 9; ++dd) rot_mat[dd] = rot[dd];
 
   // 1./rr, cos(theta), cos(phi), sin(phi)
-  descrpt_a.resize (sec_a.back() * 4);
-  fill (descrpt_a.begin(), descrpt_a.end(), 0.0);
-  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii){
-    for (int jj = sec_a[ii]; jj < sec_a[ii+1]; ++jj){
+  descrpt_a.resize(sec_a.back() * 4);
+  fill(descrpt_a.begin(), descrpt_a.end(), 0.0);
+  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii) {
+    for (int jj = sec_a[ii]; jj < sec_a[ii + 1]; ++jj) {
       if (fmt_nlist_a[jj] < 0) break;
-      double rdiff[3] ;
+      double rdiff[3];
       deepmd::dotmv3(rdiff, rot, &sel_a_diff[jj][0]);
       double rr2 = deepmd::dot3(rdiff, rdiff);
       double rr = sqrt(rr2);
@@ -432,32 +421,32 @@ void compute_descriptor (std::vector &			descrpt_a,
       double cos_phi = rdiff[0] / rr2;
       double sin_phi = rdiff[1] / rr2;
 #endif
-      descrpt_a[jj * 4 + 0] = 1./rr;
+      descrpt_a[jj * 4 + 0] = 1. / rr;
       descrpt_a[jj * 4 + 1] = cos_theta;
       descrpt_a[jj * 4 + 2] = cos_phi;
-      descrpt_a[jj * 4 + 3] = sin_phi;      
+      descrpt_a[jj * 4 + 3] = sin_phi;
     }
   }
   // 1./rr
-  descrpt_r.resize (sec_r.back());
-  fill (descrpt_r.begin(), descrpt_r.end(), 0.0);
-  for (int ii = 0; ii < int(sec_r.size()) - 1; ++ii){
-    for (int jj = sec_r[ii]; jj < sec_r[ii+1]; ++jj){
+  descrpt_r.resize(sec_r.back());
+  fill(descrpt_r.begin(), descrpt_r.end(), 0.0);
+  for (int ii = 0; ii < int(sec_r.size()) - 1; ++ii) {
+    for (int jj = sec_r[ii]; jj < sec_r[ii + 1]; ++jj) {
       if (fmt_nlist_r[jj] < 0) break;
       const double *rdiff = &sel_r_diff[jj][0];
-      double rr = sqrt (deepmd::dot3(rdiff, rdiff));
-      descrpt_r[jj] = 1./rr;      
+      double rr = sqrt(deepmd::dot3(rdiff, rdiff));
+      descrpt_r[jj] = 1. / rr;
     }
   }
-  
+
   // first_dim: T_i, second_dim: R_k (T_i)_j
   double dRdT_0[3][9];
   double dRdT_1[3][9];
   double dRdT_2[3][9];
   if (sec_a.back() > 0) {
-    compute_dRdT   (dRdT_0, r1, r2, rot);
-    compute_dRdT_1 (dRdT_1, r1, r2, rot);
-    compute_dRdT_2 (dRdT_2, r1, r2, rot);
+    compute_dRdT(dRdT_0, r1, r2, rot);
+    compute_dRdT_1(dRdT_1, r1, r2, rot);
+    compute_dRdT_2(dRdT_2, r1, r2, rot);
   }
 
   // deriv wrt center: 3
@@ -465,237 +454,239 @@ void compute_descriptor (std::vector &			descrpt_a,
   // deriv wrt axis 2: 3
   // deriv wrt atom k: 3
   // if k == 1 or k == 2, 2 copies of data stored.
-  descrpt_a_deriv.resize (sec_a.back() * 4 * 12);
-  fill (descrpt_a_deriv.begin(), descrpt_a_deriv.end(), 0.);
-  for (int sec_iter = 0; sec_iter < int(sec_a.size()) - 1; ++sec_iter){
-    for (int nei_iter = sec_a[sec_iter]; nei_iter < sec_a[sec_iter+1]; ++nei_iter) {
+  descrpt_a_deriv.resize(sec_a.back() * 4 * 12);
+  fill(descrpt_a_deriv.begin(), descrpt_a_deriv.end(), 0.);
+  for (int sec_iter = 0; sec_iter < int(sec_a.size()) - 1; ++sec_iter) {
+    for (int nei_iter = sec_a[sec_iter]; nei_iter < sec_a[sec_iter + 1];
+         ++nei_iter) {
       if (fmt_nlist_a[nei_iter] < 0) break;
       // drdS, stored in tranposed form
       double dtrdST[4][3];
-      double * rr = &sel_a_diff[nei_iter][0];
-      double tr[3] ;
+      double *rr = &sel_a_diff[nei_iter][0];
+      double tr[3];
       deepmd::dotmv3(tr, rot, rr);
       double nr2 = deepmd::dot3(tr, tr);
       double nr = sqrt(nr2);
       double nr3 = nr * nr2;
-      for (int dd = 0; dd < 3; ++dd){
-	dtrdST[0][dd] = -tr[dd] / nr3;
+      for (int dd = 0; dd < 3; ++dd) {
+        dtrdST[0][dd] = -tr[dd] / nr3;
       }
 #ifdef DESCRPT_THETAPHI
-      for (int dd = 0; dd < 3; ++dd){
-	dtrdST[1][dd] = -tr[dd] / nr3 * tr[2];
+      for (int dd = 0; dd < 3; ++dd) {
+        dtrdST[1][dd] = -tr[dd] / nr3 * tr[2];
       }
-      dtrdST[1][2] += 1./nr;
+      dtrdST[1][2] += 1. / nr;
       double nr01 = sqrt(tr[0] * tr[0] + tr[1] * tr[1]);
       double nr013 = nr01 * nr01 * nr01;
-      dtrdST[2][0] = -tr[0] * tr[0] / nr013 + 1./nr01;
+      dtrdST[2][0] = -tr[0] * tr[0] / nr013 + 1. / nr01;
       dtrdST[2][1] = -tr[1] * tr[0] / nr013;
       dtrdST[2][2] = 0.;
       dtrdST[3][0] = -tr[0] * tr[1] / nr013;
-      dtrdST[3][1] = -tr[1] * tr[1] / nr013 + 1./nr01;
+      dtrdST[3][1] = -tr[1] * tr[1] / nr013 + 1. / nr01;
       dtrdST[3][2] = 0.;
 #else
       double nr4 = nr2 * nr2;
-      for (int dd = 0; dd < 3; ++dd){
-	dtrdST[1][dd] = -2. * tr[dd] / nr4 * tr[2];
-	dtrdST[2][dd] = -2. * tr[dd] / nr4 * tr[0];
-	dtrdST[3][dd] = -2. * tr[dd] / nr4 * tr[1];
+      for (int dd = 0; dd < 3; ++dd) {
+        dtrdST[1][dd] = -2. * tr[dd] / nr4 * tr[2];
+        dtrdST[2][dd] = -2. * tr[dd] / nr4 * tr[0];
+        dtrdST[3][dd] = -2. * tr[dd] / nr4 * tr[1];
       }
-      dtrdST[1][2] += 1./nr2;
-      dtrdST[2][0] += 1./nr2;
-      dtrdST[3][1] += 1./nr2;
+      dtrdST[1][2] += 1. / nr2;
+      dtrdST[2][0] += 1. / nr2;
+      dtrdST[3][1] += 1. / nr2;
 #endif
       // dRdTr
       double dRdTr_0[3][3];
-      for (int ii = 0; ii < 3; ++ii){
-	for (int jj = 0; jj < 3; ++jj){
-	  dRdTr_0[ii][jj] = 0;	
-	  for (int ll = 0; ll < 3; ++ll){
-	    dRdTr_0[ii][jj] += dRdT_0[jj][ii*3+ll] * rr[ll];
-	  }
-	  dRdTr_0[ii][jj] -= rot[jj*3 + ii];
-	}
+      for (int ii = 0; ii < 3; ++ii) {
+        for (int jj = 0; jj < 3; ++jj) {
+          dRdTr_0[ii][jj] = 0;
+          for (int ll = 0; ll < 3; ++ll) {
+            dRdTr_0[ii][jj] += dRdT_0[jj][ii * 3 + ll] * rr[ll];
+          }
+          dRdTr_0[ii][jj] -= rot[jj * 3 + ii];
+        }
       }
       // dRdTr_1
       double dRdTr_1[3][3];
-      for (int ii = 0; ii < 3; ++ii){
-	for (int jj = 0; jj < 3; ++jj){
-	  dRdTr_1[ii][jj] = 0;	
-	  for (int ll = 0; ll < 3; ++ll){
-	    dRdTr_1[ii][jj] += dRdT_1[jj][ii*3+ll] * rr[ll];
-	  }
-	  if (axis0_type == 0 && nei_iter == axis0_idx) dRdTr_1[ii][jj] += rot[jj*3 + ii];
-	}
+      for (int ii = 0; ii < 3; ++ii) {
+        for (int jj = 0; jj < 3; ++jj) {
+          dRdTr_1[ii][jj] = 0;
+          for (int ll = 0; ll < 3; ++ll) {
+            dRdTr_1[ii][jj] += dRdT_1[jj][ii * 3 + ll] * rr[ll];
+          }
+          if (axis0_type == 0 && nei_iter == axis0_idx)
+            dRdTr_1[ii][jj] += rot[jj * 3 + ii];
+        }
       }
       // dRdTr_2
       double dRdTr_2[3][3];
-      for (int ii = 0; ii < 3; ++ii){
-	for (int jj = 0; jj < 3; ++jj){
-	  dRdTr_2[ii][jj] = 0;	
-	  for (int ll = 0; ll < 3; ++ll){
-	    dRdTr_2[ii][jj] += dRdT_2[jj][ii*3+ll] * rr[ll];
-	  }
-	  if (axis1_type == 0 && nei_iter == axis1_idx) dRdTr_2[ii][jj] += rot[jj*3 + ii];
-	}
+      for (int ii = 0; ii < 3; ++ii) {
+        for (int jj = 0; jj < 3; ++jj) {
+          dRdTr_2[ii][jj] = 0;
+          for (int ll = 0; ll < 3; ++ll) {
+            dRdTr_2[ii][jj] += dRdT_2[jj][ii * 3 + ll] * rr[ll];
+          }
+          if (axis1_type == 0 && nei_iter == axis1_idx)
+            dRdTr_2[ii][jj] += rot[jj * 3 + ii];
+        }
       }
       // dRdTr_k
       double dRdTr_k[3][3];
-      for (int ii = 0; ii < 3; ++ii){
-	for (int jj = 0; jj < 3; ++jj){
-	  dRdTr_k[ii][jj] = 0;	
-	  if (axis0_type == 0 && nei_iter == axis0_idx){
-	    for (int ll = 0; ll < 3; ++ll){
-	      dRdTr_k[ii][jj] += dRdT_1[jj][ii*3+ll] * rr[ll];
-	    }
-	  }
-	  if (axis1_type == 0 && nei_iter == axis1_idx){
-	    for (int ll = 0; ll < 3; ++ll){
-	      dRdTr_k[ii][jj] += dRdT_2[jj][ii*3+ll] * rr[ll];
-	    }
-	  }
-	  dRdTr_k[ii][jj] += rot[jj*3 + ii];
-	}
+      for (int ii = 0; ii < 3; ++ii) {
+        for (int jj = 0; jj < 3; ++jj) {
+          dRdTr_k[ii][jj] = 0;
+          if (axis0_type == 0 && nei_iter == axis0_idx) {
+            for (int ll = 0; ll < 3; ++ll) {
+              dRdTr_k[ii][jj] += dRdT_1[jj][ii * 3 + ll] * rr[ll];
+            }
+          }
+          if (axis1_type == 0 && nei_iter == axis1_idx) {
+            for (int ll = 0; ll < 3; ++ll) {
+              dRdTr_k[ii][jj] += dRdT_2[jj][ii * 3 + ll] * rr[ll];
+            }
+          }
+          dRdTr_k[ii][jj] += rot[jj * 3 + ii];
+        }
       }
 
       // assemble
       // 4 components times 12 derivs
       int idx_start = nei_iter * 4 * 12;
       // loop over components
-      for (int ii = 0; ii < 4; ++ii){
-	for (int jj = 0; jj < 3; ++jj){
-	  int idx = idx_start + ii * 12 + jj;
-	  descrpt_a_deriv[idx] = 0;
-	  for (int ll = 0; ll < 3; ++ll){
-	    descrpt_a_deriv[idx] += dtrdST[ii][ll] * dRdTr_0[jj][ll];
-	  }
-	}
-	for (int jj = 0; jj < 3; ++jj){
-	  int idx = idx_start + ii * 12 + jj + 3;
-	  descrpt_a_deriv[idx] = 0;
-	  for (int ll = 0; ll < 3; ++ll){
-	    descrpt_a_deriv[idx] += dtrdST[ii][ll] * dRdTr_1[jj][ll];
-	  }
-	}
-	for (int jj = 0; jj < 3; ++jj){
-	  int idx = idx_start + ii * 12 + jj + 6;
-	  descrpt_a_deriv[idx] = 0;
-	  for (int ll = 0; ll < 3; ++ll){
-	    descrpt_a_deriv[idx] += dtrdST[ii][ll] * dRdTr_2[jj][ll];
-	  }
-	}
-	for (int jj = 0; jj < 3; ++jj){
-	  int idx = idx_start + ii * 12 + jj + 9;
-	  descrpt_a_deriv[idx] = 0;
-	  for (int ll = 0; ll < 3; ++ll){
-	    descrpt_a_deriv[idx] += dtrdST[ii][ll] * dRdTr_k[jj][ll];
-	  }
-	}
+      for (int ii = 0; ii < 4; ++ii) {
+        for (int jj = 0; jj < 3; ++jj) {
+          int idx = idx_start + ii * 12 + jj;
+          descrpt_a_deriv[idx] = 0;
+          for (int ll = 0; ll < 3; ++ll) {
+            descrpt_a_deriv[idx] += dtrdST[ii][ll] * dRdTr_0[jj][ll];
+          }
+        }
+        for (int jj = 0; jj < 3; ++jj) {
+          int idx = idx_start + ii * 12 + jj + 3;
+          descrpt_a_deriv[idx] = 0;
+          for (int ll = 0; ll < 3; ++ll) {
+            descrpt_a_deriv[idx] += dtrdST[ii][ll] * dRdTr_1[jj][ll];
+          }
+        }
+        for (int jj = 0; jj < 3; ++jj) {
+          int idx = idx_start + ii * 12 + jj + 6;
+          descrpt_a_deriv[idx] = 0;
+          for (int ll = 0; ll < 3; ++ll) {
+            descrpt_a_deriv[idx] += dtrdST[ii][ll] * dRdTr_2[jj][ll];
+          }
+        }
+        for (int jj = 0; jj < 3; ++jj) {
+          int idx = idx_start + ii * 12 + jj + 9;
+          descrpt_a_deriv[idx] = 0;
+          for (int ll = 0; ll < 3; ++ll) {
+            descrpt_a_deriv[idx] += dtrdST[ii][ll] * dRdTr_k[jj][ll];
+          }
+        }
       }
     }
-  } 
+  }
 
-  descrpt_r_deriv.resize (sec_r.back() * 1 * 12);
-  fill (descrpt_r_deriv.begin(), descrpt_r_deriv.end(), 0.);
-  for (int sec_iter = 0; sec_iter < int(sec_r.size()) - 1; ++sec_iter){
-    for (int nei_iter = sec_r[sec_iter]; nei_iter < sec_r[sec_iter+1]; ++nei_iter) {
-      if (fmt_nlist_r[nei_iter] < 0) break;      
+  descrpt_r_deriv.resize(sec_r.back() * 1 * 12);
+  fill(descrpt_r_deriv.begin(), descrpt_r_deriv.end(), 0.);
+  for (int sec_iter = 0; sec_iter < int(sec_r.size()) - 1; ++sec_iter) {
+    for (int nei_iter = sec_r[sec_iter]; nei_iter < sec_r[sec_iter + 1];
+         ++nei_iter) {
+      if (fmt_nlist_r[nei_iter] < 0) break;
 
-      const double * rr = &sel_r_diff[nei_iter][0];
+      const double *rr = &sel_r_diff[nei_iter][0];
       double nr = sqrt(deepmd::dot3(rr, rr));
       double nr3 = nr * nr * nr;
       int idx = nei_iter * 12;
 
-      for (int jj = 0; jj < 3; ++jj){
-	double value = rr[jj] / nr3;
-	descrpt_r_deriv[idx+0+jj] =  value;
-	descrpt_r_deriv[idx+9+jj] = -value;
-	if (nei_iter == axis0_idx) {
-	  descrpt_r_deriv[idx+3+jj] = -value;
-	}
-	if (nei_iter == axis1_idx) {
-	  descrpt_r_deriv[idx+6+jj] = -value;
-	}
+      for (int jj = 0; jj < 3; ++jj) {
+        double value = rr[jj] / nr3;
+        descrpt_r_deriv[idx + 0 + jj] = value;
+        descrpt_r_deriv[idx + 9 + jj] = -value;
+        if (nei_iter == axis0_idx) {
+          descrpt_r_deriv[idx + 3 + jj] = -value;
+        }
+        if (nei_iter == axis1_idx) {
+          descrpt_r_deriv[idx + 6 + jj] = -value;
+        }
       }
     }
   }
 }
 
-
-void compute_descriptor (std::vector &			descrpt_a,
-			 std::vector &			descrpt_r,
-			 std::vector &			rot_mat,
-			 const std::vector &		posi,
-			 const int &				ntypes,
-			 const std::vector &		type,
-			 const SimulationRegion &	region,
-			 const bool &				b_pbc,
-			 const int &				i_idx,
-			 const std::vector &		fmt_nlist_a,
-			 const std::vector &		fmt_nlist_r,
-			 const std::vector &		sec_a,
-			 const std::vector &		sec_r,
-			 const int				axis0_type,
-			 const int				axis0_idx,
-			 const int				axis1_type,
-			 const int				axis1_idx)
-{  
+void compute_descriptor(std::vector &descrpt_a,
+                        std::vector &descrpt_r,
+                        std::vector &rot_mat,
+                        const std::vector &posi,
+                        const int &ntypes,
+                        const std::vector &type,
+                        const SimulationRegion ®ion,
+                        const bool &b_pbc,
+                        const int &i_idx,
+                        const std::vector &fmt_nlist_a,
+                        const std::vector &fmt_nlist_r,
+                        const std::vector &sec_a,
+                        const std::vector &sec_r,
+                        const int axis0_type,
+                        const int axis0_idx,
+                        const int axis1_type,
+                        const int axis1_idx) {
   // compute the diff of the neighbors
-  std::vector > sel_a_diff (sec_a.back());
-  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii){
-    for (int jj = sec_a[ii]; jj < sec_a[ii+1]; ++jj){
+  std::vector > sel_a_diff(sec_a.back());
+  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii) {
+    for (int jj = sec_a[ii]; jj < sec_a[ii + 1]; ++jj) {
       if (fmt_nlist_a[jj] < 0) break;
       sel_a_diff[jj].resize(3);
-      const int & j_idx = fmt_nlist_a[jj];
-      if (b_pbc){
-	region.diffNearestNeighbor (posi[j_idx*3+0], posi[j_idx*3+1], posi[j_idx*3+2], 
-				    posi[i_idx*3+0], posi[i_idx*3+1], posi[i_idx*3+2], 
-				    sel_a_diff[jj][0], sel_a_diff[jj][1], sel_a_diff[jj][2]);
-      }
-      else {
-	for (int dd = 0; dd < 3; ++dd) sel_a_diff[jj][dd] = posi[j_idx*3+dd] - posi[i_idx*3+dd];
+      const int &j_idx = fmt_nlist_a[jj];
+      if (b_pbc) {
+        region.diffNearestNeighbor(
+            posi[j_idx * 3 + 0], posi[j_idx * 3 + 1], posi[j_idx * 3 + 2],
+            posi[i_idx * 3 + 0], posi[i_idx * 3 + 1], posi[i_idx * 3 + 2],
+            sel_a_diff[jj][0], sel_a_diff[jj][1], sel_a_diff[jj][2]);
+      } else {
+        for (int dd = 0; dd < 3; ++dd)
+          sel_a_diff[jj][dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
       }
     }
   }
-  std::vector > sel_r_diff (sec_r.back());
-  for (int ii = 0; ii < int(sec_r.size()) - 1; ++ii){
-    for (int jj = sec_r[ii]; jj < sec_r[ii+1]; ++jj){
+  std::vector > sel_r_diff(sec_r.back());
+  for (int ii = 0; ii < int(sec_r.size()) - 1; ++ii) {
+    for (int jj = sec_r[ii]; jj < sec_r[ii + 1]; ++jj) {
       if (fmt_nlist_r[jj] < 0) break;
       sel_r_diff[jj].resize(3);
-      const int & j_idx = fmt_nlist_r[jj];
-      if (b_pbc){
-	region.diffNearestNeighbor (posi[j_idx*3+0], posi[j_idx*3+1], posi[j_idx*3+2], 
-				    posi[i_idx*3+0], posi[i_idx*3+1], posi[i_idx*3+2], 
-				    sel_r_diff[jj][0], sel_r_diff[jj][1], sel_r_diff[jj][2]);
-      }
-      else {
-	for (int dd = 0; dd < 3; ++dd) sel_r_diff[jj][dd] = posi[j_idx*3+dd] - posi[i_idx*3+dd];
+      const int &j_idx = fmt_nlist_r[jj];
+      if (b_pbc) {
+        region.diffNearestNeighbor(
+            posi[j_idx * 3 + 0], posi[j_idx * 3 + 1], posi[j_idx * 3 + 2],
+            posi[i_idx * 3 + 0], posi[i_idx * 3 + 1], posi[i_idx * 3 + 2],
+            sel_r_diff[jj][0], sel_r_diff[jj][1], sel_r_diff[jj][2]);
+      } else {
+        for (int dd = 0; dd < 3; ++dd)
+          sel_r_diff[jj][dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
       }
     }
   }
 
   // record axis vectors
   double r1[3], r2[3];
-  for (unsigned dd = 0; dd < 3; ++dd){
-    if (axis0_type == 0){
+  for (unsigned dd = 0; dd < 3; ++dd) {
+    if (axis0_type == 0) {
       r1[dd] = sel_a_diff[axis0_idx][dd];
-    }
-    else {
+    } else {
       r1[dd] = sel_r_diff[axis0_idx][dd];
     }
-    if (axis1_type == 0){
+    if (axis1_type == 0) {
       r2[dd] = sel_a_diff[axis1_idx][dd];
-    }
-    else {
+    } else {
       r2[dd] = sel_r_diff[axis1_idx][dd];
     }
-  }  
+  }
 
   // rotation matrix
-  double rot [9];
+  double rot[9];
   double *xx = rot;
-  double *yy = rot+3;
-  double *zz = rot+6;
-  for (unsigned dd = 0; dd < 3; ++dd){
+  double *yy = rot + 3;
+  double *zz = rot + 6;
+  for (unsigned dd = 0; dd < 3; ++dd) {
     xx[dd] = r1[dd];
     yy[dd] = r2[dd];
   }
@@ -705,17 +696,17 @@ void compute_descriptor (std::vector &			descrpt_a,
   for (unsigned dd = 0; dd < 3; ++dd) yy[dd] -= dxy * xx[dd];
   double norm_yy = sqrt(deepmd::dot3(yy, yy));
   for (unsigned dd = 0; dd < 3; ++dd) yy[dd] /= norm_yy;
-  deepmd::cprod(xx, yy, zz);  
-  rot_mat.resize (9);
+  deepmd::cprod(xx, yy, zz);
+  rot_mat.resize(9);
   for (int dd = 0; dd < 9; ++dd) rot_mat[dd] = rot[dd];
 
   // 1./rr, cos(theta), cos(phi), sin(phi)
-  descrpt_a.resize (sec_a.back() * 4);
-  fill (descrpt_a.begin(), descrpt_a.end(), 0.0);
-  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii){
-    for (int jj = sec_a[ii]; jj < sec_a[ii+1]; ++jj){
+  descrpt_a.resize(sec_a.back() * 4);
+  fill(descrpt_a.begin(), descrpt_a.end(), 0.0);
+  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii) {
+    for (int jj = sec_a[ii]; jj < sec_a[ii + 1]; ++jj) {
       if (fmt_nlist_a[jj] < 0) break;
-      double rdiff[3] ;
+      double rdiff[3];
       deepmd::dotmv3(rdiff, rot, &sel_a_diff[jj][0]);
       double rr2 = deepmd::dot3(rdiff, rdiff);
       double rr = sqrt(rr2);
@@ -729,106 +720,100 @@ void compute_descriptor (std::vector &			descrpt_a,
       double cos_phi = rdiff[0] / rr2;
       double sin_phi = rdiff[1] / rr2;
 #endif
-      descrpt_a[jj * 4 + 0] = 1./rr;
+      descrpt_a[jj * 4 + 0] = 1. / rr;
       descrpt_a[jj * 4 + 1] = cos_theta;
       descrpt_a[jj * 4 + 2] = cos_phi;
-      descrpt_a[jj * 4 + 3] = sin_phi;      
+      descrpt_a[jj * 4 + 3] = sin_phi;
     }
   }
   // 1./rr
-  descrpt_r.resize (sec_r.back());
-  fill (descrpt_r.begin(), descrpt_r.end(), 0.0);
-  for (int ii = 0; ii < int(sec_r.size()) - 1; ++ii){
-    for (int jj = sec_r[ii]; jj < sec_r[ii+1]; ++jj){
+  descrpt_r.resize(sec_r.back());
+  fill(descrpt_r.begin(), descrpt_r.end(), 0.0);
+  for (int ii = 0; ii < int(sec_r.size()) - 1; ++ii) {
+    for (int jj = sec_r[ii]; jj < sec_r[ii + 1]; ++jj) {
       if (fmt_nlist_r[jj] < 0) break;
-      double rdiff[3] ;
+      double rdiff[3];
       deepmd::dotmv3(rdiff, rot, &sel_r_diff[jj][0]);
-      double rr = sqrt (deepmd::dot3(rdiff, rdiff));
-      descrpt_r[jj] = 1./rr;
+      double rr = sqrt(deepmd::dot3(rdiff, rdiff));
+      descrpt_r[jj] = 1. / rr;
     }
-  }  
+  }
 }
 
-
-
-
-
-
-
-
-// output deriv size: n_sel_a_nei x 4 x 12				    
-//		      (1./rr, cos_theta, cos_phi, sin_phi)  x 4 x (x, y, z) 
-void compute_descriptor_se_a_extf (std::vector &		descrpt_a,
-				   std::vector &		descrpt_a_deriv,
-				   std::vector &		rij_a,
-				   const std::vector &		posi,
-				   const int &				ntypes,
-				   const std::vector &		type,
-				   const SimulationRegion &	region,
-				   const bool &				b_pbc,
-				   const std::vector &		efield,
-				   const int &				i_idx,
-				   const std::vector &		fmt_nlist_a,
-				   const std::vector &		sec_a, 
-				   const double &			rmin, 
-				   const double &			rmax)
-{
-  const double * ef_ = &efield[i_idx*3+0];
+// output deriv size: n_sel_a_nei x 4 x 12
+//		      (1./rr, cos_theta, cos_phi, sin_phi)  x 4 x (x, y, z)
+void compute_descriptor_se_a_extf(std::vector &descrpt_a,
+                                  std::vector &descrpt_a_deriv,
+                                  std::vector &rij_a,
+                                  const std::vector &posi,
+                                  const int &ntypes,
+                                  const std::vector &type,
+                                  const SimulationRegion ®ion,
+                                  const bool &b_pbc,
+                                  const std::vector &efield,
+                                  const int &i_idx,
+                                  const std::vector &fmt_nlist_a,
+                                  const std::vector &sec_a,
+                                  const double &rmin,
+                                  const double &rmax) {
+  const double *ef_ = &efield[i_idx * 3 + 0];
   double ef[3] = {0.};
-  if (std::isnan(ef_[0]) || std::isnan(ef_[1]) || std::isnan(ef_[2])){
+  if (std::isnan(ef_[0]) || std::isnan(ef_[1]) || std::isnan(ef_[2])) {
     ef[0] = 1.;
     ef[1] = ef[2] = 0.;
-  }
-  else {
-    for (int ii = 0; ii < 3; ++ii){
+  } else {
+    for (int ii = 0; ii < 3; ++ii) {
       ef[ii] = ef_[ii];
     }
   }
-  assert( fabs(deepmd::dot3(ef, ef) - 1.0) < 1e-12 ), "ef should be a normalized std::vector";
+  assert(fabs(deepmd::dot3(ef, ef) - 1.0) < 1e-12),
+      "ef should be a normalized std::vector";
 
   // compute the diff of the neighbors
-  std::vector > sel_a_diff (sec_a.back());
-  rij_a.resize (sec_a.back() * 3);
-  fill (rij_a.begin(), rij_a.end(), 0.0);
-  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii){
-    for (int jj = sec_a[ii]; jj < sec_a[ii+1]; ++jj){
+  std::vector > sel_a_diff(sec_a.back());
+  rij_a.resize(sec_a.back() * 3);
+  fill(rij_a.begin(), rij_a.end(), 0.0);
+  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii) {
+    for (int jj = sec_a[ii]; jj < sec_a[ii + 1]; ++jj) {
       if (fmt_nlist_a[jj] < 0) break;
       sel_a_diff[jj].resize(3);
-      const int & j_idx = fmt_nlist_a[jj];
-      if (b_pbc){
-	region.diffNearestNeighbor (posi[j_idx*3+0], posi[j_idx*3+1], posi[j_idx*3+2], 
-				    posi[i_idx*3+0], posi[i_idx*3+1], posi[i_idx*3+2], 
-				    sel_a_diff[jj][0], sel_a_diff[jj][1], sel_a_diff[jj][2]);
-      }
-      else {
-	for (int dd = 0; dd < 3; ++dd) sel_a_diff[jj][dd] = posi[j_idx*3+dd] - posi[i_idx*3+dd];
+      const int &j_idx = fmt_nlist_a[jj];
+      if (b_pbc) {
+        region.diffNearestNeighbor(
+            posi[j_idx * 3 + 0], posi[j_idx * 3 + 1], posi[j_idx * 3 + 2],
+            posi[i_idx * 3 + 0], posi[i_idx * 3 + 1], posi[i_idx * 3 + 2],
+            sel_a_diff[jj][0], sel_a_diff[jj][1], sel_a_diff[jj][2]);
+      } else {
+        for (int dd = 0; dd < 3; ++dd)
+          sel_a_diff[jj][dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
       }
-      for (int dd = 0; dd < 3; ++dd) rij_a[jj*3+dd] = sel_a_diff[jj][dd];
+      for (int dd = 0; dd < 3; ++dd) rij_a[jj * 3 + dd] = sel_a_diff[jj][dd];
     }
   }
-  
+
   // 1./rr, cos(theta), cos(phi), sin(phi)
-  descrpt_a.resize (sec_a.back() * 4);
-  fill (descrpt_a.begin(), descrpt_a.end(), 0.0);
+  descrpt_a.resize(sec_a.back() * 4);
+  fill(descrpt_a.begin(), descrpt_a.end(), 0.0);
   // deriv wrt center: 3
-  descrpt_a_deriv.resize (sec_a.back() * 4 * 3);
-  fill (descrpt_a_deriv.begin(), descrpt_a_deriv.end(), 0.0);
+  descrpt_a_deriv.resize(sec_a.back() * 4 * 3);
+  fill(descrpt_a_deriv.begin(), descrpt_a_deriv.end(), 0.0);
 
-  for (int sec_iter = 0; sec_iter < int(sec_a.size()) - 1; ++sec_iter){
-    for (int nei_iter = sec_a[sec_iter]; nei_iter < sec_a[sec_iter+1]; ++nei_iter) {      
+  for (int sec_iter = 0; sec_iter < int(sec_a.size()) - 1; ++sec_iter) {
+    for (int nei_iter = sec_a[sec_iter]; nei_iter < sec_a[sec_iter + 1];
+         ++nei_iter) {
       if (fmt_nlist_a[nei_iter] < 0) break;
-      const double * rr = &sel_a_diff[nei_iter][0];
+      const double *rr = &sel_a_diff[nei_iter][0];
       // check validity of ef
       double nr2 = deepmd::dot3(rr, rr);
-      double inr = 1./sqrt(nr2);
+      double inr = 1. / sqrt(nr2);
       double nr = nr2 * inr;
       double inr2 = inr * inr;
       double inr4 = inr2 * inr2;
       double inr3 = inr4 * nr;
       double sw, dsw;
       deepmd::spline5_switch(sw, dsw, nr, rmin, rmax);
-      int idx_deriv = nei_iter * 4 * 3;	// 4 components time 3 directions
-      int idx_value = nei_iter * 4;	// 4 components
+      int idx_deriv = nei_iter * 4 * 3;  // 4 components times 3 directions
+      int idx_value = nei_iter * 4;      // 4 components
       // projections
       double rp = deepmd::dot3(rr, ef);
       double rv[3];
@@ -841,21 +826,45 @@ void compute_descriptor_se_a_extf (std::vector &		descrpt_a,
       descrpt_a[idx_value + 2] = rv[1] / nr2;
       descrpt_a[idx_value + 3] = rv[2] / nr2;
       // deriv of component rp/r2
-      descrpt_a_deriv[idx_deriv + 0] = (2. * inr4 * rp * rr[0] - inr2 * ef[0]) * sw - descrpt_a[idx_value + 0] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv + 1] = (2. * inr4 * rp * rr[1] - inr2 * ef[1]) * sw - descrpt_a[idx_value + 0] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv + 2] = (2. * inr4 * rp * rr[2] - inr2 * ef[2]) * sw - descrpt_a[idx_value + 0] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 0] =
+          (2. * inr4 * rp * rr[0] - inr2 * ef[0]) * sw -
+          descrpt_a[idx_value + 0] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 1] =
+          (2. * inr4 * rp * rr[1] - inr2 * ef[1]) * sw -
+          descrpt_a[idx_value + 0] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 2] =
+          (2. * inr4 * rp * rr[2] - inr2 * ef[2]) * sw -
+          descrpt_a[idx_value + 0] * dsw * rr[2] * inr;
       // deriv of component rvx/r2
-      descrpt_a_deriv[idx_deriv + 3] = (2. * inr4 * rv[0] * rr[0] - inr2 * (1. - ef[0] * ef[0])) * sw - descrpt_a[idx_value + 1] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv + 4] = (2. * inr4 * rv[0] * rr[1] - inr2 * (   - ef[0] * ef[1])) * sw - descrpt_a[idx_value + 1] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv + 5] = (2. * inr4 * rv[0] * rr[2] - inr2 * (   - ef[0] * ef[2])) * sw - descrpt_a[idx_value + 1] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 3] =
+          (2. * inr4 * rv[0] * rr[0] - inr2 * (1. - ef[0] * ef[0])) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 4] =
+          (2. * inr4 * rv[0] * rr[1] - inr2 * (-ef[0] * ef[1])) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 5] =
+          (2. * inr4 * rv[0] * rr[2] - inr2 * (-ef[0] * ef[2])) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[2] * inr;
       // deriv of component rvy/r2
-      descrpt_a_deriv[idx_deriv + 6] = (2. * inr4 * rv[1] * rr[0] - inr2 * (   - ef[1] * ef[0])) * sw - descrpt_a[idx_value + 2] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv + 7] = (2. * inr4 * rv[1] * rr[1] - inr2 * (1. - ef[1] * ef[1])) * sw - descrpt_a[idx_value + 2] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv + 8] = (2. * inr4 * rv[1] * rr[2] - inr2 * (   - ef[1] * ef[2])) * sw - descrpt_a[idx_value + 2] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 6] =
+          (2. * inr4 * rv[1] * rr[0] - inr2 * (-ef[1] * ef[0])) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 7] =
+          (2. * inr4 * rv[1] * rr[1] - inr2 * (1. - ef[1] * ef[1])) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 8] =
+          (2. * inr4 * rv[1] * rr[2] - inr2 * (-ef[1] * ef[2])) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[2] * inr;
       // deriv of component rvz/r2
-      descrpt_a_deriv[idx_deriv + 9] = (2. * inr4 * rv[2] * rr[0] - inr2 * (   - ef[2] * ef[0])) * sw - descrpt_a[idx_value + 3] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv +10] = (2. * inr4 * rv[2] * rr[1] - inr2 * (   - ef[2] * ef[1])) * sw - descrpt_a[idx_value + 3] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv +11] = (2. * inr4 * rv[2] * rr[2] - inr2 * (1. - ef[2] * ef[2])) * sw - descrpt_a[idx_value + 3] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 9] =
+          (2. * inr4 * rv[2] * rr[0] - inr2 * (-ef[2] * ef[0])) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 10] =
+          (2. * inr4 * rv[2] * rr[1] - inr2 * (-ef[2] * ef[1])) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 11] =
+          (2. * inr4 * rv[2] * rr[2] - inr2 * (1. - ef[2] * ef[2])) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[2] * inr;
       // 4 value components
       descrpt_a[idx_value + 0] *= sw;
       descrpt_a[idx_value + 1] *= sw;
@@ -865,79 +874,80 @@ void compute_descriptor_se_a_extf (std::vector &		descrpt_a,
   }
 }
 
-// output deriv size: n_sel_a_nei x 4 x 12				    
-//		      (1./rr, cos_theta, cos_phi, sin_phi)  x 4 x (x, y, z) 
-void compute_descriptor_se_a_ef_para (std::vector &		descrpt_a,
-				      std::vector &		descrpt_a_deriv,
-				      std::vector &		rij_a,
-				      const std::vector &	posi,
-				      const int &			ntypes,
-				      const std::vector &		type,
-				      const SimulationRegion &	region,
-				      const bool &			b_pbc,
-				      const std::vector &	efield,
-				      const int &			i_idx,
-				      const std::vector &		fmt_nlist_a,
-				      const std::vector &		sec_a, 
-				      const double &			rmin, 
-				      const double &			rmax)
-{
-  const double * ef_ = &efield[i_idx*3+0];
+// output deriv size: n_sel_a_nei x 4 x 12
+//		      (1./rr, cos_theta, cos_phi, sin_phi)  x 4 x (x, y, z)
+void compute_descriptor_se_a_ef_para(std::vector &descrpt_a,
+                                     std::vector &descrpt_a_deriv,
+                                     std::vector &rij_a,
+                                     const std::vector &posi,
+                                     const int &ntypes,
+                                     const std::vector &type,
+                                     const SimulationRegion ®ion,
+                                     const bool &b_pbc,
+                                     const std::vector &efield,
+                                     const int &i_idx,
+                                     const std::vector &fmt_nlist_a,
+                                     const std::vector &sec_a,
+                                     const double &rmin,
+                                     const double &rmax) {
+  const double *ef_ = &efield[i_idx * 3 + 0];
   double ef[3] = {0.};
-  if (std::isnan(ef_[0]) || std::isnan(ef_[1]) || std::isnan(ef_[2])){
+  if (std::isnan(ef_[0]) || std::isnan(ef_[1]) || std::isnan(ef_[2])) {
     ef[0] = 1.;
     ef[1] = ef[2] = 0.;
-  }
-  else {
-    for (int ii = 0; ii < 3; ++ii){
+  } else {
+    for (int ii = 0; ii < 3; ++ii) {
       ef[ii] = ef_[ii];
     }
   }
-  assert( fabs(deepmd::dot3(ef, ef) - 1.0) < 1e-12 ), "ef should be a normalized vector";
+  assert(fabs(deepmd::dot3(ef, ef) - 1.0) < 1e-12),
+      "ef should be a normalized vector";
 
   // compute the diff of the neighbors
-  std::vector > sel_a_diff (sec_a.back());
-  rij_a.resize (sec_a.back() * 3);
-  fill (rij_a.begin(), rij_a.end(), 0.0);
-  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii){
-    for (int jj = sec_a[ii]; jj < sec_a[ii+1]; ++jj){
+  std::vector > sel_a_diff(sec_a.back());
+  rij_a.resize(sec_a.back() * 3);
+  fill(rij_a.begin(), rij_a.end(), 0.0);
+  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii) {
+    for (int jj = sec_a[ii]; jj < sec_a[ii + 1]; ++jj) {
       if (fmt_nlist_a[jj] < 0) break;
       sel_a_diff[jj].resize(3);
-      const int & j_idx = fmt_nlist_a[jj];
-      if (b_pbc){
-	region.diffNearestNeighbor (posi[j_idx*3+0], posi[j_idx*3+1], posi[j_idx*3+2], 
-				    posi[i_idx*3+0], posi[i_idx*3+1], posi[i_idx*3+2], 
-				    sel_a_diff[jj][0], sel_a_diff[jj][1], sel_a_diff[jj][2]);
-      }
-      else {
-	for (int dd = 0; dd < 3; ++dd) sel_a_diff[jj][dd] = posi[j_idx*3+dd] - posi[i_idx*3+dd];
+      const int &j_idx = fmt_nlist_a[jj];
+      if (b_pbc) {
+        region.diffNearestNeighbor(
+            posi[j_idx * 3 + 0], posi[j_idx * 3 + 1], posi[j_idx * 3 + 2],
+            posi[i_idx * 3 + 0], posi[i_idx * 3 + 1], posi[i_idx * 3 + 2],
+            sel_a_diff[jj][0], sel_a_diff[jj][1], sel_a_diff[jj][2]);
+      } else {
+        for (int dd = 0; dd < 3; ++dd)
+          sel_a_diff[jj][dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
       }
-      for (int dd = 0; dd < 3; ++dd) rij_a[jj*3+dd] = sel_a_diff[jj][dd];
+      for (int dd = 0; dd < 3; ++dd) rij_a[jj * 3 + dd] = sel_a_diff[jj][dd];
     }
   }
-  
+
   // 1./rr, cos(theta), cos(phi), sin(phi)
-  descrpt_a.resize (sec_a.back() * 4);
-  fill (descrpt_a.begin(), descrpt_a.end(), 0.0);
+  descrpt_a.resize(sec_a.back() * 4);
+  fill(descrpt_a.begin(), descrpt_a.end(), 0.0);
   // deriv wrt center: 3
-  descrpt_a_deriv.resize (sec_a.back() * 4 * 3);
-  fill (descrpt_a_deriv.begin(), descrpt_a_deriv.end(), 0.0);
+  descrpt_a_deriv.resize(sec_a.back() * 4 * 3);
+  fill(descrpt_a_deriv.begin(), descrpt_a_deriv.end(), 0.0);
 
-  for (int sec_iter = 0; sec_iter < int(sec_a.size()) - 1; ++sec_iter){
-    for (int nei_iter = sec_a[sec_iter]; nei_iter < sec_a[sec_iter+1]; ++nei_iter) {      
+  for (int sec_iter = 0; sec_iter < int(sec_a.size()) - 1; ++sec_iter) {
+    for (int nei_iter = sec_a[sec_iter]; nei_iter < sec_a[sec_iter + 1];
+         ++nei_iter) {
       if (fmt_nlist_a[nei_iter] < 0) break;
-      const double * rr = &sel_a_diff[nei_iter][0];
+      const double *rr = &sel_a_diff[nei_iter][0];
       // check validity of ef
       double nr2 = deepmd::dot3(rr, rr);
-      double inr = 1./sqrt(nr2);
+      double inr = 1. / sqrt(nr2);
       double nr = nr2 * inr;
       double inr2 = inr * inr;
       double inr4 = inr2 * inr2;
       double inr3 = inr4 * nr;
       double sw, dsw;
       deepmd::spline5_switch(sw, dsw, nr, rmin, rmax);
-      int idx_deriv = nei_iter * 4 * 3;	// 4 components time 3 directions
-      int idx_value = nei_iter * 4;	// 4 components
+      int idx_deriv = nei_iter * 4 * 3;  // 4 components times 3 directions
+      int idx_value = nei_iter * 4;      // 4 components
       // projections
       double rp[3];
       rp[0] = deepmd::dot3(rr, ef) * ef[0];
@@ -949,21 +959,42 @@ void compute_descriptor_se_a_ef_para (std::vector &		descrpt_a,
       descrpt_a[idx_value + 2] = rp[1] / nr2;
       descrpt_a[idx_value + 3] = rp[2] / nr2;
       // deriv of component 1/r
-      descrpt_a_deriv[idx_deriv + 0] = rr[0] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv + 1] = rr[1] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv + 2] = rr[2] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 0] =
+          rr[0] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 1] =
+          rr[1] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 2] =
+          rr[2] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[2] * inr;
       // deriv of component rpx/r2
-      descrpt_a_deriv[idx_deriv + 3] = (2. * inr4 * rp[0] * rr[0] - inr2 * (ef[0] * ef[0])) * sw - descrpt_a[idx_value + 1] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv + 4] = (2. * inr4 * rp[0] * rr[1] - inr2 * (ef[0] * ef[1])) * sw - descrpt_a[idx_value + 1] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv + 5] = (2. * inr4 * rp[0] * rr[2] - inr2 * (ef[0] * ef[2])) * sw - descrpt_a[idx_value + 1] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 3] =
+          (2. * inr4 * rp[0] * rr[0] - inr2 * (ef[0] * ef[0])) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 4] =
+          (2. * inr4 * rp[0] * rr[1] - inr2 * (ef[0] * ef[1])) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 5] =
+          (2. * inr4 * rp[0] * rr[2] - inr2 * (ef[0] * ef[2])) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[2] * inr;
       // deriv of component rpy/r2
-      descrpt_a_deriv[idx_deriv + 6] = (2. * inr4 * rp[1] * rr[0] - inr2 * (ef[1] * ef[0])) * sw - descrpt_a[idx_value + 2] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv + 7] = (2. * inr4 * rp[1] * rr[1] - inr2 * (ef[1] * ef[1])) * sw - descrpt_a[idx_value + 2] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv + 8] = (2. * inr4 * rp[1] * rr[2] - inr2 * (ef[1] * ef[2])) * sw - descrpt_a[idx_value + 2] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 6] =
+          (2. * inr4 * rp[1] * rr[0] - inr2 * (ef[1] * ef[0])) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 7] =
+          (2. * inr4 * rp[1] * rr[1] - inr2 * (ef[1] * ef[1])) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 8] =
+          (2. * inr4 * rp[1] * rr[2] - inr2 * (ef[1] * ef[2])) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[2] * inr;
       // deriv of component rpz/r2
-      descrpt_a_deriv[idx_deriv + 9] = (2. * inr4 * rp[2] * rr[0] - inr2 * (ef[2] * ef[0])) * sw - descrpt_a[idx_value + 3] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv +10] = (2. * inr4 * rp[2] * rr[1] - inr2 * (ef[2] * ef[1])) * sw - descrpt_a[idx_value + 3] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv +11] = (2. * inr4 * rp[2] * rr[2] - inr2 * (ef[2] * ef[2])) * sw - descrpt_a[idx_value + 3] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 9] =
+          (2. * inr4 * rp[2] * rr[0] - inr2 * (ef[2] * ef[0])) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 10] =
+          (2. * inr4 * rp[2] * rr[1] - inr2 * (ef[2] * ef[1])) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 11] =
+          (2. * inr4 * rp[2] * rr[2] - inr2 * (ef[2] * ef[2])) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[2] * inr;
       // 4 value components
       descrpt_a[idx_value + 0] *= sw;
       descrpt_a[idx_value + 1] *= sw;
@@ -973,79 +1004,80 @@ void compute_descriptor_se_a_ef_para (std::vector &		descrpt_a,
   }
 }
 
-// output deriv size: n_sel_a_nei x 4 x 12				    
-//		      (1./rr, cos_theta, cos_phi, sin_phi)  x 4 x (x, y, z) 
-void compute_descriptor_se_a_ef_vert (std::vector &		descrpt_a,
-				      std::vector &		descrpt_a_deriv,
-				      std::vector &		rij_a,
-				      const std::vector &	posi,
-				      const int &			ntypes,
-				      const std::vector &		type,
-				      const SimulationRegion &	region,
-				      const bool &			b_pbc,
-				      const std::vector &	efield,
-				      const int &			i_idx,
-				      const std::vector &		fmt_nlist_a,
-				      const std::vector &		sec_a, 
-				      const double &			rmin, 
-				      const double &			rmax)
-{
-  const double * ef_ = &efield[i_idx*3+0];
+// output deriv size: n_sel_a_nei x 4 x 12
+//		      (1./rr, cos_theta, cos_phi, sin_phi)  x 4 x (x, y, z)
+void compute_descriptor_se_a_ef_vert(std::vector &descrpt_a,
+                                     std::vector &descrpt_a_deriv,
+                                     std::vector &rij_a,
+                                     const std::vector &posi,
+                                     const int &ntypes,
+                                     const std::vector &type,
+                                     const SimulationRegion ®ion,
+                                     const bool &b_pbc,
+                                     const std::vector &efield,
+                                     const int &i_idx,
+                                     const std::vector &fmt_nlist_a,
+                                     const std::vector &sec_a,
+                                     const double &rmin,
+                                     const double &rmax) {
+  const double *ef_ = &efield[i_idx * 3 + 0];
   double ef[3] = {0.};
-  if (std::isnan(ef_[0]) || std::isnan(ef_[1]) || std::isnan(ef_[2])){
+  if (std::isnan(ef_[0]) || std::isnan(ef_[1]) || std::isnan(ef_[2])) {
     ef[0] = 1.;
     ef[1] = ef[2] = 0.;
-  }
-  else {
-    for (int ii = 0; ii < 3; ++ii){
+  } else {
+    for (int ii = 0; ii < 3; ++ii) {
       ef[ii] = ef_[ii];
     }
   }
-  assert( fabs(deepmd::dot3(ef, ef) - 1.0) < 1e-12 ), "ef should be a normalized vector";
+  assert(fabs(deepmd::dot3(ef, ef) - 1.0) < 1e-12),
+      "ef should be a normalized vector";
 
   // compute the diff of the neighbors
-  std::vector > sel_a_diff (sec_a.back());
-  rij_a.resize (sec_a.back() * 3);
-  fill (rij_a.begin(), rij_a.end(), 0.0);
-  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii){
-    for (int jj = sec_a[ii]; jj < sec_a[ii+1]; ++jj){
+  std::vector > sel_a_diff(sec_a.back());
+  rij_a.resize(sec_a.back() * 3);
+  fill(rij_a.begin(), rij_a.end(), 0.0);
+  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii) {
+    for (int jj = sec_a[ii]; jj < sec_a[ii + 1]; ++jj) {
       if (fmt_nlist_a[jj] < 0) break;
       sel_a_diff[jj].resize(3);
-      const int & j_idx = fmt_nlist_a[jj];
-      if (b_pbc){
-	region.diffNearestNeighbor (posi[j_idx*3+0], posi[j_idx*3+1], posi[j_idx*3+2], 
-				    posi[i_idx*3+0], posi[i_idx*3+1], posi[i_idx*3+2], 
-				    sel_a_diff[jj][0], sel_a_diff[jj][1], sel_a_diff[jj][2]);
+      const int &j_idx = fmt_nlist_a[jj];
+      if (b_pbc) {
+        region.diffNearestNeighbor(
+            posi[j_idx * 3 + 0], posi[j_idx * 3 + 1], posi[j_idx * 3 + 2],
+            posi[i_idx * 3 + 0], posi[i_idx * 3 + 1], posi[i_idx * 3 + 2],
+            sel_a_diff[jj][0], sel_a_diff[jj][1], sel_a_diff[jj][2]);
+      } else {
+        for (int dd = 0; dd < 3; ++dd)
+          sel_a_diff[jj][dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
       }
-      else {
-	for (int dd = 0; dd < 3; ++dd) sel_a_diff[jj][dd] = posi[j_idx*3+dd] - posi[i_idx*3+dd];
-      }
-      for (int dd = 0; dd < 3; ++dd) rij_a[jj*3+dd] = sel_a_diff[jj][dd];
+      for (int dd = 0; dd < 3; ++dd) rij_a[jj * 3 + dd] = sel_a_diff[jj][dd];
     }
   }
-  
+
   // 1./rr, cos(theta), cos(phi), sin(phi)
-  descrpt_a.resize (sec_a.back() * 4);
-  fill (descrpt_a.begin(), descrpt_a.end(), 0.0);
+  descrpt_a.resize(sec_a.back() * 4);
+  fill(descrpt_a.begin(), descrpt_a.end(), 0.0);
   // deriv wrt center: 3
-  descrpt_a_deriv.resize (sec_a.back() * 4 * 3);
-  fill (descrpt_a_deriv.begin(), descrpt_a_deriv.end(), 0.0);
+  descrpt_a_deriv.resize(sec_a.back() * 4 * 3);
+  fill(descrpt_a_deriv.begin(), descrpt_a_deriv.end(), 0.0);
 
-  for (int sec_iter = 0; sec_iter < int(sec_a.size()) - 1; ++sec_iter){
-    for (int nei_iter = sec_a[sec_iter]; nei_iter < sec_a[sec_iter+1]; ++nei_iter) {      
+  for (int sec_iter = 0; sec_iter < int(sec_a.size()) - 1; ++sec_iter) {
+    for (int nei_iter = sec_a[sec_iter]; nei_iter < sec_a[sec_iter + 1];
+         ++nei_iter) {
       if (fmt_nlist_a[nei_iter] < 0) break;
-      const double * rr = &sel_a_diff[nei_iter][0];
+      const double *rr = &sel_a_diff[nei_iter][0];
       // check validity of ef
       double nr2 = deepmd::dot3(rr, rr);
-      double inr = 1./sqrt(nr2);
+      double inr = 1. / sqrt(nr2);
       double nr = nr2 * inr;
       double inr2 = inr * inr;
       double inr4 = inr2 * inr2;
       double inr3 = inr4 * nr;
       double sw, dsw;
       deepmd::spline5_switch(sw, dsw, nr, rmin, rmax);
-      int idx_deriv = nei_iter * 4 * 3;	// 4 components time 3 directions
-      int idx_value = nei_iter * 4;	// 4 components
+      int idx_deriv = nei_iter * 4 * 3;  // 4 components time 3 directions
+      int idx_value = nei_iter * 4;      // 4 components
       // projections
       double rp = deepmd::dot3(rr, ef);
       double rv[3];
@@ -1058,21 +1090,42 @@ void compute_descriptor_se_a_ef_vert (std::vector &		descrpt_a,
       descrpt_a[idx_value + 2] = rv[1] / nr2;
       descrpt_a[idx_value + 3] = rv[2] / nr2;
       // deriv of component 1/r
-      descrpt_a_deriv[idx_deriv + 0] = rr[0] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv + 1] = rr[1] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv + 2] = rr[2] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 0] =
+          rr[0] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 1] =
+          rr[1] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 2] =
+          rr[2] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[2] * inr;
       // deriv of component rvx/r2
-      descrpt_a_deriv[idx_deriv + 3] = (2. * inr4 * rv[0] * rr[0] - inr2 * (1. - ef[0] * ef[0])) * sw - descrpt_a[idx_value + 1] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv + 4] = (2. * inr4 * rv[0] * rr[1] - inr2 * (   - ef[0] * ef[1])) * sw - descrpt_a[idx_value + 1] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv + 5] = (2. * inr4 * rv[0] * rr[2] - inr2 * (   - ef[0] * ef[2])) * sw - descrpt_a[idx_value + 1] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 3] =
+          (2. * inr4 * rv[0] * rr[0] - inr2 * (1. - ef[0] * ef[0])) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 4] =
+          (2. * inr4 * rv[0] * rr[1] - inr2 * (-ef[0] * ef[1])) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 5] =
+          (2. * inr4 * rv[0] * rr[2] - inr2 * (-ef[0] * ef[2])) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[2] * inr;
       // deriv of component rvy/r2
-      descrpt_a_deriv[idx_deriv + 6] = (2. * inr4 * rv[1] * rr[0] - inr2 * (   - ef[1] * ef[0])) * sw - descrpt_a[idx_value + 2] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv + 7] = (2. * inr4 * rv[1] * rr[1] - inr2 * (1. - ef[1] * ef[1])) * sw - descrpt_a[idx_value + 2] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv + 8] = (2. * inr4 * rv[1] * rr[2] - inr2 * (   - ef[1] * ef[2])) * sw - descrpt_a[idx_value + 2] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 6] =
+          (2. * inr4 * rv[1] * rr[0] - inr2 * (-ef[1] * ef[0])) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 7] =
+          (2. * inr4 * rv[1] * rr[1] - inr2 * (1. - ef[1] * ef[1])) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 8] =
+          (2. * inr4 * rv[1] * rr[2] - inr2 * (-ef[1] * ef[2])) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[2] * inr;
       // deriv of component rvz/r2
-      descrpt_a_deriv[idx_deriv + 9] = (2. * inr4 * rv[2] * rr[0] - inr2 * (   - ef[2] * ef[0])) * sw - descrpt_a[idx_value + 3] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv +10] = (2. * inr4 * rv[2] * rr[1] - inr2 * (   - ef[2] * ef[1])) * sw - descrpt_a[idx_value + 3] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv +11] = (2. * inr4 * rv[2] * rr[2] - inr2 * (1. - ef[2] * ef[2])) * sw - descrpt_a[idx_value + 3] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 9] =
+          (2. * inr4 * rv[2] * rr[0] - inr2 * (-ef[2] * ef[0])) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 10] =
+          (2. * inr4 * rv[2] * rr[1] - inr2 * (-ef[2] * ef[1])) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 11] =
+          (2. * inr4 * rv[2] * rr[2] - inr2 * (1. - ef[2] * ef[2])) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[2] * inr;
       // 4 value components
       descrpt_a[idx_value + 0] *= sw;
       descrpt_a[idx_value + 1] *= sw;
@@ -1081,6 +1134,3 @@ void compute_descriptor_se_a_ef_vert (std::vector &		descrpt_a,
     }
   }
 }
-
-
-
diff --git a/source/lib/include/DeviceFunctor.h b/source/lib/include/DeviceFunctor.h
index c422b013b0..a86727e8c9 100644
--- a/source/lib/include/DeviceFunctor.h
+++ b/source/lib/include/DeviceFunctor.h
@@ -1,61 +1,147 @@
 #pragma once
-#include 
-#include 
 #include 
+
+#include 
 #include 
+#include 
+
 #include "device.h"
 
-template
+template 
 struct DescrptSeRGPUExecuteFunctor {
-    void operator()(const FPTYPE * coord, const int * type, const int * ilist, const int * jrange, const int * jlist, int * array_int, unsigned long long * array_longlong, const FPTYPE * avg, const FPTYPE * std, FPTYPE * descript, FPTYPE * descript_deriv, FPTYPE * rij, int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt, const float rcut_r, const float rcut_r_smth, const std::vector sec_a, const bool fill_nei_a, const int MAGIC_NUMBER);
+  void operator()(const FPTYPE* coord,
+                  const int* type,
+                  const int* ilist,
+                  const int* jrange,
+                  const int* jlist,
+                  int* array_int,
+                  unsigned long long* array_longlong,
+                  const FPTYPE* avg,
+                  const FPTYPE* std,
+                  FPTYPE* descript,
+                  FPTYPE* descript_deriv,
+                  FPTYPE* rij,
+                  int* nlist,
+                  const int nloc,
+                  const int nall,
+                  const int nnei,
+                  const int ndescrpt,
+                  const float rcut_r,
+                  const float rcut_r_smth,
+                  const std::vector sec_a,
+                  const bool fill_nei_a,
+                  const int MAGIC_NUMBER);
 };
 
-template
+template 
 struct ProdForceSeAGPUExecuteFunctor {
-    void operator()(FPTYPE * force, const FPTYPE * net_derive, const FPTYPE * in_deriv, const int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt, const int n_a_sel, const int n_a_shift);
+  void operator()(FPTYPE* force,
+                  const FPTYPE* net_derive,
+                  const FPTYPE* in_deriv,
+                  const int* nlist,
+                  const int nloc,
+                  const int nall,
+                  const int nnei,
+                  const int ndescrpt,
+                  const int n_a_sel,
+                  const int n_a_shift);
 };
 
-template
+template 
 struct ProdForceSeRGPUExecuteFunctor {
-    void operator()(FPTYPE * force, const FPTYPE * net_derive, const FPTYPE * in_deriv, const int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt);
+  void operator()(FPTYPE* force,
+                  const FPTYPE* net_derive,
+                  const FPTYPE* in_deriv,
+                  const int* nlist,
+                  const int nloc,
+                  const int nall,
+                  const int nnei,
+                  const int ndescrpt);
 };
 
-template
+template 
 struct ProdVirialSeAGPUExecuteFunctor {
-    void operator()(FPTYPE * virial, FPTYPE * atom_virial, const FPTYPE * net_deriv, const FPTYPE * in_deriv, const FPTYPE * rij, const int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt, const int n_a_sel, const int n_a_shift);
+  void operator()(FPTYPE* virial,
+                  FPTYPE* atom_virial,
+                  const FPTYPE* net_deriv,
+                  const FPTYPE* in_deriv,
+                  const FPTYPE* rij,
+                  const int* nlist,
+                  const int nloc,
+                  const int nall,
+                  const int nnei,
+                  const int ndescrpt,
+                  const int n_a_sel,
+                  const int n_a_shift);
 };
 
-template
+template 
 struct ProdVirialSeRGPUExecuteFunctor {
-    void operator()(FPTYPE * virial, FPTYPE * atom_virial, const FPTYPE * net_deriv, const FPTYPE * in_deriv, const FPTYPE * rij, const int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt);
+  void operator()(FPTYPE* virial,
+                  FPTYPE* atom_virial,
+                  const FPTYPE* net_deriv,
+                  const FPTYPE* in_deriv,
+                  const FPTYPE* rij,
+                  const int* nlist,
+                  const int nloc,
+                  const int nall,
+                  const int nnei,
+                  const int ndescrpt);
 };
 
-template
+template 
 struct GeluGPUExecuteFunctor {
-    void operator()(const FPTYPE * in, FPTYPE * out, const int size);
+  void operator()(const FPTYPE* in, FPTYPE* out, const int size);
 };
 
-template
+template 
 struct GeluGradGPUExecuteFunctor {
-    void operator()(const FPTYPE * dy, const FPTYPE * in, FPTYPE * out, const int size);
+  void operator()(const FPTYPE* dy,
+                  const FPTYPE* in,
+                  FPTYPE* out,
+                  const int size);
 };
 
-template
+template 
 struct GeluGradGradGPUExecuteFunctor {
-    void operator()(const FPTYPE * dy, const FPTYPE * dy_, const FPTYPE * in, FPTYPE * out, const int size);
+  void operator()(const FPTYPE* dy,
+                  const FPTYPE* dy_,
+                  const FPTYPE* in,
+                  FPTYPE* out,
+                  const int size);
 };
 
-template
+template 
 struct TabulateFusionGPUExecuteFunctor {
-    void operator()(const FPTYPE * table, const FPTYPE * table_info, const FPTYPE * in, const FPTYPE * ff, const int nloc, const int nnei, const int last_layer_size, FPTYPE * out);
+  void operator()(const FPTYPE* table,
+                  const FPTYPE* table_info,
+                  const FPTYPE* in,
+                  const FPTYPE* ff,
+                  const int nloc,
+                  const int nnei,
+                  const int last_layer_size,
+                  FPTYPE* out);
 };
 
-template
+template 
 struct TabulateFusionGradGPUExecuteFunctor {
-    void operator()(const FPTYPE * table, const FPTYPE * table_info, const FPTYPE * in, const FPTYPE * ff, const FPTYPE * dy, const int nloc, const int nnei, const int last_layer_size, FPTYPE * dy_dx, FPTYPE * dy_df);
+  void operator()(const FPTYPE* table,
+                  const FPTYPE* table_info,
+                  const FPTYPE* in,
+                  const FPTYPE* ff,
+                  const FPTYPE* dy,
+                  const int nloc,
+                  const int nnei,
+                  const int last_layer_size,
+                  FPTYPE* dy_dx,
+                  FPTYPE* dy_df);
 };
 
-template
+template 
 struct TabulateCheckerGPUExecuteFunctor {
-    void operator()(const FPTYPE * table_info, const FPTYPE * in, int * out, const int nloc, const int nnei);
-};
\ No newline at end of file
+  void operator()(const FPTYPE* table_info,
+                  const FPTYPE* in,
+                  int* out,
+                  const int nloc,
+                  const int nnei);
+};
diff --git a/source/lib/include/SimulationRegion.h b/source/lib/include/SimulationRegion.h
index d9de1bff18..e42fe388f5 100644
--- a/source/lib/include/SimulationRegion.h
+++ b/source/lib/include/SimulationRegion.h
@@ -2,144 +2,147 @@
 #define __SimulationRegion_h_wanghan__
 
 #define MOASPNDIM 3
-#include "utilities.h"
 #include 
 
-  template
-  class SimulationRegion
-  {
-protected:
-    const static int SPACENDIM = MOASPNDIM;
-public:
-    void reinitBox (const double * boxv);
-    void affineTransform (const double * affine_map);
-    void reinitOrigin (const double * orig);
-    void reinitOrigin (const std::vector & orig);
-    void backup  ();
-    void recover ();
-public:
-    SimulationRegion ();
-    ~SimulationRegion ();
-    double *		getBoxTensor	()		{return boxt;};
-    const double *	getBoxTensor	() const	{return boxt;};
-    double *		getRecBoxTensor ()		{return rec_boxt;}
-    const double *	getRecBoxTensor () const	{return rec_boxt;}
-    double *		getBoxOrigin	()		{return origin;}
-    const double *	getBoxOrigin	() const	{return origin;}
-    double		getVolume	() const	{return volume;}
-public:
-    void		toFaceDistance	(double * dd) const;
-public:
-    void phys2Inter (double * i_v, const VALUETYPE * p_v) const;
-    void inter2Phys (VALUETYPE * p_v, const double * i_v) const;
-public:
-    bool		isPeriodic	(const int dim) const {return is_periodic[dim];}
-    static int		compactIndex	(const int * idx) ;
-    double *		getShiftVec	(const int index = 0) ;
-    const double *	getShiftVec	(const int index = 0) const;
-    int			getShiftIndex	(const int * idx) const;
-    int			getNullShiftIndex() const;
-    void		shiftCoord	(const int * idx,
-					 VALUETYPE &x,
-					 VALUETYPE &y,
-					 VALUETYPE &z) const;
-    static int		getNumbShiftVec ()	 {return shift_info_size;}
-    static int		getShiftVecTotalSize ()  {return shift_vec_size;}
-public:
-    void 
-    diffNearestNeighbor (const VALUETYPE * r0,
-			 const VALUETYPE * r1,
-			 VALUETYPE * phys) const;
-    virtual void 
-    diffNearestNeighbor (const VALUETYPE x0,
-			 const VALUETYPE y0,
-			 const VALUETYPE z0,
-			 const VALUETYPE x1,
-			 const VALUETYPE y1,
-			 const VALUETYPE z1,
-			 VALUETYPE & dx,
-			 VALUETYPE & dy,
-			 VALUETYPE & dz) const ;
-    virtual void diffNearestNeighbor (const VALUETYPE x0,
-				      const VALUETYPE y0,
-				      const VALUETYPE z0,
-				      const VALUETYPE x1,
-				      const VALUETYPE y1,
-				      const VALUETYPE z1,
-				      VALUETYPE & dx,
-				      VALUETYPE & dy,
-				      VALUETYPE & dz,
-				      int & shift_x,
-				      int & shift_y,
-				      int & shift_z) const ;
-    virtual void diffNearestNeighbor (const VALUETYPE x0,
-				      const VALUETYPE y0,
-				      const VALUETYPE z0,
-				      const VALUETYPE x1,
-				      const VALUETYPE y1,
-				      const VALUETYPE z1,
-				      VALUETYPE & dx,
-				      VALUETYPE & dy,
-				      VALUETYPE & dz,
-				      VALUETYPE & shift_x,
-				      VALUETYPE & shift_y,
-				      VALUETYPE & shift_z) const ;
-private:
-    void computeVolume ();
-    void computeRecBox ();
-    double		volume;
-    double		volumei;
-    double		boxt		[SPACENDIM*SPACENDIM];
-    double		boxt_bk		[SPACENDIM*SPACENDIM];
-    double		rec_boxt	[SPACENDIM*SPACENDIM];
-    double		origin		[SPACENDIM];
-    bool		is_periodic	[SPACENDIM];
-    std::string		class_name;
-    bool		enable_restart;
-protected:
-    void computeShiftVec ();
-    const static int			DBOX_XX = 1;
-    const static int			DBOX_YY = 1;
-    const static int			DBOX_ZZ = 1;
-    const static int			NBOX_XX = DBOX_XX*2+1;
-    const static int			NBOX_YY = DBOX_YY*2+1;
-    const static int			NBOX_ZZ = DBOX_ZZ*2+1;
-    const static int			shift_info_size = NBOX_XX * NBOX_YY * NBOX_ZZ;
-    const static int			shift_vec_size = SPACENDIM * shift_info_size;
-    double				shift_vec	[shift_vec_size];
-    double				inter_shift_vec [shift_vec_size];
-    static int index3to1 (const int tx, const int ty, const int tz) 
-	{
-	  return (NBOX_ZZ * (NBOX_YY * (tx+DBOX_XX) + ty+DBOX_YY)+ tz+DBOX_ZZ);
-	}    
-    double *		getInterShiftVec	(const int index = 0) ;
-    const double *	getInterShiftVec	(const int index = 0) const;
-private:
-    void copy	    (double * o_v, const double * i_v) const;
-    void naiveTensorDotVector (double * out,
-			       const double * i_t,
-			       const double * i_v) const;
-    void naiveTensorTransDotVector (double * out,
-				    const double * i_t,
-				    const double * i_v) const;
-    void tensorDotVector (double * out,
-			  const double * i_t,
-			  const double * i_v) const;
-    void tensorTransDotVector (double * out,
-			       const double * i_t,
-			       const double * i_v) const;
-    void getFromRestart (double * my_boxv, double * my_orig, bool * period) const;
-    void defaultInitBox (double * my_boxv, double * my_orig, bool * period) const;
-    void apply_periodic (int dim, double * dd) const;
-    void apply_periodic (int dim, double * dd, int & shift) const;
-private:
-    std::fstream fp;
-  };
+#include "utilities.h"
+
+template 
+class SimulationRegion {
+ protected:
+  const static int SPACENDIM = MOASPNDIM;
+
+ public:
+  void reinitBox(const double *boxv);
+  void affineTransform(const double *affine_map);
+  void reinitOrigin(const double *orig);
+  void reinitOrigin(const std::vector &orig);
+  void backup();
+  void recover();
+
+ public:
+  SimulationRegion();
+  ~SimulationRegion();
+  double *getBoxTensor() { return boxt; };
+  const double *getBoxTensor() const { return boxt; };
+  double *getRecBoxTensor() { return rec_boxt; }
+  const double *getRecBoxTensor() const { return rec_boxt; }
+  double *getBoxOrigin() { return origin; }
+  const double *getBoxOrigin() const { return origin; }
+  double getVolume() const { return volume; }
+
+ public:
+  void toFaceDistance(double *dd) const;
+
+ public:
+  void phys2Inter(double *i_v, const VALUETYPE *p_v) const;
+  void inter2Phys(VALUETYPE *p_v, const double *i_v) const;
+
+ public:
+  bool isPeriodic(const int dim) const { return is_periodic[dim]; }
+  static int compactIndex(const int *idx);
+  double *getShiftVec(const int index = 0);
+  const double *getShiftVec(const int index = 0) const;
+  int getShiftIndex(const int *idx) const;
+  int getNullShiftIndex() const;
+  void shiftCoord(const int *idx,
+                  VALUETYPE &x,
+                  VALUETYPE &y,
+                  VALUETYPE &z) const;
+  static int getNumbShiftVec() { return shift_info_size; }
+  static int getShiftVecTotalSize() { return shift_vec_size; }
+
+ public:
+  void diffNearestNeighbor(const VALUETYPE *r0,
+                           const VALUETYPE *r1,
+                           VALUETYPE *phys) const;
+  virtual void diffNearestNeighbor(const VALUETYPE x0,
+                                   const VALUETYPE y0,
+                                   const VALUETYPE z0,
+                                   const VALUETYPE x1,
+                                   const VALUETYPE y1,
+                                   const VALUETYPE z1,
+                                   VALUETYPE &dx,
+                                   VALUETYPE &dy,
+                                   VALUETYPE &dz) const;
+  virtual void diffNearestNeighbor(const VALUETYPE x0,
+                                   const VALUETYPE y0,
+                                   const VALUETYPE z0,
+                                   const VALUETYPE x1,
+                                   const VALUETYPE y1,
+                                   const VALUETYPE z1,
+                                   VALUETYPE &dx,
+                                   VALUETYPE &dy,
+                                   VALUETYPE &dz,
+                                   int &shift_x,
+                                   int &shift_y,
+                                   int &shift_z) const;
+  virtual void diffNearestNeighbor(const VALUETYPE x0,
+                                   const VALUETYPE y0,
+                                   const VALUETYPE z0,
+                                   const VALUETYPE x1,
+                                   const VALUETYPE y1,
+                                   const VALUETYPE z1,
+                                   VALUETYPE &dx,
+                                   VALUETYPE &dy,
+                                   VALUETYPE &dz,
+                                   VALUETYPE &shift_x,
+                                   VALUETYPE &shift_y,
+                                   VALUETYPE &shift_z) const;
+
+ private:
+  void computeVolume();
+  void computeRecBox();
+  double volume;
+  double volumei;
+  double boxt[SPACENDIM * SPACENDIM];
+  double boxt_bk[SPACENDIM * SPACENDIM];
+  double rec_boxt[SPACENDIM * SPACENDIM];
+  double origin[SPACENDIM];
+  bool is_periodic[SPACENDIM];
+  std::string class_name;
+  bool enable_restart;
+
+ protected:
+  void computeShiftVec();
+  const static int DBOX_XX = 1;
+  const static int DBOX_YY = 1;
+  const static int DBOX_ZZ = 1;
+  const static int NBOX_XX = DBOX_XX * 2 + 1;
+  const static int NBOX_YY = DBOX_YY * 2 + 1;
+  const static int NBOX_ZZ = DBOX_ZZ * 2 + 1;
+  const static int shift_info_size = NBOX_XX * NBOX_YY * NBOX_ZZ;
+  const static int shift_vec_size = SPACENDIM * shift_info_size;
+  double shift_vec[shift_vec_size];
+  double inter_shift_vec[shift_vec_size];
+  static int index3to1(const int tx, const int ty, const int tz) {
+    return (NBOX_ZZ * (NBOX_YY * (tx + DBOX_XX) + ty + DBOX_YY) + tz + DBOX_ZZ);
+  }
+  double *getInterShiftVec(const int index = 0);
+  const double *getInterShiftVec(const int index = 0) const;
+
+ private:
+  void copy(double *o_v, const double *i_v) const;
+  void naiveTensorDotVector(double *out,
+                            const double *i_t,
+                            const double *i_v) const;
+  void naiveTensorTransDotVector(double *out,
+                                 const double *i_t,
+                                 const double *i_v) const;
+  void tensorDotVector(double *out, const double *i_t, const double *i_v) const;
+  void tensorTransDotVector(double *out,
+                            const double *i_t,
+                            const double *i_v) const;
+  void getFromRestart(double *my_boxv, double *my_orig, bool *period) const;
+  void defaultInitBox(double *my_boxv, double *my_orig, bool *period) const;
+  void apply_periodic(int dim, double *dd) const;
+  void apply_periodic(int dim, double *dd, int &shift) const;
+
+ private:
+  std::fstream fp;
+};
 
 #ifdef MOASP_INLINE_IMPLEMENTATION
 #include "SimulationRegion_Impl.h"
 #endif
 
 #endif
-
-
diff --git a/source/lib/include/SimulationRegion_Impl.h b/source/lib/include/SimulationRegion_Impl.h
index 341ae8f1dd..8d7807a1e7 100644
--- a/source/lib/include/SimulationRegion_Impl.h
+++ b/source/lib/include/SimulationRegion_Impl.h
@@ -2,45 +2,40 @@
 #define __SimulationRegion_Impl_h_wanghan__
 
 // #include 
+#include 
 #include 
 #include 
-#include 
 #include 
-#include 
+#include 
+
 #include "errors.h"
 
 // using namespace std;
 
-template
-SimulationRegion::
-~SimulationRegion ()
-{
-}
+template 
+SimulationRegion::~SimulationRegion() {}
 
-template
-SimulationRegion::
-SimulationRegion ()
-{
+template 
+SimulationRegion::SimulationRegion() {
   is_periodic[0] = is_periodic[1] = is_periodic[2] = true;
-  std::fill (boxt,		boxt    + SPACENDIM*SPACENDIM, 0);
-  std::fill (boxt_bk,		boxt_bk + SPACENDIM*SPACENDIM, 0);
-  std::fill (origin,		origin  + SPACENDIM, 0);
+  std::fill(boxt, boxt + SPACENDIM * SPACENDIM, 0);
+  std::fill(boxt_bk, boxt_bk + SPACENDIM * SPACENDIM, 0);
+  std::fill(origin, origin + SPACENDIM, 0);
 }
 
 template 
-void
-SimulationRegion::
-defaultInitBox (double * my_boxv, double * my_orig, bool * period) const
-{
+void SimulationRegion::defaultInitBox(double *my_boxv,
+                                                 double *my_orig,
+                                                 bool *period) const {
   // by default is a 1,1,1 logical box
-  for (int ii = 0; ii < SPACENDIM; ++ii){
-    for (int jj = 0; jj < SPACENDIM; ++jj){
-      my_boxv[ii*3+jj] = 0.;
+  for (int ii = 0; ii < SPACENDIM; ++ii) {
+    for (int jj = 0; jj < SPACENDIM; ++jj) {
+      my_boxv[ii * 3 + jj] = 0.;
     }
   }
   // origin is at 0,0,0
-  for (int jj = 0; jj < SPACENDIM; ++jj){
-    my_boxv[jj*3+jj] = 1.;
+  for (int jj = 0; jj < SPACENDIM; ++jj) {
+    my_boxv[jj * 3 + jj] = 1.;
   }
   for (int ii = 0; ii < SPACENDIM; ++ii) {
     my_orig[ii] = 0.;
@@ -48,31 +43,21 @@ defaultInitBox (double * my_boxv, double * my_orig, bool * period) const
   }
 }
 
-
-template
-void
-SimulationRegion::
-backup ()
-{
-  for (int ii = 0; ii < SPACENDIM * SPACENDIM; ++ii){
+template 
+void SimulationRegion::backup() {
+  for (int ii = 0; ii < SPACENDIM * SPACENDIM; ++ii) {
     boxt_bk[ii] = boxt[ii];
-  }  
+  }
 }
 
-template
-void
-SimulationRegion::
-recover ()
-{
-  reinitBox (boxt_bk);
+template 
+void SimulationRegion::recover() {
+  reinitBox(boxt_bk);
 }
 
 template 
-inline void
-SimulationRegion::
-reinitBox (const double * boxv_)
-{
-  for (int ii = 0; ii < SPACENDIM * SPACENDIM; ++ii){
+inline void SimulationRegion::reinitBox(const double *boxv_) {
+  for (int ii = 0; ii < SPACENDIM * SPACENDIM; ++ii) {
     boxt[ii] = boxv_[ii];
   }
   computeVolume();
@@ -81,128 +66,95 @@ reinitBox (const double * boxv_)
 }
 
 template 
-inline void
-SimulationRegion::
-affineTransform (const double * affine_map)
-{
-  tensorDotVector (boxt+SPACENDIM*0, affine_map, boxt+SPACENDIM*0);
-  tensorDotVector (boxt+SPACENDIM*1, affine_map, boxt+SPACENDIM*1);
-  tensorDotVector (boxt+SPACENDIM*2, affine_map, boxt+SPACENDIM*2);
+inline void SimulationRegion::affineTransform(
+    const double *affine_map) {
+  tensorDotVector(boxt + SPACENDIM * 0, affine_map, boxt + SPACENDIM * 0);
+  tensorDotVector(boxt + SPACENDIM * 1, affine_map, boxt + SPACENDIM * 1);
+  tensorDotVector(boxt + SPACENDIM * 2, affine_map, boxt + SPACENDIM * 2);
   computeVolume();
   computeRecBox();
   computeShiftVec();
 }
 
-
 template 
-inline void
-SimulationRegion::
-reinitOrigin (const double * orig)
-{
-  for (int ii = 0; ii < SPACENDIM ; ++ii){
+inline void SimulationRegion::reinitOrigin(const double *orig) {
+  for (int ii = 0; ii < SPACENDIM; ++ii) {
     origin[ii] = orig[ii];
-  }  
+  }
 }
 
 template 
-inline void
-SimulationRegion::
-reinitOrigin (const std::vector& orig)
-{
-  for (int ii = 0; ii < SPACENDIM ; ++ii){
+inline void SimulationRegion::reinitOrigin(
+    const std::vector &orig) {
+  for (int ii = 0; ii < SPACENDIM; ++ii) {
     origin[ii] = orig[ii];
-  }  
+  }
 }
 
 template 
-void
-SimulationRegion::
-computeShiftVec ()
-{
+void SimulationRegion::computeShiftVec() {
   int tmp_idx[3];
-  int & ii (tmp_idx[0]);
-  int & jj (tmp_idx[1]);
-  int & kk (tmp_idx[2]);
-  for (ii = -DBOX_XX; ii <= DBOX_XX; ++ii){
-    for (jj = -DBOX_YY; jj <= DBOX_YY; ++jj){
-      for (kk = -DBOX_ZZ; kk <= DBOX_ZZ; ++kk){
-	double *posi = getShiftVec(getShiftIndex(tmp_idx));
-	double *inter_posi = getInterShiftVec(getShiftIndex(tmp_idx));
-	inter_posi[0] = ii;
-	inter_posi[1] = jj;
-	inter_posi[2] = kk;
-	// inter2Phys (posi, inter_posi);
-	tensorTransDotVector (posi, boxt, inter_posi);
+  int &ii(tmp_idx[0]);
+  int &jj(tmp_idx[1]);
+  int &kk(tmp_idx[2]);
+  for (ii = -DBOX_XX; ii <= DBOX_XX; ++ii) {
+    for (jj = -DBOX_YY; jj <= DBOX_YY; ++jj) {
+      for (kk = -DBOX_ZZ; kk <= DBOX_ZZ; ++kk) {
+        double *posi = getShiftVec(getShiftIndex(tmp_idx));
+        double *inter_posi = getInterShiftVec(getShiftIndex(tmp_idx));
+        inter_posi[0] = ii;
+        inter_posi[1] = jj;
+        inter_posi[2] = kk;
+        // inter2Phys (posi, inter_posi);
+        tensorTransDotVector(posi, boxt, inter_posi);
       }
     }
   }
 }
 
 template 
-inline double *
-SimulationRegion::
-getShiftVec (const int index)
-{
-  return shift_vec + SPACENDIM*index;
+inline double *SimulationRegion::getShiftVec(const int index) {
+  return shift_vec + SPACENDIM * index;
 }
 
 template 
-inline const double *
-SimulationRegion::
-getShiftVec (const int index) const
-{
-  return shift_vec + SPACENDIM*index;
+inline const double *SimulationRegion::getShiftVec(
+    const int index) const {
+  return shift_vec + SPACENDIM * index;
 }
 
 template 
-inline double *
-SimulationRegion::
-getInterShiftVec (const int index)
-{
-  return inter_shift_vec + SPACENDIM*index;
+inline double *SimulationRegion::getInterShiftVec(const int index) {
+  return inter_shift_vec + SPACENDIM * index;
 }
 
 template 
-inline const double *
-SimulationRegion::
-getInterShiftVec (const int index) const
-{
-  return inter_shift_vec + SPACENDIM*index;
+inline const double *SimulationRegion::getInterShiftVec(
+    const int index) const {
+  return inter_shift_vec + SPACENDIM * index;
 }
 
 template 
-inline int
-SimulationRegion::
-getShiftIndex (const int * idx) const
-{
+inline int SimulationRegion::getShiftIndex(const int *idx) const {
   return index3to1(idx[0], idx[1], idx[2]);
 }
 
 template 
-inline int
-SimulationRegion::
-getNullShiftIndex () const
-{
-  return index3to1(0,0,0);
+inline int SimulationRegion::getNullShiftIndex() const {
+  return index3to1(0, 0, 0);
 }
 
 template 
-inline int
-SimulationRegion::
-compactIndex (const int * idx) 
-{
+inline int SimulationRegion::compactIndex(const int *idx) {
   return index3to1(idx[0], idx[1], idx[2]);
 }
 
 template 
-inline void
-SimulationRegion::
-shiftCoord (const int * idx,
-	    VALUETYPE &x,
-	    VALUETYPE &y,
-	    VALUETYPE &z) const
-{
-  const double * shift = getShiftVec(getShiftIndex(idx));
+inline void SimulationRegion::shiftCoord(const int *idx,
+                                                    VALUETYPE &x,
+                                                    VALUETYPE &y,
+                                                    VALUETYPE &z) const {
+  const double *shift = getShiftVec(getShiftIndex(idx));
   x += shift[0];
   y += shift[1];
   z += shift[2];
@@ -240,202 +192,180 @@ shiftCoord (const int * idx,
 // 		     VALUETYPE & dz,
 // 		     int & shift_x,
 // 		     int & shift_y,
-// 		     int & shift_z) const 
+// 		     int & shift_z) const
 // {
 //   shift_x = shift_y = shift_z = 0;
 //   diffNearestNeighbor (x0, y0, z0, x1, y1, z1, dx, dy, dz);
 // }
 
-template 
-inline void
-SimulationRegion::
-apply_periodic (int dim, double * dd) const
-{
+template 
+inline void SimulationRegion::apply_periodic(int dim,
+                                                        double *dd) const {
   if (!is_periodic[dim]) return;
-  if      (dd[dim] >= static_cast(0.5)) dd[dim] -= static_cast(1.);
-  else if (dd[dim] < -static_cast(0.5)) dd[dim] += static_cast(1.);
+  if (dd[dim] >= static_cast(0.5))
+    dd[dim] -= static_cast(1.);
+  else if (dd[dim] < -static_cast(0.5))
+    dd[dim] += static_cast(1.);
 }
 
-template 
-inline void
-SimulationRegion::
-apply_periodic (int dim,
-		double * dd,
-		int & shift) const
-{
+template 
+inline void SimulationRegion::apply_periodic(int dim,
+                                                        double *dd,
+                                                        int &shift) const {
   shift = 0;
   if (!is_periodic[dim]) return;
-  if      (dd[dim] >= static_cast(0.5)) {
+  if (dd[dim] >= static_cast(0.5)) {
     dd[dim] -= static_cast(1.);
     shift = -1;
-  }
-  else if (dd[dim] < -static_cast(0.5)) {
+  } else if (dd[dim] < -static_cast(0.5)) {
     dd[dim] += static_cast(1.);
     shift = 1;
   }
 }
 
-template 
-inline void
-SimulationRegion::
-diffNearestNeighbor (const VALUETYPE * r0,
-		     const VALUETYPE * r1,
-		     VALUETYPE * phys) const
-{
+template 
+inline void SimulationRegion::diffNearestNeighbor(
+    const VALUETYPE *r0, const VALUETYPE *r1, VALUETYPE *phys) const {
   double inter[3];
   for (int dd = 0; dd < 3; ++dd) phys[dd] = r0[dd] - r1[dd];
-  SimulationRegion::phys2Inter (inter, phys);
-  for (int dd = 0; dd < 3; ++dd) apply_periodic (dd, inter);
-  SimulationRegion::inter2Phys (phys, inter);
-}
-
-template 
-inline void
-SimulationRegion::
-diffNearestNeighbor (const VALUETYPE x0,
-		     const VALUETYPE y0,
-		     const VALUETYPE z0,
-		     const VALUETYPE x1,
-		     const VALUETYPE y1,
-		     const VALUETYPE z1,
-		     VALUETYPE & dx,
-		     VALUETYPE & dy,
-		     VALUETYPE & dz) const
-{
+  SimulationRegion::phys2Inter(inter, phys);
+  for (int dd = 0; dd < 3; ++dd) apply_periodic(dd, inter);
+  SimulationRegion::inter2Phys(phys, inter);
+}
+
+template 
+inline void SimulationRegion::diffNearestNeighbor(
+    const VALUETYPE x0,
+    const VALUETYPE y0,
+    const VALUETYPE z0,
+    const VALUETYPE x1,
+    const VALUETYPE y1,
+    const VALUETYPE z1,
+    VALUETYPE &dx,
+    VALUETYPE &dy,
+    VALUETYPE &dz) const {
   // diffNearestNeighbor (0, x0, x1, dx);
   // diffNearestNeighbor (1, y0, y1, dy);
   // diffNearestNeighbor (2, z0, z1, dz);
-  VALUETYPE phys [3];
+  VALUETYPE phys[3];
   double inter[3];
   phys[0] = x0 - x1;
   phys[1] = y0 - y1;
-  phys[2] = z0 - z1;  
-  SimulationRegion::phys2Inter (inter, phys);
-  apply_periodic (0, inter);
-  apply_periodic (1, inter);
-  apply_periodic (2, inter);
-  SimulationRegion::inter2Phys (phys, inter);
+  phys[2] = z0 - z1;
+  SimulationRegion::phys2Inter(inter, phys);
+  apply_periodic(0, inter);
+  apply_periodic(1, inter);
+  apply_periodic(2, inter);
+  SimulationRegion::inter2Phys(phys, inter);
   dx = phys[0];
   dy = phys[1];
   dz = phys[2];
 }
 
-template 
-inline void
-SimulationRegion::
-diffNearestNeighbor (const VALUETYPE x0,
-		     const VALUETYPE y0,
-		     const VALUETYPE z0,
-		     const VALUETYPE x1,
-		     const VALUETYPE y1,
-		     const VALUETYPE z1,
-		     VALUETYPE & dx,
-		     VALUETYPE & dy,
-		     VALUETYPE & dz,
-		     int & shift_x,
-		     int & shift_y,
-		     int & shift_z) const 
-{
+template 
+inline void SimulationRegion::diffNearestNeighbor(
+    const VALUETYPE x0,
+    const VALUETYPE y0,
+    const VALUETYPE z0,
+    const VALUETYPE x1,
+    const VALUETYPE y1,
+    const VALUETYPE z1,
+    VALUETYPE &dx,
+    VALUETYPE &dy,
+    VALUETYPE &dz,
+    int &shift_x,
+    int &shift_y,
+    int &shift_z) const {
   // diffNearestNeighbor (0, x0, x1, dx, shift_x);
   // diffNearestNeighbor (1, y0, y1, dy, shift_y);
   // diffNearestNeighbor (2, z0, z1, dz, shift_z);
-  VALUETYPE phys [3];
+  VALUETYPE phys[3];
   double inter[3];
   phys[0] = x0 - x1;
   phys[1] = y0 - y1;
-  phys[2] = z0 - z1;  
-  SimulationRegion::phys2Inter (inter, phys);
-  apply_periodic (0, inter, shift_x);
-  apply_periodic (1, inter, shift_y);
-  apply_periodic (2, inter, shift_z);
-  SimulationRegion::inter2Phys (phys, inter);
+  phys[2] = z0 - z1;
+  SimulationRegion::phys2Inter(inter, phys);
+  apply_periodic(0, inter, shift_x);
+  apply_periodic(1, inter, shift_y);
+  apply_periodic(2, inter, shift_z);
+  SimulationRegion::inter2Phys(phys, inter);
   dx = phys[0];
   dy = phys[1];
-  dz = phys[2];  
-}
-
-template 
-inline void
-SimulationRegion::
-diffNearestNeighbor (const VALUETYPE x0,
-		     const VALUETYPE y0,
-		     const VALUETYPE z0,
-		     const VALUETYPE x1,
-		     const VALUETYPE y1,
-		     const VALUETYPE z1,
-		     VALUETYPE & dx,
-		     VALUETYPE & dy,
-		     VALUETYPE & dz,
-		     VALUETYPE & shift_x,
-		     VALUETYPE & shift_y,
-		     VALUETYPE & shift_z) const 
-{
+  dz = phys[2];
+}
+
+template 
+inline void SimulationRegion::diffNearestNeighbor(
+    const VALUETYPE x0,
+    const VALUETYPE y0,
+    const VALUETYPE z0,
+    const VALUETYPE x1,
+    const VALUETYPE y1,
+    const VALUETYPE z1,
+    VALUETYPE &dx,
+    VALUETYPE &dy,
+    VALUETYPE &dz,
+    VALUETYPE &shift_x,
+    VALUETYPE &shift_y,
+    VALUETYPE &shift_z) const {
   // diffNearestNeighbor (0, x0, x1, dx, shift_x);
   // diffNearestNeighbor (1, y0, y1, dy, shift_y);
   // diffNearestNeighbor (2, z0, z1, dz, shift_z);
-  VALUETYPE phys [3];
+  VALUETYPE phys[3];
   double inter[3];
   phys[0] = x0 - x1;
   phys[1] = y0 - y1;
-  phys[2] = z0 - z1;  
-  SimulationRegion::phys2Inter (inter, phys);
+  phys[2] = z0 - z1;
+  SimulationRegion::phys2Inter(inter, phys);
   int i_shift_x, i_shift_y, i_shift_z;
-  apply_periodic (0, inter, i_shift_x);
-  apply_periodic (1, inter, i_shift_y);
-  apply_periodic (2, inter, i_shift_z);
-  SimulationRegion::inter2Phys (phys, inter);
+  apply_periodic(0, inter, i_shift_x);
+  apply_periodic(1, inter, i_shift_y);
+  apply_periodic(2, inter, i_shift_z);
+  SimulationRegion::inter2Phys(phys, inter);
   dx = phys[0];
   dy = phys[1];
   dz = phys[2];
-  const double * tmp_shift (getShiftVec (index3to1 (i_shift_x, i_shift_y, i_shift_z) ) );
+  const double *tmp_shift(
+      getShiftVec(index3to1(i_shift_x, i_shift_y, i_shift_z)));
   shift_x = tmp_shift[0];
   shift_y = tmp_shift[1];
   shift_z = tmp_shift[2];
 }
 
 template 
-inline void
-SimulationRegion::
-phys2Inter (double * i_v, const VALUETYPE * p_v_) const
-{
+inline void SimulationRegion::phys2Inter(
+    double *i_v, const VALUETYPE *p_v_) const {
   double p_v[3];
   for (int dd = 0; dd < 3; ++dd) p_v[dd] = p_v_[dd];
-  tensorDotVector (i_v, rec_boxt, p_v);
+  tensorDotVector(i_v, rec_boxt, p_v);
 }
 
 template 
-inline void
-SimulationRegion::
-inter2Phys (VALUETYPE * p_v_, const double * i_v) const
-{
+inline void SimulationRegion::inter2Phys(VALUETYPE *p_v_,
+                                                    const double *i_v) const {
   double p_v[3];
-  tensorTransDotVector (p_v, boxt, i_v);
+  tensorTransDotVector(p_v, boxt, i_v);
   for (int dd = 0; dd < 3; ++dd) p_v_[dd] = p_v[dd];
 }
 
 template 
-inline void
-SimulationRegion::
-toFaceDistance	(double * dd) const
-{
+inline void SimulationRegion::toFaceDistance(double *dd) const {
   double tmp[3];
-  deepmd::cprod(boxt+3, boxt+6, tmp);
-  dd[0] = volume * deepmd::invsqrt(deepmd::dot3(tmp,tmp));
-  deepmd::cprod(boxt+6, boxt+0, tmp);
-  dd[1] = volume * deepmd::invsqrt(deepmd::dot3(tmp,tmp));
-  deepmd::cprod(boxt+0, boxt+3, tmp);
-  dd[2] = volume * deepmd::invsqrt(deepmd::dot3(tmp,tmp));
+  deepmd::cprod(boxt + 3, boxt + 6, tmp);
+  dd[0] = volume * deepmd::invsqrt(deepmd::dot3(tmp, tmp));
+  deepmd::cprod(boxt + 6, boxt + 0, tmp);
+  dd[1] = volume * deepmd::invsqrt(deepmd::dot3(tmp, tmp));
+  deepmd::cprod(boxt + 0, boxt + 3, tmp);
+  dd[2] = volume * deepmd::invsqrt(deepmd::dot3(tmp, tmp));
 }
 
 // static int tmp_count = 0;
 
 template 
-inline void
-SimulationRegion::
-copy (double * o_v, const double * i_v) const
-{
+inline void SimulationRegion::copy(double *o_v,
+                                              const double *i_v) const {
 #ifdef DEBUG_CHECK_ASSERTIONS
-  assert (o_v != i_v);
+  assert(o_v != i_v);
 #endif
   o_v[0] = i_v[0];
   o_v[1] = i_v[1];
@@ -443,36 +373,30 @@ copy (double * o_v, const double * i_v) const
 }
 
 template 
-inline void
-SimulationRegion::
-naiveTensorDotVector (double * o_v,
-		      const double * i_t,
-		      const double * i_v) const
-{
-  o_v[0] = i_v[0] * i_t[0*3+0] + i_v[1] * i_t[0*3+1] + i_v[2] * i_t[0*3+2];
-  o_v[1] = i_v[0] * i_t[1*3+0] + i_v[1] * i_t[1*3+1] + i_v[2] * i_t[1*3+2];
-  o_v[2] = i_v[0] * i_t[2*3+0] + i_v[1] * i_t[2*3+1] + i_v[2] * i_t[2*3+2];
+inline void SimulationRegion::naiveTensorDotVector(
+    double *o_v, const double *i_t, const double *i_v) const {
+  o_v[0] = i_v[0] * i_t[0 * 3 + 0] + i_v[1] * i_t[0 * 3 + 1] +
+           i_v[2] * i_t[0 * 3 + 2];
+  o_v[1] = i_v[0] * i_t[1 * 3 + 0] + i_v[1] * i_t[1 * 3 + 1] +
+           i_v[2] * i_t[1 * 3 + 2];
+  o_v[2] = i_v[0] * i_t[2 * 3 + 0] + i_v[1] * i_t[2 * 3 + 1] +
+           i_v[2] * i_t[2 * 3 + 2];
 }
 
 template 
-inline void
-SimulationRegion::
-naiveTensorTransDotVector (double * o_v,
-			   const double * i_t,
-			   const double * i_v) const
-{
-  o_v[0] = i_v[0] * i_t[0*3+0] + i_v[1] * i_t[1*3+0] + i_v[2] * i_t[2*3+0];
-  o_v[1] = i_v[0] * i_t[0*3+1] + i_v[1] * i_t[1*3+1] + i_v[2] * i_t[2*3+1];
-  o_v[2] = i_v[0] * i_t[0*3+2] + i_v[1] * i_t[1*3+2] + i_v[2] * i_t[2*3+2];
+inline void SimulationRegion::naiveTensorTransDotVector(
+    double *o_v, const double *i_t, const double *i_v) const {
+  o_v[0] = i_v[0] * i_t[0 * 3 + 0] + i_v[1] * i_t[1 * 3 + 0] +
+           i_v[2] * i_t[2 * 3 + 0];
+  o_v[1] = i_v[0] * i_t[0 * 3 + 1] + i_v[1] * i_t[1 * 3 + 1] +
+           i_v[2] * i_t[2 * 3 + 1];
+  o_v[2] = i_v[0] * i_t[0 * 3 + 2] + i_v[1] * i_t[1 * 3 + 2] +
+           i_v[2] * i_t[2 * 3 + 2];
 }
 
 template 
-inline void
-SimulationRegion::
-tensorDotVector (double * o_v,
-		 const double * i_t,
-		 const double * i_v) const
-{
+inline void SimulationRegion::tensorDotVector(
+    double *o_v, const double *i_t, const double *i_v) const {
   // the compiler will auto-matically optimize the following code away...
   // const double * tmp_v (i_v);
   // if (o_v == i_v){
@@ -480,61 +404,69 @@ tensorDotVector (double * o_v,
   //   copy (ii_v, i_v);
   //   tmp_v = ii_v;
   // }
-  naiveTensorDotVector (o_v, i_t, i_v);
+  naiveTensorDotVector(o_v, i_t, i_v);
 }
 
 template 
-inline void
-SimulationRegion::
-tensorTransDotVector (double * o_v,
-		      const double * i_t,
-		      const double * i_v) const
-{
-  naiveTensorTransDotVector (o_v, i_t, i_v);
+inline void SimulationRegion::tensorTransDotVector(
+    double *o_v, const double *i_t, const double *i_v) const {
+  naiveTensorTransDotVector(o_v, i_t, i_v);
 }
 
-template
-inline void
-SimulationRegion::
-computeVolume()
-{
-  volume =
-      boxt[0*3+0] * (boxt[1*3+1]*boxt[2*3+2] - boxt[2*3+1]*boxt[1*3+2]) - 
-      boxt[0*3+1] * (boxt[1*3+0]*boxt[2*3+2] - boxt[2*3+0]*boxt[1*3+2]) +
-      boxt[0*3+2] * (boxt[1*3+0]*boxt[2*3+1] - boxt[2*3+0]*boxt[1*3+1]);
+template 
+inline void SimulationRegion::computeVolume() {
+  volume = boxt[0 * 3 + 0] * (boxt[1 * 3 + 1] * boxt[2 * 3 + 2] -
+                              boxt[2 * 3 + 1] * boxt[1 * 3 + 2]) -
+           boxt[0 * 3 + 1] * (boxt[1 * 3 + 0] * boxt[2 * 3 + 2] -
+                              boxt[2 * 3 + 0] * boxt[1 * 3 + 2]) +
+           boxt[0 * 3 + 2] * (boxt[1 * 3 + 0] * boxt[2 * 3 + 1] -
+                              boxt[2 * 3 + 0] * boxt[1 * 3 + 1]);
   volume = std::abs(volume);
-  volumei = static_cast(1.)/volume;
+  volumei = static_cast(1.) / volume;
 }
 
-template
-inline void
-SimulationRegion::
-computeRecBox	()
-{
-  // rec_boxt[0*3+0] =( boxt[1*3+1]*boxt[2*3+2] - boxt[2*3+1]*boxt[1*3+2]) * volumei;
-  // rec_boxt[1*3+1] =( boxt[0*3+0]*boxt[2*3+2] - boxt[2*3+0]*boxt[0*3+2]) * volumei;
-  // rec_boxt[2*3+2] =( boxt[0*3+0]*boxt[1*3+1] - boxt[1*3+0]*boxt[0*3+1]) * volumei;
-  // rec_boxt[1*3+0] =(-boxt[1*3+0]*boxt[2*3+2] + boxt[2*3+0]*boxt[1*3+2]) * volumei;
-  // rec_boxt[2*3+0] =( boxt[1*3+0]*boxt[2*3+1] - boxt[2*3+0]*boxt[1*3+1]) * volumei;
-  // rec_boxt[0*3+1] =(-boxt[0*3+1]*boxt[2*3+2] + boxt[2*3+1]*boxt[0*3+2]) * volumei;
-  // rec_boxt[2*3+1] =(-boxt[0*3+0]*boxt[2*3+1] + boxt[2*3+0]*boxt[0*3+1]) * volumei;
-  // rec_boxt[0*3+2] =( boxt[0*3+1]*boxt[1*3+2] - boxt[1*3+1]*boxt[0*3+2]) * volumei;
-  // rec_boxt[1*3+2] =(-boxt[0*3+0]*boxt[1*3+2] + boxt[1*3+0]*boxt[0*3+2]) * volumei;  
-
-  rec_boxt[0*3+0] =( boxt[1*3+1]*boxt[2*3+2] - boxt[2*3+1]*boxt[1*3+2]) * volumei;
-  rec_boxt[1*3+1] =( boxt[0*3+0]*boxt[2*3+2] - boxt[2*3+0]*boxt[0*3+2]) * volumei;
-  rec_boxt[2*3+2] =( boxt[0*3+0]*boxt[1*3+1] - boxt[1*3+0]*boxt[0*3+1]) * volumei;
-  rec_boxt[0*3+1] =(-boxt[1*3+0]*boxt[2*3+2] + boxt[2*3+0]*boxt[1*3+2]) * volumei;
-  rec_boxt[0*3+2] =( boxt[1*3+0]*boxt[2*3+1] - boxt[2*3+0]*boxt[1*3+1]) * volumei;
-  rec_boxt[1*3+0] =(-boxt[0*3+1]*boxt[2*3+2] + boxt[2*3+1]*boxt[0*3+2]) * volumei;
-  rec_boxt[1*3+2] =(-boxt[0*3+0]*boxt[2*3+1] + boxt[2*3+0]*boxt[0*3+1]) * volumei;
-  rec_boxt[2*3+0] =( boxt[0*3+1]*boxt[1*3+2] - boxt[1*3+1]*boxt[0*3+2]) * volumei;
-  rec_boxt[2*3+1] =(-boxt[0*3+0]*boxt[1*3+2] + boxt[1*3+0]*boxt[0*3+2]) * volumei;
+template 
+inline void SimulationRegion::computeRecBox() {
+  // rec_boxt[0*3+0] =( boxt[1*3+1]*boxt[2*3+2] - boxt[2*3+1]*boxt[1*3+2]) *
+  // volumei; rec_boxt[1*3+1] =( boxt[0*3+0]*boxt[2*3+2] -
+  // boxt[2*3+0]*boxt[0*3+2]) * volumei; rec_boxt[2*3+2] =(
+  // boxt[0*3+0]*boxt[1*3+1] - boxt[1*3+0]*boxt[0*3+1]) * volumei;
+  // rec_boxt[1*3+0] =(-boxt[1*3+0]*boxt[2*3+2] + boxt[2*3+0]*boxt[1*3+2]) *
+  // volumei; rec_boxt[2*3+0] =( boxt[1*3+0]*boxt[2*3+1] -
+  // boxt[2*3+0]*boxt[1*3+1]) * volumei; rec_boxt[0*3+1]
+  // =(-boxt[0*3+1]*boxt[2*3+2] + boxt[2*3+1]*boxt[0*3+2]) * volumei;
+  // rec_boxt[2*3+1] =(-boxt[0*3+0]*boxt[2*3+1] + boxt[2*3+0]*boxt[0*3+1]) *
+  // volumei; rec_boxt[0*3+2] =( boxt[0*3+1]*boxt[1*3+2] -
+  // boxt[1*3+1]*boxt[0*3+2]) * volumei; rec_boxt[1*3+2]
+  // =(-boxt[0*3+0]*boxt[1*3+2] + boxt[1*3+0]*boxt[0*3+2]) * volumei;
+
+  rec_boxt[0 * 3 + 0] =
+      (boxt[1 * 3 + 1] * boxt[2 * 3 + 2] - boxt[2 * 3 + 1] * boxt[1 * 3 + 2]) *
+      volumei;
+  rec_boxt[1 * 3 + 1] =
+      (boxt[0 * 3 + 0] * boxt[2 * 3 + 2] - boxt[2 * 3 + 0] * boxt[0 * 3 + 2]) *
+      volumei;
+  rec_boxt[2 * 3 + 2] =
+      (boxt[0 * 3 + 0] * boxt[1 * 3 + 1] - boxt[1 * 3 + 0] * boxt[0 * 3 + 1]) *
+      volumei;
+  rec_boxt[0 * 3 + 1] =
+      (-boxt[1 * 3 + 0] * boxt[2 * 3 + 2] + boxt[2 * 3 + 0] * boxt[1 * 3 + 2]) *
+      volumei;
+  rec_boxt[0 * 3 + 2] =
+      (boxt[1 * 3 + 0] * boxt[2 * 3 + 1] - boxt[2 * 3 + 0] * boxt[1 * 3 + 1]) *
+      volumei;
+  rec_boxt[1 * 3 + 0] =
+      (-boxt[0 * 3 + 1] * boxt[2 * 3 + 2] + boxt[2 * 3 + 1] * boxt[0 * 3 + 2]) *
+      volumei;
+  rec_boxt[1 * 3 + 2] =
+      (-boxt[0 * 3 + 0] * boxt[2 * 3 + 1] + boxt[2 * 3 + 0] * boxt[0 * 3 + 1]) *
+      volumei;
+  rec_boxt[2 * 3 + 0] =
+      (boxt[0 * 3 + 1] * boxt[1 * 3 + 2] - boxt[1 * 3 + 1] * boxt[0 * 3 + 2]) *
+      volumei;
+  rec_boxt[2 * 3 + 1] =
+      (-boxt[0 * 3 + 0] * boxt[1 * 3 + 2] + boxt[1 * 3 + 0] * boxt[0 * 3 + 2]) *
+      volumei;
 }
 
-
-
-
 #endif
-
-
diff --git a/source/lib/include/coord.h b/source/lib/include/coord.h
index a6beb6a013..17aa1e3dfb 100644
--- a/source/lib/include/coord.h
+++ b/source/lib/include/coord.h
@@ -2,15 +2,13 @@
 
 #include "region.h"
 
-namespace deepmd{
+namespace deepmd {
 
 // normalize coords
 template 
-void
-normalize_coord_cpu(
-    FPTYPE * coord,
-    const int natom,
-    const deepmd::Region & region);
+void normalize_coord_cpu(FPTYPE* coord,
+                         const int natom,
+                         const deepmd::Region& region);
 
 // copy coordinates
 // outputs:
@@ -23,30 +21,27 @@ normalize_coord_cpu(
 //	1: the memory is not large enough to hold all copied coords and types.
 //	   i.e. nall > mem_nall
 template 
-int
-copy_coord_cpu(
-    FPTYPE * out_c,
-    int * out_t,
-    int * mapping,
-    int * nall,
-    const FPTYPE * in_c,
-    const int * in_t,
-    const int & nloc,
-    const int & mem_nall,
-    const float & rcut,
-    const deepmd::Region & region);
+int copy_coord_cpu(FPTYPE* out_c,
+                   int* out_t,
+                   int* mapping,
+                   int* nall,
+                   const FPTYPE* in_c,
+                   const int* in_t,
+                   const int& nloc,
+                   const int& mem_nall,
+                   const float& rcut,
+                   const deepmd::Region& region);
 
 // compute cell information
 // output:
-// cell_info: nat_stt,ncell,ext_stt,ext_end,ngcell,cell_shift,cell_iter,total_cellnum,loc_cellnum
+// cell_info:
+// nat_stt,ncell,ext_stt,ext_end,ngcell,cell_shift,cell_iter,total_cellnum,loc_cellnum
 // input:
 // boxt
 template 
-void
-compute_cell_info(
-    int * cell_info,
-    const float & rcut,
-    const deepmd::Region & region);
+void compute_cell_info(int* cell_info,
+                       const float& rcut,
+                       const deepmd::Region& region);
 
 #if GOOGLE_CUDA
 // normalize coords
@@ -55,42 +50,38 @@ compute_cell_info(
 // input:
 // natom, box_info: boxt, rec_boxt
 template 
-void
-normalize_coord_gpu(
-    FPTYPE * coord,
-    const int natom,
-    const deepmd::Region & region);
+void normalize_coord_gpu(FPTYPE* coord,
+                         const int natom,
+                         const deepmd::Region& region);
 
 // copy coordinates
 // outputs:
-//	out_c, out_t, mapping, nall, 
-//  int_data(temp cuda memory):idx_map,idx_map_noshift,temp_idx_order,loc_cellnum_map,total_cellnum_map,mask_cellnum_map,
+//	out_c, out_t, mapping, nall,
+//  int_data(temp cuda
+//  memory):idx_map,idx_map_noshift,temp_idx_order,loc_cellnum_map,total_cellnum_map,mask_cellnum_map,
 //                             cell_map,cell_shift_map,sec_loc_cellnum_map,sec_total_cellnum_map,loc_clist
 // inputs:
-//	in_c, in_t, nloc, mem_nall, loc_cellnum, total_cellnum, cell_info, box_info
-//	mem_nall is the size of allocated memory for out_c, out_t, mapping
+//	in_c, in_t, nloc, mem_nall, loc_cellnum, total_cellnum, cell_info,
+//box_info 	mem_nall is the size of allocated memory for out_c, out_t, mapping
 // returns
 //	0: succssful
 //	1: the memory is not large enough to hold all copied coords and types.
 //	   i.e. nall > mem_nall
 template 
-int
-copy_coord_gpu(
-    FPTYPE * out_c,
-    int * out_t,
-    int * mapping,
-    int * nall,
-    int * int_data,
-    const FPTYPE * in_c,
-    const int * in_t,
-    const int & nloc,
-    const int & mem_nall,
-    const int & loc_cellnum,
-    const int & total_cellnum,
-    const int * cell_info,
-    const deepmd::Region & region);
-#endif // GOOGLE_CUDA
-
+int copy_coord_gpu(FPTYPE* out_c,
+                   int* out_t,
+                   int* mapping,
+                   int* nall,
+                   int* int_data,
+                   const FPTYPE* in_c,
+                   const int* in_t,
+                   const int& nloc,
+                   const int& mem_nall,
+                   const int& loc_cellnum,
+                   const int& total_cellnum,
+                   const int* cell_info,
+                   const deepmd::Region& region);
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
 // normalize coords
@@ -99,40 +90,37 @@ copy_coord_gpu(
 // input:
 // natom, box_info: boxt, rec_boxt
 template 
-void
-normalize_coord_gpu_rocm(
-    FPTYPE * coord,
-    const int natom,
-    const deepmd::Region & region);
+void normalize_coord_gpu_rocm(FPTYPE* coord,
+                              const int natom,
+                              const deepmd::Region& region);
 
 // copy coordinates
 // outputs:
-//	out_c, out_t, mapping, nall, 
-//  int_data(temp cuda memory):idx_map,idx_map_noshift,temp_idx_order,loc_cellnum_map,total_cellnum_map,mask_cellnum_map,
+//	out_c, out_t, mapping, nall,
+//  int_data(temp cuda
+//  memory):idx_map,idx_map_noshift,temp_idx_order,loc_cellnum_map,total_cellnum_map,mask_cellnum_map,
 //                             cell_map,cell_shift_map,sec_loc_cellnum_map,sec_total_cellnum_map,loc_clist
 // inputs:
-//	in_c, in_t, nloc, mem_nall, loc_cellnum, total_cellnum, cell_info, box_info
-//	mem_nall is the size of allocated memory for out_c, out_t, mapping
+//	in_c, in_t, nloc, mem_nall, loc_cellnum, total_cellnum, cell_info,
+//box_info 	mem_nall is the size of allocated memory for out_c, out_t, mapping
 // returns
 //	0: succssful
 //	1: the memory is not large enough to hold all copied coords and types.
 //	   i.e. nall > mem_nall
 template 
-int
-copy_coord_gpu_rocm(
-    FPTYPE * out_c,
-    int * out_t,
-    int * mapping,
-    int * nall,
-    int * int_data,
-    const FPTYPE * in_c,
-    const int * in_t,
-    const int & nloc,
-    const int & mem_nall,
-    const int & loc_cellnum,
-    const int & total_cellnum,
-    const int * cell_info,
-    const deepmd::Region & region);
-#endif // TENSORFLOW_USE_ROCM
+int copy_coord_gpu_rocm(FPTYPE* out_c,
+                        int* out_t,
+                        int* mapping,
+                        int* nall,
+                        int* int_data,
+                        const FPTYPE* in_c,
+                        const int* in_t,
+                        const int& nloc,
+                        const int& mem_nall,
+                        const int& loc_cellnum,
+                        const int& total_cellnum,
+                        const int* cell_info,
+                        const deepmd::Region& region);
+#endif  // TENSORFLOW_USE_ROCM
 
-}
+}  // namespace deepmd
diff --git a/source/lib/include/device.h b/source/lib/include/device.h
index 9493533366..c1ac0d1ff0 100644
--- a/source/lib/include/device.h
+++ b/source/lib/include/device.h
@@ -1,11 +1,12 @@
 #pragma once
-#include 
-#include 
 #include 
+
+#include 
 #include 
+#include 
 
 #define TPB 256
-#define SQRT_2_PI 0.7978845608028654 
+#define SQRT_2_PI 0.7978845608028654
 typedef long long int_64;
 typedef unsigned long long uint_64;
 
diff --git a/source/lib/include/env_mat.h b/source/lib/include/env_mat.h
index b94e683027..8a92c5087a 100644
--- a/source/lib/include/env_mat.h
+++ b/source/lib/include/env_mat.h
@@ -2,35 +2,33 @@
 
 #include 
 
-namespace deepmd{
+namespace deepmd {
 
-template 
-void env_mat_a_cpu (
-    std::vector &	        descrpt_a,
-    std::vector &	        descrpt_a_deriv,
-    std::vector &	        rij_a,
-    const std::vector &	posi,
-    const std::vector &		type,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist,
-    const std::vector &		sec, 
-    const float &			rmin,
-    const float &			rmax) ;
+template 
+void env_mat_a_cpu(std::vector& descrpt_a,
+                   std::vector& descrpt_a_deriv,
+                   std::vector& rij_a,
+                   const std::vector& posi,
+                   const std::vector& type,
+                   const int& i_idx,
+                   const std::vector& fmt_nlist,
+                   const std::vector& sec,
+                   const float& rmin,
+                   const float& rmax);
 
-template 
-void env_mat_r_cpu (
-    std::vector &	        descrpt_a,
-    std::vector &	        descrpt_a_deriv,
-    std::vector &	        rij_a,
-    const std::vector &	posi,
-    const std::vector &		type,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist_a,
-    const std::vector &		sec_a, 
-    const float &			rmin,
-    const float &			rmax);
+template 
+void env_mat_r_cpu(std::vector& descrpt_a,
+                   std::vector& descrpt_a_deriv,
+                   std::vector& rij_a,
+                   const std::vector& posi,
+                   const std::vector& type,
+                   const int& i_idx,
+                   const std::vector& fmt_nlist_a,
+                   const std::vector& sec_a,
+                   const float& rmin,
+                   const float& rmax);
 
-}
+}  // namespace deepmd
 
 ////////////////////////////////////////////////////////
 // legacy code
@@ -38,33 +36,30 @@ void env_mat_r_cpu (
 
 #include "SimulationRegion.h"
 
-void env_mat_a (
-    std::vector &		descrpt_a,
-    std::vector &		descrpt_a_deriv,
-    std::vector &		rij_a,
-    const std::vector &	posi,
-    const int &				ntypes,
-    const std::vector &		type,
-    const SimulationRegion &	region,
-    const bool &			b_pbc,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist,
-    const std::vector &		sec, 
-    const double &			rmin,
-    const double &			rmax);
-
-void env_mat_r (
-    std::vector &		descrpt_r,
-    std::vector &		descrpt_r_deriv,
-    std::vector &		rij_r,
-    const std::vector &	posi,
-    const int &				ntypes,
-    const std::vector &		type,
-    const SimulationRegion &	region,
-    const bool &			b_pbc,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist,
-    const std::vector &		sec,
-    const double &			rmin, 
-    const double &			rmax);
+void env_mat_a(std::vector& descrpt_a,
+               std::vector& descrpt_a_deriv,
+               std::vector& rij_a,
+               const std::vector& posi,
+               const int& ntypes,
+               const std::vector& type,
+               const SimulationRegion& region,
+               const bool& b_pbc,
+               const int& i_idx,
+               const std::vector& fmt_nlist,
+               const std::vector& sec,
+               const double& rmin,
+               const double& rmax);
 
+void env_mat_r(std::vector& descrpt_r,
+               std::vector& descrpt_r_deriv,
+               std::vector& rij_r,
+               const std::vector& posi,
+               const int& ntypes,
+               const std::vector& type,
+               const SimulationRegion& region,
+               const bool& b_pbc,
+               const int& i_idx,
+               const std::vector& fmt_nlist,
+               const std::vector& sec,
+               const double& rmin,
+               const double& rmax);
diff --git a/source/lib/include/env_mat_nvnmd.h b/source/lib/include/env_mat_nvnmd.h
index b2d168ca5d..2517c1ef9c 100644
--- a/source/lib/include/env_mat_nvnmd.h
+++ b/source/lib/include/env_mat_nvnmd.h
@@ -1,11 +1,11 @@
 
 /*
 //==================================================
- _   _  __     __  _   _   __  __   ____  
-| \ | | \ \   / / | \ | | |  \/  | |  _ \ 
+ _   _  __     __  _   _   __  __   ____
+| \ | | \ \   / / | \ | | |  \/  | |  _ \
 |  \| |  \ \ / /  |  \| | | |\/| | | | | |
 | |\  |   \ V /   | |\  | | |  | | | |_| |
-|_| \_|    \_/    |_| \_| |_|  |_| |____/ 
+|_| \_|    \_/    |_| \_| |_|  |_| |____/
 
 //==================================================
 
@@ -20,28 +20,26 @@ date: 2021-12-6
 
 #include 
 #include 
-#include "utilities.h"
+
 #include "env_mat_nvnmd.h"
+#include "utilities.h"
 
-namespace deepmd{
-
-template 
-void env_mat_a_nvnmd_quantize_cpu (
-    std::vector &	        descrpt_a,
-    std::vector &	        descrpt_a_deriv,
-    std::vector &	        rij_a,
-    const std::vector &	posi,
-    const std::vector &		type,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist,
-    const std::vector &		sec, 
-    const float &			rmin,
-    const float &			rmax);
+namespace deepmd {
+
+template 
+void env_mat_a_nvnmd_quantize_cpu(std::vector &descrpt_a,
+                                  std::vector &descrpt_a_deriv,
+                                  std::vector &rij_a,
+                                  const std::vector &posi,
+                                  const std::vector &type,
+                                  const int &i_idx,
+                                  const std::vector &fmt_nlist,
+                                  const std::vector &sec,
+                                  const float &rmin,
+                                  const float &rmax);
 }
 
-
-union U_Flt64_Int64
-{
+union U_Flt64_Int64 {
   double nflt;
   int64_t nint;
 };
@@ -59,25 +57,25 @@ union U_Flt64_Int64
 /*
   split double into sign, expo, and frac
 */
-template  // float and double
+template   // float and double
 void split_flt(T x, int64_t &sign, int64_t &expo, int64_t &mant) {
   U_Flt64_Int64 ufi;
   ufi.nflt = x;
-  sign = ( ufi.nint >> 63) & 0x01;
-  expo = ((ufi.nint >> 52) & 0x7ff) -1023;
-  mant = ( ufi.nint & 0xfffffffffffff) | 0x10000000000000; // 1+52
+  sign = (ufi.nint >> 63) & 0x01;
+  expo = ((ufi.nint >> 52) & 0x7ff) - 1023;
+  mant = (ufi.nint & 0xfffffffffffff) | 0x10000000000000;  // 1+52
 }
 
 /*
  find the max exponent for float array x
 */
-template  // float and double
+template   // float and double
 void find_max_expo(int64_t &max_expo, T *x, int64_t M) {
   int ii, jj, kk;
   U_Flt64_Int64 ufi;
   int64_t expo;
   max_expo = -100;
-  for (jj=0; jj> 52) & 0x7ff) - 1023;
     max_expo = (expo > max_expo) ? expo : max_expo;
@@ -87,25 +85,23 @@ void find_max_expo(int64_t &max_expo, T *x, int64_t M) {
 /*
  find the max exponent for float array x
 */
-template  // float and double
+template   // float and double
 void find_max_expo(int64_t &max_expo, T *x, int64_t N, int64_t M) {
   int ii, jj, kk;
   U_Flt64_Int64 ufi;
   int64_t expo;
   max_expo = -100;
-  for (ii=0; ii> 52) & 0x7ff) - 1023;
     max_expo = (expo > max_expo) ? expo : max_expo;
   }
 };
 
-
-
 /*
  dot multiply
 */
-template  // float and double
+template   // float and double
 void dotmul_flt_nvnmd(T &y, T *x1, T *x2, int64_t M) {
   int ii, jj, kk;
   U_Flt64_Int64 ufi;
@@ -120,8 +116,8 @@ void dotmul_flt_nvnmd(T &y, T *x1, T *x2, int64_t M) {
   find_max_expo(expo_max1, x1, M);
   find_max_expo(expo_max2, x2, M);
   //
-  int64_t s=0;
-  for (jj=0; jj>= NBIT_CUTF;
@@ -145,11 +141,10 @@ void dotmul_flt_nvnmd(T &y, T *x1, T *x2, int64_t M) {
   y = ufi.nflt;
 }
 
-
 /*
   multiply
 */
-template  // float and double
+template   // float and double
 void mul_flt_nvnmd(T &y, T x1, T x2) {
   U_Flt64_Int64 ufi1, ufi2, ufi3;
   ufi1.nflt = x1;
@@ -164,7 +159,7 @@ void mul_flt_nvnmd(T &y, T x1, T x2) {
 /*
   add
 */
-template  // float and double
+template   // float and double
 void add_flt_nvnmd(T &y, T x1, T x2) {
   U_Flt64_Int64 ufi1, ufi2, ufi3;
   int64_t sign1, sign2, sign3;
@@ -200,6 +195,3 @@ void add_flt_nvnmd(T &y, T x1, T x2) {
   ufi3.nint &= FLT_MASK;
   y = ufi3.nflt;
 }
-
-
-
diff --git a/source/lib/include/errors.h b/source/lib/include/errors.h
index 29329768ea..cd3bf407c0 100644
--- a/source/lib/include/errors.h
+++ b/source/lib/include/errors.h
@@ -3,21 +3,21 @@
 #include 
 #include 
 
-namespace deepmd{
-    /**
-    * @brief General DeePMD-kit exception. Throw if anything doesn't work.
-    **/
-    struct
-    deepmd_exception: public std::runtime_error {
-    public:
-        deepmd_exception(): runtime_error("DeePMD-kit Error!") {};
-        deepmd_exception(const std::string& msg): runtime_error(std::string("DeePMD-kit Error: ") + msg) {};
-    };
+namespace deepmd {
+/**
+ * @brief General DeePMD-kit exception. Throw if anything doesn't work.
+ **/
+struct deepmd_exception : public std::runtime_error {
+ public:
+  deepmd_exception() : runtime_error("DeePMD-kit Error!"){};
+  deepmd_exception(const std::string& msg)
+      : runtime_error(std::string("DeePMD-kit Error: ") + msg){};
+};
 
-    struct
-    deepmd_exception_oom: public deepmd_exception{
-    public:
-        deepmd_exception_oom(): deepmd_exception("DeePMD-kit OOM!") {};
-        deepmd_exception_oom(const std::string& msg): deepmd_exception(std::string("DeePMD-kit OOM: ") + msg) {};
-    };
-};
\ No newline at end of file
+struct deepmd_exception_oom : public deepmd_exception {
+ public:
+  deepmd_exception_oom() : deepmd_exception("DeePMD-kit OOM!"){};
+  deepmd_exception_oom(const std::string& msg)
+      : deepmd_exception(std::string("DeePMD-kit OOM: ") + msg){};
+};
+};  // namespace deepmd
diff --git a/source/lib/include/ewald.h b/source/lib/include/ewald.h
index 654a6983e7..9efac560e4 100644
--- a/source/lib/include/ewald.h
+++ b/source/lib/include/ewald.h
@@ -1,25 +1,24 @@
 #pragma once
 
-#include
-#include
+#include 
+#include 
 #if defined(_OPENMP)
-#include
+#include 
 #else
 int omp_get_num_threads() { return 1; }
 int omp_get_thread_num() { return 0; }
 #endif
 
-#include "utilities.h"
 #include "region.h"
+#include "utilities.h"
 
-namespace deepmd{
+namespace deepmd {
 
 // 8.988e9 / pc.electron_volt / pc.angstrom * (1.602e-19)**2
 const double ElectrostaticConvertion = 14.39964535475696995031;
 
 template 
-struct EwaldParameters 
-{
+struct EwaldParameters {
   VALUETYPE rcut = 6.0;
   VALUETYPE beta = 2;
   VALUETYPE spacing = 4;
@@ -29,14 +28,12 @@ struct EwaldParameters
 // outputs: energy force virial
 // inputs: coordinates charges region
 template 
-void 
-ewald_recp(
-    VALUETYPE &				ener, 
-    std::vector &		force,
-    std::vector &		virial,
-    const std::vector&	coord,
-    const std::vector&	charge,
-    const deepmd::Region&	region, 
-    const EwaldParameters&	param);
+void ewald_recp(VALUETYPE& ener,
+                std::vector& force,
+                std::vector& virial,
+                const std::vector& coord,
+                const std::vector& charge,
+                const deepmd::Region& region,
+                const EwaldParameters& param);
 
-}
+}  // namespace deepmd
diff --git a/source/lib/include/fmt_nlist.h b/source/lib/include/fmt_nlist.h
index f893897f6b..01f2e32b0e 100644
--- a/source/lib/include/fmt_nlist.h
+++ b/source/lib/include/fmt_nlist.h
@@ -1,77 +1,71 @@
 #pragma once
 
 #include 
+
 #include "device.h"
 #include "neighbor_list.h"
 
-namespace deepmd{
+namespace deepmd {
 
 template 
-void format_nlist_cpu(
-    int * nlist,
-    const InputNlist & in_nlist,
-    const FPTYPE * coord, 
-    const int * type, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const std::vector sec);
+void format_nlist_cpu(int* nlist,
+                      const InputNlist& in_nlist,
+                      const FPTYPE* coord,
+                      const int* type,
+                      const int nloc,
+                      const int nall,
+                      const float rcut,
+                      const std::vector sec);
 
 #if GOOGLE_CUDA
 template 
-void format_nbor_list_gpu_cuda(    
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const deepmd::InputNlist & gpu_inlist,
-    int * array_int,
-    uint_64 * array_longlong,
-    const int max_nbor_size,
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const std::vector sec);
+void format_nbor_list_gpu_cuda(int* nlist,
+                               const FPTYPE* coord,
+                               const int* type,
+                               const deepmd::InputNlist& gpu_inlist,
+                               int* array_int,
+                               uint_64* array_longlong,
+                               const int max_nbor_size,
+                               const int nloc,
+                               const int nall,
+                               const float rcut,
+                               const std::vector sec);
 
 template 
-void test_encoding_decoding_nbor_info_gpu_cuda(
-    uint_64 * key,
-    int * out_type,
-    int * out_index,
-    const int * in_type,
-    const FPTYPE * in_dist,
-    const int * in_index,
-    const int size_of_array);
-#endif //GOOGLE_CUDA
+void test_encoding_decoding_nbor_info_gpu_cuda(uint_64* key,
+                                               int* out_type,
+                                               int* out_index,
+                                               const int* in_type,
+                                               const FPTYPE* in_dist,
+                                               const int* in_index,
+                                               const int size_of_array);
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
 template 
-void format_nbor_list_gpu_rocm(    
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const deepmd::InputNlist & gpu_inlist,
-    int * array_int,
-    uint_64 * array_longlong,
-    const int max_nbor_size,
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const std::vector sec);
+void format_nbor_list_gpu_rocm(int* nlist,
+                               const FPTYPE* coord,
+                               const int* type,
+                               const deepmd::InputNlist& gpu_inlist,
+                               int* array_int,
+                               uint_64* array_longlong,
+                               const int max_nbor_size,
+                               const int nloc,
+                               const int nall,
+                               const float rcut,
+                               const std::vector sec);
 
 template 
-void test_encoding_decoding_nbor_info_gpu_rocm(
-    uint_64 * key,
-    int * out_type,
-    int * out_index,
-    const int * in_type,
-    const FPTYPE * in_dist,
-    const int * in_index,
-    const int size_of_array);
-#endif //TENSORFLOW_USE_ROCM
-
-}
-
+void test_encoding_decoding_nbor_info_gpu_rocm(uint_64* key,
+                                               int* out_type,
+                                               int* out_index,
+                                               const int* in_type,
+                                               const FPTYPE* in_dist,
+                                               const int* in_index,
+                                               const int size_of_array);
+#endif  // TENSORFLOW_USE_ROCM
 
+}  // namespace deepmd
 
 ////////////////////////////////////////////////////////
 // legacy code
@@ -81,31 +75,25 @@ void test_encoding_decoding_nbor_info_gpu_rocm(
 
 // return:	-1	OK
 //		> 0	the type of unsuccessful neighbor list
-int format_nlist_i_fill_a (
-    std::vector &			fmt_nei_idx_a,
-    std::vector &			fmt_nei_idx_r,
-    const std::vector &	posi,
-    const int &				ntypes,
-    const std::vector &		type,
-    const SimulationRegion &	region,
-    const bool &			b_pbc,
-    const int &				i_idx,
-    const std::vector &		nei_idx_a, 
-    const std::vector &		nei_idx_r, 
-    const double &			rcut,
-    const std::vector &		sec_a, 
-    const std::vector &		sec_r);
-
-
-template 
-int format_nlist_i_cpu (
-    std::vector &			fmt_nei_idx_a,
-    const std::vector &	posi,
-    const std::vector &		type,
-    const int &				i_idx,
-    const std::vector &		nei_idx_a, 
-    const float &			rcut,
-    const std::vector &		sec_a);
-
-
+int format_nlist_i_fill_a(std::vector& fmt_nei_idx_a,
+                          std::vector& fmt_nei_idx_r,
+                          const std::vector& posi,
+                          const int& ntypes,
+                          const std::vector& type,
+                          const SimulationRegion& region,
+                          const bool& b_pbc,
+                          const int& i_idx,
+                          const std::vector& nei_idx_a,
+                          const std::vector& nei_idx_r,
+                          const double& rcut,
+                          const std::vector& sec_a,
+                          const std::vector& sec_r);
 
+template 
+int format_nlist_i_cpu(std::vector& fmt_nei_idx_a,
+                       const std::vector& posi,
+                       const std::vector& type,
+                       const int& i_idx,
+                       const std::vector& nei_idx_a,
+                       const float& rcut,
+                       const std::vector& sec_a);
diff --git a/source/lib/include/gelu.h b/source/lib/include/gelu.h
index 969cde7ca7..029d073f78 100644
--- a/source/lib/include/gelu.h
+++ b/source/lib/include/gelu.h
@@ -1,73 +1,58 @@
 #pragma once
 #include "device.h"
 
-namespace deepmd{
+namespace deepmd {
 
-template
-void gelu_cpu(
-    FPTYPE * out, 
-    const FPTYPE * xx, 
-    const int_64 size);
+template 
+void gelu_cpu(FPTYPE* out, const FPTYPE* xx, const int_64 size);
 
-template
-void gelu_grad_cpu(
-    FPTYPE * out, 
-    const FPTYPE * xx,
-    const FPTYPE * dy, 
-    const int_64 size);
+template 
+void gelu_grad_cpu(FPTYPE* out,
+                   const FPTYPE* xx,
+                   const FPTYPE* dy,
+                   const int_64 size);
 
-template
-void gelu_grad_grad_cpu(
-    FPTYPE * out,
-    const FPTYPE * xx,
-    const FPTYPE * dy, 
-    const FPTYPE * dy_2,
-    const int_64 size);
+template 
+void gelu_grad_grad_cpu(FPTYPE* out,
+                        const FPTYPE* xx,
+                        const FPTYPE* dy,
+                        const FPTYPE* dy_2,
+                        const int_64 size);
 
 #if GOOGLE_CUDA
-template
-void gelu_gpu_cuda(
-    FPTYPE * out, 
-    const FPTYPE * xx, 
-    const int_64 size);
-
-template
-void gelu_grad_gpu_cuda(
-    FPTYPE * out, 
-    const FPTYPE * xx,
-    const FPTYPE * dy, 
-    const int_64 size);
-
-template
-void gelu_grad_grad_gpu_cuda(
-    FPTYPE * out,
-    const FPTYPE * xx,
-    const FPTYPE * dy, 
-    const FPTYPE * dy_2,
-    const int_64 size);
-#endif // GOOGLE_CUDA
+template 
+void gelu_gpu_cuda(FPTYPE* out, const FPTYPE* xx, const int_64 size);
+
+template 
+void gelu_grad_gpu_cuda(FPTYPE* out,
+                        const FPTYPE* xx,
+                        const FPTYPE* dy,
+                        const int_64 size);
+
+template 
+void gelu_grad_grad_gpu_cuda(FPTYPE* out,
+                             const FPTYPE* xx,
+                             const FPTYPE* dy,
+                             const FPTYPE* dy_2,
+                             const int_64 size);
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-template
-void gelu_gpu_rocm(
-    FPTYPE * out, 
-    const FPTYPE * xx, 
-    const int_64 size);
-
-template
-void gelu_grad_gpu_rocm(
-    FPTYPE * out, 
-    const FPTYPE * xx,
-    const FPTYPE * dy, 
-    const int_64 size);
-
-template
-void gelu_grad_grad_gpu_rocm(
-    FPTYPE * out,
-    const FPTYPE * xx,
-    const FPTYPE * dy, 
-    const FPTYPE * dy_2,
-    const int_64 size);
-
-#endif//TENSORFLOW_USE_ROCM
-}
+template 
+void gelu_gpu_rocm(FPTYPE* out, const FPTYPE* xx, const int_64 size);
+
+template 
+void gelu_grad_gpu_rocm(FPTYPE* out,
+                        const FPTYPE* xx,
+                        const FPTYPE* dy,
+                        const int_64 size);
+
+template 
+void gelu_grad_grad_gpu_rocm(FPTYPE* out,
+                             const FPTYPE* xx,
+                             const FPTYPE* dy,
+                             const FPTYPE* dy_2,
+                             const int_64 size);
+
+#endif  // TENSORFLOW_USE_ROCM
+}  // namespace deepmd
diff --git a/source/lib/include/gpu_cuda.h b/source/lib/include/gpu_cuda.h
index cc23969857..e15211ab07 100644
--- a/source/lib/include/gpu_cuda.h
+++ b/source/lib/include/gpu_cuda.h
@@ -1,162 +1,148 @@
 #pragma once
-#include 
-#include 
 #include 
 #include 
+#include 
+
+#include 
+
 #include "errors.h"
 
 #define GPU_MAX_NBOR_SIZE 4096
-#define DPErrcheck(res) {DPAssert((res), __FILE__, __LINE__);}
-inline void DPAssert(cudaError_t code, const char *file, int line, bool abort=true) 
-{
+#define DPErrcheck(res) \
+  { DPAssert((res), __FILE__, __LINE__); }
+inline void DPAssert(cudaError_t code,
+                     const char *file,
+                     int line,
+                     bool abort = true) {
   if (code != cudaSuccess) {
-    fprintf(stderr,"cuda assert: %s %s %d\n", cudaGetErrorString(code), file, line);
+    fprintf(stderr, "cuda assert: %s %s %d\n", cudaGetErrorString(code), file,
+            line);
     if (code == 2) {
       // out of memory
-      fprintf(stderr, "Your memory is not enough, thus an error has been raised " \
-        "above. You need to take the following actions:\n" \
-        "1. Check if the network size of the model is too large.\n" \
-        "2. Check if the batch size of training or testing is too large. " \
-        "You can set the training batch size to `auto`.\n" \
-        "3. Check if the number of atoms is too large.\n" \
-        "4. Check if another program is using the same GPU by execuating `nvidia-smi`. " \
-        "The usage of GPUs is controlled by `CUDA_VISIBLE_DEVICES` " \
-        "environment variable.\n");
+      fprintf(stderr,
+              "Your memory is not enough, thus an error has been raised "
+              "above. You need to take the following actions:\n"
+              "1. Check if the network size of the model is too large.\n"
+              "2. Check if the batch size of training or testing is too large. "
+              "You can set the training batch size to `auto`.\n"
+              "3. Check if the number of atoms is too large.\n"
+              "4. Check if another program is using the same GPU by execuating "
+              "`nvidia-smi`. "
+              "The usage of GPUs is controlled by `CUDA_VISIBLE_DEVICES` "
+              "environment variable.\n");
       if (abort) throw deepmd::deepmd_exception_oom("CUDA Assert");
     }
     if (abort) throw deepmd::deepmd_exception("CUDA Assert");
   }
 }
 
-#define nborErrcheck(res) {nborAssert((res), __FILE__, __LINE__);}
-inline void nborAssert(cudaError_t code, const char *file, int line, bool abort=true) 
-{
-    if (code != cudaSuccess) {
-        fprintf(stderr,"cuda assert: %s %s %d\n", "DeePMD-kit:\tillegal nbor list sorting", file, line);
-        if (code == 2) {
-          // out of memory
-          fprintf(stderr, "Your memory is not enough, thus an error has been raised " \
-            "above. You need to take the following actions:\n" \
-            "1. Check if the network size of the model is too large.\n" \
-            "2. Check if the batch size of training or testing is too large. " \
-            "You can set the training batch size to `auto`.\n" \
-            "3. Check if the number of atoms is too large.\n" \
-            "4. Check if another program is using the same GPU by execuating `nvidia-smi`. " \
-            "The usage of GPUs is controlled by `CUDA_VISIBLE_DEVICES` " \
-            "environment variable.\n");
-            if (abort) throw deepmd::deepmd_exception_oom("CUDA Assert");
-        }
-        if (abort) throw deepmd::deepmd_exception("CUDA Assert");
+#define nborErrcheck(res) \
+  { nborAssert((res), __FILE__, __LINE__); }
+inline void nborAssert(cudaError_t code,
+                       const char *file,
+                       int line,
+                       bool abort = true) {
+  if (code != cudaSuccess) {
+    fprintf(stderr, "cuda assert: %s %s %d\n",
+            "DeePMD-kit:\tillegal nbor list sorting", file, line);
+    if (code == 2) {
+      // out of memory
+      fprintf(stderr,
+              "Your memory is not enough, thus an error has been raised "
+              "above. You need to take the following actions:\n"
+              "1. Check if the network size of the model is too large.\n"
+              "2. Check if the batch size of training or testing is too large. "
+              "You can set the training batch size to `auto`.\n"
+              "3. Check if the number of atoms is too large.\n"
+              "4. Check if another program is using the same GPU by execuating "
+              "`nvidia-smi`. "
+              "The usage of GPUs is controlled by `CUDA_VISIBLE_DEVICES` "
+              "environment variable.\n");
+      if (abort) throw deepmd::deepmd_exception_oom("CUDA Assert");
     }
+    if (abort) throw deepmd::deepmd_exception("CUDA Assert");
+  }
 }
 
 #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
-static __inline__ __device__ double atomicAdd(
-    double* address, 
-    double val) 
-{
-  unsigned long long int* address_as_ull = (unsigned long long int*)address;
+static __inline__ __device__ double atomicAdd(double *address, double val) {
+  unsigned long long int *address_as_ull = (unsigned long long int *)address;
   unsigned long long int old = *address_as_ull, assumed;
   do {
     assumed = old;
     old = atomicCAS(address_as_ull, assumed,
-          __double_as_longlong(val + __longlong_as_double(assumed)));
-  // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) } while (assumed != old);
+                    __double_as_longlong(val + __longlong_as_double(assumed)));
+    // Note: uses integer comparison to avoid hang in case of NaN (since NaN !=
+    // NaN) } while (assumed != old);
   } while (assumed != old);
   return __longlong_as_double(old);
 }
 #endif
 
 namespace deepmd {
-  
-inline void DPGetDeviceCount(int &gpu_num) { cudaGetDeviceCount(&gpu_num) ;}
 
-inline cudaError_t DPSetDevice(int rank) { return  cudaSetDevice(rank); }
+inline void DPGetDeviceCount(int &gpu_num) { cudaGetDeviceCount(&gpu_num); }
+
+inline cudaError_t DPSetDevice(int rank) { return cudaSetDevice(rank); }
 
 template 
-void memcpy_host_to_device(
-    FPTYPE * device, 
-    const std::vector &host) 
-{
-  DPErrcheck(cudaMemcpy(device, &host[0], sizeof(FPTYPE) * host.size(), cudaMemcpyHostToDevice));  
+void memcpy_host_to_device(FPTYPE *device, const std::vector &host) {
+  DPErrcheck(cudaMemcpy(device, &host[0], sizeof(FPTYPE) * host.size(),
+                        cudaMemcpyHostToDevice));
 }
 
 template 
-void memcpy_host_to_device(
-    FPTYPE * device, 
-    const FPTYPE * host,
-    const int size) 
-{
-  DPErrcheck(cudaMemcpy(device, host, sizeof(FPTYPE) * size, cudaMemcpyHostToDevice));  
+void memcpy_host_to_device(FPTYPE *device, const FPTYPE *host, const int size) {
+  DPErrcheck(
+      cudaMemcpy(device, host, sizeof(FPTYPE) * size, cudaMemcpyHostToDevice));
 }
 
 template 
-void memcpy_device_to_host(
-    const FPTYPE * device, 
-    std::vector &host) 
-{
-  DPErrcheck(cudaMemcpy(&host[0], device, sizeof(FPTYPE) * host.size(), cudaMemcpyDeviceToHost));  
+void memcpy_device_to_host(const FPTYPE *device, std::vector &host) {
+  DPErrcheck(cudaMemcpy(&host[0], device, sizeof(FPTYPE) * host.size(),
+                        cudaMemcpyDeviceToHost));
 }
 
 template 
-void memcpy_device_to_host(
-    const FPTYPE * device, 
-    FPTYPE * host,
-    const int size) 
-{
-  DPErrcheck(cudaMemcpy(host, device, sizeof(FPTYPE) * size, cudaMemcpyDeviceToHost));  
+void memcpy_device_to_host(const FPTYPE *device, FPTYPE *host, const int size) {
+  DPErrcheck(
+      cudaMemcpy(host, device, sizeof(FPTYPE) * size, cudaMemcpyDeviceToHost));
 }
 
 template 
-void malloc_device_memory(
-    FPTYPE * &device, 
-    const std::vector &host) 
-{
+void malloc_device_memory(FPTYPE *&device, const std::vector &host) {
   DPErrcheck(cudaMalloc((void **)&device, sizeof(FPTYPE) * host.size()));
 }
 
 template 
-void malloc_device_memory(
-    FPTYPE * &device, 
-    const int size) 
-{
+void malloc_device_memory(FPTYPE *&device, const int size) {
   DPErrcheck(cudaMalloc((void **)&device, sizeof(FPTYPE) * size));
 }
 
 template 
-void malloc_device_memory_sync(
-    FPTYPE * &device,
-    const std::vector &host) 
-{
+void malloc_device_memory_sync(FPTYPE *&device,
+                               const std::vector &host) {
   DPErrcheck(cudaMalloc((void **)&device, sizeof(FPTYPE) * host.size()));
   memcpy_host_to_device(device, host);
 }
 
 template 
-void malloc_device_memory_sync(
-    FPTYPE * &device,
-    const FPTYPE * host,
-    const int size)
-{
+void malloc_device_memory_sync(FPTYPE *&device,
+                               const FPTYPE *host,
+                               const int size) {
   DPErrcheck(cudaMalloc((void **)&device, sizeof(FPTYPE) * size));
   memcpy_host_to_device(device, host, size);
 }
 
 template 
-void delete_device_memory(
-    FPTYPE * &device) 
-{
+void delete_device_memory(FPTYPE *&device) {
   if (device != NULL) {
     DPErrcheck(cudaFree(device));
   }
 }
 
 template 
-void memset_device_memory(
-    FPTYPE * device, 
-    const int var,
-    const int size) 
-{
-  DPErrcheck(cudaMemset(device, var, sizeof(FPTYPE) * size));  
+void memset_device_memory(FPTYPE *device, const int var, const int size) {
+  DPErrcheck(cudaMemset(device, var, sizeof(FPTYPE) * size));
 }
-} // end of namespace deepmd
\ No newline at end of file
+}  // end of namespace deepmd
diff --git a/source/lib/include/gpu_rocm.h b/source/lib/include/gpu_rocm.h
index 35328c701f..7dccde6af9 100644
--- a/source/lib/include/gpu_rocm.h
+++ b/source/lib/include/gpu_rocm.h
@@ -1,121 +1,102 @@
-#pragma once 
-#include 
-#include 
+#pragma once
 #include 
-#include
-//#include
-//#include 
+#include 
+#include 
+
+#include 
+// #include
+// #include 
 #include "errors.h"
 
 #define GPU_MAX_NBOR_SIZE 4096
 
-#define DPErrcheck(res) { DPAssert((res), __FILE__, __LINE__); }
-inline void DPAssert(hipError_t code, const char *file, int line, bool abort=true) {
-    if (code != hipSuccess) {
-        fprintf(stderr,"hip assert: %s %s %d\n", hipGetErrorString(code), file, line);
-        if (abort) throw deepmd::deepmd_exception("HIP Assert");
-    }
+#define DPErrcheck(res) \
+  { DPAssert((res), __FILE__, __LINE__); }
+inline void DPAssert(hipError_t code,
+                     const char *file,
+                     int line,
+                     bool abort = true) {
+  if (code != hipSuccess) {
+    fprintf(stderr, "hip assert: %s %s %d\n", hipGetErrorString(code), file,
+            line);
+    if (abort) throw deepmd::deepmd_exception("HIP Assert");
+  }
 }
 
-#define nborErrcheck(res) {nborAssert((res), __FILE__, __LINE__);}
-inline void nborAssert(hipError_t code, const char *file, int line, bool abort=true) {
-    if (code != hipSuccess) {
-        fprintf(stderr,"hip assert: %s %s %d\n", "DeePMD-kit:\tillegal nbor list sorting", file, line);
-        if (abort) throw deepmd::deepmd_exception("HIP Assert: illegal nbor list sorting");
-    }
+#define nborErrcheck(res) \
+  { nborAssert((res), __FILE__, __LINE__); }
+inline void nborAssert(hipError_t code,
+                       const char *file,
+                       int line,
+                       bool abort = true) {
+  if (code != hipSuccess) {
+    fprintf(stderr, "hip assert: %s %s %d\n",
+            "DeePMD-kit:\tillegal nbor list sorting", file, line);
+    if (abort)
+      throw deepmd::deepmd_exception("HIP Assert: illegal nbor list sorting");
+  }
 }
 
-
 namespace deepmd {
-inline void DPGetDeviceCount(int &gpu_num) { hipGetDeviceCount(&gpu_num) ;}
+inline void DPGetDeviceCount(int &gpu_num) { hipGetDeviceCount(&gpu_num); }
 
-inline hipError_t DPSetDevice(int rank) { return  hipSetDevice(rank); }
+inline hipError_t DPSetDevice(int rank) { return hipSetDevice(rank); }
 
 template 
-void memcpy_host_to_device(
-    FPTYPE * device, 
-    std::vector &host) 
-{
-  DPErrcheck(hipMemcpy(device, &host[0], sizeof(FPTYPE) * host.size(), hipMemcpyHostToDevice));  
+void memcpy_host_to_device(FPTYPE *device, std::vector &host) {
+  DPErrcheck(hipMemcpy(device, &host[0], sizeof(FPTYPE) * host.size(),
+                       hipMemcpyHostToDevice));
 }
 
 template 
-void memcpy_host_to_device(
-    FPTYPE * device, 
-    const FPTYPE * host,
-    const int size) 
-{
-  DPErrcheck(hipMemcpy(device, host, sizeof(FPTYPE) * size, hipMemcpyHostToDevice));  
+void memcpy_host_to_device(FPTYPE *device, const FPTYPE *host, const int size) {
+  DPErrcheck(
+      hipMemcpy(device, host, sizeof(FPTYPE) * size, hipMemcpyHostToDevice));
 }
 
 template 
-void memcpy_device_to_host(
-    FPTYPE * device, 
-    std::vector &host) 
-{
-  DPErrcheck(hipMemcpy(&host[0], device, sizeof(FPTYPE) * host.size(), hipMemcpyDeviceToHost));  
+void memcpy_device_to_host(FPTYPE *device, std::vector &host) {
+  DPErrcheck(hipMemcpy(&host[0], device, sizeof(FPTYPE) * host.size(),
+                       hipMemcpyDeviceToHost));
 }
 template 
-void memcpy_device_to_host(
-    const FPTYPE * device, 
-    FPTYPE * host,
-    const int size) 
-{
-  DPErrcheck(hipMemcpy(host, device, sizeof(FPTYPE) * size, hipMemcpyDeviceToHost));  
+void memcpy_device_to_host(const FPTYPE *device, FPTYPE *host, const int size) {
+  DPErrcheck(
+      hipMemcpy(host, device, sizeof(FPTYPE) * size, hipMemcpyDeviceToHost));
 }
 
 template 
-void malloc_device_memory(
-    FPTYPE * &device, 
-    std::vector &host) 
-{
+void malloc_device_memory(FPTYPE *&device, std::vector &host) {
   DPErrcheck(hipMalloc((void **)&device, sizeof(FPTYPE) * host.size()));
 }
 
 template 
-void malloc_device_memory(
-    FPTYPE * &device, 
-    const int size) 
-{
+void malloc_device_memory(FPTYPE *&device, const int size) {
   DPErrcheck(hipMalloc((void **)&device, sizeof(FPTYPE) * size));
 }
 
 template 
-void malloc_device_memory_sync(
-    FPTYPE * &device,
-    std::vector &host) 
-{
+void malloc_device_memory_sync(FPTYPE *&device, std::vector &host) {
   DPErrcheck(hipMalloc((void **)&device, sizeof(FPTYPE) * host.size()));
   memcpy_host_to_device(device, host);
 }
 template 
-void malloc_device_memory_sync(
-    FPTYPE * &device,
-    const FPTYPE * host,
-    const int size)
-{
+void malloc_device_memory_sync(FPTYPE *&device,
+                               const FPTYPE *host,
+                               const int size) {
   DPErrcheck(hipMalloc((void **)&device, sizeof(FPTYPE) * size));
   memcpy_host_to_device(device, host, size);
 }
 
 template 
-void delete_device_memory(
-    FPTYPE * &device) 
-{
+void delete_device_memory(FPTYPE *&device) {
   if (device != NULL) {
     DPErrcheck(hipFree(device));
   }
 }
 
 template 
-void memset_device_memory(
-  FPTYPE * device,
-  const int var,
-  const int size)
-  {
-    DPErrcheck(hipMemset(device,var,sizeof(FPTYPE)*size));
-  }
+void memset_device_memory(FPTYPE *device, const int var, const int size) {
+  DPErrcheck(hipMemset(device, var, sizeof(FPTYPE) * size));
 }
-
-
-
+}  // namespace deepmd
diff --git a/source/lib/include/map_aparam.h b/source/lib/include/map_aparam.h
index 3ee3d1dc12..f816229214 100644
--- a/source/lib/include/map_aparam.h
+++ b/source/lib/include/map_aparam.h
@@ -1,15 +1,13 @@
 #pragma once
 
-namespace deepmd{
-  
+namespace deepmd {
+
 template 
-void map_aparam_cpu (
-    FPTYPE * output,
-    const FPTYPE * aparam,
-    const int * nlist,
-    const int & nloc,
-    const int & nnei,
-    const int & numb_aparam
-    );
+void map_aparam_cpu(FPTYPE* output,
+                    const FPTYPE* aparam,
+                    const int* nlist,
+                    const int& nloc,
+                    const int& nnei,
+                    const int& numb_aparam);
 
 }
diff --git a/source/lib/include/neighbor_list.h b/source/lib/include/neighbor_list.h
index 0155715cdc..7e3e82db31 100644
--- a/source/lib/include/neighbor_list.h
+++ b/source/lib/include/neighbor_list.h
@@ -1,67 +1,61 @@
 #pragma once
 
 #include 
-#include 
 #include 
+#include 
 #include 
 
+#include "SimulationRegion.h"
 #include "region.h"
 #include "utilities.h"
-#include "SimulationRegion.h"
 
-namespace deepmd{
+namespace deepmd {
 
 /**
  * @brief             Construct InputNlist with the input LAMMPS nbor list info.
- * 
+ *
  * @struct            InputNlist
-*/
-struct InputNlist
-{
+ */
+struct InputNlist {
   /// Number of core region atoms
   int inum;
   /// Array stores the core region atom's index
-  int * ilist;
+  int* ilist;
   /// Array stores the core region atom's neighbor atom number
-  int * numneigh;
+  int* numneigh;
   /// Array stores the core region atom's neighbor index
-  int ** firstneigh;
-  InputNlist () 
-      : inum(0), ilist(NULL), numneigh(NULL), firstneigh(NULL)
-      {};
-  InputNlist (
-      int inum_, 
-      int * ilist_,
-      int * numneigh_, 
-      int ** firstneigh_
-      ) 
-      : inum(inum_), ilist(ilist_), numneigh(numneigh_), firstneigh(firstneigh_)
-      {};
+  int** firstneigh;
+  InputNlist() : inum(0), ilist(NULL), numneigh(NULL), firstneigh(NULL){};
+  InputNlist(int inum_, int* ilist_, int* numneigh_, int** firstneigh_)
+      : inum(inum_),
+        ilist(ilist_),
+        numneigh(numneigh_),
+        firstneigh(firstneigh_){};
   ~InputNlist(){};
 };
 
 /**
  *@brief              Construct the InputNlist with a two-dimensional vector.
  *
- *@param              to_nlist:   InputNlist struct which stores the neighbor information of the core region atoms.
- *@param              from_nlist: Vector which stores the neighbor information of the core region atoms.
+ *@param              to_nlist:   InputNlist struct which stores the neighbor
+ *information of the core region atoms.
+ *@param              from_nlist: Vector which stores the neighbor information
+ *of the core region atoms.
  */
-void convert_nlist(
-    InputNlist & to_nlist,
-    std::vector > & from_nlist
-    );
+void convert_nlist(InputNlist& to_nlist,
+                   std::vector >& from_nlist);
 
 /**
- *@brief              Compute the max number of neighbors within the core region atoms
+ *@brief              Compute the max number of neighbors within the core region
+ *atoms
  *
- *@param              to_nlist:   InputNlist struct which stores the neighbor information of the core region atoms.
+ *@param              to_nlist:   InputNlist struct which stores the neighbor
+ *information of the core region atoms.
  *
  *@return             integer
  *@retval             max number of neighbors
  */
-int max_numneigh(
-    const InputNlist & to_nlist
-    );
+int max_numneigh(const InputNlist& to_nlist);
 
 // build neighbor list.
 // outputs
@@ -75,57 +69,56 @@ int max_numneigh(
 //	1: the memory is not large enough to hold all neighbors.
 //	   i.e. max_list_size > mem_nall
 template 
-int
-build_nlist_cpu(
-    InputNlist & nlist,
-    int * max_list_size,
-    const FPTYPE * c_cpy,
-    const int & nloc, 
-    const int & nall, 
-    const int & mem_size,
-    const float & rcut);
-
-void use_nei_info_cpu(
-    int * nlist, 
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int * nlist_map, 
-    const int nloc, 
-    const int nnei,
-    const int ntypes,
-    const bool b_nlist_map);
+int build_nlist_cpu(InputNlist& nlist,
+                    int* max_list_size,
+                    const FPTYPE* c_cpy,
+                    const int& nloc,
+                    const int& nall,
+                    const int& mem_size,
+                    const float& rcut);
+
+void use_nei_info_cpu(int* nlist,
+                      int* ntype,
+                      bool* nmask,
+                      const int* type,
+                      const int* nlist_map,
+                      const int nloc,
+                      const int nnei,
+                      const int ntypes,
+                      const bool b_nlist_map);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 /**
- *@brief              Convert the a host memory InputNlist to a device memory InputNlist
+ *@brief              Convert the a host memory InputNlist to a device memory
+ *InputNlist
  *
- *@param              cpu_nlist:    Host memory InputNlist struct which stores the neighbor information of the core region atoms
- *@param              gpu_nlist:    Device memory InputNlist struct which stores the neighbor information of the core region atoms
- *@param              gpu_memory:   Device array which stores the elements of gpu_nlist
+ *@param              cpu_nlist:    Host memory InputNlist struct which stores
+ *the neighbor information of the core region atoms
+ *@param              gpu_nlist:    Device memory InputNlist struct which stores
+ *the neighbor information of the core region atoms
+ *@param              gpu_memory:   Device array which stores the elements of
+ *gpu_nlist
  *@param              max_nbor_size
  */
-void convert_nlist_gpu_device(
-    InputNlist & gpu_nlist,
-    InputNlist & cpu_nlist,
-    int* & gpu_memory,
-    const int & max_nbor_size);
+void convert_nlist_gpu_device(InputNlist& gpu_nlist,
+                              InputNlist& cpu_nlist,
+                              int*& gpu_memory,
+                              const int& max_nbor_size);
 
 /**
  *@brief              Reclaim the allocated device memory of struct InputNlist
  *
- *@param              gpu_nlist:    Device memory InputNlist struct which stores the neighbor information of the core region atoms
+ *@param              gpu_nlist:    Device memory InputNlist struct which stores
+ *the neighbor information of the core region atoms
  */
-void free_nlist_gpu_device(
-    InputNlist & gpu_nlist);
+void free_nlist_gpu_device(InputNlist& gpu_nlist);
 
-void use_nlist_map(
-    int * nlist, 
-    const int * nlist_map, 
-    const int nloc, 
-    const int nnei);
+void use_nlist_map(int* nlist,
+                   const int* nlist_map,
+                   const int nloc,
+                   const int nnei);
 
-#endif //GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #if GOOGLE_CUDA
 // build neighbor list.
@@ -140,30 +133,26 @@ void use_nlist_map(
 //	1: the memory is not large enough to hold all neighbors.
 //	   i.e. max_list_size > mem_nall
 template 
-int
-build_nlist_gpu(
-    InputNlist & nlist,
-    int * max_list_size,
-    int * nlist_data,
-    const FPTYPE * c_cpy, 
-    const int & nloc, 
-    const int & nall, 
-    const int & mem_size,
-    const float & rcut);
-
-void use_nei_info_gpu(
-    int * nlist, 
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int * nlist_map, 
-    const int nloc, 
-    const int nnei,
-    const int ntypes,
-    const bool b_nlist_map);
-
-#endif // GOOGLE_CUDA
-
+int build_nlist_gpu(InputNlist& nlist,
+                    int* max_list_size,
+                    int* nlist_data,
+                    const FPTYPE* c_cpy,
+                    const int& nloc,
+                    const int& nall,
+                    const int& mem_size,
+                    const float& rcut);
+
+void use_nei_info_gpu(int* nlist,
+                      int* ntype,
+                      bool* nmask,
+                      const int* type,
+                      const int* nlist_map,
+                      const int nloc,
+                      const int nnei,
+                      const int ntypes,
+                      const bool b_nlist_map);
+
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
 // build neighbor list.
@@ -178,93 +167,84 @@ void use_nei_info_gpu(
 //	1: the memory is not large enough to hold all neighbors.
 //	   i.e. max_list_size > mem_nall
 template 
-int
-build_nlist_gpu_rocm(
-    InputNlist & nlist,
-    int * max_list_size,
-    int * nlist_data,
-    const FPTYPE * c_cpy, 
-    const int & nloc, 
-    const int & nall, 
-    const int & mem_size,
-    const float & rcut);
-	
-void use_nei_info_gpu_rocm(
-    int * nlist, 
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int * nlist_map, 
-    const int nloc, 
-    const int nnei,
-    const int ntypes,
-    const bool b_nlist_map);
-
-#endif // TENSORFLOW_USE_ROCM
-
-} // namespace deepmd
-
+int build_nlist_gpu_rocm(InputNlist& nlist,
+                         int* max_list_size,
+                         int* nlist_data,
+                         const FPTYPE* c_cpy,
+                         const int& nloc,
+                         const int& nall,
+                         const int& mem_size,
+                         const float& rcut);
+
+void use_nei_info_gpu_rocm(int* nlist,
+                           int* ntype,
+                           bool* nmask,
+                           const int* type,
+                           const int* nlist_map,
+                           const int nloc,
+                           const int nnei,
+                           const int ntypes,
+                           const bool b_nlist_map);
+
+#endif  // TENSORFLOW_USE_ROCM
+
+}  // namespace deepmd
 
 ////////////////////////////////////////////////////////
 // legacy code
 ////////////////////////////////////////////////////////
 
 // build nlist by an extended grid
-void
-build_nlist (std::vector > &	nlist0,
-	     std::vector > &	nlist1,
-	     const std::vector &	coord,
-	     const int &			nloc,
-	     const double &			rc0,
-	     const double &			rc1,
-	     const std::vector &		nat_stt_,
-	     const std::vector &		nat_end_,
-	     const std::vector &		ext_stt_,
-	     const std::vector &		ext_end_,
-	     const SimulationRegion &	region,
-	     const std::vector &		global_grid);
+void build_nlist(std::vector >& nlist0,
+                 std::vector >& nlist1,
+                 const std::vector& coord,
+                 const int& nloc,
+                 const double& rc0,
+                 const double& rc1,
+                 const std::vector& nat_stt_,
+                 const std::vector& nat_end_,
+                 const std::vector& ext_stt_,
+                 const std::vector& ext_end_,
+                 const SimulationRegion& region,
+                 const std::vector& global_grid);
 
 // build nlist by a grid for a periodic region
-void
-build_nlist (std::vector > &	nlist0,
-	     std::vector > &	nlist1,
-	     const std::vector &	coord,
-	     const double &			rc0,
-	     const double &			rc1,
-	     const std::vector &		grid,
-	     const SimulationRegion &	region);
+void build_nlist(std::vector >& nlist0,
+                 std::vector >& nlist1,
+                 const std::vector& coord,
+                 const double& rc0,
+                 const double& rc1,
+                 const std::vector& grid,
+                 const SimulationRegion& region);
 
 // build nlist by a grid for a periodic region, atoms selected by sel0 and sel1
-void
-build_nlist (std::vector > &	nlist0,
-	     std::vector > &	nlist1,
-	     const std::vector &	coord,
-	     const std::vector &		sel0,
-	     const std::vector &		sel1,
-	     const double &			rc0,
-	     const double &			rc1,
-	     const std::vector &		grid,
-	     const SimulationRegion &	region);
+void build_nlist(std::vector >& nlist0,
+                 std::vector >& nlist1,
+                 const std::vector& coord,
+                 const std::vector& sel0,
+                 const std::vector& sel1,
+                 const double& rc0,
+                 const double& rc1,
+                 const std::vector& grid,
+                 const SimulationRegion& region);
 
 // brute force (all-to-all distance computation) neighbor list building
 // if region is NULL, open boundary is assumed,
 // otherwise, periodic boundary condition is defined by region
-void
-build_nlist (std::vector > & nlist0,
-	     std::vector > & nlist1,
-	     const std::vector &	coord,
-	     const double &			rc0_,
-	     const double &			rc1_,
-	     const SimulationRegion * region = NULL);
+void build_nlist(std::vector >& nlist0,
+                 std::vector >& nlist1,
+                 const std::vector& coord,
+                 const double& rc0_,
+                 const double& rc1_,
+                 const SimulationRegion* region = NULL);
 
 // copy periodic images for the system
-void 
-copy_coord (std::vector &		out_c, 
-	    std::vector &			out_t, 
-	    std::vector &			mapping,
-	    std::vector &			ncell,
-	    std::vector &			ngcell,
-	    const std::vector &	in_c,
-	    const std::vector &		in_t,
-	    const double &			rc,
-	    const SimulationRegion &	region);
+void copy_coord(std::vector& out_c,
+                std::vector& out_t,
+                std::vector& mapping,
+                std::vector& ncell,
+                std::vector& ngcell,
+                const std::vector& in_c,
+                const std::vector& in_t,
+                const double& rc,
+                const SimulationRegion& region);
diff --git a/source/lib/include/pair_tab.h b/source/lib/include/pair_tab.h
index c99ba3bff5..2bb88b36c2 100644
--- a/source/lib/include/pair_tab.h
+++ b/source/lib/include/pair_tab.h
@@ -1,22 +1,20 @@
 #pragma once
 #include 
 
-namespace deepmd{
-  
-template
-void pair_tab_cpu(
-    FPTYPE * energy,
-    FPTYPE * force,
-    FPTYPE * virial,
-    const double * table_info,
-    const double * table_data,
-    const FPTYPE * rij,
-    const FPTYPE * scale,
-    const int * type,
-    const int * nlist,
-    const int * natoms,
-    const std::vector & sel_a,
-    const std::vector & sel_r
-    );
+namespace deepmd {
+
+template 
+void pair_tab_cpu(FPTYPE* energy,
+                  FPTYPE* force,
+                  FPTYPE* virial,
+                  const double* table_info,
+                  const double* table_data,
+                  const FPTYPE* rij,
+                  const FPTYPE* scale,
+                  const int* type,
+                  const int* nlist,
+                  const int* natoms,
+                  const std::vector& sel_a,
+                  const std::vector& sel_r);
 
 }
diff --git a/source/lib/include/prod_env_mat.h b/source/lib/include/prod_env_mat.h
index cab2cda93a..3052dd2230 100644
--- a/source/lib/include/prod_env_mat.h
+++ b/source/lib/include/prod_env_mat.h
@@ -1,148 +1,140 @@
 #pragma once
 #include 
+
 #include "device.h"
 #include "neighbor_list.h"
 
-namespace deepmd{
+namespace deepmd {
 
-template
-void prod_env_mat_a_cpu(
-    FPTYPE * em, 
-    FPTYPE * em_deriv, 
-    FPTYPE * rij, 
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const InputNlist & inlist,
-    const int max_nbor_size,
-    const FPTYPE * avg, 
-    const FPTYPE * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec,
-    const int * f_type = NULL);
+template 
+void prod_env_mat_a_cpu(FPTYPE *em,
+                        FPTYPE *em_deriv,
+                        FPTYPE *rij,
+                        int *nlist,
+                        const FPTYPE *coord,
+                        const int *type,
+                        const InputNlist &inlist,
+                        const int max_nbor_size,
+                        const FPTYPE *avg,
+                        const FPTYPE *std,
+                        const int nloc,
+                        const int nall,
+                        const float rcut,
+                        const float rcut_smth,
+                        const std::vector sec,
+                        const int *f_type = NULL);
 
-template
-void prod_env_mat_r_cpu(
-    FPTYPE * em, 
-    FPTYPE * em_deriv, 
-    FPTYPE * rij, 
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const InputNlist & inlist,
-    const int max_nbor_size,
-    const FPTYPE * avg, 
-    const FPTYPE * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec);
+template 
+void prod_env_mat_r_cpu(FPTYPE *em,
+                        FPTYPE *em_deriv,
+                        FPTYPE *rij,
+                        int *nlist,
+                        const FPTYPE *coord,
+                        const int *type,
+                        const InputNlist &inlist,
+                        const int max_nbor_size,
+                        const FPTYPE *avg,
+                        const FPTYPE *std,
+                        const int nloc,
+                        const int nall,
+                        const float rcut,
+                        const float rcut_smth,
+                        const std::vector sec);
 
 #if GOOGLE_CUDA
-template 
-void prod_env_mat_a_gpu_cuda(    
-    FPTYPE * em, 
-    FPTYPE * em_deriv, 
-    FPTYPE * rij, 
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const InputNlist & gpu_inlist,
-    int * array_int, 
-    unsigned long long * array_longlong,
-    const int max_nbor_size,
-    const FPTYPE * avg, 
-    const FPTYPE * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec,
-    const int * f_type=NULL);
+template 
+void prod_env_mat_a_gpu_cuda(FPTYPE *em,
+                             FPTYPE *em_deriv,
+                             FPTYPE *rij,
+                             int *nlist,
+                             const FPTYPE *coord,
+                             const int *type,
+                             const InputNlist &gpu_inlist,
+                             int *array_int,
+                             unsigned long long *array_longlong,
+                             const int max_nbor_size,
+                             const FPTYPE *avg,
+                             const FPTYPE *std,
+                             const int nloc,
+                             const int nall,
+                             const float rcut,
+                             const float rcut_smth,
+                             const std::vector sec,
+                             const int *f_type = NULL);
 
-template 
-void prod_env_mat_r_gpu_cuda(    
-    FPTYPE * em, 
-    FPTYPE * em_deriv, 
-    FPTYPE * rij, 
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const InputNlist & gpu_inlist,
-    int * array_int, 
-    unsigned long long * array_longlong,
-    const int max_nbor_size,
-    const FPTYPE * avg, 
-    const FPTYPE * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec);
+template 
+void prod_env_mat_r_gpu_cuda(FPTYPE *em,
+                             FPTYPE *em_deriv,
+                             FPTYPE *rij,
+                             int *nlist,
+                             const FPTYPE *coord,
+                             const int *type,
+                             const InputNlist &gpu_inlist,
+                             int *array_int,
+                             unsigned long long *array_longlong,
+                             const int max_nbor_size,
+                             const FPTYPE *avg,
+                             const FPTYPE *std,
+                             const int nloc,
+                             const int nall,
+                             const float rcut,
+                             const float rcut_smth,
+                             const std::vector sec);
 
-void env_mat_nbor_update(
-    InputNlist &inlist,
-    InputNlist &gpu_inlist,
-    int &max_nbor_size,
-    int* &nbor_list_dev,
-    const int * mesh, 
-    const int size);
-#endif // GOOGLE_CUDA
+void env_mat_nbor_update(InputNlist &inlist,
+                         InputNlist &gpu_inlist,
+                         int &max_nbor_size,
+                         int *&nbor_list_dev,
+                         const int *mesh,
+                         const int size);
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-template 
-void prod_env_mat_a_gpu_rocm(    
-    FPTYPE * em, 
-    FPTYPE * em_deriv, 
-    FPTYPE * rij, 
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const InputNlist & gpu_inlist,
-    int * array_int, 
-    unsigned long long * array_longlong,
-    const int max_nbor_size,
-    const FPTYPE * avg, 
-    const FPTYPE * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec,
-    const int * f_type=NULL);
-
-template 
-void prod_env_mat_r_gpu_rocm(    
-    FPTYPE * em, 
-    FPTYPE * em_deriv, 
-    FPTYPE * rij, 
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const InputNlist & gpu_inlist,
-    int * array_int, 
-    unsigned long long * array_longlong,
-    const int max_nbor_size,
-    const FPTYPE * avg, 
-    const FPTYPE * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec);
+template 
+void prod_env_mat_a_gpu_rocm(FPTYPE *em,
+                             FPTYPE *em_deriv,
+                             FPTYPE *rij,
+                             int *nlist,
+                             const FPTYPE *coord,
+                             const int *type,
+                             const InputNlist &gpu_inlist,
+                             int *array_int,
+                             unsigned long long *array_longlong,
+                             const int max_nbor_size,
+                             const FPTYPE *avg,
+                             const FPTYPE *std,
+                             const int nloc,
+                             const int nall,
+                             const float rcut,
+                             const float rcut_smth,
+                             const std::vector sec,
+                             const int *f_type = NULL);
 
-void env_mat_nbor_update(
-    InputNlist &inlist,
-    InputNlist &gpu_inlist,
-    int &max_nbor_size,
-    int* &nbor_list_dev,
-    const int * mesh, 
-    const int size);
-#endif // TENSORFLOW_USE_ROCM
+template 
+void prod_env_mat_r_gpu_rocm(FPTYPE *em,
+                             FPTYPE *em_deriv,
+                             FPTYPE *rij,
+                             int *nlist,
+                             const FPTYPE *coord,
+                             const int *type,
+                             const InputNlist &gpu_inlist,
+                             int *array_int,
+                             unsigned long long *array_longlong,
+                             const int max_nbor_size,
+                             const FPTYPE *avg,
+                             const FPTYPE *std,
+                             const int nloc,
+                             const int nall,
+                             const float rcut,
+                             const float rcut_smth,
+                             const std::vector sec);
 
-}
+void env_mat_nbor_update(InputNlist &inlist,
+                         InputNlist &gpu_inlist,
+                         int &max_nbor_size,
+                         int *&nbor_list_dev,
+                         const int *mesh,
+                         const int size);
+#endif  // TENSORFLOW_USE_ROCM
 
+}  // namespace deepmd
diff --git a/source/lib/include/prod_env_mat_nvnmd.h b/source/lib/include/prod_env_mat_nvnmd.h
index 367dc11443..33563eaa5f 100644
--- a/source/lib/include/prod_env_mat_nvnmd.h
+++ b/source/lib/include/prod_env_mat_nvnmd.h
@@ -1,10 +1,10 @@
 /*
 //==================================================
- _   _  __     __  _   _   __  __   ____  
-| \ | | \ \   / / | \ | | |  \/  | |  _ \ 
+ _   _  __     __  _   _   __  __   ____
+| \ | | \ \   / / | \ | | |  \/  | |  _ \
 |  \| |  \ \ / /  |  \| | | |\/| | | | | |
 | |\  |   \ V /   | |\  | | |  | | | |_| |
-|_| \_|    \_/    |_| \_| |_|  |_| |____/ 
+|_| \_|    \_/    |_| \_| |_|  |_| |____/
 
 //==================================================
 
@@ -17,39 +17,38 @@ date: 2021-12-6
 
 #pragma once
 #include 
+
 #include "device.h"
 #include "neighbor_list.h"
 
-namespace deepmd{
+namespace deepmd {
 
 // prod_env_mat_a_nvnmd_cpu
 // have been remove for the same function
 
-template
-void prod_env_mat_a_nvnmd_quantize_cpu(
-    FPTYPE * em, 
-    FPTYPE * em_deriv, 
-    FPTYPE * rij, 
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const InputNlist & inlist,
-    const int max_nbor_size,
-    const FPTYPE * avg, 
-    const FPTYPE * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec);
+template 
+void prod_env_mat_a_nvnmd_quantize_cpu(FPTYPE* em,
+                                       FPTYPE* em_deriv,
+                                       FPTYPE* rij,
+                                       int* nlist,
+                                       const FPTYPE* coord,
+                                       const int* type,
+                                       const InputNlist& inlist,
+                                       const int max_nbor_size,
+                                       const FPTYPE* avg,
+                                       const FPTYPE* std,
+                                       const int nloc,
+                                       const int nall,
+                                       const float rcut,
+                                       const float rcut_smth,
+                                       const std::vector sec);
 
 #if GOOGLE_CUDA
 // UNDEFINE
-#endif // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
 // UNDEFINE
-#endif // TENSORFLOW_USE_ROCM
-
-}
+#endif  // TENSORFLOW_USE_ROCM
 
+}  // namespace deepmd
diff --git a/source/lib/include/prod_force.h b/source/lib/include/prod_force.h
index 4d3629f2bf..3c9bd89549 100644
--- a/source/lib/include/prod_force.h
+++ b/source/lib/include/prod_force.h
@@ -1,70 +1,64 @@
 #pragma once
 
-namespace deepmd{
+namespace deepmd {
 
-template
-void prod_force_a_cpu(
-    FPTYPE * force, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * in_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei,
-    const int start_index=0);
+template 
+void prod_force_a_cpu(FPTYPE* force,
+                      const FPTYPE* net_deriv,
+                      const FPTYPE* in_deriv,
+                      const int* nlist,
+                      const int nloc,
+                      const int nall,
+                      const int nnei,
+                      const int start_index = 0);
 
-template
-void prod_force_r_cpu(
-    FPTYPE * force, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * in_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
+template 
+void prod_force_r_cpu(FPTYPE* force,
+                      const FPTYPE* net_deriv,
+                      const FPTYPE* in_deriv,
+                      const int* nlist,
+                      const int nloc,
+                      const int nall,
+                      const int nnei);
 
 #if GOOGLE_CUDA
-template 
-void prod_force_a_gpu_cuda(
-    FPTYPE * force, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * in_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
+template 
+void prod_force_a_gpu_cuda(FPTYPE* force,
+                           const FPTYPE* net_deriv,
+                           const FPTYPE* in_deriv,
+                           const int* nlist,
+                           const int nloc,
+                           const int nall,
+                           const int nnei);
 
-template 
-void prod_force_r_gpu_cuda(
-    FPTYPE * force, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * in_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
-#endif // GOOGLE_CUDA
+template 
+void prod_force_r_gpu_cuda(FPTYPE* force,
+                           const FPTYPE* net_deriv,
+                           const FPTYPE* in_deriv,
+                           const int* nlist,
+                           const int nloc,
+                           const int nall,
+                           const int nnei);
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-template 
-void prod_force_a_gpu_rocm(
-    FPTYPE * force, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * in_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
+template 
+void prod_force_a_gpu_rocm(FPTYPE* force,
+                           const FPTYPE* net_deriv,
+                           const FPTYPE* in_deriv,
+                           const int* nlist,
+                           const int nloc,
+                           const int nall,
+                           const int nnei);
 
-template 
-void prod_force_r_gpu_rocm(
-    FPTYPE * force, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * in_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
-#endif // TENSORFLOW_USE_ROCM
+template 
+void prod_force_r_gpu_rocm(FPTYPE* force,
+                           const FPTYPE* net_deriv,
+                           const FPTYPE* in_deriv,
+                           const int* nlist,
+                           const int nloc,
+                           const int nall,
+                           const int nnei);
+#endif  // TENSORFLOW_USE_ROCM
 
-}
+}  // namespace deepmd
diff --git a/source/lib/include/prod_force_grad.h b/source/lib/include/prod_force_grad.h
index ff0bbe8015..b0b5a59a18 100644
--- a/source/lib/include/prod_force_grad.h
+++ b/source/lib/include/prod_force_grad.h
@@ -1,62 +1,56 @@
 #pragma once
 
-namespace deepmd{
-  
-template
-void prod_force_grad_a_cpu(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei);
+namespace deepmd {
 
-template
-void prod_force_grad_r_cpu(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei);
+template 
+void prod_force_grad_a_cpu(FPTYPE* grad_net,
+                           const FPTYPE* grad,
+                           const FPTYPE* env_deriv,
+                           const int* nlist,
+                           const int nloc,
+                           const int nnei);
+
+template 
+void prod_force_grad_r_cpu(FPTYPE* grad_net,
+                           const FPTYPE* grad,
+                           const FPTYPE* env_deriv,
+                           const int* nlist,
+                           const int nloc,
+                           const int nnei);
 
 #if GOOGLE_CUDA
-template
-void prod_force_grad_a_gpu_cuda(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei);
+template 
+void prod_force_grad_a_gpu_cuda(FPTYPE* grad_net,
+                                const FPTYPE* grad,
+                                const FPTYPE* env_deriv,
+                                const int* nlist,
+                                const int nloc,
+                                const int nnei);
 
-template
-void prod_force_grad_r_gpu_cuda(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei);
-#endif // GOOGLE_CUDA
+template 
+void prod_force_grad_r_gpu_cuda(FPTYPE* grad_net,
+                                const FPTYPE* grad,
+                                const FPTYPE* env_deriv,
+                                const int* nlist,
+                                const int nloc,
+                                const int nnei);
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-template
-void prod_force_grad_a_gpu_rocm(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei);
+template 
+void prod_force_grad_a_gpu_rocm(FPTYPE* grad_net,
+                                const FPTYPE* grad,
+                                const FPTYPE* env_deriv,
+                                const int* nlist,
+                                const int nloc,
+                                const int nnei);
 
-template
-void prod_force_grad_r_gpu_rocm(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei);
-#endif // TENSORFLOW_USE_ROCM
-}
+template 
+void prod_force_grad_r_gpu_rocm(FPTYPE* grad_net,
+                                const FPTYPE* grad,
+                                const FPTYPE* env_deriv,
+                                const int* nlist,
+                                const int nloc,
+                                const int nnei);
+#endif  // TENSORFLOW_USE_ROCM
+}  // namespace deepmd
diff --git a/source/lib/include/prod_virial.h b/source/lib/include/prod_virial.h
index 229ba968da..c51e333a47 100644
--- a/source/lib/include/prod_virial.h
+++ b/source/lib/include/prod_virial.h
@@ -1,82 +1,75 @@
 #pragma once
 
-namespace deepmd{
+namespace deepmd {
 
-template
-void prod_virial_a_cpu(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * env_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
+template 
+void prod_virial_a_cpu(FPTYPE* virial,
+                       FPTYPE* atom_virial,
+                       const FPTYPE* net_deriv,
+                       const FPTYPE* env_deriv,
+                       const FPTYPE* rij,
+                       const int* nlist,
+                       const int nloc,
+                       const int nall,
+                       const int nnei);
 
-template
-void prod_virial_r_cpu(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * env_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
+template 
+void prod_virial_r_cpu(FPTYPE* virial,
+                       FPTYPE* atom_virial,
+                       const FPTYPE* net_deriv,
+                       const FPTYPE* env_deriv,
+                       const FPTYPE* rij,
+                       const int* nlist,
+                       const int nloc,
+                       const int nall,
+                       const int nnei);
 
 #if GOOGLE_CUDA
-template
-void prod_virial_a_gpu_cuda(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * env_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
-
-template
-void prod_virial_r_gpu_cuda(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * env_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
-#endif // GOOGLE_CUDA
+template 
+void prod_virial_a_gpu_cuda(FPTYPE* virial,
+                            FPTYPE* atom_virial,
+                            const FPTYPE* net_deriv,
+                            const FPTYPE* env_deriv,
+                            const FPTYPE* rij,
+                            const int* nlist,
+                            const int nloc,
+                            const int nall,
+                            const int nnei);
 
+template 
+void prod_virial_r_gpu_cuda(FPTYPE* virial,
+                            FPTYPE* atom_virial,
+                            const FPTYPE* net_deriv,
+                            const FPTYPE* env_deriv,
+                            const FPTYPE* rij,
+                            const int* nlist,
+                            const int nloc,
+                            const int nall,
+                            const int nnei);
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-template
-void prod_virial_a_gpu_rocm(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * env_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
+template 
+void prod_virial_a_gpu_rocm(FPTYPE* virial,
+                            FPTYPE* atom_virial,
+                            const FPTYPE* net_deriv,
+                            const FPTYPE* env_deriv,
+                            const FPTYPE* rij,
+                            const int* nlist,
+                            const int nloc,
+                            const int nall,
+                            const int nnei);
 
-template
-void prod_virial_r_gpu_rocm(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * env_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
-#endif // TENSORFLOW_USE_ROCM
+template 
+void prod_virial_r_gpu_rocm(FPTYPE* virial,
+                            FPTYPE* atom_virial,
+                            const FPTYPE* net_deriv,
+                            const FPTYPE* env_deriv,
+                            const FPTYPE* rij,
+                            const int* nlist,
+                            const int nloc,
+                            const int nall,
+                            const int nnei);
+#endif  // TENSORFLOW_USE_ROCM
 
-} //namespace deepmd
+}  // namespace deepmd
diff --git a/source/lib/include/prod_virial_grad.h b/source/lib/include/prod_virial_grad.h
index 8ab8171e14..0e2cc46baa 100644
--- a/source/lib/include/prod_virial_grad.h
+++ b/source/lib/include/prod_virial_grad.h
@@ -1,69 +1,63 @@
 #pragma once
 
-namespace deepmd{
-  
-template
-void prod_virial_grad_a_cpu(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei);
+namespace deepmd {
 
-template
-void prod_virial_grad_r_cpu(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei);
+template 
+void prod_virial_grad_a_cpu(FPTYPE* grad_net,
+                            const FPTYPE* grad,
+                            const FPTYPE* env_deriv,
+                            const FPTYPE* rij,
+                            const int* nlist,
+                            const int nloc,
+                            const int nnei);
+
+template 
+void prod_virial_grad_r_cpu(FPTYPE* grad_net,
+                            const FPTYPE* grad,
+                            const FPTYPE* env_deriv,
+                            const FPTYPE* rij,
+                            const int* nlist,
+                            const int nloc,
+                            const int nnei);
 
 #if GOOGLE_CUDA
-template
-void prod_virial_grad_a_gpu_cuda(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei);
+template 
+void prod_virial_grad_a_gpu_cuda(FPTYPE* grad_net,
+                                 const FPTYPE* grad,
+                                 const FPTYPE* env_deriv,
+                                 const FPTYPE* rij,
+                                 const int* nlist,
+                                 const int nloc,
+                                 const int nnei);
 
-template
-void prod_virial_grad_r_gpu_cuda(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei);
-#endif // GOOGLE_CUDA
+template 
+void prod_virial_grad_r_gpu_cuda(FPTYPE* grad_net,
+                                 const FPTYPE* grad,
+                                 const FPTYPE* env_deriv,
+                                 const FPTYPE* rij,
+                                 const int* nlist,
+                                 const int nloc,
+                                 const int nnei);
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-template
-void prod_virial_grad_a_gpu_rocm(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei);
+template 
+void prod_virial_grad_a_gpu_rocm(FPTYPE* grad_net,
+                                 const FPTYPE* grad,
+                                 const FPTYPE* env_deriv,
+                                 const FPTYPE* rij,
+                                 const int* nlist,
+                                 const int nloc,
+                                 const int nnei);
 
-template
-void prod_virial_grad_r_gpu_rocm(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei);
-#endif // TENSORFLOW_USE_ROCM
+template 
+void prod_virial_grad_r_gpu_rocm(FPTYPE* grad_net,
+                                 const FPTYPE* grad,
+                                 const FPTYPE* env_deriv,
+                                 const FPTYPE* rij,
+                                 const int* nlist,
+                                 const int nloc,
+                                 const int nnei);
+#endif  // TENSORFLOW_USE_ROCM
 
-}
+}  // namespace deepmd
diff --git a/source/lib/include/region.cuh b/source/lib/include/region.cuh
index d3da188239..0feafad49e 100644
--- a/source/lib/include/region.cuh
+++ b/source/lib/include/region.cuh
@@ -1,48 +1,46 @@
 #pragma once
 
-template
-__device__ inline void tensorDotVector(
-    FPTYPE *o_v, 
-    const FPTYPE *i_v, 
-    const FPTYPE *i_t)
-{
-    o_v[0] = i_v[0] * i_t[0*3+0] + i_v[1] * i_t[0*3+1] + i_v[2] * i_t[0*3+2];
-    o_v[1] = i_v[0] * i_t[1*3+0] + i_v[1] * i_t[1*3+1] + i_v[2] * i_t[1*3+2];
-    o_v[2] = i_v[0] * i_t[2*3+0] + i_v[1] * i_t[2*3+1] + i_v[2] * i_t[2*3+2];
+template 
+__device__ inline void tensorDotVector(FPTYPE *o_v,
+                                       const FPTYPE *i_v,
+                                       const FPTYPE *i_t) {
+  o_v[0] = i_v[0] * i_t[0 * 3 + 0] + i_v[1] * i_t[0 * 3 + 1] +
+           i_v[2] * i_t[0 * 3 + 2];
+  o_v[1] = i_v[0] * i_t[1 * 3 + 0] + i_v[1] * i_t[1 * 3 + 1] +
+           i_v[2] * i_t[1 * 3 + 2];
+  o_v[2] = i_v[0] * i_t[2 * 3 + 0] + i_v[1] * i_t[2 * 3 + 1] +
+           i_v[2] * i_t[2 * 3 + 2];
 }
-template
-__device__ inline void tensorTransDotVector(
-    FPTYPE *o_v, 
-    const FPTYPE *i_v, 
-    const FPTYPE *i_t)
-{
-    o_v[0] = i_v[0] * i_t[0*3+0] + i_v[1] * i_t[1*3+0] + i_v[2] * i_t[2*3+0];
-    o_v[1] = i_v[0] * i_t[0*3+1] + i_v[1] * i_t[1*3+1] + i_v[2] * i_t[2*3+1];
-    o_v[2] = i_v[0] * i_t[0*3+2] + i_v[1] * i_t[1*3+2] + i_v[2] * i_t[2*3+2];
+template 
+__device__ inline void tensorTransDotVector(FPTYPE *o_v,
+                                            const FPTYPE *i_v,
+                                            const FPTYPE *i_t) {
+  o_v[0] = i_v[0] * i_t[0 * 3 + 0] + i_v[1] * i_t[1 * 3 + 0] +
+           i_v[2] * i_t[2 * 3 + 0];
+  o_v[1] = i_v[0] * i_t[0 * 3 + 1] + i_v[1] * i_t[1 * 3 + 1] +
+           i_v[2] * i_t[2 * 3 + 1];
+  o_v[2] = i_v[0] * i_t[0 * 3 + 2] + i_v[1] * i_t[1 * 3 + 2] +
+           i_v[2] * i_t[2 * 3 + 2];
 }
-template
-__device__ inline void phys2Inter(
-    FPTYPE *inter, 
-    const FPTYPE *phys, 
-    const FPTYPE *rec_boxt)
-{
-    tensorDotVector(inter, phys, rec_boxt);
+template 
+__device__ inline void phys2Inter(FPTYPE *inter,
+                                  const FPTYPE *phys,
+                                  const FPTYPE *rec_boxt) {
+  tensorDotVector(inter, phys, rec_boxt);
 }
-template
-__device__ inline void inter2Phys(
-    FPTYPE *phys, 
-    const FPTYPE *inter, 
-    const FPTYPE *boxt)
-{
-    tensorTransDotVector(phys, inter, boxt);
+template 
+__device__ inline void inter2Phys(FPTYPE *phys,
+                                  const FPTYPE *inter,
+                                  const FPTYPE *boxt) {
+  tensorTransDotVector(phys, inter, boxt);
+}
+template 
+__device__ inline FPTYPE compute_volume(const FPTYPE *boxt) {
+  FPTYPE volume = boxt[0 * 3 + 0] * (boxt[1 * 3 + 1] * boxt[2 * 3 + 2] -
+                                     boxt[2 * 3 + 1] * boxt[1 * 3 + 2]) -
+                  boxt[0 * 3 + 1] * (boxt[1 * 3 + 0] * boxt[2 * 3 + 2] -
+                                     boxt[2 * 3 + 0] * boxt[1 * 3 + 2]) +
+                  boxt[0 * 3 + 2] * (boxt[1 * 3 + 0] * boxt[2 * 3 + 1] -
+                                     boxt[2 * 3 + 0] * boxt[1 * 3 + 1]);
+  return volume;
 }
-template
-__device__ inline FPTYPE compute_volume(
-    const FPTYPE * boxt)
-{
-    FPTYPE volume =
-    boxt[0*3+0] * (boxt[1*3+1]*boxt[2*3+2] - boxt[2*3+1]*boxt[1*3+2]) - 
-    boxt[0*3+1] * (boxt[1*3+0]*boxt[2*3+2] - boxt[2*3+0]*boxt[1*3+2]) +
-    boxt[0*3+2] * (boxt[1*3+0]*boxt[2*3+1] - boxt[2*3+0]*boxt[1*3+1]);
-    return volume;
-}
\ No newline at end of file
diff --git a/source/lib/include/region.h b/source/lib/include/region.h
index b6428a1bf4..207c8af5f2 100644
--- a/source/lib/include/region.h
+++ b/source/lib/include/region.h
@@ -1,86 +1,60 @@
 #pragma once
 
-namespace deepmd{
+namespace deepmd {
 
-template
-struct Region
-{
-  FPTYPE * boxt;
-  FPTYPE * rec_boxt;
+template 
+struct Region {
+  FPTYPE* boxt;
+  FPTYPE* rec_boxt;
   Region();
   ~Region();
 };
 
-template
-void
-init_region_cpu(
-    Region & region,
-    const FPTYPE * boxt);
+template 
+void init_region_cpu(Region& region, const FPTYPE* boxt);
 
-template
-FPTYPE
-volume_cpu(
-    const Region & region);
+template 
+FPTYPE volume_cpu(const Region& region);
 
-template
-void
-convert_to_inter_cpu(
-    FPTYPE * ri, 
-    const Region & region,
-    const FPTYPE * rp);
+template 
+void convert_to_inter_cpu(FPTYPE* ri,
+                          const Region& region,
+                          const FPTYPE* rp);
 
-template
-void
-convert_to_phys_cpu(
-    FPTYPE * rp, 
-    const Region & region,
-    const FPTYPE * ri);
+template 
+void convert_to_phys_cpu(FPTYPE* rp,
+                         const Region& region,
+                         const FPTYPE* ri);
 
 #if GOOGLE_CUDA
-//only for unittest
-template
-void
-convert_to_inter_gpu(
-    FPTYPE * ri, 
-    const Region & region,
-    const FPTYPE * rp);
+// only for unittest
+template 
+void convert_to_inter_gpu(FPTYPE* ri,
+                          const Region& region,
+                          const FPTYPE* rp);
 
-template
-void
-convert_to_phys_gpu(
-    FPTYPE * rp, 
-    const Region & region,
-    const FPTYPE * ri);
+template 
+void convert_to_phys_gpu(FPTYPE* rp,
+                         const Region& region,
+                         const FPTYPE* ri);
 
-template
-void
-volume_gpu(
-    FPTYPE * volume, 
-    const Region & region);
-#endif // GOOGLE_CUDA
+template 
+void volume_gpu(FPTYPE* volume, const Region& region);
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-//only for unittest
-template
-void
-convert_to_inter_gpu_rocm(
-    FPTYPE * ri, 
-    const Region & region,
-    const FPTYPE * rp);
-
-template
-void
-convert_to_phys_gpu_rocm(
-    FPTYPE * rp, 
-    const Region & region,
-    const FPTYPE * ri);
-
-template
-void
-volume_gpu_rocm(
-    FPTYPE * volume, 
-    const Region & region);
-#endif // TENSORFLOW_USE_ROCM
-}
-
-
+// only for unittest
+template 
+void convert_to_inter_gpu_rocm(FPTYPE* ri,
+                               const Region& region,
+                               const FPTYPE* rp);
+
+template 
+void convert_to_phys_gpu_rocm(FPTYPE* rp,
+                              const Region& region,
+                              const FPTYPE* ri);
+
+template 
+void volume_gpu_rocm(FPTYPE* volume, const Region& region);
+#endif  // TENSORFLOW_USE_ROCM
+}  // namespace deepmd
diff --git a/source/lib/include/soft_min_switch.h b/source/lib/include/soft_min_switch.h
index 4b382cde93..3e71fb54a1 100644
--- a/source/lib/include/soft_min_switch.h
+++ b/source/lib/include/soft_min_switch.h
@@ -1,17 +1,16 @@
 #pragma once
 
-namespace deepmd{
-  
+namespace deepmd {
+
 template 
-void soft_min_switch_cpu(
-    FPTYPE * sw_value,
-    FPTYPE * sw_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int & nloc,
-    const int & nnei, 
-    const FPTYPE & alpha,
-    const FPTYPE & rmin,
-    const FPTYPE & rmax);
+void soft_min_switch_cpu(FPTYPE* sw_value,
+                         FPTYPE* sw_deriv,
+                         const FPTYPE* rij,
+                         const int* nlist,
+                         const int& nloc,
+                         const int& nnei,
+                         const FPTYPE& alpha,
+                         const FPTYPE& rmin,
+                         const FPTYPE& rmax);
 
 }
diff --git a/source/lib/include/soft_min_switch_force.h b/source/lib/include/soft_min_switch_force.h
index 854458a3c7..cd033de115 100644
--- a/source/lib/include/soft_min_switch_force.h
+++ b/source/lib/include/soft_min_switch_force.h
@@ -1,15 +1,14 @@
 #pragma once
 
-namespace deepmd{
-  
-template
-void soft_min_switch_force_cpu(
-    FPTYPE * force, 
-    const FPTYPE * du, 
-    const FPTYPE * sw_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
+namespace deepmd {
+
+template 
+void soft_min_switch_force_cpu(FPTYPE* force,
+                               const FPTYPE* du,
+                               const FPTYPE* sw_deriv,
+                               const int* nlist,
+                               const int nloc,
+                               const int nall,
+                               const int nnei);
 
 }
diff --git a/source/lib/include/soft_min_switch_force_grad.h b/source/lib/include/soft_min_switch_force_grad.h
index afe4c3b36e..f7b2db45a4 100644
--- a/source/lib/include/soft_min_switch_force_grad.h
+++ b/source/lib/include/soft_min_switch_force_grad.h
@@ -1,14 +1,13 @@
 #pragma once
 
-namespace deepmd{
-  
-template
-void soft_min_switch_force_grad_cpu(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad,
-    const FPTYPE * sw_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei);
+namespace deepmd {
+
+template 
+void soft_min_switch_force_grad_cpu(FPTYPE* grad_net,
+                                    const FPTYPE* grad,
+                                    const FPTYPE* sw_deriv,
+                                    const int* nlist,
+                                    const int nloc,
+                                    const int nnei);
 
 }
diff --git a/source/lib/include/soft_min_switch_virial.h b/source/lib/include/soft_min_switch_virial.h
index 4833eec262..b5c11d8764 100644
--- a/source/lib/include/soft_min_switch_virial.h
+++ b/source/lib/include/soft_min_switch_virial.h
@@ -1,17 +1,16 @@
 #pragma once
 
-namespace deepmd{
-  
-template
-void soft_min_switch_virial_cpu(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial, 
-    const FPTYPE * du, 
-    const FPTYPE * sw_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
+namespace deepmd {
+
+template 
+void soft_min_switch_virial_cpu(FPTYPE* virial,
+                                FPTYPE* atom_virial,
+                                const FPTYPE* du,
+                                const FPTYPE* sw_deriv,
+                                const FPTYPE* rij,
+                                const int* nlist,
+                                const int nloc,
+                                const int nall,
+                                const int nnei);
 
 }
diff --git a/source/lib/include/soft_min_switch_virial_grad.h b/source/lib/include/soft_min_switch_virial_grad.h
index 1b1ec0da44..e9b623d8eb 100644
--- a/source/lib/include/soft_min_switch_virial_grad.h
+++ b/source/lib/include/soft_min_switch_virial_grad.h
@@ -1,15 +1,14 @@
 #pragma once
 
-namespace deepmd{
-  
-template
-void soft_min_switch_virial_grad_cpu(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad,
-    const FPTYPE * sw_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei);
+namespace deepmd {
+
+template 
+void soft_min_switch_virial_grad_cpu(FPTYPE* grad_net,
+                                     const FPTYPE* grad,
+                                     const FPTYPE* sw_deriv,
+                                     const FPTYPE* rij,
+                                     const int* nlist,
+                                     const int nloc,
+                                     const int nnei);
 
 }
diff --git a/source/lib/include/switcher.h b/source/lib/include/switcher.h
index 77c6241f67..65012f0a47 100644
--- a/source/lib/include/switcher.h
+++ b/source/lib/include/switcher.h
@@ -2,95 +2,83 @@
 
 #include 
 
-namespace deepmd{
+namespace deepmd {
 
-inline double
-cos_switch (const double & xx, 
-	    const double & rmin, 
-	    const double & rmax) 
-{
+inline double cos_switch(const double& xx,
+                         const double& rmin,
+                         const double& rmax) {
   if (xx < rmin) {
     return 1.;
-  }
-  else if (xx < rmax) {
+  } else if (xx < rmax) {
     const double value = (xx - rmin) / (rmax - rmin) * M_PI;
-    return 0.5 * (cos(value) + 1);  
-  }
-  else {
+    return 0.5 * (cos(value) + 1);
+  } else {
     return 0.;
   }
 }
 
-inline void
-cos_switch (double & vv,
-	    double & dd,
-	    const double & xx, 
-	    const double & rmin, 
-	    const double & rmax) 
-{
+inline void cos_switch(double& vv,
+                       double& dd,
+                       const double& xx,
+                       const double& rmin,
+                       const double& rmax) {
   if (xx < rmin) {
     dd = 0;
     vv = 1;
-  }
-  else if (xx < rmax) {
+  } else if (xx < rmax) {
     double value = (xx - rmin) / (rmax - rmin) * M_PI;
     dd = -0.5 * sin(value) * M_PI / (rmax - rmin);
-    vv = 0.5 * (cos(value) + 1);    
-  }
-  else {
+    vv = 0.5 * (cos(value) + 1);
+  } else {
     dd = 0;
     vv = 0;
   }
 }
 
-inline void
-spline3_switch (double & vv,
-		double & dd,
-		const double & xx, 
-		const double & rmin, 
-		const double & rmax) 
-{
+inline void spline3_switch(double& vv,
+                           double& dd,
+                           const double& xx,
+                           const double& rmin,
+                           const double& rmax) {
   if (xx < rmin) {
     dd = 0;
     vv = 1;
-  }
-  else if (xx < rmax) {
-    double uu = (xx - rmin) / (rmax - rmin) ;
-    double du = 1. / (rmax - rmin) ;
+  } else if (xx < rmax) {
+    double uu = (xx - rmin) / (rmax - rmin);
+    double du = 1. / (rmax - rmin);
     // s(u) = (1+2u)(1-u)^2
     // s'(u) = 2(2u+1)(u-1) + 2(u-1)^2
-    vv = (1 + 2*uu) * (1-uu) * (1-uu);
-    dd = (2 * (2*uu + 1) * (uu-1) + 2 * (uu-1) * (uu-1) ) * du;
-  }
-  else {
+    vv = (1 + 2 * uu) * (1 - uu) * (1 - uu);
+    dd = (2 * (2 * uu + 1) * (uu - 1) + 2 * (uu - 1) * (uu - 1)) * du;
+  } else {
     dd = 0;
     vv = 0;
   }
 }
 
 template 
-inline void 
-spline5_switch (
-    FPTYPE & vv,
-    FPTYPE & dd,
-    const FPTYPE & xx, 
-    const float & rmin, 
-    const float & rmax)
-{
+inline void spline5_switch(FPTYPE& vv,
+                           FPTYPE& dd,
+                           const FPTYPE& xx,
+                           const float& rmin,
+                           const float& rmax) {
   if (xx < rmin) {
     dd = (FPTYPE)0.;
     vv = (FPTYPE)1.;
-  }
-  else if (xx < rmax) {
-    FPTYPE uu = (xx - rmin) / (rmax - rmin) ;
-    FPTYPE du = (FPTYPE)1. / (rmax - rmin) ;
-    vv = uu*uu*uu * ((FPTYPE)-6. * uu*uu + (FPTYPE)15. * uu - (FPTYPE)10.) + (FPTYPE)1.;
-    dd = ( (FPTYPE)3. * uu*uu * ((FPTYPE)-6. * uu*uu + (FPTYPE)15. * uu - (FPTYPE)10.) + uu*uu*uu * ((FPTYPE)-12. * uu + (FPTYPE)15.) ) * du;
-  }
-  else {
+  } else if (xx < rmax) {
+    FPTYPE uu = (xx - rmin) / (rmax - rmin);
+    FPTYPE du = (FPTYPE)1. / (rmax - rmin);
+    vv = uu * uu * uu *
+             ((FPTYPE)-6. * uu * uu + (FPTYPE)15. * uu - (FPTYPE)10.) +
+         (FPTYPE)1.;
+    dd = ((FPTYPE)3. * uu * uu *
+              ((FPTYPE)-6. * uu * uu + (FPTYPE)15. * uu - (FPTYPE)10.) +
+          uu * uu * uu * ((FPTYPE)-12. * uu + (FPTYPE)15.)) *
+         du;
+  } else {
     dd = (FPTYPE)0.;
     vv = (FPTYPE)0.;
   }
 }
 
-}
+}  // namespace deepmd
diff --git a/source/lib/include/tabulate.h b/source/lib/include/tabulate.h
index 846f27263a..3ad79f3cb1 100644
--- a/source/lib/include/tabulate.h
+++ b/source/lib/include/tabulate.h
@@ -1,339 +1,309 @@
 #pragma once
 
-namespace deepmd{
-
-template
-void tabulate_fusion_se_a_cpu(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_a_grad_cpu(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_a_grad_grad_cpu(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei,
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_t_cpu(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const int nloc, 
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_t_grad_cpu(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_t_grad_grad_cpu(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei_i,
-    const int nnei_j,
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_r_cpu(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_r_grad_cpu(
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_r_grad_grad_cpu(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei,
-    const int last_layer_size);
-
-
+namespace deepmd {
+
+template 
+void tabulate_fusion_se_a_cpu(FPTYPE* out,
+                              const FPTYPE* table,
+                              const FPTYPE* table_info,
+                              const FPTYPE* em_x,
+                              const FPTYPE* em,
+                              const int nloc,
+                              const int nnei,
+                              const int last_layer_size);
+
+template 
+void tabulate_fusion_se_a_grad_cpu(FPTYPE* dy_dem_x,
+                                   FPTYPE* dy_dem,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em_x,
+                                   const FPTYPE* em,
+                                   const FPTYPE* dy,
+                                   const int nloc,
+                                   const int nnei,
+                                   const int last_layer_size);
+
+template 
+void tabulate_fusion_se_a_grad_grad_cpu(FPTYPE* dz_dy,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em_x,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dz_dy_dem_x,
+                                        const FPTYPE* dz_dy_dem,
+                                        const int nloc,
+                                        const int nnei,
+                                        const int last_layer_size);
+
+template 
+void tabulate_fusion_se_t_cpu(FPTYPE* out,
+                              const FPTYPE* table,
+                              const FPTYPE* table_info,
+                              const FPTYPE* em_x,
+                              const FPTYPE* em,
+                              const int nloc,
+                              const int nnei_i,
+                              const int nnei_j,
+                              const int last_layer_size);
+
+template 
+void tabulate_fusion_se_t_grad_cpu(FPTYPE* dy_dem_x,
+                                   FPTYPE* dy_dem,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em_x,
+                                   const FPTYPE* em,
+                                   const FPTYPE* dy,
+                                   const int nloc,
+                                   const int nnei_i,
+                                   const int nnei_j,
+                                   const int last_layer_size);
+
+template 
+void tabulate_fusion_se_t_grad_grad_cpu(FPTYPE* dz_dy,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em_x,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dz_dy_dem_x,
+                                        const FPTYPE* dz_dy_dem,
+                                        const int nloc,
+                                        const int nnei_i,
+                                        const int nnei_j,
+                                        const int last_layer_size);
+
+template 
+void tabulate_fusion_se_r_cpu(FPTYPE* out,
+                              const FPTYPE* table,
+                              const FPTYPE* table_info,
+                              const FPTYPE* em,
+                              const int nloc,
+                              const int nnei,
+                              const int last_layer_size);
+
+template 
+void tabulate_fusion_se_r_grad_cpu(FPTYPE* dy_dem,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em,
+                                   const FPTYPE* dy,
+                                   const int nloc,
+                                   const int nnei,
+                                   const int last_layer_size);
+
+template 
+void tabulate_fusion_se_r_grad_grad_cpu(FPTYPE* dz_dy,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dz_dy_dem,
+                                        const int nloc,
+                                        const int nnei,
+                                        const int last_layer_size);
 
 #if GOOGLE_CUDA
-template
-void tabulate_fusion_se_a_gpu_cuda(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const int nloc,
-    const int nnei, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_a_grad_gpu_cuda(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_a_grad_grad_gpu_cuda(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei,
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_t_gpu_cuda(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const int nloc,
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_t_grad_gpu_cuda(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_t_grad_grad_gpu_cuda(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei_i,
-    const int nnei_j,
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_r_gpu_cuda(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em, 
-    const int nloc,
-    const int nnei, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_r_grad_gpu_cuda(
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_r_grad_grad_gpu_cuda(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei,
-    const int last_layer_size);
-#endif // GOOGLE_CUDA
+template 
+void tabulate_fusion_se_a_gpu_cuda(FPTYPE* out,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em_x,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei,
+                                   const int last_layer_size);
+
+template 
+void tabulate_fusion_se_a_grad_gpu_cuda(FPTYPE* dy_dem_x,
+                                        FPTYPE* dy_dem,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em_x,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,
+                                        const int nloc,
+                                        const int nnei,
+                                        const int last_layer_size);
+
+template 
+void tabulate_fusion_se_a_grad_grad_gpu_cuda(FPTYPE* dz_dy,
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em_x,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem_x,
+                                             const FPTYPE* dz_dy_dem,
+                                             const int nloc,
+                                             const int nnei,
+                                             const int last_layer_size);
+
+template 
+void tabulate_fusion_se_t_gpu_cuda(FPTYPE* out,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em_x,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei_i,
+                                   const int nnei_j,
+                                   const int last_layer_size);
+
+template 
+void tabulate_fusion_se_t_grad_gpu_cuda(FPTYPE* dy_dem_x,
+                                        FPTYPE* dy_dem,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em_x,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,
+                                        const int nloc,
+                                        const int nnei_i,
+                                        const int nnei_j,
+                                        const int last_layer_size);
+
+template 
+void tabulate_fusion_se_t_grad_grad_gpu_cuda(FPTYPE* dz_dy,
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em_x,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem_x,
+                                             const FPTYPE* dz_dy_dem,
+                                             const int nloc,
+                                             const int nnei_i,
+                                             const int nnei_j,
+                                             const int last_layer_size);
+
+template 
+void tabulate_fusion_se_r_gpu_cuda(FPTYPE* out,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei,
+                                   const int last_layer_size);
+
+template 
+void tabulate_fusion_se_r_grad_gpu_cuda(FPTYPE* dy_dem,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,
+                                        const int nloc,
+                                        const int nnei,
+                                        const int last_layer_size);
+
+template 
+void tabulate_fusion_se_r_grad_grad_gpu_cuda(FPTYPE* dz_dy,
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem,
+                                             const int nloc,
+                                             const int nnei,
+                                             const int last_layer_size);
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-template
-void tabulate_fusion_se_a_gpu_rocm(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const int nloc,
-    const int nnei, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_a_grad_gpu_rocm(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_a_grad_grad_gpu_rocm(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei,
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_t_gpu_rocm(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const int nloc,
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_t_grad_gpu_rocm(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_t_grad_grad_gpu_rocm(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei_i,
-    const int nnei_j,
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_r_gpu_rocm(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info,  
-    const FPTYPE * em, 
-    const int nloc,
-    const int nnei, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_r_grad_gpu_rocm(
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size);
-
-template
-void tabulate_fusion_se_r_grad_grad_gpu_rocm(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei,
-    const int last_layer_size);
-
-#endif // TENSORFLOW_USE_ROCM
-}
-
+template 
+void tabulate_fusion_se_a_gpu_rocm(FPTYPE* out,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em_x,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei,
+                                   const int last_layer_size);
+
+template 
+void tabulate_fusion_se_a_grad_gpu_rocm(FPTYPE* dy_dem_x,
+                                        FPTYPE* dy_dem,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em_x,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,
+                                        const int nloc,
+                                        const int nnei,
+                                        const int last_layer_size);
+
+template 
+void tabulate_fusion_se_a_grad_grad_gpu_rocm(FPTYPE* dz_dy,
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em_x,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem_x,
+                                             const FPTYPE* dz_dy_dem,
+                                             const int nloc,
+                                             const int nnei,
+                                             const int last_layer_size);
+
+template 
+void tabulate_fusion_se_t_gpu_rocm(FPTYPE* out,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em_x,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei_i,
+                                   const int nnei_j,
+                                   const int last_layer_size);
+
+template 
+void tabulate_fusion_se_t_grad_gpu_rocm(FPTYPE* dy_dem_x,
+                                        FPTYPE* dy_dem,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em_x,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,
+                                        const int nloc,
+                                        const int nnei_i,
+                                        const int nnei_j,
+                                        const int last_layer_size);
+
+template 
+void tabulate_fusion_se_t_grad_grad_gpu_rocm(FPTYPE* dz_dy,
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em_x,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem_x,
+                                             const FPTYPE* dz_dy_dem,
+                                             const int nloc,
+                                             const int nnei_i,
+                                             const int nnei_j,
+                                             const int last_layer_size);
+
+template 
+void tabulate_fusion_se_r_gpu_rocm(FPTYPE* out,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei,
+                                   const int last_layer_size);
+
+template 
+void tabulate_fusion_se_r_grad_gpu_rocm(FPTYPE* dy_dem,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,
+                                        const int nloc,
+                                        const int nnei,
+                                        const int last_layer_size);
+
+template 
+void tabulate_fusion_se_r_grad_grad_gpu_rocm(FPTYPE* dz_dy,
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem,
+                                             const int nloc,
+                                             const int nnei,
+                                             const int last_layer_size);
+
+#endif  // TENSORFLOW_USE_ROCM
+}  // namespace deepmd
diff --git a/source/lib/include/utilities.h b/source/lib/include/utilities.h
index 06e6498ed6..fa0016d129 100644
--- a/source/lib/include/utilities.h
+++ b/source/lib/include/utilities.h
@@ -1,79 +1,59 @@
 #pragma once
 
-#include 
-#include 
-#include 
 #include 
+#include 
+#include 
+#include 
 
-namespace deepmd{
+namespace deepmd {
 
-void cum_sum(
-    std::vector & sec, 
-    const std::vector & n_sel);
+void cum_sum(std::vector& sec, const std::vector& n_sel);
 
 template 
-inline TYPE
-dot1 (const TYPE* r0, const TYPE* r1)
-{
+inline TYPE dot1(const TYPE* r0, const TYPE* r1) {
   return r0[0] * r1[0];
 }
 
 template 
-inline TYPE
-dot2 (const TYPE* r0, const TYPE* r1)
-{
+inline TYPE dot2(const TYPE* r0, const TYPE* r1) {
   return r0[0] * r1[0] + r0[1] * r1[1];
 }
 
 template 
-inline TYPE
-dot3 (const TYPE* r0, const TYPE* r1)
-{
+inline TYPE dot3(const TYPE* r0, const TYPE* r1) {
   return r0[0] * r1[0] + r0[1] * r1[1] + r0[2] * r1[2];
 }
 
 template 
-inline TYPE
-dot4 (const TYPE* r0, const TYPE* r1)
-{
+inline TYPE dot4(const TYPE* r0, const TYPE* r1) {
   return r0[0] * r1[0] + r0[1] * r1[1] + r0[2] * r1[2] + r0[3] * r1[3];
 }
 
 template 
-inline void 
-dotmv3 (TYPE * vec_o, const TYPE * tensor, const TYPE * vec_i)
-{
-  vec_o[0] = dot3(tensor+0, vec_i);
-  vec_o[1] = dot3(tensor+3, vec_i);
-  vec_o[2] = dot3(tensor+6, vec_i);
+inline void dotmv3(TYPE* vec_o, const TYPE* tensor, const TYPE* vec_i) {
+  vec_o[0] = dot3(tensor + 0, vec_i);
+  vec_o[1] = dot3(tensor + 3, vec_i);
+  vec_o[2] = dot3(tensor + 6, vec_i);
 }
 
 template 
-inline void
-cprod (const TYPE * r0,
-       const TYPE * r1,
-       TYPE* r2)
-{
+inline void cprod(const TYPE* r0, const TYPE* r1, TYPE* r2) {
   r2[0] = r0[1] * r1[2] - r0[2] * r1[1];
   r2[1] = r0[2] * r1[0] - r0[0] * r1[2];
   r2[2] = r0[0] * r1[1] - r0[1] * r1[0];
 }
 
 template 
-inline TYPE invsqrt (const TYPE x);
+inline TYPE invsqrt(const TYPE x);
 
 template <>
-inline double
-invsqrt (const double x) 
-{
-  return 1./sqrt (x);
+inline double invsqrt(const double x) {
+  return 1. / sqrt(x);
 }
 
 template <>
-inline float
-invsqrt (const float x) 
-{
-  return 1.f/sqrtf (x);
+inline float invsqrt(const float x) {
+  return 1.f / sqrtf(x);
 }
 
-}
+}  // namespace deepmd
diff --git a/source/lib/src/SimulationRegion.cpp b/source/lib/src/SimulationRegion.cpp
index 9e37802448..c588420d07 100644
--- a/source/lib/src/SimulationRegion.cpp
+++ b/source/lib/src/SimulationRegion.cpp
@@ -1,6 +1,7 @@
 #ifndef MOASP_INLINE_IMPLEMENTATION
 
 #include "SimulationRegion.h"
+
 #include "SimulationRegion_Impl.h"
 
 template class SimulationRegion;
diff --git a/source/lib/src/coord.cc b/source/lib/src/coord.cc
index 7e40286023..8fb0ddc56a 100644
--- a/source/lib/src/coord.cc
+++ b/source/lib/src/coord.cc
@@ -1,66 +1,59 @@
 #include "coord.h"
-#include "neighbor_list.h"
-#include "SimulationRegion.h"
+
 #include 
 
+#include "SimulationRegion.h"
+#include "neighbor_list.h"
+
 using namespace deepmd;
 
 // normalize coords
 template 
-void
-deepmd::
-normalize_coord_cpu(
-    FPTYPE * coord,
-    const int natom,
-    const Region & region)
-{
-  for(int ii = 0; ii < natom; ++ii){
+void deepmd::normalize_coord_cpu(FPTYPE* coord,
+                                 const int natom,
+                                 const Region& region) {
+  for (int ii = 0; ii < natom; ++ii) {
     FPTYPE ri[3];
-    convert_to_inter_cpu(ri, region, coord+3*ii);
-    for(int dd = 0; dd < 3; ++dd){
+    convert_to_inter_cpu(ri, region, coord + 3 * ii);
+    for (int dd = 0; dd < 3; ++dd) {
       ri[dd] = fmod(ri[dd], (FPTYPE)1.);
       if (ri[dd] < (FPTYPE)0.) ri[dd] += (FPTYPE)1.;
     }
-    convert_to_phys_cpu(coord+3*ii, region, ri);
+    convert_to_phys_cpu(coord + 3 * ii, region, ri);
   }
 }
 
-
 template 
-int
-deepmd::
-copy_coord_cpu(
-    FPTYPE * out_c,
-    int * out_t,
-    int * mapping,
-    int * nall,
-    const FPTYPE * in_c,
-    const int * in_t,
-    const int & nloc,
-    const int & mem_nall_,
-    const float & rcut,
-    const Region & region)
-{
+int deepmd::copy_coord_cpu(FPTYPE* out_c,
+                           int* out_t,
+                           int* mapping,
+                           int* nall,
+                           const FPTYPE* in_c,
+                           const int* in_t,
+                           const int& nloc,
+                           const int& mem_nall_,
+                           const float& rcut,
+                           const Region& region) {
   const int mem_nall = mem_nall_;
   std::vector coord(nloc * 3);
   std::vector atype(nloc);
-  std::copy(in_c, in_c+nloc*3, coord.begin());
-  std::copy(in_t, in_t+nloc, atype.begin());
+  std::copy(in_c, in_c + nloc * 3, coord.begin());
+  std::copy(in_t, in_t + nloc, atype.begin());
   SimulationRegion tmpr;
   double tmp_boxt[9];
-  std::copy(region.boxt, region.boxt+9, tmp_boxt);
+  std::copy(region.boxt, region.boxt + 9, tmp_boxt);
   tmpr.reinitBox(tmp_boxt);
-  
-  std::vector out_coord;
+
+  std::vector out_coord;
   std::vector out_atype, out_mapping, ncell, ngcell;
-  copy_coord(out_coord, out_atype, out_mapping, ncell, ngcell, coord, atype, rcut, tmpr);
-  
+  copy_coord(out_coord, out_atype, out_mapping, ncell, ngcell, coord, atype,
+             rcut, tmpr);
+
   *nall = out_atype.size();
-  if(*nall > mem_nall){
+  if (*nall > mem_nall) {
     // size of the output arrays is not large enough
     return 1;
-  }
-  else{
+  } else {
     std::copy(out_coord.begin(), out_coord.end(), out_c);
     std::copy(out_atype.begin(), out_atype.end(), out_t);
     std::copy(out_mapping.begin(), out_mapping.end(), mapping);
@@ -69,97 +62,74 @@ copy_coord_cpu(
 }
 
 template 
-void
-deepmd::
-compute_cell_info(
-    int * cell_info, //nat_stt,ncell,ext_stt,ext_end,ngcell,cell_shift,cell_iter,loc_cellnum,total_cellnum
-    const float & rcut,
-    const Region & region)
-{
+void deepmd::compute_cell_info(
+    int*
+        cell_info,  // nat_stt,ncell,ext_stt,ext_end,ngcell,cell_shift,cell_iter,loc_cellnum,total_cellnum
+    const float& rcut,
+    const Region& region) {
   SimulationRegion tmpr;
-	double to_face [3];
+  double to_face[3];
   double tmp_boxt[9];
-  std::copy(region.boxt, region.boxt+9, tmp_boxt);
-	tmpr.reinitBox(tmp_boxt);
-	tmpr.toFaceDistance (to_face);
-  double cell_size [3];
-  for (int dd = 0; dd < 3; ++dd){
-    cell_info[dd]=0; //nat_stt
-    cell_info[3+dd]  = to_face[dd] / rcut; //ncell
-    if (cell_info[3+dd] == 0) cell_info[3+dd] = 1;
-    cell_size[dd] = to_face[dd] / cell_info[3+dd]; 
-    cell_info[12+dd] = int(rcut / cell_size[dd]) + 1; //ngcell
-    cell_info[6+dd]=-cell_info[12+dd]; //ext_stt
-    cell_info[9+dd]=cell_info[3+dd]+cell_info[12+dd]; //ext_end
-    cell_info[15+dd]=cell_info[12+dd]; //cell_shift
-    cell_info[18+dd]= rcut / cell_size[dd]; //cell_iter
-    if (cell_info[18+dd] * cell_size[dd] < rcut) cell_info[18+dd] += 1;
+  std::copy(region.boxt, region.boxt + 9, tmp_boxt);
+  tmpr.reinitBox(tmp_boxt);
+  tmpr.toFaceDistance(to_face);
+  double cell_size[3];
+  for (int dd = 0; dd < 3; ++dd) {
+    cell_info[dd] = 0;                       // nat_stt
+    cell_info[3 + dd] = to_face[dd] / rcut;  // ncell
+    if (cell_info[3 + dd] == 0) cell_info[3 + dd] = 1;
+    cell_size[dd] = to_face[dd] / cell_info[3 + dd];
+    cell_info[12 + dd] = int(rcut / cell_size[dd]) + 1;          // ngcell
+    cell_info[6 + dd] = -cell_info[12 + dd];                     // ext_stt
+    cell_info[9 + dd] = cell_info[3 + dd] + cell_info[12 + dd];  // ext_end
+    cell_info[15 + dd] = cell_info[12 + dd];                     // cell_shift
+    cell_info[18 + dd] = rcut / cell_size[dd];                   // cell_iter
+    if (cell_info[18 + dd] * cell_size[dd] < rcut) cell_info[18 + dd] += 1;
   }
-  cell_info[21] = (cell_info[3+0]) * (cell_info[3+1]) * (cell_info[3+2]); //loc_cellnum
-  cell_info[22] = (2 * cell_info[12+0] + cell_info[3+0]) * (2 * cell_info[12+1] + cell_info[3+1]) * (2 * cell_info[12+2] + cell_info[3+2]); //total_cellnum
+  cell_info[21] = (cell_info[3 + 0]) * (cell_info[3 + 1]) *
+                  (cell_info[3 + 2]);  // loc_cellnum
+  cell_info[22] = (2 * cell_info[12 + 0] + cell_info[3 + 0]) *
+                  (2 * cell_info[12 + 1] + cell_info[3 + 1]) *
+                  (2 * cell_info[12 + 2] + cell_info[3 + 2]);  // total_cellnum
 }
 
-template
-void
-deepmd::
-normalize_coord_cpu(
-    double * coord,
-    const int natom,
-    const deepmd::Region & region);
-
-template
-void
-deepmd::
-normalize_coord_cpu(
-    float * coord,
-    const int natom,
-    const deepmd::Region & region);
-
-template
-int
-deepmd::
-copy_coord_cpu(
-    double * out_c,
-    int * out_t,
-    int * mapping,
-    int * nall,
-    const double * in_c,
-    const int * in_t,
-    const int & nloc,
-    const int & mem_nall,
-    const float & rcut,
-    const deepmd::Region & region);
-
-template
-int
-deepmd::
-copy_coord_cpu(
-    float * out_c,
-    int * out_t,
-    int * mapping,
-    int * nall,
-    const float * in_c,
-    const int * in_t,
-    const int & nloc,
-    const int & mem_nall,
-    const float & rcut,
-    const deepmd::Region & region);
+template void deepmd::normalize_coord_cpu(
+    double* coord, const int natom, const deepmd::Region& region);
 
-template
-void
-deepmd::
-compute_cell_info(
-    int * cell_info, //nat_stt,ncell,ext_stt,ext_end,ngcell,cell_shift,cell_iter,loc_cellnum,total_cellnum
-    const float & rcut,
-    const Region & region);
+template void deepmd::normalize_coord_cpu(
+    float* coord, const int natom, const deepmd::Region& region);
 
-template
-void
-deepmd::
-compute_cell_info(
-    int * cell_info, //nat_stt,ncell,ext_stt,ext_end,ngcell,cell_shift,cell_iter,loc_cellnum,total_cellnum
-    const float & rcut,
-    const Region & region);
+template int deepmd::copy_coord_cpu(
+    double* out_c,
+    int* out_t,
+    int* mapping,
+    int* nall,
+    const double* in_c,
+    const int* in_t,
+    const int& nloc,
+    const int& mem_nall,
+    const float& rcut,
+    const deepmd::Region& region);
 
+template int deepmd::copy_coord_cpu(float* out_c,
+                                           int* out_t,
+                                           int* mapping,
+                                           int* nall,
+                                           const float* in_c,
+                                           const int* in_t,
+                                           const int& nloc,
+                                           const int& mem_nall,
+                                           const float& rcut,
+                                           const deepmd::Region& region);
 
+template void deepmd::compute_cell_info(
+    int*
+        cell_info,  // nat_stt,ncell,ext_stt,ext_end,ngcell,cell_shift,cell_iter,loc_cellnum,total_cellnum
+    const float& rcut,
+    const Region& region);
 
+template void deepmd::compute_cell_info(
+    int*
+        cell_info,  // nat_stt,ncell,ext_stt,ext_end,ngcell,cell_shift,cell_iter,loc_cellnum,total_cellnum
+    const float& rcut,
+    const Region& region);
diff --git a/source/lib/src/cuda/CMakeLists.txt b/source/lib/src/cuda/CMakeLists.txt
index 8ba570a3f9..4d1cf35405 100644
--- a/source/lib/src/cuda/CMakeLists.txt
+++ b/source/lib/src/cuda/CMakeLists.txt
@@ -5,158 +5,239 @@ project(deepmd_op_cuda)
 
 # SET(CUDA_SEPARABLE_COMPILATION ON)
 find_package(CUDA REQUIRED)
-if (NOT CUDA_FOUND)
-    message(STATUS "CUDA not found. Project will not be built.")
+if(NOT CUDA_FOUND)
+  message(STATUS "CUDA not found. Project will not be built.")
 endif(NOT CUDA_FOUND)
 
-# take dynamic open cudart library replace of static one
-# so it's not required when using CPUs
+# take dynamic open cudart library replace of static one so it's not required
+# when using CPUs
 add_subdirectory(cudart)
 # important: it must be before cuda_add_library and any link target to cudart
 set(CUDA_LIBRARIES deepmd_dyn_cudart)
 
 # set c++ version c++11
-SET(CMAKE_CXX_STANDARD 11)
-SET(CMAKE_CUDA_STANDARD 11)
-# nvcc -o libdeepmd_op_cuda.so -I/usr/local/cub-1.8.0 -rdc=true -DHIGH_PREC=true -gencode arch=compute_61,code=sm_61 -shared -Xcompiler -fPIC deepmd_op.cu -L/usr/local/cuda/lib64 -lcudadevrt
-# very important here! Include path to cub.
-# for searching device compute capability, https://developer.nvidia.com/cuda-gpus
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CUDA_STANDARD 11)
+# nvcc -o libdeepmd_op_cuda.so -I/usr/local/cub-1.8.0 -rdc=true -DHIGH_PREC=true
+# -gencode arch=compute_61,code=sm_61 -shared -Xcompiler -fPIC deepmd_op.cu
+# -L/usr/local/cuda/lib64 -lcudadevrt very important here! Include path to cub.
+# for searching device compute capability,
+# https://developer.nvidia.com/cuda-gpus
 
-# cub has been included in CUDA Toolkit 11, we do not need to include it any more
-# see https://github.com/NVIDIA/cub
-if (${CUDA_VERSION_MAJOR} LESS_EQUAL "10")
-include_directories(cub)
-endif ()
+# cub has been included in CUDA Toolkit 11, we do not need to include it any
+# more see https://github.com/NVIDIA/cub
+if(${CUDA_VERSION_MAJOR} LESS_EQUAL "10")
+  include_directories(cub)
+endif()
 
 message(STATUS "CUDA major version is " ${CUDA_VERSION_MAJOR})
 
-if (${CUDA_VERSION_MAJOR} GREATER "11" OR (${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR} GREATER_EQUAL "5"))
-    # nvcc flags
-    set(CUDA_NVCC_FLAGS -arch=all; # embeds a compiled code image for all supported architectures (sm_*), and a PTX program for the highest major virtual architecture
-                        -O3; -Xcompiler -fPIC;
-                        ${CUDA_NVCC_FLAGS}
-        )
-elseif (${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR} GREATER "0")
-    # nvcc flags
-    set(CUDA_NVCC_FLAGS -gencode arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-                        -gencode arch=compute_53,code=sm_53; 
-                        -gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
-                        -gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
-                        -gencode arch=compute_70,code=sm_70; # Volta  - GV100/Tesla V100, GTX 1180 (GV104)
-                        -gencode arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
-                        -gencode arch=compute_80,code=sm_80; # Anpere - A100
-                        -gencode arch=compute_86,code=sm_86; # Anpere - RTX 3090
-                        -O3; -Xcompiler -fPIC;
-                        ${CUDA_NVCC_FLAGS}
-        )
-elseif (${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR} STREQUAL "0")
-    # nvcc flags
-    set(CUDA_NVCC_FLAGS -gencode arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-                        -gencode arch=compute_53,code=sm_53; 
-                        -gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
-                        -gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
-                        -gencode arch=compute_70,code=sm_70; # Volta  - GV100/Tesla V100, GTX 1180 (GV104)
-                        -gencode arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
-                        -gencode arch=compute_80,code=sm_80; # Anpere - A100
-                        -O3; -Xcompiler -fPIC;
-                        ${CUDA_NVCC_FLAGS}
-        )
-elseif (${CUDA_VERSION_MAJOR} STREQUAL "10")
-    set(CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30; # Tesla K10, Quadro K600 K420 K410,
-                        -gencode arch=compute_35,code=sm_35; # Tesla K20 K40, TITAN Z Black, GTX 780Ti 780
-                        -gencode arch=compute_37,code=sm_37; # Tesla K80
-                        -gencode arch=compute_50,code=sm_50; # Quadro 620 1200
-                        -gencode arch=compute_52,code=sm_52; # Tesla M40 M40, Quadro M6000 M5000 M4000 M2000, TITAN X, GTX 980Ti 980 970 960 950
-                        -gencode arch=compute_53,code=sm_53; # Jetson TX1, Tegra X1
-                        -gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
-                        -gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
-                        -gencode arch=compute_70,code=sm_70; # Volta  - GV100/Tesla V100, GTX 1180 (GV104)
-                        -gencode arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
-                        -O3; -Xcompiler -fPIC;
-                        ${CUDA_NVCC_FLAGS}
-        )
-elseif (${CUDA_VERSION_MAJOR} STREQUAL "9")
-    set(CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30;
-                        -gencode arch=compute_35,code=sm_35;
-                        -gencode arch=compute_37,code=sm_37;
-                        -gencode arch=compute_50,code=sm_50;
-                        -gencode arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-                        -gencode arch=compute_53,code=sm_53; 
-                        -gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
-                        -gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
-                        -gencode arch=compute_70,code=sm_70; # Volta  - GV100/Tesla V100, GTX 1180 (GV104)
-                        -O3; -Xcompiler -fPIC;
-                        ${CUDA_NVCC_FLAGS}
-        )
-elseif (${CUDA_VERSION_MAJOR} STREQUAL "8")
-    set(CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30;
-                        -gencode arch=compute_35,code=sm_35;
-                        -gencode arch=compute_37,code=sm_37;
-                        -gencode arch=compute_50,code=sm_50;
-                        -gencode arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-                        -gencode arch=compute_53,code=sm_53; 
-                        -gencode arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic Pascal)
-                        -gencode arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX 1050, GTX 1030, Titan Xp, Tesla P40, Tesla P4, Discrete GPU on the NVIDIA Drive PX2
-                        -O3; -Xcompiler -fPIC;
-                        ${CUDA_NVCC_FLAGS}
-        )
-elseif (${CUDA_VERSION_MAJOR} STREQUAL "7")
-    set(CUDA_NVCC_FLAGS -gencode arch=compute_30,code=sm_30;
-                        -gencode arch=compute_35,code=sm_35;
-                        -gencode arch=compute_37,code=sm_37;
-                        -gencode arch=compute_50,code=sm_50;
-                        -gencode arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
-                        -gencode arch=compute_53,code=sm_53; 
-                        -O3; -Xcompiler -fPIC;
-                        ${CUDA_NVCC_FLAGS}
-        )
-else () 
-    message(FATAL_ERROR "unsupported CUDA_VERSION " ${CUDA_VERSION} ", please use a newer version (>=7.0) of CUDA toolkit!")
+if(${CUDA_VERSION_MAJOR} GREATER "11"
+   OR (${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR}
+                                               GREATER_EQUAL "5"))
+  # nvcc flags
+  set(CUDA_NVCC_FLAGS
+      -arch=all; # embeds a compiled code image for all supported architectures
+                 # (sm_*), and a PTX program for the highest major virtual
+                 # architecture
+      -O3;
+      -Xcompiler
+      -fPIC;
+      ${CUDA_NVCC_FLAGS})
+elseif(${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR} GREATER
+                                               "0")
+  # nvcc flags
+  set(CUDA_NVCC_FLAGS
+      -gencode
+      arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
+      -gencode
+      arch=compute_53,code=sm_53;
+      -gencode
+      arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
+                                  # Pascal)
+      -gencode
+      arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
+                                  # 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
+                                  # P4, Discrete GPU on the NVIDIA Drive PX2
+      -gencode
+      arch=compute_70,code=sm_70; # Volta  - GV100/Tesla V100, GTX 1180 (GV104)
+      -gencode
+      arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
+      -gencode
+      arch=compute_80,code=sm_80; # Anpere - A100
+      -gencode
+      arch=compute_86,code=sm_86; # Anpere - RTX 3090
+      -O3;
+      -Xcompiler
+      -fPIC;
+      ${CUDA_NVCC_FLAGS})
+elseif(${CUDA_VERSION_MAJOR} STREQUAL "11" AND ${CUDA_VERSION_MINOR} STREQUAL
+                                               "0")
+  # nvcc flags
+  set(CUDA_NVCC_FLAGS
+      -gencode
+      arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
+      -gencode
+      arch=compute_53,code=sm_53;
+      -gencode
+      arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
+                                  # Pascal)
+      -gencode
+      arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
+                                  # 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
+                                  # P4, Discrete GPU on the NVIDIA Drive PX2
+      -gencode
+      arch=compute_70,code=sm_70; # Volta  - GV100/Tesla V100, GTX 1180 (GV104)
+      -gencode
+      arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
+      -gencode
+      arch=compute_80,code=sm_80; # Anpere - A100
+      -O3;
+      -Xcompiler
+      -fPIC;
+      ${CUDA_NVCC_FLAGS})
+elseif(${CUDA_VERSION_MAJOR} STREQUAL "10")
+  set(CUDA_NVCC_FLAGS
+      -gencode
+      arch=compute_30,code=sm_30; # Tesla K10, Quadro K600 K420 K410,
+      -gencode
+      arch=compute_35,code=sm_35; # Tesla K20 K40, TITAN Z Black, GTX 780Ti 780
+      -gencode
+      arch=compute_37,code=sm_37; # Tesla K80
+      -gencode
+      arch=compute_50,code=sm_50; # Quadro 620 1200
+      -gencode
+      arch=compute_52,code=sm_52; # Tesla M40 M40, Quadro M6000 M5000 M4000
+                                  # M2000, TITAN X, GTX 980Ti 980 970 960 950
+      -gencode
+      arch=compute_53,code=sm_53; # Jetson TX1, Tegra X1
+      -gencode
+      arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
+                                  # Pascal)
+      -gencode
+      arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
+                                  # 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
+                                  # P4, Discrete GPU on the NVIDIA Drive PX2
+      -gencode
+      arch=compute_70,code=sm_70; # Volta  - GV100/Tesla V100, GTX 1180 (GV104)
+      -gencode
+      arch=compute_75,code=sm_75; # Turing - RTX 2080, Titan RTX, Quadro R8000
+      -O3;
+      -Xcompiler
+      -fPIC;
+      ${CUDA_NVCC_FLAGS})
+elseif(${CUDA_VERSION_MAJOR} STREQUAL "9")
+  set(CUDA_NVCC_FLAGS
+      -gencode
+      arch=compute_30,code=sm_30;
+      -gencode
+      arch=compute_35,code=sm_35;
+      -gencode
+      arch=compute_37,code=sm_37;
+      -gencode
+      arch=compute_50,code=sm_50;
+      -gencode
+      arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
+      -gencode
+      arch=compute_53,code=sm_53;
+      -gencode
+      arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
+                                  # Pascal)
+      -gencode
+      arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
+                                  # 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
+                                  # P4, Discrete GPU on the NVIDIA Drive PX2
+      -gencode
+      arch=compute_70,code=sm_70; # Volta  - GV100/Tesla V100, GTX 1180 (GV104)
+      -O3;
+      -Xcompiler
+      -fPIC;
+      ${CUDA_NVCC_FLAGS})
+elseif(${CUDA_VERSION_MAJOR} STREQUAL "8")
+  set(CUDA_NVCC_FLAGS
+      -gencode
+      arch=compute_30,code=sm_30;
+      -gencode
+      arch=compute_35,code=sm_35;
+      -gencode
+      arch=compute_37,code=sm_37;
+      -gencode
+      arch=compute_50,code=sm_50;
+      -gencode
+      arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
+      -gencode
+      arch=compute_53,code=sm_53;
+      -gencode
+      arch=compute_60,code=sm_60; # Pascal – GP100/Tesla P100 – DGX-1 (Generic
+                                  # Pascal)
+      -gencode
+      arch=compute_61,code=sm_61; # Pascal - GTX 1080, GTX 1070, GTX 1060, GTX
+                                  # 1050, GTX 1030, Titan Xp, Tesla P40, Tesla
+                                  # P4, Discrete GPU on the NVIDIA Drive PX2
+      -O3;
+      -Xcompiler
+      -fPIC;
+      ${CUDA_NVCC_FLAGS})
+elseif(${CUDA_VERSION_MAJOR} STREQUAL "7")
+  set(CUDA_NVCC_FLAGS
+      -gencode
+      arch=compute_30,code=sm_30;
+      -gencode
+      arch=compute_35,code=sm_35;
+      -gencode
+      arch=compute_37,code=sm_37;
+      -gencode
+      arch=compute_50,code=sm_50;
+      -gencode
+      arch=compute_52,code=sm_52; # Tesla M40, Tesla M40, Quadro M6000...
+      -gencode
+      arch=compute_53,code=sm_53;
+      -O3;
+      -Xcompiler
+      -fPIC;
+      ${CUDA_NVCC_FLAGS})
+else()
+  message(FATAL_ERROR "unsupported CUDA_VERSION " ${CUDA_VERSION}
+                      ", please use a newer version (>=7.0) of CUDA toolkit!")
 endif()
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -DCUB_IGNORE_DEPRECATED_CPP_DIALECT -DCUB_IGNORE_DEPRECATED_CPP_DIALECT")
+set(CMAKE_CXX_FLAGS
+    "${CMAKE_CXX_FLAGS} -std=c++11 -DCUB_IGNORE_DEPRECATED_CPP_DIALECT -DCUB_IGNORE_DEPRECATED_CPP_DIALECT"
+)
 
-if (${CUDA_VERSION_MAJOR} LESS_EQUAL "11")
-	# check unsupported -std=c++17
-	set(CMAKE_CXX_FLAGS_LIST "${CMAKE_CXX_FLAGS}")
-	separate_arguments(CMAKE_CXX_FLAGS_LIST)
-	if ("-std=c++17" IN_LIST CMAKE_CXX_FLAGS_LIST)
-		message(WARNING "Environment variable CXXFLAGS contains flag --std=c++17 which is unsupported by CUDA ${CUDA_VERSION}. Such flag will be removed automatically.")
-		string(REPLACE "-std=c++17" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
-	endif()
+if(${CUDA_VERSION_MAJOR} LESS_EQUAL "11")
+  # check unsupported -std=c++17
+  set(CMAKE_CXX_FLAGS_LIST "${CMAKE_CXX_FLAGS}")
+  separate_arguments(CMAKE_CXX_FLAGS_LIST)
+  if("-std=c++17" IN_LIST CMAKE_CXX_FLAGS_LIST)
+    message(
+      WARNING
+        "Environment variable CXXFLAGS contains flag --std=c++17 which is unsupported by CUDA ${CUDA_VERSION}. Such flag will be removed automatically."
+    )
+    string(REPLACE "-std=c++17" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+  endif()
 endif()
 
-file (GLOB SOURCE_FILES "*.cu" )
+file(GLOB SOURCE_FILES "*.cu")
 
 cuda_add_library(deepmd_op_cuda SHARED ${SOURCE_FILES})
 target_include_directories(
-    deepmd_op_cuda
-    PUBLIC
-    $
-    $
-)
+  deepmd_op_cuda
+  PUBLIC $
+         $)
 target_precompile_headers(deepmd_op_cuda PUBLIC [["device.h"]])
 if(APPLE)
-    set_target_properties(
-        deepmd_op_cuda 
-        PROPERTIES 
-        INSTALL_RPATH @loader_path
-    )
+  set_target_properties(deepmd_op_cuda PROPERTIES INSTALL_RPATH @loader_path)
 else()
-    set_target_properties(
-        deepmd_op_cuda
-        PROPERTIES
-        INSTALL_RPATH "$ORIGIN"
-    )
+  set_target_properties(deepmd_op_cuda PROPERTIES INSTALL_RPATH "$ORIGIN")
 endif()
 
-if (BUILD_CPP_IF)
-    install(
-        TARGETS deepmd_op_cuda
-        EXPORT ${CMAKE_PROJECT_NAME}Targets
-        DESTINATION lib/
-    )
-endif (BUILD_CPP_IF)
-if (BUILD_PY_IF)
-    install(TARGETS deepmd_op_cuda DESTINATION deepmd/op/)
-endif (BUILD_PY_IF)
+if(BUILD_CPP_IF)
+  install(
+    TARGETS deepmd_op_cuda
+    EXPORT ${CMAKE_PROJECT_NAME}Targets
+    DESTINATION lib/)
+endif(BUILD_CPP_IF)
+if(BUILD_PY_IF)
+  install(TARGETS deepmd_op_cuda DESTINATION deepmd/op/)
+endif(BUILD_PY_IF)
diff --git a/source/lib/src/cuda/coord.cu b/source/lib/src/cuda/coord.cu
index 660619cbad..e35b3a0a7c 100644
--- a/source/lib/src/cuda/coord.cu
+++ b/source/lib/src/cuda/coord.cu
@@ -1,430 +1,428 @@
-#include "device.h"
 #include "coord.h"
+#include "device.h"
 #include "region.cuh"
 
-__device__ inline int collapse_index(
-    const int * idx,
-    const int * size)
-{
-    return (idx[0] * size[1] + idx[1]) * size[2] + idx[2];
+__device__ inline int collapse_index(const int *idx, const int *size) {
+  return (idx[0] * size[1] + idx[1]) * size[2] + idx[2];
 }
-__device__ inline void index_recover(
-    const int in_idx,
-    const int * size, 
-    int * idx)
-{
-    idx[2]=in_idx%size[2];
-    idx[1]=int(in_idx/size[2])%size[1];
-    idx[0]=int(int(in_idx/size[2])/size[1]);
+__device__ inline void index_recover(const int in_idx,
+                                     const int *size,
+                                     int *idx) {
+  idx[2] = in_idx % size[2];
+  idx[1] = int(in_idx / size[2]) % size[1];
+  idx[0] = int(int(in_idx / size[2]) / size[1]);
 }
-__device__ inline void idx_addshift(
-    int * idx, 
-    const int * shift)
-{
-    for(int dd=0;dd<3;dd++)
-    {
-        idx[dd]+=shift[dd];
-    }
+__device__ inline void idx_addshift(int *idx, const int *shift) {
+  for (int dd = 0; dd < 3; dd++) {
+    idx[dd] += shift[dd];
+  }
 }
-__device__ inline void idx_unshift(
-    int * idx, 
-    const int * shift)
-{
-    for(int dd=0;dd<3;dd++)
-    {
-        idx[dd]-=shift[dd];
-    }
+__device__ inline void idx_unshift(int *idx, const int *shift) {
+  for (int dd = 0; dd < 3; dd++) {
+    idx[dd] -= shift[dd];
+  }
 }
-__device__ inline int compute_pbc_shift(
-    int idx, 
-    int ncell)
-{
-    int shift = 0;
-    if (idx < 0) {
+__device__ inline int compute_pbc_shift(int idx, int ncell) {
+  int shift = 0;
+  if (idx < 0) {
     shift = 1;
-    while (idx + shift * ncell < 0) shift ++;
-    }
-    else if (idx >= ncell) {
+    while (idx + shift * ncell < 0) shift++;
+  } else if (idx >= ncell) {
     shift = -1;
-    while (idx + shift * ncell >= ncell) shift --;
-    }
-    return shift;
+    while (idx + shift * ncell >= ncell) shift--;
+  }
+  return shift;
 }
 
-__device__ inline double _fmod(double x, double y) {return fmod(x, y);}
-__device__ inline float _fmod(float x, float y) {return fmodf(x, y);}
+__device__ inline double _fmod(double x, double y) { return fmod(x, y); }
+__device__ inline float _fmod(float x, float y) { return fmodf(x, y); }
 
+template 
+__global__ void normalize_one(FPTYPE *out_c,
+                              const FPTYPE *boxt,
+                              const FPTYPE *rec_boxt,
+                              const int nall) {
+  // <<>>
+  int idy = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idy >= nall) {
+    return;
+  }
+  FPTYPE inter[3];
+  phys2Inter(inter, out_c + idy * 3, rec_boxt);
+  for (int dd = 0; dd < 3; ++dd) {
+    inter[dd] = _fmod(inter[dd], (FPTYPE)1.);
+    if (inter[dd] < (FPTYPE)0.) inter[dd] += (FPTYPE)1.;
+  }
+  inter2Phys(out_c + idy * 3, inter, boxt);
+}
 
-template
-__global__ void normalize_one(
-    FPTYPE *out_c,
-    const FPTYPE *boxt,
-    const FPTYPE *rec_boxt,
-    const int nall)
-{
-    // <<>>
-    int idy=blockIdx.x*blockDim.x+threadIdx.x;
-    if (idy>=nall){return;}
+template 
+__global__ void _fill_idx_cellmap(int *idx_cellmap,
+                                  int *idx_cellmap_noshift,
+                                  const FPTYPE *in_c,
+                                  const FPTYPE *rec_boxt,
+                                  const int *nat_stt,
+                                  const int *nat_end,
+                                  const int *ext_stt,
+                                  const int *ext_end,
+                                  const int nloc) {
+  int idy = blockIdx.x * blockDim.x + threadIdx.x;
+  int ext_ncell[3];
+  int global_grid[3];
+  int idx_orig_shift[3];
+  FPTYPE cell_size[3];
+  FPTYPE nat_orig[3];
+  for (int dd = 0; dd < 3; ++dd) {
+    ext_ncell[dd] = ext_end[dd] - ext_stt[dd];
+    global_grid[dd] = nat_end[dd] - nat_stt[dd];
+    idx_orig_shift[dd] = nat_stt[dd] - ext_stt[dd];
+    cell_size[dd] = (FPTYPE)1. / global_grid[dd];
+    nat_orig[dd] = nat_stt[dd] * cell_size[dd];
+  }
+  if (idy < nloc) {
+    int idx_noshift[3];
+    int idx[3];
     FPTYPE inter[3];
-    phys2Inter(inter,out_c+idy*3,rec_boxt);
+    phys2Inter(inter, in_c + idy * 3, rec_boxt);
     for (int dd = 0; dd < 3; ++dd) {
-        inter[dd]=_fmod(inter[dd], (FPTYPE)1.);
-        if (inter[dd] <  (FPTYPE)0.) inter[dd] += (FPTYPE)1.;
+      idx_noshift[dd] = (inter[dd] - nat_orig[dd]) / cell_size[dd];
+      if (inter[dd] - nat_orig[dd] < (FPTYPE)0.) idx_noshift[dd]--;
+      if (idx_noshift[dd] < nat_stt[dd]) {
+        idx_noshift[dd] = nat_stt[dd];
+      } else if (idx_noshift[dd] >= nat_end[dd]) {
+        idx_noshift[dd] = nat_end[dd] - 1;
+      }
+      idx[dd] = idx_noshift[dd] + idx_orig_shift[dd];
     }
-    inter2Phys(out_c+idy*3,inter,boxt);
+    idx_cellmap_noshift[idy] = collapse_index(idx_noshift, global_grid);
+    idx_cellmap[idy] = collapse_index(idx, ext_ncell);
+  }
 }
 
-template
-__global__ void _fill_idx_cellmap(
-    int * idx_cellmap,
-    int * idx_cellmap_noshift,
-    const FPTYPE *in_c,
-    const FPTYPE *rec_boxt,
-    const int *nat_stt,
-    const int *nat_end,
-    const int *ext_stt,
-    const int *ext_end,
-    const int nloc)
-{
-    int idy = blockIdx.x*blockDim.x+threadIdx.x;
-    int ext_ncell[3];
-    int global_grid[3];
-    int idx_orig_shift[3];
-    FPTYPE cell_size[3];
-    FPTYPE nat_orig[3];
-    for (int dd = 0; dd < 3; ++dd) 
-    {
-        ext_ncell[dd] = ext_end[dd] - ext_stt[dd];
-        global_grid[dd] = nat_end[dd] - nat_stt[dd];
-        idx_orig_shift[dd] = nat_stt[dd] - ext_stt[dd];
-        cell_size[dd] = (FPTYPE)1./global_grid[dd];
-        nat_orig[dd] = nat_stt[dd] * cell_size[dd];
-    }
-    if (idy= nat_end[dd]) 
-            {
-                idx_noshift[dd] = nat_end[dd] - 1;
-            }
-            idx[dd] = idx_noshift[dd]+idx_orig_shift[dd];
-        }
-        idx_cellmap_noshift[idy]=collapse_index(idx_noshift, global_grid);
-        idx_cellmap[idy]=collapse_index(idx, ext_ncell);
+__global__ void _fill_loc_cellnum_map(int *temp_idx_order,
+                                      int *loc_cellnum_map,
+                                      const int *idx_cellmap_noshift,
+                                      const int nloc,
+                                      const int loc_cellnum) {
+  int idy = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idy < loc_cellnum) {
+    int num = 0;
+    for (int ii = 0; ii < nloc; ii++) {
+      if (idx_cellmap_noshift[ii] == idy) {
+        temp_idx_order[ii] = num;
+        num++;
+      }
     }
+    loc_cellnum_map[idy] = num;
+  }
 }
 
-__global__ void _fill_loc_cellnum_map(
-    int * temp_idx_order,
-    int * loc_cellnum_map,
-    const int * idx_cellmap_noshift,
-    const int nloc,
-    const int loc_cellnum)
-{
-    int idy = blockIdx.x*blockDim.x+threadIdx.x;
-    if (idy= nloc) {
+    return;
+  }
+  int cell_idx = idx_cellmap[idy];
+  int *clist_row = clist + sec_num_map[cell_idx];
+  clist_row[idx_order[idy]] = idy;
 }
 
-__global__ void _build_loc_clist(
-    int *clist,
-    const int *idx_cellmap, 
-    const int *idx_order,
-    const int *sec_num_map,
-    const int nloc)
-{
-    int idy = blockIdx.x*blockDim.x+threadIdx.x;
-    if(idy>=nloc){return;}
-    int cell_idx=idx_cellmap[idy];
-    int * clist_row = clist+sec_num_map[cell_idx];
-    clist_row[idx_order[idy]]=idy;
-}
-
-template
-__global__ void _copy_coord(
-    FPTYPE * out_c, 
-    int * out_t, 
-    int * mapping, 
-    const FPTYPE * in_c, 
-    const int * in_t, 
-    const int * cell_map, 
-    const int * cell_shift_map, 
-    const int * sec_loc_cellnum_map, 
-    const int * sec_total_cellnum_map, 
-    const int * loc_clist, 
-    const int nloc, 
-    const int nall, 
-    const int total_cellnum, 
-    const FPTYPE * boxt, 
-    const FPTYPE * rec_boxt)
-{
-    int idy = blockIdx.x*blockDim.x+threadIdx.x;
-    if(idy>=nall){return;}
-    if(idy
+__global__ void _copy_coord(FPTYPE *out_c,
+                            int *out_t,
+                            int *mapping,
+                            const FPTYPE *in_c,
+                            const int *in_t,
+                            const int *cell_map,
+                            const int *cell_shift_map,
+                            const int *sec_loc_cellnum_map,
+                            const int *sec_total_cellnum_map,
+                            const int *loc_clist,
+                            const int nloc,
+                            const int nall,
+                            const int total_cellnum,
+                            const FPTYPE *boxt,
+                            const FPTYPE *rec_boxt) {
+  int idy = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idy >= nall) {
+    return;
+  }
+  if (idy < nloc) {
+    mapping[idy] = idy;
+    out_t[idy] = in_t[idy];
+    for (int dd = 0; dd < 3; dd++) {
+      out_c[idy * 3 + dd] = in_c[idy * 3 + dd];
     }
-    else
-    {
-        int cell_idx=0;
-        int atom_idx=0;
-        int orig_cell_idx=0;
-        int orig_idy=0;
-        int shift[3];
-        FPTYPE d_shift[3];
-        for(int ii=0;ii=sec_total_cellnum_map[ii+1])cell_idx++;
-            else break;
-        }
-        for(int dd=0;dd<3;dd++)
-        {
-            shift[dd]=cell_shift_map[cell_idx*3+dd];
-            d_shift[dd]=shift[dd];
-        }
-        atom_idx=idy-sec_total_cellnum_map[cell_idx];
-        orig_cell_idx=cell_map[cell_idx];
-        orig_idy=loc_clist[sec_loc_cellnum_map[orig_cell_idx]+atom_idx];
-        mapping[idy]=orig_idy;
-        out_t[idy]=in_t[orig_idy];
-        FPTYPE shift_v[3];
-        inter2Phys(shift_v,d_shift,boxt);
-        for(int dd=0;dd<3;dd++)
-        {
-            out_c[idy*3+dd]=in_c[orig_idy*3+dd]-shift_v[dd];
-        }
+  } else {
+    int cell_idx = 0;
+    int atom_idx = 0;
+    int orig_cell_idx = 0;
+    int orig_idy = 0;
+    int shift[3];
+    FPTYPE d_shift[3];
+    for (int ii = 0; ii < total_cellnum; ii++) {
+      if (idy >= sec_total_cellnum_map[ii + 1])
+        cell_idx++;
+      else
+        break;
     }
+    for (int dd = 0; dd < 3; dd++) {
+      shift[dd] = cell_shift_map[cell_idx * 3 + dd];
+      d_shift[dd] = shift[dd];
+    }
+    atom_idx = idy - sec_total_cellnum_map[cell_idx];
+    orig_cell_idx = cell_map[cell_idx];
+    orig_idy = loc_clist[sec_loc_cellnum_map[orig_cell_idx] + atom_idx];
+    mapping[idy] = orig_idy;
+    out_t[idy] = in_t[orig_idy];
+    FPTYPE shift_v[3];
+    inter2Phys(shift_v, d_shift, boxt);
+    for (int dd = 0; dd < 3; dd++) {
+      out_c[idy * 3 + dd] = in_c[orig_idy * 3 + dd] - shift_v[dd];
+    }
+  }
 }
 
 template 
-void compute_int_data(
-    int * int_data, 
-    const FPTYPE * in_c, 
-    const int * cell_info, 
-    const deepmd::Region & region, 
-    const int nloc, 
-    const int loc_cellnum, 
-    const int total_cellnum)
-{
-    int * idx_cellmap=int_data;
-    int * idx_cellmap_noshift=idx_cellmap+nloc;
-    int * temp_idx_order=idx_cellmap_noshift+nloc;
-    int * loc_cellnum_map=temp_idx_order+nloc;
-    int * total_cellnum_map=loc_cellnum_map+loc_cellnum;
-    int * mask_cellnum_map=total_cellnum_map+total_cellnum;
-    int * cell_map=mask_cellnum_map+total_cellnum;
-    int * cell_shift_map=cell_map+total_cellnum;
-    const int * nat_stt=cell_info;
-    const int * nat_end=cell_info+3;
-    const int * ext_stt=cell_info+6;
-    const int * ext_end=cell_info+9;
-    const FPTYPE * rec_boxt = region.rec_boxt;
-    
-    const int nblock_loc=(nloc+TPB-1)/TPB;
-    _fill_idx_cellmap<<>>(idx_cellmap, idx_cellmap_noshift, in_c, rec_boxt, 
-        nat_stt, nat_end, ext_stt, ext_end, nloc);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+void compute_int_data(int *int_data,
+                      const FPTYPE *in_c,
+                      const int *cell_info,
+                      const deepmd::Region ®ion,
+                      const int nloc,
+                      const int loc_cellnum,
+                      const int total_cellnum) {
+  int *idx_cellmap = int_data;
+  int *idx_cellmap_noshift = idx_cellmap + nloc;
+  int *temp_idx_order = idx_cellmap_noshift + nloc;
+  int *loc_cellnum_map = temp_idx_order + nloc;
+  int *total_cellnum_map = loc_cellnum_map + loc_cellnum;
+  int *mask_cellnum_map = total_cellnum_map + total_cellnum;
+  int *cell_map = mask_cellnum_map + total_cellnum;
+  int *cell_shift_map = cell_map + total_cellnum;
+  const int *nat_stt = cell_info;
+  const int *nat_end = cell_info + 3;
+  const int *ext_stt = cell_info + 6;
+  const int *ext_end = cell_info + 9;
+  const FPTYPE *rec_boxt = region.rec_boxt;
 
-    const int nblock_loc_cellnum=(loc_cellnum+TPB-1)/TPB;
-    _fill_loc_cellnum_map<<>>(temp_idx_order, loc_cellnum_map, 
-        idx_cellmap_noshift, nloc, loc_cellnum);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+  const int nblock_loc = (nloc + TPB - 1) / TPB;
+  _fill_idx_cellmap<<>>(idx_cellmap, idx_cellmap_noshift, in_c,
+                                         rec_boxt, nat_stt, nat_end, ext_stt,
+                                         ext_end, nloc);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 
-    const int nblock_total_cellnum=(total_cellnum+TPB-1)/TPB;
-    _fill_total_cellnum_map<<>>(total_cellnum_map, mask_cellnum_map, cell_map, 
-        cell_shift_map, nat_stt, nat_end, ext_stt, ext_end, loc_cellnum_map, total_cellnum);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+  const int nblock_loc_cellnum = (loc_cellnum + TPB - 1) / TPB;
+  _fill_loc_cellnum_map<<>>(
+      temp_idx_order, loc_cellnum_map, idx_cellmap_noshift, nloc, loc_cellnum);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
+
+  const int nblock_total_cellnum = (total_cellnum + TPB - 1) / TPB;
+  _fill_total_cellnum_map<<>>(
+      total_cellnum_map, mask_cellnum_map, cell_map, cell_shift_map, nat_stt,
+      nat_end, ext_stt, ext_end, loc_cellnum_map, total_cellnum);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 }
 
-void build_loc_clist(
-    int * int_data, 
-    const int nloc, 
-    const int loc_cellnum, 
-    const int total_cellnum)
-{
-    const int nblock=(nloc+TPB-1)/TPB;
-    const int * idx_cellmap_noshift=int_data+nloc;
-    const int * temp_idx_order=idx_cellmap_noshift+nloc;
-    const int * sec_loc_cellnum_map=temp_idx_order+nloc+loc_cellnum+2*total_cellnum+total_cellnum+3*total_cellnum;
-    int * loc_clist=int_data+nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1;
-    _build_loc_clist<<>>(loc_clist, idx_cellmap_noshift, temp_idx_order, sec_loc_cellnum_map, nloc);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+void build_loc_clist(int *int_data,
+                     const int nloc,
+                     const int loc_cellnum,
+                     const int total_cellnum) {
+  const int nblock = (nloc + TPB - 1) / TPB;
+  const int *idx_cellmap_noshift = int_data + nloc;
+  const int *temp_idx_order = idx_cellmap_noshift + nloc;
+  const int *sec_loc_cellnum_map = temp_idx_order + nloc + loc_cellnum +
+                                   2 * total_cellnum + total_cellnum +
+                                   3 * total_cellnum;
+  int *loc_clist = int_data + nloc * 3 + loc_cellnum + total_cellnum * 3 +
+                   total_cellnum * 3 + loc_cellnum + 1 + total_cellnum + 1;
+  _build_loc_clist<<>>(loc_clist, idx_cellmap_noshift,
+                                    temp_idx_order, sec_loc_cellnum_map, nloc);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 }
 
 template 
-void copy_coord(
-    FPTYPE * out_c, 
-    int * out_t, 
-    int * mapping, 
-    const int * int_data, 
-    const FPTYPE * in_c, 
-    const int * in_t, 
-    const int nloc, 
-    const int nall, 
-    const int loc_cellnum, 
-    const int total_cellnum, 
-    const deepmd::Region & region)
-{
-    const int nblock=(nall+TPB-1)/TPB;
-    const int * cell_map=int_data+3*nloc+loc_cellnum+2*total_cellnum;
-    const int * cell_shift_map=cell_map+total_cellnum;
-    const int * sec_loc_cellnum_map=cell_shift_map+3*total_cellnum;
-    const int * sec_total_cellnum_map=sec_loc_cellnum_map+loc_cellnum+1;
-    const int * loc_clist=sec_total_cellnum_map+total_cellnum+1;
+void copy_coord(FPTYPE *out_c,
+                int *out_t,
+                int *mapping,
+                const int *int_data,
+                const FPTYPE *in_c,
+                const int *in_t,
+                const int nloc,
+                const int nall,
+                const int loc_cellnum,
+                const int total_cellnum,
+                const deepmd::Region ®ion) {
+  const int nblock = (nall + TPB - 1) / TPB;
+  const int *cell_map = int_data + 3 * nloc + loc_cellnum + 2 * total_cellnum;
+  const int *cell_shift_map = cell_map + total_cellnum;
+  const int *sec_loc_cellnum_map = cell_shift_map + 3 * total_cellnum;
+  const int *sec_total_cellnum_map = sec_loc_cellnum_map + loc_cellnum + 1;
+  const int *loc_clist = sec_total_cellnum_map + total_cellnum + 1;
 
-    const FPTYPE *boxt = region.boxt;
-    const FPTYPE *rec_boxt = region.rec_boxt;
-    _copy_coord<<>>(out_c, out_t, mapping, in_c, in_t, cell_map, cell_shift_map, 
-        sec_loc_cellnum_map, sec_total_cellnum_map, loc_clist, nloc, nall, total_cellnum, boxt, rec_boxt);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+  const FPTYPE *boxt = region.boxt;
+  const FPTYPE *rec_boxt = region.rec_boxt;
+  _copy_coord<<>>(out_c, out_t, mapping, in_c, in_t, cell_map,
+                               cell_shift_map, sec_loc_cellnum_map,
+                               sec_total_cellnum_map, loc_clist, nloc, nall,
+                               total_cellnum, boxt, rec_boxt);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 }
 
 namespace deepmd {
 template 
-void
-normalize_coord_gpu(
-    FPTYPE * coord,
-    const int natom,
-    const Region & region)
-{
-    const FPTYPE * boxt=region.boxt;
-    const FPTYPE * rec_boxt=region.rec_boxt;
-    const int nblock=(natom+TPB-1)/TPB;
-    normalize_one<<>>(coord, boxt, rec_boxt, natom);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+void normalize_coord_gpu(FPTYPE *coord,
+                         const int natom,
+                         const Region ®ion) {
+  const FPTYPE *boxt = region.boxt;
+  const FPTYPE *rec_boxt = region.rec_boxt;
+  const int nblock = (natom + TPB - 1) / TPB;
+  normalize_one<<>>(coord, boxt, rec_boxt, natom);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 }
 
-//  int_data(temp cuda memory):idx_map,idx_map_noshift,temp_idx_order,loc_cellnum_map,total_cellnum_map,mask_cellnum_map,
+//  int_data(temp cuda
+//  memory):idx_map,idx_map_noshift,temp_idx_order,loc_cellnum_map,total_cellnum_map,mask_cellnum_map,
 //                             cell_map,cell_shift_map,sec_loc_cellnum_map,sec_total_cellnum_map,loc_clist
 template 
-int
-copy_coord_gpu(
-    FPTYPE * out_c,
-    int * out_t,
-    int * mapping,
-    int * nall,
-    int * int_data,
-    const FPTYPE * in_c,
-    const int * in_t,
-    const int & nloc,
-    const int & mem_nall,
-    const int & loc_cellnum,
-    const int & total_cellnum,
-    const int * cell_info,
-    const Region & region)
-{
-    compute_int_data(int_data, in_c, cell_info, region, nloc, loc_cellnum, total_cellnum);
-    int * int_data_cpu=new int [loc_cellnum+2*total_cellnum+loc_cellnum+1+total_cellnum+1];//loc_cellnum_map,total_cellnum_map,mask_cellnum_map,sec_loc_cellnum_map,sec_total_cellnum_map
-    DPErrcheck(cudaMemcpy(int_data_cpu, int_data+3*nloc, sizeof(int) * (loc_cellnum + 2 * total_cellnum), cudaMemcpyDeviceToHost));
-    int * loc_cellnum_map=int_data_cpu;
-    int * total_cellnum_map=loc_cellnum_map+loc_cellnum;
-    int * mask_cellnum_map=total_cellnum_map+total_cellnum;
-    int * sec_loc_cellnum_map=mask_cellnum_map+total_cellnum;
-    int * sec_total_cellnum_map=sec_loc_cellnum_map+loc_cellnum+1;
-    sec_loc_cellnum_map[0]=0;
-    sec_total_cellnum_map[0]=nloc;
-    int max_cell=0;
-    for(int iii=0;iii mem_nall){
-        delete[] int_data_cpu;
-        // size of the output arrays is not large enough
-        return 1;
+int copy_coord_gpu(FPTYPE *out_c,
+                   int *out_t,
+                   int *mapping,
+                   int *nall,
+                   int *int_data,
+                   const FPTYPE *in_c,
+                   const int *in_t,
+                   const int &nloc,
+                   const int &mem_nall,
+                   const int &loc_cellnum,
+                   const int &total_cellnum,
+                   const int *cell_info,
+                   const Region ®ion) {
+  compute_int_data(int_data, in_c, cell_info, region, nloc, loc_cellnum,
+                   total_cellnum);
+  int *int_data_cpu = new int
+      [loc_cellnum + 2 * total_cellnum + loc_cellnum + 1 + total_cellnum +
+       1];  // loc_cellnum_map,total_cellnum_map,mask_cellnum_map,sec_loc_cellnum_map,sec_total_cellnum_map
+  DPErrcheck(cudaMemcpy(int_data_cpu, int_data + 3 * nloc,
+                        sizeof(int) * (loc_cellnum + 2 * total_cellnum),
+                        cudaMemcpyDeviceToHost));
+  int *loc_cellnum_map = int_data_cpu;
+  int *total_cellnum_map = loc_cellnum_map + loc_cellnum;
+  int *mask_cellnum_map = total_cellnum_map + total_cellnum;
+  int *sec_loc_cellnum_map = mask_cellnum_map + total_cellnum;
+  int *sec_total_cellnum_map = sec_loc_cellnum_map + loc_cellnum + 1;
+  sec_loc_cellnum_map[0] = 0;
+  sec_total_cellnum_map[0] = nloc;
+  int max_cell = 0;
+  for (int iii = 0; iii < total_cellnum; iii++) {
+    if (max_cell < total_cellnum_map[iii]) {
+      max_cell = total_cellnum_map[iii];
     }
-    else{
-        DPErrcheck(cudaMemcpy(int_data+nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3, 
-            sec_loc_cellnum_map, sizeof(int) * (loc_cellnum+1+total_cellnum+1), cudaMemcpyHostToDevice));
-        delete[] int_data_cpu;
-        build_loc_clist(int_data, nloc, loc_cellnum, total_cellnum);
-        copy_coord(out_c, out_t, mapping, int_data, in_c, in_t, nloc, *nall, loc_cellnum, total_cellnum, region);
+    if (iii < loc_cellnum) {
+      sec_loc_cellnum_map[iii + 1] =
+          sec_loc_cellnum_map[iii] + loc_cellnum_map[iii];
     }
-    return 0;
+    sec_total_cellnum_map[iii + 1] =
+        sec_total_cellnum_map[iii] + mask_cellnum_map[iii];
+  }
+  *nall = sec_total_cellnum_map[total_cellnum];
+  if (*nall > mem_nall) {
+    delete[] int_data_cpu;
+    // size of the output arrays is not large enough
+    return 1;
+  } else {
+    DPErrcheck(cudaMemcpy(int_data + nloc * 3 + loc_cellnum +
+                              total_cellnum * 3 + total_cellnum * 3,
+                          sec_loc_cellnum_map,
+                          sizeof(int) * (loc_cellnum + 1 + total_cellnum + 1),
+                          cudaMemcpyHostToDevice));
+    delete[] int_data_cpu;
+    build_loc_clist(int_data, nloc, loc_cellnum, total_cellnum);
+    copy_coord(out_c, out_t, mapping, int_data, in_c, in_t, nloc, *nall,
+               loc_cellnum, total_cellnum, region);
+  }
+  return 0;
 }
 
-template void normalize_coord_gpu(float * coord, const int natom, const Region & region);
-template void normalize_coord_gpu(double * coord, const int natom, const Region & region);
-template int copy_coord_gpu(float * out_c, int * out_t, int * mapping, int * nall, int * int_data, const float * in_c, const int * in_t, const int & nloc, const int & mem_nall, const int & loc_cellnum, const int & total_cellnum, const int * cell_info, const Region & region);
-template int copy_coord_gpu(double * out_c, int * out_t, int * mapping, int * nall, int * int_data, const double * in_c, const int * in_t, const int & nloc, const int & mem_nall, const int & loc_cellnum, const int & total_cellnum, const int * cell_info, const Region & region);
-}
\ No newline at end of file
+template void normalize_coord_gpu(float *coord,
+                                         const int natom,
+                                         const Region ®ion);
+template void normalize_coord_gpu(double *coord,
+                                          const int natom,
+                                          const Region ®ion);
+template int copy_coord_gpu(float *out_c,
+                                   int *out_t,
+                                   int *mapping,
+                                   int *nall,
+                                   int *int_data,
+                                   const float *in_c,
+                                   const int *in_t,
+                                   const int &nloc,
+                                   const int &mem_nall,
+                                   const int &loc_cellnum,
+                                   const int &total_cellnum,
+                                   const int *cell_info,
+                                   const Region ®ion);
+template int copy_coord_gpu(double *out_c,
+                                    int *out_t,
+                                    int *mapping,
+                                    int *nall,
+                                    int *int_data,
+                                    const double *in_c,
+                                    const int *in_t,
+                                    const int &nloc,
+                                    const int &mem_nall,
+                                    const int &loc_cellnum,
+                                    const int &total_cellnum,
+                                    const int *cell_info,
+                                    const Region ®ion);
+}  // namespace deepmd
diff --git a/source/lib/src/cuda/cudart/CMakeLists.txt b/source/lib/src/cuda/cudart/CMakeLists.txt
index 07953381c9..26103a64eb 100644
--- a/source/lib/src/cuda/cudart/CMakeLists.txt
+++ b/source/lib/src/cuda/cudart/CMakeLists.txt
@@ -1,24 +1,14 @@
 add_library(deepmd_dyn_cudart SHARED cudart_stub.cc)
-target_include_directories(
-    deepmd_dyn_cudart
-    PUBLIC
-    ${CUDA_INCLUDE_DIRS}
-)
+target_include_directories(deepmd_dyn_cudart PUBLIC ${CUDA_INCLUDE_DIRS})
 get_filename_component(CUDA_LIBRARY_DIR ${CUDA_cudart_static_LIBRARY} DIRECTORY)
-set_target_properties(
-    deepmd_dyn_cudart
-    PROPERTIES
-    INSTALL_RPATH "${CUDA_LIBRARY_DIR}"
-)
+set_target_properties(deepmd_dyn_cudart PROPERTIES INSTALL_RPATH
+                                                   "${CUDA_LIBRARY_DIR}")
 if(BUILD_CPP_IF)
-    install(
-        TARGETS
-        deepmd_dyn_cudart
-        EXPORT ${CMAKE_PROJECT_NAME}Targets
-        DESTINATION lib/
-    )
-endif (BUILD_CPP_IF)
-if (BUILD_PY_IF)
-    install(TARGETS deepmd_dyn_cudart DESTINATION deepmd/op/)
-endif (BUILD_PY_IF)
-  
\ No newline at end of file
+  install(
+    TARGETS deepmd_dyn_cudart
+    EXPORT ${CMAKE_PROJECT_NAME}Targets
+    DESTINATION lib/)
+endif(BUILD_CPP_IF)
+if(BUILD_PY_IF)
+  install(TARGETS deepmd_dyn_cudart DESTINATION deepmd/op/)
+endif(BUILD_PY_IF)
diff --git a/source/lib/src/cuda/cudart/cuda_runtime_11_8.inc b/source/lib/src/cuda/cudart/cuda_runtime_11_8.inc
index 15fafa14a3..8000ce1f92 100644
--- a/source/lib/src/cuda/cudart/cuda_runtime_11_8.inc
+++ b/source/lib/src/cuda/cudart/cuda_runtime_11_8.inc
@@ -2768,4 +2768,4 @@ cudaGetFuncBySymbol(cudaFunction_t *functionPtr, const void *symbolPtr) {
   return func_ptr(functionPtr, symbolPtr);
 }
 
-}  // extern "C"
\ No newline at end of file
+}  // extern "C"
diff --git a/source/lib/src/cuda/cudart/cuda_runtime_12_0.inc b/source/lib/src/cuda/cudart/cuda_runtime_12_0.inc
index 175934a6dc..343db23132 100644
--- a/source/lib/src/cuda/cudart/cuda_runtime_12_0.inc
+++ b/source/lib/src/cuda/cudart/cuda_runtime_12_0.inc
@@ -2673,4 +2673,4 @@ extern __host__ cudaError_t CUDARTAPI cudaGetExportTable(
   return func_ptr(ppExportTable, pExportTableId);
 }
 
-}  // extern "C"
\ No newline at end of file
+}  // extern "C"
diff --git a/source/lib/src/cuda/cudart/cudart_stub.cc b/source/lib/src/cuda/cudart/cudart_stub.cc
index edd3c86c43..c9a4f3f007 100644
--- a/source/lib/src/cuda/cudart/cudart_stub.cc
+++ b/source/lib/src/cuda/cudart/cudart_stub.cc
@@ -3,8 +3,10 @@
 */
 #include 
 #include 
-#include 
+
 #include 
+#include 
+
 #include "cuda_runtime_api.h"
 
 // wraps cuda runtime with dso loader
@@ -20,9 +22,9 @@ void *GetDsoHandle() {
     std::string libname = "cudart.dll";
 #endif
 #if defined(_WIN32)
-    void* dso_handle = LoadLibrary(libname.c_str());
+    void *dso_handle = LoadLibrary(libname.c_str());
 #else
-    void* dso_handle = dlopen(libname.c_str(), RTLD_NOW | RTLD_LOCAL);
+    void *dso_handle = dlopen(libname.c_str(), RTLD_NOW | RTLD_LOCAL);
 #endif
     if (!dso_handle) {
       std::cerr << "DeePMD-kit: Cannot find " << libname << std::endl;
@@ -88,10 +90,16 @@ cudaError_t GetSymbolNotFoundError() {
 extern "C" {
 
 // Following are private symbols in libcudart that got inserted by nvcc.
-extern void CUDARTAPI __cudaRegisterFunction(
-    void **fatCubinHandle, const char *hostFun, char *deviceFun,
-    const char *deviceName, int thread_limit, uint3 *tid, uint3 *bid,
-    dim3 *bDim, dim3 *gDim, int *wSize) {
+extern void CUDARTAPI __cudaRegisterFunction(void **fatCubinHandle,
+                                             const char *hostFun,
+                                             char *deviceFun,
+                                             const char *deviceName,
+                                             int thread_limit,
+                                             uint3 *tid,
+                                             uint3 *bid,
+                                             dim3 *bDim,
+                                             dim3 *gDim,
+                                             int *wSize) {
   using FuncPtr = void(CUDARTAPI *)(void **fatCubinHandle, const char *hostFun,
                                     char *deviceFun, const char *deviceName,
                                     int thread_limit, uint3 *tid, uint3 *bid,
@@ -109,10 +117,14 @@ extern void CUDARTAPI __cudaUnregisterFatBinary(void **fatCubinHandle) {
   func_ptr(fatCubinHandle);
 }
 
-extern void CUDARTAPI __cudaRegisterVar(void **fatCubinHandle, char *hostVar,
+extern void CUDARTAPI __cudaRegisterVar(void **fatCubinHandle,
+                                        char *hostVar,
                                         char *deviceAddress,
-                                        const char *deviceName, int ext,
-                                        size_t size, int constant, int global) {
+                                        const char *deviceName,
+                                        int ext,
+                                        size_t size,
+                                        int constant,
+                                        int global) {
   using FuncPtr = void(CUDARTAPI *)(
       void **fatCubinHandle, char *hostVar, char *deviceAddress,
       const char *deviceName, int ext, size_t size, int constant, int global);
@@ -164,4 +176,4 @@ extern void CUDARTAPI __cudaRegisterFatBinaryEnd(void **fatCubinHandle) {
   func_ptr(fatCubinHandle);
 }
 #endif
-}
\ No newline at end of file
+}
diff --git a/source/lib/src/cuda/gelu.cu b/source/lib/src/cuda/gelu.cu
index 51e580a445..64c147617a 100644
--- a/source/lib/src/cuda/gelu.cu
+++ b/source/lib/src/cuda/gelu.cu
@@ -1,64 +1,70 @@
-#include "gelu.h"
 #include "device.h"
+#include "gelu.h"
 
-__device__ inline double _tanh(double x) {return tanh(x);}
-__device__ inline float _tanh(float x) {return tanhf(x);}
+__device__ inline double _tanh(double x) { return tanh(x); }
+__device__ inline float _tanh(float x) { return tanhf(x); }
 
 template 
-__global__ void gelu(
-    FPTYPE * out, 
-    const FPTYPE * xx, 
-    const int_64 size) 
-{
+__global__ void gelu(FPTYPE* out, const FPTYPE* xx, const int_64 size) {
   const int_64 idx = int_64(blockIdx.x) * blockDim.x + threadIdx.x;
   if (idx >= size) {
     return;
   }
-  out[idx] = xx[idx] * (FPTYPE)0.5 * ((FPTYPE)1.0 + _tanh((FPTYPE)SQRT_2_PI * (xx[idx] + (FPTYPE)0.044715 * xx[idx] * xx[idx] *xx[idx])));
+  out[idx] = xx[idx] * (FPTYPE)0.5 *
+             ((FPTYPE)1.0 +
+              _tanh((FPTYPE)SQRT_2_PI * (xx[idx] + (FPTYPE)0.044715 * xx[idx] *
+                                                       xx[idx] * xx[idx])));
 }
 
 template 
-__global__ void gelu_grad(
-    FPTYPE * out, 
-    const FPTYPE * xx, 
-    const FPTYPE * dy, 
-    const int_64 size) 
-{
+__global__ void gelu_grad(FPTYPE* out,
+                          const FPTYPE* xx,
+                          const FPTYPE* dy,
+                          const int_64 size) {
   const int_64 idx = int_64(blockIdx.x) * blockDim.x + threadIdx.x;
   if (idx >= size) {
     return;
   }
-  // out[idx] = xx[idx] * 0.5 * (1.0 + tanh(SQRT_2_PI * (xx[idx] + 0.044715 * xx[idx] * xx[idx] *xx[idx])));
-  const FPTYPE var = _tanh((FPTYPE)SQRT_2_PI * (xx[idx] + (FPTYPE)0.044715 * xx[idx] * xx[idx] *xx[idx]));
-  out[idx] = dy[idx] * ((FPTYPE)0.5 * SQRT_2_PI * xx[idx] * ((FPTYPE)1. - var * var) * ((FPTYPE)0.134145 * xx[idx] * xx[idx] + 1) + (FPTYPE)0.5 * var + (FPTYPE)0.5);
+  // out[idx] = xx[idx] * 0.5 * (1.0 + tanh(SQRT_2_PI * (xx[idx] + 0.044715 *
+  // xx[idx] * xx[idx] *xx[idx])));
+  const FPTYPE var =
+      _tanh((FPTYPE)SQRT_2_PI *
+            (xx[idx] + (FPTYPE)0.044715 * xx[idx] * xx[idx] * xx[idx]));
+  out[idx] =
+      dy[idx] * ((FPTYPE)0.5 * SQRT_2_PI * xx[idx] * ((FPTYPE)1. - var * var) *
+                     ((FPTYPE)0.134145 * xx[idx] * xx[idx] + 1) +
+                 (FPTYPE)0.5 * var + (FPTYPE)0.5);
 }
 
 template 
-__global__ void gelu_grad_grad(
-    FPTYPE * out, 
-    const FPTYPE * xx, 
-    const FPTYPE * dy, 
-    const FPTYPE * dy_2,
-    const int_64 size) 
-{
+__global__ void gelu_grad_grad(FPTYPE* out,
+                               const FPTYPE* xx,
+                               const FPTYPE* dy,
+                               const FPTYPE* dy_2,
+                               const int_64 size) {
   const int_64 idx = int_64(blockIdx.x) * blockDim.x + threadIdx.x;
   if (idx >= size) {
     return;
   }
-  // out[idx] = xx[idx] * 0.5 * (1.0 + tanh(SQRT_2_PI * (xx[idx] + 0.044715 * xx[idx] * xx[idx] *xx[idx])));
-  const FPTYPE var1 = _tanh((FPTYPE)SQRT_2_PI * (xx[idx] + (FPTYPE)0.044715 * xx[idx] * xx[idx] *xx[idx]));
-  const FPTYPE var2 = (FPTYPE)SQRT_2_PI * ((FPTYPE)1. - var1 * var1) * ((FPTYPE)0.134145 * xx[idx] * xx[idx] + (FPTYPE)1.);
-  out[idx] = dy[idx] * dy_2[idx] * ((FPTYPE)0.134145 * (FPTYPE)SQRT_2_PI * xx[idx] * xx[idx] * ((FPTYPE)1. - var1 * var1) - (FPTYPE)SQRT_2_PI * xx[idx] * var2 * ((FPTYPE)0.134145 * xx[idx] * xx[idx] + (FPTYPE)1.) * var1 + var2);
+  // out[idx] = xx[idx] * 0.5 * (1.0 + tanh(SQRT_2_PI * (xx[idx] + 0.044715 *
+  // xx[idx] * xx[idx] *xx[idx])));
+  const FPTYPE var1 =
+      _tanh((FPTYPE)SQRT_2_PI *
+            (xx[idx] + (FPTYPE)0.044715 * xx[idx] * xx[idx] * xx[idx]));
+  const FPTYPE var2 = (FPTYPE)SQRT_2_PI * ((FPTYPE)1. - var1 * var1) *
+                      ((FPTYPE)0.134145 * xx[idx] * xx[idx] + (FPTYPE)1.);
+  out[idx] = dy[idx] * dy_2[idx] *
+             ((FPTYPE)0.134145 * (FPTYPE)SQRT_2_PI * xx[idx] * xx[idx] *
+                  ((FPTYPE)1. - var1 * var1) -
+              (FPTYPE)SQRT_2_PI * xx[idx] * var2 *
+                  ((FPTYPE)0.134145 * xx[idx] * xx[idx] + (FPTYPE)1.) * var1 +
+              var2);
 }
 
 namespace deepmd {
-template
-void gelu_gpu_cuda(
-    FPTYPE * out, 
-    const FPTYPE * xx, 
-    const int_64 size)
-{
-  if(size <= 0){
+template 
+void gelu_gpu_cuda(FPTYPE* out, const FPTYPE* xx, const int_64 size) {
+  if (size <= 0) {
     return;
   }
   const int THREAD_ITEMS = 1024;
@@ -69,14 +75,12 @@ void gelu_gpu_cuda(
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void gelu_grad_gpu_cuda(
-    FPTYPE * out, 
-    const FPTYPE * xx,
-    const FPTYPE * dy, 
-    const int_64 size)
-{
-  if(size <= 0){
+template 
+void gelu_grad_gpu_cuda(FPTYPE* out,
+                        const FPTYPE* xx,
+                        const FPTYPE* dy,
+                        const int_64 size) {
+  if (size <= 0) {
     return;
   }
   const int THREAD_ITEMS = 1024;
@@ -87,29 +91,45 @@ void gelu_grad_gpu_cuda(
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void gelu_grad_grad_gpu_cuda(
-    FPTYPE * out,
-    const FPTYPE * xx,
-    const FPTYPE * dy, 
-    const FPTYPE * dy_2,
-    const int_64 size)
-{
-  if(size <= 0){
+template 
+void gelu_grad_grad_gpu_cuda(FPTYPE* out,
+                             const FPTYPE* xx,
+                             const FPTYPE* dy,
+                             const FPTYPE* dy_2,
+                             const int_64 size) {
+  if (size <= 0) {
     return;
   }
   const int THREAD_ITEMS = 1024;
   const int BLOCK_NUMS = (size + THREAD_ITEMS - 1) / THREAD_ITEMS;
-  
+
   gelu_grad_grad<<>>(out, xx, dy, dy_2, size);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template void gelu_gpu_cuda(float * out, const float * x, const int_64 size);
-template void gelu_gpu_cuda(double * out, const double * x, const int_64 size);
-template void gelu_grad_gpu_cuda(float * out, const float * x, const float * dy, const int_64 size);
-template void gelu_grad_gpu_cuda(double * out, const double * x, const double * dy, const int_64 size);
-template void gelu_grad_grad_gpu_cuda(float * out, const float * x, const float * dy, const float * dy_2, const int_64 size);
-template void gelu_grad_grad_gpu_cuda(double * out, const double * x, const double * dy, const double * dy_2, const int_64 size);
-}
\ No newline at end of file
+template void gelu_gpu_cuda(float* out,
+                                   const float* x,
+                                   const int_64 size);
+template void gelu_gpu_cuda(double* out,
+                                    const double* x,
+                                    const int_64 size);
+template void gelu_grad_gpu_cuda(float* out,
+                                        const float* x,
+                                        const float* dy,
+                                        const int_64 size);
+template void gelu_grad_gpu_cuda(double* out,
+                                         const double* x,
+                                         const double* dy,
+                                         const int_64 size);
+template void gelu_grad_grad_gpu_cuda(float* out,
+                                             const float* x,
+                                             const float* dy,
+                                             const float* dy_2,
+                                             const int_64 size);
+template void gelu_grad_grad_gpu_cuda(double* out,
+                                              const double* x,
+                                              const double* dy,
+                                              const double* dy_2,
+                                              const int_64 size);
+}  // namespace deepmd
diff --git a/source/lib/src/cuda/neighbor_list.cu b/source/lib/src/cuda/neighbor_list.cu
index 3089ab956b..1e0af02ac9 100644
--- a/source/lib/src/cuda/neighbor_list.cu
+++ b/source/lib/src/cuda/neighbor_list.cu
@@ -1,38 +1,35 @@
+#include 
+
 #include "device.h"
 #include "neighbor_list.h"
-
-#include 
 // A stateful callback functor that maintains a running prefix to be applied
 // during consecutive scan operations.
-struct parallel_prefix_scan_op
-{
+struct parallel_prefix_scan_op {
   // Running prefix
   int running_total;
   // Constructor
-  __device__ parallel_prefix_scan_op(int running_total) : running_total(running_total) {}
+  __device__ parallel_prefix_scan_op(int running_total)
+      : running_total(running_total) {}
   // Callback operator to be entered by the first warp of threads in the block.
-  // Thread-0 is responsible for returning a value for seeding the block-wide scan.
-  __device__ int operator()(int block_aggregate)
-  {
+  // Thread-0 is responsible for returning a value for seeding the block-wide
+  // scan.
+  __device__ int operator()(int block_aggregate) {
     int old_prefix = running_total;
     running_total += block_aggregate;
     return old_prefix;
   }
 };
 
-template <
-  int   THREADS_PER_BLOCK>
-__global__ void parallel_prefix_scan(
-  int * numneigh, 
-  int * nei_order, 
-  const int * temp_nlist, 
-  const int mem_size, 
-  const int nloc,
-  const int nall
-)
-{
-  // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128 threads, 4 ints per thread
-  typedef cub::BlockScan BlockScan;
+template 
+__global__ void parallel_prefix_scan(int *numneigh,
+                                     int *nei_order,
+                                     const int *temp_nlist,
+                                     const int mem_size,
+                                     const int nloc,
+                                     const int nall) {
+  // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128
+  // threads, 4 ints per thread
+  typedef cub::BlockScan BlockScan;
   // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan
   __shared__ typename BlockScan::TempStorage temp_storage;
 
@@ -40,264 +37,243 @@ __global__ void parallel_prefix_scan(
   parallel_prefix_scan_op prefix_op(0);
 
   // Have the block iterate over segments of items
-  for (int ii = threadIdx.x; ii < nall; ii += THREADS_PER_BLOCK)
-  {
-    int block_offset = blockIdx.x * mem_size; 
+  for (int ii = threadIdx.x; ii < nall; ii += THREADS_PER_BLOCK) {
+    int block_offset = blockIdx.x * mem_size;
     // Load a segment of consecutive items that are blocked across threads
     int i_data = temp_nlist[block_offset + ii];
     int o_data = i_data == -1 ? 0 : 1;
 
     // Collectively compute the block-wide exclusive prefix sum
-    BlockScan(temp_storage).ExclusiveSum(
-        o_data, o_data, prefix_op);
+    BlockScan(temp_storage).ExclusiveSum(o_data, o_data, prefix_op);
 
     __syncthreads();
     // Store scanned items to output segment
     if (i_data != -1) {
-        nei_order[block_offset + ii] = o_data; 
+      nei_order[block_offset + ii] = o_data;
     }
     // Store numneigh into the output array
     if (ii == nall - 1) {
-        o_data += i_data == -1 ? 0 : 1;
-        numneigh[blockIdx.x] = o_data; 
+      o_data += i_data == -1 ? 0 : 1;
+      numneigh[blockIdx.x] = o_data;
     }
   }
 }
 
-template
-__device__ inline FPTYPE dev_dot(
-    FPTYPE * arr1, 
-    FPTYPE * arr2) 
-{
-    return arr1[0] * arr2[0] + arr1[1] * arr2[1] + arr1[2] * arr2[2];
+template 
+__device__ inline FPTYPE dev_dot(FPTYPE *arr1, FPTYPE *arr2) {
+  return arr1[0] * arr2[0] + arr1[1] * arr2[1] + arr1[2] * arr2[2];
 }
 
-template
-__global__ void build_nlist(
-    int * ilist, 
-    int * temp_nlist,
-    const FPTYPE * c_cpy, 
-    const FPTYPE rcut2,
-    const int nloc,
-    const int nall,
-    const int mem_size)
-{
-    const unsigned int atom_idx = blockIdx.x;
-    const unsigned int neighbor_idx = blockIdx.y * blockDim.y + threadIdx.y;
-    if(neighbor_idx
+__global__ void build_nlist(int *ilist,
+                            int *temp_nlist,
+                            const FPTYPE *c_cpy,
+                            const FPTYPE rcut2,
+                            const int nloc,
+                            const int nall,
+                            const int mem_size) {
+  const unsigned int atom_idx = blockIdx.x;
+  const unsigned int neighbor_idx = blockIdx.y * blockDim.y + threadIdx.y;
+  if (neighbor_idx < nall) {
+    int *neighbor_row = temp_nlist + atom_idx * mem_size;
+    if (neighbor_idx == atom_idx) {
+      ilist[atom_idx] = atom_idx;
+    } else {
+      const FPTYPE *ccoord = c_cpy + atom_idx * 3;
+      const FPTYPE *ncoord = c_cpy + neighbor_idx * 3;
+      FPTYPE diff[3];
+      for (int kk = 0; kk < 3; kk++) {
+        diff[kk] = ccoord[kk] - ncoord[kk];
+      }
+      FPTYPE r2 = dev_dot(diff, diff);
+      if (r2 < rcut2) {
+        neighbor_row[neighbor_idx] = neighbor_idx;
+      }
     }
+  }
 }
 
-__global__ void fill_nlist(
-    int ** firstneigh,
-    const int * temp_nlist,
-    const int * nei_order,
-    const int mem_size,
-    const int nall)
-{
-    const unsigned int atom_idx = blockIdx.x;
-    const unsigned int neighbor_idx = blockIdx.y * blockDim.y + threadIdx.y;
-    if(neighbor_idx=nnei){return;}
-    int nlist_idx=atom_idx*nnei+nei_idx;
-    int nlist_item=nlist[nlist_idx];
-    if(nlist_item!=-1){
-        nlist[nlist_idx]=nlist_map[nlist_item];
-    }
+__global__ void map_nlist(int *nlist,
+                          const int *nlist_map,
+                          const int nloc,
+                          const int nnei) {
+  int atom_idx = blockIdx.x;
+  int nei_idx = blockIdx.y * blockDim.y + threadIdx.y;
+  if (nei_idx >= nnei) {
+    return;
+  }
+  int nlist_idx = atom_idx * nnei + nei_idx;
+  int nlist_item = nlist[nlist_idx];
+  if (nlist_item != -1) {
+    nlist[nlist_idx] = nlist_map[nlist_item];
+  }
 }
 
-__global__ void map_nei_info(
-    int * nlist,
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int * nlist_map,
-    const int nloc,
-    const int nnei,
-    const int ntypes
-)
-{
-    int atom_idx=blockIdx.x;
-    int nei_idx=blockIdx.y*blockDim.y+threadIdx.y;
-    if(nei_idx>=nnei){return;}
-    int nlist_idx=atom_idx*nnei+nei_idx;
-    int nlist_item=nlist[nlist_idx];
-    int temp=0;
-    if(nlist_item!=-1){
-        temp=nlist_map[nlist_item];
-        nlist[nlist_idx]=temp;
-        ntype[nlist_idx]=type[temp];
-        nmask[nlist_idx]=true;
-    }
-    else{
-        ntype[nlist_idx]=ntypes;
-    }
+__global__ void map_nei_info(int *nlist,
+                             int *ntype,
+                             bool *nmask,
+                             const int *type,
+                             const int *nlist_map,
+                             const int nloc,
+                             const int nnei,
+                             const int ntypes) {
+  int atom_idx = blockIdx.x;
+  int nei_idx = blockIdx.y * blockDim.y + threadIdx.y;
+  if (nei_idx >= nnei) {
+    return;
+  }
+  int nlist_idx = atom_idx * nnei + nei_idx;
+  int nlist_item = nlist[nlist_idx];
+  int temp = 0;
+  if (nlist_item != -1) {
+    temp = nlist_map[nlist_item];
+    nlist[nlist_idx] = temp;
+    ntype[nlist_idx] = type[temp];
+    nmask[nlist_idx] = true;
+  } else {
+    ntype[nlist_idx] = ntypes;
+  }
 }
 
-__global__ void map_nei_info_noconvert(
-    int * nlist,
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int nloc,
-    const int nnei,
-    const int ntypes
-)
-{
-    int atom_idx=blockIdx.x;
-    int nei_idx=blockIdx.y*blockDim.y+threadIdx.y;
-    if(nei_idx>=nnei){return;}
-    int nlist_idx=atom_idx*nnei+nei_idx;
-    int nlist_item=nlist[nlist_idx];
-    if(nlist_item!=-1){
-        ntype[nlist_idx]=type[nlist_item];
-        nmask[nlist_idx]=true;
-    }
-    else{
-        ntype[nlist_idx]=ntypes;
-    }
+__global__ void map_nei_info_noconvert(int *nlist,
+                                       int *ntype,
+                                       bool *nmask,
+                                       const int *type,
+                                       const int nloc,
+                                       const int nnei,
+                                       const int ntypes) {
+  int atom_idx = blockIdx.x;
+  int nei_idx = blockIdx.y * blockDim.y + threadIdx.y;
+  if (nei_idx >= nnei) {
+    return;
+  }
+  int nlist_idx = atom_idx * nnei + nei_idx;
+  int nlist_item = nlist[nlist_idx];
+  if (nlist_item != -1) {
+    ntype[nlist_idx] = type[nlist_item];
+    nmask[nlist_idx] = true;
+  } else {
+    ntype[nlist_idx] = ntypes;
+  }
 }
 
 namespace deepmd {
 template 
-int build_nlist_gpu(
-    InputNlist & nlist,
-    int * max_list_size,
-    int * nlist_data,
-    const FPTYPE * c_cpy, 
-    const int & nloc, 
-    const int & nall, 
-    const int & mem_size,
-    const float & rcut)
-{
-    if(mem_size < nall){
-        return 1;
-    }
-    const int nblock = (nall+TPB-1)/TPB;
-    int * ilist = nlist.ilist;
-    int * numneigh = nlist.numneigh;
-    int ** firstneigh = nlist.firstneigh;
-    DPErrcheck(cudaMemset(nlist_data, -1, sizeof(int) * 2 * nloc * mem_size));
-    int * temp_nlist = nlist_data; //nloc*mem_size
-    int * nei_order = temp_nlist + nloc * mem_size;
-    nlist.inum = nloc;
-    FPTYPE rcut2 = rcut * rcut;
-    
-    
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(1, TPB);
-    build_nlist<<>>(
-                ilist, 
-                temp_nlist,
-                c_cpy, 
-                rcut2,
-                nloc,
-                nall,
-                mem_size);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
-    parallel_prefix_scan <<>>(
-      numneigh, nei_order, 
-      temp_nlist, mem_size, nloc, nall);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
-    fill_nlist<<>>(
-                firstneigh,
-                temp_nlist,
-                nei_order,
-                mem_size,
-                nall);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
-    int * numneigh_host = new int[nloc];
-    DPErrcheck(cudaMemcpy(numneigh_host, numneigh, sizeof(int) * nloc, cudaMemcpyDeviceToHost));
-    int max_nei = 0;
-    for(int ii=0;iimax_nei)max_nei=numneigh_host[ii];
-    }
-    *max_list_size = max_nei;
-    delete [] numneigh_host;
-    return 0;
+int build_nlist_gpu(InputNlist &nlist,
+                    int *max_list_size,
+                    int *nlist_data,
+                    const FPTYPE *c_cpy,
+                    const int &nloc,
+                    const int &nall,
+                    const int &mem_size,
+                    const float &rcut) {
+  if (mem_size < nall) {
+    return 1;
+  }
+  const int nblock = (nall + TPB - 1) / TPB;
+  int *ilist = nlist.ilist;
+  int *numneigh = nlist.numneigh;
+  int **firstneigh = nlist.firstneigh;
+  DPErrcheck(cudaMemset(nlist_data, -1, sizeof(int) * 2 * nloc * mem_size));
+  int *temp_nlist = nlist_data;  // nloc*mem_size
+  int *nei_order = temp_nlist + nloc * mem_size;
+  nlist.inum = nloc;
+  FPTYPE rcut2 = rcut * rcut;
+
+  dim3 block_grid(nloc, nblock);
+  dim3 thread_grid(1, TPB);
+  build_nlist<<>>(ilist, temp_nlist, c_cpy, rcut2,
+                                           nloc, nall, mem_size);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
+  parallel_prefix_scan
+      <<>>(numneigh, nei_order, temp_nlist, mem_size, nloc, nall);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
+  fill_nlist<<>>(firstneigh, temp_nlist, nei_order,
+                                          mem_size, nall);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
+  int *numneigh_host = new int[nloc];
+  DPErrcheck(cudaMemcpy(numneigh_host, numneigh, sizeof(int) * nloc,
+                        cudaMemcpyDeviceToHost));
+  int max_nei = 0;
+  for (int ii = 0; ii < nloc; ii++) {
+    if (numneigh_host[ii] > max_nei) max_nei = numneigh_host[ii];
+  }
+  *max_list_size = max_nei;
+  delete[] numneigh_host;
+  return 0;
 }
 
-void use_nlist_map(
-    int * nlist, 
-    const int * nlist_map, 
-    const int nloc, 
-    const int nnei)
-{
-    int nblock=(nnei+TPB-1)/TPB;
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(1, TPB);
-    map_nlist<<>>(nlist, nlist_map, nloc, nnei);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+void use_nlist_map(int *nlist,
+                   const int *nlist_map,
+                   const int nloc,
+                   const int nnei) {
+  int nblock = (nnei + TPB - 1) / TPB;
+  dim3 block_grid(nloc, nblock);
+  dim3 thread_grid(1, TPB);
+  map_nlist<<>>(nlist, nlist_map, nloc, nnei);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 }
 
-void use_nei_info_gpu(
-    int * nlist,
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int * nlist_map, 
-    const int nloc, 
-    const int nnei,
-    const int ntypes,
-    const bool b_nlist_map)
-{
-    int nblock=(nnei+TPB-1)/TPB;
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(1, TPB);
-    DPErrcheck(cudaMemset(ntype, 0, sizeof(int) * nloc * nnei));
-    DPErrcheck(cudaMemset(nmask, 0, sizeof(bool) * nloc * nnei));
-    if (b_nlist_map){
-        map_nei_info<<>>(nlist, ntype, nmask, type, nlist_map, nloc, nnei, ntypes);
-    }
-    else{
-        map_nei_info_noconvert<<>>(nlist, ntype, nmask, type, nloc, nnei, ntypes);
-    }
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+void use_nei_info_gpu(int *nlist,
+                      int *ntype,
+                      bool *nmask,
+                      const int *type,
+                      const int *nlist_map,
+                      const int nloc,
+                      const int nnei,
+                      const int ntypes,
+                      const bool b_nlist_map) {
+  int nblock = (nnei + TPB - 1) / TPB;
+  dim3 block_grid(nloc, nblock);
+  dim3 thread_grid(1, TPB);
+  DPErrcheck(cudaMemset(ntype, 0, sizeof(int) * nloc * nnei));
+  DPErrcheck(cudaMemset(nmask, 0, sizeof(bool) * nloc * nnei));
+  if (b_nlist_map) {
+    map_nei_info<<>>(nlist, ntype, nmask, type,
+                                              nlist_map, nloc, nnei, ntypes);
+  } else {
+    map_nei_info_noconvert<<>>(
+        nlist, ntype, nmask, type, nloc, nnei, ntypes);
+  }
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 }
 
-template int build_nlist_gpu(InputNlist & nlist, int * max_list_size, int * nlist_data, const float * c_cpy, const int & nloc, const int & nall, const int & mem_size, const float & rcut);
-template int build_nlist_gpu(InputNlist & nlist, int * max_list_size, int * nlist_data, const double * c_cpy, const int & nloc, const int & nall, const int & mem_size, const float & rcut);
-}
\ No newline at end of file
+template int build_nlist_gpu(InputNlist &nlist,
+                                    int *max_list_size,
+                                    int *nlist_data,
+                                    const float *c_cpy,
+                                    const int &nloc,
+                                    const int &nall,
+                                    const int &mem_size,
+                                    const float &rcut);
+template int build_nlist_gpu(InputNlist &nlist,
+                                     int *max_list_size,
+                                     int *nlist_data,
+                                     const double *c_cpy,
+                                     const int &nloc,
+                                     const int &nall,
+                                     const int &mem_size,
+                                     const float &rcut);
+}  // namespace deepmd
diff --git a/source/lib/src/cuda/prod_env_mat.cu b/source/lib/src/cuda/prod_env_mat.cu
index a68067e949..16a29bb418 100644
--- a/source/lib/src/cuda/prod_env_mat.cu
+++ b/source/lib/src/cuda/prod_env_mat.cu
@@ -1,35 +1,35 @@
+#include 
+#include 
+#include 
+
 #include "device.h"
 #include "fmt_nlist.h"
 #include "prod_env_mat.h"
-#include 
-#include 
-#include 
 
-__device__ inline double _sqrt(double x) {return sqrt(x);}
-__device__ inline float _sqrt(float x) {return sqrtf(x);}
-__device__ inline double _rsqrt(double x) {return rsqrt(x);}
-__device__ inline float _rsqrt(float x) {return rsqrtf(x);}
+__device__ inline double _sqrt(double x) { return sqrt(x); }
+__device__ inline float _sqrt(float x) { return sqrtf(x); }
+__device__ inline double _rsqrt(double x) { return rsqrt(x); }
+__device__ inline float _rsqrt(float x) { return rsqrtf(x); }
 
 // common part of prod_env_mat
-template <
-    typename    Key,
-    int         BLOCK_THREADS,
-    int         ITEMS_PER_THREAD>
-__launch_bounds__ (BLOCK_THREADS)
-__global__ void BlockSortKernel(
-    Key * d_in,
-    Key * d_out)                // Tile of output
-{   
+template 
+__launch_bounds__(BLOCK_THREADS) __global__
+    void BlockSortKernel(Key* d_in,
+                         Key* d_out)  // Tile of output
+{
   enum { TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD };
-  // Specialize BlockLoad type for our thread block (uses warp-striped loads for coalescing, then transposes in shared memory to a blocked arrangement)
-  typedef cub::BlockLoad BlockLoadT;
+  // Specialize BlockLoad type for our thread block (uses warp-striped loads for
+  // coalescing, then transposes in shared memory to a blocked arrangement)
+  typedef cub::BlockLoad
+      BlockLoadT;
   // Specialize BlockRadixSort type for our thread block
-  typedef cub::BlockRadixSort BlockRadixSortT;
+  typedef cub::BlockRadixSort
+      BlockRadixSortT;
   // Shared memory
-  __shared__ union TempStorage
-  {
-    typename BlockLoadT::TempStorage        load;
-    typename BlockRadixSortT::TempStorage   sort;
+  __shared__ union TempStorage {
+    typename BlockLoadT::TempStorage load;
+    typename BlockRadixSortT::TempStorage sort;
   } temp_storage;
   // Per-thread tile items
   Key items[ITEMS_PER_THREAD];
@@ -42,349 +42,318 @@ __global__ void BlockSortKernel(
   // Sort keys
   BlockRadixSortT(temp_storage.sort).SortBlockedToStriped(items);
   // Store output in striped fashion
-  cub::StoreDirectStriped(threadIdx.x, d_out + block_offset, items);
+  cub::StoreDirectStriped(threadIdx.x, d_out + block_offset,
+                                         items);
 }
 
-template
-__device__ inline FPTYPE dev_dot(
-    FPTYPE * arr1, 
-    FPTYPE * arr2) 
-{
+template 
+__device__ inline FPTYPE dev_dot(FPTYPE* arr1, FPTYPE* arr2) {
   return arr1[0] * arr2[0] + arr1[1] * arr2[1] + arr1[2] * arr2[2];
 }
 
-template
+template 
 __device__ inline void spline5_switch(
-    FPTYPE & vv,
-    FPTYPE & dd,
-    FPTYPE & xx, 
-    const float & rmin, 
-    const float & rmax) 
-{
+    FPTYPE& vv, FPTYPE& dd, FPTYPE& xx, const float& rmin, const float& rmax) {
   if (xx < rmin) {
     dd = (FPTYPE)0.;
     vv = (FPTYPE)1.;
-  }
-  else if (xx < rmax) {
-    FPTYPE uu = (xx - rmin) / (rmax - rmin) ;
-    FPTYPE du = (FPTYPE)1. / (rmax - rmin) ;
-    vv = uu*uu*uu * ((FPTYPE)-6. * uu*uu + (FPTYPE)15. * uu - (FPTYPE)10.) + (FPTYPE)1.;
-    dd = ( (FPTYPE)3. * uu*uu * ((FPTYPE)-6. * uu*uu + (FPTYPE)15. * uu - (FPTYPE)10.) + uu*uu*uu * ((FPTYPE)-12. * uu + (FPTYPE)15.) ) * du;
-  }
-  else {
+  } else if (xx < rmax) {
+    FPTYPE uu = (xx - rmin) / (rmax - rmin);
+    FPTYPE du = (FPTYPE)1. / (rmax - rmin);
+    vv = uu * uu * uu *
+             ((FPTYPE)-6. * uu * uu + (FPTYPE)15. * uu - (FPTYPE)10.) +
+         (FPTYPE)1.;
+    dd = ((FPTYPE)3. * uu * uu *
+              ((FPTYPE)-6. * uu * uu + (FPTYPE)15. * uu - (FPTYPE)10.) +
+          uu * uu * uu * ((FPTYPE)-12. * uu + (FPTYPE)15.)) *
+         du;
+  } else {
     dd = (FPTYPE)0.;
     vv = (FPTYPE)0.;
   }
 }
 
-template
-__device__ inline uint_64 encoding_nbor_info(
-    const int type,
-    const FPTYPE dist,
-    const int index)
-{
+template 
+__device__ inline uint_64 encoding_nbor_info(const int type,
+                                             const FPTYPE dist,
+                                             const int index) {
   // nbor info checking:
   // the type of nbor atom must be smaller than 128
   // the distance of center atom between nbor atom must be smaller than 128
-  // the index of nbor atom(including ghost region) must be smaller than 16777216(1 << 24)
-  if(type >= 128 || dist >= (FPTYPE)128.0 || index >= (1 << 24)) {
+  // the index of nbor atom(including ghost region) must be smaller than
+  // 16777216(1 << 24)
+  if (type >= 128 || dist >= (FPTYPE)128.0 || index >= (1 << 24)) {
     asm("trap;");
   }
-  return ((uint_64)type << 57) + (uint_64)((double)dist * ((uint_64)1 << 50)) / (1 << 24) * (1 << 24) + index;
+  return ((uint_64)type << 57) +
+         (uint_64)((double)dist * ((uint_64)1 << 50)) / (1 << 24) * (1 << 24) +
+         index;
 }
 
-__device__ inline void decoding_nbor_info(
-    int &type,
-    int &index,
-    const uint_64 key)
-{
+__device__ inline void decoding_nbor_info(int& type,
+                                          int& index,
+                                          const uint_64 key) {
   type = key >> 57;
   index = key & 0xFFFFFF;
 }
 
-template
-__global__ void get_i_idx(
-    FPTYPE * i_idx,
-    const int nloc,
-    const FPTYPE * ilist)
-{
+template 
+__global__ void get_i_idx(FPTYPE* i_idx, const int nloc, const FPTYPE* ilist) {
   const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
-  if(idx >= nloc) {
+  if (idx >= nloc) {
     return;
   }
   i_idx[ilist[idx]] = idx;
 }
 
-template
-__global__ void format_nlist_fill_a(
-    uint_64 * key,
-    const FPTYPE * coord,
-    const int * type,
-    const int * numneigh,
-    int ** firstneigh,
-    const float rcut,
-    int * i_idx,
-    const int MAX_NBOR_SIZE)
-{   
+template 
+__global__ void format_nlist_fill_a(uint_64* key,
+                                    const FPTYPE* coord,
+                                    const int* type,
+                                    const int* numneigh,
+                                    int** firstneigh,
+                                    const float rcut,
+                                    int* i_idx,
+                                    const int MAX_NBOR_SIZE) {
   // <<>>
   const int_64 idx = blockIdx.x;
   const unsigned int idy = blockIdx.y * blockDim.y + threadIdx.y;
-  
+
   const int nsize = numneigh[i_idx[idx]];
   if (idy >= nsize) {
     return;
   }
 
-  const int * nei_idx = firstneigh[i_idx[idx]];
+  const int* nei_idx = firstneigh[i_idx[idx]];
   // dev_copy(nei_idx, &jlist[jrange[i_idx]], nsize);
-  uint_64 * key_in = key + idx * MAX_NBOR_SIZE;
+  uint_64* key_in = key + idx * MAX_NBOR_SIZE;
   FPTYPE diff[3];
-  const int & j_idx = nei_idx[idy];
+  const int& j_idx = nei_idx[idy];
   for (int dd = 0; dd < 3; dd++) {
     diff[dd] = coord[j_idx * 3 + dd] - coord[idx * 3 + dd];
   }
-  FPTYPE rr = _sqrt(dev_dot(diff, diff)); 
+  FPTYPE rr = _sqrt(dev_dot(diff, diff));
   if (rr <= rcut) {
     key_in[idy] = encoding_nbor_info(type[j_idx], rr, j_idx);
   }
 }
 
-template
-__global__ void fill_nei_iter(
-    int * nei_iter_dev,
-    const FPTYPE * key,
-    const int nloc,
-    const int max_nbor_size,
-    const int sec_size)
-{
+template 
+__global__ void fill_nei_iter(int* nei_iter_dev,
+                              const FPTYPE* key,
+                              const int nloc,
+                              const int max_nbor_size,
+                              const int sec_size) {
   int_64 row = blockIdx.x;
   int col = blockIdx.y * blockDim.x + threadIdx.x;
-  const FPTYPE * key_out = key + nloc * max_nbor_size + row * max_nbor_size;
+  const FPTYPE* key_out = key + nloc * max_nbor_size + row * max_nbor_size;
   int nei_type_cur = -1, nbor_idx_cur = 0;
   int nei_type_pre = -1, nbor_idx_pre = 0;
-  if (col < max_nbor_size && key_out[col] != key_out[max_nbor_size - 1]){
-    if (col >= 1) 
+  if (col < max_nbor_size && key_out[col] != key_out[max_nbor_size - 1]) {
+    if (col >= 1)
       decoding_nbor_info(nei_type_pre, nbor_idx_pre, key_out[col - 1]);
     decoding_nbor_info(nei_type_cur, nbor_idx_cur, key_out[col]);
   }
-  if (nei_type_cur != nei_type_pre){
+  if (nei_type_cur != nei_type_pre) {
     nei_iter_dev[row * sec_size + nei_type_cur] = col;
   }
 }
 
-template
-__global__ void format_nlist_fill_b(
-    int * nlist,
-    const int nlist_size,
-    const int nloc,
-    FPTYPE * key,
-    const int * sec,
-    const int sec_size,
-    int * nei_iter_dev,
-    const int max_nbor_size)
-{ 
+template 
+__global__ void format_nlist_fill_b(int* nlist,
+                                    const int nlist_size,
+                                    const int nloc,
+                                    FPTYPE* key,
+                                    const int* sec,
+                                    const int sec_size,
+                                    int* nei_iter_dev,
+                                    const int max_nbor_size) {
   int_64 row = blockIdx.x;
   int col = blockIdx.y * blockDim.x + threadIdx.x;
-  int * nei_iter = nei_iter_dev + row * sec_size;
-  FPTYPE * key_out = key + nloc * max_nbor_size + row * max_nbor_size;
-  int * row_nlist = nlist + row * nlist_size;
-  if (col < max_nbor_size){
-    if (key_out[col] != key_out[max_nbor_size - 1]){
+  int* nei_iter = nei_iter_dev + row * sec_size;
+  FPTYPE* key_out = key + nloc * max_nbor_size + row * max_nbor_size;
+  int* row_nlist = nlist + row * nlist_size;
+  if (col < max_nbor_size) {
+    if (key_out[col] != key_out[max_nbor_size - 1]) {
       int nei_type = 0, nbor_idx = 0;
       decoding_nbor_info(nei_type, nbor_idx, key_out[col]);
       int out_indx = col - nei_iter[nei_type] + sec[nei_type];
-      if (out_indx < sec[nei_type + 1]){
+      if (out_indx < sec[nei_type + 1]) {
         row_nlist[out_indx] = nbor_idx;
       }
     }
   }
 }
 
-template
-__global__ void encoding_decoding_nbor_info(
-    uint_64 * key,
-    int * out_type,
-    int * out_index,
-    const int * in_type,
-    const FPTYPE * in_dist,
-    const int * in_index,
-    const int size_of_array)
-{ 
+template 
+__global__ void encoding_decoding_nbor_info(uint_64* key,
+                                            int* out_type,
+                                            int* out_index,
+                                            const int* in_type,
+                                            const FPTYPE* in_dist,
+                                            const int* in_index,
+                                            const int size_of_array) {
   const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
-  if(idx >= size_of_array) {
+  if (idx >= size_of_array) {
     return;
   }
-  
+
   key[idx] = encoding_nbor_info(in_type[idx], in_dist[idx], in_index[idx]);
   decoding_nbor_info(out_type[idx], out_index[idx], key[idx]);
 }
 
-template
-void format_nbor_list_256 (
-    uint_64 * key,
-    const FPTYPE* coord,
-    const int* type,
-    const deepmd::InputNlist & gpu_inlist,
-    const int& nloc,       
-    const float& rcut, 
-    int * i_idx) 
-{   
+template 
+void format_nbor_list_256(uint_64* key,
+                          const FPTYPE* coord,
+                          const int* type,
+                          const deepmd::InputNlist& gpu_inlist,
+                          const int& nloc,
+                          const float& rcut,
+                          int* i_idx) {
   const int LEN = 256;
   const int MAX_NBOR_SIZE = 256;
   const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN;
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(1, LEN);
-  format_nlist_fill_a<<>> (
-      key,
-      coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, MAX_NBOR_SIZE);
+  format_nlist_fill_a<<>>(
+      key, coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx,
+      MAX_NBOR_SIZE);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
   const int ITEMS_PER_THREAD = 4;
   const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD;
-  // BlockSortKernel<<>> (
-  BlockSortKernel <<>> (
-      key, 
-      key + nloc * MAX_NBOR_SIZE);
+  // BlockSortKernel<<>> (
+  BlockSortKernel
+      <<>>(key, key + nloc * MAX_NBOR_SIZE);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void format_nbor_list_512 (
-    uint_64 * key,
-    const FPTYPE* coord,
-    const int* type,
-    const deepmd::InputNlist & gpu_inlist,
-    const int& nloc,       
-    const float& rcut, 
-    int * i_idx) 
-{   
+template 
+void format_nbor_list_512(uint_64* key,
+                          const FPTYPE* coord,
+                          const int* type,
+                          const deepmd::InputNlist& gpu_inlist,
+                          const int& nloc,
+                          const float& rcut,
+                          int* i_idx) {
   const int LEN = 256;
   const int MAX_NBOR_SIZE = 512;
   const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN;
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(1, LEN);
-  format_nlist_fill_a<<>> (
-      key,
-      coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, MAX_NBOR_SIZE);
+  format_nlist_fill_a<<>>(
+      key, coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx,
+      MAX_NBOR_SIZE);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
   const int ITEMS_PER_THREAD = 4;
   const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD;
-  // BlockSortKernel<<>> (
-  BlockSortKernel <<>> (
-      key, 
-      key + nloc * MAX_NBOR_SIZE);
+  // BlockSortKernel<<>> (
+  BlockSortKernel
+      <<>>(key, key + nloc * MAX_NBOR_SIZE);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void format_nbor_list_1024 (
-    uint_64 * key,
-    const FPTYPE* coord,
-    const int* type,
-    const deepmd::InputNlist & gpu_inlist,
-    const int& nloc,       
-    const float& rcut, 
-    int * i_idx) 
-{   
+template 
+void format_nbor_list_1024(uint_64* key,
+                           const FPTYPE* coord,
+                           const int* type,
+                           const deepmd::InputNlist& gpu_inlist,
+                           const int& nloc,
+                           const float& rcut,
+                           int* i_idx) {
   const int LEN = 256;
   const int MAX_NBOR_SIZE = 1024;
   const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN;
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(1, LEN);
-  format_nlist_fill_a<<>> (
-      key,
-      coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, MAX_NBOR_SIZE);
+  format_nlist_fill_a<<>>(
+      key, coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx,
+      MAX_NBOR_SIZE);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
   const int ITEMS_PER_THREAD = 8;
   const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD;
-  // BlockSortKernel<<>> (
-  BlockSortKernel <<>> (
-      key, 
-      key + nloc * MAX_NBOR_SIZE);
+  // BlockSortKernel<<>> (
+  BlockSortKernel
+      <<>>(key, key + nloc * MAX_NBOR_SIZE);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void format_nbor_list_2048 (
-    uint_64 * key,
-    const FPTYPE* coord,
-    const int* type,
-    const deepmd::InputNlist & gpu_inlist,
-    const int& nloc,       
-    const float& rcut, 
-    int * i_idx) 
-{   
+template 
+void format_nbor_list_2048(uint_64* key,
+                           const FPTYPE* coord,
+                           const int* type,
+                           const deepmd::InputNlist& gpu_inlist,
+                           const int& nloc,
+                           const float& rcut,
+                           int* i_idx) {
   const int LEN = 256;
   const int MAX_NBOR_SIZE = 2048;
   const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN;
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(1, LEN);
-  format_nlist_fill_a<<>> (
-      key,
-      coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, MAX_NBOR_SIZE);
+  format_nlist_fill_a<<>>(
+      key, coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx,
+      MAX_NBOR_SIZE);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
   const int ITEMS_PER_THREAD = 8;
   const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD;
-  // BlockSortKernel<<>> (
-  BlockSortKernel <<>> (
-      key, 
-      key + nloc * MAX_NBOR_SIZE);
+  // BlockSortKernel<<>> (
+  BlockSortKernel
+      <<>>(key, key + nloc * MAX_NBOR_SIZE);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void format_nbor_list_4096 (
-    uint_64 * key,
-    const FPTYPE* coord,
-    const int* type,
-    const deepmd::InputNlist & gpu_inlist,
-    const int& nloc,       
-    const float& rcut, 
-    int * i_idx)
-{   
+template 
+void format_nbor_list_4096(uint_64* key,
+                           const FPTYPE* coord,
+                           const int* type,
+                           const deepmd::InputNlist& gpu_inlist,
+                           const int& nloc,
+                           const float& rcut,
+                           int* i_idx) {
   const int LEN = 256;
   const int MAX_NBOR_SIZE = 4096;
   const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN;
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(1, LEN);
-  format_nlist_fill_a<<>> (
-      key,
-      coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, MAX_NBOR_SIZE);
+  format_nlist_fill_a<<>>(
+      key, coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx,
+      MAX_NBOR_SIZE);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
   const int ITEMS_PER_THREAD = 16;
   const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD;
-  // BlockSortKernel<<>> (
-  BlockSortKernel <<>> (
-      key, 
-      key + nloc * MAX_NBOR_SIZE);
+  // BlockSortKernel<<>> (
+  BlockSortKernel
+      <<>>(key, key + nloc * MAX_NBOR_SIZE);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template<
-    typename FPTYPE,
-    int      THREADS_PER_BLOCK>
-__global__ void compute_env_mat_a(
-    FPTYPE* em,
-    FPTYPE* em_deriv,
-    FPTYPE* rij,
-    const FPTYPE* coord,
-    const FPTYPE* avg,
-    const FPTYPE* std,
-    const int* type,
-    const int* nlist,
-    const int nnei,
-    const float rmin,
-    const float rmax)
-{   
+template 
+__global__ void compute_env_mat_a(FPTYPE* em,
+                                  FPTYPE* em_deriv,
+                                  FPTYPE* rij,
+                                  const FPTYPE* coord,
+                                  const FPTYPE* avg,
+                                  const FPTYPE* std,
+                                  const int* type,
+                                  const int* nlist,
+                                  const int nnei,
+                                  const float rmin,
+                                  const float rmax) {
   // <<>>
   const int_64 bid = blockIdx.x;
   const unsigned int tid = threadIdx.x;
@@ -392,16 +361,16 @@ __global__ void compute_env_mat_a(
     return;
   }
   const int ndescrpt = nnei * 4;
-  const int * row_nlist = nlist + bid * nnei;
-  FPTYPE * row_rij = rij + bid * nnei * 3;
-  FPTYPE * row_descript = em + bid * nnei * 4;
-  FPTYPE * row_descript_deriv = em_deriv + bid * nnei * 12;
+  const int* row_nlist = nlist + bid * nnei;
+  FPTYPE* row_rij = rij + bid * nnei * 3;
+  FPTYPE* row_descript = em + bid * nnei * 4;
+  FPTYPE* row_descript_deriv = em_deriv + bid * nnei * 12;
   for (int ii = tid; ii < nnei; ii += THREADS_PER_BLOCK) {
-    const int idx_value = ii * 4;	  // 4 components
-    const int idx_deriv = ii * 12;	// 4 components time 3 directions
+    const int idx_value = ii * 4;   // 4 components
+    const int idx_deriv = ii * 12;  // 4 components time 3 directions
     if (row_nlist[ii] >= 0) {
-      FPTYPE rr[3]  = {0};
-      FPTYPE dd[4]  = {0};
+      FPTYPE rr[3] = {0};
+      FPTYPE dd[4] = {0};
       FPTYPE vv[12] = {0};
       const int j_idx = row_nlist[ii];
       for (int kk = 0; kk < 3; kk++) {
@@ -417,60 +386,105 @@ __global__ void compute_env_mat_a(
       FPTYPE inr3 = inr4 * nr;
       FPTYPE sw, dsw;
       spline5_switch(sw, dsw, nr, rmin, rmax);
-      dd[0] = ((FPTYPE)1./nr)       ;//* sw;
-      dd[1] = (rr[0] / nr2) ;//* sw;
-      dd[2] = (rr[1] / nr2) ;//* sw;
-      dd[3] = (rr[2] / nr2) ;//* sw;
-      vv[0] = (rr[0] * inr3 * sw - dd[0] * dsw * rr[0] * inr); // avg[type[(idx_deriv + 0) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 0) % (ndescrpt * 3)) / 3];
-      vv[1] = (rr[1] * inr3 * sw - dd[0] * dsw * rr[1] * inr); // avg[type[(idx_deriv + 1) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 1) % (ndescrpt * 3)) / 3];
-      vv[2] = (rr[2] * inr3 * sw - dd[0] * dsw * rr[2] * inr); // avg[type[(idx_deriv + 2) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 2) % (ndescrpt * 3)) / 3];
+      dd[0] = ((FPTYPE)1. / nr);  //* sw;
+      dd[1] = (rr[0] / nr2);      //* sw;
+      dd[2] = (rr[1] / nr2);      //* sw;
+      dd[3] = (rr[2] / nr2);      //* sw;
+      vv[0] = (rr[0] * inr3 * sw -
+               dd[0] * dsw * rr[0] *
+                   inr);  // avg[type[(idx_deriv + 0) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 0) % (ndescrpt * 3)) / 3];
+      vv[1] = (rr[1] * inr3 * sw -
+               dd[0] * dsw * rr[1] *
+                   inr);  // avg[type[(idx_deriv + 1) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 1) % (ndescrpt * 3)) / 3];
+      vv[2] = (rr[2] * inr3 * sw -
+               dd[0] * dsw * rr[2] *
+                   inr);  // avg[type[(idx_deriv + 2) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 2) % (ndescrpt * 3)) / 3];
       // ****deriv of component x/r2
-      vv[3] = (((FPTYPE)2. * rr[0] * rr[0] * inr4 - inr2) * sw - dd[1] * dsw * rr[0] * inr); // avg[type[(idx_deriv + 3) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 3) % (ndescrpt * 3)) / 3];
-      vv[4] = (((FPTYPE)2. * rr[0] * rr[1] * inr4	) * sw - dd[1] * dsw * rr[1] * inr); // avg[type[(idx_deriv + 4) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 4) % (ndescrpt * 3)) / 3];
-      vv[5] = (((FPTYPE)2. * rr[0] * rr[2] * inr4	) * sw - dd[1] * dsw * rr[2] * inr); // avg[type[(idx_deriv + 5) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 5) % (ndescrpt * 3)) / 3];
+      vv[3] = (((FPTYPE)2. * rr[0] * rr[0] * inr4 - inr2) * sw -
+               dd[1] * dsw * rr[0] *
+                   inr);  // avg[type[(idx_deriv + 3) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 3) % (ndescrpt * 3)) / 3];
+      vv[4] = (((FPTYPE)2. * rr[0] * rr[1] * inr4) * sw -
+               dd[1] * dsw * rr[1] *
+                   inr);  // avg[type[(idx_deriv + 4) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 4) % (ndescrpt * 3)) / 3];
+      vv[5] = (((FPTYPE)2. * rr[0] * rr[2] * inr4) * sw -
+               dd[1] * dsw * rr[2] *
+                   inr);  // avg[type[(idx_deriv + 5) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 5) % (ndescrpt * 3)) / 3];
       // ***deriv of component y/r2
-      vv[6] = (((FPTYPE)2. * rr[1] * rr[0] * inr4	) * sw - dd[2] * dsw * rr[0] * inr); // avg[type[(idx_deriv + 6) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 6) % (ndescrpt * 3)) / 3];
-      vv[7] = (((FPTYPE)2. * rr[1] * rr[1] * inr4 - inr2) * sw - dd[2] * dsw * rr[1] * inr); // avg[type[(idx_deriv + 7) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 7) % (ndescrpt * 3)) / 3];
-      vv[8] = (((FPTYPE)2. * rr[1] * rr[2] * inr4	) * sw - dd[2] * dsw * rr[2] * inr); // avg[type[(idx_deriv + 8) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 8) % (ndescrpt * 3)) / 3];
-      // ***deriv of component z/r2 
-      vv[9] = (((FPTYPE)2. * rr[2] * rr[0] * inr4	) * sw - dd[3] * dsw * rr[0] * inr); // avg[type[(idx_deriv + 9) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 9) % (ndescrpt * 3)) / 3];
-      vv[10]= (((FPTYPE)2. * rr[2] * rr[1] * inr4	) * sw - dd[3] * dsw * rr[1] * inr); // avg[type[(idx_deriv + 10) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 10) % (ndescrpt * 3)) / 3];
-      vv[11]= (((FPTYPE)2. * rr[2] * rr[2] * inr4 - inr2) * sw - dd[3] * dsw * rr[2] * inr); // avg[type[(idx_deriv + 11) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 11) % (ndescrpt * 3)) / 3];
+      vv[6] = (((FPTYPE)2. * rr[1] * rr[0] * inr4) * sw -
+               dd[2] * dsw * rr[0] *
+                   inr);  // avg[type[(idx_deriv + 6) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 6) % (ndescrpt * 3)) / 3];
+      vv[7] = (((FPTYPE)2. * rr[1] * rr[1] * inr4 - inr2) * sw -
+               dd[2] * dsw * rr[1] *
+                   inr);  // avg[type[(idx_deriv + 7) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 7) % (ndescrpt * 3)) / 3];
+      vv[8] = (((FPTYPE)2. * rr[1] * rr[2] * inr4) * sw -
+               dd[2] * dsw * rr[2] *
+                   inr);  // avg[type[(idx_deriv + 8) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 8) % (ndescrpt * 3)) / 3];
+      // ***deriv of component z/r2
+      vv[9] = (((FPTYPE)2. * rr[2] * rr[0] * inr4) * sw -
+               dd[3] * dsw * rr[0] *
+                   inr);  // avg[type[(idx_deriv + 9) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 9) % (ndescrpt * 3)) / 3];
+      vv[10] =
+          (((FPTYPE)2. * rr[2] * rr[1] * inr4) * sw -
+           dd[3] * dsw * rr[1] *
+               inr);  // avg[type[(idx_deriv + 10) / (ndescrpt * 3)] * ndescrpt
+                      // + ((idx_deriv + 10) % (ndescrpt * 3)) / 3];
+      vv[11] =
+          (((FPTYPE)2. * rr[2] * rr[2] * inr4 - inr2) * sw -
+           dd[3] * dsw * rr[2] *
+               inr);  // avg[type[(idx_deriv + 11) / (ndescrpt * 3)] * ndescrpt
+                      // + ((idx_deriv + 11) % (ndescrpt * 3)) / 3];
       // 4 value components
-      dd[0] *= sw; // * em[idx * ndescrpt + idx_value + 0]);// - avg[type[idx] * ndescrpt + idx_value + 0]) / std[type[idx] * ndescrpt + idx_value + 0];
-      dd[1] *= sw; // * em[idx * ndescrpt + idx_value + 1]);// - avg[type[idx] * ndescrpt + idx_value + 1]) / std[type[idx] * ndescrpt + idx_value + 1];
-      dd[2] *= sw; // * em[idx * ndescrpt + idx_value + 2]);// - avg[type[idx] * ndescrpt + idx_value + 2]) / std[type[idx] * ndescrpt + idx_value + 2];
-      dd[3] *= sw; // * em[idx * ndescrpt + idx_value + 3]);// - avg[type[idx] * ndescrpt + idx_value + 3]) / std[type[idx] * ndescrpt + idx_value + 3];
+      dd[0] *= sw;  // * em[idx * ndescrpt + idx_value + 0]);// - avg[type[idx]
+                    // * ndescrpt + idx_value + 0]) / std[type[idx] * ndescrpt +
+                    // idx_value + 0];
+      dd[1] *= sw;  // * em[idx * ndescrpt + idx_value + 1]);// - avg[type[idx]
+                    // * ndescrpt + idx_value + 1]) / std[type[idx] * ndescrpt +
+                    // idx_value + 1];
+      dd[2] *= sw;  // * em[idx * ndescrpt + idx_value + 2]);// - avg[type[idx]
+                    // * ndescrpt + idx_value + 2]) / std[type[idx] * ndescrpt +
+                    // idx_value + 2];
+      dd[3] *= sw;  // * em[idx * ndescrpt + idx_value + 3]);// - avg[type[idx]
+                    // * ndescrpt + idx_value + 3]) / std[type[idx] * ndescrpt +
+                    // idx_value + 3];
       for (int ii = 0; ii < 12; ii++) {
-        row_descript_deriv[idx_deriv + ii] = vv[ii] / std[type[bid] * ndescrpt + idx_value + ii / 3];
+        row_descript_deriv[idx_deriv + ii] =
+            vv[ii] / std[type[bid] * ndescrpt + idx_value + ii / 3];
       }
-      for (int ii = 0; ii < 4; ii++) {  
-        row_descript[idx_value + ii] = (dd[ii] - avg[type[bid] * ndescrpt + idx_value + ii]) / std[type[bid] * ndescrpt + idx_value + ii];
+      for (int ii = 0; ii < 4; ii++) {
+        row_descript[idx_value + ii] =
+            (dd[ii] - avg[type[bid] * ndescrpt + idx_value + ii]) /
+            std[type[bid] * ndescrpt + idx_value + ii];
       }
-    }
-    else {
+    } else {
       // TODO: move it to the memset.
-      row_descript[idx_value] -= avg[type[bid] * ndescrpt + idx_value] / std[type[bid] * ndescrpt + idx_value];
+      row_descript[idx_value] -= avg[type[bid] * ndescrpt + idx_value] /
+                                 std[type[bid] * ndescrpt + idx_value];
     }
   }
 }
 
-template<
-    typename FPTYPE,
-    int      THREADS_PER_BLOCK>
-__global__ void compute_env_mat_r(
-    FPTYPE* em,
-    FPTYPE* em_deriv,
-    FPTYPE* rij,
-    const FPTYPE* coord,
-    const FPTYPE* avg,
-    const FPTYPE* std,
-    const int* type,
-    const int* nlist,
-    const int nnei,
-    const float rmin,
-    const float rmax)
-{
+template <typename FPTYPE, int THREADS_PER_BLOCK>
+__global__ void compute_env_mat_r(FPTYPE* em,
+                                  FPTYPE* em_deriv,
+                                  FPTYPE* rij,
+                                  const FPTYPE* coord,
+                                  const FPTYPE* avg,
+                                  const FPTYPE* std,
+                                  const int* type,
+                                  const int* nlist,
+                                  const int nnei,
+                                  const float rmin,
+                                  const float rmax) {
   // <<<nloc, TPB>>>
   const int_64 bid = blockIdx.x;
   const unsigned int tid = threadIdx.x;
@@ -478,18 +492,18 @@ __global__ void compute_env_mat_r(
     return;
   }
   const int ndescrpt = nnei;
-  const int * row_nlist = nlist + bid * nnei;
-  FPTYPE * row_rij = rij + bid * nnei * 3;
-  FPTYPE * row_em = em + bid * nnei;
-  FPTYPE * row_em_deriv = em_deriv + bid * nnei * 3;
+  const int* row_nlist = nlist + bid * nnei;
+  FPTYPE* row_rij = rij + bid * nnei * 3;
+  FPTYPE* row_em = em + bid * nnei;
+  FPTYPE* row_em_deriv = em_deriv + bid * nnei * 3;
   for (int ii = tid; ii < nnei; ii += THREADS_PER_BLOCK) {
-    const int idx_value = ii;	  // 4 components
-    const int idx_deriv = ii * 3;	// 4 components time 3 directions
+    const int idx_value = ii;      // 4 components
+    const int idx_deriv = ii * 3;  // 4 components time 3 directions
     if (row_nlist[ii] >= 0) {
-      FPTYPE rr[3]  = {0};
-      FPTYPE vv[3]  = {0};
-      FPTYPE dd     = 0;
-      const int & j_idx = row_nlist[ii];
+      FPTYPE rr[3] = {0};
+      FPTYPE vv[3] = {0};
+      FPTYPE dd = 0;
+      const int& j_idx = row_nlist[ii];
       for (int kk = 0; kk < 3; kk++) {
         rr[kk] = coord[j_idx * 3 + kk] - coord[bid * 3 + kk];
         row_rij[ii * 3 + kk] = rr[kk];
@@ -503,201 +517,295 @@ __global__ void compute_env_mat_r(
       FPTYPE inr3 = inr4 * nr;
       FPTYPE sw, dsw;
       spline5_switch(sw, dsw, nr, rmin, rmax);
-      dd = ((FPTYPE)1./nr)       ;//* sw;
-      vv[0] = (rr[0] * inr3 * sw - dd * dsw * rr[0] * inr); // avg[type[(idx_deriv + 0) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 0) % (ndescrpt * 3)) / 3];
-      vv[1] = (rr[1] * inr3 * sw - dd * dsw * rr[1] * inr); // avg[type[(idx_deriv + 1) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 1) % (ndescrpt * 3)) / 3];
-      vv[2] = (rr[2] * inr3 * sw - dd * dsw * rr[2] * inr); // avg[type[(idx_deriv + 2) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 2) % (ndescrpt * 3)) / 3];
-      
+      dd = ((FPTYPE)1. / nr);  //* sw;
+      vv[0] = (rr[0] * inr3 * sw -
+               dd * dsw * rr[0] *
+                   inr);  // avg[type[(idx_deriv + 0) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 0) % (ndescrpt * 3)) / 3];
+      vv[1] = (rr[1] * inr3 * sw -
+               dd * dsw * rr[1] *
+                   inr);  // avg[type[(idx_deriv + 1) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 1) % (ndescrpt * 3)) / 3];
+      vv[2] = (rr[2] * inr3 * sw -
+               dd * dsw * rr[2] *
+                   inr);  // avg[type[(idx_deriv + 2) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 2) % (ndescrpt * 3)) / 3];
+
       // 4 value components
-      dd *= sw; // * em[idx * ndescrpt + idx_value + 0]);// - avg[type[idx] * ndescrpt + idx_value + 0]) / std[type[idx] * ndescrpt + idx_value + 0];
+      dd *= sw;  // * em[idx * ndescrpt + idx_value + 0]);// - avg[type[idx] *
+                 // ndescrpt + idx_value + 0]) / std[type[idx] * ndescrpt +
+                 // idx_value + 0];
       for (int ii = 0; ii < 3; ii++) {
-        row_em_deriv[idx_deriv + ii] = vv[ii] / std[type[bid] * ndescrpt + idx_value + ii / 3];
+        row_em_deriv[idx_deriv + ii] =
+            vv[ii] / std[type[bid] * ndescrpt + idx_value + ii / 3];
       }
-      row_em[idx_value] = (dd - avg[type[bid] * ndescrpt + idx_value]) / std[type[bid] * ndescrpt + idx_value];
-    }
-    else {
+      row_em[idx_value] = (dd - avg[type[bid] * ndescrpt + idx_value]) /
+                          std[type[bid] * ndescrpt + idx_value];
+    } else {
       // TODO: move it to the memset.
-      row_em[idx_value] -= avg[type[bid] * ndescrpt + idx_value] / std[type[bid] * ndescrpt + idx_value];
+      row_em[idx_value] -= avg[type[bid] * ndescrpt + idx_value] /
+                           std[type[bid] * ndescrpt + idx_value];
     }
   }
 }
 
 namespace deepmd {
 template <typename FPTYPE>
-void format_nbor_list_gpu_cuda(    
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const deepmd::InputNlist & gpu_inlist,
-    int * array_int,
-    uint_64 * array_longlong,
-    const int max_nbor_size,
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const std::vector<int> sec)
-{
+void format_nbor_list_gpu_cuda(int* nlist,
+                               const FPTYPE* coord,
+                               const int* type,
+                               const deepmd::InputNlist& gpu_inlist,
+                               int* array_int,
+                               uint_64* array_longlong,
+                               const int max_nbor_size,
+                               const int nloc,
+                               const int nall,
+                               const float rcut,
+                               const std::vector<int> sec) {
   const int LEN = 256;
   const int nnei = sec.back();
-  const int nblock = (nloc + LEN -1) / LEN;
-  int * sec_dev = array_int;
-  int * nei_iter = array_int + sec.size(); // = new int[sec_size];
-  int * i_idx = array_int + sec.size() + nloc * sec.size();
-  uint_64 * key = array_longlong;
-  assert(max_nbor_size == 256 || max_nbor_size == 512 || max_nbor_size == 1024 || max_nbor_size == 2048 || max_nbor_size == 4096);
+  const int nblock = (nloc + LEN - 1) / LEN;
+  int* sec_dev = array_int;
+  int* nei_iter = array_int + sec.size();  // = new int[sec_size];
+  int* i_idx = array_int + sec.size() + nloc * sec.size();
+  uint_64* key = array_longlong;
+  assert(max_nbor_size == 256 || max_nbor_size == 512 ||
+         max_nbor_size == 1024 || max_nbor_size == 2048 ||
+         max_nbor_size == 4096);
   DPErrcheck(cudaMemset(nlist, -1, sizeof(int) * int_64(nloc) * nnei));
-  DPErrcheck(cudaMemset(key, 0xffffffff, sizeof(uint_64) * int_64(nloc) * max_nbor_size));
-  DPErrcheck(cudaMemcpy(sec_dev, &sec[0], sizeof(int) * sec.size(), cudaMemcpyHostToDevice));   
+  DPErrcheck(cudaMemset(key, 0xffffffff,
+                        sizeof(uint_64) * int_64(nloc) * max_nbor_size));
+  DPErrcheck(cudaMemcpy(sec_dev, &sec[0], sizeof(int) * sec.size(),
+                        cudaMemcpyHostToDevice));
 
-  get_i_idx<<<nblock, LEN>>>(
-      i_idx,
-      nloc, gpu_inlist.ilist);
+  get_i_idx<<<nblock, LEN>>>(i_idx, nloc, gpu_inlist.ilist);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 
   if (max_nbor_size == 256) {
-    format_nbor_list_256 (
-        key,
-        coord, type, gpu_inlist, nloc, rcut, i_idx); 
-  }
-  else if (max_nbor_size == 512) {
-    format_nbor_list_512 (
-        key,
-        coord, type, gpu_inlist, nloc, rcut, i_idx); 
-  } 
-  else if (max_nbor_size == 1024) {
-    format_nbor_list_1024 (
-        key,
-        coord, type, gpu_inlist, nloc, rcut, i_idx); 
-  } 
-  else if (max_nbor_size == 2048) {
-    format_nbor_list_2048 (
-        key,
-        coord, type, gpu_inlist, nloc, rcut, i_idx); 
-  } 
-  else if (max_nbor_size == 4096) {
-    format_nbor_list_4096 (
-        key,
-        coord, type, gpu_inlist, nloc, rcut, i_idx); 
+    format_nbor_list_256(key, coord, type, gpu_inlist, nloc, rcut, i_idx);
+  } else if (max_nbor_size == 512) {
+    format_nbor_list_512(key, coord, type, gpu_inlist, nloc, rcut, i_idx);
+  } else if (max_nbor_size == 1024) {
+    format_nbor_list_1024(key, coord, type, gpu_inlist, nloc, rcut, i_idx);
+  } else if (max_nbor_size == 2048) {
+    format_nbor_list_2048(key, coord, type, gpu_inlist, nloc, rcut, i_idx);
+  } else if (max_nbor_size == 4096) {
+    format_nbor_list_4096(key, coord, type, gpu_inlist, nloc, rcut, i_idx);
   }
 
-  fill_nei_iter <<<dim3(nloc, (max_nbor_size + LEN - 1) / LEN), LEN>>> (
-      nei_iter,
-      key, nloc, max_nbor_size, sec.size());
-  
-  format_nlist_fill_b <<<nblock, LEN>>> (
-      nlist,
-      nnei, nloc, key, sec_dev, sec.size(), nei_iter, max_nbor_size);
+  fill_nei_iter<<<dim3(nloc, (max_nbor_size + LEN - 1) / LEN), LEN>>>(
+      nei_iter, key, nloc, max_nbor_size, sec.size());
+
+  format_nlist_fill_b<<<nblock, LEN>>>(
+      nlist, nnei, nloc, key, sec_dev, sec.size(), nei_iter, max_nbor_size);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
 template <typename FPTYPE>
-void prod_env_mat_a_gpu_cuda(    
-    FPTYPE * em, 
-    FPTYPE * em_deriv, 
-    FPTYPE * rij, 
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const InputNlist & gpu_inlist,
-    int * array_int, 
-    uint_64 * array_longlong,
-    const int max_nbor_size,
-    const FPTYPE * avg, 
-    const FPTYPE * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector<int> sec,
-    const int * f_type)
-{
-  if (f_type == NULL){
+void prod_env_mat_a_gpu_cuda(FPTYPE* em,
+                             FPTYPE* em_deriv,
+                             FPTYPE* rij,
+                             int* nlist,
+                             const FPTYPE* coord,
+                             const int* type,
+                             const InputNlist& gpu_inlist,
+                             int* array_int,
+                             uint_64* array_longlong,
+                             const int max_nbor_size,
+                             const FPTYPE* avg,
+                             const FPTYPE* std,
+                             const int nloc,
+                             const int nall,
+                             const float rcut,
+                             const float rcut_smth,
+                             const std::vector<int> sec,
+                             const int* f_type) {
+  if (f_type == NULL) {
     f_type = type;
   }
   const int nnei = sec.back();
   const int ndescrpt = nnei * 4;
   DPErrcheck(cudaMemset(em, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt));
-  DPErrcheck(cudaMemset(em_deriv, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt * 3));
+  DPErrcheck(
+      cudaMemset(em_deriv, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt * 3));
   DPErrcheck(cudaMemset(rij, 0, sizeof(FPTYPE) * int_64(nloc) * nnei * 3));
 
-  format_nbor_list_gpu_cuda(
-      nlist, 
-      coord, f_type, gpu_inlist, array_int, array_longlong, max_nbor_size, nloc, nall, rcut, sec);
+  format_nbor_list_gpu_cuda(nlist, coord, f_type, gpu_inlist, array_int,
+                            array_longlong, max_nbor_size, nloc, nall, rcut,
+                            sec);
   nborErrcheck(cudaGetLastError());
   nborErrcheck(cudaDeviceSynchronize());
 
-  compute_env_mat_a<FPTYPE, TPB> <<<nloc, TPB>>> (
-      em, em_deriv, rij, 
-      coord, avg, std, type, nlist, nnei, rcut_smth, rcut);
+  compute_env_mat_a<FPTYPE, TPB><<<nloc, TPB>>>(
+      em, em_deriv, rij, coord, avg, std, type, nlist, nnei, rcut_smth, rcut);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
 template <typename FPTYPE>
-void prod_env_mat_r_gpu_cuda(    
-    FPTYPE * em, 
-    FPTYPE * em_deriv, 
-    FPTYPE * rij, 
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const InputNlist & gpu_inlist,
-    int * array_int, 
-    uint_64 * array_longlong,
-    const int max_nbor_size,
-    const FPTYPE * avg, 
-    const FPTYPE * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector<int> sec)
-{
+void prod_env_mat_r_gpu_cuda(FPTYPE* em,
+                             FPTYPE* em_deriv,
+                             FPTYPE* rij,
+                             int* nlist,
+                             const FPTYPE* coord,
+                             const int* type,
+                             const InputNlist& gpu_inlist,
+                             int* array_int,
+                             uint_64* array_longlong,
+                             const int max_nbor_size,
+                             const FPTYPE* avg,
+                             const FPTYPE* std,
+                             const int nloc,
+                             const int nall,
+                             const float rcut,
+                             const float rcut_smth,
+                             const std::vector<int> sec) {
   const int nnei = sec.back();
   const int ndescrpt = nnei * 1;
   DPErrcheck(cudaMemset(em, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt));
-  DPErrcheck(cudaMemset(em_deriv, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt * 3));
+  DPErrcheck(
+      cudaMemset(em_deriv, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt * 3));
   DPErrcheck(cudaMemset(rij, 0, sizeof(FPTYPE) * int_64(nloc) * nnei * 3));
 
-  format_nbor_list_gpu_cuda(
-      nlist, 
-      coord, type, gpu_inlist, array_int, array_longlong, max_nbor_size, nloc, nall, rcut, sec);
+  format_nbor_list_gpu_cuda(nlist, coord, type, gpu_inlist, array_int,
+                            array_longlong, max_nbor_size, nloc, nall, rcut,
+                            sec);
   nborErrcheck(cudaGetLastError());
   nborErrcheck(cudaDeviceSynchronize());
-  
-  compute_env_mat_r<FPTYPE, TPB> <<<nloc, TPB>>> (
-      em, em_deriv, rij, 
-      coord, avg, std, type, nlist, nnei, rcut_smth, rcut);
+
+  compute_env_mat_r<FPTYPE, TPB><<<nloc, TPB>>>(
+      em, em_deriv, rij, coord, avg, std, type, nlist, nnei, rcut_smth, rcut);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
 template <typename FPTYPE>
-void test_encoding_decoding_nbor_info_gpu_cuda(
-    uint_64 * key,
-    int * out_type,
-    int * out_index,
-    const int * in_type,
-    const FPTYPE * in_dist,
-    const int * in_index,
-    const int size_of_array)
-{
+void test_encoding_decoding_nbor_info_gpu_cuda(uint_64* key,
+                                               int* out_type,
+                                               int* out_index,
+                                               const int* in_type,
+                                               const FPTYPE* in_dist,
+                                               const int* in_index,
+                                               const int size_of_array) {
   const int nblock = (size_of_array + TPB - 1) / TPB;
-  encoding_decoding_nbor_info<<<nblock, TPB>>> (
-      key, out_type, out_index,
-      in_type, in_dist, in_index, size_of_array);
+  encoding_decoding_nbor_info<<<nblock, TPB>>>(
+      key, out_type, out_index, in_type, in_dist, in_index, size_of_array);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template void prod_env_mat_a_gpu_cuda<float>(float * em, float * em_deriv, float * rij, int * nlist, const float * coord, const int * type, const InputNlist & gpu_inlist, int * array_int, unsigned long long * array_longlong, const int max_nbor_size, const float * avg, const float * std, const int nloc, const int nall, const float rcut, const float rcut_smth, const std::vector<int> sec, const int * f_type);
-template void prod_env_mat_a_gpu_cuda<double>(double * em, double * em_deriv, double * rij, int * nlist, const double * coord, const int * type, const InputNlist & gpu_inlist, int * array_int, unsigned long long * array_longlong, const int max_nbor_size, const double * avg, const double * std, const int nloc, const int nall, const float rcut, const float rcut_smth, const std::vector<int> sec, const int * f_type);
-template void prod_env_mat_r_gpu_cuda<float>(float * em, float * em_deriv, float * rij, int * nlist, const float * coord, const int * type, const InputNlist & gpu_inlist, int * array_int, unsigned long long * array_longlong, const int max_nbor_size, const float * avg, const float * std, const int nloc, const int nall, const float rcut, const float rcut_smth, const std::vector<int> sec);
-template void prod_env_mat_r_gpu_cuda<double>(double * em, double * em_deriv, double * rij, int * nlist, const double * coord, const int * type, const InputNlist & gpu_inlist, int * array_int, unsigned long long * array_longlong, const int max_nbor_size, const double * avg, const double * std, const int nloc, const int nall, const float rcut, const float rcut_smth, const std::vector<int> sec);
-template void format_nbor_list_gpu_cuda<float>(int * nlist, const float * coord, const int * type, const deepmd::InputNlist & gpu_inlist,int * array_int,uint_64 * array_longlong,const int max_nbor_size,const int nloc, const int nall, const float rcut, const std::vector<int> sec);
-template void format_nbor_list_gpu_cuda<double>(int * nlist, const double * coord, const int * type, const deepmd::InputNlist & gpu_inlist,int * array_int,uint_64 * array_longlong,const int max_nbor_size,const int nloc, const int nall, const float rcut, const std::vector<int> sec);
-template void test_encoding_decoding_nbor_info_gpu_cuda<float>(uint_64 * key, int * out_type, int * out_index, const int * in_type, const float * in_dist, const int * in_index, const int size_of_array);
-template void test_encoding_decoding_nbor_info_gpu_cuda<double>(uint_64 * key, int * out_type, int * out_index, const int * in_type, const double * in_dist, const int * in_index, const int size_of_array);
-}
+template void prod_env_mat_a_gpu_cuda<float>(float* em,
+                                             float* em_deriv,
+                                             float* rij,
+                                             int* nlist,
+                                             const float* coord,
+                                             const int* type,
+                                             const InputNlist& gpu_inlist,
+                                             int* array_int,
+                                             unsigned long long* array_longlong,
+                                             const int max_nbor_size,
+                                             const float* avg,
+                                             const float* std,
+                                             const int nloc,
+                                             const int nall,
+                                             const float rcut,
+                                             const float rcut_smth,
+                                             const std::vector<int> sec,
+                                             const int* f_type);
+template void prod_env_mat_a_gpu_cuda<double>(
+    double* em,
+    double* em_deriv,
+    double* rij,
+    int* nlist,
+    const double* coord,
+    const int* type,
+    const InputNlist& gpu_inlist,
+    int* array_int,
+    unsigned long long* array_longlong,
+    const int max_nbor_size,
+    const double* avg,
+    const double* std,
+    const int nloc,
+    const int nall,
+    const float rcut,
+    const float rcut_smth,
+    const std::vector<int> sec,
+    const int* f_type);
+template void prod_env_mat_r_gpu_cuda<float>(float* em,
+                                             float* em_deriv,
+                                             float* rij,
+                                             int* nlist,
+                                             const float* coord,
+                                             const int* type,
+                                             const InputNlist& gpu_inlist,
+                                             int* array_int,
+                                             unsigned long long* array_longlong,
+                                             const int max_nbor_size,
+                                             const float* avg,
+                                             const float* std,
+                                             const int nloc,
+                                             const int nall,
+                                             const float rcut,
+                                             const float rcut_smth,
+                                             const std::vector<int> sec);
+template void prod_env_mat_r_gpu_cuda<double>(
+    double* em,
+    double* em_deriv,
+    double* rij,
+    int* nlist,
+    const double* coord,
+    const int* type,
+    const InputNlist& gpu_inlist,
+    int* array_int,
+    unsigned long long* array_longlong,
+    const int max_nbor_size,
+    const double* avg,
+    const double* std,
+    const int nloc,
+    const int nall,
+    const float rcut,
+    const float rcut_smth,
+    const std::vector<int> sec);
+template void format_nbor_list_gpu_cuda<float>(
+    int* nlist,
+    const float* coord,
+    const int* type,
+    const deepmd::InputNlist& gpu_inlist,
+    int* array_int,
+    uint_64* array_longlong,
+    const int max_nbor_size,
+    const int nloc,
+    const int nall,
+    const float rcut,
+    const std::vector<int> sec);
+template void format_nbor_list_gpu_cuda<double>(
+    int* nlist,
+    const double* coord,
+    const int* type,
+    const deepmd::InputNlist& gpu_inlist,
+    int* array_int,
+    uint_64* array_longlong,
+    const int max_nbor_size,
+    const int nloc,
+    const int nall,
+    const float rcut,
+    const std::vector<int> sec);
+template void test_encoding_decoding_nbor_info_gpu_cuda<float>(
+    uint_64* key,
+    int* out_type,
+    int* out_index,
+    const int* in_type,
+    const float* in_dist,
+    const int* in_index,
+    const int size_of_array);
+template void test_encoding_decoding_nbor_info_gpu_cuda<double>(
+    uint_64* key,
+    int* out_type,
+    int* out_index,
+    const int* in_type,
+    const double* in_dist,
+    const int* in_index,
+    const int size_of_array);
+}  // namespace deepmd
diff --git a/source/lib/src/cuda/prod_force.cu b/source/lib/src/cuda/prod_force.cu
index ace49b3d98..db3683eab7 100644
--- a/source/lib/src/cuda/prod_force.cu
+++ b/source/lib/src/cuda/prod_force.cu
@@ -1,15 +1,11 @@
 #include "device.h"
 #include "prod_force.h"
 
-template <
-    typename FPTYPE,
-    int      THREADS_PER_BLOCK>
-__global__ void force_deriv_wrt_center_atom(
-    FPTYPE * force, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * in_deriv, 
-    const int ndescrpt)
-{
+template <typename FPTYPE, int THREADS_PER_BLOCK>
+__global__ void force_deriv_wrt_center_atom(FPTYPE* force,
+                                            const FPTYPE* net_deriv,
+                                            const FPTYPE* in_deriv,
+                                            const int ndescrpt) {
   __shared__ FPTYPE data[THREADS_PER_BLOCK * 3];
   int_64 bid = blockIdx.x;
   unsigned int tid = threadIdx.x;
@@ -18,15 +14,18 @@ __global__ void force_deriv_wrt_center_atom(
   }
   for (int ii = tid; ii < ndescrpt; ii += THREADS_PER_BLOCK) {
     for (int jj = 0; jj < 3; jj++) {
-      data[jj * THREADS_PER_BLOCK + tid] += net_deriv[bid * ndescrpt + ii] * in_deriv[bid * ndescrpt * 3 + ii * 3 + jj];
+      data[jj * THREADS_PER_BLOCK + tid] +=
+          net_deriv[bid * ndescrpt + ii] *
+          in_deriv[bid * ndescrpt * 3 + ii * 3 + jj];
     }
   }
-  __syncthreads(); 
+  __syncthreads();
   // do reduction in shared memory
   for (int ii = THREADS_PER_BLOCK >> 1; ii > 0; ii >>= 1) {
     if (tid < ii) {
       for (int jj = 0; jj < 3; jj++) {
-        data[jj * THREADS_PER_BLOCK + tid] += data[jj * THREADS_PER_BLOCK + tid + ii];
+        data[jj * THREADS_PER_BLOCK + tid] +=
+            data[jj * THREADS_PER_BLOCK + tid + ii];
       }
     }
     __syncthreads();
@@ -39,81 +38,73 @@ __global__ void force_deriv_wrt_center_atom(
   }
 }
 
-template
-__global__ void force_deriv_wrt_neighbors_a(
-    FPTYPE * force, 
-    const FPTYPE * net_deriv,
-    const FPTYPE * in_deriv,
-    const int * nlist,
-    const int nloc,
-    const int nnei)
-{  
-    // idy -> nnei
-    const int_64 idx = blockIdx.x;
-    const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x;
-    const unsigned int idz = threadIdx.y;
-    const int ndescrpt = nnei * 4;
-    if (idy >= nnei) {
-        return;
-    }
-    // deriv wrt neighbors
-    int j_idx = nlist[idx * nnei + idy];
-    if (j_idx < 0) {
-        return;
-    }
-    FPTYPE force_tmp = 0.f;
-    for (int idw = 0; idw < 4; ++idw) {
-        force_tmp += net_deriv[idx * ndescrpt + idy * 4 + idw] * in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz];
-    }
-    atomicAdd(force + j_idx * 3 + idz, force_tmp);
+template 
+__global__ void force_deriv_wrt_neighbors_a(FPTYPE* force,
+                                            const FPTYPE* net_deriv,
+                                            const FPTYPE* in_deriv,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nnei) {
+  // idy -> nnei
+  const int_64 idx = blockIdx.x;
+  const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x;
+  const unsigned int idz = threadIdx.y;
+  const int ndescrpt = nnei * 4;
+  if (idy >= nnei) {
+    return;
+  }
+  // deriv wrt neighbors
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  FPTYPE force_tmp = 0.f;
+  for (int idw = 0; idw < 4; ++idw) {
+    force_tmp += net_deriv[idx * ndescrpt + idy * 4 + idw] *
+                 in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz];
+  }
+  atomicAdd(force + j_idx * 3 + idz, force_tmp);
 }
 
-template
-__global__ void force_deriv_wrt_neighbors_r(
-		FPTYPE * force, 
-		const FPTYPE * net_deriv,
-		const FPTYPE * in_deriv,
-		const int * nlist,
-		const int nloc,
-		const int nnei)
-{  
-    // idy -> nnei
-    const int_64 idx = blockIdx.x;
-    const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x;
-    const unsigned int idz = threadIdx.y;
-    const int ndescrpt = nnei * 1;
-    if (idy >= nnei) {
-        return;
-    }
-    // deriv wrt neighbors
-    int j_idx = nlist[idx * nnei + idy];
-    if (j_idx < 0) {
-        return;
-    }
-    atomicAdd(
-        force + j_idx * 3 + idz, 
-        net_deriv[idx * ndescrpt + idy] * in_deriv[idx * ndescrpt * 3 + idy * 3 + idz]);
+template 
+__global__ void force_deriv_wrt_neighbors_r(FPTYPE* force,
+                                            const FPTYPE* net_deriv,
+                                            const FPTYPE* in_deriv,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nnei) {
+  // idy -> nnei
+  const int_64 idx = blockIdx.x;
+  const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x;
+  const unsigned int idz = threadIdx.y;
+  const int ndescrpt = nnei * 1;
+  if (idy >= nnei) {
+    return;
+  }
+  // deriv wrt neighbors
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  atomicAdd(force + j_idx * 3 + idz,
+            net_deriv[idx * ndescrpt + idy] *
+                in_deriv[idx * ndescrpt * 3 + idy * 3 + idz]);
 }
 
 namespace deepmd {
-template 
-void prod_force_a_gpu_cuda(    
-    FPTYPE * force, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * in_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei)
-{
+template 
+void prod_force_a_gpu_cuda(FPTYPE* force,
+                           const FPTYPE* net_deriv,
+                           const FPTYPE* in_deriv,
+                           const int* nlist,
+                           const int nloc,
+                           const int nall,
+                           const int nnei) {
   const int ndescrpt = nnei * 4;
-  DPErrcheck(cudaMemset(
-      force, 
-      0, sizeof(FPTYPE) * nall * 3));
+  DPErrcheck(cudaMemset(force, 0, sizeof(FPTYPE) * nall * 3));
 
-  force_deriv_wrt_center_atom <<>>(
-      force, 
-      net_deriv, in_deriv, ndescrpt);
+  force_deriv_wrt_center_atom
+      <<>>(force, net_deriv, in_deriv, ndescrpt);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 
@@ -122,30 +113,24 @@ void prod_force_a_gpu_cuda(
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(LEN, 3);
   force_deriv_wrt_neighbors_a<<>>(
-      force, 
-      net_deriv, in_deriv, nlist, nloc, nnei);
+      force, net_deriv, in_deriv, nlist, nloc, nnei);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template 
-void prod_force_r_gpu_cuda(    
-    FPTYPE * force, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * in_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei)
-{
+template 
+void prod_force_r_gpu_cuda(FPTYPE* force,
+                           const FPTYPE* net_deriv,
+                           const FPTYPE* in_deriv,
+                           const int* nlist,
+                           const int nloc,
+                           const int nall,
+                           const int nnei) {
   const int ndescrpt = nnei * 1;
-  DPErrcheck(cudaMemset(
-      force, 
-      0, sizeof(FPTYPE) * nall * 3));
+  DPErrcheck(cudaMemset(force, 0, sizeof(FPTYPE) * nall * 3));
 
-  force_deriv_wrt_center_atom <<>>(
-      force, 
-      net_deriv, in_deriv, ndescrpt);
+  force_deriv_wrt_center_atom
+      <<>>(force, net_deriv, in_deriv, ndescrpt);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 
@@ -154,14 +139,37 @@ void prod_force_r_gpu_cuda(
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(LEN, 3);
   force_deriv_wrt_neighbors_r<<>>(
-      force, 
-      net_deriv, in_deriv, nlist, nloc, nnei);
+      force, net_deriv, in_deriv, nlist, nloc, nnei);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template void prod_force_a_gpu_cuda(float * force, const float * net_deriv, const float * in_deriv, const int * nlist, const int nloc, const int nall, const int nnei);
-template void prod_force_a_gpu_cuda(double * force, const double * net_deriv, const double * in_deriv, const int * nlist, const int nloc, const int nall, const int nnei);
-template void prod_force_r_gpu_cuda(float * force, const float * net_deriv, const float * in_deriv, const int * nlist, const int nloc, const int nall, const int nnei);
-template void prod_force_r_gpu_cuda(double * force, const double * net_deriv, const double * in_deriv, const int * nlist, const int nloc, const int nall, const int nnei);
-}
+template void prod_force_a_gpu_cuda(float* force,
+                                           const float* net_deriv,
+                                           const float* in_deriv,
+                                           const int* nlist,
+                                           const int nloc,
+                                           const int nall,
+                                           const int nnei);
+template void prod_force_a_gpu_cuda(double* force,
+                                            const double* net_deriv,
+                                            const double* in_deriv,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nall,
+                                            const int nnei);
+template void prod_force_r_gpu_cuda(float* force,
+                                           const float* net_deriv,
+                                           const float* in_deriv,
+                                           const int* nlist,
+                                           const int nloc,
+                                           const int nall,
+                                           const int nnei);
+template void prod_force_r_gpu_cuda(double* force,
+                                            const double* net_deriv,
+                                            const double* in_deriv,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nall,
+                                            const int nnei);
+}  // namespace deepmd
diff --git a/source/lib/src/cuda/prod_force_grad.cu b/source/lib/src/cuda/prod_force_grad.cu
index f09082c316..b44f191ffb 100644
--- a/source/lib/src/cuda/prod_force_grad.cu
+++ b/source/lib/src/cuda/prod_force_grad.cu
@@ -1,150 +1,152 @@
 #include "device.h"
 #include "prod_force_grad.h"
 
-template
-__device__ inline FPTYPE dev_dot(
-    const FPTYPE * arr1, 
-    const FPTYPE * arr2) 
-{
-    return arr1[0] * arr2[0] + arr1[1] * arr2[1] + arr1[2] * arr2[2];
+template 
+__device__ inline FPTYPE dev_dot(const FPTYPE* arr1, const FPTYPE* arr2) {
+  return arr1[0] * arr2[0] + arr1[1] * arr2[1] + arr1[2] * arr2[2];
 }
 
-template
-__global__ void force_grad_wrt_center_atom(
-    FPTYPE * grad_net,
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int ndescrpt)
-{
-    __shared__ FPTYPE grad_one[3];
-    int_64 center_idx = blockIdx.x;
-    unsigned int tid = threadIdx.x;
-    if(tid < 3){
-        grad_one[tid] = grad[center_idx * 3 + tid];
-    }
-    __syncthreads();
-    unsigned int descrpt_idx = blockIdx.y * blockDim.x + tid;
-    if(descrpt_idx < ndescrpt){
-        grad_net[center_idx * ndescrpt + descrpt_idx] -= dev_dot(grad_one, env_deriv + center_idx * ndescrpt * 3 + descrpt_idx * 3);
-    }
+template 
+__global__ void force_grad_wrt_center_atom(FPTYPE* grad_net,
+                                           const FPTYPE* grad,
+                                           const FPTYPE* env_deriv,
+                                           const int ndescrpt) {
+  __shared__ FPTYPE grad_one[3];
+  int_64 center_idx = blockIdx.x;
+  unsigned int tid = threadIdx.x;
+  if (tid < 3) {
+    grad_one[tid] = grad[center_idx * 3 + tid];
+  }
+  __syncthreads();
+  unsigned int descrpt_idx = blockIdx.y * blockDim.x + tid;
+  if (descrpt_idx < ndescrpt) {
+    grad_net[center_idx * ndescrpt + descrpt_idx] -= dev_dot(
+        grad_one, env_deriv + center_idx * ndescrpt * 3 + descrpt_idx * 3);
+  }
 }
 
-template
-__global__ void force_grad_wrt_neighbors_a(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc,
-    const int nnei)
-{
-    // idy -> nnei
-    const int_64 idx = blockIdx.x * blockDim.x + threadIdx.x;
-    const unsigned int idy = blockIdx.y;
-    const unsigned int idw = threadIdx.y;
-    if (idx >= nloc) {
-        return;
-    }
-    int j_idx = nlist[idx * nnei + idy];
-    if (j_idx < 0) {
-        return;
-    }
-    if (j_idx >= nloc) j_idx = j_idx % nloc;
-    grad_net[idx * nnei * 4 + idy * 4 + idw] += dev_dot(grad + j_idx * 3, env_deriv + idx * nnei * 4 * 3 + idy * 4 * 3 + idw * 3);
+template 
+__global__ void force_grad_wrt_neighbors_a(FPTYPE* grad_net,
+                                           const FPTYPE* grad,
+                                           const FPTYPE* env_deriv,
+                                           const int* nlist,
+                                           const int nloc,
+                                           const int nnei) {
+  // idy -> nnei
+  const int_64 idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const unsigned int idy = blockIdx.y;
+  const unsigned int idw = threadIdx.y;
+  if (idx >= nloc) {
+    return;
+  }
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  if (j_idx >= nloc) j_idx = j_idx % nloc;
+  grad_net[idx * nnei * 4 + idy * 4 + idw] += dev_dot(
+      grad + j_idx * 3, env_deriv + idx * nnei * 4 * 3 + idy * 4 * 3 + idw * 3);
 }
 
-template
-__global__ void force_grad_wrt_neighbors_r(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc,
-    const int nnei)
-{
-    // idy -> nnei
-    const int_64 idx = blockIdx.x * blockDim.x + threadIdx.x;
-    const unsigned int idy = blockIdx.y;
-    if (idx >= nloc) {
-        return;
-    }
-    int j_idx = nlist[idx * nnei + idy];
-    if (j_idx < 0) {
-        return;
-    }
-    if (j_idx >= nloc) j_idx = j_idx % nloc;
-    grad_net[idx * nnei + idy] += dev_dot(grad + j_idx * 3, env_deriv + idx * nnei * 3 + idy * 3);
+template 
+__global__ void force_grad_wrt_neighbors_r(FPTYPE* grad_net,
+                                           const FPTYPE* grad,
+                                           const FPTYPE* env_deriv,
+                                           const int* nlist,
+                                           const int nloc,
+                                           const int nnei) {
+  // idy -> nnei
+  const int_64 idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const unsigned int idy = blockIdx.y;
+  if (idx >= nloc) {
+    return;
+  }
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  if (j_idx >= nloc) j_idx = j_idx % nloc;
+  grad_net[idx * nnei + idy] +=
+      dev_dot(grad + j_idx * 3, env_deriv + idx * nnei * 3 + idy * 3);
 }
 
 namespace deepmd {
-template
-void prod_force_grad_a_gpu_cuda(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei)
-{
-    const int ndescrpt = nnei * 4;
-    DPErrcheck(cudaMemset(
-        grad_net, 
-        0, sizeof(FPTYPE) * nloc * ndescrpt));
-    const int nblock = (ndescrpt + TPB - 1) / TPB;
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(TPB, 1);
-    force_grad_wrt_center_atom<<>>(
-        grad_net,
-        grad, env_deriv, ndescrpt);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+template 
+void prod_force_grad_a_gpu_cuda(FPTYPE* grad_net,
+                                const FPTYPE* grad,
+                                const FPTYPE* env_deriv,
+                                const int* nlist,
+                                const int nloc,
+                                const int nnei) {
+  const int ndescrpt = nnei * 4;
+  DPErrcheck(cudaMemset(grad_net, 0, sizeof(FPTYPE) * nloc * ndescrpt));
+  const int nblock = (ndescrpt + TPB - 1) / TPB;
+  dim3 block_grid(nloc, nblock);
+  dim3 thread_grid(TPB, 1);
+  force_grad_wrt_center_atom<<>>(grad_net, grad,
+                                                          env_deriv, ndescrpt);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 
-    const int LEN = 128;
-    const int nblock_ = (nloc + LEN -1) / LEN;
-    dim3 block_grid_(nblock_, nnei);
-    dim3 thread_grid_(LEN, 4);
-    force_grad_wrt_neighbors_a<<>>(
-        grad_net,
-        grad, env_deriv, nlist, nloc, nnei);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+  const int LEN = 128;
+  const int nblock_ = (nloc + LEN - 1) / LEN;
+  dim3 block_grid_(nblock_, nnei);
+  dim3 thread_grid_(LEN, 4);
+  force_grad_wrt_neighbors_a<<>>(
+      grad_net, grad, env_deriv, nlist, nloc, nnei);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void prod_force_grad_r_gpu_cuda(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei)
-{
-    const int ndescrpt = nnei * 1;
-    DPErrcheck(cudaMemset(
-        grad_net, 
-        0, sizeof(FPTYPE) * nloc * ndescrpt));
-    const int nblock = (ndescrpt + TPB - 1) / TPB;
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(TPB, 1);
-    force_grad_wrt_center_atom<<>>(
-        grad_net,
-        grad, env_deriv, ndescrpt);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+template 
+void prod_force_grad_r_gpu_cuda(FPTYPE* grad_net,
+                                const FPTYPE* grad,
+                                const FPTYPE* env_deriv,
+                                const int* nlist,
+                                const int nloc,
+                                const int nnei) {
+  const int ndescrpt = nnei * 1;
+  DPErrcheck(cudaMemset(grad_net, 0, sizeof(FPTYPE) * nloc * ndescrpt));
+  const int nblock = (ndescrpt + TPB - 1) / TPB;
+  dim3 block_grid(nloc, nblock);
+  dim3 thread_grid(TPB, 1);
+  force_grad_wrt_center_atom<<>>(grad_net, grad,
+                                                          env_deriv, ndescrpt);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 
-    const int LEN = 128;
-    const int nblock_ = (nloc + LEN -1) / LEN;
-    dim3 block_grid_(nblock_, nnei);
-    dim3 thread_grid_(LEN, 1);
-    force_grad_wrt_neighbors_r<<>>(
-        grad_net,
-        grad, env_deriv, nlist, nloc, nnei);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+  const int LEN = 128;
+  const int nblock_ = (nloc + LEN - 1) / LEN;
+  dim3 block_grid_(nblock_, nnei);
+  dim3 thread_grid_(LEN, 1);
+  force_grad_wrt_neighbors_r<<>>(
+      grad_net, grad, env_deriv, nlist, nloc, nnei);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 }
 
-template void prod_force_grad_a_gpu_cuda(float * grad_net, const float * grad, const float * env_deriv, const int * nlist, const int nloc, const int nnei);
-template void prod_force_grad_a_gpu_cuda(double * grad_net, const double * grad, const double * env_deriv, const int * nlist, const int nloc, const int nnei);
-template void prod_force_grad_r_gpu_cuda(float * grad_net, const float * grad, const float * env_deriv, const int * nlist, const int nloc, const int nnei);
-template void prod_force_grad_r_gpu_cuda(double * grad_net, const double * grad, const double * env_deriv, const int * nlist, const int nloc, const int nnei);
-}
\ No newline at end of file
+template void prod_force_grad_a_gpu_cuda(float* grad_net,
+                                                const float* grad,
+                                                const float* env_deriv,
+                                                const int* nlist,
+                                                const int nloc,
+                                                const int nnei);
+template void prod_force_grad_a_gpu_cuda(double* grad_net,
+                                                 const double* grad,
+                                                 const double* env_deriv,
+                                                 const int* nlist,
+                                                 const int nloc,
+                                                 const int nnei);
+template void prod_force_grad_r_gpu_cuda(float* grad_net,
+                                                const float* grad,
+                                                const float* env_deriv,
+                                                const int* nlist,
+                                                const int nloc,
+                                                const int nnei);
+template void prod_force_grad_r_gpu_cuda(double* grad_net,
+                                                 const double* grad,
+                                                 const double* env_deriv,
+                                                 const int* nlist,
+                                                 const int nloc,
+                                                 const int nnei);
+}  // namespace deepmd
diff --git a/source/lib/src/cuda/prod_virial.cu b/source/lib/src/cuda/prod_virial.cu
index 16566027ae..7b42008e68 100644
--- a/source/lib/src/cuda/prod_virial.cu
+++ b/source/lib/src/cuda/prod_virial.cu
@@ -1,44 +1,38 @@
 #include "device.h"
 #include "prod_virial.h"
 
-template <
-    typename FPTYPE,
-    int      THREADS_PER_BLOCK>
-__global__ void atom_virial_reduction(
-    FPTYPE * virial, 
-    const FPTYPE * atom_virial,
-    const int nall)
-{
-    unsigned int bid = blockIdx.x;
-    unsigned int tid = threadIdx.x;
-    __shared__ FPTYPE data[THREADS_PER_BLOCK];
-    data[tid] = (FPTYPE)0.;
-    for (int ii = tid; ii < nall; ii += THREADS_PER_BLOCK) {
-        data[tid] += atom_virial[ii * 9 + bid];
-    }
-    __syncthreads(); 
-    // do reduction in shared memory
-    for (int ii = THREADS_PER_BLOCK >> 1; ii > 0; ii >>= 1) {
-        if (tid < ii) {
-            data[tid] += data[tid + ii];
-        }
-        __syncthreads();
+template 
+__global__ void atom_virial_reduction(FPTYPE* virial,
+                                      const FPTYPE* atom_virial,
+                                      const int nall) {
+  unsigned int bid = blockIdx.x;
+  unsigned int tid = threadIdx.x;
+  __shared__ FPTYPE data[THREADS_PER_BLOCK];
+  data[tid] = (FPTYPE)0.;
+  for (int ii = tid; ii < nall; ii += THREADS_PER_BLOCK) {
+    data[tid] += atom_virial[ii * 9 + bid];
+  }
+  __syncthreads();
+  // do reduction in shared memory
+  for (int ii = THREADS_PER_BLOCK >> 1; ii > 0; ii >>= 1) {
+    if (tid < ii) {
+      data[tid] += data[tid + ii];
     }
-    // write result for this block to global memory
-    if (tid == 0) virial[bid] = data[0];
+    __syncthreads();
+  }
+  // write result for this block to global memory
+  if (tid == 0) virial[bid] = data[0];
 }
 
-template
-__global__ void virial_deriv_wrt_neighbors_a(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial,
-    const FPTYPE * net_deriv,
-    const FPTYPE * in_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei) 
-{
+template 
+__global__ void virial_deriv_wrt_neighbors_a(FPTYPE* virial,
+                                             FPTYPE* atom_virial,
+                                             const FPTYPE* net_deriv,
+                                             const FPTYPE* in_deriv,
+                                             const FPTYPE* rij,
+                                             const int* nlist,
+                                             const int nloc,
+                                             const int nnei) {
   // idx -> nloc
   // idy -> nnei
   // idz = dd0 * 3 + dd1
@@ -49,135 +43,154 @@ __global__ void virial_deriv_wrt_neighbors_a(
   const unsigned int idz = threadIdx.y;
   const int ndescrpt = nnei * 4;
   if (idy >= nnei) {
-      return;
+    return;
   }
   int j_idx = nlist[idx * nnei + idy];
   if (j_idx < 0) {
-      return;
+    return;
   }
   // atomicAdd(
-  //    virial + idz, 
-  //    net_deriv[idx * ndescrpt + idy * 4 + idw] * rij[idx * nnei * 3 + idy * 3 + idz / 3] * in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz % 3]);
+  //    virial + idz,
+  //    net_deriv[idx * ndescrpt + idy * 4 + idw] * rij[idx * nnei * 3 + idy * 3
+  //    + idz / 3] * in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz %
+  //    3]);
   FPTYPE virial_tmp = (FPTYPE)0.;
   for (int idw = 0; idw < 4; ++idw) {
-      virial_tmp += net_deriv[idx * ndescrpt + idy * 4 + idw] * rij[idx * nnei * 3 + idy * 3 + idz % 3] * in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz / 3];
+    virial_tmp += net_deriv[idx * ndescrpt + idy * 4 + idw] *
+                  rij[idx * nnei * 3 + idy * 3 + idz % 3] *
+                  in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz / 3];
   }
   atomicAdd(atom_virial + j_idx * 9 + idz, virial_tmp);
 }
 
-template
-__global__ void virial_deriv_wrt_neighbors_r(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial,
-    const FPTYPE * net_deriv,
-    const FPTYPE * in_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei) 
-{
-    // idx -> nloc
-    // idy -> nnei
-    // idz = dd0 * 3 + dd1
-    // dd0 = idz / 3
-    // dd1 = idz % 3
-    const int_64 idx = blockIdx.x;
-    const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x;
-    const unsigned int idz = threadIdx.y;
-    const int ndescrpt = nnei * 1;
+template 
+__global__ void virial_deriv_wrt_neighbors_r(FPTYPE* virial,
+                                             FPTYPE* atom_virial,
+                                             const FPTYPE* net_deriv,
+                                             const FPTYPE* in_deriv,
+                                             const FPTYPE* rij,
+                                             const int* nlist,
+                                             const int nloc,
+                                             const int nnei) {
+  // idx -> nloc
+  // idy -> nnei
+  // idz = dd0 * 3 + dd1
+  // dd0 = idz / 3
+  // dd1 = idz % 3
+  const int_64 idx = blockIdx.x;
+  const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x;
+  const unsigned int idz = threadIdx.y;
+  const int ndescrpt = nnei * 1;
 
-    if (idy >= nnei) {
-        return;
-    }
-    int j_idx = nlist[idx * nnei + idy];
-    if (j_idx < 0) {
-        return;
-    }
-    // atomicAdd(
-    //    virial + idz, 
-    //    net_deriv[idx * ndescrpt + idy * 4 + idw] * rij[idx * nnei * 3 + idy * 3 + idz / 3] * in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz % 3]);
-    atomicAdd(
-        atom_virial + j_idx * 9 + idz, 
-        net_deriv[idx * ndescrpt + idy] * rij[idx * nnei * 3 + idy * 3 + idz % 3] * in_deriv[idx * ndescrpt * 3 + idy * 3 + idz / 3]);
+  if (idy >= nnei) {
+    return;
+  }
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  // atomicAdd(
+  //    virial + idz,
+  //    net_deriv[idx * ndescrpt + idy * 4 + idw] * rij[idx * nnei * 3 + idy * 3
+  //    + idz / 3] * in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz %
+  //    3]);
+  atomicAdd(atom_virial + j_idx * 9 + idz,
+            net_deriv[idx * ndescrpt + idy] *
+                rij[idx * nnei * 3 + idy * 3 + idz % 3] *
+                in_deriv[idx * ndescrpt * 3 + idy * 3 + idz / 3]);
 }
 
 namespace deepmd {
-template
-void prod_virial_a_gpu_cuda(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * in_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei)
-{
-  DPErrcheck(cudaMemset(
-      virial, 
-      0, sizeof(FPTYPE) * 9));
-  DPErrcheck(cudaMemset(
-      atom_virial, 
-      0, sizeof(FPTYPE) * 9 * nall));
-    
+template 
+void prod_virial_a_gpu_cuda(FPTYPE* virial,
+                            FPTYPE* atom_virial,
+                            const FPTYPE* net_deriv,
+                            const FPTYPE* in_deriv,
+                            const FPTYPE* rij,
+                            const int* nlist,
+                            const int nloc,
+                            const int nall,
+                            const int nnei) {
+  DPErrcheck(cudaMemset(virial, 0, sizeof(FPTYPE) * 9));
+  DPErrcheck(cudaMemset(atom_virial, 0, sizeof(FPTYPE) * 9 * nall));
+
   const int LEN = 16;
   int nblock = (nnei + LEN - 1) / LEN;
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(LEN, 9);
   // compute virial of a frame
   virial_deriv_wrt_neighbors_a<<>>(
-      virial, atom_virial, 
-      net_deriv, in_deriv, rij, nlist, nloc, nnei);
+      virial, atom_virial, net_deriv, in_deriv, rij, nlist, nloc, nnei);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
   // reduction atom_virial to virial
-  atom_virial_reduction <<<9, TPB>>>(
-      virial, 
-      atom_virial, nall);
+  atom_virial_reduction<<<9, TPB>>>(virial, atom_virial, nall);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void prod_virial_r_gpu_cuda(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * in_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei)
-{
-  DPErrcheck(cudaMemset(
-      virial, 
-      0, sizeof(FPTYPE) * 9));
-  DPErrcheck(cudaMemset(
-      atom_virial, 
-      0, sizeof(FPTYPE) * 9 * nall));
-    
+template 
+void prod_virial_r_gpu_cuda(FPTYPE* virial,
+                            FPTYPE* atom_virial,
+                            const FPTYPE* net_deriv,
+                            const FPTYPE* in_deriv,
+                            const FPTYPE* rij,
+                            const int* nlist,
+                            const int nloc,
+                            const int nall,
+                            const int nnei) {
+  DPErrcheck(cudaMemset(virial, 0, sizeof(FPTYPE) * 9));
+  DPErrcheck(cudaMemset(atom_virial, 0, sizeof(FPTYPE) * 9 * nall));
+
   const int LEN = 16;
   int nblock = (nnei + LEN - 1) / LEN;
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(LEN, 9);
   // compute virial of a frame
   virial_deriv_wrt_neighbors_r<<>>(
-      virial, atom_virial, 
-      net_deriv, in_deriv, rij, nlist, nloc, nnei);
+      virial, atom_virial, net_deriv, in_deriv, rij, nlist, nloc, nnei);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
   // reduction atom_virial to virial
-  atom_virial_reduction <<<9, TPB>>>(
-    virial, 
-    atom_virial, nall);
+  atom_virial_reduction<<<9, TPB>>>(virial, atom_virial, nall);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template void prod_virial_a_gpu_cuda(float * virial, float * atom_virial, const float * net_deriv, const float * in_deriv, const float * rij, const int * nlist, const int nloc, const int nall, const int nnei);
-template void prod_virial_a_gpu_cuda(double * virial, double * atom_virial, const double * net_deriv, const double * in_deriv, const double * rij, const int * nlist, const int nloc, const int nall, const int nnei);
-template void prod_virial_r_gpu_cuda(float * virial, float * atom_virial, const float * net_deriv, const float * in_deriv, const float * rij, const int * nlist, const int nloc, const int nall, const int nnei);
-template void prod_virial_r_gpu_cuda(double * virial, double * atom_virial, const double * net_deriv, const double * in_deriv, const double * rij, const int * nlist, const int nloc, const int nall, const int nnei);
-}
+template void prod_virial_a_gpu_cuda(float* virial,
+                                            float* atom_virial,
+                                            const float* net_deriv,
+                                            const float* in_deriv,
+                                            const float* rij,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nall,
+                                            const int nnei);
+template void prod_virial_a_gpu_cuda(double* virial,
+                                             double* atom_virial,
+                                             const double* net_deriv,
+                                             const double* in_deriv,
+                                             const double* rij,
+                                             const int* nlist,
+                                             const int nloc,
+                                             const int nall,
+                                             const int nnei);
+template void prod_virial_r_gpu_cuda(float* virial,
+                                            float* atom_virial,
+                                            const float* net_deriv,
+                                            const float* in_deriv,
+                                            const float* rij,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nall,
+                                            const int nnei);
+template void prod_virial_r_gpu_cuda(double* virial,
+                                             double* atom_virial,
+                                             const double* net_deriv,
+                                             const double* in_deriv,
+                                             const double* rij,
+                                             const int* nlist,
+                                             const int nloc,
+                                             const int nall,
+                                             const int nnei);
+}  // namespace deepmd
diff --git a/source/lib/src/cuda/prod_virial_grad.cu b/source/lib/src/cuda/prod_virial_grad.cu
index c699c4a09a..7e3e7c3b34 100644
--- a/source/lib/src/cuda/prod_virial_grad.cu
+++ b/source/lib/src/cuda/prod_virial_grad.cu
@@ -1,144 +1,155 @@
 #include "device.h"
 #include "prod_virial_grad.h"
 
-template
-__device__ inline FPTYPE dev_dot9(
-    const FPTYPE * arr1, 
-    const FPTYPE * arr2) 
-{
-    FPTYPE result = (FPTYPE)0.0;
-    for(int ii=0; ii<9; ii++){
-        result += arr1[ii] * arr2[ii];
-    }
-    return result;
+template <typename FPTYPE>
+__device__ inline FPTYPE dev_dot9(const FPTYPE* arr1, const FPTYPE* arr2) {
+  FPTYPE result = (FPTYPE)0.0;
+  for (int ii = 0; ii < 9; ii++) {
+    result += arr1[ii] * arr2[ii];
+  }
+  return result;
 }
 
-template
-__global__ void virial_grad_wrt_neighbors_a(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei)
-{
-    // idy -> nnei
-    const unsigned int tid = threadIdx.x;
-    const int_64 idx = blockIdx.x * blockDim.x + tid;
-    const unsigned int idy = blockIdx.y;
-    const unsigned int idw = threadIdx.y;
-    const int ndescrpt = nnei * 4;
-    __shared__ FPTYPE grad_one[9];
-    if(tid < 9){
-        grad_one[tid] = grad[tid];
-    }
-    __syncthreads(); 
-    if (idx >= nloc) {
-        return;
-    }
-    int j_idx = nlist[idx * nnei + idy];
-    if (j_idx < 0) {
-        return;
-    }
-    FPTYPE tmp[9];
-    for (int dd0 = 0; dd0 < 3; ++dd0){
-        for (int dd1 = 0; dd1 < 3; ++dd1){
-            tmp[dd0 * 3 + dd1] = rij[idx * nnei * 3 + idy * 3 + dd1] * env_deriv[idx * ndescrpt * 3 + idy * 4 * 3 + idw * 3 + dd0];
-        }
+template <typename FPTYPE>
+__global__ void virial_grad_wrt_neighbors_a(FPTYPE* grad_net,
+                                            const FPTYPE* grad,
+                                            const FPTYPE* env_deriv,
+                                            const FPTYPE* rij,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nnei) {
+  // idy -> nnei
+  const unsigned int tid = threadIdx.x;
+  const int_64 idx = blockIdx.x * blockDim.x + tid;
+  const unsigned int idy = blockIdx.y;
+  const unsigned int idw = threadIdx.y;
+  const int ndescrpt = nnei * 4;
+  __shared__ FPTYPE grad_one[9];
+  if (tid < 9) {
+    grad_one[tid] = grad[tid];
+  }
+  __syncthreads();
+  if (idx >= nloc) {
+    return;
+  }
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  FPTYPE tmp[9];
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
+      tmp[dd0 * 3 + dd1] =
+          rij[idx * nnei * 3 + idy * 3 + dd1] *
+          env_deriv[idx * ndescrpt * 3 + idy * 4 * 3 + idw * 3 + dd0];
     }
-    grad_net[idx * ndescrpt + idy * 4 + idw] -= (FPTYPE)-1.0 * dev_dot9(grad_one, tmp);
+  }
+  grad_net[idx * ndescrpt + idy * 4 + idw] -=
+      (FPTYPE)-1.0 * dev_dot9(grad_one, tmp);
 }
 
-template
-__global__ void virial_grad_wrt_neighbors_r(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei)
-{
-    // idy -> nnei
-    const unsigned int tid = threadIdx.x;
-    const int_64 idx = blockIdx.x * blockDim.x + tid;
-    const unsigned int idy = blockIdx.y;
-    const int ndescrpt = nnei;
-    __shared__ FPTYPE grad_one[9];
-    if(tid < 9){
-        grad_one[tid] = grad[tid];
-    }
-    __syncthreads(); 
-    if (idx >= nloc) {
-        return;
-    }
-    int j_idx = nlist[idx * nnei + idy];
-    if (j_idx < 0) {
-        return;
-    }
-    FPTYPE tmp[9];
-    for (int dd0 = 0; dd0 < 3; ++dd0){
-        for (int dd1 = 0; dd1 < 3; ++dd1){
-            tmp[dd0 * 3 + dd1] = rij[idx * nnei * 3 + idy * 3 + dd1] * env_deriv[idx * ndescrpt * 3 + idy * 3 + dd0];
-        }
+template <typename FPTYPE>
+__global__ void virial_grad_wrt_neighbors_r(FPTYPE* grad_net,
+                                            const FPTYPE* grad,
+                                            const FPTYPE* env_deriv,
+                                            const FPTYPE* rij,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nnei) {
+  // idy -> nnei
+  const unsigned int tid = threadIdx.x;
+  const int_64 idx = blockIdx.x * blockDim.x + tid;
+  const unsigned int idy = blockIdx.y;
+  const int ndescrpt = nnei;
+  __shared__ FPTYPE grad_one[9];
+  if (tid < 9) {
+    grad_one[tid] = grad[tid];
+  }
+  __syncthreads();
+  if (idx >= nloc) {
+    return;
+  }
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  FPTYPE tmp[9];
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
+      tmp[dd0 * 3 + dd1] = rij[idx * nnei * 3 + idy * 3 + dd1] *
+                           env_deriv[idx * ndescrpt * 3 + idy * 3 + dd0];
     }
-    grad_net[idx * ndescrpt + idy] -= (FPTYPE)-1.0 * dev_dot9(grad_one, tmp);
+  }
+  grad_net[idx * ndescrpt + idy] -= (FPTYPE)-1.0 * dev_dot9(grad_one, tmp);
 }
 
 namespace deepmd {
-template
-void prod_virial_grad_a_gpu_cuda(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei)
-{
-    const int ndescrpt = nnei * 4;
-    DPErrcheck(cudaMemset(
-        grad_net, 
-        0, sizeof(FPTYPE) * nloc * ndescrpt));
-    const int LEN = 128;
-    const int nblock = (nloc + LEN -1) / LEN;
-    dim3 block_grid(nblock, nnei);
-    dim3 thread_grid(LEN, 4);
-    virial_grad_wrt_neighbors_a<<>>(
-        grad_net,
-        grad, env_deriv, rij, nlist, nloc, nnei);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+template <typename FPTYPE>
+void prod_virial_grad_a_gpu_cuda(FPTYPE* grad_net,
+                                 const FPTYPE* grad,
+                                 const FPTYPE* env_deriv,
+                                 const FPTYPE* rij,
+                                 const int* nlist,
+                                 const int nloc,
+                                 const int nnei) {
+  const int ndescrpt = nnei * 4;
+  DPErrcheck(cudaMemset(grad_net, 0, sizeof(FPTYPE) * nloc * ndescrpt));
+  const int LEN = 128;
+  const int nblock = (nloc + LEN - 1) / LEN;
+  dim3 block_grid(nblock, nnei);
+  dim3 thread_grid(LEN, 4);
+  virial_grad_wrt_neighbors_a<<<block_grid, thread_grid>>>(
+      grad_net, grad, env_deriv, rij, nlist, nloc, nnei);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void prod_virial_grad_r_gpu_cuda(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei)
-{
-    const int ndescrpt = nnei;
-    DPErrcheck(cudaMemset(
-        grad_net, 
-        0, sizeof(FPTYPE) * nloc * ndescrpt));
-    const int LEN = 128;
-    const int nblock = (nloc + LEN -1) / LEN;
-    dim3 block_grid(nblock, nnei);
-    dim3 thread_grid(LEN, 1);
-    virial_grad_wrt_neighbors_r<<>>(
-        grad_net,
-        grad, env_deriv, rij, nlist, nloc, nnei);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+template <typename FPTYPE>
+void prod_virial_grad_r_gpu_cuda(FPTYPE* grad_net,
+                                 const FPTYPE* grad,
+                                 const FPTYPE* env_deriv,
+                                 const FPTYPE* rij,
+                                 const int* nlist,
+                                 const int nloc,
+                                 const int nnei) {
+  const int ndescrpt = nnei;
+  DPErrcheck(cudaMemset(grad_net, 0, sizeof(FPTYPE) * nloc * ndescrpt));
+  const int LEN = 128;
+  const int nblock = (nloc + LEN - 1) / LEN;
+  dim3 block_grid(nblock, nnei);
+  dim3 thread_grid(LEN, 1);
+  virial_grad_wrt_neighbors_r<<<block_grid, thread_grid>>>(
+      grad_net, grad, env_deriv, rij, nlist, nloc, nnei);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 }
 
-template void prod_virial_grad_a_gpu_cuda(float * grad_net, const float * grad, const float * env_deriv, const float * rij, const int * nlist, const int nloc, const int nnei);
-template void prod_virial_grad_a_gpu_cuda(double * grad_net, const double * grad, const double * env_deriv, const double * rij, const int * nlist, const int nloc, const int nnei);
-template void prod_virial_grad_r_gpu_cuda(float * grad_net, const float * grad, const float * env_deriv, const float * rij, const int * nlist, const int nloc, const int nnei);
-template void prod_virial_grad_r_gpu_cuda(double * grad_net, const double * grad, const double * env_deriv, const double * rij, const int * nlist, const int nloc, const int nnei);
-}
\ No newline at end of file
+template void prod_virial_grad_a_gpu_cuda<float>(float* grad_net,
+                                                 const float* grad,
+                                                 const float* env_deriv,
+                                                 const float* rij,
+                                                 const int* nlist,
+                                                 const int nloc,
+                                                 const int nnei);
+template void prod_virial_grad_a_gpu_cuda<double>(double* grad_net,
+                                                  const double* grad,
+                                                  const double* env_deriv,
+                                                  const double* rij,
+                                                  const int* nlist,
+                                                  const int nloc,
+                                                  const int nnei);
+template void prod_virial_grad_r_gpu_cuda<float>(float* grad_net,
+                                                 const float* grad,
+                                                 const float* env_deriv,
+                                                 const float* rij,
+                                                 const int* nlist,
+                                                 const int nloc,
+                                                 const int nnei);
+template void prod_virial_grad_r_gpu_cuda<double>(double* grad_net,
+                                                  const double* grad,
+                                                  const double* env_deriv,
+                                                  const double* rij,
+                                                  const int* nlist,
+                                                  const int nloc,
+                                                  const int nnei);
+}  // namespace deepmd
diff --git a/source/lib/src/cuda/region.cu b/source/lib/src/cuda/region.cu
index 74a60a07e9..858739671c 100644
--- a/source/lib/src/cuda/region.cu
+++ b/source/lib/src/cuda/region.cu
@@ -1,74 +1,65 @@
 #include "device.h"
-#include "region.h"
 #include "region.cuh"
+#include "region.h"
 
-template
-__global__ void _phys2Inter(
-    FPTYPE *inter, 
-    const FPTYPE *phys, 
-    const FPTYPE *rec_boxt)
-{
-    phys2Inter(inter, phys, rec_boxt);
+template <typename FPTYPE>
+__global__ void _phys2Inter(FPTYPE *inter,
+                            const FPTYPE *phys,
+                            const FPTYPE *rec_boxt) {
+  phys2Inter(inter, phys, rec_boxt);
 }
 
-template
-__global__ void _inter2Phys(
-    FPTYPE *phys, 
-    const FPTYPE *inter, 
-    const FPTYPE *boxt)
-{
-    inter2Phys(phys, inter, boxt);
+template <typename FPTYPE>
+__global__ void _inter2Phys(FPTYPE *phys,
+                            const FPTYPE *inter,
+                            const FPTYPE *boxt) {
+  inter2Phys(phys, inter, boxt);
 }
 
-template
-__global__ void _compute_volume(
-    FPTYPE * volume, 
-    const FPTYPE * boxt)
-{
-    volume[0] = compute_volume(boxt);
+template <typename FPTYPE>
+__global__ void _compute_volume(FPTYPE *volume, const FPTYPE *boxt) {
+  volume[0] = compute_volume(boxt);
 }
 
 namespace deepmd {
-//only for unittest
-template
-void
-convert_to_inter_gpu(
-    FPTYPE * ri, 
-    const Region & region,
-    const FPTYPE * rp)
-{
-    _phys2Inter<<<1, 1>>>(ri, rp, region.rec_boxt);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+// only for unittest
+template <typename FPTYPE>
+void convert_to_inter_gpu(FPTYPE *ri,
+                          const Region<FPTYPE> &region,
+                          const FPTYPE *rp) {
+  _phys2Inter<<<1, 1>>>(ri, rp, region.rec_boxt);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void
-convert_to_phys_gpu(
-    FPTYPE * rp, 
-    const Region & region,
-    const FPTYPE * ri)
-{
-    _inter2Phys<<<1, 1>>>(rp, ri, region.boxt);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+template <typename FPTYPE>
+void convert_to_phys_gpu(FPTYPE *rp,
+                         const Region<FPTYPE> &region,
+                         const FPTYPE *ri) {
+  _inter2Phys<<<1, 1>>>(rp, ri, region.boxt);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void
-volume_gpu(
-    FPTYPE * volume,
-    const Region & region)
-{
-    _compute_volume<<<1, 1>>>(volume, region.boxt);
-    DPErrcheck(cudaGetLastError());
-    DPErrcheck(cudaDeviceSynchronize());
+template <typename FPTYPE>
+void volume_gpu(FPTYPE *volume, const Region<FPTYPE> &region) {
+  _compute_volume<<<1, 1>>>(volume, region.boxt);
+  DPErrcheck(cudaGetLastError());
+  DPErrcheck(cudaDeviceSynchronize());
 }
 
-template void convert_to_inter_gpu(float * ri, const Region & region, const float * rp);
-template void convert_to_inter_gpu(double * ri, const Region & region, const double * rp);
-template void convert_to_phys_gpu(float * rp, const Region & region, const float * ri);
-template void convert_to_phys_gpu(double * rp, const Region & region, const double * ri);
-template void volume_gpu(float * volume, const Region & region);
-template void volume_gpu(double * volume, const Region & region);
-}
\ No newline at end of file
+template void convert_to_inter_gpu<float>(float *ri,
+                                          const Region<float> &region,
+                                          const float *rp);
+template void convert_to_inter_gpu<double>(double *ri,
+                                           const Region<double> &region,
+                                           const double *rp);
+template void convert_to_phys_gpu<float>(float *rp,
+                                         const Region<float> &region,
+                                         const float *ri);
+template void convert_to_phys_gpu<double>(double *rp,
+                                          const Region<double> &region,
+                                          const double *ri);
+template void volume_gpu<float>(float *volume, const Region<float> &region);
+template void volume_gpu<double>(double *volume, const Region<double> &region);
+}  // namespace deepmd
diff --git a/source/lib/src/cuda/tabulate.cu b/source/lib/src/cuda/tabulate.cu
index f8c3b46589..2e8c24cf99 100644
--- a/source/lib/src/cuda/tabulate.cu
+++ b/source/lib/src/cuda/tabulate.cu
@@ -8,110 +8,93 @@
 #define FULL_MASK 0xffffffff
 
 template <typename FPTYPE>
-__forceinline__ __device__
-void locate_xx_se_a(
-    FPTYPE& xx, 
-    int& table_idx,
-    const FPTYPE& lower, 
-    const FPTYPE& upper,  
-    const FPTYPE& max, 
-    const FPTYPE& stride0, 
-    const FPTYPE& stride1)
-{
+__forceinline__ __device__ void locate_xx_se_a(FPTYPE& xx,
+                                               int& table_idx,
+                                               const FPTYPE& lower,
+                                               const FPTYPE& upper,
+                                               const FPTYPE& max,
+                                               const FPTYPE& stride0,
+                                               const FPTYPE& stride1) {
   if (xx < lower) {
     table_idx = 0;
     xx = (FPTYPE)0.;
-  }
-  else if (xx < upper) {
+  } else if (xx < upper) {
     table_idx = (int)((xx - lower) / stride0);
     xx -= (table_idx * stride0 + lower);
-  }
-  else if (xx < max) {
+  } else if (xx < max) {
     int first_stride = int((upper - lower) / stride0);
     table_idx = first_stride + (int)((xx - upper) / stride1);
     xx -= ((table_idx - first_stride) * stride1 + upper);
-  }
-  else {
-    table_idx = int((upper - lower) / stride0) + (int)((max - upper) / stride1) - 1;
+  } else {
+    table_idx =
+        int((upper - lower) / stride0) + (int)((max - upper) / stride1) - 1;
     xx = (FPTYPE)0.;
   }
 }
 
 template <typename FPTYPE>
-__forceinline__ __device__
-void locate_xx_se_t(
-    FPTYPE& xx, 
-    int& table_idx,
-    const FPTYPE& lower, 
-    const FPTYPE& upper,  
-    const FPTYPE& min, 
-    const FPTYPE& max, 
-    const FPTYPE& stride0, 
-    const FPTYPE& stride1)
-{
+__forceinline__ __device__ void locate_xx_se_t(FPTYPE& xx,
+                                               int& table_idx,
+                                               const FPTYPE& lower,
+                                               const FPTYPE& upper,
+                                               const FPTYPE& min,
+                                               const FPTYPE& max,
+                                               const FPTYPE& stride0,
+                                               const FPTYPE& stride1) {
   if (xx < min) {
     table_idx = 0;
     xx = (FPTYPE)0.;
-  }
-  else if (xx < lower) {
+  } else if (xx < lower) {
     table_idx = (int)((xx - min) / stride1);
     xx -= (table_idx * stride1 + min);
-  }
-  else if (xx < upper) {
+  } else if (xx < upper) {
     int first_stride = int((lower - min) / stride1);
     table_idx = first_stride + (int)((xx - lower) / stride0);
     xx -= ((table_idx - first_stride) * stride0 + lower);
-  }
-  else if (xx < max) {
-    int first_stride = int((lower - min) / stride1) + int((upper - lower) / stride0);
+  } else if (xx < max) {
+    int first_stride =
+        int((lower - min) / stride1) + int((upper - lower) / stride0);
     table_idx = first_stride + (int)((xx - upper) / stride1);
     xx -= ((table_idx - first_stride) * stride1 + upper);
-  }
-  else {
-    table_idx = int((lower - min) / stride1) + int((upper - lower) / stride0) + (int)((max - upper) / stride1) - 1;
+  } else {
+    table_idx = int((lower - min) / stride1) + int((upper - lower) / stride0) +
+                (int)((max - upper) / stride1) - 1;
     xx = (FPTYPE)0.;
   }
 }
 
 template <typename FPTYPE>
-__forceinline__ __device__
-void locate_xx_se_r(
-    FPTYPE& xx, 
-    int& table_idx,
-    const FPTYPE& lower, 
-    const FPTYPE& upper,  
-    const FPTYPE& max, 
-    const FPTYPE& stride0, 
-    const FPTYPE& stride1)
-{
+__forceinline__ __device__ void locate_xx_se_r(FPTYPE& xx,
+                                               int& table_idx,
+                                               const FPTYPE& lower,
+                                               const FPTYPE& upper,
+                                               const FPTYPE& max,
+                                               const FPTYPE& stride0,
+                                               const FPTYPE& stride1) {
   if (xx < lower) {
     table_idx = 0;
     xx = (FPTYPE)0.;
-  }
-  else if (xx < upper) {
+  } else if (xx < upper) {
     table_idx = (int)((xx - lower) / stride0);
     xx -= (table_idx * stride0 + lower);
-  }
-  else if (xx < max) {
+  } else if (xx < max) {
     int first_stride = int((upper - lower) / stride0);
     table_idx = first_stride + (int)((xx - upper) / stride1);
     xx -= ((table_idx - first_stride) * stride1 + upper);
-  }
-  else {
-    table_idx = int((upper - lower) / stride0) + (int)((max - upper) / stride1) - 1;
+  } else {
+    table_idx =
+        int((upper - lower) / stride0) + (int)((max - upper) / stride1) - 1;
     xx = (FPTYPE)0.;
   }
 }
 
 template <typename FPTYPE>
-__forceinline__ __device__ 
-void load_polynomial_params(
+__forceinline__ __device__ void load_polynomial_params(
     FPTYPE var[6],
     const FPTYPE* table,
-    const int& table_idx, 
-    const int& idx, 
-    const int& last_layer_size)
-{
+    const int& table_idx,
+    const int& idx,
+    const int& last_layer_size) {
   var[0] = table[table_idx * last_layer_size * 6 + idx * 6 + 0];
   var[1] = table[table_idx * last_layer_size * 6 + idx * 6 + 1];
   var[2] = table[table_idx * last_layer_size * 6 + idx * 6 + 2];
@@ -120,45 +103,32 @@ void load_polynomial_params(
   var[5] = table[table_idx * last_layer_size * 6 + idx * 6 + 5];
 }
 
-
 template <typename FPTYPE>
-__forceinline__ __device__ 
-FPTYPE dot(
-    FPTYPE ll[4], 
-    FPTYPE rr[4]) 
-{
+__forceinline__ __device__ FPTYPE dot(FPTYPE ll[4], FPTYPE rr[4]) {
   return ll[0] * rr[0] + ll[1] * rr[1] + ll[2] * rr[2] + ll[3] * rr[3];
 }
 
 template <typename FPTYPE>
-__forceinline__ 
-__device__
-void warp_reduce(
-    FPTYPE & val) 
-{
+__forceinline__ __device__ void warp_reduce(FPTYPE& val) {
   for (int offset = 16; offset > 0; offset >>= 1)
     val += __shfl_down_sync(FULL_MASK, val, offset);
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE> 
+template <typename FPTYPE, int MTILE, int KTILE>
 __global__ void tabulate_fusion_se_a_fifth_order_polynomial(
-    FPTYPE * out, 
-    const FPTYPE * table, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE lower, 
-    const FPTYPE upper, 
-    const FPTYPE max, 
-    const FPTYPE stride0, 
-    const FPTYPE stride1, 
-    const int nnei, 
-    const int last_layer_size) 
-{
-  const int_64 block_idx = blockIdx.x;   // nloc
-  const int thread_idx = threadIdx.x; // last_layer_size
+    FPTYPE* out,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei,
+    const int last_layer_size) {
+  const int_64 block_idx = blockIdx.x;  // nloc
+  const int thread_idx = threadIdx.x;   // last_layer_size
   FPTYPE ago = __shfl_sync(0xffffffff, em_x[block_idx * nnei + nnei - 1], 0);
   bool unloop = false;
   int breakpoint = nnei - 1;
@@ -175,86 +145,97 @@ __global__ void tabulate_fusion_se_a_fifth_order_polynomial(
     int table_idx = 0;
     locate_xx_se_a(xx, table_idx, lower, upper, max, stride0, stride1);
     if (table_idx != mark_table_idx) {
-      load_polynomial_params(var, table, table_idx, thread_idx, last_layer_size);
+      load_polynomial_params(var, table, table_idx, thread_idx,
+                             last_layer_size);
     }
-    FPTYPE res = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
-    
+    FPTYPE res =
+        var[0] +
+        (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+            xx;
+
     for (int kk = 0; kk < MTILE; kk++) {
-      sum[kk] += (nnei - breakpoint) * em[block_idx * nnei * MTILE + ii * MTILE + kk] * res;
+      sum[kk] += (nnei - breakpoint) *
+                 em[block_idx * nnei * MTILE + ii * MTILE + kk] * res;
     }
     if (unloop) break;
     mark_table_idx = table_idx;
   }
   for (int ii = 0; ii < MTILE; ii++) {
-    out[block_idx * MTILE * last_layer_size + ii * last_layer_size + thread_idx] = sum[ii];
+    out[block_idx * MTILE * last_layer_size + ii * last_layer_size +
+        thread_idx] = sum[ii];
   }
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE> 
+template <typename FPTYPE, int MTILE, int KTILE>
 __global__ void tabulate_fusion_se_a_grad_fifth_order_polynomial(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,   
-    const FPTYPE * table, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const FPTYPE lower, 
-    const FPTYPE upper, 
-    const FPTYPE max, 
-    const FPTYPE stride0, 
-    const FPTYPE stride1, 
-    const int nnei, 
-    const int last_layer_size) 
-{
+    FPTYPE* dy_dem_x,
+    FPTYPE* dy_dem,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE* dy,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei,
+    const int last_layer_size) {
   extern __shared__ int _data[];
   const int_64 block_idx = blockIdx.x;  // nloc
-  const int thread_idx = threadIdx.x; // KTILE * WARP_SIZE, usally 128 here~
+  const int thread_idx = threadIdx.x;   // KTILE * WARP_SIZE, usually 128 here~
   int warp_idx = __shfl_sync(0xffffffff, threadIdx.x / WARP_SIZE, 0);
   int lane_idx = threadIdx.x % WARP_SIZE;
   int breakpoint = nnei - 1;
   bool unloop = false;
-  FPTYPE * iteratorA = (FPTYPE *)&_data[0]; // dy
+  FPTYPE* iteratorA = (FPTYPE*)&_data[0];  // dy
   for (int ii = 0; ii < MTILE; ii++) {
     for (int jj = thread_idx; jj < last_layer_size; jj += blockDim.x) {
-      iteratorA[ii * last_layer_size + jj] = dy[block_idx * MTILE * last_layer_size + ii * last_layer_size + jj];
+      iteratorA[ii * last_layer_size + jj] =
+          dy[block_idx * MTILE * last_layer_size + ii * last_layer_size + jj];
     }
   }
   __syncthreads();
   FPTYPE ago = __shfl_sync(0xffffffff, em_x[block_idx * nnei + nnei - 1], 0);
   for (int ii = warp_idx; ii < nnei; ii += KTILE) {
     FPTYPE xx = em_x[block_idx * nnei + ii];
-    if (ago == xx) { 
+    if (ago == xx) {
       unloop = true;
       breakpoint = ii;
     }
-    
+
     int table_idx = 0;
-    FPTYPE reg_em[MTILE] = {
-      em[block_idx * nnei * MTILE + ii * 4 + 0],
-      em[block_idx * nnei * MTILE + ii * 4 + 1],
-      em[block_idx * nnei * MTILE + ii * 4 + 2],
-      em[block_idx * nnei * MTILE + ii * 4 + 3]
-    };
+    FPTYPE reg_em[MTILE] = {em[block_idx * nnei * MTILE + ii * 4 + 0],
+                            em[block_idx * nnei * MTILE + ii * 4 + 1],
+                            em[block_idx * nnei * MTILE + ii * 4 + 2],
+                            em[block_idx * nnei * MTILE + ii * 4 + 3]};
     FPTYPE Csub = (FPTYPE)0.;
     FPTYPE sum[MTILE] = {(FPTYPE)0.};
     locate_xx_se_a(xx, table_idx, lower, upper, max, stride0, stride1);
 
-    FPTYPE var[6]; 
+    FPTYPE var[6];
     for (int jj = lane_idx; jj < last_layer_size; jj += WARP_SIZE) {
       load_polynomial_params(var, table, table_idx, jj, last_layer_size);
-      FPTYPE res = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
-      
+      FPTYPE res =
+          var[0] +
+          (var[1] +
+           (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+              xx;
+
       for (int kk = 0; kk < MTILE; kk++) {
-        sum[kk] += (nnei - breakpoint) * iteratorA[kk * last_layer_size + jj] * res;
+        sum[kk] +=
+            (nnei - breakpoint) * iteratorA[kk * last_layer_size + jj] * res;
       }
-      res  = reg_em[0] * iteratorA[0 * last_layer_size + jj];
+      res = reg_em[0] * iteratorA[0 * last_layer_size + jj];
       res += reg_em[1] * iteratorA[1 * last_layer_size + jj];
       res += reg_em[2] * iteratorA[2 * last_layer_size + jj];
       res += reg_em[3] * iteratorA[3 * last_layer_size + jj];
-      Csub += (nnei - breakpoint) * (var[1] + (2 * var[2] + (3 * var[3] + (4 * var[4] + 5 * var[5] * xx) * xx) * xx) * xx) * res;
+      Csub +=
+          (nnei - breakpoint) *
+          (var[1] + (2 * var[2] +
+                     (3 * var[3] + (4 * var[4] + 5 * var[5] * xx) * xx) * xx) *
+                        xx) *
+          res;
     }
     __syncwarp();
     for (int kk = 0; kk < MTILE; kk++) {
@@ -271,32 +252,28 @@ __global__ void tabulate_fusion_se_a_grad_fifth_order_polynomial(
   }
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE>
+template <typename FPTYPE, int MTILE, int KTILE>
 __global__ void tabulate_fusion_se_a_grad_grad_fifth_order_polynomial(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
+    FPTYPE* dz_dy,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE* dz_dy_dem_x,
+    const FPTYPE* dz_dy_dem,
     const FPTYPE lower,
     const FPTYPE upper,
     const FPTYPE max,
     const FPTYPE stride0,
     const FPTYPE stride1,
     const int nnei,
-    const int last_layer_size)
-{
+    const int last_layer_size) {
   extern __shared__ int _data[];
-  const int_64 block_idx = blockIdx.x;   // nloc
-  const int thread_idx = threadIdx.x; // last_layer_size
+  const int_64 block_idx = blockIdx.x;  // nloc
+  const int thread_idx = threadIdx.x;   // last_layer_size
   FPTYPE ago = __shfl_sync(0xffffffff, em_x[block_idx * nnei + nnei - 1], 0);
   bool unloop = false;
   int breakpoint = nnei - 1;
-  FPTYPE * iteratorC = (FPTYPE*) &_data[0];
+  FPTYPE* iteratorC = (FPTYPE*)&_data[0];
   for (int kk = 0; kk < MTILE; kk++)
     iteratorC[kk * last_layer_size + thread_idx] = (FPTYPE)0.;
   __syncthreads();
@@ -313,54 +290,62 @@ __global__ void tabulate_fusion_se_a_grad_grad_fifth_order_polynomial(
     int table_idx = 0;
     locate_xx_se_a(xx, table_idx, lower, upper, max, stride0, stride1);
     if (table_idx != mark_table_idx) {
-      load_polynomial_params(var, table, table_idx, thread_idx, last_layer_size);
+      load_polynomial_params(var, table, table_idx, thread_idx,
+                             last_layer_size);
     }
-    
-    FPTYPE res = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
-    FPTYPE res_grad = var[1] + (2 * var[2] + (3 * var[3] + (4 * var[4] + 5 * var[5] * xx) * xx) * xx) * xx;
+
+    FPTYPE res =
+        var[0] +
+        (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+            xx;
+    FPTYPE res_grad =
+        var[1] +
+        (2 * var[2] + (3 * var[3] + (4 * var[4] + 5 * var[5] * xx) * xx) * xx) *
+            xx;
 
     for (int kk = 0; kk < MTILE; kk++) {
       int em_index = block_idx * nnei * MTILE + ii * MTILE + kk;
-      iteratorC[kk * last_layer_size + thread_idx] += (nnei - breakpoint) * (em[em_index] * res_grad * dz_xx + dz_dy_dem[em_index] * res);
+      iteratorC[kk * last_layer_size + thread_idx] +=
+          (nnei - breakpoint) *
+          (em[em_index] * res_grad * dz_xx + dz_dy_dem[em_index] * res);
     }
     mark_table_idx = table_idx;
     if (unloop) break;
   }
   for (int ii = 0; ii < MTILE; ii++) {
-    dz_dy[block_idx * MTILE * last_layer_size + ii * last_layer_size + thread_idx] = iteratorC[ii * last_layer_size + thread_idx];
+    dz_dy[block_idx * MTILE * last_layer_size + ii * last_layer_size +
+          thread_idx] = iteratorC[ii * last_layer_size + thread_idx];
   }
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE> 
+template <typename FPTYPE, int MTILE, int KTILE>
 __global__ void tabulate_fusion_se_t_fifth_order_polynomial(
-    FPTYPE * out, 
-    const FPTYPE * table, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE lower, 
-    const FPTYPE upper, 
-    const FPTYPE max, 
-    const FPTYPE stride0, 
-    const FPTYPE stride1, 
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size) 
-{
-  const int_64 block_idx = blockIdx.x;   // nloc
-  const int thread_idx = threadIdx.x; // last_layer_size
+    FPTYPE* out,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size) {
+  const int_64 block_idx = blockIdx.x;  // nloc
+  const int thread_idx = threadIdx.x;   // last_layer_size
 
   FPTYPE sum = (FPTYPE)0.;
   for (int ii = 0; ii < nnei_i; ii++) {
-    FPTYPE ago = __shfl_sync(0xffffffff, em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + nnei_j - 1], 0);
+    FPTYPE ago = __shfl_sync(
+        0xffffffff,
+        em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + nnei_j - 1], 0);
     int breakpoint = nnei_j - 1;
     bool unloop = false;
     FPTYPE var[6];
     int mark_table_idx = -1;
     for (int jj = 0; jj < nnei_j; jj++) {
-      FPTYPE xx  = em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
+      FPTYPE xx = em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
       FPTYPE tmp = xx;
       if (xx == ago) {
         unloop = true;
@@ -369,9 +354,14 @@ __global__ void tabulate_fusion_se_t_fifth_order_polynomial(
       int table_idx = 0;
       locate_xx_se_t(xx, table_idx, lower, upper, -max, max, stride0, stride1);
       if (table_idx != mark_table_idx) {
-        load_polynomial_params(var, table, table_idx, thread_idx, last_layer_size);
+        load_polynomial_params(var, table, table_idx, thread_idx,
+                               last_layer_size);
       }
-      FPTYPE res = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
+      FPTYPE res =
+          var[0] +
+          (var[1] +
+           (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+              xx;
 
       sum += (nnei_j - breakpoint) * tmp * res;
       mark_table_idx = table_idx;
@@ -381,63 +371,71 @@ __global__ void tabulate_fusion_se_t_fifth_order_polynomial(
   out[block_idx * last_layer_size + thread_idx] = sum;
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE> 
+template 
 __global__ void tabulate_fusion_se_t_grad_fifth_order_polynomial(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,   
-    const FPTYPE * table, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const FPTYPE lower, 
-    const FPTYPE upper, 
-    const FPTYPE max, 
-    const FPTYPE stride0, 
-    const FPTYPE stride1, 
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size) 
-{
+    FPTYPE* dy_dem_x,
+    FPTYPE* dy_dem,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE* dy,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size) {
   extern __shared__ int _data[];
   const int_64 block_idx = blockIdx.x;  // nloc
-  const int thread_idx = threadIdx.x; // KTILE * WARP_SIZE, usally 128 here~
+  const int thread_idx = threadIdx.x;   // KTILE * WARP_SIZE, usually 128 here~
   int warp_idx = __shfl_sync(0xffffffff, threadIdx.x / WARP_SIZE, 0);
   int lane_idx = threadIdx.x % WARP_SIZE;
-  FPTYPE * iteratorA = (FPTYPE *)&_data[0]; // dy
+  FPTYPE* iteratorA = (FPTYPE*)&_data[0];  // dy
   for (int ii = thread_idx; ii < last_layer_size; ii += blockDim.x) {
     iteratorA[ii] = dy[block_idx * last_layer_size + ii];
   }
   __syncthreads();
 
   for (int ii = 0; ii < nnei_i; ii++) {
-    FPTYPE ago = __shfl_sync(0xffffffff, em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + nnei_j - 1], 0);
+    FPTYPE ago = __shfl_sync(
+        0xffffffff,
+        em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + nnei_j - 1], 0);
     bool unloop = false;
     for (int jj = warp_idx; jj < nnei_j; jj += KTILE) {
-      FPTYPE xx  = em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
+      FPTYPE xx = em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
       FPTYPE tmp = xx;
-      if (ago == xx) { 
+      if (ago == xx) {
         unloop = true;
       }
       int table_idx = 0;
       locate_xx_se_t(xx, table_idx, lower, upper, -max, max, stride0, stride1);
-      FPTYPE sum  = (FPTYPE)0.;
+      FPTYPE sum = (FPTYPE)0.;
       FPTYPE Csub = (FPTYPE)0.;
       for (int kk = lane_idx; kk < last_layer_size; kk += WARP_SIZE) {
-        FPTYPE var[6]; 
+        FPTYPE var[6];
         load_polynomial_params(var, table, table_idx, kk, last_layer_size);
-        FPTYPE res = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
-
-        sum  += iteratorA[kk] * res;
-        Csub += iteratorA[kk] * tmp * (var[1] + ((FPTYPE)2. * var[2] + ((FPTYPE)3. * var[3] + ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) * xx) * xx);
+        FPTYPE res =
+            var[0] +
+            (var[1] +
+             (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+                xx;
+
+        sum += iteratorA[kk] * res;
+        Csub +=
+            iteratorA[kk] * tmp *
+            (var[1] + ((FPTYPE)2. * var[2] +
+                       ((FPTYPE)3. * var[3] +
+                        ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) *
+                           xx) *
+                          xx);
       }
       __syncwarp();
       warp_reduce(sum);
       warp_reduce(Csub);
       if (lane_idx == 0) {
-        dy_dem  [block_idx * nnei_i * nnei_j + ii * nnei_j + jj] = sum;
+        dy_dem[block_idx * nnei_i * nnei_j + ii * nnei_j + jj] = sum;
         dy_dem_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj] = Csub;
       }
       if (unloop) break;
@@ -445,17 +443,14 @@ __global__ void tabulate_fusion_se_t_grad_fifth_order_polynomial(
   }
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE>
+template 
 __global__ void tabulate_fusion_se_t_grad_grad_fifth_order_polynomial(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
+    FPTYPE* dz_dy,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE* dz_dy_dem_x,
+    const FPTYPE* dz_dy_dem,
     const FPTYPE lower,
     const FPTYPE upper,
     const FPTYPE max,
@@ -463,34 +458,44 @@ __global__ void tabulate_fusion_se_t_grad_grad_fifth_order_polynomial(
     const FPTYPE stride1,
     const int nnei_i,
     const int nnei_j,
-    const int last_layer_size)
-{
-  const int_64 block_idx  = blockIdx.x;   // nloc
-  const int thread_idx = threadIdx.x; // last_layer_size
+    const int last_layer_size) {
+  const int_64 block_idx = blockIdx.x;  // nloc
+  const int thread_idx = threadIdx.x;   // last_layer_size
 
   FPTYPE sum = (FPTYPE)0.;
-  for (int ii = 0; ii < nnei_i; ii++) { 
-    FPTYPE ago = __shfl_sync(0xffffffff, em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + nnei_j - 1], 0);
+  for (int ii = 0; ii < nnei_i; ii++) {
+    FPTYPE ago = __shfl_sync(
+        0xffffffff,
+        em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + nnei_j - 1], 0);
     bool unloop = false;
     int mark_table_idx = -1;
     for (int jj = 0; ii < nnei_j; jj++) {
-      FPTYPE xx  = em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
+      FPTYPE xx = em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
       FPTYPE tmp = xx;
-      FPTYPE dz_xx = dz_dy_dem_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
+      FPTYPE dz_xx =
+          dz_dy_dem_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
       FPTYPE dz_em = dz_dy_dem[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
       FPTYPE var[6];
-      if (ago == xx) { 
+      if (ago == xx) {
         unloop = true;
       }
 
       int table_idx = 0;
       locate_xx_se_t(xx, table_idx, lower, upper, -max, max, stride0, stride1);
       if (table_idx != mark_table_idx) {
-        load_polynomial_params(var, table, table_idx, thread_idx, last_layer_size);
+        load_polynomial_params(var, table, table_idx, thread_idx,
+                               last_layer_size);
       }
-      FPTYPE res = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
-      FPTYPE res_grad = var[1] + (2 * var[2] + (3 * var[3] + (4 * var[4] + 5 * var[5] * xx) * xx) * xx) * xx;
-  
+      FPTYPE res =
+          var[0] +
+          (var[1] +
+           (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+              xx;
+      FPTYPE res_grad =
+          var[1] + (2 * var[2] +
+                    (3 * var[3] + (4 * var[4] + 5 * var[5] * xx) * xx) * xx) *
+                       xx;
+
       sum += (tmp * res_grad * dz_xx + dz_em * res);
       mark_table_idx = table_idx;
       if (unloop) break;
@@ -499,24 +504,20 @@ __global__ void tabulate_fusion_se_t_grad_grad_fifth_order_polynomial(
   dz_dy[block_idx * last_layer_size + thread_idx] = sum;
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE> 
+template 
 __global__ void tabulate_fusion_se_r_fifth_order_polynomial(
-    FPTYPE * out, 
-    const FPTYPE * table, 
-    const FPTYPE * em, 
-    const FPTYPE lower, 
-    const FPTYPE upper, 
-    const FPTYPE max, 
-    const FPTYPE stride0, 
-    const FPTYPE stride1, 
-    const int nnei, 
-    const int last_layer_size) 
-{
-  const int_64 block_idx = blockIdx.x;   // nloc
-  const int thread_idx = threadIdx.x; // last_layer_size
+    FPTYPE* out,
+    const FPTYPE* table,
+    const FPTYPE* em,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei,
+    const int last_layer_size) {
+  const int_64 block_idx = blockIdx.x;  // nloc
+  const int thread_idx = threadIdx.x;   // last_layer_size
 
   int mark_table_idx = -1;
   FPTYPE var[6];
@@ -525,79 +526,79 @@ __global__ void tabulate_fusion_se_r_fifth_order_polynomial(
     int table_idx = 0;
     locate_xx_se_r(xx, table_idx, lower, upper, max, stride0, stride1);
     if (table_idx != mark_table_idx) {
-      load_polynomial_params(var, table, table_idx, thread_idx, last_layer_size);
+      load_polynomial_params(var, table, table_idx, thread_idx,
+                             last_layer_size);
     }
-    out[block_idx * nnei * last_layer_size + ii * last_layer_size + thread_idx] = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
+    out[block_idx * nnei * last_layer_size + ii * last_layer_size +
+        thread_idx] =
+        var[0] +
+        (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+            xx;
     mark_table_idx = table_idx;
   }
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE> 
+template 
 __global__ void tabulate_fusion_se_r_grad_fifth_order_polynomial(
-    FPTYPE * dy_dem,   
-    const FPTYPE * table,  
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const FPTYPE lower, 
-    const FPTYPE upper, 
-    const FPTYPE max, 
-    const FPTYPE stride0, 
-    const FPTYPE stride1, 
-    const int nnei, 
-    const int last_layer_size) 
-{
+    FPTYPE* dy_dem,
+    const FPTYPE* table,
+    const FPTYPE* em,
+    const FPTYPE* dy,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei,
+    const int last_layer_size) {
   extern __shared__ int _data[];
   const int_64 block_idx = blockIdx.x;  // nloc
-  const int thread_idx = threadIdx.x; // KTILE * WARP_SIZE, usally 128 here~
+  const int thread_idx = threadIdx.x;   // KTILE * WARP_SIZE, usually 128 here~
   int warp_idx = __shfl_sync(0xffffffff, thread_idx / WARP_SIZE, 0);
   int lane_idx = thread_idx % WARP_SIZE;
   __syncthreads();
   for (int ii = warp_idx; ii < nnei; ii += KTILE) {
     FPTYPE xx = em[block_idx * nnei + ii];
-    
+
     int table_idx = 0;
     FPTYPE Csub = (FPTYPE)0.;
     locate_xx_se_r(xx, table_idx, lower, upper, max, stride0, stride1);
 
-    FPTYPE var[6]; 
+    FPTYPE var[6];
     for (int jj = lane_idx; jj < last_layer_size; jj += WARP_SIZE) {
       load_polynomial_params(var, table, table_idx, jj, last_layer_size);
-      Csub += (var[1] + (2 * var[2] + (3 * var[3] + (4 * var[4] + 5 * var[5] * xx) * xx) * xx) * xx) * dy[block_idx * nnei * last_layer_size + ii * last_layer_size + jj];
+      Csub +=
+          (var[1] + (2 * var[2] +
+                     (3 * var[3] + (4 * var[4] + 5 * var[5] * xx) * xx) * xx) *
+                        xx) *
+          dy[block_idx * nnei * last_layer_size + ii * last_layer_size + jj];
     }
     __syncwarp();
 
     warp_reduce(Csub);
-    if (lane_idx == 0){
+    if (lane_idx == 0) {
       dy_dem[block_idx * nnei + ii] = Csub;
     }
-  
   }
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE>
+template 
 __global__ void tabulate_fusion_se_r_grad_grad_fifth_order_polynomial(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem,
+    FPTYPE* dz_dy,
+    const FPTYPE* table,
+    const FPTYPE* em,
+    const FPTYPE* dz_dy_dem,
     const FPTYPE lower,
     const FPTYPE upper,
     const FPTYPE max,
     const FPTYPE stride0,
     const FPTYPE stride1,
     const int nnei,
-    const int last_layer_size)
-{
+    const int last_layer_size) {
   extern __shared__ int _data[];
-  const int_64 block_idx = blockIdx.x;   // nloc
-  const int thread_idx = threadIdx.x; // last_layer_size
-  
+  const int_64 block_idx = blockIdx.x;  // nloc
+  const int thread_idx = threadIdx.x;   // last_layer_size
+
   int mark_table_idx = -1;
   FPTYPE var[6];
   for (int ii = 0; ii < nnei; ii++) {
@@ -605,243 +606,403 @@ __global__ void tabulate_fusion_se_r_grad_grad_fifth_order_polynomial(
     int table_idx = 0;
     locate_xx_se_r(xx, table_idx, lower, upper, max, stride0, stride1);
     if (table_idx != mark_table_idx) {
-      load_polynomial_params(var, table, table_idx, thread_idx, last_layer_size);
+      load_polynomial_params(var, table, table_idx, thread_idx,
+                             last_layer_size);
     }
-    FPTYPE res_grad = var[1] + (2 * var[2] + (3 * var[3] + (4 * var[4] + 5 * var[5] * xx) * xx) * xx) * xx;
+    FPTYPE res_grad =
+        var[1] +
+        (2 * var[2] + (3 * var[3] + (4 * var[4] + 5 * var[5] * xx) * xx) * xx) *
+            xx;
     mark_table_idx = table_idx;
-    dz_dy[block_idx * nnei * last_layer_size + ii * last_layer_size + thread_idx] = dz_dy_dem[block_idx * nnei + ii]*res_grad;
+    dz_dy[block_idx * nnei * last_layer_size + ii * last_layer_size +
+          thread_idx] = dz_dy_dem[block_idx * nnei + ii] * res_grad;
   }
 }
 
 namespace deepmd {
-template
-void tabulate_fusion_se_a_gpu_cuda(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const int nloc,
-    const int nnei, 
-    const int last_layer_size) 
-{
-  if (nloc <= 0) {return;}
-  tabulate_fusion_se_a_fifth_order_polynomial <<>>(
-      out, 
-      table, em_x, em, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
+template 
+void tabulate_fusion_se_a_gpu_cuda(FPTYPE* out,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em_x,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei,
+                                   const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  tabulate_fusion_se_a_fifth_order_polynomial
+      <<>>(out, table, em_x, em, table_info[0],
+                                  table_info[1], table_info[2], table_info[3],
+                                  table_info[4], nnei, last_layer_size);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_a_grad_gpu_cuda(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size)
-{
-  if (nloc <= 0) {return;}
-  DPErrcheck(cudaMemset(
-      dy_dem_x,
-      0, sizeof(FPTYPE) * nloc * nnei));
-  DPErrcheck(cudaMemset(
-      dy_dem,
-      0, sizeof(FPTYPE) * nloc * nnei * 4));
-
-  tabulate_fusion_se_a_grad_fifth_order_polynomial <<>>(
-      dy_dem_x, dy_dem,
-      table, em_x, em, dy,  table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
+template 
+void tabulate_fusion_se_a_grad_gpu_cuda(FPTYPE* dy_dem_x,
+                                        FPTYPE* dy_dem,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em_x,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,
+                                        const int nloc,
+                                        const int nnei,
+                                        const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  DPErrcheck(cudaMemset(dy_dem_x, 0, sizeof(FPTYPE) * nloc * nnei));
+  DPErrcheck(cudaMemset(dy_dem, 0, sizeof(FPTYPE) * nloc * nnei * 4));
+
+  tabulate_fusion_se_a_grad_fifth_order_polynomial
+      <<>>(
+          dy_dem_x, dy_dem, table, em_x, em, dy, table_info[0], table_info[1],
+          table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_a_grad_grad_gpu_cuda(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei,
-    const int last_layer_size)
-{
-  if (nloc <= 0) {return;}
-  DPErrcheck(cudaMemset(
-    dz_dy,
-    0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
-  tabulate_fusion_se_a_grad_grad_fifth_order_polynomial <<>>(
-      dz_dy,
-      table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
+template 
+void tabulate_fusion_se_a_grad_grad_gpu_cuda(FPTYPE* dz_dy,
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em_x,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem_x,
+                                             const FPTYPE* dz_dy_dem,
+                                             const int nloc,
+                                             const int nnei,
+                                             const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  DPErrcheck(cudaMemset(dz_dy, 0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
+  tabulate_fusion_se_a_grad_grad_fifth_order_polynomial
+      <<>>(
+          dz_dy, table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0],
+          table_info[1], table_info[2], table_info[3], table_info[4], nnei,
+          last_layer_size);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_t_gpu_cuda(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const int nloc,
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size) 
-{
-  if (nloc <= 0) {return;}
-  tabulate_fusion_se_t_fifth_order_polynomial <<>>(
-      out, 
-      table, em_x, em, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei_i, nnei_j, last_layer_size);
+template 
+void tabulate_fusion_se_t_gpu_cuda(FPTYPE* out,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em_x,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei_i,
+                                   const int nnei_j,
+                                   const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  tabulate_fusion_se_t_fifth_order_polynomial
+      <<>>(
+          out, table, em_x, em, table_info[0], table_info[1], table_info[2],
+          table_info[3], table_info[4], nnei_i, nnei_j, last_layer_size);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_t_grad_gpu_cuda(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size)
-{
-  if (nloc <= 0) {return;}
-  DPErrcheck(cudaMemset(
-      dy_dem_x,
-      0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
-  DPErrcheck(cudaMemset(
-      dy_dem,
-      0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
-
-  tabulate_fusion_se_t_grad_fifth_order_polynomial <<>>(
-      dy_dem_x, dy_dem,
-      table, em_x, em, dy,  table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei_i, nnei_j, last_layer_size);
+template 
+void tabulate_fusion_se_t_grad_gpu_cuda(FPTYPE* dy_dem_x,
+                                        FPTYPE* dy_dem,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em_x,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,
+                                        const int nloc,
+                                        const int nnei_i,
+                                        const int nnei_j,
+                                        const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  DPErrcheck(cudaMemset(dy_dem_x, 0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
+  DPErrcheck(cudaMemset(dy_dem, 0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
+
+  tabulate_fusion_se_t_grad_fifth_order_polynomial
+      <<>>(
+          dy_dem_x, dy_dem, table, em_x, em, dy, table_info[0], table_info[1],
+          table_info[2], table_info[3], table_info[4], nnei_i, nnei_j,
+          last_layer_size);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_t_grad_grad_gpu_cuda(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei_i,
-    const int nnei_j,
-    const int last_layer_size)
-{
-  if (nloc <= 0) {return;}
-  DPErrcheck(cudaMemset(
-    dz_dy,
-    0, sizeof(FPTYPE) * nloc * last_layer_size));
-
-  tabulate_fusion_se_t_grad_grad_fifth_order_polynomial <<>>(
-      dz_dy,
-      table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei_i, nnei_j, last_layer_size);
+template 
+void tabulate_fusion_se_t_grad_grad_gpu_cuda(FPTYPE* dz_dy,
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em_x,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem_x,
+                                             const FPTYPE* dz_dy_dem,
+                                             const int nloc,
+                                             const int nnei_i,
+                                             const int nnei_j,
+                                             const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  DPErrcheck(cudaMemset(dz_dy, 0, sizeof(FPTYPE) * nloc * last_layer_size));
+
+  tabulate_fusion_se_t_grad_grad_fifth_order_polynomial
+      <<>>(dz_dy, table, em_x, em, dz_dy_dem_x,
+                                  dz_dy_dem, table_info[0], table_info[1],
+                                  table_info[2], table_info[3], table_info[4],
+                                  nnei_i, nnei_j, last_layer_size);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_r_gpu_cuda(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em, 
-    const int nloc,
-    const int nnei, 
-    const int last_layer_size) 
-{
-  if (nloc <= 0) {return;}
-  tabulate_fusion_se_r_fifth_order_polynomial <<>>(
-      out, 
-      table, em, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
+template 
+void tabulate_fusion_se_r_gpu_cuda(FPTYPE* out,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei,
+                                   const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  tabulate_fusion_se_r_fifth_order_polynomial
+      <<>>(out, table, em, table_info[0], table_info[1],
+                                  table_info[2], table_info[3], table_info[4],
+                                  nnei, last_layer_size);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_r_grad_gpu_cuda(
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size)
-{
-  if (nloc <= 0) {return;}
-  DPErrcheck(cudaMemset(
-      dy_dem,
-      0, sizeof(FPTYPE) * nloc * nnei));
-
-  tabulate_fusion_se_r_grad_fifth_order_polynomial <<>>(
-      dy_dem,
-      table, em, dy,  table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
+template 
+void tabulate_fusion_se_r_grad_gpu_cuda(FPTYPE* dy_dem,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,
+                                        const int nloc,
+                                        const int nnei,
+                                        const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  DPErrcheck(cudaMemset(dy_dem, 0, sizeof(FPTYPE) * nloc * nnei));
+
+  tabulate_fusion_se_r_grad_fifth_order_polynomial
+      <<>>(
+          dy_dem, table, em, dy, table_info[0], table_info[1], table_info[2],
+          table_info[3], table_info[4], nnei, last_layer_size);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_r_grad_grad_gpu_cuda(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei,
-    const int last_layer_size)
-{
-  if (nloc <= 0) {return;}
-  DPErrcheck(cudaMemset(
-    dz_dy,
-    0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
-  tabulate_fusion_se_r_grad_grad_fifth_order_polynomial <<>>(
-      dz_dy,
-      table, em, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
+template 
+void tabulate_fusion_se_r_grad_grad_gpu_cuda(FPTYPE* dz_dy,
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem,
+                                             const int nloc,
+                                             const int nnei,
+                                             const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  DPErrcheck(
+      cudaMemset(dz_dy, 0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
+  tabulate_fusion_se_r_grad_grad_fifth_order_polynomial
+      <<>>(
+          dz_dy, table, em, dz_dy_dem, table_info[0], table_info[1],
+          table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
   DPErrcheck(cudaGetLastError());
   DPErrcheck(cudaDeviceSynchronize());
 }
 
-template void tabulate_fusion_se_a_gpu_cuda(float * out, const float * table, const float * table_info, const float * em_x, const float * em, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_a_gpu_cuda(double * out, const double * table, const double * table_info, const double * em_x, const double * em, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_a_grad_gpu_cuda (float * dy_dem_x, float * dy_dem, const float * table, const float * table_info, const float * em_x, const float * em, const float * dy, const int nloc, const int nnei, const int last_layer_size); 
-template void tabulate_fusion_se_a_grad_gpu_cuda (double * dy_dem_x, double * dy_dem, const double * table, const double * table_info, const double * em_x, const double * em, const double * dy, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_a_grad_grad_gpu_cuda (float * dz_dy, const float * table, const float * table_info, const float * em_x, const float * em, const float * dz_dy_dem_x, const float * dz_dy_dem, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_a_grad_grad_gpu_cuda (double * dz_dy, const double * table, const double * table_info, const double * em_x, const double * em, const double * dz_dy_dem_x, const double * dz_dy_dem, const int nloc, const int nnei, const int last_layer_size);
-
-template void tabulate_fusion_se_t_gpu_cuda(float * out, const float * table, const float * table_info, const float * em_x, const float * em, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
-template void tabulate_fusion_se_t_gpu_cuda(double * out, const double * table, const double * table_info, const double * em_x, const double * em, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
-template void tabulate_fusion_se_t_grad_gpu_cuda (float * dy_dem_x, float * dy_dem, const float * table, const float * table_info, const float * em_x, const float * em, const float * dy, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size); 
-template void tabulate_fusion_se_t_grad_gpu_cuda (double * dy_dem_x, double * dy_dem, const double * table, const double * table_info, const double * em_x, const double * em, const double * dy, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
-template void tabulate_fusion_se_t_grad_grad_gpu_cuda (float * dz_dy, const float * table, const float * table_info, const float * em_x, const float * em, const float * dz_dy_dem_x, const float * dz_dy_dem, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
-template void tabulate_fusion_se_t_grad_grad_gpu_cuda (double * dz_dy, const double * table, const double * table_info, const double * em_x, const double * em, const double * dz_dy_dem_x, const double * dz_dy_dem, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
-
-template void tabulate_fusion_se_r_gpu_cuda(float * out, const float * table, const float * table_info, const float * em, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_r_gpu_cuda(double * out, const double * table, const double * table_info, const double * em, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_r_grad_gpu_cuda (float * dy_dem, const float * table, const float * table_info, const float * em, const float * dy, const int nloc, const int nnei, const int last_layer_size); 
-template void tabulate_fusion_se_r_grad_gpu_cuda (double * dy_dem, const double * table, const double * table_info, const double * em, const double * dy, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_r_grad_grad_gpu_cuda (float * dz_dy, const float * table, const float * table_info, const float * em, const float * dz_dy_dem, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_r_grad_grad_gpu_cuda (double * dz_dy, const double * table, const double * table_info, const double * em, const double * dz_dy_dem, const int nloc, const int nnei, const int last_layer_size);
-
-}
\ No newline at end of file
+template void tabulate_fusion_se_a_gpu_cuda(float* out,
+                                                   const float* table,
+                                                   const float* table_info,
+                                                   const float* em_x,
+                                                   const float* em,
+                                                   const int nloc,
+                                                   const int nnei,
+                                                   const int last_layer_size);
+template void tabulate_fusion_se_a_gpu_cuda(double* out,
+                                                    const double* table,
+                                                    const double* table_info,
+                                                    const double* em_x,
+                                                    const double* em,
+                                                    const int nloc,
+                                                    const int nnei,
+                                                    const int last_layer_size);
+template void tabulate_fusion_se_a_grad_gpu_cuda(
+    float* dy_dem_x,
+    float* dy_dem,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dy,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void tabulate_fusion_se_a_grad_gpu_cuda(
+    double* dy_dem_x,
+    double* dy_dem,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dy,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void tabulate_fusion_se_a_grad_grad_gpu_cuda(
+    float* dz_dy,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dz_dy_dem_x,
+    const float* dz_dy_dem,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void tabulate_fusion_se_a_grad_grad_gpu_cuda(
+    double* dz_dy,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dz_dy_dem_x,
+    const double* dz_dy_dem,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+
+template void tabulate_fusion_se_t_gpu_cuda(float* out,
+                                                   const float* table,
+                                                   const float* table_info,
+                                                   const float* em_x,
+                                                   const float* em,
+                                                   const int nloc,
+                                                   const int nnei_i,
+                                                   const int nnei_j,
+                                                   const int last_layer_size);
+template void tabulate_fusion_se_t_gpu_cuda(double* out,
+                                                    const double* table,
+                                                    const double* table_info,
+                                                    const double* em_x,
+                                                    const double* em,
+                                                    const int nloc,
+                                                    const int nnei_i,
+                                                    const int nnei_j,
+                                                    const int last_layer_size);
+template void tabulate_fusion_se_t_grad_gpu_cuda(
+    float* dy_dem_x,
+    float* dy_dem,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dy,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void tabulate_fusion_se_t_grad_gpu_cuda(
+    double* dy_dem_x,
+    double* dy_dem,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dy,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void tabulate_fusion_se_t_grad_grad_gpu_cuda(
+    float* dz_dy,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dz_dy_dem_x,
+    const float* dz_dy_dem,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void tabulate_fusion_se_t_grad_grad_gpu_cuda(
+    double* dz_dy,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dz_dy_dem_x,
+    const double* dz_dy_dem,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+
+template void tabulate_fusion_se_r_gpu_cuda(float* out,
+                                                   const float* table,
+                                                   const float* table_info,
+                                                   const float* em,
+                                                   const int nloc,
+                                                   const int nnei,
+                                                   const int last_layer_size);
+template void tabulate_fusion_se_r_gpu_cuda(double* out,
+                                                    const double* table,
+                                                    const double* table_info,
+                                                    const double* em,
+                                                    const int nloc,
+                                                    const int nnei,
+                                                    const int last_layer_size);
+template void tabulate_fusion_se_r_grad_gpu_cuda(
+    float* dy_dem,
+    const float* table,
+    const float* table_info,
+    const float* em,
+    const float* dy,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void tabulate_fusion_se_r_grad_gpu_cuda(
+    double* dy_dem,
+    const double* table,
+    const double* table_info,
+    const double* em,
+    const double* dy,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void tabulate_fusion_se_r_grad_grad_gpu_cuda(
+    float* dz_dy,
+    const float* table,
+    const float* table_info,
+    const float* em,
+    const float* dz_dy_dem,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void tabulate_fusion_se_r_grad_grad_gpu_cuda(
+    double* dz_dy,
+    const double* table,
+    const double* table_info,
+    const double* em,
+    const double* dz_dy_dem,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+
+}  // namespace deepmd
diff --git a/source/lib/src/env_mat.cc b/source/lib/src/env_mat.cc
index f269056cbb..ebd4e2c573 100644
--- a/source/lib/src/env_mat.cc
+++ b/source/lib/src/env_mat.cc
@@ -1,86 +1,108 @@
 #include "env_mat.h"
+
 #include "switcher.h"
 
-// output deriv size: n_sel_a_nei x 4 x 12				    
-//		      (1./rr, cos_theta, cos_phi, sin_phi)  x 4 x (x, y, z) 
-void env_mat_a (
-    std::vector &		descrpt_a,
-    std::vector &		descrpt_a_deriv,
-    std::vector &		rij_a,
-    const std::vector &	posi,
-    const int &				ntypes,
-    const std::vector &		type,
-    const SimulationRegion &	region,
-    const bool &			b_pbc,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist_a,
-    const std::vector &		sec_a, 
-    const double &			rmin, 
-    const double &			rmax)
-{  
+// output deriv size: n_sel_a_nei x 4 x 12
+//		      (1./rr, cos_theta, cos_phi, sin_phi)  x 4 x (x, y, z)
+void env_mat_a(std::vector& descrpt_a,
+               std::vector& descrpt_a_deriv,
+               std::vector& rij_a,
+               const std::vector& posi,
+               const int& ntypes,
+               const std::vector& type,
+               const SimulationRegion& region,
+               const bool& b_pbc,
+               const int& i_idx,
+               const std::vector& fmt_nlist_a,
+               const std::vector& sec_a,
+               const double& rmin,
+               const double& rmax) {
   // compute the diff of the neighbors
-  std::vector > sel_a_diff (sec_a.back());
-  rij_a.resize (sec_a.back() * 3);
-  fill (rij_a.begin(), rij_a.end(), 0.0);
-  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii){
-    for (int jj = sec_a[ii]; jj < sec_a[ii+1]; ++jj){
+  std::vector > sel_a_diff(sec_a.back());
+  rij_a.resize(sec_a.back() * 3);
+  fill(rij_a.begin(), rij_a.end(), 0.0);
+  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii) {
+    for (int jj = sec_a[ii]; jj < sec_a[ii + 1]; ++jj) {
       if (fmt_nlist_a[jj] < 0) break;
       sel_a_diff[jj].resize(3);
-      const int & j_idx = fmt_nlist_a[jj];
-      if (b_pbc){
-	region.diffNearestNeighbor (posi[j_idx*3+0], posi[j_idx*3+1], posi[j_idx*3+2], 
-				    posi[i_idx*3+0], posi[i_idx*3+1], posi[i_idx*3+2], 
-				    sel_a_diff[jj][0], sel_a_diff[jj][1], sel_a_diff[jj][2]);
-      }
-      else {
-	for (int dd = 0; dd < 3; ++dd) sel_a_diff[jj][dd] = posi[j_idx*3+dd] - posi[i_idx*3+dd];
+      const int& j_idx = fmt_nlist_a[jj];
+      if (b_pbc) {
+        region.diffNearestNeighbor(
+            posi[j_idx * 3 + 0], posi[j_idx * 3 + 1], posi[j_idx * 3 + 2],
+            posi[i_idx * 3 + 0], posi[i_idx * 3 + 1], posi[i_idx * 3 + 2],
+            sel_a_diff[jj][0], sel_a_diff[jj][1], sel_a_diff[jj][2]);
+      } else {
+        for (int dd = 0; dd < 3; ++dd)
+          sel_a_diff[jj][dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
       }
-      for (int dd = 0; dd < 3; ++dd) rij_a[jj*3+dd] = sel_a_diff[jj][dd];
+      for (int dd = 0; dd < 3; ++dd) rij_a[jj * 3 + dd] = sel_a_diff[jj][dd];
     }
   }
-  
+
   // 1./rr, cos(theta), cos(phi), sin(phi)
-  descrpt_a.resize (sec_a.back() * 4);
-  fill (descrpt_a.begin(), descrpt_a.end(), 0.0);
+  descrpt_a.resize(sec_a.back() * 4);
+  fill(descrpt_a.begin(), descrpt_a.end(), 0.0);
   // deriv wrt center: 3
-  descrpt_a_deriv.resize (sec_a.back() * 4 * 3);
-  fill (descrpt_a_deriv.begin(), descrpt_a_deriv.end(), 0.0);
+  descrpt_a_deriv.resize(sec_a.back() * 4 * 3);
+  fill(descrpt_a_deriv.begin(), descrpt_a_deriv.end(), 0.0);
 
-  for (int sec_iter = 0; sec_iter < int(sec_a.size()) - 1; ++sec_iter){
-    for (int nei_iter = sec_a[sec_iter]; nei_iter < sec_a[sec_iter+1]; ++nei_iter) {      
+  for (int sec_iter = 0; sec_iter < int(sec_a.size()) - 1; ++sec_iter) {
+    for (int nei_iter = sec_a[sec_iter]; nei_iter < sec_a[sec_iter + 1];
+         ++nei_iter) {
       if (fmt_nlist_a[nei_iter] < 0) break;
-      const double * rr = &sel_a_diff[nei_iter][0];
+      const double* rr = &sel_a_diff[nei_iter][0];
       double nr2 = deepmd::dot3(rr, rr);
-      double inr = 1./sqrt(nr2);
+      double inr = 1. / sqrt(nr2);
       double nr = nr2 * inr;
       double inr2 = inr * inr;
       double inr4 = inr2 * inr2;
       double inr3 = inr4 * nr;
       double sw, dsw;
       deepmd::spline5_switch(sw, dsw, nr, rmin, rmax);
-      int idx_deriv = nei_iter * 4 * 3;	// 4 components time 3 directions
-      int idx_value = nei_iter * 4;	// 4 components
+      int idx_deriv = nei_iter * 4 * 3;  // 4 components time 3 directions
+      int idx_value = nei_iter * 4;      // 4 components
       // 4 value components
-      descrpt_a[idx_value + 0] = 1./nr;
+      descrpt_a[idx_value + 0] = 1. / nr;
       descrpt_a[idx_value + 1] = rr[0] / nr2;
       descrpt_a[idx_value + 2] = rr[1] / nr2;
       descrpt_a[idx_value + 3] = rr[2] / nr2;
       // deriv of component 1/r
-      descrpt_a_deriv[idx_deriv + 0] = rr[0] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv + 1] = rr[1] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv + 2] = rr[2] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 0] =
+          rr[0] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 1] =
+          rr[1] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 2] =
+          rr[2] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[2] * inr;
       // deriv of component x/r2
-      descrpt_a_deriv[idx_deriv + 3] = (2. * rr[0] * rr[0] * inr4 - inr2) * sw - descrpt_a[idx_value + 1] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv + 4] = (2. * rr[0] * rr[1] * inr4	) * sw - descrpt_a[idx_value + 1] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv + 5] = (2. * rr[0] * rr[2] * inr4	) * sw - descrpt_a[idx_value + 1] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 3] =
+          (2. * rr[0] * rr[0] * inr4 - inr2) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 4] =
+          (2. * rr[0] * rr[1] * inr4) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 5] =
+          (2. * rr[0] * rr[2] * inr4) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[2] * inr;
       // deriv of component y/r2
-      descrpt_a_deriv[idx_deriv + 6] = (2. * rr[1] * rr[0] * inr4	) * sw - descrpt_a[idx_value + 2] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv + 7] = (2. * rr[1] * rr[1] * inr4 - inr2) * sw - descrpt_a[idx_value + 2] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv + 8] = (2. * rr[1] * rr[2] * inr4	) * sw - descrpt_a[idx_value + 2] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 6] =
+          (2. * rr[1] * rr[0] * inr4) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 7] =
+          (2. * rr[1] * rr[1] * inr4 - inr2) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 8] =
+          (2. * rr[1] * rr[2] * inr4) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[2] * inr;
       // deriv of component z/r2
-      descrpt_a_deriv[idx_deriv + 9] = (2. * rr[2] * rr[0] * inr4	) * sw - descrpt_a[idx_value + 3] * dsw * rr[0] * inr;
-      descrpt_a_deriv[idx_deriv +10] = (2. * rr[2] * rr[1] * inr4	) * sw - descrpt_a[idx_value + 3] * dsw * rr[1] * inr;
-      descrpt_a_deriv[idx_deriv +11] = (2. * rr[2] * rr[2] * inr4 - inr2) * sw - descrpt_a[idx_value + 3] * dsw * rr[2] * inr;
+      descrpt_a_deriv[idx_deriv + 9] =
+          (2. * rr[2] * rr[0] * inr4) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 10] =
+          (2. * rr[2] * rr[1] * inr4) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 11] =
+          (2. * rr[2] * rr[2] * inr4 - inr2) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[2] * inr;
       // 4 value components
       descrpt_a[idx_value + 0] *= sw;
       descrpt_a[idx_value + 1] *= sw;
@@ -90,278 +112,277 @@ void env_mat_a (
   }
 }
 
-
-template 
-void 
-deepmd::
-env_mat_a_cpu (
-    std::vector &	        descrpt_a,
-    std::vector &	        descrpt_a_deriv,
-    std::vector &	        rij_a,
-    const std::vector &	posi,
-    const std::vector &		type,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist_a,
-    const std::vector &		sec_a, 
-    const float &			rmin,
-    const float &			rmax) 
-{  
-    // compute the diff of the neighbors
-    rij_a.resize (sec_a.back() * 3);
-    fill (rij_a.begin(), rij_a.end(), (FPTYPE)0.0);
-    for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii) {
-        for (int jj = sec_a[ii]; jj < sec_a[ii + 1]; ++jj) {
-            if (fmt_nlist_a[jj] < 0) break;
-            const int & j_idx = fmt_nlist_a[jj];
-            for (int dd = 0; dd < 3; ++dd) {
-                rij_a[jj * 3 + dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
-            }
-        }
+template 
+void deepmd::env_mat_a_cpu(std::vector& descrpt_a,
+                           std::vector& descrpt_a_deriv,
+                           std::vector& rij_a,
+                           const std::vector& posi,
+                           const std::vector& type,
+                           const int& i_idx,
+                           const std::vector& fmt_nlist_a,
+                           const std::vector& sec_a,
+                           const float& rmin,
+                           const float& rmax) {
+  // compute the diff of the neighbors
+  rij_a.resize(sec_a.back() * 3);
+  fill(rij_a.begin(), rij_a.end(), (FPTYPE)0.0);
+  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii) {
+    for (int jj = sec_a[ii]; jj < sec_a[ii + 1]; ++jj) {
+      if (fmt_nlist_a[jj] < 0) break;
+      const int& j_idx = fmt_nlist_a[jj];
+      for (int dd = 0; dd < 3; ++dd) {
+        rij_a[jj * 3 + dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
+      }
     }
-    // 1./rr, cos(theta), cos(phi), sin(phi)
-    descrpt_a.resize (sec_a.back() * 4);
-    fill (descrpt_a.begin(), descrpt_a.end(), (FPTYPE)0.0);
-    // deriv wrt center: 3
-    descrpt_a_deriv.resize (sec_a.back() * 4 * 3);
-    fill (descrpt_a_deriv.begin(), descrpt_a_deriv.end(), (FPTYPE)0.0);
+  }
+  // 1./rr, cos(theta), cos(phi), sin(phi)
+  descrpt_a.resize(sec_a.back() * 4);
+  fill(descrpt_a.begin(), descrpt_a.end(), (FPTYPE)0.0);
+  // deriv wrt center: 3
+  descrpt_a_deriv.resize(sec_a.back() * 4 * 3);
+  fill(descrpt_a_deriv.begin(), descrpt_a_deriv.end(), (FPTYPE)0.0);
 
-    for (int sec_iter = 0; sec_iter < int(sec_a.size()) - 1; ++sec_iter) {
-        for (int nei_iter = sec_a[sec_iter]; nei_iter < sec_a[sec_iter+1]; ++nei_iter) {      
-            if (fmt_nlist_a[nei_iter] < 0) break;
-            const FPTYPE * rr = &rij_a[nei_iter * 3];
-            FPTYPE nr2 = deepmd::dot3(rr, rr);
-            FPTYPE inr = (FPTYPE)1./sqrt(nr2);
-            FPTYPE nr = nr2 * inr;
-            FPTYPE inr2 = inr * inr;
-            FPTYPE inr4 = inr2 * inr2;
-            FPTYPE inr3 = inr4 * nr;
-            FPTYPE sw, dsw;
-            deepmd::spline5_switch(sw, dsw, nr, rmin, rmax);
-            int idx_deriv = nei_iter * 4 * 3;	// 4 components time 3 directions
-            int idx_value = nei_iter * 4;	// 4 components
-            // 4 value components
-            descrpt_a[idx_value + 0] = (FPTYPE)1./nr;
-            descrpt_a[idx_value + 1] = rr[0] / nr2;
-            descrpt_a[idx_value + 2] = rr[1] / nr2;
-            descrpt_a[idx_value + 3] = rr[2] / nr2;
-            // deriv of component 1/r
-            descrpt_a_deriv[idx_deriv + 0] = rr[0] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[0] * inr;
-            descrpt_a_deriv[idx_deriv + 1] = rr[1] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[1] * inr;
-            descrpt_a_deriv[idx_deriv + 2] = rr[2] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[2] * inr;
-            // deriv of component x/r2
-            descrpt_a_deriv[idx_deriv + 3] = ((FPTYPE)2. * rr[0] * rr[0] * inr4 - inr2) * sw - descrpt_a[idx_value + 1] * dsw * rr[0] * inr;
-            descrpt_a_deriv[idx_deriv + 4] = ((FPTYPE)2. * rr[0] * rr[1] * inr4	) * sw - descrpt_a[idx_value + 1] * dsw * rr[1] * inr;
-            descrpt_a_deriv[idx_deriv + 5] = ((FPTYPE)2. * rr[0] * rr[2] * inr4	) * sw - descrpt_a[idx_value + 1] * dsw * rr[2] * inr;
-            // deriv of component y/r2
-            descrpt_a_deriv[idx_deriv + 6] = ((FPTYPE)2. * rr[1] * rr[0] * inr4	) * sw - descrpt_a[idx_value + 2] * dsw * rr[0] * inr;
-            descrpt_a_deriv[idx_deriv + 7] = ((FPTYPE)2. * rr[1] * rr[1] * inr4 - inr2) * sw - descrpt_a[idx_value + 2] * dsw * rr[1] * inr;
-            descrpt_a_deriv[idx_deriv + 8] = ((FPTYPE)2. * rr[1] * rr[2] * inr4	) * sw - descrpt_a[idx_value + 2] * dsw * rr[2] * inr;
-            // deriv of component z/r2
-            descrpt_a_deriv[idx_deriv + 9] = ((FPTYPE)2. * rr[2] * rr[0] * inr4	) * sw - descrpt_a[idx_value + 3] * dsw * rr[0] * inr;
-            descrpt_a_deriv[idx_deriv +10] = ((FPTYPE)2. * rr[2] * rr[1] * inr4	) * sw - descrpt_a[idx_value + 3] * dsw * rr[1] * inr;
-            descrpt_a_deriv[idx_deriv +11] = ((FPTYPE)2. * rr[2] * rr[2] * inr4 - inr2) * sw - descrpt_a[idx_value + 3] * dsw * rr[2] * inr;
-            // 4 value components
-            descrpt_a[idx_value + 0] *= sw;
-            descrpt_a[idx_value + 1] *= sw;
-            descrpt_a[idx_value + 2] *= sw;
-            descrpt_a[idx_value + 3] *= sw;
-        }
+  for (int sec_iter = 0; sec_iter < int(sec_a.size()) - 1; ++sec_iter) {
+    for (int nei_iter = sec_a[sec_iter]; nei_iter < sec_a[sec_iter + 1];
+         ++nei_iter) {
+      if (fmt_nlist_a[nei_iter] < 0) break;
+      const FPTYPE* rr = &rij_a[nei_iter * 3];
+      FPTYPE nr2 = deepmd::dot3(rr, rr);
+      FPTYPE inr = (FPTYPE)1. / sqrt(nr2);
+      FPTYPE nr = nr2 * inr;
+      FPTYPE inr2 = inr * inr;
+      FPTYPE inr4 = inr2 * inr2;
+      FPTYPE inr3 = inr4 * nr;
+      FPTYPE sw, dsw;
+      deepmd::spline5_switch(sw, dsw, nr, rmin, rmax);
+      int idx_deriv = nei_iter * 4 * 3;  // 4 components time 3 directions
+      int idx_value = nei_iter * 4;      // 4 components
+      // 4 value components
+      descrpt_a[idx_value + 0] = (FPTYPE)1. / nr;
+      descrpt_a[idx_value + 1] = rr[0] / nr2;
+      descrpt_a[idx_value + 2] = rr[1] / nr2;
+      descrpt_a[idx_value + 3] = rr[2] / nr2;
+      // deriv of component 1/r
+      descrpt_a_deriv[idx_deriv + 0] =
+          rr[0] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 1] =
+          rr[1] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 2] =
+          rr[2] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[2] * inr;
+      // deriv of component x/r2
+      descrpt_a_deriv[idx_deriv + 3] =
+          ((FPTYPE)2. * rr[0] * rr[0] * inr4 - inr2) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 4] =
+          ((FPTYPE)2. * rr[0] * rr[1] * inr4) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 5] =
+          ((FPTYPE)2. * rr[0] * rr[2] * inr4) * sw -
+          descrpt_a[idx_value + 1] * dsw * rr[2] * inr;
+      // deriv of component y/r2
+      descrpt_a_deriv[idx_deriv + 6] =
+          ((FPTYPE)2. * rr[1] * rr[0] * inr4) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 7] =
+          ((FPTYPE)2. * rr[1] * rr[1] * inr4 - inr2) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 8] =
+          ((FPTYPE)2. * rr[1] * rr[2] * inr4) * sw -
+          descrpt_a[idx_value + 2] * dsw * rr[2] * inr;
+      // deriv of component z/r2
+      descrpt_a_deriv[idx_deriv + 9] =
+          ((FPTYPE)2. * rr[2] * rr[0] * inr4) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 10] =
+          ((FPTYPE)2. * rr[2] * rr[1] * inr4) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 11] =
+          ((FPTYPE)2. * rr[2] * rr[2] * inr4 - inr2) * sw -
+          descrpt_a[idx_value + 3] * dsw * rr[2] * inr;
+      // 4 value components
+      descrpt_a[idx_value + 0] *= sw;
+      descrpt_a[idx_value + 1] *= sw;
+      descrpt_a[idx_value + 2] *= sw;
+      descrpt_a[idx_value + 3] *= sw;
     }
+  }
 }
 
-
-void env_mat_r (
-    std::vector &		descrpt,
-    std::vector &		descrpt_deriv,
-    std::vector &		rij,
-    const std::vector &	posi,
-    const int &				ntypes,
-    const std::vector &		type,
-    const SimulationRegion &	region,
-    const bool &			b_pbc,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist,
-    const std::vector &		sec,
-    const double &			rmin, 
-    const double &			rmax)
-{  
+void env_mat_r(std::vector& descrpt,
+               std::vector& descrpt_deriv,
+               std::vector& rij,
+               const std::vector& posi,
+               const int& ntypes,
+               const std::vector& type,
+               const SimulationRegion& region,
+               const bool& b_pbc,
+               const int& i_idx,
+               const std::vector& fmt_nlist,
+               const std::vector& sec,
+               const double& rmin,
+               const double& rmax) {
   // compute the diff of the neighbors
-  std::vector > sel_diff (sec.back());
-  rij.resize (sec.back() * 3);
-  fill (rij.begin(), rij.end(), 0.0);
-  for (int ii = 0; ii < int(sec.size()) - 1; ++ii){
-    for (int jj = sec[ii]; jj < sec[ii+1]; ++jj){
+  std::vector > sel_diff(sec.back());
+  rij.resize(sec.back() * 3);
+  fill(rij.begin(), rij.end(), 0.0);
+  for (int ii = 0; ii < int(sec.size()) - 1; ++ii) {
+    for (int jj = sec[ii]; jj < sec[ii + 1]; ++jj) {
       if (fmt_nlist[jj] < 0) break;
       sel_diff[jj].resize(3);
-      const int & j_idx = fmt_nlist[jj];
-      if (b_pbc){
-	region.diffNearestNeighbor (posi[j_idx*3+0], posi[j_idx*3+1], posi[j_idx*3+2], 
-				    posi[i_idx*3+0], posi[i_idx*3+1], posi[i_idx*3+2], 
-				    sel_diff[jj][0], sel_diff[jj][1], sel_diff[jj][2]);
+      const int& j_idx = fmt_nlist[jj];
+      if (b_pbc) {
+        region.diffNearestNeighbor(
+            posi[j_idx * 3 + 0], posi[j_idx * 3 + 1], posi[j_idx * 3 + 2],
+            posi[i_idx * 3 + 0], posi[i_idx * 3 + 1], posi[i_idx * 3 + 2],
+            sel_diff[jj][0], sel_diff[jj][1], sel_diff[jj][2]);
+      } else {
+        for (int dd = 0; dd < 3; ++dd)
+          sel_diff[jj][dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
       }
-      else {
-	for (int dd = 0; dd < 3; ++dd) sel_diff[jj][dd] = posi[j_idx*3+dd] - posi[i_idx*3+dd];
-      }
-      for (int dd = 0; dd < 3; ++dd) rij[jj*3+dd] = sel_diff[jj][dd];
+      for (int dd = 0; dd < 3; ++dd) rij[jj * 3 + dd] = sel_diff[jj][dd];
     }
   }
-  
+
   // 1./rr
-  descrpt.resize (sec.back());
-  fill (descrpt.begin(), descrpt.end(), 0.0);
+  descrpt.resize(sec.back());
+  fill(descrpt.begin(), descrpt.end(), 0.0);
   // deriv wrt center: 3
-  descrpt_deriv.resize (sec.back() * 3);
-  fill (descrpt_deriv.begin(), descrpt_deriv.end(), 0.0);
+  descrpt_deriv.resize(sec.back() * 3);
+  fill(descrpt_deriv.begin(), descrpt_deriv.end(), 0.0);
 
-  for (int sec_iter = 0; sec_iter < int(sec.size()) - 1; ++sec_iter){
-    for (int nei_iter = sec[sec_iter]; nei_iter < sec[sec_iter+1]; ++nei_iter) {      
+  for (int sec_iter = 0; sec_iter < int(sec.size()) - 1; ++sec_iter) {
+    for (int nei_iter = sec[sec_iter]; nei_iter < sec[sec_iter + 1];
+         ++nei_iter) {
       if (fmt_nlist[nei_iter] < 0) break;
-      const double * rr = &sel_diff[nei_iter][0];
+      const double* rr = &sel_diff[nei_iter][0];
       double nr2 = deepmd::dot3(rr, rr);
-      double inr = 1./sqrt(nr2);
+      double inr = 1. / sqrt(nr2);
       double nr = nr2 * inr;
       double inr2 = inr * inr;
       double inr4 = inr2 * inr2;
       double inr3 = inr4 * nr;
       double sw, dsw;
       deepmd::spline5_switch(sw, dsw, nr, rmin, rmax);
-      int idx_deriv = nei_iter * 3;	// 1 components time 3 directions
-      int idx_value = nei_iter;		// 1 components
+      int idx_deriv = nei_iter * 3;  // 1 components time 3 directions
+      int idx_value = nei_iter;      // 1 components
       // value components
-      descrpt[idx_value + 0] = 1./nr;
+      descrpt[idx_value + 0] = 1. / nr;
       // deriv of component 1/r
-      descrpt_deriv[idx_deriv + 0] = rr[0] * inr3 * sw - descrpt[idx_value + 0] * dsw * rr[0] * inr;
-      descrpt_deriv[idx_deriv + 1] = rr[1] * inr3 * sw - descrpt[idx_value + 0] * dsw * rr[1] * inr;
-      descrpt_deriv[idx_deriv + 2] = rr[2] * inr3 * sw - descrpt[idx_value + 0] * dsw * rr[2] * inr;
+      descrpt_deriv[idx_deriv + 0] =
+          rr[0] * inr3 * sw - descrpt[idx_value + 0] * dsw * rr[0] * inr;
+      descrpt_deriv[idx_deriv + 1] =
+          rr[1] * inr3 * sw - descrpt[idx_value + 0] * dsw * rr[1] * inr;
+      descrpt_deriv[idx_deriv + 2] =
+          rr[2] * inr3 * sw - descrpt[idx_value + 0] * dsw * rr[2] * inr;
       // value components
       descrpt[idx_value + 0] *= sw;
     }
   }
 }
 
-template 
-void 
-deepmd::
-env_mat_r_cpu (
-    std::vector &		descrpt_a,
-    std::vector &	        descrpt_a_deriv,
-    std::vector &	        rij_a,
-    const std::vector &	posi,
-    const std::vector &		type,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist,
-    const std::vector &		sec, 
-    const float &			rmin,
-    const float &			rmax) 
-{
-    // compute the diff of the neighbors
-    rij_a.resize (sec.back() * 3);
-    fill (rij_a.begin(), rij_a.end(), (FPTYPE)0.0);
-    for (int ii = 0; ii < int(sec.size()) - 1; ++ii) {
-        for (int jj = sec[ii]; jj < sec[ii + 1]; ++jj) {
-            if (fmt_nlist[jj] < 0) break;
-            const int & j_idx = fmt_nlist[jj];
+template 
+void deepmd::env_mat_r_cpu(std::vector& descrpt_a,
+                           std::vector& descrpt_a_deriv,
+                           std::vector& rij_a,
+                           const std::vector& posi,
+                           const std::vector& type,
+                           const int& i_idx,
+                           const std::vector& fmt_nlist,
+                           const std::vector& sec,
+                           const float& rmin,
+                           const float& rmax) {
+  // compute the diff of the neighbors
+  rij_a.resize(sec.back() * 3);
+  fill(rij_a.begin(), rij_a.end(), (FPTYPE)0.0);
+  for (int ii = 0; ii < int(sec.size()) - 1; ++ii) {
+    for (int jj = sec[ii]; jj < sec[ii + 1]; ++jj) {
+      if (fmt_nlist[jj] < 0) break;
+      const int& j_idx = fmt_nlist[jj];
 
-            for (int dd = 0; dd < 3; ++dd) {
-                rij_a[jj * 3 + dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
-            }
-        }
+      for (int dd = 0; dd < 3; ++dd) {
+        rij_a[jj * 3 + dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
+      }
     }
-    // 1./rr, cos(theta), cos(phi), sin(phi)
-    descrpt_a.resize (sec.back());
-    fill (descrpt_a.begin(), descrpt_a.end(), (FPTYPE)0.0);
-    // deriv wrt center: 3
-    descrpt_a_deriv.resize (sec.back() * 3);
-    fill (descrpt_a_deriv.begin(), descrpt_a_deriv.end(), (FPTYPE)0.0);
+  }
+  // 1./rr, cos(theta), cos(phi), sin(phi)
+  descrpt_a.resize(sec.back());
+  fill(descrpt_a.begin(), descrpt_a.end(), (FPTYPE)0.0);
+  // deriv wrt center: 3
+  descrpt_a_deriv.resize(sec.back() * 3);
+  fill(descrpt_a_deriv.begin(), descrpt_a_deriv.end(), (FPTYPE)0.0);
 
-    for (int sec_iter = 0; sec_iter < int(sec.size()) - 1; ++sec_iter) {
-        for (int nei_iter = sec[sec_iter]; nei_iter < sec[sec_iter+1]; ++nei_iter) {      
-            if (fmt_nlist[nei_iter] < 0) break;
-            const FPTYPE * rr = &rij_a[nei_iter * 3];
-            FPTYPE nr2 = deepmd::dot3(rr, rr);
-            FPTYPE inr = (FPTYPE)1./sqrt(nr2);
-            FPTYPE nr = nr2 * inr;
-            FPTYPE inr2 = inr * inr;
-            FPTYPE inr4 = inr2 * inr2;
-            FPTYPE inr3 = inr4 * nr;
-            FPTYPE sw, dsw;
-            deepmd::spline5_switch(sw, dsw, nr, rmin, rmax);
-            int idx_deriv = nei_iter * 3;	// 1 components time 3 directions
-            int idx_value = nei_iter;	    // 1 components
-            // 4 value components
-            descrpt_a[idx_value + 0] = (FPTYPE)1./nr;
-            // deriv of component 1/r
-            descrpt_a_deriv[idx_deriv + 0] = rr[0] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[0] * inr;
-            descrpt_a_deriv[idx_deriv + 1] = rr[1] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[1] * inr;
-            descrpt_a_deriv[idx_deriv + 2] = rr[2] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[2] * inr;
-            // 4 value components
-            descrpt_a[idx_value + 0] *= sw;
-        }
+  for (int sec_iter = 0; sec_iter < int(sec.size()) - 1; ++sec_iter) {
+    for (int nei_iter = sec[sec_iter]; nei_iter < sec[sec_iter + 1];
+         ++nei_iter) {
+      if (fmt_nlist[nei_iter] < 0) break;
+      const FPTYPE* rr = &rij_a[nei_iter * 3];
+      FPTYPE nr2 = deepmd::dot3(rr, rr);
+      FPTYPE inr = (FPTYPE)1. / sqrt(nr2);
+      FPTYPE nr = nr2 * inr;
+      FPTYPE inr2 = inr * inr;
+      FPTYPE inr4 = inr2 * inr2;
+      FPTYPE inr3 = inr4 * nr;
+      FPTYPE sw, dsw;
+      deepmd::spline5_switch(sw, dsw, nr, rmin, rmax);
+      int idx_deriv = nei_iter * 3;  // 1 components time 3 directions
+      int idx_value = nei_iter;      // 1 components
+      // 4 value components
+      descrpt_a[idx_value + 0] = (FPTYPE)1. / nr;
+      // deriv of component 1/r
+      descrpt_a_deriv[idx_deriv + 0] =
+          rr[0] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[0] * inr;
+      descrpt_a_deriv[idx_deriv + 1] =
+          rr[1] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[1] * inr;
+      descrpt_a_deriv[idx_deriv + 2] =
+          rr[2] * inr3 * sw - descrpt_a[idx_value + 0] * dsw * rr[2] * inr;
+      // 4 value components
+      descrpt_a[idx_value + 0] *= sw;
     }
+  }
 }
 
+template void deepmd::env_mat_a_cpu<double>(
+    std::vector<double>& descrpt_a,
+    std::vector<double>& descrpt_a_deriv,
+    std::vector<double>& rij_a,
+    const std::vector<double>& posi,
+    const std::vector<int>& type,
+    const int& i_idx,
+    const std::vector<int>& fmt_nlist,
+    const std::vector<int>& sec,
+    const float& rmin,
+    const float& rmax);
 
-template
-void 
-deepmd::
-env_mat_a_cpu (
-    std::vector &	        descrpt_a,
-    std::vector &	        descrpt_a_deriv,
-    std::vector &	        rij_a,
-    const std::vector &	posi,
-    const std::vector &		type,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist,
-    const std::vector &		sec, 
-    const float &			rmin,
-    const float &			rmax) ;
-
-
-template
-void 
-deepmd::
-env_mat_a_cpu (
-    std::vector &	        descrpt_a,
-    std::vector &	        descrpt_a_deriv,
-    std::vector &	        rij_a,
-    const std::vector &		posi,
-    const std::vector &		type,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist,
-    const std::vector &		sec, 
-    const float &			rmin,
-    const float &			rmax) ;
-
-
-template
-void 
-deepmd::
-env_mat_r_cpu (
-    std::vector &	        descrpt_r,
-    std::vector &	        descrpt_r_deriv,
-    std::vector &	        rij_r,
-    const std::vector &	posi,
-    const std::vector &		type,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist,
-    const std::vector &		sec, 
-    const float &			rmin,
-    const float &			rmax) ;
-
-
-template
-void 
-deepmd::
-env_mat_r_cpu (
-    std::vector &	        descrpt_r,
-    std::vector &	        descrpt_r_deriv,
-    std::vector &	        rij_r,
-    const std::vector &		posi,
-    const std::vector &		type,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist,
-    const std::vector &		sec, 
-    const float &			rmin,
-    const float &			rmax) ;
+template void deepmd::env_mat_a_cpu<float>(std::vector<float>& descrpt_a,
+                                           std::vector<float>& descrpt_a_deriv,
+                                           std::vector<float>& rij_a,
+                                           const std::vector<float>& posi,
+                                           const std::vector<int>& type,
+                                           const int& i_idx,
+                                           const std::vector<int>& fmt_nlist,
+                                           const std::vector<int>& sec,
+                                           const float& rmin,
+                                           const float& rmax);
 
+template void deepmd::env_mat_r_cpu<double>(
+    std::vector<double>& descrpt_r,
+    std::vector<double>& descrpt_r_deriv,
+    std::vector<double>& rij_r,
+    const std::vector<double>& posi,
+    const std::vector<int>& type,
+    const int& i_idx,
+    const std::vector<int>& fmt_nlist,
+    const std::vector<int>& sec,
+    const float& rmin,
+    const float& rmax);
 
+template void deepmd::env_mat_r_cpu<float>(std::vector<float>& descrpt_r,
+                                           std::vector<float>& descrpt_r_deriv,
+                                           std::vector<float>& rij_r,
+                                           const std::vector<float>& posi,
+                                           const std::vector<int>& type,
+                                           const int& i_idx,
+                                           const std::vector<int>& fmt_nlist,
+                                           const std::vector<int>& sec,
+                                           const float& rmin,
+                                           const float& rmax);
diff --git a/source/lib/src/env_mat_nvnmd.cc b/source/lib/src/env_mat_nvnmd.cc
index 6e84ddcab4..5b4b03eeca 100644
--- a/source/lib/src/env_mat_nvnmd.cc
+++ b/source/lib/src/env_mat_nvnmd.cc
@@ -1,10 +1,10 @@
 /*
 //==================================================
- _   _  __     __  _   _   __  __   ____  
-| \ | | \ \   / / | \ | | |  \/  | |  _ \ 
+ _   _  __     __  _   _   __  __   ____
+| \ | | \ \   / / | \ | | |  \/  | |  _ \
 |  \| |  \ \ / /  |  \| | | |\/| | | | | |
 | |\  |   \ V /   | |\  | | |  | | | |_| |
-|_| \_|    \_/    |_| \_| |_|  |_| |____/ 
+|_| \_|    \_/    |_| \_| |_|  |_| |____/
 
 //==================================================
 
@@ -15,8 +15,8 @@ date: 2021-12-6
 
 */
 
-
 #include "env_mat_nvnmd.h"
+
 #include "switcher.h"
 
 // env_mat_a_nvnmd_cpu
@@ -28,119 +28,110 @@ date: 2021-12-6
 //==================================================
 */
 
-
-template 
-void 
-deepmd::
-env_mat_a_nvnmd_quantize_cpu (
-    std::vector &	        descrpt_a,
-    std::vector &	        descrpt_a_deriv,
-    std::vector &	        rij_a,
-    const std::vector &	posi,
-    const std::vector &		type,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist_a,
-    const std::vector &		sec_a, 
-    const float &			rmin,
-    const float &			rmax)
-{  
-    // compute the diff of the neighbors
-    rij_a.resize (sec_a.back() * 3);
-    fill (rij_a.begin(), rij_a.end(), 0.0);
-    for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii) {
-        for (int jj = sec_a[ii]; jj < sec_a[ii + 1]; ++jj) {
-            if (fmt_nlist_a[jj] < 0) break;
-            const int & j_idx = fmt_nlist_a[jj];
-            for (int dd = 0; dd < 3; ++dd) {
-                rij_a[jj * 3 + dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
-            }
-        }
+template <typename FPTYPE>
+void deepmd::env_mat_a_nvnmd_quantize_cpu(std::vector<FPTYPE>& descrpt_a,
+                                          std::vector<FPTYPE>& descrpt_a_deriv,
+                                          std::vector<FPTYPE>& rij_a,
+                                          const std::vector<FPTYPE>& posi,
+                                          const std::vector<int>& type,
+                                          const int& i_idx,
+                                          const std::vector<int>& fmt_nlist_a,
+                                          const std::vector<int>& sec_a,
+                                          const float& rmin,
+                                          const float& rmax) {
+  // compute the diff of the neighbors
+  rij_a.resize(sec_a.back() * 3);
+  fill(rij_a.begin(), rij_a.end(), 0.0);
+  for (int ii = 0; ii < int(sec_a.size()) - 1; ++ii) {
+    for (int jj = sec_a[ii]; jj < sec_a[ii + 1]; ++jj) {
+      if (fmt_nlist_a[jj] < 0) break;
+      const int& j_idx = fmt_nlist_a[jj];
+      for (int dd = 0; dd < 3; ++dd) {
+        rij_a[jj * 3 + dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
+      }
     }
-    // 1./rr, cos(theta), cos(phi), sin(phi)
-    descrpt_a.resize (sec_a.back() * 4);
-    fill (descrpt_a.begin(), descrpt_a.end(), 0.0);
-    // deriv wrt center: 3
-    descrpt_a_deriv.resize (sec_a.back() * 4 * 3);
-    fill (descrpt_a_deriv.begin(), descrpt_a_deriv.end(), 0.0);
-    U_Flt64_Int64 ufi;
-    int64_t expo_max;
-
-    for (int sec_iter = 0; sec_iter < int(sec_a.size()) - 1; ++sec_iter) {
-        for (int nei_iter = sec_a[sec_iter]; nei_iter < sec_a[sec_iter+1]; ++nei_iter) {      
-            if (fmt_nlist_a[nei_iter] < 0) break;
-            const FPTYPE * rr = &rij_a[nei_iter * 3];
-
-            // NVNMD
-            FPTYPE rij[3];
-            ufi.nflt = rr[0]; ufi.nint &= FLT_MASK; rij[0] = ufi.nflt;
-            ufi.nflt = rr[1]; ufi.nint &= FLT_MASK; rij[1] = ufi.nflt;
-            ufi.nflt = rr[2]; ufi.nint &= FLT_MASK; rij[2] = ufi.nflt;
-            
-            FPTYPE nr2;
-            dotmul_flt_nvnmd(nr2, rij, rij, 3);
-
-            int idx_deriv = nei_iter * 4 * 3;	// 4 components time 3 directions
-            int idx_value = nei_iter * 4;	// 4 components
-            // 4 value components
-            descrpt_a[idx_value + 0] = nr2;
-            descrpt_a[idx_value + 1] = rij[0];
-            descrpt_a[idx_value + 2] = rij[1];
-            descrpt_a[idx_value + 3] = rij[2];
-            // deriv of component 1/r
-            descrpt_a_deriv[idx_deriv + 0] = -2 * rij[0];
-            descrpt_a_deriv[idx_deriv + 1] = -2 * rij[1];
-            descrpt_a_deriv[idx_deriv + 2] = -2 * rij[2];
-            /*
-            d(sw*x/r)_d(x) = x * d(sw/r)_d(x) + sw/r
-            d(sw*y/r)_d(x) = y * d(sw/r)_d(x)
-            */
-            // deriv of component x/r
-            descrpt_a_deriv[idx_deriv + 3] = -1;
-            descrpt_a_deriv[idx_deriv + 4] =  0;
-            descrpt_a_deriv[idx_deriv + 5] =  0;
-            // deriv of component y/r2
-            descrpt_a_deriv[idx_deriv + 6] =  0;
-            descrpt_a_deriv[idx_deriv + 7] = -1;
-            descrpt_a_deriv[idx_deriv + 8] =  0;
-            // deriv of component z/r2
-            descrpt_a_deriv[idx_deriv + 9] =  0;
-            descrpt_a_deriv[idx_deriv +10] =  0;
-            descrpt_a_deriv[idx_deriv +11] = -1;
-        }
+  }
+  // 1./rr, cos(theta), cos(phi), sin(phi)
+  descrpt_a.resize(sec_a.back() * 4);
+  fill(descrpt_a.begin(), descrpt_a.end(), 0.0);
+  // deriv wrt center: 3
+  descrpt_a_deriv.resize(sec_a.back() * 4 * 3);
+  fill(descrpt_a_deriv.begin(), descrpt_a_deriv.end(), 0.0);
+  U_Flt64_Int64 ufi;
+  int64_t expo_max;
+
+  for (int sec_iter = 0; sec_iter < int(sec_a.size()) - 1; ++sec_iter) {
+    for (int nei_iter = sec_a[sec_iter]; nei_iter < sec_a[sec_iter + 1];
+         ++nei_iter) {
+      if (fmt_nlist_a[nei_iter] < 0) break;
+      const FPTYPE* rr = &rij_a[nei_iter * 3];
+
+      // NVNMD
+      FPTYPE rij[3];
+      ufi.nflt = rr[0];
+      ufi.nint &= FLT_MASK;
+      rij[0] = ufi.nflt;
+      ufi.nflt = rr[1];
+      ufi.nint &= FLT_MASK;
+      rij[1] = ufi.nflt;
+      ufi.nflt = rr[2];
+      ufi.nint &= FLT_MASK;
+      rij[2] = ufi.nflt;
+
+      FPTYPE nr2;
+      dotmul_flt_nvnmd(nr2, rij, rij, 3);
+
+      int idx_deriv = nei_iter * 4 * 3;  // 4 components time 3 directions
+      int idx_value = nei_iter * 4;      // 4 components
+      // 4 value components
+      descrpt_a[idx_value + 0] = nr2;
+      descrpt_a[idx_value + 1] = rij[0];
+      descrpt_a[idx_value + 2] = rij[1];
+      descrpt_a[idx_value + 3] = rij[2];
+      // deriv of component 1/r
+      descrpt_a_deriv[idx_deriv + 0] = -2 * rij[0];
+      descrpt_a_deriv[idx_deriv + 1] = -2 * rij[1];
+      descrpt_a_deriv[idx_deriv + 2] = -2 * rij[2];
+      /*
+      d(sw*x/r)_d(x) = x * d(sw/r)_d(x) + sw/r
+      d(sw*y/r)_d(x) = y * d(sw/r)_d(x)
+      */
+      // deriv of component x/r
+      descrpt_a_deriv[idx_deriv + 3] = -1;
+      descrpt_a_deriv[idx_deriv + 4] = 0;
+      descrpt_a_deriv[idx_deriv + 5] = 0;
+      // deriv of component y/r2
+      descrpt_a_deriv[idx_deriv + 6] = 0;
+      descrpt_a_deriv[idx_deriv + 7] = -1;
+      descrpt_a_deriv[idx_deriv + 8] = 0;
+      // deriv of component z/r2
+      descrpt_a_deriv[idx_deriv + 9] = 0;
+      descrpt_a_deriv[idx_deriv + 10] = 0;
+      descrpt_a_deriv[idx_deriv + 11] = -1;
     }
+  }
 }
 
-
-
-template
-void 
-deepmd::
-env_mat_a_nvnmd_quantize_cpu (
-    std::vector &	        descrpt_a,
-    std::vector &	        descrpt_a_deriv,
-    std::vector &	        rij_a,
-    const std::vector &	posi,
-    const std::vector &		type,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist,
-    const std::vector &		sec, 
-    const float &			rmin,
-    const float &			rmax);
-
-
-template
-void 
-deepmd::
-env_mat_a_nvnmd_quantize_cpu (
-    std::vector &	        descrpt_a,
-    std::vector &	        descrpt_a_deriv,
-    std::vector &	        rij_a,
-    const std::vector &		posi,
-    const std::vector &		type,
-    const int &				i_idx,
-    const std::vector &		fmt_nlist,
-    const std::vector &		sec, 
-    const float &			rmin,
-    const float &			rmax);
-
-
+template void deepmd::env_mat_a_nvnmd_quantize_cpu<double>(
+    std::vector<double>& descrpt_a,
+    std::vector<double>& descrpt_a_deriv,
+    std::vector<double>& rij_a,
+    const std::vector<double>& posi,
+    const std::vector<int>& type,
+    const int& i_idx,
+    const std::vector<int>& fmt_nlist,
+    const std::vector<int>& sec,
+    const float& rmin,
+    const float& rmax);
+
+template void deepmd::env_mat_a_nvnmd_quantize_cpu<float>(
+    std::vector<float>& descrpt_a,
+    std::vector<float>& descrpt_a_deriv,
+    std::vector<float>& rij_a,
+    const std::vector<float>& posi,
+    const std::vector<int>& type,
+    const int& i_idx,
+    const std::vector<int>& fmt_nlist,
+    const std::vector<int>& sec,
+    const float& rmin,
+    const float& rmax);
diff --git a/source/lib/src/ewald.cc b/source/lib/src/ewald.cc
index 08d0354023..9e9a5efc3d 100644
--- a/source/lib/src/ewald.cc
+++ b/source/lib/src/ewald.cc
@@ -1,80 +1,75 @@
 #include "ewald.h"
+
 #include "SimulationRegion.h"
 
 using namespace deepmd;
 
-template 
-VALUETYPE
-dir_err_esti(const VALUETYPE & test_q,
-	     const VALUETYPE & c2,
-	     const VALUETYPE & nn,
-	     const EwaldParameters & param) 
-{
-  const VALUETYPE & rcut = param.rcut;
-  const VALUETYPE & beta = param.beta;
-  const VALUETYPE rho_q2 = c2/nn;  
-  VALUETYPE sum = (VALUETYPE)2. * test_q 
-      * sqrt (rho_q2 / rcut)
-      * exp (- beta*beta*rcut*rcut) * ElectrostaticConvertion;
+template <typename VALUETYPE>
+VALUETYPE dir_err_esti(const VALUETYPE& test_q,
+                       const VALUETYPE& c2,
+                       const VALUETYPE& nn,
+                       const EwaldParameters<VALUETYPE>& param) {
+  const VALUETYPE& rcut = param.rcut;
+  const VALUETYPE& beta = param.beta;
+  const VALUETYPE rho_q2 = c2 / nn;
+  VALUETYPE sum = (VALUETYPE)2. * test_q * sqrt(rho_q2 / rcut) *
+                  exp(-beta * beta * rcut * rcut) * ElectrostaticConvertion;
   return sum;
 }
 
-template 
-VALUETYPE
-rec_err_esti(const VALUETYPE & test_q,
-	     const VALUETYPE & c2,
-	     const VALUETYPE & nn,
-	     const EwaldParameters&	param,
-	     const SimulationRegion&	region) 
-{
-  const VALUETYPE & beta = param.beta;
+template <typename VALUETYPE>
+VALUETYPE rec_err_esti(const VALUETYPE& test_q,
+                       const VALUETYPE& c2,
+                       const VALUETYPE& nn,
+                       const EwaldParameters<VALUETYPE>& param,
+                       const SimulationRegion<double>& region) {
+  const VALUETYPE& beta = param.beta;
   std::vector KK;
   cmpt_k(KK, region, param);
-  const double * rec_box = region.getRecBoxTensor();
+  const double* rec_box = region.getRecBoxTensor();
   double sum = 0;
   int BD[3];
-  for (int dd = 0; dd < 3; ++dd){
-    BD[dd] = KK[dd]/2 + 10;
+  for (int dd = 0; dd < 3; ++dd) {
+    BD[dd] = KK[dd] / 2 + 10;
   }
   int mm[3];
-  for (mm[0] = -BD[0]; mm[0] <= BD[0]; ++mm[0]){
-    for (mm[1] = -BD[1]; mm[1] <= BD[1]; ++mm[1]){
-      for (mm[2] = -BD[2]; mm[2] <= BD[2]; ++mm[2]){
-        if (mm[0] >= - int(KK[0])/2 && mm[0] <= int(KK[0])/2 &&
-            mm[1] >= - int(KK[1])/2 && mm[1] <= int(KK[1])/2 &&
-            mm[2] >= - int(KK[2])/2 && mm[2] <= int(KK[2])/2) continue;
-	VALUETYPE rm[3] = {0,0,0};	  
-	for (int dd = 0; dd < 3; ++dd){
-	  rm[0] += mm[dd] * rec_box[dd*3+0];
-	  rm[1] += mm[dd] * rec_box[dd*3+1];
-	  rm[2] += mm[dd] * rec_box[dd*3+2];
-	}
-	VALUETYPE mm2 = rm[0] * rm[0] + rm[1] * rm[1] + rm[2] * rm[2];
-        sum += exp (-2 * M_PI * M_PI / beta / beta * mm2) / mm2;
+  for (mm[0] = -BD[0]; mm[0] <= BD[0]; ++mm[0]) {
+    for (mm[1] = -BD[1]; mm[1] <= BD[1]; ++mm[1]) {
+      for (mm[2] = -BD[2]; mm[2] <= BD[2]; ++mm[2]) {
+        if (mm[0] >= -int(KK[0]) / 2 && mm[0] <= int(KK[0]) / 2 &&
+            mm[1] >= -int(KK[1]) / 2 && mm[1] <= int(KK[1]) / 2 &&
+            mm[2] >= -int(KK[2]) / 2 && mm[2] <= int(KK[2]) / 2)
+          continue;
+        VALUETYPE rm[3] = {0, 0, 0};
+        for (int dd = 0; dd < 3; ++dd) {
+          rm[0] += mm[dd] * rec_box[dd * 3 + 0];
+          rm[1] += mm[dd] * rec_box[dd * 3 + 1];
+          rm[2] += mm[dd] * rec_box[dd * 3 + 2];
+        }
+        VALUETYPE mm2 = rm[0] * rm[0] + rm[1] * rm[1] + rm[2] * rm[2];
+        sum += exp(-2 * M_PI * M_PI / beta / beta * mm2) / mm2;
       }
     }
   }
   VALUETYPE vol = region.getVolume();
-  // cout << "sum: " << sqrt(sum) 
-  //      << " KK: " << KK[0] 
-  //      << " rbox: " << rec_box[0] 
-  //      << " c2: " << c2 
+  // cout << "sum: " << sqrt(sum)
+  //      << " KK: " << KK[0]
+  //      << " rbox: " << rec_box[0]
+  //      << " c2: " << c2
   //      << " vol: " << vol << endl;
   sum = test_q * 2 * sqrt(sum) * sqrt(c2) / vol * ElectrostaticConvertion;
   return sum;
 }
 
 template 
-void
-cmpt_k(std::vector & KK,
-       const VALUETYPE *		boxt, 
-       const EwaldParameters&	param)
-{
+void cmpt_k(std::vector<int>& KK,
+            const VALUETYPE* boxt,
+            const EwaldParameters<VALUETYPE>& param) {
   KK.resize(3);
-  for (int dd = 0; dd < 3; ++dd){
-    VALUETYPE ll = sqrt(deepmd::dot3(boxt+dd*3, boxt+dd*3));
+  for (int dd = 0; dd < 3; ++dd) {
+    VALUETYPE ll = sqrt(deepmd::dot3(boxt + dd * 3, boxt + dd * 3));
     KK[dd] = ll / param.spacing;
-    // KK[dd] should be large enough 
+    // KK[dd] should be large enough
     if (KK[dd] * param.spacing < ll) KK[dd] += 1;
     assert(KK[dd] * param.spacing >= ll);
     // KK[dd] should be even
@@ -87,21 +82,17 @@ cmpt_k(std::vector & KK,
 // outputs: energy force virial
 // inputs: coordinates charges region
 template 
-void 
-deepmd::
-ewald_recp(
-    VALUETYPE &				ener, 
-    std::vector &		force,
-    std::vector &		virial,
-    const std::vector&	coord,
-    const std::vector&	charge,
-    const Region&		region, 
-    const EwaldParameters&	param)
-{
+void deepmd::ewald_recp(VALUETYPE& ener,
+                        std::vector<VALUETYPE>& force,
+                        std::vector<VALUETYPE>& virial,
+                        const std::vector<VALUETYPE>& coord,
+                        const std::vector<VALUETYPE>& charge,
+                        const Region<VALUETYPE>& region,
+                        const EwaldParameters<VALUETYPE>& param) {
   // natoms
   int natoms = charge.size();
   // init returns
-  force.resize(natoms * 3);  
+  force.resize(natoms * 3);
   virial.resize(9);
   ener = 0;
   fill(force.begin(), force.end(), static_cast(0));
@@ -109,7 +100,7 @@ ewald_recp(
 
   // number of threads
   int nthreads = 1;
-#pragma omp parallel 
+#pragma omp parallel
   {
     if (0 == omp_get_thread_num()) {
       nthreads = omp_get_num_threads();
@@ -120,133 +111,141 @@ ewald_recp(
   std::vector KK(3);
   int totK = 1;
   cmpt_k(KK, region.boxt, param);
-  for (int dd = 0; dd < 3; ++dd){
-    totK *= (KK[dd]+1);
-  }  
+  for (int dd = 0; dd < 3; ++dd) {
+    totK *= (KK[dd] + 1);
+  }
   int stride[3];
-  for (int dd = 0; dd < 3; ++dd) stride[dd] = KK[dd]+1;
-  
+  for (int dd = 0; dd < 3; ++dd) stride[dd] = KK[dd] + 1;
+
   // compute the sq
-  std::vector > thread_sqr(nthreads), thread_sqi(nthreads);
-  for (int ii = 0; ii < nthreads; ++ii){
+  std::vector<std::vector<VALUETYPE> > thread_sqr(nthreads),
+      thread_sqi(nthreads);
+  for (int ii = 0; ii < nthreads; ++ii) {
     thread_sqr[ii].resize(totK, static_cast(0));
     thread_sqi[ii].resize(totK, static_cast(0));
-  }  
+  }
   // firstly loop over particles then loop over m
 #pragma omp parallel for num_threads(nthreads)
-  for (int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     int thread_id = omp_get_thread_num();
     VALUETYPE ir[3];
-    VALUETYPE tmpcoord[3] = {coord[ii*3], coord[ii*3+1], coord[ii*3+2]};
+    VALUETYPE tmpcoord[3] = {coord[ii * 3], coord[ii * 3 + 1],
+                             coord[ii * 3 + 2]};
     convert_to_inter_cpu(ir, region, tmpcoord);
     // region.phys2Inter(ir, tmpcoord);
-    for (int mm0 = -KK[0]/2; mm0 <= KK[0]/2; ++mm0){
+    for (int mm0 = -KK[0] / 2; mm0 <= KK[0] / 2; ++mm0) {
       VALUETYPE mr[3];
-      mr[0] = ir[0] * mm0;      
-      int shift0 = (mm0 + KK[0]/2) * stride[1] * stride[2];
-      for (int mm1 = -KK[1]/2; mm1 <= KK[1]/2; ++mm1){
-	mr[1] = ir[1] * mm1;
-	int shift1 = (mm1 + KK[1]/2) * stride[2];
-	for (int mm2 = -KK[2]/2; mm2 <= KK[2]/2; ++mm2){
-	  if (mm0 == 0 && mm1 == 0 && mm2 == 0) continue;
-	  int mc = shift0 + shift1 + mm2 + KK[2]/2;
-	  mr[2] = ir[2] * mm2;
-	  VALUETYPE mdotr = 2. * M_PI * (mr[0]+mr[1]+mr[2]);
-	  thread_sqr[thread_id][mc] += charge[ii] * cos(mdotr);
-	  thread_sqi[thread_id][mc] += charge[ii] * sin(mdotr);
-	}
+      mr[0] = ir[0] * mm0;
+      int shift0 = (mm0 + KK[0] / 2) * stride[1] * stride[2];
+      for (int mm1 = -KK[1] / 2; mm1 <= KK[1] / 2; ++mm1) {
+        mr[1] = ir[1] * mm1;
+        int shift1 = (mm1 + KK[1] / 2) * stride[2];
+        for (int mm2 = -KK[2] / 2; mm2 <= KK[2] / 2; ++mm2) {
+          if (mm0 == 0 && mm1 == 0 && mm2 == 0) continue;
+          int mc = shift0 + shift1 + mm2 + KK[2] / 2;
+          mr[2] = ir[2] * mm2;
+          VALUETYPE mdotr = 2. * M_PI * (mr[0] + mr[1] + mr[2]);
+          thread_sqr[thread_id][mc] += charge[ii] * cos(mdotr);
+          thread_sqi[thread_id][mc] += charge[ii] * sin(mdotr);
+        }
       }
     }
   }
-  VALUETYPE * sqr = new VALUETYPE[totK];
-  VALUETYPE * sqi = new VALUETYPE[totK];
-  for (int ii = 0; ii < totK; ++ii){
+  VALUETYPE* sqr = new VALUETYPE[totK];
+  VALUETYPE* sqi = new VALUETYPE[totK];
+  for (int ii = 0; ii < totK; ++ii) {
     sqr[ii] = static_cast(0);
     sqi[ii] = static_cast(0);
-    for (int jj = 0; jj < nthreads; ++jj){
+    for (int jj = 0; jj < nthreads; ++jj) {
       sqr[ii] += thread_sqr[jj][ii];
       sqi[ii] += thread_sqi[jj][ii];
     }
-  }  
+  }
 
   // get rbox
-  const VALUETYPE * rec_box = region.rec_boxt;
-  
+  const VALUETYPE* rec_box = region.rec_boxt;
+
   std::vector thread_ener(nthreads, 0.);
   std::vector > thread_force(nthreads);
   std::vector > thread_virial(nthreads);
-  for (int ii = 0; ii < nthreads; ++ii){
+  for (int ii = 0; ii < nthreads; ++ii) {
     thread_force[ii].resize(natoms * 3, 0.);
     thread_virial[ii].resize(9, 0.);
   }
   // calculate ener, force and virial
-  // firstly loop over particles then loop over m  
+  // firstly loop over particles then loop over m
 #pragma omp parallel for num_threads(nthreads)
-  for (int mc = 0; mc < totK; ++mc){
+  for (int mc = 0; mc < totK; ++mc) {
     int thread_id = omp_get_thread_num();
     int mm0 = mc / (stride[1] * stride[2]);
     int left = mc - mm0 * stride[1] * stride[2];
     int mm1 = left / stride[2];
     int mm2 = left - mm1 * stride[2];
-    mm0 -= KK[0]/2;
-    mm1 -= KK[1]/2;
-    mm2 -= KK[2]/2;
-  // for (int mm0 = -KK[0]/2; mm0 <= KK[0]/2; ++mm0){
-  //   int shift0 = (mm0 + KK[0]/2) * stride[1] * stride[2];
-  //   for (int mm1 = -KK[1]/2; mm1 <= KK[1]/2; ++mm1){
-  //     int shift1 = (mm1 + KK[1]/2) * stride[2];
-  //     for (int mm2 = -KK[2]/2; mm2 <= KK[2]/2; ++mm2){
-  // 	int mc = shift0 + shift1 + mm2 + KK[2]/2;
-	if (mm0 == 0 && mm1 == 0 && mm2 == 0) continue;
-	// \bm m and \vert m \vert^2
-	VALUETYPE rm[3] = {0,0,0};	  
-	rm[0] += mm0 * rec_box[0*3+0];
-	rm[1] += mm0 * rec_box[0*3+1];
-	rm[2] += mm0 * rec_box[0*3+2];
-	rm[0] += mm1 * rec_box[1*3+0];
-	rm[1] += mm1 * rec_box[1*3+1];
-	rm[2] += mm1 * rec_box[1*3+2];
-	rm[0] += mm2 * rec_box[2*3+0];
-	rm[1] += mm2 * rec_box[2*3+1];
-	rm[2] += mm2 * rec_box[2*3+2];
-	VALUETYPE nmm2 = rm[0] * rm[0] + rm[1] * rm[1] + rm[2] * rm[2];
-	// energy
-	VALUETYPE expnmm2 = exp(- M_PI * M_PI * nmm2 / (param.beta * param.beta)) / nmm2;
-	VALUETYPE eincr = expnmm2 * (sqr[mc] * sqr[mc] + sqi[mc] * sqi[mc]);
-	thread_ener[thread_id] += eincr;
-	// virial
-	VALUETYPE vpref = (VALUETYPE)-2. * ((VALUETYPE)1. + M_PI * M_PI * nmm2 / (param.beta * param.beta)) / nmm2;
-	for (int dd0 = 0; dd0 < 3; ++dd0){
-	  for (int dd1 = 0; dd1 < 3; ++dd1){	    
-	    VALUETYPE tmp = vpref * rm[dd0] * rm[dd1];
-	    if (dd0 == dd1) tmp += 1;
-	    thread_virial[thread_id][dd0*3+dd1] += eincr * tmp;
-	  }
-	}
-	// force
-	for (int ii = 0; ii < natoms; ++ii){
-	  VALUETYPE mdotr = (VALUETYPE)-2. * M_PI * (coord[ii*3+0]*rm[0] + coord[ii*3+1]*rm[1] + coord[ii*3+2]*rm[2]);
-	  VALUETYPE tmpr = charge[ii] * cos(mdotr);
-	  VALUETYPE tmpi = charge[ii] * sin(mdotr);
-	  VALUETYPE cc = (VALUETYPE)4. * M_PI * (tmpr * sqi[mc] + tmpi * sqr[mc]) * expnmm2;
-	  thread_force[thread_id][ii*3+0] -= rm[0] * cc;
-	  thread_force[thread_id][ii*3+1] -= rm[1] * cc;
-	  thread_force[thread_id][ii*3+2] -= rm[2] * cc;
-	}
+    mm0 -= KK[0] / 2;
+    mm1 -= KK[1] / 2;
+    mm2 -= KK[2] / 2;
+    // for (int mm0 = -KK[0]/2; mm0 <= KK[0]/2; ++mm0){
+    //   int shift0 = (mm0 + KK[0]/2) * stride[1] * stride[2];
+    //   for (int mm1 = -KK[1]/2; mm1 <= KK[1]/2; ++mm1){
+    //     int shift1 = (mm1 + KK[1]/2) * stride[2];
+    //     for (int mm2 = -KK[2]/2; mm2 <= KK[2]/2; ++mm2){
+    // 	int mc = shift0 + shift1 + mm2 + KK[2]/2;
+    if (mm0 == 0 && mm1 == 0 && mm2 == 0) continue;
+    // \bm m and \vert m \vert^2
+    VALUETYPE rm[3] = {0, 0, 0};
+    rm[0] += mm0 * rec_box[0 * 3 + 0];
+    rm[1] += mm0 * rec_box[0 * 3 + 1];
+    rm[2] += mm0 * rec_box[0 * 3 + 2];
+    rm[0] += mm1 * rec_box[1 * 3 + 0];
+    rm[1] += mm1 * rec_box[1 * 3 + 1];
+    rm[2] += mm1 * rec_box[1 * 3 + 2];
+    rm[0] += mm2 * rec_box[2 * 3 + 0];
+    rm[1] += mm2 * rec_box[2 * 3 + 1];
+    rm[2] += mm2 * rec_box[2 * 3 + 2];
+    VALUETYPE nmm2 = rm[0] * rm[0] + rm[1] * rm[1] + rm[2] * rm[2];
+    // energy
+    VALUETYPE expnmm2 =
+        exp(-M_PI * M_PI * nmm2 / (param.beta * param.beta)) / nmm2;
+    VALUETYPE eincr = expnmm2 * (sqr[mc] * sqr[mc] + sqi[mc] * sqi[mc]);
+    thread_ener[thread_id] += eincr;
+    // virial
+    VALUETYPE vpref =
+        (VALUETYPE)-2. *
+        ((VALUETYPE)1. + M_PI * M_PI * nmm2 / (param.beta * param.beta)) / nmm2;
+    for (int dd0 = 0; dd0 < 3; ++dd0) {
+      for (int dd1 = 0; dd1 < 3; ++dd1) {
+        VALUETYPE tmp = vpref * rm[dd0] * rm[dd1];
+        if (dd0 == dd1) tmp += 1;
+        thread_virial[thread_id][dd0 * 3 + dd1] += eincr * tmp;
+      }
+    }
+    // force
+    for (int ii = 0; ii < natoms; ++ii) {
+      VALUETYPE mdotr = (VALUETYPE)-2. * M_PI *
+                        (coord[ii * 3 + 0] * rm[0] + coord[ii * 3 + 1] * rm[1] +
+                         coord[ii * 3 + 2] * rm[2]);
+      VALUETYPE tmpr = charge[ii] * cos(mdotr);
+      VALUETYPE tmpi = charge[ii] * sin(mdotr);
+      VALUETYPE cc =
+          (VALUETYPE)4. * M_PI * (tmpr * sqi[mc] + tmpi * sqr[mc]) * expnmm2;
+      thread_force[thread_id][ii * 3 + 0] -= rm[0] * cc;
+      thread_force[thread_id][ii * 3 + 1] -= rm[1] * cc;
+      thread_force[thread_id][ii * 3 + 2] -= rm[2] * cc;
+    }
     //   }
     // }
   }
   // reduce thread results
-  for (int ii = 0; ii < nthreads; ++ii){
+  for (int ii = 0; ii < nthreads; ++ii) {
     ener += thread_ener[ii];
   }
-  for (int jj = 0; jj < 9; ++jj){
-    for (int ii = 0; ii < nthreads; ++ii){
+  for (int jj = 0; jj < 9; ++jj) {
+    for (int ii = 0; ii < nthreads; ++ii) {
       virial[jj] += thread_virial[ii][jj];
     }
   }
-  for (int jj = 0; jj < natoms * 3; ++jj){
-    for (int ii = 0; ii < nthreads; ++ii){
+  for (int jj = 0; jj < natoms * 3; ++jj) {
+    for (int ii = 0; ii < nthreads; ++ii) {
       force[jj] += thread_force[ii][jj];
     }
   }
@@ -254,39 +253,30 @@ ewald_recp(
   VALUETYPE vol = volume_cpu(region);
   ener /= (VALUETYPE)2. * M_PI * vol;
   ener *= ElectrostaticConvertion;
-  for (int ii = 0; ii < 3*natoms; ++ii){
+  for (int ii = 0; ii < 3 * natoms; ++ii) {
     force[ii] /= (VALUETYPE)2. * M_PI * vol;
     force[ii] *= ElectrostaticConvertion;
-  }  
-  for (int ii = 0; ii < 3*3; ++ii){
+  }
+  for (int ii = 0; ii < 3 * 3; ++ii) {
     virial[ii] /= (VALUETYPE)2. * M_PI * vol;
     virial[ii] *= ElectrostaticConvertion;
-  }  
-  delete[]sqr;
-  delete[]sqi;
+  }
+  delete[] sqr;
+  delete[] sqi;
 }
 
+template void deepmd::ewald_recp<float>(float& ener,
+                                        std::vector<float>& force,
+                                        std::vector<float>& virial,
+                                        const std::vector<float>& coord,
+                                        const std::vector<float>& charge,
+                                        const Region<float>& region,
+                                        const EwaldParameters<float>& param);
 
-template
-void 
-deepmd::
-ewald_recp(
-    float &				ener, 
-    std::vector &		force,
-    std::vector &		virial,
-    const std::vector&		coord,
-    const std::vector&		charge,
-    const Region&		region, 
-    const EwaldParameters&	param);
-
-template
-void 
-deepmd::
-ewald_recp(
-    double &				ener, 
-    std::vector &		force,
-    std::vector &		virial,
-    const std::vector&		coord,
-    const std::vector&		charge,
-    const Region&		region, 
-    const EwaldParameters&	param);
+template void deepmd::ewald_recp<double>(double& ener,
+                                         std::vector<double>& force,
+                                         std::vector<double>& virial,
+                                         const std::vector<double>& coord,
+                                         const std::vector<double>& charge,
+                                         const Region<double>& region,
+                                         const EwaldParameters<double>& param);
diff --git a/source/lib/src/fmt_nlist.cc b/source/lib/src/fmt_nlist.cc
index 9e1d5fc57c..0d7346f3ac 100644
--- a/source/lib/src/fmt_nlist.cc
+++ b/source/lib/src/fmt_nlist.cc
@@ -1,246 +1,208 @@
-#include 
-#include 
-#include 
 #include "fmt_nlist.h"
-#include "SimulationRegion.h"
+
+#include 
+#include 
 #include 
+#include 
+
+#include "SimulationRegion.h"
 #include "errors.h"
 
 using namespace deepmd;
 
-template 
-struct NeighborInfo 
-{
+template 
+struct NeighborInfo {
   int type;
   FPTYPE dist;
   int index;
-  NeighborInfo () 
-      : type (0), dist(0), index(0) 
-      {
-      }
-  NeighborInfo (int tt, FPTYPE dd, int ii) 
-      : type (tt), dist(dd), index(ii) 
-      {
-      }
-  bool operator < (const NeighborInfo & b) const 
-      {
-	return (type < b.type || 
-		(type == b.type && 
-		 (dist < b.dist || 
-		  (dist == b.dist && index < b.index) ) ) );
-      }
+  NeighborInfo() : type(0), dist(0), index(0) {}
+  NeighborInfo(int tt, FPTYPE dd, int ii) : type(tt), dist(dd), index(ii) {}
+  bool operator<(const NeighborInfo &b) const {
+    return (type < b.type ||
+            (type == b.type &&
+             (dist < b.dist || (dist == b.dist && index < b.index))));
+  }
 };
 
-int format_nlist_i_fill_a (
-    std::vector &			fmt_nei_idx_a,
-    std::vector &			fmt_nei_idx_r,
-    const std::vector &	posi,
-    const int &				ntypes,
-    const std::vector &		type,
-    const SimulationRegion &	region,
-    const bool &			b_pbc,
-    const int &				i_idx,
-    const std::vector &		nei_idx_a, 
-    const std::vector &		nei_idx_r, 
-    const double &			rcut,
-    const std::vector &		sec_a, 
-    const std::vector &		sec_r)
-{
+int format_nlist_i_fill_a(std::vector &fmt_nei_idx_a,
+                          std::vector &fmt_nei_idx_r,
+                          const std::vector &posi,
+                          const int &ntypes,
+                          const std::vector &type,
+                          const SimulationRegion ®ion,
+                          const bool &b_pbc,
+                          const int &i_idx,
+                          const std::vector &nei_idx_a,
+                          const std::vector &nei_idx_r,
+                          const double &rcut,
+                          const std::vector &sec_a,
+                          const std::vector &sec_r) {
 #ifdef DEBUG
-  assert (sec_a.size() == ntypes + 1);
-  assert (sec_r.size() == ntypes + 1);
+  assert(sec_a.size() == ntypes + 1);
+  assert(sec_r.size() == ntypes + 1);
 #endif
-  
-  fmt_nei_idx_a.resize (sec_a.back());
-  fmt_nei_idx_r.resize (sec_r.back());
-  fill (fmt_nei_idx_a.begin(), fmt_nei_idx_a.end(), -1);
-  fill (fmt_nei_idx_r.begin(), fmt_nei_idx_r.end(), -1);  
-  
+
+  fmt_nei_idx_a.resize(sec_a.back());
+  fmt_nei_idx_r.resize(sec_r.back());
+  fill(fmt_nei_idx_a.begin(), fmt_nei_idx_a.end(), -1);
+  fill(fmt_nei_idx_r.begin(), fmt_nei_idx_r.end(), -1);
+
   // gether all neighbors
-  std::vector nei_idx (nei_idx_a);
-  nei_idx.insert (nei_idx.end(), nei_idx_r.begin(), nei_idx_r.end());
-  assert (nei_idx.size() == nei_idx_a.size() + nei_idx_r.size());
+  std::vector nei_idx(nei_idx_a);
+  nei_idx.insert(nei_idx.end(), nei_idx_r.begin(), nei_idx_r.end());
+  assert(nei_idx.size() == nei_idx_a.size() + nei_idx_r.size());
   // allocate the information for all neighbors
-  std::vector > sel_nei ;
-  sel_nei.reserve (nei_idx_a.size() + nei_idx_r.size());
-  for (unsigned kk = 0; kk < nei_idx.size(); ++kk){
+  std::vector > sel_nei;
+  sel_nei.reserve(nei_idx_a.size() + nei_idx_r.size());
+  for (unsigned kk = 0; kk < nei_idx.size(); ++kk) {
     double diff[3];
-    const int & j_idx = nei_idx[kk];
-    if (b_pbc){
-      region.diffNearestNeighbor (posi[j_idx*3+0], posi[j_idx*3+1], posi[j_idx*3+2], 
-				  posi[i_idx*3+0], posi[i_idx*3+1], posi[i_idx*3+2], 
-				  diff[0], diff[1], diff[2]);
+    const int &j_idx = nei_idx[kk];
+    if (b_pbc) {
+      region.diffNearestNeighbor(posi[j_idx * 3 + 0], posi[j_idx * 3 + 1],
+                                 posi[j_idx * 3 + 2], posi[i_idx * 3 + 0],
+                                 posi[i_idx * 3 + 1], posi[i_idx * 3 + 2],
+                                 diff[0], diff[1], diff[2]);
+    } else {
+      for (int dd = 0; dd < 3; ++dd)
+        diff[dd] = posi[j_idx * 3 + dd] - posi[i_idx * 3 + dd];
     }
-    else {
-      for (int dd = 0; dd < 3; ++dd) diff[dd] = posi[j_idx*3+dd] - posi[i_idx*3+dd];
-    }
-    double rr = sqrt(deepmd::dot3(diff, diff));    
+    double rr = sqrt(deepmd::dot3(diff, diff));
     if (rr <= rcut) {
-      sel_nei.push_back(NeighborInfo (type[j_idx], rr, j_idx));
+      sel_nei.push_back(NeighborInfo(type[j_idx], rr, j_idx));
     }
   }
-  sort (sel_nei.begin(), sel_nei.end());  
-  
-  std::vector nei_iter = sec_a;
+  sort(sel_nei.begin(), sel_nei.end());
+
+  std::vector nei_iter = sec_a;
   int overflowed = -1;
-  for (unsigned kk = 0; kk < sel_nei.size(); ++kk){
-    const int & nei_type = sel_nei[kk].type;
-    if (nei_iter[nei_type] >= sec_a[nei_type+1]) {
-      int r_idx_iter = (nei_iter[nei_type] ++) - sec_a[nei_type+1] + sec_r[nei_type];
-      if (r_idx_iter >= sec_r[nei_type+1]) {
-	// return nei_type;
-	overflowed = nei_type;
+  for (unsigned kk = 0; kk < sel_nei.size(); ++kk) {
+    const int &nei_type = sel_nei[kk].type;
+    if (nei_iter[nei_type] >= sec_a[nei_type + 1]) {
+      int r_idx_iter =
+          (nei_iter[nei_type]++) - sec_a[nei_type + 1] + sec_r[nei_type];
+      if (r_idx_iter >= sec_r[nei_type + 1]) {
+        // return nei_type;
+        overflowed = nei_type;
+      } else {
+        fmt_nei_idx_r[r_idx_iter] = sel_nei[kk].index;
       }
-      else {
-	fmt_nei_idx_r[r_idx_iter] = sel_nei[kk].index;
-      }
-    }
-    else {
-      fmt_nei_idx_a[nei_iter[nei_type] ++] = sel_nei[kk].index;
+    } else {
+      fmt_nei_idx_a[nei_iter[nei_type]++] = sel_nei[kk].index;
     }
   }
   return overflowed;
 }
 
+template 
+int format_nlist_i_cpu(std::vector &fmt_nei_idx_a,
+                       const std::vector &posi,
+                       const std::vector &type,
+                       const int &i_idx,
+                       const std::vector &nei_idx_a,
+                       const float &rcut,
+                       const std::vector &sec_a) {
+  fmt_nei_idx_a.resize(sec_a.back());
+  fill(fmt_nei_idx_a.begin(), fmt_nei_idx_a.end(), -1);
 
-template 
-int format_nlist_i_cpu (
-    std::vector &		fmt_nei_idx_a,
-    const std::vector &posi,
-    const std::vector &   type,
-    const int &			i_idx,
-    const std::vector &   nei_idx_a, 
-    const float &		rcut,
-    const std::vector &   sec_a)
-{
-    fmt_nei_idx_a.resize (sec_a.back());
-    fill (fmt_nei_idx_a.begin(), fmt_nei_idx_a.end(), -1);
-  
-    // gether all neighbors
-    std::vector nei_idx (nei_idx_a);
-    // allocate the information for all neighbors
-    std::vector > sel_nei;
-    sel_nei.reserve (nei_idx_a.size());
-    float rcut2 = rcut * rcut;
-    for (unsigned kk = 0; kk < nei_idx.size(); ++kk) {
-        // rcut is float in this function, so float rr is enough
-        float diff[3];
-        const int & j_idx = nei_idx[kk];
-        for (int dd = 0; dd < 3; ++dd) {
-            diff[dd] = (float)posi[j_idx * 3 + dd] - (float)posi[i_idx * 3 + dd];
-        }
-        float rr2 = deepmd::dot3(diff, diff);    
-        if (rr2 <= rcut2) {
-            sel_nei.push_back(NeighborInfo(type[j_idx], rr2, j_idx));
-        }
+  // gether all neighbors
+  std::vector nei_idx(nei_idx_a);
+  // allocate the information for all neighbors
+  std::vector > sel_nei;
+  sel_nei.reserve(nei_idx_a.size());
+  float rcut2 = rcut * rcut;
+  for (unsigned kk = 0; kk < nei_idx.size(); ++kk) {
+    // rcut is float in this function, so float rr is enough
+    float diff[3];
+    const int &j_idx = nei_idx[kk];
+    for (int dd = 0; dd < 3; ++dd) {
+      diff[dd] = (float)posi[j_idx * 3 + dd] - (float)posi[i_idx * 3 + dd];
     }
-    sort(sel_nei.begin(), sel_nei.end());  
-  
-    std::vector nei_iter = sec_a;
-    int overflowed = -1;
-    for (unsigned kk = 0; kk < sel_nei.size(); ++kk) {
-        const int & nei_type = sel_nei[kk].type;
-        if (nei_iter[nei_type] < sec_a[nei_type+1]) {
-            fmt_nei_idx_a[nei_iter[nei_type] ++] = sel_nei[kk].index;
-        }
-	else{
-	  overflowed = nei_type;
-	}
+    float rr2 = deepmd::dot3(diff, diff);
+    if (rr2 <= rcut2) {
+      sel_nei.push_back(NeighborInfo(type[j_idx], rr2, j_idx));
+    }
+  }
+  sort(sel_nei.begin(), sel_nei.end());
+
+  std::vector nei_iter = sec_a;
+  int overflowed = -1;
+  for (unsigned kk = 0; kk < sel_nei.size(); ++kk) {
+    const int &nei_type = sel_nei[kk].type;
+    if (nei_iter[nei_type] < sec_a[nei_type + 1]) {
+      fmt_nei_idx_a[nei_iter[nei_type]++] = sel_nei[kk].index;
+    } else {
+      overflowed = nei_type;
     }
-    return overflowed;
+  }
+  return overflowed;
 }
 
-template 
-void 
-deepmd::
-format_nlist_cpu (
-    int * nlist,
-    const InputNlist & in_nlist,
-    const FPTYPE * coord, 
-    const int * type, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const std::vector sec)
-{
+template 
+void deepmd::format_nlist_cpu(int *nlist,
+                              const InputNlist &in_nlist,
+                              const FPTYPE *coord,
+                              const int *type,
+                              const int nloc,
+                              const int nall,
+                              const float rcut,
+                              const std::vector sec) {
   std::vector posi_(nall * 3);
   std::vector type_(nall);
   std::copy(coord, coord + nall * 3, posi_.begin());
   std::copy(type, type + nall, type_.begin());
   std::vector ilist, fmt_ilist;
   int nnei = sec.back();
-  
-  for(int ii = 0; ii < in_nlist.inum; ++ii){
+
+  for (int ii = 0; ii < in_nlist.inum; ++ii) {
     int i_idx = in_nlist.ilist[ii];
     int i_num = in_nlist.numneigh[ii];
     ilist.resize(i_num);
-    std::copy(in_nlist.firstneigh[ii], in_nlist.firstneigh[ii] + i_num, ilist.begin());
-    format_nlist_i_cpu(
-	fmt_ilist,
-	posi_,
-	type_,
-	i_idx,
-	ilist,
-	rcut, 
-	sec);	
-    int * cur_nlist = nlist + i_idx * nnei;
-    if(fmt_ilist.size() != nnei){
-      std::cerr << "FATAL: formatted nlist of i have length " 
-		<< fmt_ilist.size()
-		<< " which does not match " 
-		<< nnei	<< std::endl;
+    std::copy(in_nlist.firstneigh[ii], in_nlist.firstneigh[ii] + i_num,
+              ilist.begin());
+    format_nlist_i_cpu(fmt_ilist, posi_, type_, i_idx, ilist, rcut, sec);
+    int *cur_nlist = nlist + i_idx * nnei;
+    if (fmt_ilist.size() != nnei) {
+      std::cerr << "FATAL: formatted nlist of i have length "
+                << fmt_ilist.size() << " which does not match " << nnei
+                << std::endl;
       throw deepmd::deepmd_exception();
     }
     std::copy(fmt_ilist.begin(), fmt_ilist.end(), cur_nlist);
   }
 }
 
-template
-int format_nlist_i_cpu (
-    std::vector &		fmt_nei_idx_a,
-    const std::vector &posi,
-    const std::vector &   type,
-    const int &			i_idx,
-    const std::vector &   nei_idx_a, 
-    const float &		rcut,
-    const std::vector &   sec_a);
-
-
-template
-int format_nlist_i_cpu (
-    std::vector &		fmt_nei_idx_a,
-    const std::vector &	posi,
-    const std::vector &   type,
-    const int &			i_idx,
-    const std::vector &   nei_idx_a, 
-    const float &		rcut,
-    const std::vector &   sec_a);
-
-template
-void 
-deepmd::
-format_nlist_cpu (
-    int * nlist,
-    const deepmd::InputNlist & in_nlist,
-    const double * coord, 
-    const int * type, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const std::vector sec);
+template int format_nlist_i_cpu(std::vector &fmt_nei_idx_a,
+                                        const std::vector &posi,
+                                        const std::vector &type,
+                                        const int &i_idx,
+                                        const std::vector &nei_idx_a,
+                                        const float &rcut,
+                                        const std::vector &sec_a);
 
+template int format_nlist_i_cpu(std::vector &fmt_nei_idx_a,
+                                       const std::vector &posi,
+                                       const std::vector &type,
+                                       const int &i_idx,
+                                       const std::vector &nei_idx_a,
+                                       const float &rcut,
+                                       const std::vector &sec_a);
 
-template
-void 
-deepmd::
-format_nlist_cpu (
-    int * nlist,
-    const deepmd::InputNlist & in_nlist,
-    const float * coord, 
-    const int * type, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
+template void deepmd::format_nlist_cpu(
+    int *nlist,
+    const deepmd::InputNlist &in_nlist,
+    const double *coord,
+    const int *type,
+    const int nloc,
+    const int nall,
+    const float rcut,
     const std::vector sec);
 
-
+template void deepmd::format_nlist_cpu(
+    int *nlist,
+    const deepmd::InputNlist &in_nlist,
+    const float *coord,
+    const int *type,
+    const int nloc,
+    const int nall,
+    const float rcut,
+    const std::vector sec);
diff --git a/source/lib/src/gelu.cc b/source/lib/src/gelu.cc
index 13a3711027..c9ff7c9f2b 100644
--- a/source/lib/src/gelu.cc
+++ b/source/lib/src/gelu.cc
@@ -1,49 +1,77 @@
 #include "gelu.h"
+
 #include 
+
 #include "device.h"
 
-template
-void deepmd::gelu_cpu(
-    FPTYPE * out, 
-    const FPTYPE * xx, 
-    const int_64 size)
-{
+template 
+void deepmd::gelu_cpu(FPTYPE* out, const FPTYPE* xx, const int_64 size) {
   for (int ii = 0; ii < size; ii++) {
-    out[ii] = xx[ii] * (FPTYPE)0.5 * ((FPTYPE)1.0 + tanh((FPTYPE)SQRT_2_PI * (xx[ii] + (FPTYPE)0.044715 * xx[ii] * xx[ii] *xx[ii])));
+    out[ii] = xx[ii] * (FPTYPE)0.5 *
+              ((FPTYPE)1.0 +
+               tanh((FPTYPE)SQRT_2_PI *
+                    (xx[ii] + (FPTYPE)0.044715 * xx[ii] * xx[ii] * xx[ii])));
   }
 }
 
-template
-void deepmd::gelu_grad_cpu(
-    FPTYPE * out, 
-    const FPTYPE * xx,
-    const FPTYPE * dy, 
-    const int_64 size)
-{
+template 
+void deepmd::gelu_grad_cpu(FPTYPE* out,
+                           const FPTYPE* xx,
+                           const FPTYPE* dy,
+                           const int_64 size) {
   for (int ii = 0; ii < size; ii++) {
-    const FPTYPE var = tanh((FPTYPE)SQRT_2_PI * (xx[ii] + (FPTYPE)0.044715 * xx[ii] * xx[ii] * xx[ii]));
-    out[ii] = dy[ii] * ((FPTYPE)0.5 * (FPTYPE)SQRT_2_PI * xx[ii] * ((FPTYPE)1. - var * var) * ((FPTYPE)0.134145 * xx[ii] * xx[ii] + (FPTYPE)1.) + (FPTYPE)0.5 * var + (FPTYPE)0.5);
+    const FPTYPE var =
+        tanh((FPTYPE)SQRT_2_PI *
+             (xx[ii] + (FPTYPE)0.044715 * xx[ii] * xx[ii] * xx[ii]));
+    out[ii] = dy[ii] * ((FPTYPE)0.5 * (FPTYPE)SQRT_2_PI * xx[ii] *
+                            ((FPTYPE)1. - var * var) *
+                            ((FPTYPE)0.134145 * xx[ii] * xx[ii] + (FPTYPE)1.) +
+                        (FPTYPE)0.5 * var + (FPTYPE)0.5);
   }
 }
 
-template
-void deepmd::gelu_grad_grad_cpu(
-    FPTYPE * out,
-    const FPTYPE * xx,
-    const FPTYPE * dy, 
-    const FPTYPE * dy_2,
-    const int_64 size) 
-{
+template 
+void deepmd::gelu_grad_grad_cpu(FPTYPE* out,
+                                const FPTYPE* xx,
+                                const FPTYPE* dy,
+                                const FPTYPE* dy_2,
+                                const int_64 size) {
   for (int ii = 0; ii < size; ii++) {
-    const FPTYPE var1 = tanh((FPTYPE)SQRT_2_PI * (xx[ii] + (FPTYPE)0.044715 * xx[ii] * xx[ii] *xx[ii]));
-    const FPTYPE var2 = (FPTYPE)SQRT_2_PI * ((FPTYPE)1. - var1 * var1) * ((FPTYPE)0.134145 * xx[ii] * xx[ii] + (FPTYPE)1.);
-    out[ii] = dy[ii] * dy_2[ii] * ((FPTYPE)0.134145 * (FPTYPE)SQRT_2_PI * xx[ii] * xx[ii] * ((FPTYPE)1. - var1 * var1) - (FPTYPE)SQRT_2_PI * xx[ii] * var2 * ((FPTYPE)0.134145 * xx[ii] * xx[ii] + (FPTYPE)1.) * var1 + var2);
+    const FPTYPE var1 =
+        tanh((FPTYPE)SQRT_2_PI *
+             (xx[ii] + (FPTYPE)0.044715 * xx[ii] * xx[ii] * xx[ii]));
+    const FPTYPE var2 = (FPTYPE)SQRT_2_PI * ((FPTYPE)1. - var1 * var1) *
+                        ((FPTYPE)0.134145 * xx[ii] * xx[ii] + (FPTYPE)1.);
+    out[ii] = dy[ii] * dy_2[ii] *
+              ((FPTYPE)0.134145 * (FPTYPE)SQRT_2_PI * xx[ii] * xx[ii] *
+                   ((FPTYPE)1. - var1 * var1) -
+               (FPTYPE)SQRT_2_PI * xx[ii] * var2 *
+                   ((FPTYPE)0.134145 * xx[ii] * xx[ii] + (FPTYPE)1.) * var1 +
+               var2);
   }
 }
 
-template void deepmd::gelu_cpu(float * out, const float * x, const int_64 size);
-template void deepmd::gelu_cpu(double * out, const double * x, const int_64 size);
-template void deepmd::gelu_grad_cpu(float * out, const float * x, const float * dy, const int_64 size);
-template void deepmd::gelu_grad_cpu(double * out, const double * x, const double * dy, const int_64 size);
-template void deepmd::gelu_grad_grad_cpu(float * out, const float * x, const float * dy, const float * dy_2, const int_64 size);
-template void deepmd::gelu_grad_grad_cpu(double * out, const double * x, const double * dy, const double * dy_2, const int_64 size);
+template void deepmd::gelu_cpu(float* out,
+                                      const float* x,
+                                      const int_64 size);
+template void deepmd::gelu_cpu(double* out,
+                                       const double* x,
+                                       const int_64 size);
+template void deepmd::gelu_grad_cpu(float* out,
+                                           const float* x,
+                                           const float* dy,
+                                           const int_64 size);
+template void deepmd::gelu_grad_cpu(double* out,
+                                            const double* x,
+                                            const double* dy,
+                                            const int_64 size);
+template void deepmd::gelu_grad_grad_cpu(float* out,
+                                                const float* x,
+                                                const float* dy,
+                                                const float* dy_2,
+                                                const int_64 size);
+template void deepmd::gelu_grad_grad_cpu(double* out,
+                                                 const double* x,
+                                                 const double* dy,
+                                                 const double* dy_2,
+                                                 const int_64 size);
diff --git a/source/lib/src/map_aparam.cc b/source/lib/src/map_aparam.cc
index 7e60f1c3b8..e95d3cbc0a 100644
--- a/source/lib/src/map_aparam.cc
+++ b/source/lib/src/map_aparam.cc
@@ -1,21 +1,19 @@
 #include "map_aparam.h"
 
 template 
-void deepmd::map_aparam_cpu (
-    FPTYPE * output,
-    const FPTYPE * aparam,
-    const int * nlist,
-    const int & nloc,
-    const int & nnei,
-    const int & numb_aparam
-    )
+void deepmd::map_aparam_cpu(FPTYPE* output,
+                            const FPTYPE* aparam,
+                            const int* nlist,
+                            const int& nloc,
+                            const int& nnei,
+                            const int& numb_aparam)
 //
 //	output:	nloc x nnei x numb_aparam
 //	aparam:	nall x numb_aparam
 //	nlist:	nloc x nnei
 //
 {
-  for (int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     int i_idx = ii;
     for (int dd = 0; dd < nnei * numb_aparam; ++dd) {
       output[i_idx * nnei * numb_aparam + dd] = 0.;
@@ -23,37 +21,31 @@ void deepmd::map_aparam_cpu (
   }
 
   // loop over loc atoms
-  for (int ii = 0; ii < nloc; ++ii){
-    int i_idx = ii;	
+  for (int ii = 0; ii < nloc; ++ii) {
+    int i_idx = ii;
     // loop over neighbor atoms
-    for (int jj = 0; jj < nnei; ++jj){
+    for (int jj = 0; jj < nnei; ++jj) {
       int j_idx = nlist[i_idx * nnei + jj];
       if (j_idx < 0) continue;
       // loop over elements of aparam
-      for (int dd = 0; dd < numb_aparam; ++dd){
-	output[ii * nnei * numb_aparam + jj * numb_aparam + dd] = aparam[j_idx * numb_aparam + dd];
+      for (int dd = 0; dd < numb_aparam; ++dd) {
+        output[ii * nnei * numb_aparam + jj * numb_aparam + dd] =
+            aparam[j_idx * numb_aparam + dd];
       }
     }
-  }  
+  }
 }
 
-template
-void deepmd::map_aparam_cpu (
-    double * output,
-    const double * aparam,
-    const int * nlist,
-    const int & nloc,
-    const int & nnei,
-    const int & numb_aparam
-    );
-
-template
-void deepmd::map_aparam_cpu (
-    float * output,
-    const float * aparam,
-    const int * nlist,
-    const int & nloc,
-    const int & nnei,
-    const int & numb_aparam
-    );
+template void deepmd::map_aparam_cpu(double* output,
+                                             const double* aparam,
+                                             const int* nlist,
+                                             const int& nloc,
+                                             const int& nnei,
+                                             const int& numb_aparam);
 
+template void deepmd::map_aparam_cpu(float* output,
+                                            const float* aparam,
+                                            const int* nlist,
+                                            const int& nloc,
+                                            const int& nnei,
+                                            const int& numb_aparam);
diff --git a/source/lib/src/neighbor_list.cc b/source/lib/src/neighbor_list.cc
index 99362bcc08..19b4908334 100644
--- a/source/lib/src/neighbor_list.cc
+++ b/source/lib/src/neighbor_list.cc
@@ -1,54 +1,47 @@
 #include "neighbor_list.h"
-#include "device.h"
+
 #include 
 #include 
-// #include  
+
+#include "device.h"
+// #include 
 
 // using namespace std;
 enum {
   MAX_WARN_IDX_OUT_OF_BOUND = 10,
 };
 
-bool 
-is_loc (const std::vector & idx, 
-	const std::vector & nat_stt,
-	const std::vector & nat_end)
-{
+bool is_loc(const std::vector& idx,
+            const std::vector& nat_stt,
+            const std::vector& nat_end) {
   bool ret = true;
   for (int dd = 0; dd < 3; ++dd) ret = ret && idx[dd] >= nat_stt[dd];
-  for (int dd = 0; dd < 3; ++dd) ret = ret && idx[dd] <  nat_end[dd];
+  for (int dd = 0; dd < 3; ++dd) ret = ret && idx[dd] < nat_end[dd];
   return ret;
 }
 
-int 
-collapse_index (const std::vector &	idx,
-		const std::vector &	size)
-{
+int collapse_index(const std::vector& idx, const std::vector& size) {
   return (idx[0] * size[1] + idx[1]) * size[2] + idx[2];
 }
 
-void
-expand_index (std::vector &		o_idx,
-	      const int &		i_idx,
-	      const std::vector &	size)
-{
+void expand_index(std::vector& o_idx,
+                  const int& i_idx,
+                  const std::vector& size) {
   int tmp1 = i_idx / size[2];
   o_idx[2] = i_idx - tmp1 * size[2];
   o_idx[0] = tmp1 / size[1];
   o_idx[1] = tmp1 - o_idx[0] * size[1];
 }
 
-void 
-build_clist (std::vector > &	clist,
-	     const std::vector &	coord,
-	     const int &		nloc,
-	     const std::vector &	nat_stt,
-	     const std::vector &	nat_end,
-	     const std::vector &	ext_stt,
-	     const std::vector &	ext_end,
-	     const SimulationRegion & region,
-	     const std::vector &	global_grid)
-{
+void build_clist(std::vector >& clist,
+                 const std::vector& coord,
+                 const int& nloc,
+                 const std::vector& nat_stt,
+                 const std::vector& nat_end,
+                 const std::vector& ext_stt,
+                 const std::vector& ext_end,
+                 const SimulationRegion& region,
+                 const std::vector& global_grid) {
   static int count_warning_loc_idx_lower = 0;
   static int count_warning_loc_idx_upper = 0;
   static int count_warning_ghost_idx_lower = 0;
@@ -58,85 +51,89 @@ build_clist (std::vector > &	clist,
   std::vector ext_ncell(3);
   for (int dd = 0; dd < 3; ++dd) ext_ncell[dd] = ext_end[dd] - ext_stt[dd];
   int ncell = ext_ncell[0] * ext_ncell[1] * ext_ncell[2];
-  std::vector cell_size (3);
-  for (int dd = 0; dd < 3; ++dd) cell_size[dd] = 1./global_grid[dd];
-  std::vector nat_orig(3);
+  std::vector cell_size(3);
+  for (int dd = 0; dd < 3; ++dd) cell_size[dd] = 1. / global_grid[dd];
+  std::vector nat_orig(3);
   for (int dd = 0; dd < 3; ++dd) nat_orig[dd] = nat_stt[dd] * cell_size[dd];
   std::vector idx_orig_shift(3);
   for (int dd = 0; dd < 3; ++dd) idx_orig_shift[dd] = nat_stt[dd] - ext_stt[dd];
-  
+
   // allocate the reserve the cell list
-  clist.resize (ncell);
+  clist.resize(ncell);
   int esti_natom_per_cell = nall / ncell + 10;
-  for (unsigned ii = 0; ii < clist.size(); ++ii){
+  for (unsigned ii = 0; ii < clist.size(); ++ii) {
     clist[ii].clear();
-    clist[ii].reserve (esti_natom_per_cell);
+    clist[ii].reserve(esti_natom_per_cell);
   }
 
   // build the cell list
-  for (int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     double inter[3];
-    region.phys2Inter (inter, &(coord[ii*3]));
-    std::vector idx(3);
-    for (int dd = 0; dd < 3; ++dd){
+    region.phys2Inter(inter, &(coord[ii * 3]));
+    std::vector idx(3);
+    for (int dd = 0; dd < 3; ++dd) {
       idx[dd] = (inter[dd] - nat_orig[dd]) / cell_size[dd];
-      if (inter[dd] - nat_orig[dd] < 0.) idx[dd] --;
+      if (inter[dd] - nat_orig[dd] < 0.) idx[dd]--;
       if (idx[dd] < nat_stt[dd]) {
-	if (count_warning_loc_idx_lower < MAX_WARN_IDX_OUT_OF_BOUND) {
-	  std::cerr << "# warning: loc idx out of lower bound (ignored if warned for more than " << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << std::endl;
-	  count_warning_loc_idx_lower ++;
-	}	
-	idx[dd] = nat_stt[dd];
-      }
-      else if (idx[dd] >= nat_end[dd]) {
-	if (count_warning_loc_idx_upper < MAX_WARN_IDX_OUT_OF_BOUND) {
-	  std::cerr << "# warning: loc idx out of upper bound (ignored if warned for more than " << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << std::endl;
-	  count_warning_loc_idx_upper ++;
-	}
-	idx[dd] = nat_end[dd] - 1;
+        if (count_warning_loc_idx_lower < MAX_WARN_IDX_OUT_OF_BOUND) {
+          std::cerr << "# warning: loc idx out of lower bound (ignored if "
+                       "warned for more than "
+                    << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << std::endl;
+          count_warning_loc_idx_lower++;
+        }
+        idx[dd] = nat_stt[dd];
+      } else if (idx[dd] >= nat_end[dd]) {
+        if (count_warning_loc_idx_upper < MAX_WARN_IDX_OUT_OF_BOUND) {
+          std::cerr << "# warning: loc idx out of upper bound (ignored if "
+                       "warned for more than "
+                    << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << std::endl;
+          count_warning_loc_idx_upper++;
+        }
+        idx[dd] = nat_end[dd] - 1;
       }
       idx[dd] += idx_orig_shift[dd];
     }
-    clist[collapse_index(idx, ext_ncell)].push_back (ii);
+    clist[collapse_index(idx, ext_ncell)].push_back(ii);
   }
-  for (int ii = nloc; ii < nall; ++ii){
+  for (int ii = nloc; ii < nall; ++ii) {
     double inter[3];
-    region.phys2Inter (inter, &(coord[ii*3]));
-    std::vector idx(3);
-    for (int dd = 0; dd < 3; ++dd){
+    region.phys2Inter(inter, &(coord[ii * 3]));
+    std::vector idx(3);
+    for (int dd = 0; dd < 3; ++dd) {
       idx[dd] = (inter[dd] - nat_orig[dd]) / cell_size[dd];
-      if (inter[dd] - nat_orig[dd] < 0.) idx[dd] --;
+      if (inter[dd] - nat_orig[dd] < 0.) idx[dd]--;
       if (idx[dd] < ext_stt[dd]) {
-	if (count_warning_ghost_idx_lower < MAX_WARN_IDX_OUT_OF_BOUND &&
-	    fabs((inter[dd] - nat_orig[dd]) - (ext_stt[dd] * cell_size[dd]))
-	    > fabs(ext_stt[dd] * cell_size[dd]) * std::numeric_limits::epsilon() * 5.
-	    ) {
-	  std::cerr << "# warning: ghost idx out of lower bound (ignored if warned for more than " << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << std::endl;
-	  count_warning_ghost_idx_lower ++;
-	}
-	idx[dd] = ext_stt[dd];
-      }
-      else if (idx[dd] >= ext_end[dd]) {
-	if (count_warning_ghost_idx_upper < MAX_WARN_IDX_OUT_OF_BOUND) {
-	  std::cerr << "# warning: ghost idx out of upper bound (ignored if warned for more than " << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << std::endl;
-	  count_warning_ghost_idx_upper ++;
-	}
-	idx[dd] = ext_end[dd] - 1;
+        if (count_warning_ghost_idx_lower < MAX_WARN_IDX_OUT_OF_BOUND &&
+            fabs((inter[dd] - nat_orig[dd]) - (ext_stt[dd] * cell_size[dd])) >
+                fabs(ext_stt[dd] * cell_size[dd]) *
+                    std::numeric_limits::epsilon() * 5.) {
+          std::cerr << "# warning: ghost idx out of lower bound (ignored if "
+                       "warned for more than "
+                    << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << std::endl;
+          count_warning_ghost_idx_lower++;
+        }
+        idx[dd] = ext_stt[dd];
+      } else if (idx[dd] >= ext_end[dd]) {
+        if (count_warning_ghost_idx_upper < MAX_WARN_IDX_OUT_OF_BOUND) {
+          std::cerr << "# warning: ghost idx out of upper bound (ignored if "
+                       "warned for more than "
+                    << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << std::endl;
+          count_warning_ghost_idx_upper++;
+        }
+        idx[dd] = ext_end[dd] - 1;
       }
       idx[dd] += idx_orig_shift[dd];
     }
-    clist[collapse_index(idx, ext_ncell)].push_back (ii);
+    clist[collapse_index(idx, ext_ncell)].push_back(ii);
   }
 }
 
-void 
-build_clist (std::vector > &	clist,
-	     const std::vector &	coord,
-	     const std::vector  &	sel,
-	     const std::vector &	nat_stt,
-	     const std::vector &	nat_end,
-	     const SimulationRegion & region)
-{
+void build_clist(std::vector >& clist,
+                 const std::vector& coord,
+                 const std::vector& sel,
+                 const std::vector& nat_stt,
+                 const std::vector& nat_end,
+                 const SimulationRegion& region) {
   static int count_warning_loc_idx_lower = 0;
   static int count_warning_loc_idx_upper = 0;
   // compute region info, in terms of internal coord
@@ -144,152 +141,149 @@ build_clist (std::vector > &	clist,
   std::vector nat_ncell(3);
   for (int dd = 0; dd < 3; ++dd) nat_ncell[dd] = nat_end[dd] - nat_stt[dd];
   int ncell = nat_ncell[0] * nat_ncell[1] * nat_ncell[2];
-  std::vector cell_size (3);
-  for (int dd = 0; dd < 3; ++dd) cell_size[dd] = 1./nat_end[dd];
-  std::vector nat_orig(3);
+  std::vector cell_size(3);
+  for (int dd = 0; dd < 3; ++dd) cell_size[dd] = 1. / nat_end[dd];
+  std::vector nat_orig(3);
   for (int dd = 0; dd < 3; ++dd) nat_orig[dd] = nat_stt[dd] * cell_size[dd];
-  
+
   // allocate the reserve the cell list
-  clist.resize (ncell);
+  clist.resize(ncell);
   int esti_natom_per_cell = nall / ncell + 10;
-  for (unsigned ii = 0; ii < clist.size(); ++ii){
+  for (unsigned ii = 0; ii < clist.size(); ++ii) {
     clist[ii].clear();
-    clist[ii].reserve (esti_natom_per_cell);
+    clist[ii].reserve(esti_natom_per_cell);
   }
 
   // build the cell list
-  for (unsigned _ = 0; _ < sel.size(); ++_){
+  for (unsigned _ = 0; _ < sel.size(); ++_) {
     int ii = sel[_];
     double inter[3];
-    region.phys2Inter (inter, &(coord[ii*3]));
-    std::vector idx(3);
-    for (int dd = 0; dd < 3; ++dd){
+    region.phys2Inter(inter, &(coord[ii * 3]));
+    std::vector idx(3);
+    for (int dd = 0; dd < 3; ++dd) {
       idx[dd] = (inter[dd] - nat_orig[dd]) / cell_size[dd];
-      if (inter[dd] - nat_orig[dd] < 0.) idx[dd] --;
+      if (inter[dd] - nat_orig[dd] < 0.) idx[dd]--;
       if (idx[dd] < nat_stt[dd]) {
-	if (count_warning_loc_idx_lower < MAX_WARN_IDX_OUT_OF_BOUND) {
-	  std::cerr << "# warning: loc idx out of lower bound (ignored if warned for more than " << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << std::endl;
-	  count_warning_loc_idx_lower ++;
-	}	
-	idx[dd] = nat_stt[dd];
-      }
-      else if (idx[dd] >= nat_end[dd]) {
-	if (count_warning_loc_idx_upper < MAX_WARN_IDX_OUT_OF_BOUND) {
-	  std::cerr << "# warning: loc idx out of upper bound (ignored if warned for more than " << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << std::endl;
-	  count_warning_loc_idx_upper ++;
-	}	
-	idx[dd] = nat_end[dd] - 1;
+        if (count_warning_loc_idx_lower < MAX_WARN_IDX_OUT_OF_BOUND) {
+          std::cerr << "# warning: loc idx out of lower bound (ignored if "
+                       "warned for more than "
+                    << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << std::endl;
+          count_warning_loc_idx_lower++;
+        }
+        idx[dd] = nat_stt[dd];
+      } else if (idx[dd] >= nat_end[dd]) {
+        if (count_warning_loc_idx_upper < MAX_WARN_IDX_OUT_OF_BOUND) {
+          std::cerr << "# warning: loc idx out of upper bound (ignored if "
+                       "warned for more than "
+                    << MAX_WARN_IDX_OUT_OF_BOUND << " times) " << std::endl;
+          count_warning_loc_idx_upper++;
+        }
+        idx[dd] = nat_end[dd] - 1;
       }
     }
-    clist[collapse_index(idx, nat_ncell)].push_back (ii);
+    clist[collapse_index(idx, nat_ncell)].push_back(ii);
   }
 }
 
-
-void
-build_nlist_cell (std::vector > &	nlist0,
-		  std::vector > &	nlist1,
-		  const int &			cidx,
-		  const int &			tidx, 
-		  const std::vector > &	clist,
-		  const std::vector &	coord,
-		  const double &		rc02,
-		  const double &		rc12,
-		  const std::vector &		shift = {0, 0, 0},
-		  const std::vector &	boxt = {0., 0., 0., 0., 0., 0., 0., 0., 0.})
-{
+void build_nlist_cell(std::vector >& nlist0,
+                      std::vector >& nlist1,
+                      const int& cidx,
+                      const int& tidx,
+                      const std::vector >& clist,
+                      const std::vector& coord,
+                      const double& rc02,
+                      const double& rc12,
+                      const std::vector& shift = {0, 0, 0},
+                      const std::vector& boxt = {0., 0., 0., 0., 0., 0.,
+                                                         0., 0., 0.}) {
   int nloc = nlist0.size();
   // loop over c (current) cell
-  for (unsigned ii = 0; ii < clist[cidx].size(); ++ii){
+  for (unsigned ii = 0; ii < clist[cidx].size(); ++ii) {
     int i_idx = clist[cidx][ii];
     // assert (i_idx < nloc);
     // loop over t (target) cell
-    for (unsigned jj = 0; jj < clist[tidx].size(); ++jj){
+    for (unsigned jj = 0; jj < clist[tidx].size(); ++jj) {
       int j_idx = clist[tidx][jj];
       if (cidx == tidx && j_idx <= i_idx) continue;
       double diff[3];
       for (int dd0 = 0; dd0 < 3; ++dd0) {
-	diff[dd0] = coord[i_idx*3 + dd0] - coord[j_idx*3 + dd0];
-	for (int dd1 = 0; dd1 < 3; ++dd1) {
-	  diff[dd0] += shift[dd1] * boxt[3*dd1+dd0];
-	}
+        diff[dd0] = coord[i_idx * 3 + dd0] - coord[j_idx * 3 + dd0];
+        for (int dd1 = 0; dd1 < 3; ++dd1) {
+          diff[dd0] += shift[dd1] * boxt[3 * dd1 + dd0];
+        }
       }
       double r2 = deepmd::dot3(diff, diff);
       if (r2 < rc02) {
-	if (i_idx < nloc) nlist0[i_idx].push_back (j_idx);
-	if (j_idx < nloc) nlist0[j_idx].push_back (i_idx);
+        if (i_idx < nloc) nlist0[i_idx].push_back(j_idx);
+        if (j_idx < nloc) nlist0[j_idx].push_back(i_idx);
+      } else if (r2 < rc12) {
+        if (i_idx < nloc) nlist1[i_idx].push_back(j_idx);
+        if (j_idx < nloc) nlist1[j_idx].push_back(i_idx);
       }
-      else if (r2 < rc12) {
-	if (i_idx < nloc) nlist1[i_idx].push_back (j_idx);
-	if (j_idx < nloc) nlist1[j_idx].push_back (i_idx);
-      }      
     }
   }
 }
 
-void
-build_nlist_cell (std::vector > &	nlist0,
-		  std::vector > &	nlist1,
-		  const int &			cidx,
-		  const int &			tidx, 
-		  const std::vector > &	clist0,
-		  const std::vector > &	clist1,
-		  const std::vector &	coord,
-		  const double &		rc02,
-		  const double &		rc12,
-		  const std::vector &		shift = {0, 0, 0},
-		  const std::vector &	boxt = {0., 0., 0., 0., 0., 0., 0., 0., 0.})
-{
+void build_nlist_cell(std::vector >& nlist0,
+                      std::vector >& nlist1,
+                      const int& cidx,
+                      const int& tidx,
+                      const std::vector >& clist0,
+                      const std::vector >& clist1,
+                      const std::vector& coord,
+                      const double& rc02,
+                      const double& rc12,
+                      const std::vector& shift = {0, 0, 0},
+                      const std::vector& boxt = {0., 0., 0., 0., 0., 0.,
+                                                         0., 0., 0.}) {
   // loop over c (current) cell
-  for (unsigned ii = 0; ii < clist0[cidx].size(); ++ii){
+  for (unsigned ii = 0; ii < clist0[cidx].size(); ++ii) {
     int i_idx = clist0[cidx][ii];
     if (i_idx >= nlist0.size()) continue;
     // loop over t (target) cell
-    for (unsigned jj = 0; jj < clist1[tidx].size(); ++jj){
+    for (unsigned jj = 0; jj < clist1[tidx].size(); ++jj) {
       int j_idx = clist1[tidx][jj];
       if (cidx == tidx && j_idx == i_idx) continue;
       double diff[3];
       for (int dd0 = 0; dd0 < 3; ++dd0) {
-	diff[dd0] = coord[i_idx*3 + dd0] - coord[j_idx*3 + dd0];
-	for (int dd1 = 0; dd1 < 3; ++dd1) {
-	  diff[dd0] += shift[dd1] * boxt[3*dd1+dd0];
-	}
+        diff[dd0] = coord[i_idx * 3 + dd0] - coord[j_idx * 3 + dd0];
+        for (int dd1 = 0; dd1 < 3; ++dd1) {
+          diff[dd0] += shift[dd1] * boxt[3 * dd1 + dd0];
+        }
       }
       double r2 = deepmd::dot3(diff, diff);
       if (r2 < rc02) {
-	nlist0[i_idx].push_back (j_idx);
+        nlist0[i_idx].push_back(j_idx);
+      } else if (r2 < rc12) {
+        nlist1[i_idx].push_back(j_idx);
       }
-      else if (r2 < rc12) {
-	nlist1[i_idx].push_back (j_idx);
-      }      
     }
   }
 }
 
-void
-build_nlist (std::vector > &	nlist0,
-	     std::vector > &	nlist1,
-	     const std::vector &	coord,
-	     const int &		nloc,
-	     const double &		rc0,
-	     const double &		rc1,
-	     const std::vector &	nat_stt_,
-	     const std::vector &	nat_end_,
-	     const std::vector &	ext_stt_,
-	     const std::vector &	ext_end_,
-	     const SimulationRegion & region,
-	     const std::vector &	global_grid)
-{
+void build_nlist(std::vector >& nlist0,
+                 std::vector >& nlist1,
+                 const std::vector& coord,
+                 const int& nloc,
+                 const double& rc0,
+                 const double& rc1,
+                 const std::vector& nat_stt_,
+                 const std::vector& nat_end_,
+                 const std::vector& ext_stt_,
+                 const std::vector& ext_end_,
+                 const SimulationRegion& region,
+                 const std::vector& global_grid) {
   // normalize the index
   // i require that the ext_stt = {0, 0, 0}
-  std::vector nat_stt (nat_stt_);
-  std::vector nat_end (nat_end_);
-  std::vector ext_stt (ext_stt_);
-  std::vector ext_end (ext_end_);
-  
+  std::vector nat_stt(nat_stt_);
+  std::vector nat_end(nat_end_);
+  std::vector ext_stt(ext_stt_);
+  std::vector ext_end(ext_end_);
+
   // compute the clist
-  std::vector > clist;
-  build_clist (clist, coord, nloc, nat_stt, nat_end, ext_stt, ext_end, region, global_grid);
+  std::vector > clist;
+  build_clist(clist, coord, nloc, nat_stt, nat_end, ext_stt, ext_end, region,
+              global_grid);
 
   // compute the region info
   int nall = coord.size() / 3;
@@ -297,37 +291,39 @@ build_nlist (std::vector > &	nlist0,
   for (int dd = 0; dd < 3; ++dd) ext_ncell[dd] = ext_end[dd] - ext_stt[dd];
 
   // compute number of iter according to the cut-off
-  assert (rc0 <= rc1);
-  std::vector niter (3);
-  double to_face [3];
-  region.toFaceDistance (to_face);
-  for (int dd = 0; dd < 3; ++dd){
+  assert(rc0 <= rc1);
+  std::vector niter(3);
+  double to_face[3];
+  region.toFaceDistance(to_face);
+  for (int dd = 0; dd < 3; ++dd) {
     double cell_size = to_face[dd] / nat_end[dd];
     niter[dd] = rc1 / cell_size;
     if (niter[dd] * cell_size < rc1) niter[dd] += 1;
-    assert (niter[dd] * cell_size >= rc1);
+    assert(niter[dd] * cell_size >= rc1);
   }
   // check the validity of the iters
-  for (int dd = 0; dd < 3; ++dd){
-    assert (nat_stt[dd] - niter[dd] >= ext_stt[dd]);
-    assert (nat_end[dd] + niter[dd] <= ext_end[dd]);
+  for (int dd = 0; dd < 3; ++dd) {
+    assert(nat_stt[dd] - niter[dd] >= ext_stt[dd]);
+    assert(nat_end[dd] + niter[dd] <= ext_end[dd]);
   }
 
   // allocate the nlists
   double density = nloc / region.getVolume();
-  nlist0.resize (nloc);
-  for (int ii = 0; ii < nloc; ++ii){
+  nlist0.resize(nloc);
+  for (int ii = 0; ii < nloc; ++ii) {
     nlist0[ii].clear();
-    int esti = 4./3. * 3.14 * (rc0*rc0*rc0) * density * 1.5 + 20;
+    int esti = 4. / 3. * 3.14 * (rc0 * rc0 * rc0) * density * 1.5 + 20;
     if (esti < 0) esti = 10;
-    nlist0[ii].reserve ( esti );
-  }  
-  nlist1.resize (nloc);
-  for (int ii = 0; ii < nloc; ++ii){
+    nlist0[ii].reserve(esti);
+  }
+  nlist1.resize(nloc);
+  for (int ii = 0; ii < nloc; ++ii) {
     nlist1[ii].clear();
-    int esti = 4./3. * 3.14 * (rc1*rc1*rc1 - rc0*rc0*rc0) * density * 1.5 + 20;
+    int esti =
+        4. / 3. * 3.14 * (rc1 * rc1 * rc1 - rc0 * rc0 * rc0) * density * 1.5 +
+        20;
     if (esti < 0) esti = 10;
-    nlist1[ii].reserve ( esti );
+    nlist1[ii].reserve(esti);
   }
 
   // shift of the idx origin
@@ -339,82 +335,89 @@ build_nlist (std::vector > &	nlist0,
   if (rc0 > 0) rc02 = rc0 * rc0;
   double rc12 = rc1 * rc1;
   std::vector cidx(3);
-  for (cidx[0] = nat_stt[0]; cidx[0] < nat_end[0]; ++cidx[0]){
-    for (cidx[1] = nat_stt[1]; cidx[1] < nat_end[1]; ++cidx[1]){
-      for (cidx[2] = nat_stt[2]; cidx[2] < nat_end[2]; ++cidx[2]){
-	std::vector mcidx(3);
-	for (int dd = 0; dd < 3; ++dd) mcidx[dd] = cidx[dd] + idx_orig_shift[dd];
-	int clp_cidx = collapse_index (mcidx, ext_ncell);
-	std::vector tidx(3);
-	for (tidx[0] = cidx[0] - niter[0]; tidx[0] < cidx[0] + niter[0] + 1; ++tidx[0]) {
-	  for (tidx[1] = cidx[1] - niter[1]; tidx[1] < cidx[1] + niter[1] + 1; ++tidx[1]) {
-	    for (tidx[2] = cidx[2] - niter[2]; tidx[2] < cidx[2] + niter[2] + 1; ++tidx[2]) {
-	      std::vector mtidx(3);
-	      for (int dd = 0; dd < 3; ++dd) mtidx[dd] = tidx[dd] + idx_orig_shift[dd];
-	      int clp_tidx = collapse_index (mtidx, ext_ncell);
-	      if (is_loc(tidx, nat_stt, nat_end) && clp_tidx < clp_cidx) continue;
-	      build_nlist_cell (nlist0, nlist1, clp_cidx, clp_tidx, clist, coord, rc02, rc12);
-	    }
-	  }
-	}
+  for (cidx[0] = nat_stt[0]; cidx[0] < nat_end[0]; ++cidx[0]) {
+    for (cidx[1] = nat_stt[1]; cidx[1] < nat_end[1]; ++cidx[1]) {
+      for (cidx[2] = nat_stt[2]; cidx[2] < nat_end[2]; ++cidx[2]) {
+        std::vector mcidx(3);
+        for (int dd = 0; dd < 3; ++dd)
+          mcidx[dd] = cidx[dd] + idx_orig_shift[dd];
+        int clp_cidx = collapse_index(mcidx, ext_ncell);
+        std::vector tidx(3);
+        for (tidx[0] = cidx[0] - niter[0]; tidx[0] < cidx[0] + niter[0] + 1;
+             ++tidx[0]) {
+          for (tidx[1] = cidx[1] - niter[1]; tidx[1] < cidx[1] + niter[1] + 1;
+               ++tidx[1]) {
+            for (tidx[2] = cidx[2] - niter[2]; tidx[2] < cidx[2] + niter[2] + 1;
+                 ++tidx[2]) {
+              std::vector mtidx(3);
+              for (int dd = 0; dd < 3; ++dd)
+                mtidx[dd] = tidx[dd] + idx_orig_shift[dd];
+              int clp_tidx = collapse_index(mtidx, ext_ncell);
+              if (is_loc(tidx, nat_stt, nat_end) && clp_tidx < clp_cidx)
+                continue;
+              build_nlist_cell(nlist0, nlist1, clp_cidx, clp_tidx, clist, coord,
+                               rc02, rc12);
+            }
+          }
+        }
       }
     }
   }
 }
 
-
 // assume nat grid is the global grid. only used for serial simulations
-void
-build_nlist (std::vector > &	nlist0,
-	     std::vector > &	nlist1,
-	     const std::vector &	coord,
-	     const double &		rc0,
-	     const double &		rc1,
-	     const std::vector &	grid,
-	     const SimulationRegion & region)
-{
+void build_nlist(std::vector >& nlist0,
+                 std::vector >& nlist1,
+                 const std::vector& coord,
+                 const double& rc0,
+                 const double& rc1,
+                 const std::vector& grid,
+                 const SimulationRegion& region) {
   // assuming nloc == nall
   int nloc = coord.size() / 3;
   // compute the clist
   std::vector nat_stt(3, 0);
   std::vector nat_end(grid);
-  std::vector > clist;
-  build_clist (clist, coord, nloc, nat_stt, nat_end, nat_stt, nat_end, region, nat_end);
-  
+  std::vector > clist;
+  build_clist(clist, coord, nloc, nat_stt, nat_end, nat_stt, nat_end, region,
+              nat_end);
+
   // compute the region info
   int nall = coord.size() / 3;
   std::vector nat_ncell(3);
   for (int dd = 0; dd < 3; ++dd) nat_ncell[dd] = nat_end[dd] - nat_stt[dd];
 
   // compute number of iter according to the cut-off
-  assert (rc0 <= rc1);
-  std::vector niter (3);
-  double to_face [3];
-  region.toFaceDistance (to_face);
-  for (int dd = 0; dd < 3; ++dd){
+  assert(rc0 <= rc1);
+  std::vector niter(3);
+  double to_face[3];
+  region.toFaceDistance(to_face);
+  for (int dd = 0; dd < 3; ++dd) {
     double cell_size = to_face[dd] / nat_end[dd];
     niter[dd] = rc1 / cell_size;
     if (niter[dd] * cell_size < rc1) niter[dd] += 1;
-    assert (niter[dd] * cell_size >= rc1);
+    assert(niter[dd] * cell_size >= rc1);
   }
   // check the validity of the iters
-  for (int dd = 0; dd < 3; ++dd){
-    assert (niter[dd] <= (nat_end[dd] - nat_stt[dd]) / 2);
+  for (int dd = 0; dd < 3; ++dd) {
+    assert(niter[dd] <= (nat_end[dd] - nat_stt[dd]) / 2);
   }
 
   // allocate the nlists
   double density = nall / region.getVolume();
-  nlist0.resize (nloc);
-  for (int ii = 0; ii < nloc; ++ii){
+  nlist0.resize(nloc);
+  for (int ii = 0; ii < nloc; ++ii) {
     nlist0[ii].clear();
-    nlist0[ii].reserve ( 4./3. * 3.14 * (rc0*rc0*rc0) * density * 1.5 + 20);
-  }  
-  nlist1.resize (nloc);
-  for (int ii = 0; ii < nloc; ++ii){
+    nlist0[ii].reserve(4. / 3. * 3.14 * (rc0 * rc0 * rc0) * density * 1.5 + 20);
+  }
+  nlist1.resize(nloc);
+  for (int ii = 0; ii < nloc; ++ii) {
     nlist1[ii].clear();
-    nlist1[ii].reserve ( 4./3. * 3.14 * (rc1*rc1*rc1 - rc0*rc0*rc0) * density * 1.5 + 20);
+    nlist1[ii].reserve(4. / 3. * 3.14 * (rc1 * rc1 * rc1 - rc0 * rc0 * rc0) *
+                           density * 1.5 +
+                       20);
   }
-  
+
   // physical cell size
   std::vector phys_cs(9);
   for (int dd = 0; dd < 9; ++dd) phys_cs[dd] = region.getBoxTensor()[dd];
@@ -426,9 +429,9 @@ build_nlist (std::vector > &	nlist0,
 
 #ifdef HALF_NEIGHBOR_LIST
   std::vector cidx(3);
-  for (cidx[0] = nat_stt[0]; cidx[0] < nat_end[0]; ++cidx[0]){
-    for (cidx[1] = nat_stt[1]; cidx[1] < nat_end[1]; ++cidx[1]){
-      for (cidx[2] = nat_stt[2]; cidx[2] < nat_end[2]; ++cidx[2]){
+  for (cidx[0] = nat_stt[0]; cidx[0] < nat_end[0]; ++cidx[0]) {
+    for (cidx[1] = nat_stt[1]; cidx[1] < nat_end[1]; ++cidx[1]) {
+      for (cidx[2] = nat_stt[2]; cidx[2] < nat_end[2]; ++cidx[2]) {
 #else
   int idx_range[3];
   idx_range[0] = nat_end[0] - nat_stt[0];
@@ -445,94 +448,104 @@ build_nlist (std::vector > &	nlist0,
     {
       {
 #endif
-	int clp_cidx = collapse_index (cidx, nat_ncell);
-	std::vector tidx(3);
-	std::vector stidx(3);
-	std::vector shift(3);
-	for (tidx[0] = cidx[0] - niter[0]; tidx[0] < cidx[0] + niter[0] + 1; ++tidx[0]) {
-	  shift[0] = 0;
-	  if      (tidx[0] < 0)			shift[0] += 1;
-	  else if (tidx[0] >= nat_ncell[0])	shift[0] -= 1;
-	  stidx[0] = tidx[0] + shift[0] * nat_ncell[0];
-	  for (tidx[1] = cidx[1] - niter[1]; tidx[1] < cidx[1] + niter[1] + 1; ++tidx[1]) {
-	    shift[1] = 0;
-	    if      (tidx[1] < 0)		shift[1] += 1;
-	    else if (tidx[1] >= nat_ncell[1])	shift[1] -= 1;
-	    stidx[1] = tidx[1] + shift[1] * nat_ncell[1];
-	    for (tidx[2] = cidx[2] - niter[2]; tidx[2] < cidx[2] + niter[2] + 1; ++tidx[2]) {
-	      shift[2] = 0;
-	      if      (tidx[2] < 0)		shift[2] += 1;
-	      else if (tidx[2] >= nat_ncell[2])	shift[2] -= 1;
-	      stidx[2] = tidx[2] + shift[2] * nat_ncell[2];
-	      int clp_tidx = collapse_index (stidx, nat_ncell);
+        int clp_cidx = collapse_index(cidx, nat_ncell);
+        std::vector tidx(3);
+        std::vector stidx(3);
+        std::vector shift(3);
+        for (tidx[0] = cidx[0] - niter[0]; tidx[0] < cidx[0] + niter[0] + 1;
+             ++tidx[0]) {
+          shift[0] = 0;
+          if (tidx[0] < 0)
+            shift[0] += 1;
+          else if (tidx[0] >= nat_ncell[0])
+            shift[0] -= 1;
+          stidx[0] = tidx[0] + shift[0] * nat_ncell[0];
+          for (tidx[1] = cidx[1] - niter[1]; tidx[1] < cidx[1] + niter[1] + 1;
+               ++tidx[1]) {
+            shift[1] = 0;
+            if (tidx[1] < 0)
+              shift[1] += 1;
+            else if (tidx[1] >= nat_ncell[1])
+              shift[1] -= 1;
+            stidx[1] = tidx[1] + shift[1] * nat_ncell[1];
+            for (tidx[2] = cidx[2] - niter[2]; tidx[2] < cidx[2] + niter[2] + 1;
+                 ++tidx[2]) {
+              shift[2] = 0;
+              if (tidx[2] < 0)
+                shift[2] += 1;
+              else if (tidx[2] >= nat_ncell[2])
+                shift[2] -= 1;
+              stidx[2] = tidx[2] + shift[2] * nat_ncell[2];
+              int clp_tidx = collapse_index(stidx, nat_ncell);
 #ifdef HALF_NEIGHBOR_LIST
-	      if (clp_tidx < clp_cidx) continue;
-	      build_nlist_cell (nlist0, nlist1, clp_cidx, clp_tidx, clist, coord, rc02, rc12, shift, phys_cs);
+              if (clp_tidx < clp_cidx) continue;
+              build_nlist_cell(nlist0, nlist1, clp_cidx, clp_tidx, clist, coord,
+                               rc02, rc12, shift, phys_cs);
 #else
-	      build_nlist_cell (nlist0, nlist1, clp_cidx, clp_tidx, clist, clist, coord, rc02, rc12, shift, phys_cs);
+              build_nlist_cell(nlist0, nlist1, clp_cidx, clp_tidx, clist, clist,
+                               coord, rc02, rc12, shift, phys_cs);
 #endif
-	    }
-	  }
-	}
+            }
+          }
+        }
       }
     }
   }
 }
 
-
-void
-build_nlist (std::vector > &	nlist0,
-	     std::vector > &	nlist1,
-	     const std::vector &	coord,
-	     const std::vector &	sel0,
-	     const std::vector &	sel1,
-	     const double &		rc0,
-	     const double &		rc1,
-	     const std::vector &	grid,
-	     const SimulationRegion & region)
-{
+void build_nlist(std::vector >& nlist0,
+                 std::vector >& nlist1,
+                 const std::vector& coord,
+                 const std::vector& sel0,
+                 const std::vector& sel1,
+                 const double& rc0,
+                 const double& rc1,
+                 const std::vector& grid,
+                 const SimulationRegion& region) {
   int nloc = coord.size() / 3;
   // compute the clist
   std::vector nat_stt(3, 0);
   std::vector nat_end(grid);
-  std::vector > clist0, clist1;
-  build_clist (clist0, coord, sel0, nat_stt, nat_end, region);
-  build_clist (clist1, coord, sel1, nat_stt, nat_end, region);
-  
+  std::vector > clist0, clist1;
+  build_clist(clist0, coord, sel0, nat_stt, nat_end, region);
+  build_clist(clist1, coord, sel1, nat_stt, nat_end, region);
+
   // compute the region info
   int nall = coord.size() / 3;
   std::vector nat_ncell(3);
   for (int dd = 0; dd < 3; ++dd) nat_ncell[dd] = nat_end[dd] - nat_stt[dd];
 
   // compute number of iter according to the cut-off
-  assert (rc0 <= rc1);
-  std::vector niter (3);
-  double to_face [3];
-  region.toFaceDistance (to_face);
-  for (int dd = 0; dd < 3; ++dd){
+  assert(rc0 <= rc1);
+  std::vector niter(3);
+  double to_face[3];
+  region.toFaceDistance(to_face);
+  for (int dd = 0; dd < 3; ++dd) {
     double cell_size = to_face[dd] / nat_end[dd];
     niter[dd] = rc1 / cell_size;
     if (niter[dd] * cell_size < rc1) niter[dd] += 1;
-    assert (niter[dd] * cell_size >= rc1);
+    assert(niter[dd] * cell_size >= rc1);
   }
   // check the validity of the iters
-  for (int dd = 0; dd < 3; ++dd){
-    assert (niter[dd] <= (nat_end[dd] - nat_stt[dd]) / 2);
+  for (int dd = 0; dd < 3; ++dd) {
+    assert(niter[dd] <= (nat_end[dd] - nat_stt[dd]) / 2);
   }
 
   // allocate the nlists
   double density = nall / region.getVolume();
-  nlist0.resize (nloc);
-  for (int ii = 0; ii < nloc; ++ii){
+  nlist0.resize(nloc);
+  for (int ii = 0; ii < nloc; ++ii) {
     nlist0[ii].clear();
-    nlist0[ii].reserve ( 4./3. * 3.14 * (rc0*rc0*rc0) * density * 1.5 + 20);
-  }  
-  nlist1.resize (nloc);
-  for (int ii = 0; ii < nloc; ++ii){
+    nlist0[ii].reserve(4. / 3. * 3.14 * (rc0 * rc0 * rc0) * density * 1.5 + 20);
+  }
+  nlist1.resize(nloc);
+  for (int ii = 0; ii < nloc; ++ii) {
     nlist1[ii].clear();
-    nlist1[ii].reserve ( 4./3. * 3.14 * (rc1*rc1*rc1 - rc0*rc0*rc0) * density * 1.5 + 20);
+    nlist1[ii].reserve(4. / 3. * 3.14 * (rc1 * rc1 * rc1 - rc0 * rc0 * rc0) *
+                           density * 1.5 +
+                       20);
   }
-  
+
   // physical cell size
   std::vector phys_cs(9);
   for (int dd = 0; dd < 9; ++dd) phys_cs[dd] = region.getBoxTensor()[dd];
@@ -542,133 +555,135 @@ build_nlist (std::vector > &	nlist0,
   if (rc0 > 0) rc02 = rc0 * rc0;
   double rc12 = rc1 * rc1;
   std::vector cidx(3);
-  for (cidx[0] = nat_stt[0]; cidx[0] < nat_end[0]; ++cidx[0]){
-    for (cidx[1] = nat_stt[1]; cidx[1] < nat_end[1]; ++cidx[1]){
-      for (cidx[2] = nat_stt[2]; cidx[2] < nat_end[2]; ++cidx[2]){
-	int clp_cidx = collapse_index (cidx, nat_ncell);
-	std::vector tidx(3);
-	std::vector stidx(3);
-	std::vector shift(3);
-	for (tidx[0] = cidx[0] - niter[0]; tidx[0] < cidx[0] + niter[0] + 1; ++tidx[0]) {
-	  shift[0] = 0;
-	  if      (tidx[0] < 0)			shift[0] += 1;
-	  else if (tidx[0] >= nat_ncell[0])	shift[0] -= 1;
-	  stidx[0] = tidx[0] + shift[0] * nat_ncell[0];
-	  for (tidx[1] = cidx[1] - niter[1]; tidx[1] < cidx[1] + niter[1] + 1; ++tidx[1]) {
-	    shift[1] = 0;
-	    if      (tidx[1] < 0)		shift[1] += 1;
-	    else if (tidx[1] >= nat_ncell[1])	shift[1] -= 1;
-	    stidx[1] = tidx[1] + shift[1] * nat_ncell[1];
-	    for (tidx[2] = cidx[2] - niter[2]; tidx[2] < cidx[2] + niter[2] + 1; ++tidx[2]) {
-	      shift[2] = 0;
-	      if      (tidx[2] < 0)		shift[2] += 1;
-	      else if (tidx[2] >= nat_ncell[2])	shift[2] -= 1;
-	      stidx[2] = tidx[2] + shift[2] * nat_ncell[2];
-	      int clp_tidx = collapse_index (stidx, nat_ncell);
-	      build_nlist_cell (nlist0, nlist1, clp_cidx, clp_tidx, clist0, clist1, coord, rc02, rc12, shift, phys_cs);
-	    }
-	  }
-	}
+  for (cidx[0] = nat_stt[0]; cidx[0] < nat_end[0]; ++cidx[0]) {
+    for (cidx[1] = nat_stt[1]; cidx[1] < nat_end[1]; ++cidx[1]) {
+      for (cidx[2] = nat_stt[2]; cidx[2] < nat_end[2]; ++cidx[2]) {
+        int clp_cidx = collapse_index(cidx, nat_ncell);
+        std::vector tidx(3);
+        std::vector stidx(3);
+        std::vector shift(3);
+        for (tidx[0] = cidx[0] - niter[0]; tidx[0] < cidx[0] + niter[0] + 1;
+             ++tidx[0]) {
+          shift[0] = 0;
+          if (tidx[0] < 0)
+            shift[0] += 1;
+          else if (tidx[0] >= nat_ncell[0])
+            shift[0] -= 1;
+          stidx[0] = tidx[0] + shift[0] * nat_ncell[0];
+          for (tidx[1] = cidx[1] - niter[1]; tidx[1] < cidx[1] + niter[1] + 1;
+               ++tidx[1]) {
+            shift[1] = 0;
+            if (tidx[1] < 0)
+              shift[1] += 1;
+            else if (tidx[1] >= nat_ncell[1])
+              shift[1] -= 1;
+            stidx[1] = tidx[1] + shift[1] * nat_ncell[1];
+            for (tidx[2] = cidx[2] - niter[2]; tidx[2] < cidx[2] + niter[2] + 1;
+                 ++tidx[2]) {
+              shift[2] = 0;
+              if (tidx[2] < 0)
+                shift[2] += 1;
+              else if (tidx[2] >= nat_ncell[2])
+                shift[2] -= 1;
+              stidx[2] = tidx[2] + shift[2] * nat_ncell[2];
+              int clp_tidx = collapse_index(stidx, nat_ncell);
+              build_nlist_cell(nlist0, nlist1, clp_cidx, clp_tidx, clist0,
+                               clist1, coord, rc02, rc12, shift, phys_cs);
+            }
+          }
+        }
       }
     }
   }
-}   
-
+}
 
-void
-build_nlist (std::vector > & nlist0,
-	     std::vector > & nlist1,
-	     const std::vector & posi3,
-	     const double & rc0_,
-	     const double & rc1_,
-	     const SimulationRegion * region)
-{
-  double rc0 (rc0_);
-  double rc1 (rc1_);
-  assert (rc0 <= rc1);
+void build_nlist(std::vector >& nlist0,
+                 std::vector >& nlist1,
+                 const std::vector& posi3,
+                 const double& rc0_,
+                 const double& rc1_,
+                 const SimulationRegion* region) {
+  double rc0(rc0_);
+  double rc1(rc1_);
+  assert(rc0 <= rc1);
   double rc02 = rc0 * rc0;
   // negative rc0 means not applying rc0
   if (rc0 < 0) rc02 = 0;
   double rc12 = rc1 * rc1;
 
-  unsigned natoms = posi3.size()/3;
+  unsigned natoms = posi3.size() / 3;
   nlist0.clear();
   nlist1.clear();
   nlist0.resize(natoms);
   nlist1.resize(natoms);
-  for (unsigned ii = 0; ii < natoms; ++ii){
-    nlist0[ii].reserve (60);
-    nlist1[ii].reserve (60);
+  for (unsigned ii = 0; ii < natoms; ++ii) {
+    nlist0[ii].reserve(60);
+    nlist1[ii].reserve(60);
   }
-  for (unsigned ii = 0; ii < natoms; ++ii){
-    for (unsigned jj = ii+1; jj < natoms; ++jj){
+  for (unsigned ii = 0; ii < natoms; ++ii) {
+    for (unsigned jj = ii + 1; jj < natoms; ++jj) {
       double diff[3];
       if (region != NULL) {
-	region->diffNearestNeighbor (posi3[jj*3+0], posi3[jj*3+1], posi3[jj*3+2],
-				     posi3[ii*3+0], posi3[ii*3+1], posi3[ii*3+2],
-				     diff[0], diff[1], diff[2]);
-      }
-      else {
-	diff[0] = posi3[jj*3+0] - posi3[ii*3+0];
-	diff[1] = posi3[jj*3+1] - posi3[ii*3+1];
-	diff[2] = posi3[jj*3+2] - posi3[ii*3+2];
+        region->diffNearestNeighbor(posi3[jj * 3 + 0], posi3[jj * 3 + 1],
+                                    posi3[jj * 3 + 2], posi3[ii * 3 + 0],
+                                    posi3[ii * 3 + 1], posi3[ii * 3 + 2],
+                                    diff[0], diff[1], diff[2]);
+      } else {
+        diff[0] = posi3[jj * 3 + 0] - posi3[ii * 3 + 0];
+        diff[1] = posi3[jj * 3 + 1] - posi3[ii * 3 + 1];
+        diff[2] = posi3[jj * 3 + 2] - posi3[ii * 3 + 2];
       }
       double r2 = deepmd::dot3(diff, diff);
       if (r2 < rc02) {
-	nlist0[ii].push_back (jj);
-	nlist0[jj].push_back (ii);
-      }
-      else if (r2 < rc12) {
-	nlist1[ii].push_back (jj);
-	nlist1[jj].push_back (ii);
+        nlist0[ii].push_back(jj);
+        nlist0[jj].push_back(ii);
+      } else if (r2 < rc12) {
+        nlist1[ii].push_back(jj);
+        nlist1[jj].push_back(ii);
       }
     }
   }
 }
 
-static int compute_pbc_shift (int idx, 
-			      int ncell)
-{
+static int compute_pbc_shift(int idx, int ncell) {
   int shift = 0;
   if (idx < 0) {
     shift = 1;
-    while (idx + shift * ncell < 0) shift ++;
-  }
-  else if (idx >= ncell) {
+    while (idx + shift * ncell < 0) shift++;
+  } else if (idx >= ncell) {
     shift = -1;
-    while (idx + shift * ncell >= ncell) shift --;
+    while (idx + shift * ncell >= ncell) shift--;
   }
-  assert (idx + shift * ncell >= 0 && idx + shift * ncell < ncell);
+  assert(idx + shift * ncell >= 0 && idx + shift * ncell < ncell);
   return shift;
 }
 
-void 
-copy_coord (std::vector & out_c, 
-	    std::vector & out_t, 
-	    std::vector & mapping,
-	    std::vector & ncell,
-	    std::vector & ngcell,
-	    const std::vector & in_c,
-	    const std::vector & in_t,
-	    const double & rc,
-	    const SimulationRegion & region)
-{
+void copy_coord(std::vector& out_c,
+                std::vector& out_t,
+                std::vector& mapping,
+                std::vector& ncell,
+                std::vector& ngcell,
+                const std::vector& in_c,
+                const std::vector& in_t,
+                const double& rc,
+                const SimulationRegion& region) {
   int nloc = in_c.size() / 3;
   assert(nloc == in_t.size());
 
   ncell.resize(3);
   ngcell.resize(3);
-  double to_face [3];
-  double cell_size [3];
-  region.toFaceDistance (to_face);
-  for (int dd = 0; dd < 3; ++dd){
-    ncell[dd]  = to_face[dd] / rc;
+  double to_face[3];
+  double cell_size[3];
+  region.toFaceDistance(to_face);
+  for (int dd = 0; dd < 3; ++dd) {
+    ncell[dd] = to_face[dd] / rc;
     if (ncell[dd] == 0) ncell[dd] = 1;
     cell_size[dd] = to_face[dd] / ncell[dd];
     ngcell[dd] = int(rc / cell_size[dd]) + 1;
     assert(cell_size[dd] * ngcell[dd] >= rc);
   }
-  int total_ncell = (2 * ngcell[0] + ncell[0]) * (2 * ngcell[1] + ncell[1]) * (2 * ngcell[2] + ncell[2]);
+  int total_ncell = (2 * ngcell[0] + ncell[0]) * (2 * ngcell[1] + ncell[1]) *
+                    (2 * ngcell[2] + ncell[2]);
   int loc_ncell = (ncell[0]) * (ncell[1]) * (ncell[2]);
   int esti_ntotal = total_ncell / loc_ncell * nloc + 10;
 
@@ -676,11 +691,11 @@ copy_coord (std::vector & out_c,
   out_c.reserve(esti_ntotal * 6);
   out_t.reserve(esti_ntotal * 2);
   mapping.reserve(esti_ntotal * 2);
-  
+
   // build cell list
-  std::vector > clist;
+  std::vector > clist;
   std::vector nat_stt(3, 0);
-  build_clist (clist, in_c, nloc, nat_stt, ncell, nat_stt, ncell, region, ncell);
+  build_clist(clist, in_c, nloc, nat_stt, ncell, nat_stt, ncell, region, ncell);
 
   // copy local atoms
   out_c.resize(nloc * 3);
@@ -693,52 +708,49 @@ copy_coord (std::vector & out_c,
   // push ghost
   std::vector ii(3), jj(3), pbc_shift(3, 0);
   double pbc_shift_d[3];
-  for (ii[0] = -ngcell[0]; ii[0] < ncell[0] + ngcell[0]; ++ii[0]){
+  for (ii[0] = -ngcell[0]; ii[0] < ncell[0] + ngcell[0]; ++ii[0]) {
     pbc_shift[0] = compute_pbc_shift(ii[0], ncell[0]);
     pbc_shift_d[0] = pbc_shift[0];
     jj[0] = ii[0] + pbc_shift[0] * ncell[0];
-    for (ii[1] = -ngcell[1]; ii[1] < ncell[1] + ngcell[1]; ++ii[1]){
+    for (ii[1] = -ngcell[1]; ii[1] < ncell[1] + ngcell[1]; ++ii[1]) {
       pbc_shift[1] = compute_pbc_shift(ii[1], ncell[1]);
       pbc_shift_d[1] = pbc_shift[1];
       jj[1] = ii[1] + pbc_shift[1] * ncell[1];
-      for (ii[2] = -ngcell[2]; ii[2] < ncell[2] + ngcell[2]; ++ii[2]){
-	pbc_shift[2] = compute_pbc_shift(ii[2], ncell[2]);
-	pbc_shift_d[2] = pbc_shift[2];
-	jj[2] = ii[2] + pbc_shift[2] * ncell[2];
-	// local cell, continue
-	if (ii[0] >= 0 && ii[0] < ncell[0] &&
-	    ii[1] >= 0 && ii[1] < ncell[1] &&
-	    ii[2] >= 0 && ii[2] < ncell[2] ){
-	  continue;
-	}
-	double shift_v [3];
-	region.inter2Phys(shift_v, pbc_shift_d);
-	int cell_idx = collapse_index(jj, ncell);
-	std::vector & cur_clist = clist[cell_idx];
-	for (int kk = 0; kk < cur_clist.size(); ++kk){
-	  int p_idx = cur_clist[kk];
-	  double shifted_coord [3];
-	  out_c.push_back(in_c[p_idx*3+0] - shift_v[0]);
-	  out_c.push_back(in_c[p_idx*3+1] - shift_v[1]);
-	  out_c.push_back(in_c[p_idx*3+2] - shift_v[2]);
-	  out_t.push_back(in_t[p_idx]);
-	  mapping.push_back(p_idx);
-	  // double phys[3];
-	  // for (int dd = 0; dd < 3; ++dd) phys[dd] = in_c[p_idx*3+dd] - shift_v[dd];
-	  // double inter[3];
-	  // region.phys2Inter(inter, phys);
-	  // if (  inter[0] >= 0 && inter[0] < 1 &&
-	  // 	inter[1] >= 0 && inter[1] < 1 &&
-	  // 	inter[2] >= 0 && inter[2] < 1 ){
-	  //   std::cout << out_c.size()  / 3 << " "
-	  // 	 << inter[0] << " " 
-	  // 	 << inter[1] << " " 
-	  // 	 << inter[2] << " " 
-	  // 	 << std::endl;
-	  //   std::cout << "err here inner" << std::endl;
-	  //   exit(1);
-	  // }	  
-	}
+      for (ii[2] = -ngcell[2]; ii[2] < ncell[2] + ngcell[2]; ++ii[2]) {
+        pbc_shift[2] = compute_pbc_shift(ii[2], ncell[2]);
+        pbc_shift_d[2] = pbc_shift[2];
+        jj[2] = ii[2] + pbc_shift[2] * ncell[2];
+        // local cell, continue
+        if (ii[0] >= 0 && ii[0] < ncell[0] && ii[1] >= 0 && ii[1] < ncell[1] &&
+            ii[2] >= 0 && ii[2] < ncell[2]) {
+          continue;
+        }
+        double shift_v[3];
+        region.inter2Phys(shift_v, pbc_shift_d);
+        int cell_idx = collapse_index(jj, ncell);
+        std::vector& cur_clist = clist[cell_idx];
+        for (int kk = 0; kk < cur_clist.size(); ++kk) {
+          int p_idx = cur_clist[kk];
+          double shifted_coord[3];
+          out_c.push_back(in_c[p_idx * 3 + 0] - shift_v[0]);
+          out_c.push_back(in_c[p_idx * 3 + 1] - shift_v[1]);
+          out_c.push_back(in_c[p_idx * 3 + 2] - shift_v[2]);
+          out_t.push_back(in_t[p_idx]);
+          mapping.push_back(p_idx);
+          // double phys[3];
+          // for (int dd = 0; dd < 3; ++dd) phys[dd] = in_c[p_idx*3+dd] -
+          // shift_v[dd]; double inter[3]; region.phys2Inter(inter, phys); if (
+          // inter[0] >= 0 && inter[0] < 1 && 	inter[1] >= 0 && inter[1] < 1 &&
+          // 	inter[2] >= 0 && inter[2] < 1 ){
+          //   std::cout << out_c.size()  / 3 << " "
+          // 	 << inter[0] << " "
+          // 	 << inter[1] << " "
+          // 	 << inter[2] << " "
+          // 	 << std::endl;
+          //   std::cout << "err here inner" << std::endl;
+          //   exit(1);
+          // }
+        }
       }
     }
   }
@@ -746,160 +758,128 @@ copy_coord (std::vector & out_c,
 
 using namespace deepmd;
 
-void
-deepmd::
-convert_nlist(
-    InputNlist & to_nlist,
-    std::vector > & from_nlist
-    )
-{
+void deepmd::convert_nlist(InputNlist& to_nlist,
+                           std::vector >& from_nlist) {
   to_nlist.inum = from_nlist.size();
-  for(int ii = 0; ii < to_nlist.inum; ++ii){
+  for (int ii = 0; ii < to_nlist.inum; ++ii) {
     to_nlist.ilist[ii] = ii;
     to_nlist.numneigh[ii] = from_nlist[ii].size();
     to_nlist.firstneigh[ii] = &from_nlist[ii][0];
   }
 }
 
-int
-deepmd::
-max_numneigh(
-    const InputNlist & nlist
-    )
-{
+int deepmd::max_numneigh(const InputNlist& nlist) {
   int max_num = 0;
-  for(int ii = 0; ii < nlist.inum; ++ii){
-    if(nlist.numneigh[ii] > max_num) max_num = nlist.numneigh[ii];
+  for (int ii = 0; ii < nlist.inum; ++ii) {
+    if (nlist.numneigh[ii] > max_num) max_num = nlist.numneigh[ii];
   }
   return max_num;
 }
 
 template 
-int
-deepmd::
-build_nlist_cpu(
-    InputNlist & nlist,
-    int * max_list_size,
-    const FPTYPE * c_cpy,
-    const int & nloc, 
-    const int & nall, 
-    const int & mem_size_,
-    const float & rcut)
-{
+int deepmd::build_nlist_cpu(InputNlist& nlist,
+                            int* max_list_size,
+                            const FPTYPE* c_cpy,
+                            const int& nloc,
+                            const int& nall,
+                            const int& mem_size_,
+                            const float& rcut) {
   const int mem_size = mem_size_;
   *max_list_size = 0;
   nlist.inum = nloc;
-  FPTYPE rcut2 = rcut * rcut;  
+  FPTYPE rcut2 = rcut * rcut;
   std::vector jlist;
-  jlist.reserve(mem_size);  
-  for(int ii = 0; ii < nlist.inum; ++ii){
+  jlist.reserve(mem_size);
+  for (int ii = 0; ii < nlist.inum; ++ii) {
     nlist.ilist[ii] = ii;
     jlist.clear();
-    for(int jj = 0; jj < nall; ++jj){
-      if(jj == ii) continue;
+    for (int jj = 0; jj < nall; ++jj) {
+      if (jj == ii) continue;
       FPTYPE diff[3];
-      for(int dd = 0; dd < 3; ++dd){
-	diff[dd] = c_cpy[ii*3+dd] - c_cpy[jj*3+dd];
+      for (int dd = 0; dd < 3; ++dd) {
+        diff[dd] = c_cpy[ii * 3 + dd] - c_cpy[jj * 3 + dd];
       }
       FPTYPE diff2 = deepmd::dot3(diff, diff);
-      if(diff2 < rcut2){
-	jlist.push_back(jj);
+      if (diff2 < rcut2) {
+        jlist.push_back(jj);
       }
     }
-    if(jlist.size() > mem_size){
+    if (jlist.size() > mem_size) {
       *max_list_size = jlist.size();
-      return 1;      
-    }
-    else {
+      return 1;
+    } else {
       int list_size = jlist.size();
       nlist.numneigh[ii] = list_size;
-      if(list_size > *max_list_size) *max_list_size = list_size;
+      if (list_size > *max_list_size) *max_list_size = list_size;
       std::copy(jlist.begin(), jlist.end(), nlist.firstneigh[ii]);
     }
   }
   return 0;
 }
 
-void 
-deepmd::
-use_nei_info_cpu(
-    int * nlist, 
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int * nlist_map, 
-    const int nloc, 
-    const int nnei,
-    const int ntypes,
-    const bool b_nlist_map)
-{
-    if(b_nlist_map){
-    for (int ii = 0; ii < nloc; ++ii){
-      for (int jj = 0; jj < nnei; ++jj){
-        int nlist_idx = ii*nnei+jj;
+void deepmd::use_nei_info_cpu(int* nlist,
+                              int* ntype,
+                              bool* nmask,
+                              const int* type,
+                              const int* nlist_map,
+                              const int nloc,
+                              const int nnei,
+                              const int ntypes,
+                              const bool b_nlist_map) {
+  if (b_nlist_map) {
+    for (int ii = 0; ii < nloc; ++ii) {
+      for (int jj = 0; jj < nnei; ++jj) {
+        int nlist_idx = ii * nnei + jj;
         int record = nlist[nlist_idx];
-        if (record >= 0){	
+        if (record >= 0) {
           int temp = nlist_map[record];
-          nlist[nlist_idx] = temp;	  
-          ntype[nlist_idx]=type[temp];
-          nmask[nlist_idx]=true;    
-        }
-        else{
-          ntype[nlist_idx]=ntypes;
-          nmask[nlist_idx]=false;
+          nlist[nlist_idx] = temp;
+          ntype[nlist_idx] = type[temp];
+          nmask[nlist_idx] = true;
+        } else {
+          ntype[nlist_idx] = ntypes;
+          nmask[nlist_idx] = false;
         }
       }
-    } 
-  }
-  else{
-    for (int ii = 0; ii < nloc; ++ii){
-      for (int jj = 0; jj < nnei; ++jj){
-        int nlist_idx = ii*nnei+jj;
+    }
+  } else {
+    for (int ii = 0; ii < nloc; ++ii) {
+      for (int jj = 0; jj < nnei; ++jj) {
+        int nlist_idx = ii * nnei + jj;
         int record = nlist[nlist_idx];
-        if (record >= 0){		  
-          ntype[nlist_idx]=type[record];
-          nmask[nlist_idx]=true;    
-        }
-        else{
-          ntype[nlist_idx]=ntypes;
-          nmask[nlist_idx]=false;
+        if (record >= 0) {
+          ntype[nlist_idx] = type[record];
+          nmask[nlist_idx] = true;
+        } else {
+          ntype[nlist_idx] = ntypes;
+          nmask[nlist_idx] = false;
         }
       }
-    } 
+    }
   }
 }
 
-template
-int
-deepmd::
-build_nlist_cpu(
-    InputNlist & nlist,
-    int * max_list_size,
-    const double * c_cpy,
-    const int & nloc, 
-    const int & nall, 
-    const int & mem_size,
-    const float & rcut);
-
-template
-int
-deepmd::
-build_nlist_cpu(
-    InputNlist & nlist,
-    int * max_list_size,
-    const float * c_cpy,
-    const int & nloc, 
-    const int & nall, 
-    const int & mem_size,
-    const float & rcut);
+template int deepmd::build_nlist_cpu(InputNlist& nlist,
+                                             int* max_list_size,
+                                             const double* c_cpy,
+                                             const int& nloc,
+                                             const int& nall,
+                                             const int& mem_size,
+                                             const float& rcut);
+
+template int deepmd::build_nlist_cpu(InputNlist& nlist,
+                                            int* max_list_size,
+                                            const float* c_cpy,
+                                            const int& nloc,
+                                            const int& nall,
+                                            const int& mem_size,
+                                            const float& rcut);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-void deepmd::convert_nlist_gpu_device(
-    InputNlist & gpu_nlist,
-    InputNlist & cpu_nlist,
-    int* & gpu_memory,
-    const int & max_nbor_size)
-{
+void deepmd::convert_nlist_gpu_device(InputNlist& gpu_nlist,
+                                      InputNlist& cpu_nlist,
+                                      int*& gpu_memory,
+                                      const int& max_nbor_size) {
   const int inum = cpu_nlist.inum;
   gpu_nlist.inum = inum;
   malloc_device_memory(gpu_nlist.ilist, inum);
@@ -907,21 +887,20 @@ void deepmd::convert_nlist_gpu_device(
   malloc_device_memory(gpu_nlist.firstneigh, inum);
   memcpy_host_to_device(gpu_nlist.ilist, cpu_nlist.ilist, inum);
   memcpy_host_to_device(gpu_nlist.numneigh, cpu_nlist.numneigh, inum);
-  int ** _firstneigh = NULL;
+  int** _firstneigh = NULL;
   _firstneigh = (int**)malloc(sizeof(int*) * inum);
   for (int ii = 0; ii < inum; ii++) {
-    memcpy_host_to_device(gpu_memory + ii * max_nbor_size, cpu_nlist.firstneigh[ii], cpu_nlist.numneigh[ii]);
+    memcpy_host_to_device(gpu_memory + ii * max_nbor_size,
+                          cpu_nlist.firstneigh[ii], cpu_nlist.numneigh[ii]);
     _firstneigh[ii] = gpu_memory + ii * max_nbor_size;
   }
   memcpy_host_to_device(gpu_nlist.firstneigh, _firstneigh, inum);
   free(_firstneigh);
 }
 
-void deepmd::free_nlist_gpu_device(
-    InputNlist & gpu_nlist)
-{
+void deepmd::free_nlist_gpu_device(InputNlist& gpu_nlist) {
   delete_device_memory(gpu_nlist.ilist);
   delete_device_memory(gpu_nlist.numneigh);
   delete_device_memory(gpu_nlist.firstneigh);
 }
-#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/source/lib/src/pair_tab.cc b/source/lib/src/pair_tab.cc
index 22c9bd5390..864453688d 100644
--- a/source/lib/src/pair_tab.cc
+++ b/source/lib/src/pair_tab.cc
@@ -1,22 +1,21 @@
-#include 
-#include 
+#include "pair_tab.h"
+
 #include 
+#include 
+#include 
 #include 
-#include "pair_tab.h"
+
 #include "errors.h"
 
-inline 
-void _pair_tabulated_inter (
-    double & ener, 
-    double & fscale, 
-    const double * table_info,
-    const double * table_data,
-    const double * dr)
-{
+inline void _pair_tabulated_inter(double& ener,
+                                  double& fscale,
+                                  const double* table_info,
+                                  const double* table_data,
+                                  const double* dr) {
   // info size: 3
-  const double & rmin = table_info[0];
-  const double & hh = table_info[1];
-  const double hi = 1./hh;
+  const double& rmin = table_info[0];
+  const double& hh = table_info[1];
+  const double hi = 1. / hh;
   const unsigned nspline = unsigned(table_info[2] + 0.1);
   const unsigned ndata = nspline * 4;
 
@@ -37,108 +36,93 @@ void _pair_tabulated_inter (
   assert(idx >= 0);
   assert(uu >= 0 && uu < 1);
 
-  const double & a3 = table_data[4 * idx + 0];
-  const double & a2 = table_data[4 * idx + 1];
-  const double & a1 = table_data[4 * idx + 2];
-  const double & a0 = table_data[4 * idx + 3];
-  
+  const double& a3 = table_data[4 * idx + 0];
+  const double& a2 = table_data[4 * idx + 1];
+  const double& a1 = table_data[4 * idx + 2];
+  const double& a0 = table_data[4 * idx + 3];
+
   double etmp = (a3 * uu + a2) * uu + a1;
   ener = etmp * uu + a0;
   fscale = (2. * a3 * uu + a2) * uu + etmp;
   fscale *= -hi;
 }
 
-template
-void _pair_tab_jloop(
-    FPTYPE * energy,
-    FPTYPE * force,
-    FPTYPE * virial,
-    int & jiter,
-    const int & i_idx,
-    const int & nnei,
-    const int & i_type_shift,
-    const double * p_table_info,
-    const double * p_table_data,
-    const int & tab_stride,
-    const FPTYPE * rij,
-    const FPTYPE * scale,
-    const int * type,
-    const int * nlist,
-    const int * natoms,
-    const std::vector & sel
-    )
-{
+template 
+void _pair_tab_jloop(FPTYPE* energy,
+                     FPTYPE* force,
+                     FPTYPE* virial,
+                     int& jiter,
+                     const int& i_idx,
+                     const int& nnei,
+                     const int& i_type_shift,
+                     const double* p_table_info,
+                     const double* p_table_data,
+                     const int& tab_stride,
+                     const FPTYPE* rij,
+                     const FPTYPE* scale,
+                     const int* type,
+                     const int* nlist,
+                     const int* natoms,
+                     const std::vector& sel) {
   const FPTYPE i_scale = scale[i_idx];
-  for (int ss = 0; ss < sel.size(); ++ss){
+  for (int ss = 0; ss < sel.size(); ++ss) {
     int j_type = ss;
-    const double * cur_table_data = 
-	p_table_data + (i_type_shift + j_type) * tab_stride;
-    for (int jj = 0; jj < sel[ss]; ++jj){
+    const double* cur_table_data =
+        p_table_data + (i_type_shift + j_type) * tab_stride;
+    for (int jj = 0; jj < sel[ss]; ++jj) {
       int j_idx = nlist[i_idx * nnei + jiter];
-      if (j_idx < 0){
-	jiter++;
-	continue;
+      if (j_idx < 0) {
+        jiter++;
+        continue;
       }
       assert(j_type == type[j_idx]);
       double dr[3];
-      for (int dd = 0; dd < 3; ++dd){
-	dr[dd] = rij[(i_idx * nnei + jiter) * 3 + dd];
+      for (int dd = 0; dd < 3; ++dd) {
+        dr[dd] = rij[(i_idx * nnei + jiter) * 3 + dd];
       }
       double r2 = dr[0] * dr[0] + dr[1] * dr[1] + dr[2] * dr[2];
-      double ri = 1./sqrt(r2);
+      double ri = 1. / sqrt(r2);
       double ener, fscale;
-      _pair_tabulated_inter(
-	  ener,
-	  fscale, 
-	  p_table_info, 
-	  cur_table_data, 
-	  dr);
+      _pair_tabulated_inter(ener, fscale, p_table_info, cur_table_data, dr);
       energy[i_idx] += 0.5 * ener;
       for (int dd = 0; dd < 3; ++dd) {
-	force[i_idx * 3 + dd] -= fscale * dr[dd] * ri * 0.5 * i_scale;
-	force[j_idx * 3 + dd] += fscale * dr[dd] * ri * 0.5 * i_scale;
+        force[i_idx * 3 + dd] -= fscale * dr[dd] * ri * 0.5 * i_scale;
+        force[j_idx * 3 + dd] += fscale * dr[dd] * ri * 0.5 * i_scale;
       }
       for (int dd0 = 0; dd0 < 3; ++dd0) {
-	for (int dd1 = 0; dd1 < 3; ++dd1) {
-	  virial[i_idx * 9 + dd0 * 3 + dd1]
-	      += 0.5 * fscale * dr[dd0] * dr[dd1] * ri * 0.5 * i_scale;
-	  virial[j_idx * 9 + dd0 * 3 + dd1]
-	      += 0.5 * fscale * dr[dd0] * dr[dd1] * ri * 0.5 * i_scale;
-	}
+        for (int dd1 = 0; dd1 < 3; ++dd1) {
+          virial[i_idx * 9 + dd0 * 3 + dd1] +=
+              0.5 * fscale * dr[dd0] * dr[dd1] * ri * 0.5 * i_scale;
+          virial[j_idx * 9 + dd0 * 3 + dd1] +=
+              0.5 * fscale * dr[dd0] * dr[dd1] * ri * 0.5 * i_scale;
+        }
       }
       jiter++;
     }
   }
 }
 
-inline void
-_cum_sum (
-    std::vector & sec,
-    const std::vector & n_sel) {
-  sec.resize (n_sel.size() + 1);
+inline void _cum_sum(std::vector& sec, const std::vector& n_sel) {
+  sec.resize(n_sel.size() + 1);
   sec[0] = 0;
-  for (int ii = 1; ii < sec.size(); ++ii){
-    sec[ii] = sec[ii-1] + n_sel[ii-1];
+  for (int ii = 1; ii < sec.size(); ++ii) {
+    sec[ii] = sec[ii - 1] + n_sel[ii - 1];
   }
 }
 
-template
-void 
-deepmd::pair_tab_cpu(
-    FPTYPE * energy,
-    FPTYPE * force,
-    FPTYPE * virial,
-    const double * p_table_info,
-    const double * p_table_data,
-    const FPTYPE * rij,
-    const FPTYPE * scale,
-    const int * type,
-    const int * nlist,
-    const int * natoms,
-    const std::vector & sel_a,
-    const std::vector & sel_r
-    )
-{
+template 
+void deepmd::pair_tab_cpu(FPTYPE* energy,
+                          FPTYPE* force,
+                          FPTYPE* virial,
+                          const double* p_table_info,
+                          const double* p_table_data,
+                          const FPTYPE* rij,
+                          const FPTYPE* scale,
+                          const int* type,
+                          const int* nlist,
+                          const int* natoms,
+                          const std::vector& sel_a,
+                          const std::vector& sel_r) {
   std::vector sec_a;
   std::vector sec_r;
   _cum_sum(sec_a, sel_a);
@@ -146,16 +130,16 @@ deepmd::pair_tab_cpu(
   const int nloc = natoms[0];
   const int nall = natoms[1];
   const int nnei = sec_a.back() + sec_r.back();
-  const int ntypes = int(p_table_info[3]+0.1);
-  const int nspline = p_table_info[2]+0.1;
+  const int ntypes = int(p_table_info[3] + 0.1);
+  const int nspline = p_table_info[2] + 0.1;
   const int tab_stride = 4 * nspline;
-  
+
   // fill results with 0
-  for (int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     int i_idx = ii;
     energy[i_idx] = 0;
   }
-  for (int ii = 0; ii < nall; ++ii){
+  for (int ii = 0; ii < nall; ++ii) {
     int i_idx = ii;
     force[i_idx * 3 + 0] = (FPTYPE)0.;
     force[i_idx * 3 + 1] = (FPTYPE)0.;
@@ -167,80 +151,46 @@ deepmd::pair_tab_cpu(
   // compute force of a frame
   int i_idx = 0;
   for (int tt = 0; tt < ntypes; ++tt) {
-    for (int ii = 0; ii < natoms[2+tt]; ++ii){
+    for (int ii = 0; ii < natoms[2 + tt]; ++ii) {
       int i_type = type[i_idx];
-      assert(i_type == tt) ;
+      assert(i_type == tt);
       const int i_type_shift = i_type * ntypes;
       int jiter = 0;
       // a neighbor
-      _pair_tab_jloop(energy,
-		      force,
-		      virial,
-		      jiter,
-		      i_idx, 
-		      nnei,
-		      i_type_shift,
-		      p_table_info,
-		      p_table_data,
-		      tab_stride,
-		      rij,
-		      scale,
-		      type, 
-		      nlist,
-		      natoms,
-		      sel_a);
+      _pair_tab_jloop(energy, force, virial, jiter, i_idx, nnei, i_type_shift,
+                      p_table_info, p_table_data, tab_stride, rij, scale, type,
+                      nlist, natoms, sel_a);
       // r neighbor
-      _pair_tab_jloop(energy,
-		      force,
-		      virial,
-		      jiter,
-		      i_idx, 
-		      nnei,
-		      i_type_shift,
-		      p_table_info,
-		      p_table_data,
-		      tab_stride,
-		      rij,
-		      scale,
-		      type, 
-		      nlist,
-		      natoms,
-		      sel_r);
-      i_idx ++;
+      _pair_tab_jloop(energy, force, virial, jiter, i_idx, nnei, i_type_shift,
+                      p_table_info, p_table_data, tab_stride, rij, scale, type,
+                      nlist, natoms, sel_r);
+      i_idx++;
     }
   }
 }
 
+template void deepmd::pair_tab_cpu(float* energy,
+                                          float* force,
+                                          float* virial,
+                                          const double* table_info,
+                                          const double* table_data,
+                                          const float* rij,
+                                          const float* scale,
+                                          const int* type,
+                                          const int* nlist,
+                                          const int* natoms,
+                                          const std::vector& sel_a,
+                                          const std::vector& sel_r);
 
-template
-void deepmd::pair_tab_cpu(
-    float * energy,
-    float * force,
-    float * virial,
-    const double * table_info,
-    const double * table_data,
-    const float * rij,
-    const float * scale,
-    const int * type,
-    const int * nlist,
-    const int * natoms,
-    const std::vector & sel_a,
-    const std::vector & sel_r
-    );
-
-template
-void deepmd::pair_tab_cpu(
-    double * energy,
-    double * force,
-    double * virial,
-    const double * table_info,
-    const double * table_data,
-    const double * rij,
-    const double * scale,
-    const int * type,
-    const int * nlist,
-    const int * natoms,
-    const std::vector & sel_a,
-    const std::vector & sel_r
-    );
-
+template void deepmd::pair_tab_cpu(double* energy,
+                                           double* force,
+                                           double* virial,
+                                           const double* table_info,
+                                           const double* table_data,
+                                           const double* rij,
+                                           const double* scale,
+                                           const int* type,
+                                           const int* nlist,
+                                           const int* natoms,
+                                           const std::vector& sel_a,
+                                           const std::vector& sel_r);
diff --git a/source/lib/src/prod_env_mat.cc b/source/lib/src/prod_env_mat.cc
index d64e6a4b09..214bdf2ce8 100644
--- a/source/lib/src/prod_env_mat.cc
+++ b/source/lib/src/prod_env_mat.cc
@@ -1,34 +1,33 @@
+#include "prod_env_mat.h"
+
+#include 
+
 #include 
 #include 
-#include 
-#include "prod_env_mat.h"
-#include "fmt_nlist.h"
+
 #include "env_mat.h"
+#include "fmt_nlist.h"
 
 using namespace deepmd;
 
-template
-void
-deepmd::
-prod_env_mat_a_cpu(
-    FPTYPE * em, 
-    FPTYPE * em_deriv, 
-    FPTYPE * rij, 
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const InputNlist & inlist,
-    const int max_nbor_size,
-    const FPTYPE * avg, 
-    const FPTYPE * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec,
-    const int * f_type)
-{
-  if (f_type == NULL){
+template 
+void deepmd::prod_env_mat_a_cpu(FPTYPE *em,
+                                FPTYPE *em_deriv,
+                                FPTYPE *rij,
+                                int *nlist,
+                                const FPTYPE *coord,
+                                const int *type,
+                                const InputNlist &inlist,
+                                const int max_nbor_size,
+                                const FPTYPE *avg,
+                                const FPTYPE *std,
+                                const int nloc,
+                                const int nall,
+                                const float rcut,
+                                const float rcut_smth,
+                                const std::vector sec,
+                                const int *f_type) {
+  if (f_type == NULL) {
     f_type = type;
   }
   const int nnei = sec.back();
@@ -47,9 +46,9 @@ prod_env_mat_a_cpu(
   for (int ii = 0; ii < nall; ++ii) {
     d_f_type[ii] = f_type[ii];
   }
-  
+
   // build nlist
-  std::vector > d_nlist_a(nloc);
+  std::vector > d_nlist_a(nloc);
 
   assert(nloc == inlist.inum);
   for (unsigned ii = 0; ii < nloc; ++ii) {
@@ -57,34 +56,38 @@ prod_env_mat_a_cpu(
   }
   for (unsigned ii = 0; ii < nloc; ++ii) {
     int i_idx = inlist.ilist[ii];
-    for(unsigned jj = 0; jj < inlist.numneigh[ii]; ++jj){
+    for (unsigned jj = 0; jj < inlist.numneigh[ii]; ++jj) {
       int j_idx = inlist.firstneigh[ii][jj];
-      d_nlist_a[i_idx].push_back (j_idx);
+      d_nlist_a[i_idx].push_back(j_idx);
     }
   }
-    
-#pragma omp parallel for 
+
+#pragma omp parallel for
   for (int ii = 0; ii < nloc; ++ii) {
     std::vector fmt_nlist_a;
-    int ret = format_nlist_i_cpu(fmt_nlist_a, d_coord3, d_f_type, ii, d_nlist_a[ii], rcut, sec);
+    int ret = format_nlist_i_cpu(fmt_nlist_a, d_coord3, d_f_type, ii,
+                                 d_nlist_a[ii], rcut, sec);
     std::vector d_em_a;
     std::vector d_em_a_deriv;
     std::vector d_em_r;
     std::vector d_em_r_deriv;
     std::vector d_rij_a;
-    env_mat_a_cpu (d_em_a, d_em_a_deriv, d_rij_a, d_coord3, d_f_type, ii, fmt_nlist_a, sec, rcut_smth, rcut);
+    env_mat_a_cpu(d_em_a, d_em_a_deriv, d_rij_a, d_coord3, d_f_type, ii,
+                  fmt_nlist_a, sec, rcut_smth, rcut);
 
     // check sizes
-    assert (d_em_a.size() == nem);
-    assert (d_em_a_deriv.size() == nem * 3);
-    assert (d_rij_a.size() == nnei * 3);
-    assert (fmt_nlist_a.size() == nnei);
+    assert(d_em_a.size() == nem);
+    assert(d_em_a_deriv.size() == nem * 3);
+    assert(d_rij_a.size() == nnei * 3);
+    assert(fmt_nlist_a.size() == nnei);
     // record outputs
     for (int jj = 0; jj < nem; ++jj) {
-      em[ii * nem + jj] = (d_em_a[jj] - avg[type[ii] * nem + jj]) / std[type[ii] * nem + jj];
+      em[ii * nem + jj] =
+          (d_em_a[jj] - avg[type[ii] * nem + jj]) / std[type[ii] * nem + jj];
     }
     for (int jj = 0; jj < nem * 3; ++jj) {
-      em_deriv[ii * nem * 3 + jj] = d_em_a_deriv[jj] / std[type[ii] * nem + jj / 3];
+      em_deriv[ii * nem * 3 + jj] =
+          d_em_a_deriv[jj] / std[type[ii] * nem + jj / 3];
     }
     for (int jj = 0; jj < nnei * 3; ++jj) {
       rij[ii * nnei * 3 + jj] = d_rij_a[jj];
@@ -95,26 +98,22 @@ prod_env_mat_a_cpu(
   }
 }
 
-template
-void 
-deepmd::
-prod_env_mat_r_cpu(
-    FPTYPE * em, 
-    FPTYPE * em_deriv, 
-    FPTYPE * rij, 
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const InputNlist & inlist,
-    const int max_nbor_size,
-    const FPTYPE * avg, 
-    const FPTYPE * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec) 
-{
+template 
+void deepmd::prod_env_mat_r_cpu(FPTYPE *em,
+                                FPTYPE *em_deriv,
+                                FPTYPE *rij,
+                                int *nlist,
+                                const FPTYPE *coord,
+                                const int *type,
+                                const InputNlist &inlist,
+                                const int max_nbor_size,
+                                const FPTYPE *avg,
+                                const FPTYPE *std,
+                                const int nloc,
+                                const int nall,
+                                const float rcut,
+                                const float rcut_smth,
+                                const std::vector sec) {
   const int nnei = sec.back();
   const int nem = nnei * 1;
 
@@ -127,13 +126,13 @@ prod_env_mat_r_cpu(
   }
 
   // set type
-  std::vector d_type (nall);
+  std::vector d_type(nall);
   for (int ii = 0; ii < nall; ++ii) {
     d_type[ii] = type[ii];
   }
 
   // build nlist
-  std::vector > d_nlist_a(nloc);
+  std::vector > d_nlist_a(nloc);
 
   assert(nloc == inlist.inum);
   for (unsigned ii = 0; ii < nloc; ++ii) {
@@ -141,34 +140,38 @@ prod_env_mat_r_cpu(
   }
   for (unsigned ii = 0; ii < nloc; ++ii) {
     int i_idx = inlist.ilist[ii];
-    for(unsigned jj = 0; jj < inlist.numneigh[ii]; ++jj){
+    for (unsigned jj = 0; jj < inlist.numneigh[ii]; ++jj) {
       int j_idx = inlist.firstneigh[ii][jj];
-      d_nlist_a[i_idx].push_back (j_idx);
+      d_nlist_a[i_idx].push_back(j_idx);
     }
   }
-    
-#pragma omp parallel for 
+
+#pragma omp parallel for
   for (int ii = 0; ii < nloc; ++ii) {
     std::vector fmt_nlist_a;
-    int ret = format_nlist_i_cpu(fmt_nlist_a, d_coord3, d_type, ii, d_nlist_a[ii], rcut, sec);
+    int ret = format_nlist_i_cpu(fmt_nlist_a, d_coord3, d_type, ii,
+                                 d_nlist_a[ii], rcut, sec);
     std::vector d_em_a;
     std::vector d_em_a_deriv;
     std::vector d_em_r;
     std::vector d_em_r_deriv;
     std::vector d_rij_a;
-    env_mat_r_cpu (d_em_a, d_em_a_deriv, d_rij_a, d_coord3, d_type, ii, fmt_nlist_a, sec, rcut_smth, rcut);
+    env_mat_r_cpu(d_em_a, d_em_a_deriv, d_rij_a, d_coord3, d_type, ii,
+                  fmt_nlist_a, sec, rcut_smth, rcut);
 
     // check sizes
-    assert (d_em_a.size() == nem);
-    assert (d_em_a_deriv.size() == nem * 3);
-    assert (d_rij_a.size() == nnei * 3);
-    assert (fmt_nlist_a.size() == nnei);
+    assert(d_em_a.size() == nem);
+    assert(d_em_a_deriv.size() == nem * 3);
+    assert(d_rij_a.size() == nnei * 3);
+    assert(fmt_nlist_a.size() == nnei);
     // record outputs
     for (int jj = 0; jj < nem; ++jj) {
-      em[ii * nem + jj] = (d_em_a[jj] - avg[d_type[ii] * nem + jj]) / std[d_type[ii] * nem + jj];
+      em[ii * nem + jj] = (d_em_a[jj] - avg[d_type[ii] * nem + jj]) /
+                          std[d_type[ii] * nem + jj];
     }
     for (int jj = 0; jj < nem * 3; ++jj) {
-      em_deriv[ii * nem * 3 + jj] = d_em_a_deriv[jj] / std[d_type[ii] * nem + jj / 3];
+      em_deriv[ii * nem * 3 + jj] =
+          d_em_a_deriv[jj] / std[d_type[ii] * nem + jj / 3];
     }
     for (int jj = 0; jj < nnei * 3; ++jj) {
       rij[ii * nnei * 3 + jj] = d_rij_a[jj];
@@ -179,102 +182,84 @@ prod_env_mat_r_cpu(
   }
 }
 
-template
-void 
-deepmd::
-prod_env_mat_a_cpu(
-    double * em, 
-    double * em_deriv, 
-    double * rij, 
-    int * nlist, 
-    const double * coord, 
-    const int * type, 
-    const InputNlist & inlist,
-    const int max_nbor_size,
-    const double * avg, 
-    const double * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec,
-    const int * f_type);
+template void deepmd::prod_env_mat_a_cpu(double *em,
+                                                 double *em_deriv,
+                                                 double *rij,
+                                                 int *nlist,
+                                                 const double *coord,
+                                                 const int *type,
+                                                 const InputNlist &inlist,
+                                                 const int max_nbor_size,
+                                                 const double *avg,
+                                                 const double *std,
+                                                 const int nloc,
+                                                 const int nall,
+                                                 const float rcut,
+                                                 const float rcut_smth,
+                                                 const std::vector sec,
+                                                 const int *f_type);
 
-template
-void
-deepmd::
-prod_env_mat_a_cpu(
-    float * em, 
-    float * em_deriv, 
-    float * rij, 
-    int * nlist, 
-    const float * coord, 
-    const int * type, 
-    const InputNlist & inlist,
-    const int max_nbor_size,
-    const float * avg, 
-    const float * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec,
-    const int * f_type);
+template void deepmd::prod_env_mat_a_cpu(float *em,
+                                                float *em_deriv,
+                                                float *rij,
+                                                int *nlist,
+                                                const float *coord,
+                                                const int *type,
+                                                const InputNlist &inlist,
+                                                const int max_nbor_size,
+                                                const float *avg,
+                                                const float *std,
+                                                const int nloc,
+                                                const int nall,
+                                                const float rcut,
+                                                const float rcut_smth,
+                                                const std::vector sec,
+                                                const int *f_type);
 
-template
-void
-deepmd::
-prod_env_mat_r_cpu(
-    double * em, 
-    double * em_deriv, 
-    double * rij, 
-    int * nlist, 
-    const double * coord, 
-    const int * type, 
-    const InputNlist & inlist,
-    const int max_nbor_size,
-    const double * avg, 
-    const double * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec);
+template void deepmd::prod_env_mat_r_cpu(double *em,
+                                                 double *em_deriv,
+                                                 double *rij,
+                                                 int *nlist,
+                                                 const double *coord,
+                                                 const int *type,
+                                                 const InputNlist &inlist,
+                                                 const int max_nbor_size,
+                                                 const double *avg,
+                                                 const double *std,
+                                                 const int nloc,
+                                                 const int nall,
+                                                 const float rcut,
+                                                 const float rcut_smth,
+                                                 const std::vector sec);
 
-template
-void 
-deepmd::
-prod_env_mat_r_cpu(
-    float * em, 
-    float * em_deriv, 
-    float * rij, 
-    int * nlist, 
-    const float * coord, 
-    const int * type, 
-    const InputNlist & inlist,
-    const int max_nbor_size,
-    const float * avg, 
-    const float * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec);
+template void deepmd::prod_env_mat_r_cpu(float *em,
+                                                float *em_deriv,
+                                                float *rij,
+                                                int *nlist,
+                                                const float *coord,
+                                                const int *type,
+                                                const InputNlist &inlist,
+                                                const int max_nbor_size,
+                                                const float *avg,
+                                                const float *std,
+                                                const int nloc,
+                                                const int nall,
+                                                const float rcut,
+                                                const float rcut_smth,
+                                                const std::vector sec);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-void deepmd::env_mat_nbor_update(
-    InputNlist &inlist,
-    InputNlist &gpu_inlist,
-    int &max_nbor_size,
-    int* &nbor_list_dev,
-    const int * mesh, 
-    const int size)
-{
+void deepmd::env_mat_nbor_update(InputNlist &inlist,
+                                 InputNlist &gpu_inlist,
+                                 int &max_nbor_size,
+                                 int *&nbor_list_dev,
+                                 const int *mesh,
+                                 const int size) {
   int *mesh_host = new int[size];
   memcpy_device_to_host(mesh, mesh_host, size);
   memcpy(&inlist.ilist, 4 + mesh_host, sizeof(int *));
-	memcpy(&inlist.numneigh, 8 + mesh_host, sizeof(int *));
-	memcpy(&inlist.firstneigh, 12 + mesh_host, sizeof(int **));
+  memcpy(&inlist.numneigh, 8 + mesh_host, sizeof(int *));
+  memcpy(&inlist.firstneigh, 12 + mesh_host, sizeof(int **));
   const int ago = mesh_host[0];
   if (ago == 0 || gpu_inlist.inum < inlist.inum) {
     const int inum = inlist.inum;
@@ -291,23 +276,17 @@ void deepmd::env_mat_nbor_update(
     int _max_nbor_size = max_numneigh(inlist);
     if (_max_nbor_size <= 256) {
       _max_nbor_size = 256;
-    }
-    else if (_max_nbor_size <= 512) {
+    } else if (_max_nbor_size <= 512) {
       _max_nbor_size = 512;
-    }
-    else if (_max_nbor_size <= 1024) {
+    } else if (_max_nbor_size <= 1024) {
       _max_nbor_size = 1024;
-    }
-    else if (_max_nbor_size <= 2048) {
+    } else if (_max_nbor_size <= 2048) {
       _max_nbor_size = 2048;
-    }
-    else {
+    } else {
       _max_nbor_size = 4096;
     }
-    if ( nbor_list_dev == NULL 
-      || _max_nbor_size > max_nbor_size 
-      || inum > gpu_inlist.inum) 
-    {
+    if (nbor_list_dev == NULL || _max_nbor_size > max_nbor_size ||
+        inum > gpu_inlist.inum) {
       delete_device_memory(nbor_list_dev);
       malloc_device_memory(nbor_list_dev, inum * _max_nbor_size);
     }
@@ -317,17 +296,18 @@ void deepmd::env_mat_nbor_update(
 
     // copy nbor list from host to the device
     std::vector nbor_list_host(inum * max_nbor_size, 0);
-    int ** _firstneigh = (int**)malloc(sizeof(int*) * inum);
+    int **_firstneigh = (int **)malloc(sizeof(int *) * inum);
     for (int ii = 0; ii < inum; ii++) {
       _firstneigh[ii] = nbor_list_dev + ii * max_nbor_size;
       for (int jj = 0; jj < inlist.numneigh[ii]; jj++) {
         nbor_list_host[ii * max_nbor_size + jj] = inlist.firstneigh[ii][jj];
       }
     }
-    memcpy_host_to_device(nbor_list_dev, &nbor_list_host[0], inum * max_nbor_size);
+    memcpy_host_to_device(nbor_list_dev, &nbor_list_host[0],
+                          inum * max_nbor_size);
     memcpy_host_to_device(gpu_inlist.firstneigh, _firstneigh, inum);
     free(_firstneigh);
   }
-  delete [] mesh_host;
+  delete[] mesh_host;
 }
-#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
\ No newline at end of file
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/source/lib/src/prod_env_mat_nvnmd.cc b/source/lib/src/prod_env_mat_nvnmd.cc
index 6df210f3d3..742f44be89 100644
--- a/source/lib/src/prod_env_mat_nvnmd.cc
+++ b/source/lib/src/prod_env_mat_nvnmd.cc
@@ -1,10 +1,10 @@
 /*
 //==================================================
- _   _  __     __  _   _   __  __   ____  
-| \ | | \ \   / / | \ | | |  \/  | |  _ \ 
+ _   _  __     __  _   _   __  __   ____
+| \ | | \ \   / / | \ | | |  \/  | |  _ \
 |  \| |  \ \ / /  |  \| | | |\/| | | | | |
 | |\  |   \ V /   | |\  | | |  | | | |_| |
-|_| \_|    \_/    |_| \_| |_|  |_| |____/ 
+|_| \_|    \_/    |_| \_| |_|  |_| |____/
 
 //==================================================
 
@@ -15,13 +15,15 @@ date: 2021-12-6
 
 */
 
+#include "prod_env_mat_nvnmd.h"
+
+#include 
 
 #include 
 #include 
-#include 
-#include "prod_env_mat_nvnmd.h"
-#include "fmt_nlist.h"
+
 #include "env_mat_nvnmd.h"
+#include "fmt_nlist.h"
 
 using namespace deepmd;
 
@@ -39,27 +41,22 @@ using namespace deepmd;
 //==================================================
 */
 
-
-template
-void
-deepmd::
-prod_env_mat_a_nvnmd_quantize_cpu(
-    FPTYPE * em, 
-    FPTYPE * em_deriv, 
-    FPTYPE * rij, 
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const InputNlist & inlist,
-    const int max_nbor_size,
-    const FPTYPE * avg, 
-    const FPTYPE * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec) 
-{
+template 
+void deepmd::prod_env_mat_a_nvnmd_quantize_cpu(FPTYPE* em,
+                                               FPTYPE* em_deriv,
+                                               FPTYPE* rij,
+                                               int* nlist,
+                                               const FPTYPE* coord,
+                                               const int* type,
+                                               const InputNlist& inlist,
+                                               const int max_nbor_size,
+                                               const FPTYPE* avg,
+                                               const FPTYPE* std,
+                                               const int nloc,
+                                               const int nall,
+                                               const float rcut,
+                                               const float rcut_smth,
+                                               const std::vector sec) {
   const int nnei = sec.back();
   const int nem = nnei * 4;
 
@@ -72,13 +69,13 @@ prod_env_mat_a_nvnmd_quantize_cpu(
   }
 
   // set type
-  std::vector d_type (nall);
+  std::vector d_type(nall);
   for (int ii = 0; ii < nall; ++ii) {
     d_type[ii] = type[ii];
   }
-    
+
   // build nlist
-  std::vector > d_nlist_a(nloc);
+  std::vector > d_nlist_a(nloc);
 
   assert(nloc == inlist.inum);
   for (unsigned ii = 0; ii < nloc; ++ii) {
@@ -86,35 +83,39 @@ prod_env_mat_a_nvnmd_quantize_cpu(
   }
   for (unsigned ii = 0; ii < nloc; ++ii) {
     int i_idx = inlist.ilist[ii];
-    for(unsigned jj = 0; jj < inlist.numneigh[ii]; ++jj){
+    for (unsigned jj = 0; jj < inlist.numneigh[ii]; ++jj) {
       int j_idx = inlist.firstneigh[ii][jj];
-      d_nlist_a[i_idx].push_back (j_idx);
+      d_nlist_a[i_idx].push_back(j_idx);
     }
   }
-    
-#pragma omp parallel for 
+
+#pragma omp parallel for
   for (int ii = 0; ii < nloc; ++ii) {
     std::vector fmt_nlist_a;
-    int ret = format_nlist_i_cpu(fmt_nlist_a, d_coord3, d_type, ii, d_nlist_a[ii], rcut, sec);
+    int ret = format_nlist_i_cpu(fmt_nlist_a, d_coord3, d_type, ii,
+                                 d_nlist_a[ii], rcut, sec);
     std::vector d_em_a;
     std::vector d_em_a_deriv;
     std::vector d_em_r;
     std::vector d_em_r_deriv;
     std::vector d_rij_a;
-    env_mat_a_nvnmd_quantize_cpu (d_em_a, d_em_a_deriv, d_rij_a, d_coord3, d_type, ii, fmt_nlist_a, sec, rcut_smth, rcut);
+    env_mat_a_nvnmd_quantize_cpu(d_em_a, d_em_a_deriv, d_rij_a, d_coord3,
+                                 d_type, ii, fmt_nlist_a, sec, rcut_smth, rcut);
 
     // check sizes
-    assert (d_em_a.size() == nem);
-    assert (d_em_a_deriv.size() == nem * 3);
-    assert (d_rij_a.size() == nnei * 3);
-    assert (fmt_nlist_a.size() == nnei);
+    assert(d_em_a.size() == nem);
+    assert(d_em_a_deriv.size() == nem * 3);
+    assert(d_rij_a.size() == nnei * 3);
+    assert(fmt_nlist_a.size() == nnei);
     // record outputs
     for (int jj = 0; jj < nem; ++jj) {
-      // em[ii * nem + jj] = (d_em_a[jj] - avg[d_type[ii] * nem + jj]) / std[d_type[ii] * nem + jj];
+      // em[ii * nem + jj] = (d_em_a[jj] - avg[d_type[ii] * nem + jj]) /
+      // std[d_type[ii] * nem + jj];
       em[ii * nem + jj] = d_em_a[jj];
     }
     for (int jj = 0; jj < nem * 3; ++jj) {
-      // em_deriv[ii * nem * 3 + jj] = d_em_a_deriv[jj] / std[d_type[ii] * nem + jj / 3];
+      // em_deriv[ii * nem * 3 + jj] = d_em_a_deriv[jj] / std[d_type[ii] * nem +
+      // jj / 3];
       em_deriv[ii * nem * 3 + jj] = d_em_a_deriv[jj];
     }
     for (int jj = 0; jj < nnei * 3; ++jj) {
@@ -126,48 +127,40 @@ prod_env_mat_a_nvnmd_quantize_cpu(
   }
 }
 
-
-
-template
-void 
-deepmd::
-prod_env_mat_a_nvnmd_quantize_cpu(
-    double * em, 
-    double * em_deriv, 
-    double * rij, 
-    int * nlist, 
-    const double * coord, 
-    const int * type, 
-    const InputNlist & inlist,
+template void deepmd::prod_env_mat_a_nvnmd_quantize_cpu(
+    double* em,
+    double* em_deriv,
+    double* rij,
+    int* nlist,
+    const double* coord,
+    const int* type,
+    const InputNlist& inlist,
     const int max_nbor_size,
-    const double * avg, 
-    const double * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
+    const double* avg,
+    const double* std,
+    const int nloc,
+    const int nall,
+    const float rcut,
+    const float rcut_smth,
     const std::vector sec);
 
-template
-void
-deepmd::
-prod_env_mat_a_nvnmd_quantize_cpu(
-    float * em, 
-    float * em_deriv, 
-    float * rij, 
-    int * nlist, 
-    const float * coord, 
-    const int * type, 
-    const InputNlist & inlist,
+template void deepmd::prod_env_mat_a_nvnmd_quantize_cpu(
+    float* em,
+    float* em_deriv,
+    float* rij,
+    int* nlist,
+    const float* coord,
+    const int* type,
+    const InputNlist& inlist,
     const int max_nbor_size,
-    const float * avg, 
-    const float * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
+    const float* avg,
+    const float* std,
+    const int nloc,
+    const int nall,
+    const float rcut,
+    const float rcut_smth,
     const std::vector sec);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 // UNDEFINE
-#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
\ No newline at end of file
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/source/lib/src/prod_force.cc b/source/lib/src/prod_force.cc
index eb7d2d3844..b258ff1d3f 100644
--- a/source/lib/src/prod_force.cc
+++ b/source/lib/src/prod_force.cc
@@ -1,39 +1,33 @@
-#include 
-#include 
 #include "prod_force.h"
-#include "errors.h"
+
 #include 
 
-inline void
-make_index_range (
-    int & idx_start,
-    int & idx_end,
-    const int & nei_idx, 
-    const int & nnei) 
-{
+#include 
+#include 
+
+#include "errors.h"
+
+inline void make_index_range(int& idx_start,
+                             int& idx_end,
+                             const int& nei_idx,
+                             const int& nnei) {
   if (nei_idx < nnei) {
     idx_start = nei_idx * 4;
-    idx_end   = nei_idx * 4 + 4;
-  }
-  else {
+    idx_end = nei_idx * 4 + 4;
+  } else {
     throw deepmd::deepmd_exception("should no reach here");
   }
 }
 
-
-template
-void 
-deepmd::
-prod_force_a_cpu(
-    FPTYPE * force, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei,
-    const int start_index) 
-{
+template 
+void deepmd::prod_force_a_cpu(FPTYPE* force,
+                              const FPTYPE* net_deriv,
+                              const FPTYPE* env_deriv,
+                              const int* nlist,
+                              const int nloc,
+                              const int nall,
+                              const int nnei,
+                              const int start_index) {
   const int ndescrpt = 4 * nnei;
 
   memset(force, 0, sizeof(FPTYPE) * nall * 3);
@@ -41,67 +35,60 @@ prod_force_a_cpu(
   for (int i_idx = start_index; i_idx < start_index + nloc; ++i_idx) {
     // deriv wrt center atom
     for (int aa = 0; aa < ndescrpt; ++aa) {
-      force[i_idx * 3 + 0] -= net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 0];
-      force[i_idx * 3 + 1] -= net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 1];
-      force[i_idx * 3 + 2] -= net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 2];
+      force[i_idx * 3 + 0] -= net_deriv[i_idx * ndescrpt + aa] *
+                              env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 0];
+      force[i_idx * 3 + 1] -= net_deriv[i_idx * ndescrpt + aa] *
+                              env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 1];
+      force[i_idx * 3 + 2] -= net_deriv[i_idx * ndescrpt + aa] *
+                              env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 2];
     }
     // deriv wrt neighbors
     for (int jj = 0; jj < nnei; ++jj) {
       int j_idx = nlist[i_idx * nnei + jj];
       if (j_idx < 0) continue;
       int aa_start, aa_end;
-      make_index_range (aa_start, aa_end, jj, nnei);
+      make_index_range(aa_start, aa_end, jj, nnei);
       for (int aa = aa_start; aa < aa_end; ++aa) {
-        force[j_idx * 3 + 0] += net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 0];
-        force[j_idx * 3 + 1] += net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 1];
-        force[j_idx * 3 + 2] += net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 2];
+        force[j_idx * 3 + 0] += net_deriv[i_idx * ndescrpt + aa] *
+                                env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 0];
+        force[j_idx * 3 + 1] += net_deriv[i_idx * ndescrpt + aa] *
+                                env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 1];
+        force[j_idx * 3 + 2] += net_deriv[i_idx * ndescrpt + aa] *
+                                env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 2];
       }
     }
   }
 }
 
-template
-void 
-deepmd::
-prod_force_a_cpu(
-    double * force, 
-    const double * net_deriv, 
-    const double * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei,
-    const int start_index);
-
-template
-void 
-deepmd::
-prod_force_a_cpu(
-    float * force, 
-    const float * net_deriv, 
-    const float * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei,
-    const int start_index);
+template void deepmd::prod_force_a_cpu(double* force,
+                                               const double* net_deriv,
+                                               const double* env_deriv,
+                                               const int* nlist,
+                                               const int nloc,
+                                               const int nall,
+                                               const int nnei,
+                                               const int start_index);
 
+template void deepmd::prod_force_a_cpu(float* force,
+                                              const float* net_deriv,
+                                              const float* env_deriv,
+                                              const int* nlist,
+                                              const int nloc,
+                                              const int nall,
+                                              const int nnei,
+                                              const int start_index);
 
-template
-void 
-deepmd::
-prod_force_r_cpu(
-    FPTYPE * force, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei) 
-{
+template 
+void deepmd::prod_force_r_cpu(FPTYPE* force,
+                              const FPTYPE* net_deriv,
+                              const FPTYPE* env_deriv,
+                              const int* nlist,
+                              const int nloc,
+                              const int nall,
+                              const int nnei) {
   const int ndescrpt = 1 * nnei;
 
-  for (int ii = 0; ii < nall; ++ii){
+  for (int ii = 0; ii < nall; ++ii) {
     int i_idx = ii;
     force[i_idx * 3 + 0] = (FPTYPE)0.;
     force[i_idx * 3 + 1] = (FPTYPE)0.;
@@ -109,46 +96,44 @@ prod_force_r_cpu(
   }
 
   // compute force of a frame
-  for (int ii = 0; ii < nloc; ++ii){
-    int i_idx = ii;	
+  for (int ii = 0; ii < nloc; ++ii) {
+    int i_idx = ii;
     // deriv wrt center atom
-    for (int aa = 0; aa < ndescrpt; ++aa){
-      force[i_idx * 3 + 0] -= net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 0];
-      force[i_idx * 3 + 1] -= net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 1];
-      force[i_idx * 3 + 2] -= net_deriv[i_idx * ndescrpt + aa] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 2];
+    for (int aa = 0; aa < ndescrpt; ++aa) {
+      force[i_idx * 3 + 0] -= net_deriv[i_idx * ndescrpt + aa] *
+                              env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 0];
+      force[i_idx * 3 + 1] -= net_deriv[i_idx * ndescrpt + aa] *
+                              env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 1];
+      force[i_idx * 3 + 2] -= net_deriv[i_idx * ndescrpt + aa] *
+                              env_deriv[i_idx * ndescrpt * 3 + aa * 3 + 2];
     }
     // deriv wrt neighbors
-    for (int jj = 0; jj < nnei; ++jj){
+    for (int jj = 0; jj < nnei; ++jj) {
       int j_idx = nlist[i_idx * nnei + jj];
       // if (j_idx > nloc) j_idx = j_idx % nloc;
       if (j_idx < 0) continue;
-      force[j_idx * 3 + 0] += net_deriv[i_idx * ndescrpt + jj] * env_deriv[i_idx * ndescrpt * 3 + jj * 3 + 0];
-      force[j_idx * 3 + 1] += net_deriv[i_idx * ndescrpt + jj] * env_deriv[i_idx * ndescrpt * 3 + jj * 3 + 1];
-      force[j_idx * 3 + 2] += net_deriv[i_idx * ndescrpt + jj] * env_deriv[i_idx * ndescrpt * 3 + jj * 3 + 2];
+      force[j_idx * 3 + 0] += net_deriv[i_idx * ndescrpt + jj] *
+                              env_deriv[i_idx * ndescrpt * 3 + jj * 3 + 0];
+      force[j_idx * 3 + 1] += net_deriv[i_idx * ndescrpt + jj] *
+                              env_deriv[i_idx * ndescrpt * 3 + jj * 3 + 1];
+      force[j_idx * 3 + 2] += net_deriv[i_idx * ndescrpt + jj] *
+                              env_deriv[i_idx * ndescrpt * 3 + jj * 3 + 2];
     }
   }
 }
 
-template
-void 
-deepmd::
-prod_force_r_cpu(
-    double * force, 
-    const double * net_deriv, 
-    const double * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
+template void deepmd::prod_force_r_cpu(double* force,
+                                               const double* net_deriv,
+                                               const double* env_deriv,
+                                               const int* nlist,
+                                               const int nloc,
+                                               const int nall,
+                                               const int nnei);
 
-template
-void 
-deepmd::
-prod_force_r_cpu(
-    float * force, 
-    const float * net_deriv, 
-    const float * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
+template void deepmd::prod_force_r_cpu(float* force,
+                                              const float* net_deriv,
+                                              const float* env_deriv,
+                                              const int* nlist,
+                                              const int nloc,
+                                              const int nall,
+                                              const int nnei);
diff --git a/source/lib/src/prod_force_grad.cc b/source/lib/src/prod_force_grad.cc
index 88d61a4436..adc52baaf0 100644
--- a/source/lib/src/prod_force_grad.cc
+++ b/source/lib/src/prod_force_grad.cc
@@ -1,109 +1,92 @@
+#include "prod_force_grad.h"
+
+#include 
 #include 
 #include 
-#include 
-#include "prod_force_grad.h"
+
 #include "errors.h"
 
-inline void
-make_index_range (
-    int & idx_start,
-    int & idx_end,
-    const int & nei_idx, 
-    const int & nnei) 
-{
+inline void make_index_range(int& idx_start,
+                             int& idx_end,
+                             const int& nei_idx,
+                             const int& nnei) {
   if (nei_idx < nnei) {
     idx_start = nei_idx * 4;
-    idx_end   = nei_idx * 4 + 4;
-  }
-  else {
+    idx_end = nei_idx * 4 + 4;
+  } else {
     throw deepmd::deepmd_exception("should no reach here");
   }
 }
 
-
-template
-void 
-deepmd::
-prod_force_grad_a_cpu(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei) 
-{
+template 
+void deepmd::prod_force_grad_a_cpu(FPTYPE* grad_net,
+                                   const FPTYPE* grad,
+                                   const FPTYPE* env_deriv,
+                                   const int* nlist,
+                                   const int nloc,
+                                   const int nnei) {
   const int ndescrpt = nnei * 4;
-  
+
   // reset the frame to 0
-  for (int ii = 0; ii < nloc; ++ii){
-    for (int aa = 0; aa < ndescrpt; ++aa){
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int aa = 0; aa < ndescrpt; ++aa) {
       grad_net[ii * ndescrpt + aa] = (FPTYPE)0.;
     }
-  }      
+  }
 
-  // compute grad of one frame
-  #pragma omp parallel for
-  for (int ii = 0; ii < nloc; ++ii){
+// compute grad of one frame
+#pragma omp parallel for
+  for (int ii = 0; ii < nloc; ++ii) {
     int i_idx = ii;
-	
+
     // deriv wrt center atom
-    for (int aa = 0; aa < ndescrpt; ++aa){
-      for (int dd = 0; dd < 3; ++dd){
-	grad_net[i_idx * ndescrpt + aa] -= grad[i_idx * 3 + dd] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + dd];
+    for (int aa = 0; aa < ndescrpt; ++aa) {
+      for (int dd = 0; dd < 3; ++dd) {
+        grad_net[i_idx * ndescrpt + aa] -=
+            grad[i_idx * 3 + dd] *
+            env_deriv[i_idx * ndescrpt * 3 + aa * 3 + dd];
       }
     }
 
     // loop over neighbors
-    for (int jj = 0; jj < nnei; ++jj){
+    for (int jj = 0; jj < nnei; ++jj) {
       int j_idx = nlist[i_idx * nnei + jj];
       if (j_idx >= nloc) j_idx = j_idx % nloc;
       if (j_idx < 0) continue;
       int aa_start, aa_end;
       make_index_range(aa_start, aa_end, jj, nnei);
-      for (int aa = aa_start; aa < aa_end; ++aa){
-	for (int dd = 0; dd < 3; ++dd){
-	  grad_net[i_idx * ndescrpt + aa] += grad[j_idx * 3 + dd] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + dd];
-	}
+      for (int aa = aa_start; aa < aa_end; ++aa) {
+        for (int dd = 0; dd < 3; ++dd) {
+          grad_net[i_idx * ndescrpt + aa] +=
+              grad[j_idx * 3 + dd] *
+              env_deriv[i_idx * ndescrpt * 3 + aa * 3 + dd];
+        }
       }
     }
   }
 }
 
-
-template
-void 
-deepmd::
-prod_force_grad_a_cpu(
-    double * grad_net, 
-    const double * grad, 
-    const double * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei) ;
-
-template
-void 
-deepmd::
-prod_force_grad_a_cpu(
-    float * grad_net, 
-    const float * grad, 
-    const float * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei) ;
-
-
-
-template
-void 
-deepmd::
-prod_force_grad_r_cpu(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei) 
+template void deepmd::prod_force_grad_a_cpu(double* grad_net,
+                                                    const double* grad,
+                                                    const double* env_deriv,
+                                                    const int* nlist,
+                                                    const int nloc,
+                                                    const int nnei);
+
+template void deepmd::prod_force_grad_a_cpu(float* grad_net,
+                                                   const float* grad,
+                                                   const float* env_deriv,
+                                                   const int* nlist,
+                                                   const int nloc,
+                                                   const int nnei);
+
+template 
+void deepmd::prod_force_grad_r_cpu(FPTYPE* grad_net,
+                                   const FPTYPE* grad,
+                                   const FPTYPE* env_deriv,
+                                   const int* nlist,
+                                   const int nloc,
+                                   const int nnei)
 //
 //	grad_net:	nloc x ndescrpt
 //	grad:		nloc x 3
@@ -112,59 +95,52 @@ prod_force_grad_r_cpu(
 //
 {
   const int ndescrpt = nnei * 1;
-  
+
   // reset the frame to 0
-  for (int ii = 0; ii < nloc; ++ii){
-    for (int aa = 0; aa < ndescrpt; ++aa){
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int aa = 0; aa < ndescrpt; ++aa) {
       grad_net[ii * ndescrpt + aa] = (FPTYPE)0.;
     }
-  }      
+  }
 
-  // compute grad of one frame
-  #pragma omp parallel for
-  for (int ii = 0; ii < nloc; ++ii){
+// compute grad of one frame
+#pragma omp parallel for
+  for (int ii = 0; ii < nloc; ++ii) {
     int i_idx = ii;
-	
+
     // deriv wrt center atom
-    for (int aa = 0; aa < ndescrpt; ++aa){
-      for (int dd = 0; dd < 3; ++dd){
-	grad_net[i_idx * ndescrpt + aa] -= grad[i_idx * 3 + dd] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + dd];
+    for (int aa = 0; aa < ndescrpt; ++aa) {
+      for (int dd = 0; dd < 3; ++dd) {
+        grad_net[i_idx * ndescrpt + aa] -=
+            grad[i_idx * 3 + dd] *
+            env_deriv[i_idx * ndescrpt * 3 + aa * 3 + dd];
       }
     }
 
     // loop over neighbors
-    for (int jj = 0; jj < nnei; ++jj){
+    for (int jj = 0; jj < nnei; ++jj) {
       int j_idx = nlist[i_idx * nnei + jj];
       if (j_idx >= nloc) j_idx = j_idx % nloc;
       if (j_idx < 0) continue;
-      for (int dd = 0; dd < 3; ++dd){
-	grad_net[i_idx * ndescrpt + jj] += grad[j_idx * 3 + dd] * env_deriv[i_idx * ndescrpt * 3 + jj * 3 + dd];
+      for (int dd = 0; dd < 3; ++dd) {
+        grad_net[i_idx * ndescrpt + jj] +=
+            grad[j_idx * 3 + dd] *
+            env_deriv[i_idx * ndescrpt * 3 + jj * 3 + dd];
       }
     }
   }
 }
 
-template
-void 
-deepmd::
-prod_force_grad_r_cpu(
-    double * grad_net, 
-    const double * grad, 
-    const double * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei) ;
-
-template
-void
-deepmd::
-prod_force_grad_r_cpu(
-    float * grad_net, 
-    const float * grad, 
-    const float * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei) ;
-
-
-
+template void deepmd::prod_force_grad_r_cpu(double* grad_net,
+                                                    const double* grad,
+                                                    const double* env_deriv,
+                                                    const int* nlist,
+                                                    const int nloc,
+                                                    const int nnei);
+
+template void deepmd::prod_force_grad_r_cpu(float* grad_net,
+                                                   const float* grad,
+                                                   const float* env_deriv,
+                                                   const int* nlist,
+                                                   const int nloc,
+                                                   const int nnei);
diff --git a/source/lib/src/prod_virial.cc b/source/lib/src/prod_virial.cc
index 29b343ba0b..636c45c1a8 100644
--- a/source/lib/src/prod_virial.cc
+++ b/source/lib/src/prod_virial.cc
@@ -1,174 +1,149 @@
+#include "prod_virial.h"
+
+#include 
 #include 
 #include 
-#include 
-#include "prod_virial.h"
+
 #include "errors.h"
 
-inline void 
-make_index_range (
-    int & idx_start,
-    int & idx_end,
-    const int & nei_idx, 
-    const int & nnei) 
-{
+inline void make_index_range(int& idx_start,
+                             int& idx_end,
+                             const int& nei_idx,
+                             const int& nnei) {
   if (nei_idx < nnei) {
     idx_start = nei_idx * 4;
-    idx_end   = nei_idx * 4 + 4;
-  }
-  else {
-    throw deepmd::deepmd_exception("should no reach here");    
+    idx_end = nei_idx * 4 + 4;
+  } else {
+    throw deepmd::deepmd_exception("should not reach here");
   }
 }
 
-template
-void 
-deepmd::
-prod_virial_a_cpu(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * env_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei)
-{
+template 
+void deepmd::prod_virial_a_cpu(FPTYPE* virial,
+                               FPTYPE* atom_virial,
+                               const FPTYPE* net_deriv,
+                               const FPTYPE* env_deriv,
+                               const FPTYPE* rij,
+                               const int* nlist,
+                               const int nloc,
+                               const int nall,
+                               const int nnei) {
   const int ndescrpt = 4 * nnei;
 
-  for (int ii = 0; ii < 9; ++ ii){
+  for (int ii = 0; ii < 9; ++ii) {
     virial[ii] = (FPTYPE)0.;
   }
-  for (int ii = 0; ii < 9 * nall; ++ ii){
+  for (int ii = 0; ii < 9 * nall; ++ii) {
     atom_virial[ii] = (FPTYPE)0.;
   }
 
-  // compute virial of a frame
-  #pragma omp parallel for
-  for (int ii = 0; ii < nloc; ++ii){
+// compute virial of a frame
+#pragma omp parallel for
+  for (int ii = 0; ii < nloc; ++ii) {
     int i_idx = ii;
 
     // deriv wrt neighbors
-    for (int jj = 0; jj < nnei; ++jj){
+    for (int jj = 0; jj < nnei; ++jj) {
       int j_idx = nlist[i_idx * nnei + jj];
       if (j_idx < 0) continue;
       int aa_start, aa_end;
-      make_index_range (aa_start, aa_end, jj, nnei);
+      make_index_range(aa_start, aa_end, jj, nnei);
       for (int aa = aa_start; aa < aa_end; ++aa) {
-	FPTYPE pref = (FPTYPE)-1.0 * net_deriv[i_idx * ndescrpt + aa];
-	for (int dd0 = 0; dd0 < 3; ++dd0){
-	  for (int dd1 = 0; dd1 < 3; ++dd1){
-	    FPTYPE tmp_v = pref * rij[i_idx * nnei * 3 + jj * 3 + dd1] *  env_deriv[i_idx * ndescrpt * 3 + aa * 3 + dd0];
-      #pragma omp atomic
-	    virial[dd0 * 3 + dd1] -= tmp_v;
-      #pragma omp atomic
-	    atom_virial[j_idx * 9 + dd0 * 3 + dd1] -= tmp_v;
-	  }
-	}
+        FPTYPE pref = (FPTYPE)-1.0 * net_deriv[i_idx * ndescrpt + aa];
+        for (int dd0 = 0; dd0 < 3; ++dd0) {
+          for (int dd1 = 0; dd1 < 3; ++dd1) {
+            FPTYPE tmp_v = pref * rij[i_idx * nnei * 3 + jj * 3 + dd1] *
+                           env_deriv[i_idx * ndescrpt * 3 + aa * 3 + dd0];
+#pragma omp atomic
+            virial[dd0 * 3 + dd1] -= tmp_v;
+#pragma omp atomic
+            atom_virial[j_idx * 9 + dd0 * 3 + dd1] -= tmp_v;
+          }
+        }
       }
     }
-  }  
+  }
 }
 
-template
-void 
-deepmd::
-prod_virial_a_cpu(
-    double * virial, 
-    double * atom_virial, 
-    const double * net_deriv, 
-    const double * env_deriv, 
-    const double * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei) ;
-
-template
-void 
-deepmd::
-prod_virial_a_cpu(
-    float * virial, 
-    float * atom_virial, 
-    const float * net_deriv, 
-    const float * env_deriv, 
-    const float * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei) ;
+template void deepmd::prod_virial_a_cpu(double* virial,
+                                                double* atom_virial,
+                                                const double* net_deriv,
+                                                const double* env_deriv,
+                                                const double* rij,
+                                                const int* nlist,
+                                                const int nloc,
+                                                const int nall,
+                                                const int nnei);
 
+template void deepmd::prod_virial_a_cpu(float* virial,
+                                               float* atom_virial,
+                                               const float* net_deriv,
+                                               const float* env_deriv,
+                                               const float* rij,
+                                               const int* nlist,
+                                               const int nloc,
+                                               const int nall,
+                                               const int nnei);
 
-template
-void 
-deepmd::
-prod_virial_r_cpu(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * env_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei)
-{
+template 
+void deepmd::prod_virial_r_cpu(FPTYPE* virial,
+                               FPTYPE* atom_virial,
+                               const FPTYPE* net_deriv,
+                               const FPTYPE* env_deriv,
+                               const FPTYPE* rij,
+                               const int* nlist,
+                               const int nloc,
+                               const int nall,
+                               const int nnei) {
   const int ndescrpt = nnei;
 
-  for (int ii = 0; ii < 9; ++ ii){
+  for (int ii = 0; ii < 9; ++ii) {
     virial[ii] = (FPTYPE)0.;
   }
-  for (int ii = 0; ii < 9 * nall; ++ ii){
+  for (int ii = 0; ii < 9 * nall; ++ii) {
     atom_virial[ii] = (FPTYPE)0.;
   }
 
-  // compute virial of a frame
-  #pragma omp parallel for
-  for (int ii = 0; ii < nloc; ++ii){
+// compute virial of a frame
+#pragma omp parallel for
+  for (int ii = 0; ii < nloc; ++ii) {
     int i_idx = ii;
 
     // deriv wrt neighbors
-    for (int jj = 0; jj < nnei; ++jj){
+    for (int jj = 0; jj < nnei; ++jj) {
       int j_idx = nlist[i_idx * nnei + jj];
       if (j_idx < 0) continue;
       FPTYPE pref = -1.0 * net_deriv[i_idx * ndescrpt + jj];
-      for (int dd0 = 0; dd0 < 3; ++dd0){
-	for (int dd1 = 0; dd1 < 3; ++dd1){
-	  FPTYPE tmp_v = pref * rij[i_idx * nnei * 3 + jj * 3 + dd1] *  env_deriv[i_idx * ndescrpt * 3 + jj * 3 + dd0];
-    #pragma omp atomic
-	  virial[dd0 * 3 + dd1] -= tmp_v;
-    #pragma omp atomic
-	  atom_virial[j_idx * 9 + dd0 * 3 + dd1] -= tmp_v;
-	}
+      for (int dd0 = 0; dd0 < 3; ++dd0) {
+        for (int dd1 = 0; dd1 < 3; ++dd1) {
+          FPTYPE tmp_v = pref * rij[i_idx * nnei * 3 + jj * 3 + dd1] *
+                         env_deriv[i_idx * ndescrpt * 3 + jj * 3 + dd0];
+#pragma omp atomic
+          virial[dd0 * 3 + dd1] -= tmp_v;
+#pragma omp atomic
+          atom_virial[j_idx * 9 + dd0 * 3 + dd1] -= tmp_v;
+        }
       }
     }
   }
 }
 
-template
-void 
-deepmd::
-prod_virial_r_cpu(
-    double * virial, 
-    double * atom_virial, 
-    const double * net_deriv, 
-    const double * env_deriv, 
-    const double * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
+template void deepmd::prod_virial_r_cpu(double* virial,
+                                                double* atom_virial,
+                                                const double* net_deriv,
+                                                const double* env_deriv,
+                                                const double* rij,
+                                                const int* nlist,
+                                                const int nloc,
+                                                const int nall,
+                                                const int nnei);
 
-template
-void 
-deepmd::
-prod_virial_r_cpu(
-    float * virial, 
-    float * atom_virial, 
-    const float * net_deriv, 
-    const float * env_deriv, 
-    const float * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
+template void deepmd::prod_virial_r_cpu(float* virial,
+                                               float* atom_virial,
+                                               const float* net_deriv,
+                                               const float* env_deriv,
+                                               const float* rij,
+                                               const int* nlist,
+                                               const int nloc,
+                                               const int nall,
+                                               const int nnei);
diff --git a/source/lib/src/prod_virial_grad.cc b/source/lib/src/prod_virial_grad.cc
index 0f8495c90e..14ba158cc1 100644
--- a/source/lib/src/prod_virial_grad.cc
+++ b/source/lib/src/prod_virial_grad.cc
@@ -1,105 +1,88 @@
-#include 
-#include 
 #include "prod_virial_grad.h"
+
+#include 
+#include 
+
 #include "errors.h"
 
-inline void
-make_index_range (
-    int & idx_start,
-    int & idx_end,
-    const int & nei_idx, 
-    const int & nnei) 
-{
+inline void make_index_range(int& idx_start,
+                             int& idx_end,
+                             const int& nei_idx,
+                             const int& nnei) {
   if (nei_idx < nnei) {
     idx_start = nei_idx * 4;
-    idx_end   = nei_idx * 4 + 4;
-  }
-  else {
+    idx_end = nei_idx * 4 + 4;
+  } else {
     throw deepmd::deepmd_exception("should no reach here");
   }
 }
 
-template
-void 
-deepmd::
-prod_virial_grad_a_cpu(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei)
-{
+template 
+void deepmd::prod_virial_grad_a_cpu(FPTYPE* grad_net,
+                                    const FPTYPE* grad,
+                                    const FPTYPE* env_deriv,
+                                    const FPTYPE* rij,
+                                    const int* nlist,
+                                    const int nloc,
+                                    const int nnei) {
   const int ndescrpt = nnei * 4;
 
   // reset the frame to 0
-  for (int ii = 0; ii < nloc; ++ii){
-    for (int aa = 0; aa < ndescrpt; ++aa){
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int aa = 0; aa < ndescrpt; ++aa) {
       grad_net[ii * ndescrpt + aa] = 0;
     }
-  }      
+  }
 
-  // compute grad of one frame
-  #pragma omp parallel for
-  for (int ii = 0; ii < nloc; ++ii){
+// compute grad of one frame
+#pragma omp parallel for
+  for (int ii = 0; ii < nloc; ++ii) {
     int i_idx = ii;
-	
+
     // loop over neighbors
-    for (int jj = 0; jj < nnei; ++jj){
+    for (int jj = 0; jj < nnei; ++jj) {
       int j_idx = nlist[i_idx * nnei + jj];
       if (j_idx < 0) continue;
       int aa_start, aa_end;
-      make_index_range (aa_start, aa_end, jj, nnei);
-      for (int aa = aa_start; aa < aa_end; ++aa){
-	for (int dd0 = 0; dd0 < 3; ++dd0){
-	  for (int dd1 = 0; dd1 < 3; ++dd1){
-	    grad_net[i_idx * ndescrpt + aa] -= 
-		-1.0 * grad[dd0 * 3 + dd1] * rij[i_idx * nnei * 3 + jj * 3 + dd1] * env_deriv[i_idx * ndescrpt * 3 + aa * 3 + dd0];
-	  }
-	}
+      make_index_range(aa_start, aa_end, jj, nnei);
+      for (int aa = aa_start; aa < aa_end; ++aa) {
+        for (int dd0 = 0; dd0 < 3; ++dd0) {
+          for (int dd1 = 0; dd1 < 3; ++dd1) {
+            grad_net[i_idx * ndescrpt + aa] -=
+                -1.0 * grad[dd0 * 3 + dd1] *
+                rij[i_idx * nnei * 3 + jj * 3 + dd1] *
+                env_deriv[i_idx * ndescrpt * 3 + aa * 3 + dd0];
+          }
+        }
       }
     }
   }
 }
 
+template void deepmd::prod_virial_grad_a_cpu(double* grad_net,
+                                                     const double* grad,
+                                                     const double* env_deriv,
+                                                     const double* rij,
+                                                     const int* nlist,
+                                                     const int nloc,
+                                                     const int nnei);
 
-template
-void 
-deepmd::
-prod_virial_grad_a_cpu(
-    double * grad_net,
-    const double * grad,
-    const double * env_deriv,
-    const double * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei);
-
-template
-void 
-deepmd::
-prod_virial_grad_a_cpu(
-    float * grad_net,
-    const float * grad,
-    const float * env_deriv,
-    const float * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei);
+template void deepmd::prod_virial_grad_a_cpu(float* grad_net,
+                                                    const float* grad,
+                                                    const float* env_deriv,
+                                                    const float* rij,
+                                                    const int* nlist,
+                                                    const int nloc,
+                                                    const int nnei);
 
-
-template
-void 
-deepmd::
-prod_virial_grad_r_cpu(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei)
+template 
+void deepmd::prod_virial_grad_r_cpu(FPTYPE* grad_net,
+                                    const FPTYPE* grad,
+                                    const FPTYPE* env_deriv,
+                                    const FPTYPE* rij,
+                                    const int* nlist,
+                                    const int nloc,
+                                    const int nnei)
 //
 //	grad_net:	nloc x ndescrpt
 //	grad:		9
@@ -111,53 +94,45 @@ prod_virial_grad_r_cpu(
   const int ndescrpt = nnei * 1;
 
   // reset the frame to 0
-  for (int ii = 0; ii < nloc; ++ii){
-    for (int aa = 0; aa < ndescrpt; ++aa){
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int aa = 0; aa < ndescrpt; ++aa) {
       grad_net[ii * ndescrpt + aa] = 0;
     }
-  }      
+  }
 
-  // compute grad of one frame
-  #pragma omp parallel for
-  for (int ii = 0; ii < nloc; ++ii){
+// compute grad of one frame
+#pragma omp parallel for
+  for (int ii = 0; ii < nloc; ++ii) {
     int i_idx = ii;
-	
+
     // loop over neighbors
-    for (int jj = 0; jj < nnei; ++jj){
-      int j_idx = nlist[i_idx * nnei + jj];	  
+    for (int jj = 0; jj < nnei; ++jj) {
+      int j_idx = nlist[i_idx * nnei + jj];
       if (j_idx < 0) continue;
-      for (int dd0 = 0; dd0 < 3; ++dd0){
-	for (int dd1 = 0; dd1 < 3; ++dd1){
-	  grad_net[i_idx * ndescrpt + jj] -= 
-	      -1.0 * grad[dd0 * 3 + dd1] * rij[i_idx * nnei * 3 + jj * 3 + dd1] * env_deriv[i_idx * ndescrpt * 3 + jj * 3 + dd0];
-	}
+      for (int dd0 = 0; dd0 < 3; ++dd0) {
+        for (int dd1 = 0; dd1 < 3; ++dd1) {
+          grad_net[i_idx * ndescrpt + jj] -=
+              -1.0 * grad[dd0 * 3 + dd1] *
+              rij[i_idx * nnei * 3 + jj * 3 + dd1] *
+              env_deriv[i_idx * ndescrpt * 3 + jj * 3 + dd0];
+        }
       }
     }
   }
 }
 
+template void deepmd::prod_virial_grad_r_cpu(double* grad_net,
+                                                     const double* grad,
+                                                     const double* env_deriv,
+                                                     const double* rij,
+                                                     const int* nlist,
+                                                     const int nloc,
+                                                     const int nnei);
 
-template
-void 
-deepmd::
-prod_virial_grad_r_cpu(
-    double * grad_net,
-    const double * grad,
-    const double * env_deriv,
-    const double * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei);
-
-template
-void 
-deepmd::
-prod_virial_grad_r_cpu(
-    float * grad_net,
-    const float * grad,
-    const float * env_deriv,
-    const float * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei);
-
+template void deepmd::prod_virial_grad_r_cpu(float* grad_net,
+                                                    const float* grad,
+                                                    const float* env_deriv,
+                                                    const float* rij,
+                                                    const int* nlist,
+                                                    const int nloc,
+                                                    const int nnei);
diff --git a/source/lib/src/region.cc b/source/lib/src/region.cc
index 8ce37c0584..56e309e442 100644
--- a/source/lib/src/region.cc
+++ b/source/lib/src/region.cc
@@ -1,181 +1,139 @@
-#include 
+#include "region.h"
+
 #include 
 #include 
-#include "region.h"
+#include 
+
 #include "errors.h"
 #define BOXT_DIM 9
 
 using namespace deepmd;
 
-template
-Region::
-Region()
-{
+template 
+Region::Region() {
   boxt = new FPTYPE[BOXT_DIM];
   rec_boxt = new FPTYPE[BOXT_DIM];
 }
 
-template
-Region::
-~Region()
-{
-  delete [] boxt;
-  delete [] rec_boxt;
+template 
+Region::~Region() {
+  delete[] boxt;
+  delete[] rec_boxt;
 }
 
 template struct deepmd::Region;
 template struct deepmd::Region;
 
-template
-inline FPTYPE
-compute_volume(const FPTYPE * boxt)
-{
-  FPTYPE volume =
-      boxt[0*3+0] * (boxt[1*3+1]*boxt[2*3+2] - boxt[2*3+1]*boxt[1*3+2]) - 
-      boxt[0*3+1] * (boxt[1*3+0]*boxt[2*3+2] - boxt[2*3+0]*boxt[1*3+2]) +
-      boxt[0*3+2] * (boxt[1*3+0]*boxt[2*3+1] - boxt[2*3+0]*boxt[1*3+1]);
+template 
+inline FPTYPE compute_volume(const FPTYPE* boxt) {
+  FPTYPE volume = boxt[0 * 3 + 0] * (boxt[1 * 3 + 1] * boxt[2 * 3 + 2] -
+                                     boxt[2 * 3 + 1] * boxt[1 * 3 + 2]) -
+                  boxt[0 * 3 + 1] * (boxt[1 * 3 + 0] * boxt[2 * 3 + 2] -
+                                     boxt[2 * 3 + 0] * boxt[1 * 3 + 2]) +
+                  boxt[0 * 3 + 2] * (boxt[1 * 3 + 0] * boxt[2 * 3 + 1] -
+                                     boxt[2 * 3 + 0] * boxt[1 * 3 + 1]);
   volume = std::abs(volume);
   return volume;
 }
 
-template
-inline void
-compute_rec_boxt(
-    FPTYPE * rec_boxt,
-    const FPTYPE * boxt)
-{
+template 
+inline void compute_rec_boxt(FPTYPE* rec_boxt, const FPTYPE* boxt) {
   FPTYPE volumei = static_cast(1.) / compute_volume(boxt);
-  rec_boxt[0*3+0] =( boxt[1*3+1]*boxt[2*3+2] - boxt[2*3+1]*boxt[1*3+2]) * volumei;
-  rec_boxt[1*3+1] =( boxt[0*3+0]*boxt[2*3+2] - boxt[2*3+0]*boxt[0*3+2]) * volumei;
-  rec_boxt[2*3+2] =( boxt[0*3+0]*boxt[1*3+1] - boxt[1*3+0]*boxt[0*3+1]) * volumei;
-  rec_boxt[0*3+1] =(-boxt[1*3+0]*boxt[2*3+2] + boxt[2*3+0]*boxt[1*3+2]) * volumei;
-  rec_boxt[0*3+2] =( boxt[1*3+0]*boxt[2*3+1] - boxt[2*3+0]*boxt[1*3+1]) * volumei;
-  rec_boxt[1*3+0] =(-boxt[0*3+1]*boxt[2*3+2] + boxt[2*3+1]*boxt[0*3+2]) * volumei;
-  rec_boxt[1*3+2] =(-boxt[0*3+0]*boxt[2*3+1] + boxt[2*3+0]*boxt[0*3+1]) * volumei;
-  rec_boxt[2*3+0] =( boxt[0*3+1]*boxt[1*3+2] - boxt[1*3+1]*boxt[0*3+2]) * volumei;
-  rec_boxt[2*3+1] =(-boxt[0*3+0]*boxt[1*3+2] + boxt[1*3+0]*boxt[0*3+2]) * volumei;
+  rec_boxt[0 * 3 + 0] =
+      (boxt[1 * 3 + 1] * boxt[2 * 3 + 2] - boxt[2 * 3 + 1] * boxt[1 * 3 + 2]) *
+      volumei;
+  rec_boxt[1 * 3 + 1] =
+      (boxt[0 * 3 + 0] * boxt[2 * 3 + 2] - boxt[2 * 3 + 0] * boxt[0 * 3 + 2]) *
+      volumei;
+  rec_boxt[2 * 3 + 2] =
+      (boxt[0 * 3 + 0] * boxt[1 * 3 + 1] - boxt[1 * 3 + 0] * boxt[0 * 3 + 1]) *
+      volumei;
+  rec_boxt[0 * 3 + 1] =
+      (-boxt[1 * 3 + 0] * boxt[2 * 3 + 2] + boxt[2 * 3 + 0] * boxt[1 * 3 + 2]) *
+      volumei;
+  rec_boxt[0 * 3 + 2] =
+      (boxt[1 * 3 + 0] * boxt[2 * 3 + 1] - boxt[2 * 3 + 0] * boxt[1 * 3 + 1]) *
+      volumei;
+  rec_boxt[1 * 3 + 0] =
+      (-boxt[0 * 3 + 1] * boxt[2 * 3 + 2] + boxt[2 * 3 + 1] * boxt[0 * 3 + 2]) *
+      volumei;
+  rec_boxt[1 * 3 + 2] =
+      (-boxt[0 * 3 + 0] * boxt[2 * 3 + 1] + boxt[2 * 3 + 0] * boxt[0 * 3 + 1]) *
+      volumei;
+  rec_boxt[2 * 3 + 0] =
+      (boxt[0 * 3 + 1] * boxt[1 * 3 + 2] - boxt[1 * 3 + 1] * boxt[0 * 3 + 2]) *
+      volumei;
+  rec_boxt[2 * 3 + 1] =
+      (-boxt[0 * 3 + 0] * boxt[1 * 3 + 2] + boxt[1 * 3 + 0] * boxt[0 * 3 + 2]) *
+      volumei;
 }
 
-template
-inline void
-tensor_dot_vec (
-    FPTYPE * o_v,
-    const FPTYPE * i_t,
-    const FPTYPE * i_v)
-{
-  o_v[0] = i_v[0] * i_t[0*3+0] + i_v[1] * i_t[0*3+1] + i_v[2] * i_t[0*3+2];
-  o_v[1] = i_v[0] * i_t[1*3+0] + i_v[1] * i_t[1*3+1] + i_v[2] * i_t[1*3+2];
-  o_v[2] = i_v[0] * i_t[2*3+0] + i_v[1] * i_t[2*3+1] + i_v[2] * i_t[2*3+2];
+template 
+inline void tensor_dot_vec(FPTYPE* o_v, const FPTYPE* i_t, const FPTYPE* i_v) {
+  o_v[0] = i_v[0] * i_t[0 * 3 + 0] + i_v[1] * i_t[0 * 3 + 1] +
+           i_v[2] * i_t[0 * 3 + 2];
+  o_v[1] = i_v[0] * i_t[1 * 3 + 0] + i_v[1] * i_t[1 * 3 + 1] +
+           i_v[2] * i_t[1 * 3 + 2];
+  o_v[2] = i_v[0] * i_t[2 * 3 + 0] + i_v[1] * i_t[2 * 3 + 1] +
+           i_v[2] * i_t[2 * 3 + 2];
 }
 
-template
-inline void
-tensor_t_dot_vec (
-    FPTYPE * o_v,
-    const FPTYPE * i_t,
-    const FPTYPE * i_v)
-{
-  o_v[0] = i_v[0] * i_t[0*3+0] + i_v[1] * i_t[1*3+0] + i_v[2] * i_t[2*3+0];
-  o_v[1] = i_v[0] * i_t[0*3+1] + i_v[1] * i_t[1*3+1] + i_v[2] * i_t[2*3+1];
-  o_v[2] = i_v[0] * i_t[0*3+2] + i_v[1] * i_t[1*3+2] + i_v[2] * i_t[2*3+2];
+template 
+inline void tensor_t_dot_vec(FPTYPE* o_v,
+                             const FPTYPE* i_t,
+                             const FPTYPE* i_v) {
+  o_v[0] = i_v[0] * i_t[0 * 3 + 0] + i_v[1] * i_t[1 * 3 + 0] +
+           i_v[2] * i_t[2 * 3 + 0];
+  o_v[1] = i_v[0] * i_t[0 * 3 + 1] + i_v[1] * i_t[1 * 3 + 1] +
+           i_v[2] * i_t[2 * 3 + 1];
+  o_v[2] = i_v[0] * i_t[0 * 3 + 2] + i_v[1] * i_t[1 * 3 + 2] +
+           i_v[2] * i_t[2 * 3 + 2];
 }
 
-template
-void
-deepmd::
-init_region_cpu(
-    Region & region,
-    const FPTYPE * boxt)
-{
-  std::copy(boxt, boxt+BOXT_DIM, region.boxt);
+template 
+void deepmd::init_region_cpu(Region& region, const FPTYPE* boxt) {
+  std::copy(boxt, boxt + BOXT_DIM, region.boxt);
   compute_rec_boxt(region.rec_boxt, region.boxt);
 }
 
-template
-void
-deepmd::
-convert_to_inter_cpu(
-    FPTYPE * ri, 
-    const Region & region,
-    const FPTYPE * rp)
-{
+template 
+void deepmd::convert_to_inter_cpu(FPTYPE* ri,
+                                  const Region& region,
+                                  const FPTYPE* rp) {
   tensor_dot_vec(ri, region.rec_boxt, rp);
 }
 
-template
-void
-deepmd::
-convert_to_phys_cpu(
-    FPTYPE * rp, 
-    const Region & region,
-    const FPTYPE * ri)
-{
+template 
+void deepmd::convert_to_phys_cpu(FPTYPE* rp,
+                                 const Region& region,
+                                 const FPTYPE* ri) {
   tensor_t_dot_vec(rp, region.boxt, ri);
 }
 
-template
-FPTYPE
-deepmd::
-volume_cpu(
-    const Region & region)
-{
+template 
+FPTYPE deepmd::volume_cpu(const Region& region) {
   return compute_volume(region.boxt);
 }
 
-template
-void 
-deepmd::
-init_region_cpu(
-    deepmd::Region & region,
-    const double * boxt);
-
-template
-void 
-deepmd::
-init_region_cpu(
-    deepmd::Region & region,
-    const float * boxt);
-
-template
-void
-deepmd::
-convert_to_inter_cpu(
-    double * ri, 
-    const deepmd::Region & region,
-    const double * rp);
-
-template
-void
-deepmd::
-convert_to_inter_cpu(
-    float * ri, 
-    const deepmd::Region & region,
-    const float * rp);
-
-template
-void
-deepmd::
-convert_to_phys_cpu(
-    double * ri, 
-    const deepmd::Region & region,
-    const double * rp);
-
-template
-void
-deepmd::
-convert_to_phys_cpu(
-    float * ri, 
-    const deepmd::Region & region,
-    const float * rp);
-
-template
-double
-deepmd::
-volume_cpu(
-    const deepmd::Region & region);
-
-template
-float
-deepmd::
-volume_cpu(
-    const deepmd::Region & region);
+template void deepmd::init_region_cpu(deepmd::Region& region,
+                                              const double* boxt);
+
+template void deepmd::init_region_cpu(deepmd::Region& region,
+                                             const float* boxt);
+
+template void deepmd::convert_to_inter_cpu(
+    double* ri, const deepmd::Region& region, const double* rp);
+
+template void deepmd::convert_to_inter_cpu(
+    float* ri, const deepmd::Region& region, const float* rp);
+
+template void deepmd::convert_to_phys_cpu(
+    double* ri, const deepmd::Region& region, const double* rp);
+
+template void deepmd::convert_to_phys_cpu(
+    float* ri, const deepmd::Region& region, const float* rp);
+
+template double deepmd::volume_cpu(
+    const deepmd::Region& region);
+
+template float deepmd::volume_cpu(const deepmd::Region& region);
diff --git a/source/lib/src/rocm/CMakeLists.txt b/source/lib/src/rocm/CMakeLists.txt
index 0a499e9160..f659973897 100644
--- a/source/lib/src/rocm/CMakeLists.txt
+++ b/source/lib/src/rocm/CMakeLists.txt
@@ -12,29 +12,28 @@ add_definitions("-DCUB_IGNORE_DEPRECATED_CPP_DIALECT")
 
 message(STATUS "HIP major version is " ${HIP_VERSION_MAJOR})
 
-set (HIP_HIPCC_FLAGS -fno-gpu-rdc; -fPIC --std=c++14 ${HIP_HIPCC_FLAGS}) # --amdgpu-target=gfx906
-if (HIP_VERSION VERSION_LESS 3.5.1)
-  set (HIP_HIPCC_FLAGS -hc; ${HIP_HIPCC_FLAGS})
+set(HIP_HIPCC_FLAGS -fno-gpu-rdc; -fPIC --std=c++14 ${HIP_HIPCC_FLAGS}
+)# --amdgpu-target=gfx906
+if(HIP_VERSION VERSION_LESS 3.5.1)
+  set(HIP_HIPCC_FLAGS -hc; ${HIP_HIPCC_FLAGS})
 endif()
 
-file (GLOB SOURCE_FILES "*.hip.cu" )
+file(GLOB SOURCE_FILES "*.hip.cu")
 
 hip_add_library(deepmd_op_rocm SHARED ${SOURCE_FILES})
 target_include_directories(
-  deepmd_op_rocm PUBLIC
-  $
-  $
-)
+  deepmd_op_rocm
+  PUBLIC $
+         $)
 target_precompile_headers(deepmd_op_rocm PUBLIC [["device.h"]])
 
 install(TARGETS deepmd_op_rocm DESTINATION lib/)
-if (BUILD_CPP_IF)
+if(BUILD_CPP_IF)
   install(
     TARGETS deepmd_op_rocm
     EXPORT ${CMAKE_PROJECT_NAME}Targets
-    DESTINATION lib/
-  )
-endif (BUILD_CPP_IF)
-if (BUILD_PY_IF)
+    DESTINATION lib/)
+endif(BUILD_CPP_IF)
+if(BUILD_PY_IF)
   install(TARGETS deepmd_op_rocm DESTINATION deepmd/op/)
-endif (BUILD_PY_IF)
+endif(BUILD_PY_IF)
diff --git a/source/lib/src/rocm/coord.hip.cu b/source/lib/src/rocm/coord.hip.cu
index ab75e7f7a0..592f425195 100644
--- a/source/lib/src/rocm/coord.hip.cu
+++ b/source/lib/src/rocm/coord.hip.cu
@@ -1,429 +1,431 @@
-#include "device.h"
 #include "coord.h"
+#include "device.h"
 #include "region.cuh"
 
-__device__ inline int collapse_index(
-    const int * idx,
-    const int * size)
-{
-    return (idx[0] * size[1] + idx[1]) * size[2] + idx[2];
+__device__ inline int collapse_index(const int *idx, const int *size) {
+  return (idx[0] * size[1] + idx[1]) * size[2] + idx[2];
 }
-__device__ inline void index_recover(
-    const int in_idx,
-    const int * size, 
-    int * idx)
-{
-    idx[2]=in_idx%size[2];
-    idx[1]=int(in_idx/size[2])%size[1];
-    idx[0]=int(int(in_idx/size[2])/size[1]);
+__device__ inline void index_recover(const int in_idx,
+                                     const int *size,
+                                     int *idx) {
+  idx[2] = in_idx % size[2];
+  idx[1] = int(in_idx / size[2]) % size[1];
+  idx[0] = int(int(in_idx / size[2]) / size[1]);
 }
-__device__ inline void idx_addshift(
-    int * idx, 
-    const int * shift)
-{
-    for(int dd=0;dd<3;dd++)
-    {
-        idx[dd]+=shift[dd];
-    }
+__device__ inline void idx_addshift(int *idx, const int *shift) {
+  for (int dd = 0; dd < 3; dd++) {
+    idx[dd] += shift[dd];
+  }
 }
-__device__ inline void idx_unshift(
-    int * idx, 
-    const int * shift)
-{
-    for(int dd=0;dd<3;dd++)
-    {
-        idx[dd]-=shift[dd];
-    }
+__device__ inline void idx_unshift(int *idx, const int *shift) {
+  for (int dd = 0; dd < 3; dd++) {
+    idx[dd] -= shift[dd];
+  }
 }
-__device__ inline int compute_pbc_shift(
-    int idx, 
-    int ncell)
-{
-    int shift = 0;
-    if (idx < 0) {
+__device__ inline int compute_pbc_shift(int idx, int ncell) {
+  int shift = 0;
+  if (idx < 0) {
     shift = 1;
-    while (idx + shift * ncell < 0) shift ++;
-    }
-    else if (idx >= ncell) {
+    while (idx + shift * ncell < 0) shift++;
+  } else if (idx >= ncell) {
     shift = -1;
-    while (idx + shift * ncell >= ncell) shift --;
-    }
-    return shift;
+    while (idx + shift * ncell >= ncell) shift--;
+  }
+  return shift;
 }
 
-__device__ inline double _fmod(double x, double y) {return fmod(x, y);}
-__device__ inline float _fmod(float x, float y) {return fmodf(x, y);}
+__device__ inline double _fmod(double x, double y) { return fmod(x, y); }
+__device__ inline float _fmod(float x, float y) { return fmodf(x, y); }
 
-template
-__global__ void normalize_one(
-    FPTYPE *out_c,
-    const FPTYPE *boxt,
-    const FPTYPE *rec_boxt,
-    const int nall)
-{
-    // <<>>
-    int idy=blockIdx.x*blockDim.x+threadIdx.x;
-    if (idy>=nall){return;}
-    FPTYPE inter[3];
-    phys2Inter(inter,out_c+idy*3,rec_boxt);
-    for (int dd = 0; dd < 3; ++dd) {
-        inter[dd]=_fmod(inter[dd], (FPTYPE)1.);
-        if (inter[dd] <  (FPTYPE)0.) inter[dd] += (FPTYPE)1.;
-    }
-    inter2Phys(out_c+idy*3,inter,boxt);
+template 
+__global__ void normalize_one(FPTYPE *out_c,
+                              const FPTYPE *boxt,
+                              const FPTYPE *rec_boxt,
+                              const int nall) {
+  // <<>>
+  int idy = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idy >= nall) {
+    return;
+  }
+  FPTYPE inter[3];
+  phys2Inter(inter, out_c + idy * 3, rec_boxt);
+  for (int dd = 0; dd < 3; ++dd) {
+    inter[dd] = _fmod(inter[dd], (FPTYPE)1.);
+    if (inter[dd] < (FPTYPE)0.) inter[dd] += (FPTYPE)1.;
+  }
+  inter2Phys(out_c + idy * 3, inter, boxt);
 }
 
-template
-__global__ void _fill_idx_cellmap(
-    int * idx_cellmap,
-    int * idx_cellmap_noshift,
-    const FPTYPE *in_c,
-    const FPTYPE *rec_boxt,
-    const int *nat_stt,
-    const int *nat_end,
-    const int *ext_stt,
-    const int *ext_end,
-    const int nloc)
-{
-    int idy = blockIdx.x*blockDim.x+threadIdx.x;
-    int ext_ncell[3];
-    int global_grid[3];
-    int idx_orig_shift[3];
-    FPTYPE cell_size[3];
-    FPTYPE nat_orig[3];
-    for (int dd = 0; dd < 3; ++dd) 
-    {
-        ext_ncell[dd] = ext_end[dd] - ext_stt[dd];
-        global_grid[dd] = nat_end[dd] - nat_stt[dd];
-        idx_orig_shift[dd] = nat_stt[dd] - ext_stt[dd];
-        cell_size[dd] = (FPTYPE)1./global_grid[dd];
-        nat_orig[dd] = nat_stt[dd] * cell_size[dd];
-    }
-    if (idy= nat_end[dd]) 
-            {
-                idx_noshift[dd] = nat_end[dd] - 1;
-            }
-            idx[dd] = idx_noshift[dd]+idx_orig_shift[dd];
-        }
-        idx_cellmap_noshift[idy]=collapse_index(idx_noshift, global_grid);
-        idx_cellmap[idy]=collapse_index(idx, ext_ncell);
+template 
+__global__ void _fill_idx_cellmap(int *idx_cellmap,
+                                  int *idx_cellmap_noshift,
+                                  const FPTYPE *in_c,
+                                  const FPTYPE *rec_boxt,
+                                  const int *nat_stt,
+                                  const int *nat_end,
+                                  const int *ext_stt,
+                                  const int *ext_end,
+                                  const int nloc) {
+  int idy = blockIdx.x * blockDim.x + threadIdx.x;
+  int ext_ncell[3];
+  int global_grid[3];
+  int idx_orig_shift[3];
+  FPTYPE cell_size[3];
+  FPTYPE nat_orig[3];
+  for (int dd = 0; dd < 3; ++dd) {
+    ext_ncell[dd] = ext_end[dd] - ext_stt[dd];
+    global_grid[dd] = nat_end[dd] - nat_stt[dd];
+    idx_orig_shift[dd] = nat_stt[dd] - ext_stt[dd];
+    cell_size[dd] = (FPTYPE)1. / global_grid[dd];
+    nat_orig[dd] = nat_stt[dd] * cell_size[dd];
+  }
+  if (idy < nloc) {
+    int idx_noshift[3];
+    int idx[3];
+    FPTYPE inter[3];
+    phys2Inter(inter, in_c + idy * 3, rec_boxt);
+    for (int dd = 0; dd < 3; ++dd) {
+      idx_noshift[dd] = (inter[dd] - nat_orig[dd]) / cell_size[dd];
+      if (inter[dd] - nat_orig[dd] < 0.) idx_noshift[dd]--;
+      if (idx_noshift[dd] < nat_stt[dd]) {
+        idx_noshift[dd] = nat_stt[dd];
+      } else if (idx_noshift[dd] >= nat_end[dd]) {
+        idx_noshift[dd] = nat_end[dd] - 1;
+      }
+      idx[dd] = idx_noshift[dd] + idx_orig_shift[dd];
     }
+    idx_cellmap_noshift[idy] = collapse_index(idx_noshift, global_grid);
+    idx_cellmap[idy] = collapse_index(idx, ext_ncell);
+  }
 }
 
-__global__ void _fill_loc_cellnum_map(
-    int * temp_idx_order,
-    int * loc_cellnum_map,
-    const int * idx_cellmap_noshift,
-    const int nloc,
-    const int loc_cellnum)
-{
-    int idy = blockIdx.x*blockDim.x+threadIdx.x;
-    if (idy=nloc){return;}
-    int cell_idx=idx_cellmap[idy];
-    int * clist_row = clist+sec_num_map[cell_idx];
-    clist_row[idx_order[idy]]=idy;
+__global__ void _build_loc_clist(int *clist,
+                                 const int *idx_cellmap,
+                                 const int *idx_order,
+                                 const int *sec_num_map,
+                                 const int nloc) {
+  int idy = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idy >= nloc) {
+    return;
+  }
+  int cell_idx = idx_cellmap[idy];
+  int *clist_row = clist + sec_num_map[cell_idx];
+  clist_row[idx_order[idy]] = idy;
 }
 
-template
-__global__ void _copy_coord(
-    FPTYPE * out_c, 
-    int * out_t, 
-    int * mapping, 
-    const FPTYPE * in_c, 
-    const int * in_t, 
-    const int * cell_map, 
-    const int * cell_shift_map, 
-    const int * sec_loc_cellnum_map, 
-    const int * sec_total_cellnum_map, 
-    const int * loc_clist, 
-    const int nloc, 
-    const int nall, 
-    const int total_cellnum, 
-    const FPTYPE * boxt, 
-    const FPTYPE * rec_boxt)
-{
-    int idy = blockIdx.x*blockDim.x+threadIdx.x;
-    if(idy>=nall){return;}
-    if(idy
+__global__ void _copy_coord(FPTYPE *out_c,
+                            int *out_t,
+                            int *mapping,
+                            const FPTYPE *in_c,
+                            const int *in_t,
+                            const int *cell_map,
+                            const int *cell_shift_map,
+                            const int *sec_loc_cellnum_map,
+                            const int *sec_total_cellnum_map,
+                            const int *loc_clist,
+                            const int nloc,
+                            const int nall,
+                            const int total_cellnum,
+                            const FPTYPE *boxt,
+                            const FPTYPE *rec_boxt) {
+  int idy = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idy >= nall) {
+    return;
+  }
+  if (idy < nloc) {
+    mapping[idy] = idy;
+    out_t[idy] = in_t[idy];
+    for (int dd = 0; dd < 3; dd++) {
+      out_c[idy * 3 + dd] = in_c[idy * 3 + dd];
+    }
+  } else {
+    int cell_idx = 0;
+    int atom_idx = 0;
+    int orig_cell_idx = 0;
+    int orig_idy = 0;
+    int shift[3];
+    FPTYPE d_shift[3];
+    for (int ii = 0; ii < total_cellnum; ii++) {
+      if (idy >= sec_total_cellnum_map[ii + 1])
+        cell_idx++;
+      else
+        break;
+    }
+    for (int dd = 0; dd < 3; dd++) {
+      shift[dd] = cell_shift_map[cell_idx * 3 + dd];
+      d_shift[dd] = shift[dd];
     }
-    else
-    {
-        int cell_idx=0;
-        int atom_idx=0;
-        int orig_cell_idx=0;
-        int orig_idy=0;
-        int shift[3];
-        FPTYPE d_shift[3];
-        for(int ii=0;ii=sec_total_cellnum_map[ii+1])cell_idx++;
-            else break;
-        }
-        for(int dd=0;dd<3;dd++)
-        {
-            shift[dd]=cell_shift_map[cell_idx*3+dd];
-            d_shift[dd]=shift[dd];
-        }
-        atom_idx=idy-sec_total_cellnum_map[cell_idx];
-        orig_cell_idx=cell_map[cell_idx];
-        orig_idy=loc_clist[sec_loc_cellnum_map[orig_cell_idx]+atom_idx];
-        mapping[idy]=orig_idy;
-        out_t[idy]=in_t[orig_idy];
-        FPTYPE shift_v[3];
-        inter2Phys(shift_v,d_shift,boxt);
-        for(int dd=0;dd<3;dd++)
-        {
-            out_c[idy*3+dd]=in_c[orig_idy*3+dd]-shift_v[dd];
-        }
+    atom_idx = idy - sec_total_cellnum_map[cell_idx];
+    orig_cell_idx = cell_map[cell_idx];
+    orig_idy = loc_clist[sec_loc_cellnum_map[orig_cell_idx] + atom_idx];
+    mapping[idy] = orig_idy;
+    out_t[idy] = in_t[orig_idy];
+    FPTYPE shift_v[3];
+    inter2Phys(shift_v, d_shift, boxt);
+    for (int dd = 0; dd < 3; dd++) {
+      out_c[idy * 3 + dd] = in_c[orig_idy * 3 + dd] - shift_v[dd];
     }
+  }
 }
 
 template 
-void compute_int_data(
-    int * int_data, 
-    const FPTYPE * in_c, 
-    const int * cell_info, 
-    const deepmd::Region & region, 
-    const int nloc, 
-    const int loc_cellnum, 
-    const int total_cellnum)
-{
-    int * idx_cellmap=int_data;
-    int * idx_cellmap_noshift=idx_cellmap+nloc;
-    int * temp_idx_order=idx_cellmap_noshift+nloc;
-    int * loc_cellnum_map=temp_idx_order+nloc;
-    int * total_cellnum_map=loc_cellnum_map+loc_cellnum;
-    int * mask_cellnum_map=total_cellnum_map+total_cellnum;
-    int * cell_map=mask_cellnum_map+total_cellnum;
-    int * cell_shift_map=cell_map+total_cellnum;
-    const int * nat_stt=cell_info;
-    const int * nat_end=cell_info+3;
-    const int * ext_stt=cell_info+6;
-    const int * ext_end=cell_info+9;
-    const FPTYPE * rec_boxt = region.rec_boxt;
-    
-    const int nblock_loc=(nloc+TPB-1)/TPB;
-    hipLaunchKernelGGL(_fill_idx_cellmap, nblock_loc, TPB, 0, 0, idx_cellmap, idx_cellmap_noshift, in_c, rec_boxt, 
-        nat_stt, nat_end, ext_stt, ext_end, nloc);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+void compute_int_data(int *int_data,
+                      const FPTYPE *in_c,
+                      const int *cell_info,
+                      const deepmd::Region ®ion,
+                      const int nloc,
+                      const int loc_cellnum,
+                      const int total_cellnum) {
+  int *idx_cellmap = int_data;
+  int *idx_cellmap_noshift = idx_cellmap + nloc;
+  int *temp_idx_order = idx_cellmap_noshift + nloc;
+  int *loc_cellnum_map = temp_idx_order + nloc;
+  int *total_cellnum_map = loc_cellnum_map + loc_cellnum;
+  int *mask_cellnum_map = total_cellnum_map + total_cellnum;
+  int *cell_map = mask_cellnum_map + total_cellnum;
+  int *cell_shift_map = cell_map + total_cellnum;
+  const int *nat_stt = cell_info;
+  const int *nat_end = cell_info + 3;
+  const int *ext_stt = cell_info + 6;
+  const int *ext_end = cell_info + 9;
+  const FPTYPE *rec_boxt = region.rec_boxt;
 
-    const int nblock_loc_cellnum=(loc_cellnum+TPB-1)/TPB;
-    hipLaunchKernelGGL(_fill_loc_cellnum_map, nblock_loc_cellnum, TPB, 0, 0, temp_idx_order, loc_cellnum_map, 
-        idx_cellmap_noshift, nloc, loc_cellnum);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+  const int nblock_loc = (nloc + TPB - 1) / TPB;
+  hipLaunchKernelGGL(_fill_idx_cellmap, nblock_loc, TPB, 0, 0, idx_cellmap,
+                     idx_cellmap_noshift, in_c, rec_boxt, nat_stt, nat_end,
+                     ext_stt, ext_end, nloc);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 
-    const int nblock_total_cellnum=(total_cellnum+TPB-1)/TPB;
-    hipLaunchKernelGGL(_fill_total_cellnum_map, nblock_total_cellnum, TPB, 0, 0, total_cellnum_map, mask_cellnum_map, cell_map, 
-        cell_shift_map, nat_stt, nat_end, ext_stt, ext_end, loc_cellnum_map, total_cellnum);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+  const int nblock_loc_cellnum = (loc_cellnum + TPB - 1) / TPB;
+  hipLaunchKernelGGL(_fill_loc_cellnum_map, nblock_loc_cellnum, TPB, 0, 0,
+                     temp_idx_order, loc_cellnum_map, idx_cellmap_noshift, nloc,
+                     loc_cellnum);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
+
+  const int nblock_total_cellnum = (total_cellnum + TPB - 1) / TPB;
+  hipLaunchKernelGGL(_fill_total_cellnum_map, nblock_total_cellnum, TPB, 0, 0,
+                     total_cellnum_map, mask_cellnum_map, cell_map,
+                     cell_shift_map, nat_stt, nat_end, ext_stt, ext_end,
+                     loc_cellnum_map, total_cellnum);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
 
-void build_loc_clist(
-    int * int_data, 
-    const int nloc, 
-    const int loc_cellnum, 
-    const int total_cellnum)
-{
-    const int nblock=(nloc+TPB-1)/TPB;
-    const int * idx_cellmap_noshift=int_data+nloc;
-    const int * temp_idx_order=idx_cellmap_noshift+nloc;
-    const int * sec_loc_cellnum_map=temp_idx_order+nloc+loc_cellnum+2*total_cellnum+total_cellnum+3*total_cellnum;
-    int * loc_clist=int_data+nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1;
-    hipLaunchKernelGGL(_build_loc_clist, nblock, TPB, 0, 0, loc_clist, idx_cellmap_noshift, temp_idx_order, sec_loc_cellnum_map, nloc);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+void build_loc_clist(int *int_data,
+                     const int nloc,
+                     const int loc_cellnum,
+                     const int total_cellnum) {
+  const int nblock = (nloc + TPB - 1) / TPB;
+  const int *idx_cellmap_noshift = int_data + nloc;
+  const int *temp_idx_order = idx_cellmap_noshift + nloc;
+  const int *sec_loc_cellnum_map = temp_idx_order + nloc + loc_cellnum +
+                                   2 * total_cellnum + total_cellnum +
+                                   3 * total_cellnum;
+  int *loc_clist = int_data + nloc * 3 + loc_cellnum + total_cellnum * 3 +
+                   total_cellnum * 3 + loc_cellnum + 1 + total_cellnum + 1;
+  hipLaunchKernelGGL(_build_loc_clist, nblock, TPB, 0, 0, loc_clist,
+                     idx_cellmap_noshift, temp_idx_order, sec_loc_cellnum_map,
+                     nloc);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
 
 template 
-void copy_coord(
-    FPTYPE * out_c, 
-    int * out_t, 
-    int * mapping, 
-    const int * int_data, 
-    const FPTYPE * in_c, 
-    const int * in_t, 
-    const int nloc, 
-    const int nall, 
-    const int loc_cellnum, 
-    const int total_cellnum, 
-    const deepmd::Region & region)
-{
-    const int nblock=(nall+TPB-1)/TPB;
-    const int * cell_map=int_data+3*nloc+loc_cellnum+2*total_cellnum;
-    const int * cell_shift_map=cell_map+total_cellnum;
-    const int * sec_loc_cellnum_map=cell_shift_map+3*total_cellnum;
-    const int * sec_total_cellnum_map=sec_loc_cellnum_map+loc_cellnum+1;
-    const int * loc_clist=sec_total_cellnum_map+total_cellnum+1;
+void copy_coord(FPTYPE *out_c,
+                int *out_t,
+                int *mapping,
+                const int *int_data,
+                const FPTYPE *in_c,
+                const int *in_t,
+                const int nloc,
+                const int nall,
+                const int loc_cellnum,
+                const int total_cellnum,
+                const deepmd::Region ®ion) {
+  const int nblock = (nall + TPB - 1) / TPB;
+  const int *cell_map = int_data + 3 * nloc + loc_cellnum + 2 * total_cellnum;
+  const int *cell_shift_map = cell_map + total_cellnum;
+  const int *sec_loc_cellnum_map = cell_shift_map + 3 * total_cellnum;
+  const int *sec_total_cellnum_map = sec_loc_cellnum_map + loc_cellnum + 1;
+  const int *loc_clist = sec_total_cellnum_map + total_cellnum + 1;
 
-    const FPTYPE *boxt = region.boxt;
-    const FPTYPE *rec_boxt = region.rec_boxt;
-    hipLaunchKernelGGL(_copy_coord, nblock, TPB, 0, 0, out_c, out_t, mapping, in_c, in_t, cell_map, cell_shift_map, 
-        sec_loc_cellnum_map, sec_total_cellnum_map, loc_clist, nloc, nall, total_cellnum, boxt, rec_boxt);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+  const FPTYPE *boxt = region.boxt;
+  const FPTYPE *rec_boxt = region.rec_boxt;
+  hipLaunchKernelGGL(_copy_coord, nblock, TPB, 0, 0, out_c, out_t, mapping,
+                     in_c, in_t, cell_map, cell_shift_map, sec_loc_cellnum_map,
+                     sec_total_cellnum_map, loc_clist, nloc, nall,
+                     total_cellnum, boxt, rec_boxt);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
 
 namespace deepmd {
 template 
-void
-normalize_coord_gpu_rocm(
-    FPTYPE * coord,
-    const int natom,
-    const Region & region)
-{
-    const FPTYPE * boxt=region.boxt;
-    const FPTYPE * rec_boxt=region.rec_boxt;
-    const int nblock=(natom+TPB-1)/TPB;
-    hipLaunchKernelGGL(normalize_one, nblock, TPB, 0, 0, coord, boxt, rec_boxt, natom);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+void normalize_coord_gpu_rocm(FPTYPE *coord,
+                              const int natom,
+                              const Region ®ion) {
+  const FPTYPE *boxt = region.boxt;
+  const FPTYPE *rec_boxt = region.rec_boxt;
+  const int nblock = (natom + TPB - 1) / TPB;
+  hipLaunchKernelGGL(normalize_one, nblock, TPB, 0, 0, coord, boxt, rec_boxt,
+                     natom);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
 
 template 
-int
-copy_coord_gpu_rocm(
-    FPTYPE * out_c,
-    int * out_t,
-    int * mapping,
-    int * nall,
-    int * int_data,
-    const FPTYPE * in_c,
-    const int * in_t,
-    const int & nloc,
-    const int & mem_nall,
-    const int & loc_cellnum,
-    const int & total_cellnum,
-    const int * cell_info,
-    const Region & region)
-{
-    compute_int_data(int_data, in_c, cell_info, region, nloc, loc_cellnum, total_cellnum);
-    int * int_data_cpu=new int [loc_cellnum+2*total_cellnum+loc_cellnum+1+total_cellnum+1];//loc_cellnum_map,total_cellnum_map,mask_cellnum_map,sec_loc_cellnum_map,sec_total_cellnum_map
-    DPErrcheck(hipMemcpy(int_data_cpu, int_data+3*nloc, sizeof(int) * (loc_cellnum + 2 * total_cellnum), hipMemcpyDeviceToHost));
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
-    int * loc_cellnum_map=int_data_cpu;
-    int * total_cellnum_map=loc_cellnum_map+loc_cellnum;
-    int * mask_cellnum_map=total_cellnum_map+total_cellnum;
-    int * sec_loc_cellnum_map=mask_cellnum_map+total_cellnum;
-    int * sec_total_cellnum_map=sec_loc_cellnum_map+loc_cellnum+1;
-    sec_loc_cellnum_map[0]=0;
-    sec_total_cellnum_map[0]=nloc;
-    int max_cell=0;
-    for(int iii=0;iii mem_nall){
-        delete[] int_data_cpu;
-        // size of the output arrays is not large enough
-        return 1;
+int copy_coord_gpu_rocm(FPTYPE *out_c,
+                        int *out_t,
+                        int *mapping,
+                        int *nall,
+                        int *int_data,
+                        const FPTYPE *in_c,
+                        const int *in_t,
+                        const int &nloc,
+                        const int &mem_nall,
+                        const int &loc_cellnum,
+                        const int &total_cellnum,
+                        const int *cell_info,
+                        const Region ®ion) {
+  compute_int_data(int_data, in_c, cell_info, region, nloc, loc_cellnum,
+                   total_cellnum);
+  int *int_data_cpu = new int
+      [loc_cellnum + 2 * total_cellnum + loc_cellnum + 1 + total_cellnum +
+       1];  // loc_cellnum_map,total_cellnum_map,mask_cellnum_map,sec_loc_cellnum_map,sec_total_cellnum_map
+  DPErrcheck(hipMemcpy(int_data_cpu, int_data + 3 * nloc,
+                       sizeof(int) * (loc_cellnum + 2 * total_cellnum),
+                       hipMemcpyDeviceToHost));
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
+  int *loc_cellnum_map = int_data_cpu;
+  int *total_cellnum_map = loc_cellnum_map + loc_cellnum;
+  int *mask_cellnum_map = total_cellnum_map + total_cellnum;
+  int *sec_loc_cellnum_map = mask_cellnum_map + total_cellnum;
+  int *sec_total_cellnum_map = sec_loc_cellnum_map + loc_cellnum + 1;
+  sec_loc_cellnum_map[0] = 0;
+  sec_total_cellnum_map[0] = nloc;
+  int max_cell = 0;
+  for (int iii = 0; iii < total_cellnum; iii++) {
+    if (max_cell < total_cellnum_map[iii]) {
+      max_cell = total_cellnum_map[iii];
     }
-    else{
-        DPErrcheck(hipMemcpy(int_data+nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3, 
-            sec_loc_cellnum_map, sizeof(int) * (loc_cellnum+1+total_cellnum+1), hipMemcpyHostToDevice));
-        delete[] int_data_cpu;
-        build_loc_clist(int_data, nloc, loc_cellnum, total_cellnum);
-        copy_coord(out_c, out_t, mapping, int_data, in_c, in_t, nloc, *nall, loc_cellnum, total_cellnum, region);
+    if (iii < loc_cellnum) {
+      sec_loc_cellnum_map[iii + 1] =
+          sec_loc_cellnum_map[iii] + loc_cellnum_map[iii];
     }
-    return 0;
+    sec_total_cellnum_map[iii + 1] =
+        sec_total_cellnum_map[iii] + mask_cellnum_map[iii];
+  }
+  *nall = sec_total_cellnum_map[total_cellnum];
+  if (*nall > mem_nall) {
+    delete[] int_data_cpu;
+    // size of the output arrays is not large enough
+    return 1;
+  } else {
+    DPErrcheck(hipMemcpy(int_data + nloc * 3 + loc_cellnum + total_cellnum * 3 +
+                             total_cellnum * 3,
+                         sec_loc_cellnum_map,
+                         sizeof(int) * (loc_cellnum + 1 + total_cellnum + 1),
+                         hipMemcpyHostToDevice));
+    delete[] int_data_cpu;
+    build_loc_clist(int_data, nloc, loc_cellnum, total_cellnum);
+    copy_coord(out_c, out_t, mapping, int_data, in_c, in_t, nloc, *nall,
+               loc_cellnum, total_cellnum, region);
+  }
+  return 0;
 }
 
-template void normalize_coord_gpu_rocm(float * coord, const int natom, const Region & region);
-template void normalize_coord_gpu_rocm(double * coord, const int natom, const Region & region);
-template int copy_coord_gpu_rocm(float * out_c, int * out_t, int * mapping, int * nall, int * int_data, const float * in_c, const int * in_t, const int & nloc, const int & mem_nall, const int & loc_cellnum, const int & total_cellnum, const int * cell_info, const Region & region);
-template int copy_coord_gpu_rocm(double * out_c, int * out_t, int * mapping, int * nall, int * int_data, const double * in_c, const int * in_t, const int & nloc, const int & mem_nall, const int & loc_cellnum, const int & total_cellnum, const int * cell_info, const Region & region);
-}
\ No newline at end of file
+template void normalize_coord_gpu_rocm(float *coord,
+                                              const int natom,
+                                              const Region ®ion);
+template void normalize_coord_gpu_rocm(double *coord,
+                                               const int natom,
+                                               const Region ®ion);
+template int copy_coord_gpu_rocm(float *out_c,
+                                        int *out_t,
+                                        int *mapping,
+                                        int *nall,
+                                        int *int_data,
+                                        const float *in_c,
+                                        const int *in_t,
+                                        const int &nloc,
+                                        const int &mem_nall,
+                                        const int &loc_cellnum,
+                                        const int &total_cellnum,
+                                        const int *cell_info,
+                                        const Region ®ion);
+template int copy_coord_gpu_rocm(double *out_c,
+                                         int *out_t,
+                                         int *mapping,
+                                         int *nall,
+                                         int *int_data,
+                                         const double *in_c,
+                                         const int *in_t,
+                                         const int &nloc,
+                                         const int &mem_nall,
+                                         const int &loc_cellnum,
+                                         const int &total_cellnum,
+                                         const int *cell_info,
+                                         const Region ®ion);
+}  // namespace deepmd
diff --git a/source/lib/src/rocm/gelu.hip.cu b/source/lib/src/rocm/gelu.hip.cu
index 6529d277f1..7dfcb45870 100644
--- a/source/lib/src/rocm/gelu.hip.cu
+++ b/source/lib/src/rocm/gelu.hip.cu
@@ -1,118 +1,138 @@
-#include "gelu.h"
 #include "device.h"
+#include "gelu.h"
 
-__device__ inline double _tanh(double x) {return tanh(x);}
-__device__ inline float _tanh(float x) {return tanhf(x);}
+__device__ inline double _tanh(double x) { return tanh(x); }
+__device__ inline float _tanh(float x) { return tanhf(x); }
 
 template 
-__global__ void gelu(
-    FPTYPE * out, 
-    const FPTYPE * xx, 
-    const int_64 size) 
-{
+__global__ void gelu(FPTYPE* out, const FPTYPE* xx, const int_64 size) {
   const int_64 idx = int_64(blockIdx.x) * blockDim.x + threadIdx.x;
   if (idx >= size) {
     return;
   }
-  out[idx] = xx[idx] * (FPTYPE)0.5 * ((FPTYPE)1.0 + _tanh((FPTYPE)SQRT_2_PI * (xx[idx] + (FPTYPE)0.044715 * xx[idx] * xx[idx] *xx[idx])));
+  out[idx] = xx[idx] * (FPTYPE)0.5 *
+             ((FPTYPE)1.0 +
+              _tanh((FPTYPE)SQRT_2_PI * (xx[idx] + (FPTYPE)0.044715 * xx[idx] *
+                                                       xx[idx] * xx[idx])));
 }
 
 template 
-__global__ void gelu_grad(
-    FPTYPE * out, 
-    const FPTYPE * xx, 
-    const FPTYPE * dy, 
-    const int_64 size) 
-{
+__global__ void gelu_grad(FPTYPE* out,
+                          const FPTYPE* xx,
+                          const FPTYPE* dy,
+                          const int_64 size) {
   const int_64 idx = int_64(blockIdx.x) * blockDim.x + threadIdx.x;
   if (idx >= size) {
     return;
   }
-  // out[idx] = xx[idx] * 0.5 * (1.0 + tanh(SQRT_2_PI * (xx[idx] + 0.044715 * xx[idx] * xx[idx] *xx[idx])));
-  const FPTYPE var = _tanh((FPTYPE)SQRT_2_PI * (xx[idx] + (FPTYPE)0.044715 * xx[idx] * xx[idx] *xx[idx]));
-  out[idx] = dy[idx] * ((FPTYPE)0.5 * (FPTYPE)SQRT_2_PI * xx[idx] * ((FPTYPE)1. - var * var) * ((FPTYPE)0.134145 * xx[idx] * xx[idx] + (FPTYPE)1.) + (FPTYPE)0.5 * var + (FPTYPE)0.5);
+  // out[idx] = xx[idx] * 0.5 * (1.0 + tanh(SQRT_2_PI * (xx[idx] + 0.044715 *
+  // xx[idx] * xx[idx] *xx[idx])));
+  const FPTYPE var =
+      _tanh((FPTYPE)SQRT_2_PI *
+            (xx[idx] + (FPTYPE)0.044715 * xx[idx] * xx[idx] * xx[idx]));
+  out[idx] =
+      dy[idx] *
+      ((FPTYPE)0.5 * (FPTYPE)SQRT_2_PI * xx[idx] * ((FPTYPE)1. - var * var) *
+           ((FPTYPE)0.134145 * xx[idx] * xx[idx] + (FPTYPE)1.) +
+       (FPTYPE)0.5 * var + (FPTYPE)0.5);
 }
 
 template 
-__global__ void gelu_grad_grad(
-    FPTYPE * out, 
-    const FPTYPE * xx, 
-    const FPTYPE * dy, 
-    const FPTYPE * dy_2,
-    const int_64 size) 
-{
+__global__ void gelu_grad_grad(FPTYPE* out,
+                               const FPTYPE* xx,
+                               const FPTYPE* dy,
+                               const FPTYPE* dy_2,
+                               const int_64 size) {
   const int_64 idx = int_64(blockIdx.x) * blockDim.x + threadIdx.x;
   if (idx >= size) {
     return;
   }
-  // out[idx] = xx[idx] * 0.5 * (1.0 + tanh(SQRT_2_PI * (xx[idx] + 0.044715 * xx[idx] * xx[idx] *xx[idx])));
-  const FPTYPE var1 = _tanh((FPTYPE)SQRT_2_PI * (xx[idx] + (FPTYPE)0.044715 * xx[idx] * xx[idx] *xx[idx]));
-  const FPTYPE var2 = (FPTYPE)SQRT_2_PI * ((FPTYPE)1. - var1 * var1) * ((FPTYPE)0.134145 * xx[idx] * xx[idx] + (FPTYPE)1.);
-  out[idx] = dy[idx] * dy_2[idx] * ((FPTYPE)0.134145 * (FPTYPE)SQRT_2_PI * xx[idx] * xx[idx] * ((FPTYPE)1. - var1 * var1) - (FPTYPE)SQRT_2_PI * xx[idx] * var2 * ((FPTYPE)0.134145 * xx[idx] * xx[idx] + (FPTYPE)1.) * var1 + var2);
+  // out[idx] = xx[idx] * 0.5 * (1.0 + tanh(SQRT_2_PI * (xx[idx] + 0.044715 *
+  // xx[idx] * xx[idx] *xx[idx])));
+  const FPTYPE var1 =
+      _tanh((FPTYPE)SQRT_2_PI *
+            (xx[idx] + (FPTYPE)0.044715 * xx[idx] * xx[idx] * xx[idx]));
+  const FPTYPE var2 = (FPTYPE)SQRT_2_PI * ((FPTYPE)1. - var1 * var1) *
+                      ((FPTYPE)0.134145 * xx[idx] * xx[idx] + (FPTYPE)1.);
+  out[idx] = dy[idx] * dy_2[idx] *
+             ((FPTYPE)0.134145 * (FPTYPE)SQRT_2_PI * xx[idx] * xx[idx] *
+                  ((FPTYPE)1. - var1 * var1) -
+              (FPTYPE)SQRT_2_PI * xx[idx] * var2 *
+                  ((FPTYPE)0.134145 * xx[idx] * xx[idx] + (FPTYPE)1.) * var1 +
+              var2);
 }
 
 namespace deepmd {
-  template
-  void gelu_gpu_rocm(
-      FPTYPE * out, 
-      const FPTYPE * xx, 
-      const int_64 size)
-  {
-    if(size <= 0)
-    {
-      return ;
-    }
-    const int THREAD_ITEMS = 1024;
-    const int BLOCK_NUMS = (size + THREAD_ITEMS - 1) / THREAD_ITEMS;
-  
-    hipLaunchKernelGGL(gelu, BLOCK_NUMS, THREAD_ITEMS, 0, 0, out, xx, size);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+template 
+void gelu_gpu_rocm(FPTYPE* out, const FPTYPE* xx, const int_64 size) {
+  if (size <= 0) {
+    return;
   }
-  
-  template
-  void gelu_grad_gpu_rocm(
-      FPTYPE * out, 
-      const FPTYPE * xx,
-      const FPTYPE * dy, 
-      const int_64 size)
-  {
-    if(size <= 0)
-    {
-      return;
-    }
-    const int THREAD_ITEMS = 1024;
-    const int BLOCK_NUMS = (size + THREAD_ITEMS - 1) / THREAD_ITEMS;
-  
-    hipLaunchKernelGGL(gelu_grad, BLOCK_NUMS, THREAD_ITEMS, 0, 0, out, xx, dy, size);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+  const int THREAD_ITEMS = 1024;
+  const int BLOCK_NUMS = (size + THREAD_ITEMS - 1) / THREAD_ITEMS;
+
+  hipLaunchKernelGGL(gelu, BLOCK_NUMS, THREAD_ITEMS, 0, 0, out, xx, size);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
+}
+
+template 
+void gelu_grad_gpu_rocm(FPTYPE* out,
+                        const FPTYPE* xx,
+                        const FPTYPE* dy,
+                        const int_64 size) {
+  if (size <= 0) {
+    return;
   }
-  
-  template
-  void gelu_grad_grad_gpu_rocm(
-      FPTYPE * out,
-      const FPTYPE * xx,
-      const FPTYPE * dy, 
-      const FPTYPE * dy_2,
-      const int_64 size)
-  {
-    if(size <= 0)
-    {
-      return;
-    }
-    const int THREAD_ITEMS = 1024;
-    const int BLOCK_NUMS = (size + THREAD_ITEMS - 1) / THREAD_ITEMS;
-    
-    hipLaunchKernelGGL(gelu_grad_grad, BLOCK_NUMS, THREAD_ITEMS, 0, 0, out, xx, dy, dy_2, size);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+  const int THREAD_ITEMS = 1024;
+  const int BLOCK_NUMS = (size + THREAD_ITEMS - 1) / THREAD_ITEMS;
+
+  hipLaunchKernelGGL(gelu_grad, BLOCK_NUMS, THREAD_ITEMS, 0, 0, out, xx, dy,
+                     size);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
+}
+
+template 
+void gelu_grad_grad_gpu_rocm(FPTYPE* out,
+                             const FPTYPE* xx,
+                             const FPTYPE* dy,
+                             const FPTYPE* dy_2,
+                             const int_64 size) {
+  if (size <= 0) {
+    return;
   }
-  
-  template void gelu_gpu_rocm(float * out, const float * x, const int_64 size);
-  template void gelu_gpu_rocm(double * out, const double * x, const int_64 size);
-  template void gelu_grad_gpu_rocm(float * out, const float * x, const float * dy, const int_64 size);
-  template void gelu_grad_gpu_rocm(double * out, const double * x, const double * dy, const int_64 size);
-  template void gelu_grad_grad_gpu_rocm(float * out, const float * x, const float * dy, const float * dy_2, const int_64 size);
-  template void gelu_grad_grad_gpu_rocm(double * out, const double * x, const double * dy, const double * dy_2, const int_64 size);
-}
\ No newline at end of file
+  const int THREAD_ITEMS = 1024;
+  const int BLOCK_NUMS = (size + THREAD_ITEMS - 1) / THREAD_ITEMS;
+
+  hipLaunchKernelGGL(gelu_grad_grad, BLOCK_NUMS, THREAD_ITEMS, 0, 0, out, xx,
+                     dy, dy_2, size);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
+}
+
+template void gelu_gpu_rocm(float* out,
+                                   const float* x,
+                                   const int_64 size);
+template void gelu_gpu_rocm(double* out,
+                                    const double* x,
+                                    const int_64 size);
+template void gelu_grad_gpu_rocm(float* out,
+                                        const float* x,
+                                        const float* dy,
+                                        const int_64 size);
+template void gelu_grad_gpu_rocm(double* out,
+                                         const double* x,
+                                         const double* dy,
+                                         const int_64 size);
+template void gelu_grad_grad_gpu_rocm(float* out,
+                                             const float* x,
+                                             const float* dy,
+                                             const float* dy_2,
+                                             const int_64 size);
+template void gelu_grad_grad_gpu_rocm(double* out,
+                                              const double* x,
+                                              const double* dy,
+                                              const double* dy_2,
+                                              const int_64 size);
+}  // namespace deepmd
diff --git a/source/lib/src/rocm/neighbor_list.hip.cu b/source/lib/src/rocm/neighbor_list.hip.cu
index 64f2b31497..35889a8575 100644
--- a/source/lib/src/rocm/neighbor_list.hip.cu
+++ b/source/lib/src/rocm/neighbor_list.hip.cu
@@ -1,38 +1,34 @@
 #include "device.h"
-#include "neighbor_list.h"
-
 #include "hipcub/hipcub.hpp"
+#include "neighbor_list.h"
 // A stateful callback functor that maintains a running prefix to be applied
 // during consecutive scan operations.
-struct parallel_prefix_scan_op
-{
+struct parallel_prefix_scan_op {
   // Running prefix
   int running_total;
   // Constructor
-  __device__ parallel_prefix_scan_op(int running_total) : running_total(running_total) {}
+  __device__ parallel_prefix_scan_op(int running_total)
+      : running_total(running_total) {}
   // Callback operator to be entered by the first warp of threads in the block.
-  // Thread-0 is responsible for returning a value for seeding the block-wide scan.
-  __device__ int operator()(int block_aggregate)
-  {
+  // Thread-0 is responsible for returning a value for seeding the block-wide
+  // scan.
+  __device__ int operator()(int block_aggregate) {
     int old_prefix = running_total;
     running_total += block_aggregate;
     return old_prefix;
   }
 };
 
-template <
-  int   THREADS_PER_BLOCK>
-__global__ void parallel_prefix_scan(
-  int * numneigh, 
-  int * nei_order, 
-  const int * temp_nlist, 
-  const int mem_size, 
-  const int nloc,
-  const int nall
-)
-{
-  // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128 threads, 4 ints per thread
-  typedef hipcub::BlockScan BlockScan;
+template 
+__global__ void parallel_prefix_scan(int *numneigh,
+                                     int *nei_order,
+                                     const int *temp_nlist,
+                                     const int mem_size,
+                                     const int nloc,
+                                     const int nall) {
+  // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128
+  // threads, 4 ints per thread
+  typedef hipcub::BlockScan BlockScan;
   // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan
   __shared__ typename BlockScan::TempStorage temp_storage;
 
@@ -40,265 +36,244 @@ __global__ void parallel_prefix_scan(
   parallel_prefix_scan_op prefix_op(0);
 
   // Have the block iterate over segments of items
-  for (int ii = threadIdx.x; ii < nall; ii += THREADS_PER_BLOCK)
-  {
-    int block_offset = blockIdx.x * mem_size; 
+  for (int ii = threadIdx.x; ii < nall; ii += THREADS_PER_BLOCK) {
+    int block_offset = blockIdx.x * mem_size;
     // Load a segment of consecutive items that are blocked across threads
     int i_data = temp_nlist[block_offset + ii];
     int o_data = i_data == -1 ? 0 : 1;
 
     // Collectively compute the block-wide exclusive prefix sum
-    BlockScan(temp_storage).ExclusiveSum(
-        o_data, o_data, prefix_op);
+    BlockScan(temp_storage).ExclusiveSum(o_data, o_data, prefix_op);
 
     __syncthreads();
     // Store scanned items to output segment
     if (i_data != -1) {
-        nei_order[block_offset + ii] = o_data; 
+      nei_order[block_offset + ii] = o_data;
     }
     // Store numneigh into the output array
     if (ii == nall - 1) {
-        o_data += i_data == -1 ? 0 : 1;
-        numneigh[blockIdx.x] = o_data; 
+      o_data += i_data == -1 ? 0 : 1;
+      numneigh[blockIdx.x] = o_data;
     }
   }
 }
 
-template
-__device__ inline FPTYPE dev_dot(
-    FPTYPE * arr1, 
-    FPTYPE * arr2) 
-{
-    return arr1[0] * arr2[0] + arr1[1] * arr2[1] + arr1[2] * arr2[2];
+template 
+__device__ inline FPTYPE dev_dot(FPTYPE *arr1, FPTYPE *arr2) {
+  return arr1[0] * arr2[0] + arr1[1] * arr2[1] + arr1[2] * arr2[2];
 }
 
-template
-__global__ void build_nlist(
-    int * ilist, 
-    int * temp_nlist,
-    const FPTYPE * c_cpy, 
-    const FPTYPE rcut2,
-    const int nloc,
-    const int nall,
-    const int mem_size)
-{
-    const unsigned int atom_idx = blockIdx.x;
-    const unsigned int neighbor_idx = blockIdx.y * blockDim.y + threadIdx.y;
-    if(neighbor_idx
+__global__ void build_nlist(int *ilist,
+                            int *temp_nlist,
+                            const FPTYPE *c_cpy,
+                            const FPTYPE rcut2,
+                            const int nloc,
+                            const int nall,
+                            const int mem_size) {
+  const unsigned int atom_idx = blockIdx.x;
+  const unsigned int neighbor_idx = blockIdx.y * blockDim.y + threadIdx.y;
+  if (neighbor_idx < nall) {
+    int *neighbor_row = temp_nlist + atom_idx * mem_size;
+    if (neighbor_idx == atom_idx) {
+      ilist[atom_idx] = atom_idx;
+    } else {
+      const FPTYPE *ccoord = c_cpy + atom_idx * 3;
+      const FPTYPE *ncoord = c_cpy + neighbor_idx * 3;
+      FPTYPE diff[3];
+      for (int kk = 0; kk < 3; kk++) {
+        diff[kk] = ccoord[kk] - ncoord[kk];
+      }
+      FPTYPE r2 = dev_dot(diff, diff);
+      if (r2 < rcut2) {
+        neighbor_row[neighbor_idx] = neighbor_idx;
+      }
     }
+  }
 }
 
-__global__ void fill_nlist(
-    int ** firstneigh,
-    const int * temp_nlist,
-    const int * nei_order,
-    const int mem_size,
-    const int nall)
-{
-    const unsigned int atom_idx = blockIdx.x;
-    const unsigned int neighbor_idx = blockIdx.y * blockDim.y + threadIdx.y;
-    if(neighbor_idx=nnei){return;}
-    int nlist_idx=atom_idx*nnei+nei_idx;
-    int nlist_item=nlist[nlist_idx];
-    if(nlist_item!=-1){
-        nlist[nlist_idx]=nlist_map[nlist_item];
-    }
+__global__ void map_nlist(int *nlist,
+                          const int *nlist_map,
+                          const int nloc,
+                          const int nnei) {
+  int atom_idx = blockIdx.x;
+  int nei_idx = blockIdx.y * blockDim.y + threadIdx.y;
+  if (nei_idx >= nnei) {
+    return;
+  }
+  int nlist_idx = atom_idx * nnei + nei_idx;
+  int nlist_item = nlist[nlist_idx];
+  if (nlist_item != -1) {
+    nlist[nlist_idx] = nlist_map[nlist_item];
+  }
 }
 
-__global__ void map_nei_info(
-    int * nlist,
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int * nlist_map,
-    const int nloc,
-    const int nnei,
-    const int ntypes
-)
-{
-    int atom_idx=blockIdx.x;
-    int nei_idx=blockIdx.y*blockDim.y+threadIdx.y;
-    if(nei_idx>=nnei){return;}
-    int nlist_idx=atom_idx*nnei+nei_idx;
-    int nlist_item=nlist[nlist_idx];
-    int temp=0;
-    if(nlist_item!=-1){
-        temp=nlist_map[nlist_item];
-        nlist[nlist_idx]=temp;
-        ntype[nlist_idx]=type[temp];
-        nmask[nlist_idx]=true;
-    }
-    else{
-        ntype[nlist_idx]=ntypes;
-    }
+__global__ void map_nei_info(int *nlist,
+                             int *ntype,
+                             bool *nmask,
+                             const int *type,
+                             const int *nlist_map,
+                             const int nloc,
+                             const int nnei,
+                             const int ntypes) {
+  int atom_idx = blockIdx.x;
+  int nei_idx = blockIdx.y * blockDim.y + threadIdx.y;
+  if (nei_idx >= nnei) {
+    return;
+  }
+  int nlist_idx = atom_idx * nnei + nei_idx;
+  int nlist_item = nlist[nlist_idx];
+  int temp = 0;
+  if (nlist_item != -1) {
+    temp = nlist_map[nlist_item];
+    nlist[nlist_idx] = temp;
+    ntype[nlist_idx] = type[temp];
+    nmask[nlist_idx] = true;
+  } else {
+    ntype[nlist_idx] = ntypes;
+  }
 }
 
-__global__ void map_nei_info_noconvert(
-    int * nlist,
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int nloc,
-    const int nnei,
-    const int ntypes
-)
-{
-    int atom_idx=blockIdx.x;
-    int nei_idx=blockIdx.y*blockDim.y+threadIdx.y;
-    if(nei_idx>=nnei){return;}
-    int nlist_idx=atom_idx*nnei+nei_idx;
-    int nlist_item=nlist[nlist_idx];
-    if(nlist_item!=-1){
-        ntype[nlist_idx]=type[nlist_item];
-        nmask[nlist_idx]=true;
-    }
-    else{
-        ntype[nlist_idx]=ntypes;
-    }
+__global__ void map_nei_info_noconvert(int *nlist,
+                                       int *ntype,
+                                       bool *nmask,
+                                       const int *type,
+                                       const int nloc,
+                                       const int nnei,
+                                       const int ntypes) {
+  int atom_idx = blockIdx.x;
+  int nei_idx = blockIdx.y * blockDim.y + threadIdx.y;
+  if (nei_idx >= nnei) {
+    return;
+  }
+  int nlist_idx = atom_idx * nnei + nei_idx;
+  int nlist_item = nlist[nlist_idx];
+  if (nlist_item != -1) {
+    ntype[nlist_idx] = type[nlist_item];
+    nmask[nlist_idx] = true;
+  } else {
+    ntype[nlist_idx] = ntypes;
+  }
 }
 
 namespace deepmd {
 template 
-int build_nlist_gpu_rocm(
-    InputNlist & nlist,
-    int * max_list_size,
-    int * nlist_data,
-    const FPTYPE * c_cpy, 
-    const int & nloc, 
-    const int & nall, 
-    const int & mem_size,
-    const float & rcut)
-{
-    if(mem_size < nall){
-        return 1;
-    }
-    const int nblock = (nall+TPB-1)/TPB;
-    int * ilist = nlist.ilist;
-    int * numneigh = nlist.numneigh;
-    int ** firstneigh = nlist.firstneigh;
-    DPErrcheck(hipMemset(nlist_data, -1, sizeof(int) * 2 * nloc * mem_size));
-    int * temp_nlist = nlist_data; //nloc*mem_size
-    int * nei_order = temp_nlist + nloc * mem_size;
-    nlist.inum = nloc;
-    FPTYPE rcut2 = rcut * rcut;
-    
-    
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(1, TPB);
-    hipLaunchKernelGGL(build_nlist, block_grid, thread_grid, 0, 0, 
-                ilist, 
-                temp_nlist,
-                c_cpy, 
-                rcut2,
-                nloc,
-                nall,
-                mem_size);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
-    hipLaunchKernelGGL(
-        HIP_KERNEL_NAME(parallel_prefix_scan), nloc, TPB, 0, 0, 
-        numneigh, nei_order, 
-        temp_nlist, mem_size, nloc, nall);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
-    hipLaunchKernelGGL(fill_nlist, block_grid, thread_grid, 0, 0, 
-                firstneigh,
-                temp_nlist,
-                nei_order,
-                mem_size,
-                nall);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
-    int * numneigh_host = new int[nloc];
-    DPErrcheck(hipMemcpy(numneigh_host, numneigh, sizeof(int) * nloc, hipMemcpyDeviceToHost));
-    int max_nei = 0;
-    for(int ii=0;iimax_nei)max_nei=numneigh_host[ii];
-    }
-    *max_list_size = max_nei;
-    delete [] numneigh_host;
-    return 0;
-}
+int build_nlist_gpu_rocm(InputNlist &nlist,
+                         int *max_list_size,
+                         int *nlist_data,
+                         const FPTYPE *c_cpy,
+                         const int &nloc,
+                         const int &nall,
+                         const int &mem_size,
+                         const float &rcut) {
+  if (mem_size < nall) {
+    return 1;
+  }
+  const int nblock = (nall + TPB - 1) / TPB;
+  int *ilist = nlist.ilist;
+  int *numneigh = nlist.numneigh;
+  int **firstneigh = nlist.firstneigh;
+  DPErrcheck(hipMemset(nlist_data, -1, sizeof(int) * 2 * nloc * mem_size));
+  int *temp_nlist = nlist_data;  // nloc*mem_size
+  int *nei_order = temp_nlist + nloc * mem_size;
+  nlist.inum = nloc;
+  FPTYPE rcut2 = rcut * rcut;
 
-void use_nlist_map(
-    int * nlist, 
-    const int * nlist_map, 
-    const int nloc, 
-    const int nnei)
-{
-    int nblock=(nnei+TPB-1)/TPB;
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(1, TPB);
-    hipLaunchKernelGGL(map_nlist, block_grid, thread_grid, 0, 0, nlist, nlist_map, nloc, nnei);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+  dim3 block_grid(nloc, nblock);
+  dim3 thread_grid(1, TPB);
+  hipLaunchKernelGGL(build_nlist, block_grid, thread_grid, 0, 0, ilist,
+                     temp_nlist, c_cpy, rcut2, nloc, nall, mem_size);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
+  hipLaunchKernelGGL(HIP_KERNEL_NAME(parallel_prefix_scan), nloc, TPB, 0,
+                     0, numneigh, nei_order, temp_nlist, mem_size, nloc, nall);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
+  hipLaunchKernelGGL(fill_nlist, block_grid, thread_grid, 0, 0, firstneigh,
+                     temp_nlist, nei_order, mem_size, nall);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
+  int *numneigh_host = new int[nloc];
+  DPErrcheck(hipMemcpy(numneigh_host, numneigh, sizeof(int) * nloc,
+                       hipMemcpyDeviceToHost));
+  int max_nei = 0;
+  for (int ii = 0; ii < nloc; ii++) {
+    if (numneigh_host[ii] > max_nei) max_nei = numneigh_host[ii];
+  }
+  *max_list_size = max_nei;
+  delete[] numneigh_host;
+  return 0;
 }
 
-void use_nei_info_gpu_rocm(
-    int * nlist,
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int * nlist_map, 
-    const int nloc, 
-    const int nnei,
-    const int ntypes,
-    const bool b_nlist_map)
-{
-    int nblock=(nnei+TPB-1)/TPB;
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(1, TPB);
-    DPErrcheck(hipMemset(ntype, 0, sizeof(int) * nloc * nnei));
-    DPErrcheck(hipMemset(nmask, 0, sizeof(bool) * nloc * nnei));
-    if (b_nlist_map){
-        hipLaunchKernelGGL(map_nei_info, block_grid, thread_grid, 0, 0, nlist, ntype, nmask, type, nlist_map, nloc, nnei, ntypes);
-    }
-    else{
-        hipLaunchKernelGGL(map_nei_info_noconvert, block_grid, thread_grid, 0, 0, nlist, ntype, nmask, type, nloc, nnei, ntypes);
-    }
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+void use_nlist_map(int *nlist,
+                   const int *nlist_map,
+                   const int nloc,
+                   const int nnei) {
+  int nblock = (nnei + TPB - 1) / TPB;
+  dim3 block_grid(nloc, nblock);
+  dim3 thread_grid(1, TPB);
+  hipLaunchKernelGGL(map_nlist, block_grid, thread_grid, 0, 0, nlist, nlist_map,
+                     nloc, nnei);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
 
-template int build_nlist_gpu_rocm(InputNlist & nlist, int * max_list_size, int * nlist_data, const float * c_cpy, const int & nloc, const int & nall, const int & mem_size, const float & rcut);
-template int build_nlist_gpu_rocm(InputNlist & nlist, int * max_list_size, int * nlist_data, const double * c_cpy, const int & nloc, const int & nall, const int & mem_size, const float & rcut);
+void use_nei_info_gpu_rocm(int *nlist,
+                           int *ntype,
+                           bool *nmask,
+                           const int *type,
+                           const int *nlist_map,
+                           const int nloc,
+                           const int nnei,
+                           const int ntypes,
+                           const bool b_nlist_map) {
+  int nblock = (nnei + TPB - 1) / TPB;
+  dim3 block_grid(nloc, nblock);
+  dim3 thread_grid(1, TPB);
+  DPErrcheck(hipMemset(ntype, 0, sizeof(int) * nloc * nnei));
+  DPErrcheck(hipMemset(nmask, 0, sizeof(bool) * nloc * nnei));
+  if (b_nlist_map) {
+    hipLaunchKernelGGL(map_nei_info, block_grid, thread_grid, 0, 0, nlist,
+                       ntype, nmask, type, nlist_map, nloc, nnei, ntypes);
+  } else {
+    hipLaunchKernelGGL(map_nei_info_noconvert, block_grid, thread_grid, 0, 0,
+                       nlist, ntype, nmask, type, nloc, nnei, ntypes);
+  }
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
+
+template int build_nlist_gpu_rocm(InputNlist &nlist,
+                                         int *max_list_size,
+                                         int *nlist_data,
+                                         const float *c_cpy,
+                                         const int &nloc,
+                                         const int &nall,
+                                         const int &mem_size,
+                                         const float &rcut);
+template int build_nlist_gpu_rocm(InputNlist &nlist,
+                                          int *max_list_size,
+                                          int *nlist_data,
+                                          const double *c_cpy,
+                                          const int &nloc,
+                                          const int &nall,
+                                          const int &mem_size,
+                                          const float &rcut);
+}  // namespace deepmd
diff --git a/source/lib/src/rocm/prod_env_mat.hip.cu b/source/lib/src/rocm/prod_env_mat.hip.cu
index ce6227a7f8..b70247c86f 100644
--- a/source/lib/src/rocm/prod_env_mat.hip.cu
+++ b/source/lib/src/rocm/prod_env_mat.hip.cu
@@ -1,33 +1,32 @@
-#include "fmt_nlist.h"
-#include "prod_env_mat.h"
 #include "device.h"
+#include "fmt_nlist.h"
 #include "hipcub/hipcub.hpp"
+#include "prod_env_mat.h"
 
-__device__ inline double _sqrt(double x) {return sqrt(x);}
-__device__ inline float _sqrt(float x) {return sqrtf(x);}
-__device__ inline double _rsqrt(double x) {return rsqrt(x);}
-__device__ inline float _rsqrt(float x) {return rsqrtf(x);}
+__device__ inline double _sqrt(double x) { return sqrt(x); }
+__device__ inline float _sqrt(float x) { return sqrtf(x); }
+__device__ inline double _rsqrt(double x) { return rsqrt(x); }
+__device__ inline float _rsqrt(float x) { return rsqrtf(x); }
 
 // common part of prod_env_mat
-template <
-    typename    Key,
-    int         BLOCK_THREADS,
-    int         ITEMS_PER_THREAD>
-__launch_bounds__ (BLOCK_THREADS)
-__global__ void BlockSortKernel(
-    Key * d_in,
-    Key * d_out)                // Tile of output
-{   
+template 
+__launch_bounds__(BLOCK_THREADS) __global__
+    void BlockSortKernel(Key* d_in,
+                         Key* d_out)  // Tile of output
+{
   enum { TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD };
-  // Specialize BlockLoad type for our thread block (uses warp-striped loads for coalescing, then transposes in shared memory to a blocked arrangement)
-  typedef hipcub::BlockLoad BlockLoadT;
+  // Specialize BlockLoad type for our thread block (uses warp-striped loads for
+  // coalescing, then transposes in shared memory to a blocked arrangement)
+  typedef hipcub::BlockLoad
+      BlockLoadT;
   // Specialize BlockRadixSort type for our thread block
-  typedef hipcub::BlockRadixSort BlockRadixSortT;
+  typedef hipcub::BlockRadixSort
+      BlockRadixSortT;
   // Shared memory
-  __shared__ union TempStorage
-  {
-    typename BlockLoadT::TempStorage        load;
-    typename BlockRadixSortT::TempStorage   sort;
+  __shared__ union TempStorage {
+    typename BlockLoadT::TempStorage load;
+    typename BlockRadixSortT::TempStorage sort;
   } temp_storage;
   // Per-thread tile items
   Key items[ITEMS_PER_THREAD];
@@ -40,349 +39,328 @@ __global__ void BlockSortKernel(
   // Sort keys
   BlockRadixSortT(temp_storage.sort).SortBlockedToStriped(items);
   // Store output in striped fashion
-  hipcub::StoreDirectStriped(threadIdx.x, d_out + block_offset, items);
+  hipcub::StoreDirectStriped(threadIdx.x, d_out + block_offset,
+                                            items);
 }
 
-template
-__device__ inline FPTYPE dev_dot(
-    FPTYPE * arr1, 
-    FPTYPE * arr2) 
-{
+template 
+__device__ inline FPTYPE dev_dot(FPTYPE* arr1, FPTYPE* arr2) {
   return arr1[0] * arr2[0] + arr1[1] * arr2[1] + arr1[2] * arr2[2];
 }
 
-template
+template 
 __device__ inline void spline5_switch(
-    FPTYPE & vv,
-    FPTYPE & dd,
-    FPTYPE & xx, 
-    const float & rmin, 
-    const float & rmax) 
-{
+    FPTYPE& vv, FPTYPE& dd, FPTYPE& xx, const float& rmin, const float& rmax) {
   if (xx < rmin) {
     dd = (FPTYPE)0.;
     vv = (FPTYPE)1.;
-  }
-  else if (xx < rmax) {
-    FPTYPE uu = (xx - rmin) / (rmax - rmin) ;
-    FPTYPE du = (FPTYPE)1. / (rmax - rmin) ;
-    vv = uu*uu*uu * ((FPTYPE)-6. * uu*uu + (FPTYPE)15. * uu - (FPTYPE)10.) + (FPTYPE)1.;
-    dd = ( (FPTYPE)3. * uu*uu * ((FPTYPE)-6. * uu*uu + (FPTYPE)15. * uu - (FPTYPE)10.) + uu*uu*uu * ((FPTYPE)-12. * uu + (FPTYPE)15.) ) * du;
-  }
-  else {
+  } else if (xx < rmax) {
+    FPTYPE uu = (xx - rmin) / (rmax - rmin);
+    FPTYPE du = (FPTYPE)1. / (rmax - rmin);
+    vv = uu * uu * uu *
+             ((FPTYPE)-6. * uu * uu + (FPTYPE)15. * uu - (FPTYPE)10.) +
+         (FPTYPE)1.;
+    dd = ((FPTYPE)3. * uu * uu *
+              ((FPTYPE)-6. * uu * uu + (FPTYPE)15. * uu - (FPTYPE)10.) +
+          uu * uu * uu * ((FPTYPE)-12. * uu + (FPTYPE)15.)) *
+         du;
+  } else {
     dd = (FPTYPE)0.;
     vv = (FPTYPE)0.;
   }
 }
 
-template
-__device__ inline uint_64 encoding_nbor_info(
-    const int type,
-    const FPTYPE dist,
-    const int index)
-{
+template 
+__device__ inline uint_64 encoding_nbor_info(const int type,
+                                             const FPTYPE dist,
+                                             const int index) {
   // nbor info checking:
   // the type of nbor atom must be smaller than 128
   // the distance of center atom between nbor atom must be smaller than 128
-  // the index of nbor atom(including ghost region) must be smaller than 16777216(1 << 24)
-  if(type >= 128 || dist >= (FPTYPE)128.0 || index >= (1 << 24)) {
+  // the index of nbor atom(including ghost region) must be smaller than
+  // 16777216(1 << 24)
+  if (type >= 128 || dist >= (FPTYPE)128.0 || index >= (1 << 24)) {
     __builtin_trap();
   }
-  return ((uint_64)type << 57) + (uint_64)((double)dist * ((uint_64)1 << 50)) / (1 << 24) * (1 << 24) + index;
+  return ((uint_64)type << 57) +
+         (uint_64)((double)dist * ((uint_64)1 << 50)) / (1 << 24) * (1 << 24) +
+         index;
 }
 
-__device__ inline void decoding_nbor_info(
-    int &type,
-    int &index,
-    const uint_64 key)
-{
+__device__ inline void decoding_nbor_info(int& type,
+                                          int& index,
+                                          const uint_64 key) {
   type = key >> 57;
   index = key & 0xFFFFFF;
 }
 
-template
-__global__ void get_i_idx(
-    FPTYPE * i_idx,
-    const int nloc,
-    const FPTYPE * ilist)
-{
+template 
+__global__ void get_i_idx(FPTYPE* i_idx, const int nloc, const FPTYPE* ilist) {
   const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
-  if(idx >= nloc) {
+  if (idx >= nloc) {
     return;
   }
   i_idx[ilist[idx]] = idx;
 }
 
-template
-__global__ void format_nlist_fill_a(
-    uint_64 * key,
-    const FPTYPE * coord,
-    const int * type,
-    const int * numneigh,
-    int ** firstneigh,
-    const float rcut,
-    int * i_idx,
-    const int MAX_NBOR_SIZE)
-{   
+template 
+__global__ void format_nlist_fill_a(uint_64* key,
+                                    const FPTYPE* coord,
+                                    const int* type,
+                                    const int* numneigh,
+                                    int** firstneigh,
+                                    const float rcut,
+                                    int* i_idx,
+                                    const int MAX_NBOR_SIZE) {
   // <<>>
   const int_64 idx = blockIdx.x;
   const unsigned int idy = blockIdx.y * blockDim.y + threadIdx.y;
-  
+
   const int nsize = numneigh[i_idx[idx]];
   if (idy >= nsize) {
     return;
   }
 
-  const int * nei_idx = firstneigh[i_idx[idx]];
+  const int* nei_idx = firstneigh[i_idx[idx]];
   // dev_copy(nei_idx, &jlist[jrange[i_idx]], nsize);
-  uint_64 * key_in = key + idx * MAX_NBOR_SIZE;
+  uint_64* key_in = key + idx * MAX_NBOR_SIZE;
   FPTYPE diff[3];
-  const int & j_idx = nei_idx[idy];
+  const int& j_idx = nei_idx[idy];
   for (int dd = 0; dd < 3; dd++) {
     diff[dd] = coord[j_idx * 3 + dd] - coord[idx * 3 + dd];
   }
-  FPTYPE rr = _sqrt(dev_dot(diff, diff)); 
+  FPTYPE rr = _sqrt(dev_dot(diff, diff));
   if (rr <= rcut) {
     key_in[idy] = encoding_nbor_info(type[j_idx], rr, j_idx);
   }
 }
 
-template
-__global__ void fill_nei_iter(
-    int * nei_iter_dev,
-    const FPTYPE * key,
-    const int nloc,
-    const int max_nbor_size,
-    const int sec_size)
-{
+template 
+__global__ void fill_nei_iter(int* nei_iter_dev,
+                              const FPTYPE* key,
+                              const int nloc,
+                              const int max_nbor_size,
+                              const int sec_size) {
   int_64 row = blockIdx.x;
   int col = blockIdx.y * blockDim.x + threadIdx.x;
-  const FPTYPE * key_out = key + nloc * max_nbor_size + row * max_nbor_size;
+  const FPTYPE* key_out = key + nloc * max_nbor_size + row * max_nbor_size;
   int nei_type_cur = -1, nbor_idx_cur = 0;
   int nei_type_pre = -1, nbor_idx_pre = 0;
-  if (col < max_nbor_size && key_out[col] != key_out[max_nbor_size - 1]){
-    if (col >= 1) 
+  if (col < max_nbor_size && key_out[col] != key_out[max_nbor_size - 1]) {
+    if (col >= 1)
       decoding_nbor_info(nei_type_pre, nbor_idx_pre, key_out[col - 1]);
     decoding_nbor_info(nei_type_cur, nbor_idx_cur, key_out[col]);
   }
-  if (nei_type_cur != nei_type_pre){
+  if (nei_type_cur != nei_type_pre) {
     nei_iter_dev[row * sec_size + nei_type_cur] = col;
   }
 }
 
-template
-__global__ void format_nlist_fill_b(
-    int * nlist,
-    const int nlist_size,
-    const int nloc,
-    FPTYPE * key,
-    const int * sec,
-    const int sec_size,
-    int * nei_iter_dev,
-    const int max_nbor_size)
-{ 
+template 
+__global__ void format_nlist_fill_b(int* nlist,
+                                    const int nlist_size,
+                                    const int nloc,
+                                    FPTYPE* key,
+                                    const int* sec,
+                                    const int sec_size,
+                                    int* nei_iter_dev,
+                                    const int max_nbor_size) {
   int_64 row = blockIdx.x;
   int col = blockIdx.y * blockDim.x + threadIdx.x;
-  int * nei_iter = nei_iter_dev + row * sec_size;
-  FPTYPE * key_out = key + nloc * max_nbor_size + row * max_nbor_size;
-  int * row_nlist = nlist + row * nlist_size;
-  if (col < max_nbor_size){
-    if (key_out[col] != key_out[max_nbor_size - 1]){
+  int* nei_iter = nei_iter_dev + row * sec_size;
+  FPTYPE* key_out = key + nloc * max_nbor_size + row * max_nbor_size;
+  int* row_nlist = nlist + row * nlist_size;
+  if (col < max_nbor_size) {
+    if (key_out[col] != key_out[max_nbor_size - 1]) {
       int nei_type = 0, nbor_idx = 0;
       decoding_nbor_info(nei_type, nbor_idx, key_out[col]);
       int out_indx = col - nei_iter[nei_type] + sec[nei_type];
-      if (out_indx < sec[nei_type + 1]){
+      if (out_indx < sec[nei_type + 1]) {
         row_nlist[out_indx] = nbor_idx;
       }
     }
   }
 }
 
-template
-__global__ void encoding_decoding_nbor_info(
-    uint_64 * key,
-    int * out_type,
-    int * out_index,
-    const int * in_type,
-    const FPTYPE * in_dist,
-    const int * in_index,
-    const int size_of_array)
-{ 
+template 
+__global__ void encoding_decoding_nbor_info(uint_64* key,
+                                            int* out_type,
+                                            int* out_index,
+                                            const int* in_type,
+                                            const FPTYPE* in_dist,
+                                            const int* in_index,
+                                            const int size_of_array) {
   const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
-  if(idx >= size_of_array) {
+  if (idx >= size_of_array) {
     return;
   }
-  
+
   key[idx] = encoding_nbor_info(in_type[idx], in_dist[idx], in_index[idx]);
   decoding_nbor_info(out_type[idx], out_index[idx], key[idx]);
 }
 
-template
-void format_nbor_list_256 (
-    uint_64 * key,
-    const FPTYPE* coord,
-    const int* type,
-    const deepmd::InputNlist & gpu_inlist,
-    const int& nloc,       
-    const float& rcut, 
-    int * i_idx) 
-{   
+template 
+void format_nbor_list_256(uint_64* key,
+                          const FPTYPE* coord,
+                          const int* type,
+                          const deepmd::InputNlist& gpu_inlist,
+                          const int& nloc,
+                          const float& rcut,
+                          int* i_idx) {
   const int LEN = 256;
   const int MAX_NBOR_SIZE = 256;
   const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN;
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(1, LEN);
-  hipLaunchKernelGGL(format_nlist_fill_a, block_grid, thread_grid, 0, 0, 
-      key,
-      coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, MAX_NBOR_SIZE);
+  hipLaunchKernelGGL(format_nlist_fill_a, block_grid, thread_grid, 0, 0, key,
+                     coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh,
+                     rcut, i_idx, MAX_NBOR_SIZE);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
   const int ITEMS_PER_THREAD = 4;
   const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD;
-  // hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), g_grid_size, BLOCK_THREADS, 0, 0, 
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), nloc, BLOCK_THREADS, 0, 0, 
-      key, 
-      key + nloc * MAX_NBOR_SIZE);
+  // hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), g_grid_size, BLOCK_THREADS, 0, 0,
+  hipLaunchKernelGGL(
+      HIP_KERNEL_NAME(
+          BlockSortKernel),
+      nloc, BLOCK_THREADS, 0, 0, key, key + nloc * MAX_NBOR_SIZE);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template
-void format_nbor_list_512 (
-    uint_64 * key,
-    const FPTYPE* coord,
-    const int* type,
-    const deepmd::InputNlist & gpu_inlist,
-    const int& nloc,       
-    const float& rcut, 
-    int * i_idx) 
-{   
+template 
+void format_nbor_list_512(uint_64* key,
+                          const FPTYPE* coord,
+                          const int* type,
+                          const deepmd::InputNlist& gpu_inlist,
+                          const int& nloc,
+                          const float& rcut,
+                          int* i_idx) {
   const int LEN = 256;
   const int MAX_NBOR_SIZE = 512;
   const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN;
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(1, LEN);
-  hipLaunchKernelGGL(format_nlist_fill_a, block_grid, thread_grid, 0, 0, 
-      key,
-      coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, MAX_NBOR_SIZE);
+  hipLaunchKernelGGL(format_nlist_fill_a, block_grid, thread_grid, 0, 0, key,
+                     coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh,
+                     rcut, i_idx, MAX_NBOR_SIZE);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
   const int ITEMS_PER_THREAD = 4;
   const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD;
-  // hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), g_grid_size, BLOCK_THREADS, 0, 0, 
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), nloc, BLOCK_THREADS, 0, 0, 
-      key, 
-      key + nloc * MAX_NBOR_SIZE);
+  // hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), g_grid_size, BLOCK_THREADS, 0, 0,
+  hipLaunchKernelGGL(
+      HIP_KERNEL_NAME(
+          BlockSortKernel),
+      nloc, BLOCK_THREADS, 0, 0, key, key + nloc * MAX_NBOR_SIZE);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template
-void format_nbor_list_1024 (
-    uint_64 * key,
-    const FPTYPE* coord,
-    const int* type,
-    const deepmd::InputNlist & gpu_inlist,
-    const int& nloc,       
-    const float& rcut, 
-    int * i_idx) 
-{   
+template 
+void format_nbor_list_1024(uint_64* key,
+                           const FPTYPE* coord,
+                           const int* type,
+                           const deepmd::InputNlist& gpu_inlist,
+                           const int& nloc,
+                           const float& rcut,
+                           int* i_idx) {
   const int LEN = 256;
   const int MAX_NBOR_SIZE = 1024;
   const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN;
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(1, LEN);
-  hipLaunchKernelGGL(format_nlist_fill_a, block_grid, thread_grid, 0, 0, 
-      key,
-      coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, MAX_NBOR_SIZE);
+  hipLaunchKernelGGL(format_nlist_fill_a, block_grid, thread_grid, 0, 0, key,
+                     coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh,
+                     rcut, i_idx, MAX_NBOR_SIZE);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
   const int ITEMS_PER_THREAD = 8;
   const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD;
-  // hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), g_grid_size, BLOCK_THREADS, 0, 0, 
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), nloc, BLOCK_THREADS, 0, 0, 
-      key, 
-      key + nloc * MAX_NBOR_SIZE);
+  // hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), g_grid_size, BLOCK_THREADS, 0, 0,
+  hipLaunchKernelGGL(
+      HIP_KERNEL_NAME(
+          BlockSortKernel<uint_64, BLOCK_THREADS, ITEMS_PER_THREAD>),
+      nloc, BLOCK_THREADS, 0, 0, key, key + nloc * MAX_NBOR_SIZE);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template<typename FPTYPE>
-void format_nbor_list_2048 (
-    uint_64 * key,
-    const FPTYPE* coord,
-    const int* type,
-    const deepmd::InputNlist & gpu_inlist,
-    const int& nloc,       
-    const float& rcut, 
-    int * i_idx) 
-{   
+template <typename FPTYPE>
+void format_nbor_list_2048(uint_64* key,
+                           const FPTYPE* coord,
+                           const int* type,
+                           const deepmd::InputNlist& gpu_inlist,
+                           const int& nloc,
+                           const float& rcut,
+                           int* i_idx) {
   const int LEN = 256;
   const int MAX_NBOR_SIZE = 2048;
   const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN;
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(1, LEN);
-  hipLaunchKernelGGL(format_nlist_fill_a, block_grid, thread_grid, 0, 0, 
-      key,
-      coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, MAX_NBOR_SIZE);
+  hipLaunchKernelGGL(format_nlist_fill_a, block_grid, thread_grid, 0, 0, key,
+                     coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh,
+                     rcut, i_idx, MAX_NBOR_SIZE);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
   const int ITEMS_PER_THREAD = 8;
   const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD;
-  // hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), g_grid_size, BLOCK_THREADS, 0, 0, 
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), nloc, BLOCK_THREADS, 0, 0, 
-      key, 
-      key + nloc * MAX_NBOR_SIZE);
+  // hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), g_grid_size, BLOCK_THREADS, 0, 0,
+  hipLaunchKernelGGL(
+      HIP_KERNEL_NAME(
+          BlockSortKernel<uint_64, BLOCK_THREADS, ITEMS_PER_THREAD>),
+      nloc, BLOCK_THREADS, 0, 0, key, key + nloc * MAX_NBOR_SIZE);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template<typename FPTYPE>
-void format_nbor_list_4096 (
-    uint_64 * key,
-    const FPTYPE* coord,
-    const int* type,
-    const deepmd::InputNlist & gpu_inlist,
-    const int& nloc,       
-    const float& rcut, 
-    int * i_idx)
-{   
+template <typename FPTYPE>
+void format_nbor_list_4096(uint_64* key,
+                           const FPTYPE* coord,
+                           const int* type,
+                           const deepmd::InputNlist& gpu_inlist,
+                           const int& nloc,
+                           const float& rcut,
+                           int* i_idx) {
   const int LEN = 256;
   const int MAX_NBOR_SIZE = 4096;
   const int nblock = (MAX_NBOR_SIZE + LEN - 1) / LEN;
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(1, LEN);
-  hipLaunchKernelGGL(format_nlist_fill_a, block_grid, thread_grid, 0, 0, 
-      key,
-      coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh, rcut, i_idx, MAX_NBOR_SIZE);
+  hipLaunchKernelGGL(format_nlist_fill_a, block_grid, thread_grid, 0, 0, key,
+                     coord, type, gpu_inlist.numneigh, gpu_inlist.firstneigh,
+                     rcut, i_idx, MAX_NBOR_SIZE);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
   const int ITEMS_PER_THREAD = 16;
   const int BLOCK_THREADS = MAX_NBOR_SIZE / ITEMS_PER_THREAD;
-  // hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), g_grid_size, BLOCK_THREADS, 0, 0, 
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), nloc, BLOCK_THREADS, 0, 0, 
-      key, 
-      key + nloc * MAX_NBOR_SIZE);
+  // hipLaunchKernelGGL(HIP_KERNEL_NAME(BlockSortKernel), g_grid_size, BLOCK_THREADS, 0, 0,
+  hipLaunchKernelGGL(
+      HIP_KERNEL_NAME(
+          BlockSortKernel<uint_64, BLOCK_THREADS, ITEMS_PER_THREAD>),
+      nloc, BLOCK_THREADS, 0, 0, key, key + nloc * MAX_NBOR_SIZE);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template<
-    typename FPTYPE,
-    int      THREADS_PER_BLOCK>
-__global__ void compute_env_mat_a(
-    FPTYPE* em,
-    FPTYPE* em_deriv,
-    FPTYPE* rij,
-    const FPTYPE* coord,
-    const FPTYPE* avg,
-    const FPTYPE* std,
-    const int* type,
-    const int* nlist,
-    const int nnei,
-    const float rmin,
-    const float rmax)
-{   
+template <typename FPTYPE, int THREADS_PER_BLOCK>
+__global__ void compute_env_mat_a(FPTYPE* em,
+                                  FPTYPE* em_deriv,
+                                  FPTYPE* rij,
+                                  const FPTYPE* coord,
+                                  const FPTYPE* avg,
+                                  const FPTYPE* std,
+                                  const int* type,
+                                  const int* nlist,
+                                  const int nnei,
+                                  const float rmin,
+                                  const float rmax) {
   // <<>>
   const int_64 bid = blockIdx.x;
   const unsigned int tid = threadIdx.x;
@@ -390,16 +368,16 @@ __global__ void compute_env_mat_a(
     return;
   }
   const int ndescrpt = nnei * 4;
-  const int * row_nlist = nlist + bid * nnei;
-  FPTYPE * row_rij = rij + bid * nnei * 3;
-  FPTYPE * row_descript = em + bid * nnei * 4;
-  FPTYPE * row_descript_deriv = em_deriv + bid * nnei * 12;
+  const int* row_nlist = nlist + bid * nnei;
+  FPTYPE* row_rij = rij + bid * nnei * 3;
+  FPTYPE* row_descript = em + bid * nnei * 4;
+  FPTYPE* row_descript_deriv = em_deriv + bid * nnei * 12;
   for (int ii = tid; ii < nnei; ii += THREADS_PER_BLOCK) {
-    const int idx_value = ii * 4;	  // 4 components
-    const int idx_deriv = ii * 12;	// 4 components time 3 directions
+    const int idx_value = ii * 4;   // 4 components
+    const int idx_deriv = ii * 12;  // 4 components time 3 directions
     if (row_nlist[ii] >= 0) {
-      FPTYPE rr[3]  = {0};
-      FPTYPE dd[4]  = {0};
+      FPTYPE rr[3] = {0};
+      FPTYPE dd[4] = {0};
       FPTYPE vv[12] = {0};
       const int j_idx = row_nlist[ii];
       for (int kk = 0; kk < 3; kk++) {
@@ -415,60 +393,105 @@ __global__ void compute_env_mat_a(
       FPTYPE inr3 = inr4 * nr;
       FPTYPE sw, dsw;
       spline5_switch(sw, dsw, nr, rmin, rmax);
-      dd[0] = ((FPTYPE)1./nr)       ;//* sw;
-      dd[1] = (rr[0] / nr2) ;//* sw;
-      dd[2] = (rr[1] / nr2) ;//* sw;
-      dd[3] = (rr[2] / nr2) ;//* sw;
-      vv[0] = (rr[0] * inr3 * sw - dd[0] * dsw * rr[0] * inr); // avg[type[(idx_deriv + 0) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 0) % (ndescrpt * 3)) / 3];
-      vv[1] = (rr[1] * inr3 * sw - dd[0] * dsw * rr[1] * inr); // avg[type[(idx_deriv + 1) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 1) % (ndescrpt * 3)) / 3];
-      vv[2] = (rr[2] * inr3 * sw - dd[0] * dsw * rr[2] * inr); // avg[type[(idx_deriv + 2) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 2) % (ndescrpt * 3)) / 3];
+      dd[0] = ((FPTYPE)1. / nr);  //* sw;
+      dd[1] = (rr[0] / nr2);      //* sw;
+      dd[2] = (rr[1] / nr2);      //* sw;
+      dd[3] = (rr[2] / nr2);      //* sw;
+      vv[0] = (rr[0] * inr3 * sw -
+               dd[0] * dsw * rr[0] *
+                   inr);  // avg[type[(idx_deriv + 0) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 0) % (ndescrpt * 3)) / 3];
+      vv[1] = (rr[1] * inr3 * sw -
+               dd[0] * dsw * rr[1] *
+                   inr);  // avg[type[(idx_deriv + 1) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 1) % (ndescrpt * 3)) / 3];
+      vv[2] = (rr[2] * inr3 * sw -
+               dd[0] * dsw * rr[2] *
+                   inr);  // avg[type[(idx_deriv + 2) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 2) % (ndescrpt * 3)) / 3];
       // ****deriv of component x/r2
-      vv[3] = (((FPTYPE)2. * rr[0] * rr[0] * inr4 - inr2) * sw - dd[1] * dsw * rr[0] * inr); // avg[type[(idx_deriv + 3) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 3) % (ndescrpt * 3)) / 3];
-      vv[4] = (((FPTYPE)2. * rr[0] * rr[1] * inr4	) * sw - dd[1] * dsw * rr[1] * inr); // avg[type[(idx_deriv + 4) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 4) % (ndescrpt * 3)) / 3];
-      vv[5] = (((FPTYPE)2. * rr[0] * rr[2] * inr4	) * sw - dd[1] * dsw * rr[2] * inr); // avg[type[(idx_deriv + 5) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 5) % (ndescrpt * 3)) / 3];
+      vv[3] = (((FPTYPE)2. * rr[0] * rr[0] * inr4 - inr2) * sw -
+               dd[1] * dsw * rr[0] *
+                   inr);  // avg[type[(idx_deriv + 3) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 3) % (ndescrpt * 3)) / 3];
+      vv[4] = (((FPTYPE)2. * rr[0] * rr[1] * inr4) * sw -
+               dd[1] * dsw * rr[1] *
+                   inr);  // avg[type[(idx_deriv + 4) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 4) % (ndescrpt * 3)) / 3];
+      vv[5] = (((FPTYPE)2. * rr[0] * rr[2] * inr4) * sw -
+               dd[1] * dsw * rr[2] *
+                   inr);  // avg[type[(idx_deriv + 5) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 5) % (ndescrpt * 3)) / 3];
       // ***deriv of component y/r2
-      vv[6] = (((FPTYPE)2. * rr[1] * rr[0] * inr4	) * sw - dd[2] * dsw * rr[0] * inr); // avg[type[(idx_deriv + 6) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 6) % (ndescrpt * 3)) / 3];
-      vv[7] = (((FPTYPE)2. * rr[1] * rr[1] * inr4 - inr2) * sw - dd[2] * dsw * rr[1] * inr); // avg[type[(idx_deriv + 7) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 7) % (ndescrpt * 3)) / 3];
-      vv[8] = (((FPTYPE)2. * rr[1] * rr[2] * inr4	) * sw - dd[2] * dsw * rr[2] * inr); // avg[type[(idx_deriv + 8) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 8) % (ndescrpt * 3)) / 3];
-      // ***deriv of component z/r2 
-      vv[9] = (((FPTYPE)2. * rr[2] * rr[0] * inr4	) * sw - dd[3] * dsw * rr[0] * inr); // avg[type[(idx_deriv + 9) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 9) % (ndescrpt * 3)) / 3];
-      vv[10]= (((FPTYPE)2. * rr[2] * rr[1] * inr4	) * sw - dd[3] * dsw * rr[1] * inr); // avg[type[(idx_deriv + 10) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 10) % (ndescrpt * 3)) / 3];
-      vv[11]= (((FPTYPE)2. * rr[2] * rr[2] * inr4 - inr2) * sw - dd[3] * dsw * rr[2] * inr); // avg[type[(idx_deriv + 11) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 11) % (ndescrpt * 3)) / 3];
+      vv[6] = (((FPTYPE)2. * rr[1] * rr[0] * inr4) * sw -
+               dd[2] * dsw * rr[0] *
+                   inr);  // avg[type[(idx_deriv + 6) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 6) % (ndescrpt * 3)) / 3];
+      vv[7] = (((FPTYPE)2. * rr[1] * rr[1] * inr4 - inr2) * sw -
+               dd[2] * dsw * rr[1] *
+                   inr);  // avg[type[(idx_deriv + 7) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 7) % (ndescrpt * 3)) / 3];
+      vv[8] = (((FPTYPE)2. * rr[1] * rr[2] * inr4) * sw -
+               dd[2] * dsw * rr[2] *
+                   inr);  // avg[type[(idx_deriv + 8) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 8) % (ndescrpt * 3)) / 3];
+      // ***deriv of component z/r2
+      vv[9] = (((FPTYPE)2. * rr[2] * rr[0] * inr4) * sw -
+               dd[3] * dsw * rr[0] *
+                   inr);  // avg[type[(idx_deriv + 9) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 9) % (ndescrpt * 3)) / 3];
+      vv[10] =
+          (((FPTYPE)2. * rr[2] * rr[1] * inr4) * sw -
+           dd[3] * dsw * rr[1] *
+               inr);  // avg[type[(idx_deriv + 10) / (ndescrpt * 3)] * ndescrpt
+                      // + ((idx_deriv + 10) % (ndescrpt * 3)) / 3];
+      vv[11] =
+          (((FPTYPE)2. * rr[2] * rr[2] * inr4 - inr2) * sw -
+           dd[3] * dsw * rr[2] *
+               inr);  // avg[type[(idx_deriv + 11) / (ndescrpt * 3)] * ndescrpt
+                      // + ((idx_deriv + 11) % (ndescrpt * 3)) / 3];
       // 4 value components
-      dd[0] *= sw; // * em[idx * ndescrpt + idx_value + 0]);// - avg[type[idx] * ndescrpt + idx_value + 0]) / std[type[idx] * ndescrpt + idx_value + 0];
-      dd[1] *= sw; // * em[idx * ndescrpt + idx_value + 1]);// - avg[type[idx] * ndescrpt + idx_value + 1]) / std[type[idx] * ndescrpt + idx_value + 1];
-      dd[2] *= sw; // * em[idx * ndescrpt + idx_value + 2]);// - avg[type[idx] * ndescrpt + idx_value + 2]) / std[type[idx] * ndescrpt + idx_value + 2];
-      dd[3] *= sw; // * em[idx * ndescrpt + idx_value + 3]);// - avg[type[idx] * ndescrpt + idx_value + 3]) / std[type[idx] * ndescrpt + idx_value + 3];
+      dd[0] *= sw;  // * em[idx * ndescrpt + idx_value + 0]);// - avg[type[idx]
+                    // * ndescrpt + idx_value + 0]) / std[type[idx] * ndescrpt +
+                    // idx_value + 0];
+      dd[1] *= sw;  // * em[idx * ndescrpt + idx_value + 1]);// - avg[type[idx]
+                    // * ndescrpt + idx_value + 1]) / std[type[idx] * ndescrpt +
+                    // idx_value + 1];
+      dd[2] *= sw;  // * em[idx * ndescrpt + idx_value + 2]);// - avg[type[idx]
+                    // * ndescrpt + idx_value + 2]) / std[type[idx] * ndescrpt +
+                    // idx_value + 2];
+      dd[3] *= sw;  // * em[idx * ndescrpt + idx_value + 3]);// - avg[type[idx]
+                    // * ndescrpt + idx_value + 3]) / std[type[idx] * ndescrpt +
+                    // idx_value + 3];
       for (int ii = 0; ii < 12; ii++) {
-        row_descript_deriv[idx_deriv + ii] = vv[ii] / std[type[bid] * ndescrpt + idx_value + ii / 3];
+        row_descript_deriv[idx_deriv + ii] =
+            vv[ii] / std[type[bid] * ndescrpt + idx_value + ii / 3];
       }
-      for (int ii = 0; ii < 4; ii++) {  
-        row_descript[idx_value + ii] = (dd[ii] - avg[type[bid] * ndescrpt + idx_value + ii]) / std[type[bid] * ndescrpt + idx_value + ii];
+      for (int ii = 0; ii < 4; ii++) {
+        row_descript[idx_value + ii] =
+            (dd[ii] - avg[type[bid] * ndescrpt + idx_value + ii]) /
+            std[type[bid] * ndescrpt + idx_value + ii];
       }
-    }
-    else {
+    } else {
       // TODO: move it to the memset.
-      row_descript[idx_value] -= avg[type[bid] * ndescrpt + idx_value] / std[type[bid] * ndescrpt + idx_value];
+      row_descript[idx_value] -= avg[type[bid] * ndescrpt + idx_value] /
+                                 std[type[bid] * ndescrpt + idx_value];
     }
   }
 }
 
-template<
-    typename FPTYPE,
-    int      THREADS_PER_BLOCK>
-__global__ void compute_env_mat_r(
-    FPTYPE* em,
-    FPTYPE* em_deriv,
-    FPTYPE* rij,
-    const FPTYPE* coord,
-    const FPTYPE* avg,
-    const FPTYPE* std,
-    const int* type,
-    const int* nlist,
-    const int nnei,
-    const float rmin,
-    const float rmax)
-{
+template <typename FPTYPE, int THREADS_PER_BLOCK>
+__global__ void compute_env_mat_r(FPTYPE* em,
+                                  FPTYPE* em_deriv,
+                                  FPTYPE* rij,
+                                  const FPTYPE* coord,
+                                  const FPTYPE* avg,
+                                  const FPTYPE* std,
+                                  const int* type,
+                                  const int* nlist,
+                                  const int nnei,
+                                  const float rmin,
+                                  const float rmax) {
   // <<>>
   const int_64 bid = blockIdx.x;
   const unsigned int tid = threadIdx.x;
@@ -476,18 +499,18 @@ __global__ void compute_env_mat_r(
     return;
   }
   const int ndescrpt = nnei;
-  const int * row_nlist = nlist + bid * nnei;
-  FPTYPE * row_rij = rij + bid * nnei * 3;
-  FPTYPE * row_em = em + bid * nnei;
-  FPTYPE * row_em_deriv = em_deriv + bid * nnei * 3;
+  const int* row_nlist = nlist + bid * nnei;
+  FPTYPE* row_rij = rij + bid * nnei * 3;
+  FPTYPE* row_em = em + bid * nnei;
+  FPTYPE* row_em_deriv = em_deriv + bid * nnei * 3;
   for (int ii = tid; ii < nnei; ii += THREADS_PER_BLOCK) {
-    const int idx_value = ii;	  // 4 components
-    const int idx_deriv = ii * 3;	// 4 components time 3 directions
+    const int idx_value = ii;      // 4 components
+    const int idx_deriv = ii * 3;  // 4 components time 3 directions
     if (row_nlist[ii] >= 0) {
-      FPTYPE rr[3]  = {(FPTYPE)0.};
-      FPTYPE vv[3]  = {(FPTYPE)0.};
-      FPTYPE dd     = (FPTYPE)0.;
-      const int & j_idx = row_nlist[ii];
+      FPTYPE rr[3] = {(FPTYPE)0.};
+      FPTYPE vv[3] = {(FPTYPE)0.};
+      FPTYPE dd = (FPTYPE)0.;
+      const int& j_idx = row_nlist[ii];
       for (int kk = 0; kk < 3; kk++) {
         rr[kk] = coord[j_idx * 3 + kk] - coord[bid * 3 + kk];
         row_rij[ii * 3 + kk] = rr[kk];
@@ -501,201 +524,299 @@ __global__ void compute_env_mat_r(
       FPTYPE inr3 = inr4 * nr;
       FPTYPE sw, dsw;
       spline5_switch(sw, dsw, nr, rmin, rmax);
-      dd = ((FPTYPE)1./nr)       ;//* sw;
-      vv[0] = (rr[0] * inr3 * sw - dd * dsw * rr[0] * inr); // avg[type[(idx_deriv + 0) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 0) % (ndescrpt * 3)) / 3];
-      vv[1] = (rr[1] * inr3 * sw - dd * dsw * rr[1] * inr); // avg[type[(idx_deriv + 1) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 1) % (ndescrpt * 3)) / 3];
-      vv[2] = (rr[2] * inr3 * sw - dd * dsw * rr[2] * inr); // avg[type[(idx_deriv + 2) / (ndescrpt * 3)] * ndescrpt + ((idx_deriv + 2) % (ndescrpt * 3)) / 3];
-      
+      dd = ((FPTYPE)1. / nr);  //* sw;
+      vv[0] = (rr[0] * inr3 * sw -
+               dd * dsw * rr[0] *
+                   inr);  // avg[type[(idx_deriv + 0) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 0) % (ndescrpt * 3)) / 3];
+      vv[1] = (rr[1] * inr3 * sw -
+               dd * dsw * rr[1] *
+                   inr);  // avg[type[(idx_deriv + 1) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 1) % (ndescrpt * 3)) / 3];
+      vv[2] = (rr[2] * inr3 * sw -
+               dd * dsw * rr[2] *
+                   inr);  // avg[type[(idx_deriv + 2) / (ndescrpt * 3)] *
+                          // ndescrpt + ((idx_deriv + 2) % (ndescrpt * 3)) / 3];
+
       // 4 value components
-      dd *= sw; // * em[idx * ndescrpt + idx_value + 0]);// - avg[type[idx] * ndescrpt + idx_value + 0]) / std[type[idx] * ndescrpt + idx_value + 0];
+      dd *= sw;  // * em[idx * ndescrpt + idx_value + 0]);// - avg[type[idx] *
+                 // ndescrpt + idx_value + 0]) / std[type[idx] * ndescrpt +
+                 // idx_value + 0];
       for (int ii = 0; ii < 3; ii++) {
-        row_em_deriv[idx_deriv + ii] = vv[ii] / std[type[bid] * ndescrpt + idx_value + ii / 3];
+        row_em_deriv[idx_deriv + ii] =
+            vv[ii] / std[type[bid] * ndescrpt + idx_value + ii / 3];
       }
-      row_em[idx_value] = (dd - avg[type[bid] * ndescrpt + idx_value]) / std[type[bid] * ndescrpt + idx_value];
-    }
-    else {
+      row_em[idx_value] = (dd - avg[type[bid] * ndescrpt + idx_value]) /
+                          std[type[bid] * ndescrpt + idx_value];
+    } else {
       // TODO: move it to the memset.
-      row_em[idx_value] -= avg[type[bid] * ndescrpt + idx_value] / std[type[bid] * ndescrpt + idx_value];
+      row_em[idx_value] -= avg[type[bid] * ndescrpt + idx_value] /
+                           std[type[bid] * ndescrpt + idx_value];
     }
   }
 }
 
 namespace deepmd {
 template <typename FPTYPE>
-void format_nbor_list_gpu_rocm(    
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const deepmd::InputNlist & gpu_inlist,
-    int * array_int,
-    uint_64 * array_longlong,
-    const int max_nbor_size,
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const std::vector<int> sec)
-{
+void format_nbor_list_gpu_rocm(int* nlist,
+                               const FPTYPE* coord,
+                               const int* type,
+                               const deepmd::InputNlist& gpu_inlist,
+                               int* array_int,
+                               uint_64* array_longlong,
+                               const int max_nbor_size,
+                               const int nloc,
+                               const int nall,
+                               const float rcut,
+                               const std::vector<int> sec) {
   const int LEN = 256;
   const int nnei = sec.back();
-  const int nblock = (nloc + LEN -1) / LEN;
-  int * sec_dev = array_int;
-  int * nei_iter = array_int + sec.size(); // = new int[sec_size];
-  int * i_idx = array_int + sec.size() + nloc * sec.size();
-  uint_64 * key = array_longlong;
-  assert(max_nbor_size == 256 || max_nbor_size == 512 || 1024 || max_nbor_size == 2048 || max_nbor_size == 4096);
+  const int nblock = (nloc + LEN - 1) / LEN;
+  int* sec_dev = array_int;
+  int* nei_iter = array_int + sec.size();  // = new int[sec_size];
+  int* i_idx = array_int + sec.size() + nloc * sec.size();
+  uint_64* key = array_longlong;
+  assert(max_nbor_size == 256 || max_nbor_size == 512 || max_nbor_size == 1024 ||
+         max_nbor_size == 2048 || max_nbor_size == 4096);
   DPErrcheck(hipMemset(nlist, -1, sizeof(int) * int_64(nloc) * nnei));
-  DPErrcheck(hipMemset(key, 0xffffffff, sizeof(uint_64) * int_64(nloc) * max_nbor_size));
-  DPErrcheck(hipMemcpy(sec_dev, &sec[0], sizeof(int) * sec.size(), hipMemcpyHostToDevice));   
+  DPErrcheck(hipMemset(key, 0xffffffff,
+                       sizeof(uint_64) * int_64(nloc) * max_nbor_size));
+  DPErrcheck(hipMemcpy(sec_dev, &sec[0], sizeof(int) * sec.size(),
+                       hipMemcpyHostToDevice));
 
-  hipLaunchKernelGGL(get_i_idx, nblock, LEN, 0, 0, 
-      i_idx,
-      nloc, gpu_inlist.ilist);
+  hipLaunchKernelGGL(get_i_idx, nblock, LEN, 0, 0, i_idx, nloc,
+                     gpu_inlist.ilist);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 
   if (max_nbor_size == 256) {
-    format_nbor_list_256 (
-        key,
-        coord, type, gpu_inlist, nloc, rcut, i_idx); 
-  }
-  else if (max_nbor_size == 512) {
-    format_nbor_list_512 (
-        key,
-        coord, type, gpu_inlist, nloc, rcut, i_idx); 
-  } 
-  else if (max_nbor_size == 1024) {
-    format_nbor_list_1024 (
-        key,
-        coord, type, gpu_inlist, nloc, rcut, i_idx); 
-  } 
-  else if (max_nbor_size == 2048) {
-    format_nbor_list_2048 (
-        key,
-        coord, type, gpu_inlist, nloc, rcut, i_idx); 
-  } 
-  else if (max_nbor_size == 4096) {
-    format_nbor_list_4096 (
-        key,
-        coord, type, gpu_inlist, nloc, rcut, i_idx); 
+    format_nbor_list_256(key, coord, type, gpu_inlist, nloc, rcut, i_idx);
+  } else if (max_nbor_size == 512) {
+    format_nbor_list_512(key, coord, type, gpu_inlist, nloc, rcut, i_idx);
+  } else if (max_nbor_size == 1024) {
+    format_nbor_list_1024(key, coord, type, gpu_inlist, nloc, rcut, i_idx);
+  } else if (max_nbor_size == 2048) {
+    format_nbor_list_2048(key, coord, type, gpu_inlist, nloc, rcut, i_idx);
+  } else if (max_nbor_size == 4096) {
+    format_nbor_list_4096(key, coord, type, gpu_inlist, nloc, rcut, i_idx);
   }
-    
-  hipLaunchKernelGGL(fill_nei_iter, dim3(nloc, (max_nbor_size + LEN - 1) / LEN) , LEN, 0, 0,
-      nei_iter,
-      key, nloc, max_nbor_size, sec.size());
-  
-  hipLaunchKernelGGL(format_nlist_fill_b, dim3(nloc, (max_nbor_size + LEN - 1) / LEN), LEN, 0, 0, 
-      nlist,
-      nnei, nloc, key, sec_dev, sec.size(), nei_iter, max_nbor_size);
+
+  hipLaunchKernelGGL(fill_nei_iter, dim3(nloc, (max_nbor_size + LEN - 1) / LEN),
+                     LEN, 0, 0, nei_iter, key, nloc, max_nbor_size, sec.size());
+
+  hipLaunchKernelGGL(
+      format_nlist_fill_b, dim3(nloc, (max_nbor_size + LEN - 1) / LEN), LEN, 0,
+      0, nlist, nnei, nloc, key, sec_dev, sec.size(), nei_iter, max_nbor_size);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
 template <typename FPTYPE>
-void prod_env_mat_a_gpu_rocm(    
-    FPTYPE * em, 
-    FPTYPE * em_deriv, 
-    FPTYPE * rij, 
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const InputNlist & gpu_inlist,
-    int * array_int, 
-    uint_64 * array_longlong,
-    const int max_nbor_size,
-    const FPTYPE * avg, 
-    const FPTYPE * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec,
-    const int * f_type)
-{
-  if (f_type == NULL){
+void prod_env_mat_a_gpu_rocm(FPTYPE* em,
+                             FPTYPE* em_deriv,
+                             FPTYPE* rij,
+                             int* nlist,
+                             const FPTYPE* coord,
+                             const int* type,
+                             const InputNlist& gpu_inlist,
+                             int* array_int,
+                             uint_64* array_longlong,
+                             const int max_nbor_size,
+                             const FPTYPE* avg,
+                             const FPTYPE* std,
+                             const int nloc,
+                             const int nall,
+                             const float rcut,
+                             const float rcut_smth,
+                             const std::vector<int> sec,
+                             const int* f_type) {
+  if (f_type == NULL) {
     f_type = type;
   }
   const int nnei = sec.back();
   const int ndescrpt = nnei * 4;
   DPErrcheck(hipMemset(em, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt));
-  DPErrcheck(hipMemset(em_deriv, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt * 3));
+  DPErrcheck(
+      hipMemset(em_deriv, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt * 3));
   DPErrcheck(hipMemset(rij, 0, sizeof(FPTYPE) * int_64(nloc) * nnei * 3));
 
-  format_nbor_list_gpu_rocm(
-      nlist, 
-      coord, f_type, gpu_inlist, array_int, array_longlong, max_nbor_size, nloc, nall, rcut, sec);
+  format_nbor_list_gpu_rocm(nlist, coord, f_type, gpu_inlist, array_int,
+                            array_longlong, max_nbor_size, nloc, nall, rcut,
+                            sec);
   nborErrcheck(hipGetLastError());
   nborErrcheck(hipDeviceSynchronize());
 
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(compute_env_mat_a), nloc, TPB, 0, 0, 
-      em, em_deriv, rij, 
-      coord, avg, std, type, nlist, nnei, rcut_smth, rcut);
+  hipLaunchKernelGGL(HIP_KERNEL_NAME(compute_env_mat_a<FPTYPE, TPB>), nloc, TPB,
+                     0, 0, em, em_deriv, rij, coord, avg, std, type, nlist,
+                     nnei, rcut_smth, rcut);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
 template <typename FPTYPE>
-void prod_env_mat_r_gpu_rocm(    
-    FPTYPE * em, 
-    FPTYPE * em_deriv, 
-    FPTYPE * rij, 
-    int * nlist, 
-    const FPTYPE * coord, 
-    const int * type, 
-    const InputNlist & gpu_inlist,
-    int * array_int, 
-    uint_64 * array_longlong,
-    const int max_nbor_size,
-    const FPTYPE * avg, 
-    const FPTYPE * std, 
-    const int nloc, 
-    const int nall, 
-    const float rcut, 
-    const float rcut_smth, 
-    const std::vector sec)
-{
+void prod_env_mat_r_gpu_rocm(FPTYPE* em,
+                             FPTYPE* em_deriv,
+                             FPTYPE* rij,
+                             int* nlist,
+                             const FPTYPE* coord,
+                             const int* type,
+                             const InputNlist& gpu_inlist,
+                             int* array_int,
+                             uint_64* array_longlong,
+                             const int max_nbor_size,
+                             const FPTYPE* avg,
+                             const FPTYPE* std,
+                             const int nloc,
+                             const int nall,
+                             const float rcut,
+                             const float rcut_smth,
+                             const std::vector<int> sec) {
   const int nnei = sec.back();
   const int ndescrpt = nnei * 1;
   DPErrcheck(hipMemset(em, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt));
-  DPErrcheck(hipMemset(em_deriv, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt * 3));
+  DPErrcheck(
+      hipMemset(em_deriv, 0, sizeof(FPTYPE) * int_64(nloc) * ndescrpt * 3));
   DPErrcheck(hipMemset(rij, 0, sizeof(FPTYPE) * int_64(nloc) * nnei * 3));
 
-  format_nbor_list_gpu_rocm(
-      nlist, 
-      coord, type, gpu_inlist, array_int, array_longlong, max_nbor_size, nloc, nall, rcut, sec);
+  format_nbor_list_gpu_rocm(nlist, coord, type, gpu_inlist, array_int,
+                            array_longlong, max_nbor_size, nloc, nall, rcut,
+                            sec);
   nborErrcheck(hipGetLastError());
   nborErrcheck(hipDeviceSynchronize());
-  
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(compute_env_mat_r), nloc, TPB, 0, 0, 
-      em, em_deriv, rij, 
-      coord, avg, std, type, nlist, nnei, rcut_smth, rcut);
+
+  hipLaunchKernelGGL(HIP_KERNEL_NAME(compute_env_mat_r<FPTYPE, TPB>), nloc, TPB,
+                     0, 0, em, em_deriv, rij, coord, avg, std, type, nlist,
+                     nnei, rcut_smth, rcut);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
 template <typename FPTYPE>
-void test_encoding_decoding_nbor_info_gpu_rocm(
-    uint_64 * key,
-    int * out_type,
-    int * out_index,
-    const int * in_type,
-    const FPTYPE * in_dist,
-    const int * in_index,
-    const int size_of_array)
-{
+void test_encoding_decoding_nbor_info_gpu_rocm(uint_64* key,
+                                               int* out_type,
+                                               int* out_index,
+                                               const int* in_type,
+                                               const FPTYPE* in_dist,
+                                               const int* in_index,
+                                               const int size_of_array) {
   const int nblock = (size_of_array + TPB - 1) / TPB;
-  hipLaunchKernelGGL(encoding_decoding_nbor_info, nblock, TPB, 0, 0, 
-      key, out_type, out_index,
-      in_type, in_dist, in_index, size_of_array);
+  hipLaunchKernelGGL(encoding_decoding_nbor_info, nblock, TPB, 0, 0, key,
+                     out_type, out_index, in_type, in_dist, in_index,
+                     size_of_array);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template void prod_env_mat_a_gpu_rocm<float>(float * em, float * em_deriv, float * rij, int * nlist, const float * coord, const int * type, const InputNlist & gpu_inlist, int * array_int, unsigned long long * array_longlong, const int max_nbor_size, const float * avg, const float * std, const int nloc, const int nall, const float rcut, const float rcut_smth, const std::vector<int> sec, const int * f_type);
-template void prod_env_mat_a_gpu_rocm<double>(double * em, double * em_deriv, double * rij, int * nlist, const double * coord, const int * type, const InputNlist & gpu_inlist, int * array_int, unsigned long long * array_longlong, const int max_nbor_size, const double * avg, const double * std, const int nloc, const int nall, const float rcut, const float rcut_smth, const std::vector<int> sec, const int * f_type);
-template void prod_env_mat_r_gpu_rocm<float>(float * em, float * em_deriv, float * rij, int * nlist, const float * coord, const int * type, const InputNlist & gpu_inlist, int * array_int, unsigned long long * array_longlong, const int max_nbor_size, const float * avg, const float * std, const int nloc, const int nall, const float rcut, const float rcut_smth, const std::vector<int> sec);
-template void prod_env_mat_r_gpu_rocm<double>(double * em, double * em_deriv, double * rij, int * nlist, const double * coord, const int * type, const InputNlist & gpu_inlist, int * array_int, unsigned long long * array_longlong, const int max_nbor_size, const double * avg, const double * std, const int nloc, const int nall, const float rcut, const float rcut_smth, const std::vector<int> sec);
-template void format_nbor_list_gpu_rocm<float>(int * nlist, const float * coord, const int * type, const deepmd::InputNlist & gpu_inlist,int * array_int,uint_64 * array_longlong,const int max_nbor_size,const int nloc, const int nall, const float rcut, const std::vector<int> sec);
-template void format_nbor_list_gpu_rocm<double>(int * nlist, const double * coord, const int * type, const deepmd::InputNlist & gpu_inlist,int * array_int,uint_64 * array_longlong,const int max_nbor_size,const int nloc, const int nall, const float rcut, const std::vector<int> sec);
-template void test_encoding_decoding_nbor_info_gpu_rocm<float>(uint_64 * key, int * out_type, int * out_index, const int * in_type, const float * in_dist, const int * in_index, const int size_of_array);
-template void test_encoding_decoding_nbor_info_gpu_rocm<double>(uint_64 * key, int * out_type, int * out_index, const int * in_type, const double * in_dist, const int * in_index, const int size_of_array);
-}
+template void prod_env_mat_a_gpu_rocm(float* em,
+                                             float* em_deriv,
+                                             float* rij,
+                                             int* nlist,
+                                             const float* coord,
+                                             const int* type,
+                                             const InputNlist& gpu_inlist,
+                                             int* array_int,
+                                             unsigned long long* array_longlong,
+                                             const int max_nbor_size,
+                                             const float* avg,
+                                             const float* std,
+                                             const int nloc,
+                                             const int nall,
+                                             const float rcut,
+                                             const float rcut_smth,
+                                             const std::vector sec,
+                                             const int* f_type);
+template void prod_env_mat_a_gpu_rocm(
+    double* em,
+    double* em_deriv,
+    double* rij,
+    int* nlist,
+    const double* coord,
+    const int* type,
+    const InputNlist& gpu_inlist,
+    int* array_int,
+    unsigned long long* array_longlong,
+    const int max_nbor_size,
+    const double* avg,
+    const double* std,
+    const int nloc,
+    const int nall,
+    const float rcut,
+    const float rcut_smth,
+    const std::vector sec,
+    const int* f_type);
+template void prod_env_mat_r_gpu_rocm(float* em,
+                                             float* em_deriv,
+                                             float* rij,
+                                             int* nlist,
+                                             const float* coord,
+                                             const int* type,
+                                             const InputNlist& gpu_inlist,
+                                             int* array_int,
+                                             unsigned long long* array_longlong,
+                                             const int max_nbor_size,
+                                             const float* avg,
+                                             const float* std,
+                                             const int nloc,
+                                             const int nall,
+                                             const float rcut,
+                                             const float rcut_smth,
+                                             const std::vector sec);
+template void prod_env_mat_r_gpu_rocm(
+    double* em,
+    double* em_deriv,
+    double* rij,
+    int* nlist,
+    const double* coord,
+    const int* type,
+    const InputNlist& gpu_inlist,
+    int* array_int,
+    unsigned long long* array_longlong,
+    const int max_nbor_size,
+    const double* avg,
+    const double* std,
+    const int nloc,
+    const int nall,
+    const float rcut,
+    const float rcut_smth,
+    const std::vector sec);
+template void format_nbor_list_gpu_rocm(
+    int* nlist,
+    const float* coord,
+    const int* type,
+    const deepmd::InputNlist& gpu_inlist,
+    int* array_int,
+    uint_64* array_longlong,
+    const int max_nbor_size,
+    const int nloc,
+    const int nall,
+    const float rcut,
+    const std::vector sec);
+template void format_nbor_list_gpu_rocm(
+    int* nlist,
+    const double* coord,
+    const int* type,
+    const deepmd::InputNlist& gpu_inlist,
+    int* array_int,
+    uint_64* array_longlong,
+    const int max_nbor_size,
+    const int nloc,
+    const int nall,
+    const float rcut,
+    const std::vector sec);
+template void test_encoding_decoding_nbor_info_gpu_rocm(
+    uint_64* key,
+    int* out_type,
+    int* out_index,
+    const int* in_type,
+    const float* in_dist,
+    const int* in_index,
+    const int size_of_array);
+template void test_encoding_decoding_nbor_info_gpu_rocm(
+    uint_64* key,
+    int* out_type,
+    int* out_index,
+    const int* in_type,
+    const double* in_dist,
+    const int* in_index,
+    const int size_of_array);
+}  // namespace deepmd
diff --git a/source/lib/src/rocm/prod_force.hip.cu b/source/lib/src/rocm/prod_force.hip.cu
index 16db29859e..cf2df5d4a8 100644
--- a/source/lib/src/rocm/prod_force.hip.cu
+++ b/source/lib/src/rocm/prod_force.hip.cu
@@ -1,15 +1,11 @@
 #include "device.h"
 #include "prod_force.h"
 
-template <
-    typename FPTYPE,
-    int      THREADS_PER_BLOCK>
-__global__ void force_deriv_wrt_center_atom(
-    FPTYPE * force, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * in_deriv, 
-    const int ndescrpt)
-{
+template 
+__global__ void force_deriv_wrt_center_atom(FPTYPE* force,
+                                            const FPTYPE* net_deriv,
+                                            const FPTYPE* in_deriv,
+                                            const int ndescrpt) {
   __shared__ FPTYPE data[THREADS_PER_BLOCK * 3];
   int_64 bid = blockIdx.x;
   unsigned int tid = threadIdx.x;
@@ -18,15 +14,18 @@ __global__ void force_deriv_wrt_center_atom(
   }
   for (int ii = tid; ii < ndescrpt; ii += THREADS_PER_BLOCK) {
     for (int jj = 0; jj < 3; jj++) {
-      data[jj * THREADS_PER_BLOCK + tid] += net_deriv[bid * ndescrpt + ii] * in_deriv[bid * ndescrpt * 3 + ii * 3 + jj];
+      data[jj * THREADS_PER_BLOCK + tid] +=
+          net_deriv[bid * ndescrpt + ii] *
+          in_deriv[bid * ndescrpt * 3 + ii * 3 + jj];
     }
   }
-  __syncthreads(); 
+  __syncthreads();
   // do reduction in shared memory
   for (int ii = THREADS_PER_BLOCK >> 1; ii > 0; ii >>= 1) {
     if (tid < ii) {
       for (int jj = 0; jj < 3; jj++) {
-        data[jj * THREADS_PER_BLOCK + tid] += data[jj * THREADS_PER_BLOCK + tid + ii];
+        data[jj * THREADS_PER_BLOCK + tid] +=
+            data[jj * THREADS_PER_BLOCK + tid + ii];
       }
     }
     __syncthreads();
@@ -39,130 +38,139 @@ __global__ void force_deriv_wrt_center_atom(
   }
 }
 
-template
-__global__ void force_deriv_wrt_neighbors_a(
-    FPTYPE * force, 
-    const FPTYPE * net_deriv,
-    const FPTYPE * in_deriv,
-    const int * nlist,
-    const int nloc,
-    const int nnei)
-{  
-    // idy -> nnei
-    const int_64 idx = blockIdx.x;
-    const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x;
-    const unsigned int idz = threadIdx.y;
-    const int ndescrpt = nnei * 4;
-    if (idy >= nnei) {
-        return;
-    }
-    // deriv wrt neighbors
-    int j_idx = nlist[idx * nnei + idy];
-    if (j_idx < 0) {
-        return;
-    }
-    FPTYPE force_tmp = (FPTYPE)0.;
-    for (int idw = 0; idw < 4; ++idw) {
-        force_tmp += net_deriv[idx * ndescrpt + idy * 4 + idw] * in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz];
-    }
-    atomicAdd(force + j_idx * 3 + idz, force_tmp);
+template 
+__global__ void force_deriv_wrt_neighbors_a(FPTYPE* force,
+                                            const FPTYPE* net_deriv,
+                                            const FPTYPE* in_deriv,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nnei) {
+  // idy -> nnei
+  const int_64 idx = blockIdx.x;
+  const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x;
+  const unsigned int idz = threadIdx.y;
+  const int ndescrpt = nnei * 4;
+  if (idy >= nnei) {
+    return;
+  }
+  // deriv wrt neighbors
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  FPTYPE force_tmp = (FPTYPE)0.;
+  for (int idw = 0; idw < 4; ++idw) {
+    force_tmp += net_deriv[idx * ndescrpt + idy * 4 + idw] *
+                 in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz];
+  }
+  atomicAdd(force + j_idx * 3 + idz, force_tmp);
 }
 
-template
-__global__ void force_deriv_wrt_neighbors_r(
-		FPTYPE * force, 
-		const FPTYPE * net_deriv,
-		const FPTYPE * in_deriv,
-		const int * nlist,
-		const int nloc,
-		const int nnei)
-{  
-    // idy -> nnei
-    const int_64 idx = blockIdx.x;
-    const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x;
-    const unsigned int idz = threadIdx.y;
-    const int ndescrpt = nnei * 1;
-    if (idy >= nnei) {
-        return;
-    }
-    // deriv wrt neighbors
-    int j_idx = nlist[idx * nnei + idy];
-    if (j_idx < 0) {
-        return;
-    }
-    atomicAdd(
-        force + j_idx * 3 + idz, 
-        net_deriv[idx * ndescrpt + idy] * in_deriv[idx * ndescrpt * 3 + idy * 3 + idz]);
+template 
+__global__ void force_deriv_wrt_neighbors_r(FPTYPE* force,
+                                            const FPTYPE* net_deriv,
+                                            const FPTYPE* in_deriv,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nnei) {
+  // idy -> nnei
+  const int_64 idx = blockIdx.x;
+  const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x;
+  const unsigned int idz = threadIdx.y;
+  const int ndescrpt = nnei * 1;
+  if (idy >= nnei) {
+    return;
+  }
+  // deriv wrt neighbors
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  atomicAdd(force + j_idx * 3 + idz,
+            net_deriv[idx * ndescrpt + idy] *
+                in_deriv[idx * ndescrpt * 3 + idy * 3 + idz]);
 }
 
 namespace deepmd {
-  template 
-  void prod_force_a_gpu_rocm(    
-      FPTYPE * force, 
-      const FPTYPE * net_deriv, 
-      const FPTYPE * in_deriv, 
-      const int * nlist, 
-      const int nloc, 
-      const int nall, 
-      const int nnei)
-  {
-    const int ndescrpt = nnei * 4;
-    DPErrcheck(hipMemset(
-        force, 
-        0, sizeof(FPTYPE) * nall * 3));
-  
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(force_deriv_wrt_center_atom), nloc, TPB, 0, 0, 
-        force, 
-        net_deriv, in_deriv, ndescrpt);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
-  
-    const int LEN = 64;
-    const int nblock = (nnei + LEN - 1) / LEN;
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(LEN, 3);
-    hipLaunchKernelGGL(force_deriv_wrt_neighbors_a, block_grid, thread_grid, 0, 0, 
-        force, 
-        net_deriv, in_deriv, nlist, nloc, nnei);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
-  }
-  
-  template 
-  void prod_force_r_gpu_rocm(    
-      FPTYPE * force, 
-      const FPTYPE * net_deriv, 
-      const FPTYPE * in_deriv, 
-      const int * nlist, 
-      const int nloc, 
-      const int nall, 
-      const int nnei)
-  {
-    const int ndescrpt = nnei * 1;
-    DPErrcheck(hipMemset(
-        force, 
-        0, sizeof(FPTYPE) * nall * 3));
-  
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(force_deriv_wrt_center_atom), nloc, TPB, 0, 0, 
-        force, 
-        net_deriv, in_deriv, ndescrpt);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
-  
-    const int LEN = 64;
-    const int nblock = (nnei + LEN -1) / LEN;
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(LEN, 3);
-    hipLaunchKernelGGL(force_deriv_wrt_neighbors_r, block_grid, thread_grid, 0, 0, 
-        force, 
-        net_deriv, in_deriv, nlist, nloc, nnei);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
-  }
-  
-  template void prod_force_a_gpu_rocm(float * force, const float * net_deriv, const float * in_deriv, const int * nlist, const int nloc, const int nall, const int nnei);
-  template void prod_force_a_gpu_rocm(double * force, const double * net_deriv, const double * in_deriv, const int * nlist, const int nloc, const int nall, const int nnei);
-  template void prod_force_r_gpu_rocm(float * force, const float * net_deriv, const float * in_deriv, const int * nlist, const int nloc, const int nall, const int nnei);
-  template void prod_force_r_gpu_rocm(double * force, const double * net_deriv, const double * in_deriv, const int * nlist, const int nloc, const int nall, const int nnei);
-  
+template 
+void prod_force_a_gpu_rocm(FPTYPE* force,
+                           const FPTYPE* net_deriv,
+                           const FPTYPE* in_deriv,
+                           const int* nlist,
+                           const int nloc,
+                           const int nall,
+                           const int nnei) {
+  const int ndescrpt = nnei * 4;
+  DPErrcheck(hipMemset(force, 0, sizeof(FPTYPE) * nall * 3));
+
+  hipLaunchKernelGGL(HIP_KERNEL_NAME(force_deriv_wrt_center_atom),
+                     nloc, TPB, 0, 0, force, net_deriv, in_deriv, ndescrpt);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
+
+  const int LEN = 64;
+  const int nblock = (nnei + LEN - 1) / LEN;
+  dim3 block_grid(nloc, nblock);
+  dim3 thread_grid(LEN, 3);
+  hipLaunchKernelGGL(force_deriv_wrt_neighbors_a, block_grid, thread_grid, 0, 0,
+                     force, net_deriv, in_deriv, nlist, nloc, nnei);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
+}
+
+template 
+void prod_force_r_gpu_rocm(FPTYPE* force,
+                           const FPTYPE* net_deriv,
+                           const FPTYPE* in_deriv,
+                           const int* nlist,
+                           const int nloc,
+                           const int nall,
+                           const int nnei) {
+  const int ndescrpt = nnei * 1;
+  DPErrcheck(hipMemset(force, 0, sizeof(FPTYPE) * nall * 3));
+
+  hipLaunchKernelGGL(HIP_KERNEL_NAME(force_deriv_wrt_center_atom),
+                     nloc, TPB, 0, 0, force, net_deriv, in_deriv, ndescrpt);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
+
+  const int LEN = 64;
+  const int nblock = (nnei + LEN - 1) / LEN;
+  dim3 block_grid(nloc, nblock);
+  dim3 thread_grid(LEN, 3);
+  hipLaunchKernelGGL(force_deriv_wrt_neighbors_r, block_grid, thread_grid, 0, 0,
+                     force, net_deriv, in_deriv, nlist, nloc, nnei);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
+
+template void prod_force_a_gpu_rocm(float* force,
+                                           const float* net_deriv,
+                                           const float* in_deriv,
+                                           const int* nlist,
+                                           const int nloc,
+                                           const int nall,
+                                           const int nnei);
+template void prod_force_a_gpu_rocm(double* force,
+                                            const double* net_deriv,
+                                            const double* in_deriv,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nall,
+                                            const int nnei);
+template void prod_force_r_gpu_rocm(float* force,
+                                           const float* net_deriv,
+                                           const float* in_deriv,
+                                           const int* nlist,
+                                           const int nloc,
+                                           const int nall,
+                                           const int nnei);
+template void prod_force_r_gpu_rocm(double* force,
+                                            const double* net_deriv,
+                                            const double* in_deriv,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nall,
+                                            const int nnei);
+
+}  // namespace deepmd
diff --git a/source/lib/src/rocm/prod_force_grad.hip.cu b/source/lib/src/rocm/prod_force_grad.hip.cu
index f7540c07a2..e266389eb6 100644
--- a/source/lib/src/rocm/prod_force_grad.hip.cu
+++ b/source/lib/src/rocm/prod_force_grad.hip.cu
@@ -1,149 +1,151 @@
 #include "device.h"
 #include "prod_force_grad.h"
 
-template
-__device__ inline FPTYPE dev_dot(
-    const FPTYPE * arr1, 
-    const FPTYPE * arr2) 
-{
-    return arr1[0] * arr2[0] + arr1[1] * arr2[1] + arr1[2] * arr2[2];
+template 
+__device__ inline FPTYPE dev_dot(const FPTYPE* arr1, const FPTYPE* arr2) {
+  return arr1[0] * arr2[0] + arr1[1] * arr2[1] + arr1[2] * arr2[2];
 }
 
-template
-__global__ void force_grad_wrt_center_atom(
-    FPTYPE * grad_net,
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int ndescrpt)
-{
-    __shared__ FPTYPE grad_one[3];
-    int_64 center_idx = blockIdx.x;
-    unsigned int tid = threadIdx.x;
-    if(tid < 3){
-        grad_one[tid] = grad[center_idx * 3 + tid];
-    }
-    __syncthreads();
-    unsigned int descrpt_idx = blockIdx.y * blockDim.x + tid;
-    if(descrpt_idx < ndescrpt){
-        grad_net[center_idx * ndescrpt + descrpt_idx] -= dev_dot(grad_one, env_deriv + center_idx * ndescrpt * 3 + descrpt_idx * 3);
-    }
+template 
+__global__ void force_grad_wrt_center_atom(FPTYPE* grad_net,
+                                           const FPTYPE* grad,
+                                           const FPTYPE* env_deriv,
+                                           const int ndescrpt) {
+  __shared__ FPTYPE grad_one[3];
+  int_64 center_idx = blockIdx.x;
+  unsigned int tid = threadIdx.x;
+  if (tid < 3) {
+    grad_one[tid] = grad[center_idx * 3 + tid];
+  }
+  __syncthreads();
+  unsigned int descrpt_idx = blockIdx.y * blockDim.x + tid;
+  if (descrpt_idx < ndescrpt) {
+    grad_net[center_idx * ndescrpt + descrpt_idx] -= dev_dot(
+        grad_one, env_deriv + center_idx * ndescrpt * 3 + descrpt_idx * 3);
+  }
 }
 
-template
-__global__ void force_grad_wrt_neighbors_a(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc,
-    const int nnei)
-{
-    // idy -> nnei
-    const int_64 idx = blockIdx.x * blockDim.x + threadIdx.x;
-    const unsigned int idy = blockIdx.y;
-    const unsigned int idw = threadIdx.y;
-    if (idx >= nloc) {
-        return;
-    }
-    int j_idx = nlist[idx * nnei + idy];
-    if (j_idx < 0) {
-        return;
-    }
-    if (j_idx >= nloc) j_idx = j_idx % nloc;
-    grad_net[idx * nnei * 4 + idy * 4 + idw] += dev_dot(grad + j_idx * 3, env_deriv + idx * nnei * 4 * 3 + idy * 4 * 3 + idw * 3);
+template 
+__global__ void force_grad_wrt_neighbors_a(FPTYPE* grad_net,
+                                           const FPTYPE* grad,
+                                           const FPTYPE* env_deriv,
+                                           const int* nlist,
+                                           const int nloc,
+                                           const int nnei) {
+  // idy -> nnei
+  const int_64 idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const unsigned int idy = blockIdx.y;
+  const unsigned int idw = threadIdx.y;
+  if (idx >= nloc) {
+    return;
+  }
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  if (j_idx >= nloc) j_idx = j_idx % nloc;
+  grad_net[idx * nnei * 4 + idy * 4 + idw] += dev_dot(
+      grad + j_idx * 3, env_deriv + idx * nnei * 4 * 3 + idy * 4 * 3 + idw * 3);
 }
 
-template
-__global__ void force_grad_wrt_neighbors_r(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc,
-    const int nnei)
-{
-    // idy -> nnei
-    const int_64 idx = blockIdx.x * blockDim.x + threadIdx.x;
-    const unsigned int idy = blockIdx.y;
-    if (idx >= nloc) {
-        return;
-    }
-    int j_idx = nlist[idx * nnei + idy];
-    if (j_idx < 0) {
-        return;
-    }
-    if (j_idx >= nloc) j_idx = j_idx % nloc;
-    grad_net[idx * nnei + idy] += dev_dot(grad + j_idx * 3, env_deriv + idx * nnei * 3 + idy * 3);
+template 
+__global__ void force_grad_wrt_neighbors_r(FPTYPE* grad_net,
+                                           const FPTYPE* grad,
+                                           const FPTYPE* env_deriv,
+                                           const int* nlist,
+                                           const int nloc,
+                                           const int nnei) {
+  // idy -> nnei
+  const int_64 idx = blockIdx.x * blockDim.x + threadIdx.x;
+  const unsigned int idy = blockIdx.y;
+  if (idx >= nloc) {
+    return;
+  }
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  if (j_idx >= nloc) j_idx = j_idx % nloc;
+  grad_net[idx * nnei + idy] +=
+      dev_dot(grad + j_idx * 3, env_deriv + idx * nnei * 3 + idy * 3);
 }
 
 namespace deepmd {
-template
-void prod_force_grad_a_gpu_rocm(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei)
-{
-    const int ndescrpt = nnei * 4;
-    DPErrcheck(hipMemset(
-        grad_net, 
-        0, sizeof(FPTYPE) * nloc * ndescrpt));
-    const int nblock = (ndescrpt + TPB - 1) / TPB;
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(TPB, 1);
-    hipLaunchKernelGGL(force_grad_wrt_center_atom, block_grid, thread_grid, 0, 0, 
-        grad_net,
-        grad, env_deriv, ndescrpt);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
-    const int LEN = 128;
-    const int nblock_ = (nloc + LEN -1) / LEN;
-    dim3 block_grid_(nblock_, nnei);
-    dim3 thread_grid_(LEN, 4);
-    hipLaunchKernelGGL(force_grad_wrt_neighbors_a, block_grid_, thread_grid_, 0, 0, 
-        grad_net,
-        grad, env_deriv, nlist, nloc, nnei);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+template 
+void prod_force_grad_a_gpu_rocm(FPTYPE* grad_net,
+                                const FPTYPE* grad,
+                                const FPTYPE* env_deriv,
+                                const int* nlist,
+                                const int nloc,
+                                const int nnei) {
+  const int ndescrpt = nnei * 4;
+  DPErrcheck(hipMemset(grad_net, 0, sizeof(FPTYPE) * nloc * ndescrpt));
+  const int nblock = (ndescrpt + TPB - 1) / TPB;
+  dim3 block_grid(nloc, nblock);
+  dim3 thread_grid(TPB, 1);
+  hipLaunchKernelGGL(force_grad_wrt_center_atom, block_grid, thread_grid, 0, 0,
+                     grad_net, grad, env_deriv, ndescrpt);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
+  const int LEN = 128;
+  const int nblock_ = (nloc + LEN - 1) / LEN;
+  dim3 block_grid_(nblock_, nnei);
+  dim3 thread_grid_(LEN, 4);
+  hipLaunchKernelGGL(force_grad_wrt_neighbors_a, block_grid_, thread_grid_, 0,
+                     0, grad_net, grad, env_deriv, nlist, nloc, nnei);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
 
-template
-void prod_force_grad_r_gpu_rocm(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad, 
-    const FPTYPE * env_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei)
-{
-    const int ndescrpt = nnei * 1;
-    DPErrcheck(hipMemset(
-        grad_net, 
-        0, sizeof(FPTYPE) * nloc * ndescrpt));
-    const int nblock = (ndescrpt + TPB - 1) / TPB;
-    dim3 block_grid(nloc, nblock);
-    dim3 thread_grid(TPB, 1);
-    hipLaunchKernelGGL(force_grad_wrt_center_atom, block_grid, thread_grid, 0, 0, 
-        grad_net,
-        grad, env_deriv, ndescrpt);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+template 
+void prod_force_grad_r_gpu_rocm(FPTYPE* grad_net,
+                                const FPTYPE* grad,
+                                const FPTYPE* env_deriv,
+                                const int* nlist,
+                                const int nloc,
+                                const int nnei) {
+  const int ndescrpt = nnei * 1;
+  DPErrcheck(hipMemset(grad_net, 0, sizeof(FPTYPE) * nloc * ndescrpt));
+  const int nblock = (ndescrpt + TPB - 1) / TPB;
+  dim3 block_grid(nloc, nblock);
+  dim3 thread_grid(TPB, 1);
+  hipLaunchKernelGGL(force_grad_wrt_center_atom, block_grid, thread_grid, 0, 0,
+                     grad_net, grad, env_deriv, ndescrpt);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 
-    const int LEN = 128;
-    const int nblock_ = (nloc + LEN -1) / LEN;
-    dim3 block_grid_(nblock_, nnei);
-    dim3 thread_grid_(LEN, 1);
-    hipLaunchKernelGGL(force_grad_wrt_neighbors_r, block_grid_, thread_grid_, 0, 0, 
-        grad_net,
-        grad, env_deriv, nlist, nloc, nnei);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+  const int LEN = 128;
+  const int nblock_ = (nloc + LEN - 1) / LEN;
+  dim3 block_grid_(nblock_, nnei);
+  dim3 thread_grid_(LEN, 1);
+  hipLaunchKernelGGL(force_grad_wrt_neighbors_r, block_grid_, thread_grid_, 0,
+                     0, grad_net, grad, env_deriv, nlist, nloc, nnei);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
 
-template void prod_force_grad_a_gpu_rocm(float * grad_net, const float * grad, const float * env_deriv, const int * nlist, const int nloc, const int nnei);
-template void prod_force_grad_a_gpu_rocm(double * grad_net, const double * grad, const double * env_deriv, const int * nlist, const int nloc, const int nnei);
-template void prod_force_grad_r_gpu_rocm(float * grad_net, const float * grad, const float * env_deriv, const int * nlist, const int nloc, const int nnei);
-template void prod_force_grad_r_gpu_rocm(double * grad_net, const double * grad, const double * env_deriv, const int * nlist, const int nloc, const int nnei);
-}
\ No newline at end of file
+template void prod_force_grad_a_gpu_rocm(float* grad_net,
+                                                const float* grad,
+                                                const float* env_deriv,
+                                                const int* nlist,
+                                                const int nloc,
+                                                const int nnei);
+template void prod_force_grad_a_gpu_rocm(double* grad_net,
+                                                 const double* grad,
+                                                 const double* env_deriv,
+                                                 const int* nlist,
+                                                 const int nloc,
+                                                 const int nnei);
+template void prod_force_grad_r_gpu_rocm(float* grad_net,
+                                                const float* grad,
+                                                const float* env_deriv,
+                                                const int* nlist,
+                                                const int nloc,
+                                                const int nnei);
+template void prod_force_grad_r_gpu_rocm(double* grad_net,
+                                                 const double* grad,
+                                                 const double* env_deriv,
+                                                 const int* nlist,
+                                                 const int nloc,
+                                                 const int nnei);
+}  // namespace deepmd
diff --git a/source/lib/src/rocm/prod_virial.hip.cu b/source/lib/src/rocm/prod_virial.hip.cu
index 066e425d81..30628c610f 100644
--- a/source/lib/src/rocm/prod_virial.hip.cu
+++ b/source/lib/src/rocm/prod_virial.hip.cu
@@ -1,44 +1,38 @@
 #include "device.h"
 #include "prod_virial.h"
 
-template <
-    typename FPTYPE,
-    int      THREADS_PER_BLOCK>
-__global__ void atom_virial_reduction(
-    FPTYPE * virial, 
-    const FPTYPE * atom_virial,
-    const int nall)
-{
-    unsigned int bid = blockIdx.x;
-    unsigned int tid = threadIdx.x;
-    __shared__ FPTYPE data[THREADS_PER_BLOCK];
-    data[tid] = (FPTYPE)0.;
-    for (int ii = tid; ii < nall; ii += THREADS_PER_BLOCK) {
-        data[tid] += atom_virial[ii * 9 + bid];
-    }
-    __syncthreads(); 
-    // do reduction in shared memory
-    for (int ii = THREADS_PER_BLOCK >> 1; ii > 0; ii >>= 1) {
-        if (tid < ii) {
-            data[tid] += data[tid + ii];
-        }
-        __syncthreads();
+template 
+__global__ void atom_virial_reduction(FPTYPE* virial,
+                                      const FPTYPE* atom_virial,
+                                      const int nall) {
+  unsigned int bid = blockIdx.x;
+  unsigned int tid = threadIdx.x;
+  __shared__ FPTYPE data[THREADS_PER_BLOCK];
+  data[tid] = (FPTYPE)0.;
+  for (int ii = tid; ii < nall; ii += THREADS_PER_BLOCK) {
+    data[tid] += atom_virial[ii * 9 + bid];
+  }
+  __syncthreads();
+  // do reduction in shared memory
+  for (int ii = THREADS_PER_BLOCK >> 1; ii > 0; ii >>= 1) {
+    if (tid < ii) {
+      data[tid] += data[tid + ii];
     }
-    // write result for this block to global memory
-    if (tid == 0) virial[bid] = data[0];
+    __syncthreads();
+  }
+  // write result for this block to global memory
+  if (tid == 0) virial[bid] = data[0];
 }
 
-template
-__global__ void virial_deriv_wrt_neighbors_a(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial,
-    const FPTYPE * net_deriv,
-    const FPTYPE * in_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei) 
-{
+template 
+__global__ void virial_deriv_wrt_neighbors_a(FPTYPE* virial,
+                                             FPTYPE* atom_virial,
+                                             const FPTYPE* net_deriv,
+                                             const FPTYPE* in_deriv,
+                                             const FPTYPE* rij,
+                                             const int* nlist,
+                                             const int nloc,
+                                             const int nnei) {
   // idx -> nloc
   // idy -> nnei
   // idz = dd0 * 3 + dd1
@@ -49,132 +43,153 @@ __global__ void virial_deriv_wrt_neighbors_a(
   const unsigned int idz = threadIdx.y;
   const int ndescrpt = nnei * 4;
   if (idy >= nnei) {
-      return;
+    return;
   }
   int j_idx = nlist[idx * nnei + idy];
   if (j_idx < 0) {
-      return;
+    return;
   }
   FPTYPE virial_tmp = (FPTYPE)0.;
   for (int idw = 0; idw < 4; ++idw) {
-      virial_tmp += net_deriv[idx * ndescrpt + idy * 4 + idw] * rij[idx * nnei * 3 + idy * 3 + idz % 3] * in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz / 3];
+    virial_tmp += net_deriv[idx * ndescrpt + idy * 4 + idw] *
+                  rij[idx * nnei * 3 + idy * 3 + idz % 3] *
+                  in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz / 3];
   }
   atomicAdd(atom_virial + j_idx * 9 + idz, virial_tmp);
 }
 
-template<typename FPTYPE>
-__global__ void virial_deriv_wrt_neighbors_r(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial,
-    const FPTYPE * net_deriv,
-    const FPTYPE * in_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei) 
-{
-    // idx -> nloc
-    // idy -> nnei
-    // idz = dd0 * 3 + dd1
-    // dd0 = idz / 3
-    // dd1 = idz % 3
-    const int_64 idx = blockIdx.x;
-    const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x;
-    const unsigned int idz = threadIdx.y;
-    const int ndescrpt = nnei * 1;
+template <typename FPTYPE>
+__global__ void virial_deriv_wrt_neighbors_r(FPTYPE* virial,
+                                             FPTYPE* atom_virial,
+                                             const FPTYPE* net_deriv,
+                                             const FPTYPE* in_deriv,
+                                             const FPTYPE* rij,
+                                             const int* nlist,
+                                             const int nloc,
+                                             const int nnei) {
+  // idx -> nloc
+  // idy -> nnei
+  // idz = dd0 * 3 + dd1
+  // dd0 = idz / 3
+  // dd1 = idz % 3
+  const int_64 idx = blockIdx.x;
+  const unsigned int idy = blockIdx.y * blockDim.x + threadIdx.x;
+  const unsigned int idz = threadIdx.y;
+  const int ndescrpt = nnei * 1;
 
-    if (idy >= nnei) {
-        return;
-    }
-    int j_idx = nlist[idx * nnei + idy];
-    if (j_idx < 0) {
-        return;
-    }
-    // atomicAdd(
-    //    virial + idz, 
-    //    net_deriv[idx * ndescrpt + idy * 4 + idw] * rij[idx * nnei * 3 + idy * 3 + idz / 3] * in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz % 3]);
-    atomicAdd(
-        atom_virial + j_idx * 9 + idz, 
-        net_deriv[idx * ndescrpt + idy] * rij[idx * nnei * 3 + idy * 3 + idz % 3] * in_deriv[idx * ndescrpt * 3 + idy * 3 + idz / 3]);
+  if (idy >= nnei) {
+    return;
+  }
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  // atomicAdd(
+  //    virial + idz,
+  //    net_deriv[idx * ndescrpt + idy * 4 + idw] * rij[idx * nnei * 3 + idy * 3
+  //    + idz / 3] * in_deriv[idx * ndescrpt * 3 + (idy * 4 + idw) * 3 + idz %
+  //    3]);
+  atomicAdd(atom_virial + j_idx * 9 + idz,
+            net_deriv[idx * ndescrpt + idy] *
+                rij[idx * nnei * 3 + idy * 3 + idz % 3] *
+                in_deriv[idx * ndescrpt * 3 + idy * 3 + idz / 3]);
 }
 
 namespace deepmd {
-template<typename FPTYPE>
-void prod_virial_a_gpu_rocm(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * in_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei)
-{
-    DPErrcheck(hipMemset(
-        virial, 
-        0, sizeof(FPTYPE) * 9));
-    DPErrcheck(hipMemset(
-      atom_virial, 
-      0, sizeof(FPTYPE) * 9 * nall));
-    
+template <typename FPTYPE>
+void prod_virial_a_gpu_rocm(FPTYPE* virial,
+                            FPTYPE* atom_virial,
+                            const FPTYPE* net_deriv,
+                            const FPTYPE* in_deriv,
+                            const FPTYPE* rij,
+                            const int* nlist,
+                            const int nloc,
+                            const int nall,
+                            const int nnei) {
+  DPErrcheck(hipMemset(virial, 0, sizeof(FPTYPE) * 9));
+  DPErrcheck(hipMemset(atom_virial, 0, sizeof(FPTYPE) * 9 * nall));
+
   const int LEN = 16;
-  int nblock = (nnei + LEN -1) / LEN;
+  int nblock = (nnei + LEN - 1) / LEN;
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(LEN, 9);
   // compute virial of a frame
-  hipLaunchKernelGGL(virial_deriv_wrt_neighbors_a, block_grid, thread_grid, 0, 0, 
-      virial, atom_virial, 
-      net_deriv, in_deriv, rij, nlist, nloc, nnei);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+  hipLaunchKernelGGL(virial_deriv_wrt_neighbors_a, block_grid, thread_grid, 0,
+                     0, virial, atom_virial, net_deriv, in_deriv, rij, nlist,
+                     nloc, nnei);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
   // reduction atom_virial to virial
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(atom_virial_reduction<FPTYPE, TPB>), 9, TPB, 0, 0, 
-      virial, 
-      atom_virial, nall);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+  hipLaunchKernelGGL(HIP_KERNEL_NAME(atom_virial_reduction<FPTYPE, TPB>), 9,
+                     TPB, 0, 0, virial, atom_virial, nall);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
 
-template<typename FPTYPE>
-void prod_virial_r_gpu_rocm(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial, 
-    const FPTYPE * net_deriv, 
-    const FPTYPE * in_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei)
-{
-    DPErrcheck(hipMemset(
-        virial, 
-        0, sizeof(FPTYPE) * 9));
-    DPErrcheck(hipMemset(
-      atom_virial, 
-      0, sizeof(FPTYPE) * 9 * nall));
-    
+template <typename FPTYPE>
+void prod_virial_r_gpu_rocm(FPTYPE* virial,
+                            FPTYPE* atom_virial,
+                            const FPTYPE* net_deriv,
+                            const FPTYPE* in_deriv,
+                            const FPTYPE* rij,
+                            const int* nlist,
+                            const int nloc,
+                            const int nall,
+                            const int nnei) {
+  DPErrcheck(hipMemset(virial, 0, sizeof(FPTYPE) * 9));
+  DPErrcheck(hipMemset(atom_virial, 0, sizeof(FPTYPE) * 9 * nall));
+
   const int LEN = 16;
-  int nblock = (nnei + LEN -1) / LEN;
+  int nblock = (nnei + LEN - 1) / LEN;
   dim3 block_grid(nloc, nblock);
   dim3 thread_grid(LEN, 9);
   // compute virial of a frame
-  hipLaunchKernelGGL(virial_deriv_wrt_neighbors_r, block_grid, thread_grid, 0, 0, 
-      virial, atom_virial, 
-      net_deriv, in_deriv, rij, nlist, nloc, nnei);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+  hipLaunchKernelGGL(virial_deriv_wrt_neighbors_r, block_grid, thread_grid, 0,
+                     0, virial, atom_virial, net_deriv, in_deriv, rij, nlist,
+                     nloc, nnei);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
   // reduction atom_virial to virial
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(atom_virial_reduction<FPTYPE, TPB>), 9, TPB, 0, 0, 
-    virial, 
-    atom_virial, nall);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+  hipLaunchKernelGGL(HIP_KERNEL_NAME(atom_virial_reduction<FPTYPE, TPB>), 9,
+                     TPB, 0, 0, virial, atom_virial, nall);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
 
-template void prod_virial_a_gpu_rocm<float>(float * virial, float * atom_virial, const float * net_deriv, const float * in_deriv, const float * rij, const int * nlist, const int nloc, const int nall, const int nnei);
-template void prod_virial_a_gpu_rocm<double>(double * virial, double * atom_virial, const double * net_deriv, const double * in_deriv, const double * rij, const int * nlist, const int nloc, const int nall, const int nnei);
-template void prod_virial_r_gpu_rocm<float>(float * virial, float * atom_virial, const float * net_deriv, const float * in_deriv, const float * rij, const int * nlist, const int nloc, const int nall, const int nnei);
-template void prod_virial_r_gpu_rocm<double>(double * virial, double * atom_virial, const double * net_deriv, const double * in_deriv, const double * rij, const int * nlist, const int nloc, const int nall, const int nnei);
-}
+template void prod_virial_a_gpu_rocm<float>(float* virial,
+                                            float* atom_virial,
+                                            const float* net_deriv,
+                                            const float* in_deriv,
+                                            const float* rij,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nall,
+                                            const int nnei);
+template void prod_virial_a_gpu_rocm<double>(double* virial,
+                                             double* atom_virial,
+                                             const double* net_deriv,
+                                             const double* in_deriv,
+                                             const double* rij,
+                                             const int* nlist,
+                                             const int nloc,
+                                             const int nall,
+                                             const int nnei);
+template void prod_virial_r_gpu_rocm<float>(float* virial,
+                                            float* atom_virial,
+                                            const float* net_deriv,
+                                            const float* in_deriv,
+                                            const float* rij,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nall,
+                                            const int nnei);
+template void prod_virial_r_gpu_rocm<double>(double* virial,
+                                             double* atom_virial,
+                                             const double* net_deriv,
+                                             const double* in_deriv,
+                                             const double* rij,
+                                             const int* nlist,
+                                             const int nloc,
+                                             const int nall,
+                                             const int nnei);
+}  // namespace deepmd
diff --git a/source/lib/src/rocm/prod_virial_grad.hip.cu b/source/lib/src/rocm/prod_virial_grad.hip.cu
index ebe4948a50..81fb9f4bad 100644
--- a/source/lib/src/rocm/prod_virial_grad.hip.cu
+++ b/source/lib/src/rocm/prod_virial_grad.hip.cu
@@ -1,144 +1,154 @@
 #include "device.h"
 #include "prod_virial_grad.h"
 
-template<typename FPTYPE>
-__device__ inline FPTYPE dev_dot9(
-    const FPTYPE * arr1, 
-    const FPTYPE * arr2) 
-{
-    FPTYPE result = (FPTYPE)0.0;
-    for(int ii=0; ii<9; ii++){
-        result += arr1[ii] * arr2[ii];
-    }
-    return result;
+template <typename FPTYPE>
+__device__ inline FPTYPE dev_dot9(const FPTYPE* arr1, const FPTYPE* arr2) {
+  FPTYPE result = (FPTYPE)0.0;
+  for (int ii = 0; ii < 9; ii++) {
+    result += arr1[ii] * arr2[ii];
+  }
+  return result;
 }
 
-template<typename FPTYPE>
-__global__ void virial_grad_wrt_neighbors_a(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei)
-{
-    // idy -> nnei
-    const unsigned int tid = threadIdx.x;
-    const int_64 idx = blockIdx.x * blockDim.x + tid;
-    const unsigned int idy = blockIdx.y;
-    const unsigned int idw = threadIdx.y;
-    const int ndescrpt = nnei * 4;
-    __shared__ FPTYPE grad_one[9];
-    if(tid < 9){
-        grad_one[tid] = grad[tid];
-    }
-    __syncthreads(); 
-    if (idx >= nloc) {
-        return;
-    }
-    int j_idx = nlist[idx * nnei + idy];
-    if (j_idx < 0) {
-        return;
-    }
-    FPTYPE tmp[9];
-    for (int dd0 = 0; dd0 < 3; ++dd0){
-        for (int dd1 = 0; dd1 < 3; ++dd1){
-            tmp[dd0 * 3 + dd1] = rij[idx * nnei * 3 + idy * 3 + dd1] * env_deriv[idx * ndescrpt * 3 + idy * 4 * 3 + idw * 3 + dd0];
-        }
+template <typename FPTYPE>
+__global__ void virial_grad_wrt_neighbors_a(FPTYPE* grad_net,
+                                            const FPTYPE* grad,
+                                            const FPTYPE* env_deriv,
+                                            const FPTYPE* rij,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nnei) {
+  // idy -> nnei
+  const unsigned int tid = threadIdx.x;
+  const int_64 idx = blockIdx.x * blockDim.x + tid;
+  const unsigned int idy = blockIdx.y;
+  const unsigned int idw = threadIdx.y;
+  const int ndescrpt = nnei * 4;
+  __shared__ FPTYPE grad_one[9];
+  if (tid < 9) {
+    grad_one[tid] = grad[tid];
+  }
+  __syncthreads();
+  if (idx >= nloc) {
+    return;
+  }
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  FPTYPE tmp[9];
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
+      tmp[dd0 * 3 + dd1] =
+          rij[idx * nnei * 3 + idy * 3 + dd1] *
+          env_deriv[idx * ndescrpt * 3 + idy * 4 * 3 + idw * 3 + dd0];
     }
-    grad_net[idx * ndescrpt + idy * 4 + idw] -= -1.0 * dev_dot9(grad_one, tmp);
+  }
+  grad_net[idx * ndescrpt + idy * 4 + idw] -= -1.0 * dev_dot9(grad_one, tmp);
 }
 
-template<typename FPTYPE>
-__global__ void virial_grad_wrt_neighbors_r(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei)
-{
-    // idy -> nnei
-    const unsigned int tid = threadIdx.x;
-    const int_64 idx = blockIdx.x * blockDim.x + tid;
-    const unsigned int idy = blockIdx.y;
-    const int ndescrpt = nnei;
-    __shared__ FPTYPE grad_one[9];
-    if(tid < 9){
-        grad_one[tid] = grad[tid];
-    }
-    __syncthreads(); 
-    if (idx >= nloc) {
-        return;
-    }
-    int j_idx = nlist[idx * nnei + idy];
-    if (j_idx < 0) {
-        return;
-    }
-    FPTYPE tmp[9];
-    for (int dd0 = 0; dd0 < 3; ++dd0){
-        for (int dd1 = 0; dd1 < 3; ++dd1){
-            tmp[dd0 * 3 + dd1] = rij[idx * nnei * 3 + idy * 3 + dd1] * env_deriv[idx * ndescrpt * 3 + idy * 3 + dd0];
-        }
+template <typename FPTYPE>
+__global__ void virial_grad_wrt_neighbors_r(FPTYPE* grad_net,
+                                            const FPTYPE* grad,
+                                            const FPTYPE* env_deriv,
+                                            const FPTYPE* rij,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nnei) {
+  // idy -> nnei
+  const unsigned int tid = threadIdx.x;
+  const int_64 idx = blockIdx.x * blockDim.x + tid;
+  const unsigned int idy = blockIdx.y;
+  const int ndescrpt = nnei;
+  __shared__ FPTYPE grad_one[9];
+  if (tid < 9) {
+    grad_one[tid] = grad[tid];
+  }
+  __syncthreads();
+  if (idx >= nloc) {
+    return;
+  }
+  int j_idx = nlist[idx * nnei + idy];
+  if (j_idx < 0) {
+    return;
+  }
+  FPTYPE tmp[9];
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
+      tmp[dd0 * 3 + dd1] = rij[idx * nnei * 3 + idy * 3 + dd1] *
+                           env_deriv[idx * ndescrpt * 3 + idy * 3 + dd0];
     }
-    grad_net[idx * ndescrpt + idy] -= (FPTYPE)-1.0 * dev_dot9(grad_one, tmp);
+  }
+  grad_net[idx * ndescrpt + idy] -= (FPTYPE)-1.0 * dev_dot9(grad_one, tmp);
 }
 
 namespace deepmd {
-template<typename FPTYPE>
-void prod_virial_grad_a_gpu_rocm(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei)
-{
-    const int ndescrpt = nnei * 4;
-    DPErrcheck(hipMemset(
-        grad_net, 
-        0, sizeof(FPTYPE) * nloc * ndescrpt));
-    const int LEN = 128;
-    const int nblock = (nloc + LEN -1) / LEN;
-    dim3 block_grid(nblock, nnei);
-    dim3 thread_grid(LEN, 4);
-    hipLaunchKernelGGL(virial_grad_wrt_neighbors_a, block_grid, thread_grid, 0, 0, 
-        grad_net,
-        grad, env_deriv, rij, nlist, nloc, nnei);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+template <typename FPTYPE>
+void prod_virial_grad_a_gpu_rocm(FPTYPE* grad_net,
+                                 const FPTYPE* grad,
+                                 const FPTYPE* env_deriv,
+                                 const FPTYPE* rij,
+                                 const int* nlist,
+                                 const int nloc,
+                                 const int nnei) {
+  const int ndescrpt = nnei * 4;
+  DPErrcheck(hipMemset(grad_net, 0, sizeof(FPTYPE) * nloc * ndescrpt));
+  const int LEN = 128;
+  const int nblock = (nloc + LEN - 1) / LEN;
+  dim3 block_grid(nblock, nnei);
+  dim3 thread_grid(LEN, 4);
+  hipLaunchKernelGGL(virial_grad_wrt_neighbors_a, block_grid, thread_grid, 0, 0,
+                     grad_net, grad, env_deriv, rij, nlist, nloc, nnei);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
 
-template<typename FPTYPE>
-void prod_virial_grad_r_gpu_rocm(
-    FPTYPE * grad_net,
-    const FPTYPE * grad,
-    const FPTYPE * env_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int nloc,
-    const int nnei)
-{
-    const int ndescrpt = nnei;
-    DPErrcheck(hipMemset(
-        grad_net, 
-        0, sizeof(FPTYPE) * nloc * ndescrpt));
-    const int LEN = 128;
-    const int nblock = (nloc + LEN -1) / LEN;
-    dim3 block_grid(nblock, nnei);
-    dim3 thread_grid(LEN, 1);
-    hipLaunchKernelGGL(virial_grad_wrt_neighbors_r, block_grid, thread_grid, 0, 0, 
-        grad_net,
-        grad, env_deriv, rij, nlist, nloc, nnei);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+template <typename FPTYPE>
+void prod_virial_grad_r_gpu_rocm(FPTYPE* grad_net,
+                                 const FPTYPE* grad,
+                                 const FPTYPE* env_deriv,
+                                 const FPTYPE* rij,
+                                 const int* nlist,
+                                 const int nloc,
+                                 const int nnei) {
+  const int ndescrpt = nnei;
+  DPErrcheck(hipMemset(grad_net, 0, sizeof(FPTYPE) * nloc * ndescrpt));
+  const int LEN = 128;
+  const int nblock = (nloc + LEN - 1) / LEN;
+  dim3 block_grid(nblock, nnei);
+  dim3 thread_grid(LEN, 1);
+  hipLaunchKernelGGL(virial_grad_wrt_neighbors_r, block_grid, thread_grid, 0, 0,
+                     grad_net, grad, env_deriv, rij, nlist, nloc, nnei);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
 
-template void prod_virial_grad_a_gpu_rocm<float>(float * grad_net, const float * grad, const float * env_deriv, const float * rij, const int * nlist, const int nloc, const int nnei);
-template void prod_virial_grad_a_gpu_rocm<double>(double * grad_net, const double * grad, const double * env_deriv, const double * rij, const int * nlist, const int nloc, const int nnei);
-template void prod_virial_grad_r_gpu_rocm<float>(float * grad_net, const float * grad, const float * env_deriv, const float * rij, const int * nlist, const int nloc, const int nnei);
-template void prod_virial_grad_r_gpu_rocm<double>(double * grad_net, const double * grad, const double * env_deriv, const double * rij, const int * nlist, const int nloc, const int nnei);
-}
\ No newline at end of file
+template void prod_virial_grad_a_gpu_rocm<float>(float* grad_net,
+                                                 const float* grad,
+                                                 const float* env_deriv,
+                                                 const float* rij,
+                                                 const int* nlist,
+                                                 const int nloc,
+                                                 const int nnei);
+template void prod_virial_grad_a_gpu_rocm<double>(double* grad_net,
+                                                  const double* grad,
+                                                  const double* env_deriv,
+                                                  const double* rij,
+                                                  const int* nlist,
+                                                  const int nloc,
+                                                  const int nnei);
+template void prod_virial_grad_r_gpu_rocm<float>(float* grad_net,
+                                                 const float* grad,
+                                                 const float* env_deriv,
+                                                 const float* rij,
+                                                 const int* nlist,
+                                                 const int nloc,
+                                                 const int nnei);
+template void prod_virial_grad_r_gpu_rocm<double>(double* grad_net,
+                                                  const double* grad,
+                                                  const double* env_deriv,
+                                                  const double* rij,
+                                                  const int* nlist,
+                                                  const int nloc,
+                                                  const int nnei);
+}  // namespace deepmd
diff --git a/source/lib/src/rocm/region.hip.cu b/source/lib/src/rocm/region.hip.cu
index f803f1293c..f4ee5517cc 100644
--- a/source/lib/src/rocm/region.hip.cu
+++ b/source/lib/src/rocm/region.hip.cu
@@ -1,74 +1,67 @@
 #include "device.h"
-#include "region.h"
 #include "region.cuh"
+#include "region.h"
 
-template<typename FPTYPE>
-__global__ void _phys2Inter(
-    FPTYPE *inter, 
-    const FPTYPE *phys, 
-    const FPTYPE *rec_boxt)
-{
-    phys2Inter(inter, phys, rec_boxt);
+template <typename FPTYPE>
+__global__ void _phys2Inter(FPTYPE *inter,
+                            const FPTYPE *phys,
+                            const FPTYPE *rec_boxt) {
+  phys2Inter(inter, phys, rec_boxt);
 }
 
-template<typename FPTYPE>
-__global__ void _inter2Phys(
-    FPTYPE *phys, 
-    const FPTYPE *inter, 
-    const FPTYPE *boxt)
-{
-    inter2Phys(phys, inter, boxt);
+template <typename FPTYPE>
+__global__ void _inter2Phys(FPTYPE *phys,
+                            const FPTYPE *inter,
+                            const FPTYPE *boxt) {
+  inter2Phys(phys, inter, boxt);
 }
 
-template<typename FPTYPE>
-__global__ void _compute_volume(
-    FPTYPE * volume, 
-    const FPTYPE * boxt)
-{
-    volume[0] = compute_volume(boxt);
+template <typename FPTYPE>
+__global__ void _compute_volume(FPTYPE *volume, const FPTYPE *boxt) {
+  volume[0] = compute_volume(boxt);
 }
 
 namespace deepmd {
-//only for unittest
-template<typename FPTYPE>
-void
-convert_to_inter_gpu_rocm(
-    FPTYPE * ri, 
-    const Region<FPTYPE> & region,
-    const FPTYPE * rp)
-{
-    hipLaunchKernelGGL(_phys2Inter, 1, 1, 0, 0, ri, rp, region.rec_boxt);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+// only for unittest
+template <typename FPTYPE>
+void convert_to_inter_gpu_rocm(FPTYPE *ri,
+                               const Region<FPTYPE> &region,
+                               const FPTYPE *rp) {
+  hipLaunchKernelGGL(_phys2Inter, 1, 1, 0, 0, ri, rp, region.rec_boxt);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
 
-template<typename FPTYPE>
-void
-convert_to_phys_gpu_rocm(
-    FPTYPE * rp, 
-    const Region<FPTYPE> & region,
-    const FPTYPE * ri)
-{
-    hipLaunchKernelGGL(_inter2Phys, 1, 1, 0, 0, rp, ri, region.boxt);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+template <typename FPTYPE>
+void convert_to_phys_gpu_rocm(FPTYPE *rp,
+                              const Region<FPTYPE> &region,
+                              const FPTYPE *ri) {
+  hipLaunchKernelGGL(_inter2Phys, 1, 1, 0, 0, rp, ri, region.boxt);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
 
-template<typename FPTYPE>
-void
-volume_gpu_rocm(
-    FPTYPE * volume,
-    const Region<FPTYPE> & region)
-{
-    hipLaunchKernelGGL(_compute_volume, 1, 1, 0, 0, volume, region.boxt);
-    DPErrcheck(hipGetLastError());
-    DPErrcheck(hipDeviceSynchronize());
+template <typename FPTYPE>
+void volume_gpu_rocm(FPTYPE *volume, const Region<FPTYPE> &region) {
+  hipLaunchKernelGGL(_compute_volume, 1, 1, 0, 0, volume, region.boxt);
+  DPErrcheck(hipGetLastError());
+  DPErrcheck(hipDeviceSynchronize());
 }
 
-template void convert_to_inter_gpu_rocm<float>(float * ri, const Region<float> & region, const float * rp);
-template void convert_to_inter_gpu_rocm<double>(double * ri, const Region<double> & region, const double * rp);
-template void convert_to_phys_gpu_rocm<float>(float * rp, const Region<float> & region, const float * ri);
-template void convert_to_phys_gpu_rocm<double>(double * rp, const Region<double> & region, const double * ri);
-template void volume_gpu_rocm<float>(float * volume, const Region<float> & region);
-template void volume_gpu_rocm<double>(double * volume, const Region<double> & region);
-}
\ No newline at end of file
+template void convert_to_inter_gpu_rocm<float>(float *ri,
+                                               const Region<float> &region,
+                                               const float *rp);
+template void convert_to_inter_gpu_rocm<double>(double *ri,
+                                                const Region<double> &region,
+                                                const double *rp);
+template void convert_to_phys_gpu_rocm<float>(float *rp,
+                                              const Region<float> &region,
+                                              const float *ri);
+template void convert_to_phys_gpu_rocm<double>(double *rp,
+                                               const Region<double> &region,
+                                               const double *ri);
+template void volume_gpu_rocm<float>(float *volume,
+                                     const Region<float> &region);
+template void volume_gpu_rocm<double>(double *volume,
+                                      const Region<double> &region);
+}  // namespace deepmd
diff --git a/source/lib/src/rocm/tabulate.hip.cu b/source/lib/src/rocm/tabulate.hip.cu
index caa51578dd..38a16db2ae 100644
--- a/source/lib/src/rocm/tabulate.hip.cu
+++ b/source/lib/src/rocm/tabulate.hip.cu
@@ -1,5 +1,5 @@
-#include "tabulate.h"
 #include "device.h"
+#include "tabulate.h"
 
 #define MM 4
 #define KK 4
@@ -8,120 +8,98 @@
 #define FULL_MASK 0xffffffff
 
 template <typename FPTYPE>
-__forceinline__ __device__
-void locate_xx(
-    FPTYPE& xx, 
-    int& table_idx,
-    const FPTYPE& lower, 
-    const FPTYPE& upper,  
-    const FPTYPE& max, 
-    const FPTYPE& stride0, 
-    const FPTYPE& stride1)
-{
+__forceinline__ __device__ void locate_xx(FPTYPE& xx,
+                                          int& table_idx,
+                                          const FPTYPE& lower,
+                                          const FPTYPE& upper,
+                                          const FPTYPE& max,
+                                          const FPTYPE& stride0,
+                                          const FPTYPE& stride1) {
   if (xx < lower) {
     table_idx = 0;
     xx = (FPTYPE)0.;
-  }
-  else if (xx < upper) {
+  } else if (xx < upper) {
     table_idx = (int)((xx - lower) / stride0);
     xx -= (table_idx * stride0 + lower);
-  }
-  else if (xx < max) {
+  } else if (xx < max) {
     int first_stride = int((upper - lower) / stride0);
     table_idx = first_stride + (int)((xx - upper) / stride1);
     xx -= ((table_idx - first_stride) * stride1 + upper);
-  }
-  else {
-    table_idx = int((upper - lower) / stride0) + (int)((max - upper) / stride1) - 1;
+  } else {
+    table_idx =
+        int((upper - lower) / stride0) + (int)((max - upper) / stride1) - 1;
     xx = (FPTYPE)0.;
   }
 }
 
 template <typename FPTYPE>
-__forceinline__ __device__
-void locate_xx_se_t(
-    FPTYPE& xx, 
-    int& table_idx,
-    const FPTYPE& lower, 
-    const FPTYPE& upper,  
-    const FPTYPE& min, 
-    const FPTYPE& max, 
-    const FPTYPE& stride0, 
-    const FPTYPE& stride1)
-{
+__forceinline__ __device__ void locate_xx_se_t(FPTYPE& xx,
+                                               int& table_idx,
+                                               const FPTYPE& lower,
+                                               const FPTYPE& upper,
+                                               const FPTYPE& min,
+                                               const FPTYPE& max,
+                                               const FPTYPE& stride0,
+                                               const FPTYPE& stride1) {
   if (xx < min) {
     table_idx = 0;
     xx = (FPTYPE)0.;
-  }
-  else if (xx < lower) {
+  } else if (xx < lower) {
     table_idx = (int)((xx - min) / stride1);
     xx -= (table_idx * stride1 + min);
-  }
-  else if (xx < upper) {
+  } else if (xx < upper) {
     int first_stride = int((lower - min) / stride1);
     table_idx = first_stride + (int)((xx - lower) / stride0);
     xx -= ((table_idx - first_stride) * stride0 + lower);
-  }
-  else if (xx < max) {
-    int first_stride = int((lower - min) / stride1) + int((upper - lower) / stride0);
+  } else if (xx < max) {
+    int first_stride =
+        int((lower - min) / stride1) + int((upper - lower) / stride0);
     table_idx = first_stride + (int)((xx - upper) / stride1);
     xx -= ((table_idx - first_stride) * stride1 + upper);
-  }
-  else {
-    table_idx = int((lower - min) / stride1) + int((upper - lower) / stride0) + (int)((max - upper) / stride1) - 1;
+  } else {
+    table_idx = int((lower - min) / stride1) + int((upper - lower) / stride0) +
+                (int)((max - upper) / stride1) - 1;
     xx = (FPTYPE)0.;
   }
 }
 
 template <typename FPTYPE>
-__forceinline__ __device__ 
-FPTYPE dot(
-    FPTYPE ll[4], 
-    FPTYPE rr[4]) 
-{
+__forceinline__ __device__ FPTYPE dot(FPTYPE ll[4], FPTYPE rr[4]) {
   return ll[0] * rr[0] + ll[1] * rr[1] + ll[2] * rr[2] + ll[3] * rr[3];
 }
 
 template <typename FPTYPE>
-__forceinline__ 
-__device__
-void warp_reduce(
-    FPTYPE & val) 
-{
+__forceinline__ __device__ void warp_reduce(FPTYPE& val) {
   for (int offset = 32; offset > 0; offset >>= 1)
-    val += __shfl_down( val, offset);//########????
+    val += __shfl_down(val, offset);  // ########????
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE> 
+template <typename FPTYPE, int MTILE, int KTILE>
 __global__ void tabulate_fusion_se_a_fifth_order_polynomial(
-    FPTYPE * out, 
-    const FPTYPE * table, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE lower, 
-    const FPTYPE upper, 
-    const FPTYPE max, 
-    const FPTYPE stride0, 
-    const FPTYPE stride1, 
-    const int nnei, 
-    const int last_layer_size) 
-{
-  HIP_DYNAMIC_SHARED( int, _data)
-  const int_64 block_idx = blockIdx.x;   // nloc
-  const int thread_idx = threadIdx.x; // last_layer_size
+    FPTYPE* out,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei,
+    const int last_layer_size) {
+  HIP_DYNAMIC_SHARED(int, _data)
+  const int_64 block_idx = blockIdx.x;  // nloc
+  const int thread_idx = threadIdx.x;   // last_layer_size
   FPTYPE ago = __shfl(em_x[block_idx * nnei + nnei - 1], 0);
   bool unloop = false;
   int breakpoint = nnei - 1;
-  FPTYPE * iteratorC = (FPTYPE*) &_data[0];
+  FPTYPE* iteratorC = (FPTYPE*)&_data[0];
   for (int kk = 0; kk < MTILE; kk++)
     iteratorC[kk * last_layer_size + thread_idx] = (FPTYPE)0.;
   __syncthreads();
 
   for (int ii = 0; ii < nnei; ii++) {
-    FPTYPE var[6]; 
+    FPTYPE var[6];
     FPTYPE xx = em_x[block_idx * nnei + ii];
     if (xx == ago) {
       unloop = true;
@@ -135,82 +113,101 @@ __global__ void tabulate_fusion_se_a_fifth_order_polynomial(
     var[3] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 3];
     var[4] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 4];
     var[5] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 5];
-    FPTYPE res = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
-    
+    FPTYPE res =
+        var[0] +
+        (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+            xx;
+
     for (int kk = 0; kk < MTILE; kk++) {
-      iteratorC[kk * last_layer_size + thread_idx] += (nnei - breakpoint) * em[block_idx * nnei * MTILE + ii * MTILE + kk] * res;
+      iteratorC[kk * last_layer_size + thread_idx] +=
+          (nnei - breakpoint) * em[block_idx * nnei * MTILE + ii * MTILE + kk] *
+          res;
     }
     if (unloop) break;
   }
   for (int ii = 0; ii < MTILE; ii++) {
-    out[block_idx * MTILE * last_layer_size + ii * last_layer_size + thread_idx] = iteratorC[ii * last_layer_size + thread_idx];
+    out[block_idx * MTILE * last_layer_size + ii * last_layer_size +
+        thread_idx] = iteratorC[ii * last_layer_size + thread_idx];
   }
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE> 
+template 
 __global__ void tabulate_fusion_se_a_grad_fifth_order_polynomial(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,   
-    const FPTYPE * table, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const FPTYPE lower, 
-    const FPTYPE upper, 
-    const FPTYPE max, 
-    const FPTYPE stride0, 
-    const FPTYPE stride1, 
-    const int nnei, 
-    const int last_layer_size) 
-{
-  HIP_DYNAMIC_SHARED( int, _data)
+    FPTYPE* dy_dem_x,
+    FPTYPE* dy_dem,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE* dy,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei,
+    const int last_layer_size) {
+  HIP_DYNAMIC_SHARED(int, _data)
   const int_64 block_idx = blockIdx.x;  // nloc
-  const int thread_idx = threadIdx.x; // KTILE * WARP_SIZE, usally 128 here~
+  const int thread_idx = threadIdx.x;   // KTILE * WARP_SIZE, usually 128 here
   int warp_idx = __shfl(threadIdx.x / 64, 0);
   int lane_idx = threadIdx.x % 64;
   int breakpoint = nnei - 1;
   bool unloop = false;
-  FPTYPE * iteratorA = (FPTYPE *)&_data[0]; // dy
+  FPTYPE* iteratorA = (FPTYPE*)&_data[0];  // dy
   for (int ii = 0; ii < MTILE; ii++) {
     for (int jj = thread_idx; jj < last_layer_size; jj += blockDim.x) {
-      iteratorA[ii * last_layer_size + jj] = dy[block_idx * MTILE * last_layer_size + ii * last_layer_size + jj];
+      iteratorA[ii * last_layer_size + jj] =
+          dy[block_idx * MTILE * last_layer_size + ii * last_layer_size + jj];
     }
   }
   __syncthreads();
-  FPTYPE ago = __shfl( em_x[block_idx * nnei + nnei - 1], 0);
+  FPTYPE ago = __shfl(em_x[block_idx * nnei + nnei - 1], 0);
   for (int ii = 0; ii < nnei; ii += KTILE) {
     FPTYPE xx = em_x[block_idx * nnei + ii + warp_idx];
-    if (ago == xx) { 
+    if (ago == xx) {
       unloop = true;
       breakpoint = ii + warp_idx;
     }
-    
+
     int table_idx = 0;
     locate_xx(xx, table_idx, lower, upper, max, stride0, stride1);
     FPTYPE sum[KTILE] = {(FPTYPE)0.};
     FPTYPE Csub = (FPTYPE)0.;
     for (int jj = lane_idx; jj < last_layer_size; jj += WARP_SIZE) {
-      FPTYPE var[6]; 
-      // load iteratorB through table 
-      var[0]  = table[table_idx * last_layer_size * 6 + 6 * jj + 0]; 
-      var[1]  = table[table_idx * last_layer_size * 6 + 6 * jj + 1]; 
-      var[2]  = table[table_idx * last_layer_size * 6 + 6 * jj + 2]; 
-      var[3]  = table[table_idx * last_layer_size * 6 + 6 * jj + 3];
-      var[4]  = table[table_idx * last_layer_size * 6 + 6 * jj + 4];
-      var[5]  = table[table_idx * last_layer_size * 6 + 6 * jj + 5];
-      FPTYPE res = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
-      
+      FPTYPE var[6];
+      // load iteratorB through table
+      var[0] = table[table_idx * last_layer_size * 6 + 6 * jj + 0];
+      var[1] = table[table_idx * last_layer_size * 6 + 6 * jj + 1];
+      var[2] = table[table_idx * last_layer_size * 6 + 6 * jj + 2];
+      var[3] = table[table_idx * last_layer_size * 6 + 6 * jj + 3];
+      var[4] = table[table_idx * last_layer_size * 6 + 6 * jj + 4];
+      var[5] = table[table_idx * last_layer_size * 6 + 6 * jj + 5];
+      FPTYPE res =
+          var[0] +
+          (var[1] +
+           (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+              xx;
+
       for (int kk = 0; kk < KTILE; kk++) {
-        sum[kk] += (nnei - breakpoint) * iteratorA[kk * last_layer_size + jj] * res;
+        sum[kk] +=
+            (nnei - breakpoint) * iteratorA[kk * last_layer_size + jj] * res;
       }
-      res  = em[block_idx * nnei * MTILE + (ii + warp_idx) * 4 + 0] * iteratorA[0 * last_layer_size + jj];
-      res += em[block_idx * nnei * MTILE + (ii + warp_idx) * 4 + 1] * iteratorA[1 * last_layer_size + jj];
-      res += em[block_idx * nnei * MTILE + (ii + warp_idx) * 4 + 2] * iteratorA[2 * last_layer_size + jj];
-      res += em[block_idx * nnei * MTILE + (ii + warp_idx) * 4 + 3] * iteratorA[3 * last_layer_size + jj];
-      Csub += (nnei - breakpoint) * (var[1] + ((FPTYPE)2. * var[2] + ((FPTYPE)3. * var[3] + ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) * xx) * xx) * res;
+      res = em[block_idx * nnei * MTILE + (ii + warp_idx) * 4 + 0] *
+            iteratorA[0 * last_layer_size + jj];
+      res += em[block_idx * nnei * MTILE + (ii + warp_idx) * 4 + 1] *
+             iteratorA[1 * last_layer_size + jj];
+      res += em[block_idx * nnei * MTILE + (ii + warp_idx) * 4 + 2] *
+             iteratorA[2 * last_layer_size + jj];
+      res += em[block_idx * nnei * MTILE + (ii + warp_idx) * 4 + 3] *
+             iteratorA[3 * last_layer_size + jj];
+      Csub +=
+          (nnei - breakpoint) *
+          (var[1] + ((FPTYPE)2. * var[2] +
+                     ((FPTYPE)3. * var[3] +
+                      ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) *
+                         xx) *
+                        xx) *
+          res;
     }
     //__syncwarp();->syncwrap
     __syncthreads();
@@ -228,32 +225,28 @@ __global__ void tabulate_fusion_se_a_grad_fifth_order_polynomial(
   }
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE>
+template 
 __global__ void tabulate_fusion_se_a_grad_grad_fifth_order_polynomial(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
+    FPTYPE* dz_dy,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE* dz_dy_dem_x,
+    const FPTYPE* dz_dy_dem,
     const FPTYPE lower,
     const FPTYPE upper,
     const FPTYPE max,
     const FPTYPE stride0,
     const FPTYPE stride1,
     const int nnei,
-    const int last_layer_size)
-{
+    const int last_layer_size) {
   extern __shared__ int _data[];
-  const int_64 block_idx = blockIdx.x;   // nloc
-  const int thread_idx = threadIdx.x; // last_layer_size
-  FPTYPE ago = __shfl( em_x[block_idx * nnei + nnei - 1], 0);
+  const int_64 block_idx = blockIdx.x;  // nloc
+  const int thread_idx = threadIdx.x;   // last_layer_size
+  FPTYPE ago = __shfl(em_x[block_idx * nnei + nnei - 1], 0);
   bool unloop = false;
   int breakpoint = nnei - 1;
-  FPTYPE * iteratorC = (FPTYPE*) &_data[0];
+  FPTYPE* iteratorC = (FPTYPE*)&_data[0];
   for (int kk = 0; kk < MTILE; kk++)
     iteratorC[kk * last_layer_size + thread_idx] = (FPTYPE)0.;
   __syncthreads();
@@ -274,49 +267,57 @@ __global__ void tabulate_fusion_se_a_grad_grad_fifth_order_polynomial(
     var[3] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 3];
     var[4] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 4];
     var[5] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 5];
-    FPTYPE res = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
-    FPTYPE res_grad = var[1] + ((FPTYPE)2. * var[2] + ((FPTYPE)3. * var[3] + ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) * xx) * xx;
+    FPTYPE res =
+        var[0] +
+        (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+            xx;
+    FPTYPE res_grad =
+        var[1] + ((FPTYPE)2. * var[2] +
+                  ((FPTYPE)3. * var[3] +
+                   ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) *
+                      xx) *
+                     xx;
 
     for (int kk = 0; kk < MTILE; kk++) {
       int em_index = block_idx * nnei * MTILE + ii * MTILE + kk;
-      iteratorC[kk * last_layer_size + thread_idx] += (nnei - breakpoint) * (em[em_index] * res_grad * dz_xx + dz_dy_dem[em_index] * res);
+      iteratorC[kk * last_layer_size + thread_idx] +=
+          (nnei - breakpoint) *
+          (em[em_index] * res_grad * dz_xx + dz_dy_dem[em_index] * res);
     }
     if (unloop) break;
   }
   for (int ii = 0; ii < MTILE; ii++) {
-    dz_dy[block_idx * MTILE * last_layer_size + ii * last_layer_size + thread_idx] = iteratorC[ii * last_layer_size + thread_idx];
+    dz_dy[block_idx * MTILE * last_layer_size + ii * last_layer_size +
+          thread_idx] = iteratorC[ii * last_layer_size + thread_idx];
   }
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE> 
+template 
 __global__ void tabulate_fusion_se_t_fifth_order_polynomial(
-    FPTYPE * out, 
-    const FPTYPE * table, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE lower, 
-    const FPTYPE upper, 
-    const FPTYPE max, 
-    const FPTYPE stride0, 
-    const FPTYPE stride1, 
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size) 
-{
-  HIP_DYNAMIC_SHARED( int, _data)
-  const int_64 block_idx = blockIdx.x;   // nloc
-  const int thread_idx = threadIdx.x; // last_layer_size
+    FPTYPE* out,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size) {
+  HIP_DYNAMIC_SHARED(int, _data)
+  const int_64 block_idx = blockIdx.x;  // nloc
+  const int thread_idx = threadIdx.x;   // last_layer_size
 
   FPTYPE sum = (FPTYPE)0.;
   for (int ii = 0; ii < nnei_i; ii++) {
-    FPTYPE ago = __shfl(em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + nnei_j - 1], 0);
+    FPTYPE ago =
+        __shfl(em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + nnei_j - 1], 0);
     int breakpoint = nnei_j - 1;
     bool unloop = false;
     for (int jj = 0; jj < nnei_j; jj++) {
-      FPTYPE xx  = em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
+      FPTYPE xx = em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
       FPTYPE tmp = xx;
       if (xx == ago) {
         unloop = true;
@@ -331,7 +332,11 @@ __global__ void tabulate_fusion_se_t_fifth_order_polynomial(
       var[3] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 3];
       var[4] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 4];
       var[5] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 5];
-      FPTYPE res = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
+      FPTYPE res =
+          var[0] +
+          (var[1] +
+           (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+              xx;
 
       sum += (nnei_j - breakpoint) * tmp * res;
       if (unloop) break;
@@ -340,69 +345,76 @@ __global__ void tabulate_fusion_se_t_fifth_order_polynomial(
   out[block_idx * last_layer_size + thread_idx] = sum;
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE> 
+template 
 __global__ void tabulate_fusion_se_t_grad_fifth_order_polynomial(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,   
-    const FPTYPE * table, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const FPTYPE lower, 
-    const FPTYPE upper, 
-    const FPTYPE max, 
-    const FPTYPE stride0, 
-    const FPTYPE stride1, 
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size) 
-{
-  HIP_DYNAMIC_SHARED( int, _data)
+    FPTYPE* dy_dem_x,
+    FPTYPE* dy_dem,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE* dy,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size) {
+  HIP_DYNAMIC_SHARED(int, _data)
   const int_64 block_idx = blockIdx.x;  // nloc
-  const int thread_idx = threadIdx.x; // KTILE * WARP_SIZE, usally 128 here~
+  const int thread_idx = threadIdx.x;   // KTILE * WARP_SIZE, usually 128 here
   int warp_idx = __shfl(threadIdx.x / 64, 0);
   int lane_idx = threadIdx.x % 64;
-  FPTYPE * iteratorA = (FPTYPE *)&_data[0]; // dy
+  FPTYPE* iteratorA = (FPTYPE*)&_data[0];  // dy
   for (int ii = thread_idx; ii < last_layer_size; ii += blockDim.x) {
     iteratorA[ii] = dy[block_idx * last_layer_size + ii];
   }
   __syncthreads();
 
   for (int ii = 0; ii < nnei_i; ii++) {
-    FPTYPE ago = __shfl(em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + nnei_j - 1], 0);
+    FPTYPE ago =
+        __shfl(em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + nnei_j - 1], 0);
     bool unloop = false;
     for (int jj = warp_idx; jj < nnei_j; jj += KTILE) {
-      FPTYPE xx  = em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
+      FPTYPE xx = em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
       FPTYPE tmp = xx;
-      if (ago == xx) { 
+      if (ago == xx) {
         unloop = true;
       }
       int table_idx = 0;
       locate_xx_se_t(xx, table_idx, lower, upper, -max, max, stride0, stride1);
-      FPTYPE sum  = (FPTYPE)0.;
+      FPTYPE sum = (FPTYPE)0.;
       FPTYPE Csub = (FPTYPE)0.;
       for (int kk = lane_idx; kk < last_layer_size; kk += WARP_SIZE) {
-        FPTYPE var[6]; 
-        // load iteratorB through table 
-        var[0]  = table[table_idx * last_layer_size * 6 + 6 * kk + 0]; 
-        var[1]  = table[table_idx * last_layer_size * 6 + 6 * kk + 1]; 
-        var[2]  = table[table_idx * last_layer_size * 6 + 6 * kk + 2]; 
-        var[3]  = table[table_idx * last_layer_size * 6 + 6 * kk + 3];
-        var[4]  = table[table_idx * last_layer_size * 6 + 6 * kk + 4];
-        var[5]  = table[table_idx * last_layer_size * 6 + 6 * kk + 5];
-        FPTYPE res = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
-
-        sum  += iteratorA[kk] * res;
-        Csub += iteratorA[kk] * tmp * (var[1] + ((FPTYPE)2. * var[2] + ((FPTYPE)3. * var[3] + ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) * xx) * xx);
+        FPTYPE var[6];
+        // load iteratorB through table
+        var[0] = table[table_idx * last_layer_size * 6 + 6 * kk + 0];
+        var[1] = table[table_idx * last_layer_size * 6 + 6 * kk + 1];
+        var[2] = table[table_idx * last_layer_size * 6 + 6 * kk + 2];
+        var[3] = table[table_idx * last_layer_size * 6 + 6 * kk + 3];
+        var[4] = table[table_idx * last_layer_size * 6 + 6 * kk + 4];
+        var[5] = table[table_idx * last_layer_size * 6 + 6 * kk + 5];
+        FPTYPE res =
+            var[0] +
+            (var[1] +
+             (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+                xx;
+
+        sum += iteratorA[kk] * res;
+        Csub +=
+            iteratorA[kk] * tmp *
+            (var[1] + ((FPTYPE)2. * var[2] +
+                       ((FPTYPE)3. * var[3] +
+                        ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) *
+                           xx) *
+                          xx);
       }
       __syncthreads();
       warp_reduce(sum);
       warp_reduce(Csub);
       if (lane_idx == 0) {
-        dy_dem  [block_idx * nnei_i * nnei_j + ii * nnei_j + jj] = sum;
+        dy_dem[block_idx * nnei_i * nnei_j + ii * nnei_j + jj] = sum;
         dy_dem_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj] = Csub;
       }
       if (unloop) break;
@@ -410,17 +422,14 @@ __global__ void tabulate_fusion_se_t_grad_fifth_order_polynomial(
   }
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE>
+template 
 __global__ void tabulate_fusion_se_t_grad_grad_fifth_order_polynomial(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
+    FPTYPE* dz_dy,
+    const FPTYPE* table,
+    const FPTYPE* em_x,
+    const FPTYPE* em,
+    const FPTYPE* dz_dy_dem_x,
+    const FPTYPE* dz_dy_dem,
     const FPTYPE lower,
     const FPTYPE upper,
     const FPTYPE max,
@@ -428,22 +437,23 @@ __global__ void tabulate_fusion_se_t_grad_grad_fifth_order_polynomial(
     const FPTYPE stride1,
     const int nnei_i,
     const int nnei_j,
-    const int last_layer_size)
-{
-  const int_64 block_idx  = blockIdx.x;   // nloc
-  const int thread_idx = threadIdx.x; // last_layer_size
+    const int last_layer_size) {
+  const int_64 block_idx = blockIdx.x;  // nloc
+  const int thread_idx = threadIdx.x;   // last_layer_size
 
   FPTYPE sum = (FPTYPE)0.;
-  for (int ii = 0; ii < nnei_i; ii++) { 
-    FPTYPE ago = __shfl(em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + nnei_j - 1], 0);
+  for (int ii = 0; ii < nnei_i; ii++) {
+    FPTYPE ago =
+        __shfl(em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + nnei_j - 1], 0);
     bool unloop = false;
     for (int jj = 0; ii < nnei_j; jj++) {
-      FPTYPE xx  = em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
+      FPTYPE xx = em_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
       FPTYPE tmp = xx;
-      FPTYPE dz_xx = dz_dy_dem_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
+      FPTYPE dz_xx =
+          dz_dy_dem_x[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
       FPTYPE dz_em = dz_dy_dem[block_idx * nnei_i * nnei_j + ii * nnei_j + jj];
       FPTYPE var[6];
-      if (ago == xx) { 
+      if (ago == xx) {
         unloop = true;
       }
 
@@ -455,9 +465,18 @@ __global__ void tabulate_fusion_se_t_grad_grad_fifth_order_polynomial(
       var[3] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 3];
       var[4] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 4];
       var[5] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 5];
-      FPTYPE res = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
-      FPTYPE res_grad = var[1] + ((FPTYPE)2. * var[2] + ((FPTYPE)3. * var[3] + ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) * xx) * xx;
-  
+      FPTYPE res =
+          var[0] +
+          (var[1] +
+           (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+              xx;
+      FPTYPE res_grad =
+          var[1] + ((FPTYPE)2. * var[2] +
+                    ((FPTYPE)3. * var[3] +
+                     ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) *
+                        xx) *
+                       xx;
+
       sum += (tmp * res_grad * dz_xx + dz_em * res);
       if (unloop) break;
     }
@@ -465,28 +484,24 @@ __global__ void tabulate_fusion_se_t_grad_grad_fifth_order_polynomial(
   dz_dy[block_idx * last_layer_size + thread_idx] = sum;
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE> 
+template 
 __global__ void tabulate_fusion_se_r_fifth_order_polynomial(
-    FPTYPE * out, 
-    const FPTYPE * table,  
-    const FPTYPE * em, 
-    const FPTYPE lower, 
-    const FPTYPE upper, 
-    const FPTYPE max, 
-    const FPTYPE stride0, 
-    const FPTYPE stride1, 
-    const int nnei, 
-    const int last_layer_size) 
-{
-  HIP_DYNAMIC_SHARED( int, _data)
-  const int_64 block_idx = blockIdx.x;   // nloc
-  const int thread_idx = threadIdx.x; // last_layer_size
-  
+    FPTYPE* out,
+    const FPTYPE* table,
+    const FPTYPE* em,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei,
+    const int last_layer_size) {
+  HIP_DYNAMIC_SHARED(int, _data)
+  const int_64 block_idx = blockIdx.x;  // nloc
+  const int thread_idx = threadIdx.x;   // last_layer_size
+
   for (int ii = 0; ii < nnei; ii++) {
-    FPTYPE var[6]; 
+    FPTYPE var[6];
     FPTYPE xx = em[block_idx * nnei + ii];
     int table_idx = 0;
     locate_xx(xx, table_idx, lower, upper, max, stride0, stride1);
@@ -496,50 +511,55 @@ __global__ void tabulate_fusion_se_r_fifth_order_polynomial(
     var[3] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 3];
     var[4] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 4];
     var[5] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 5];
-    out[block_idx * nnei * last_layer_size + ii * last_layer_size + thread_idx] = var[0] + (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) * xx;
+    out[block_idx * nnei * last_layer_size + ii * last_layer_size +
+        thread_idx] =
+        var[0] +
+        (var[1] + (var[2] + (var[3] + (var[4] + var[5] * xx) * xx) * xx) * xx) *
+            xx;
   }
-
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE> 
+template 
 __global__ void tabulate_fusion_se_r_grad_fifth_order_polynomial(
-    FPTYPE * dy_dem,   
-    const FPTYPE * table, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const FPTYPE lower, 
-    const FPTYPE upper, 
-    const FPTYPE max, 
-    const FPTYPE stride0, 
-    const FPTYPE stride1, 
-    const int nnei, 
-    const int last_layer_size) 
-{
-  HIP_DYNAMIC_SHARED( int, _data)
+    FPTYPE* dy_dem,
+    const FPTYPE* table,
+    const FPTYPE* em,
+    const FPTYPE* dy,
+    const FPTYPE lower,
+    const FPTYPE upper,
+    const FPTYPE max,
+    const FPTYPE stride0,
+    const FPTYPE stride1,
+    const int nnei,
+    const int last_layer_size) {
+  HIP_DYNAMIC_SHARED(int, _data)
   const int_64 block_idx = blockIdx.x;  // nloc
-  const int thread_idx = threadIdx.x; // KTILE * WARP_SIZE, usally 128 here~
+  const int thread_idx = threadIdx.x;   // KTILE * WARP_SIZE, usually 128 here
   int warp_idx = __shfl(threadIdx.x / 64, 0);
   int lane_idx = threadIdx.x % 64;
 
   for (int ii = 0; ii < nnei; ii += KTILE) {
     FPTYPE xx = em[block_idx * nnei + ii + warp_idx];
-    
+
     int table_idx = 0;
     locate_xx(xx, table_idx, lower, upper, max, stride0, stride1);
     FPTYPE Csub = 0.f;
     for (int jj = lane_idx; jj < last_layer_size; jj += WARP_SIZE) {
-      FPTYPE var[6]; 
-      // load iteratorB through table 
-      var[0]  = table[table_idx * last_layer_size * 6 + 6 * jj + 0]; 
-      var[1]  = table[table_idx * last_layer_size * 6 + 6 * jj + 1]; 
-      var[2]  = table[table_idx * last_layer_size * 6 + 6 * jj + 2]; 
-      var[3]  = table[table_idx * last_layer_size * 6 + 6 * jj + 3];
-      var[4]  = table[table_idx * last_layer_size * 6 + 6 * jj + 4];
-      var[5]  = table[table_idx * last_layer_size * 6 + 6 * jj + 5];
-      Csub +=(var[1] + ((FPTYPE)2. * var[2] + ((FPTYPE)3. * var[3] + ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) * xx) * xx) * dy[block_idx * nnei * last_layer_size + ii * last_layer_size + jj];
+      FPTYPE var[6];
+      // load iteratorB through table
+      var[0] = table[table_idx * last_layer_size * 6 + 6 * jj + 0];
+      var[1] = table[table_idx * last_layer_size * 6 + 6 * jj + 1];
+      var[2] = table[table_idx * last_layer_size * 6 + 6 * jj + 2];
+      var[3] = table[table_idx * last_layer_size * 6 + 6 * jj + 3];
+      var[4] = table[table_idx * last_layer_size * 6 + 6 * jj + 4];
+      var[5] = table[table_idx * last_layer_size * 6 + 6 * jj + 5];
+      Csub +=
+          (var[1] + ((FPTYPE)2. * var[2] +
+                     ((FPTYPE)3. * var[3] +
+                      ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) *
+                         xx) *
+                        xx) *
+          dy[block_idx * nnei * last_layer_size + ii * last_layer_size + jj];
     }
     //__syncwarp();->syncwrap
     __syncthreads();
@@ -547,30 +567,25 @@ __global__ void tabulate_fusion_se_r_grad_fifth_order_polynomial(
     if (lane_idx == 0) {
       dy_dem[block_idx * nnei + ii + warp_idx] = Csub;
     }
-
   }
 }
 
-template <
-    typename FPTYPE,
-    int      MTILE,
-    int      KTILE>
+template 
 __global__ void tabulate_fusion_se_r_grad_grad_fifth_order_polynomial(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem,
+    FPTYPE* dz_dy,
+    const FPTYPE* table,
+    const FPTYPE* em,
+    const FPTYPE* dz_dy_dem,
     const FPTYPE lower,
     const FPTYPE upper,
     const FPTYPE max,
     const FPTYPE stride0,
     const FPTYPE stride1,
     const int nnei,
-    const int last_layer_size)
-{
+    const int last_layer_size) {
   extern __shared__ int _data[];
-  const int_64 block_idx = blockIdx.x;   // nloc
-  const int thread_idx = threadIdx.x; // last_layer_size
+  const int_64 block_idx = blockIdx.x;  // nloc
+  const int thread_idx = threadIdx.x;   // last_layer_size
 
   __syncthreads();
 
@@ -585,241 +600,418 @@ __global__ void tabulate_fusion_se_r_grad_grad_fifth_order_polynomial(
     var[3] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 3];
     var[4] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 4];
     var[5] = table[table_idx * last_layer_size * 6 + thread_idx * 6 + 5];
-    FPTYPE res_grad = var[1] + ((FPTYPE)2. * var[2] + ((FPTYPE)3. * var[3] + ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) * xx) * xx;
-    dz_dy[block_idx * nnei * last_layer_size + ii * last_layer_size + thread_idx] = dz_dy_dem[block_idx * nnei + ii]*res_grad;
-
+    FPTYPE res_grad =
+        var[1] + ((FPTYPE)2. * var[2] +
+                  ((FPTYPE)3. * var[3] +
+                   ((FPTYPE)4. * var[4] + (FPTYPE)5. * var[5] * xx) * xx) *
+                      xx) *
+                     xx;
+    dz_dy[block_idx * nnei * last_layer_size + ii * last_layer_size +
+          thread_idx] = dz_dy_dem[block_idx * nnei + ii] * res_grad;
   }
 }
 
-
 namespace deepmd {
-template
-void tabulate_fusion_se_a_gpu_rocm(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const int nloc,
-    const int nnei, 
-    const int last_layer_size) 
-{
-  if(nloc <= 0){return;}
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_a_fifth_order_polynomial), nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0, 
-      out, 
-      table, em_x, em, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
+template 
+void tabulate_fusion_se_a_gpu_rocm(FPTYPE* out,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em_x,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei,
+                                   const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  hipLaunchKernelGGL(
+      HIP_KERNEL_NAME(
+          tabulate_fusion_se_a_fifth_order_polynomial),
+      nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0, out,
+      table, em_x, em, table_info[0], table_info[1], table_info[2],
+      table_info[3], table_info[4], nnei, last_layer_size);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_a_grad_gpu_rocm(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size)
-{
-  if(nloc <= 0) {return;}
-  DPErrcheck(hipMemset(
-      dy_dem_x,
-      0, sizeof(FPTYPE) * nloc * nnei));
-  DPErrcheck(hipMemset(
-      dy_dem,
-      0, sizeof(FPTYPE) * nloc * nnei * 4));
-
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_a_grad_fifth_order_polynomial), nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size, 0, 
-      dy_dem_x, dy_dem,
-      table, em_x, em, dy,  table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
+template 
+void tabulate_fusion_se_a_grad_gpu_rocm(FPTYPE* dy_dem_x,
+                                        FPTYPE* dy_dem,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em_x,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,
+                                        const int nloc,
+                                        const int nnei,
+                                        const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  DPErrcheck(hipMemset(dy_dem_x, 0, sizeof(FPTYPE) * nloc * nnei));
+  DPErrcheck(hipMemset(dy_dem, 0, sizeof(FPTYPE) * nloc * nnei * 4));
+
+  hipLaunchKernelGGL(
+      HIP_KERNEL_NAME(
+          tabulate_fusion_se_a_grad_fifth_order_polynomial),
+      nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size, 0, dy_dem_x,
+      dy_dem, table, em_x, em, dy, table_info[0], table_info[1], table_info[2],
+      table_info[3], table_info[4], nnei, last_layer_size);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_a_grad_grad_gpu_rocm(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei,
-    const int last_layer_size)
-{
-  if(nloc <= 0) {return;}
-  DPErrcheck(hipMemset(
-    dz_dy,
-    0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_a_grad_grad_fifth_order_polynomial), nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0, 
-    dz_dy,
-    table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
+template 
+void tabulate_fusion_se_a_grad_grad_gpu_rocm(FPTYPE* dz_dy,
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em_x,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem_x,
+                                             const FPTYPE* dz_dy_dem,
+                                             const int nloc,
+                                             const int nnei,
+                                             const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  DPErrcheck(hipMemset(dz_dy, 0, sizeof(FPTYPE) * nloc * 4 * last_layer_size));
+  hipLaunchKernelGGL(
+      HIP_KERNEL_NAME(
+          tabulate_fusion_se_a_grad_grad_fifth_order_polynomial),
+      nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0, dz_dy,
+      table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1],
+      table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_t_gpu_rocm(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const int nloc,
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size) 
-{
-  if(nloc <= 0){return;}
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_t_fifth_order_polynomial), nloc, last_layer_size, 0, 0, 
-      out, 
-      table, em_x, em, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei_i, nnei_j, last_layer_size);
+template 
+void tabulate_fusion_se_t_gpu_rocm(FPTYPE* out,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em_x,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei_i,
+                                   const int nnei_j,
+                                   const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  hipLaunchKernelGGL(
+      HIP_KERNEL_NAME(
+          tabulate_fusion_se_t_fifth_order_polynomial),
+      nloc, last_layer_size, 0, 0, out, table, em_x, em, table_info[0],
+      table_info[1], table_info[2], table_info[3], table_info[4], nnei_i,
+      nnei_j, last_layer_size);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_t_grad_gpu_rocm(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size)
-{
-  if(nloc <= 0) {return;}
-  DPErrcheck(hipMemset(
-      dy_dem_x,
-      0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
-  DPErrcheck(hipMemset(
-      dy_dem,
-      0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
-
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_t_grad_fifth_order_polynomial), nloc, KK * WARP_SIZE, sizeof(FPTYPE) * last_layer_size, 0, 
-      dy_dem_x, dy_dem,
-      table, em_x, em, dy,  table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei_i, nnei_j, last_layer_size);
+template 
+void tabulate_fusion_se_t_grad_gpu_rocm(FPTYPE* dy_dem_x,
+                                        FPTYPE* dy_dem,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em_x,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,
+                                        const int nloc,
+                                        const int nnei_i,
+                                        const int nnei_j,
+                                        const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  DPErrcheck(hipMemset(dy_dem_x, 0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
+  DPErrcheck(hipMemset(dy_dem, 0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j));
+
+  hipLaunchKernelGGL(
+      HIP_KERNEL_NAME(
+          tabulate_fusion_se_t_grad_fifth_order_polynomial),
+      nloc, KK * WARP_SIZE, sizeof(FPTYPE) * last_layer_size, 0, dy_dem_x,
+      dy_dem, table, em_x, em, dy, table_info[0], table_info[1], table_info[2],
+      table_info[3], table_info[4], nnei_i, nnei_j, last_layer_size);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_t_grad_grad_gpu_rocm(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei_i,
-    const int nnei_j,
-    const int last_layer_size)
-{
-  if(nloc <= 0) {return;}
-  DPErrcheck(hipMemset(
-    dz_dy,
-    0, sizeof(FPTYPE) * nloc * last_layer_size));
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_t_grad_grad_fifth_order_polynomial), nloc, last_layer_size, 0, 0, 
-    dz_dy,
-    table, em_x, em, dz_dy_dem_x, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei_i, nnei_j, last_layer_size);
+template 
+void tabulate_fusion_se_t_grad_grad_gpu_rocm(FPTYPE* dz_dy,
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em_x,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem_x,
+                                             const FPTYPE* dz_dy_dem,
+                                             const int nloc,
+                                             const int nnei_i,
+                                             const int nnei_j,
+                                             const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  DPErrcheck(hipMemset(dz_dy, 0, sizeof(FPTYPE) * nloc * last_layer_size));
+  hipLaunchKernelGGL(
+      HIP_KERNEL_NAME(
+          tabulate_fusion_se_t_grad_grad_fifth_order_polynomial),
+      nloc, last_layer_size, 0, 0, dz_dy, table, em_x, em, dz_dy_dem_x,
+      dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3],
+      table_info[4], nnei_i, nnei_j, last_layer_size);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_r_gpu_rocm(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info,  
-    const FPTYPE * em, 
-    const int nloc,
-    const int nnei, 
-    const int last_layer_size) 
-{
-  if(nloc <= 0){return;}
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_r_fifth_order_polynomial), nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0, 
-      out, 
-      table, em, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
+template 
+void tabulate_fusion_se_r_gpu_rocm(FPTYPE* out,
+                                   const FPTYPE* table,
+                                   const FPTYPE* table_info,
+                                   const FPTYPE* em,
+                                   const int nloc,
+                                   const int nnei,
+                                   const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  hipLaunchKernelGGL(
+      HIP_KERNEL_NAME(
+          tabulate_fusion_se_r_fifth_order_polynomial),
+      nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0, out,
+      table, em, table_info[0], table_info[1], table_info[2], table_info[3],
+      table_info[4], nnei, last_layer_size);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_r_grad_gpu_rocm(
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size)
-{
-  if(nloc <= 0) {return;}
-  DPErrcheck(hipMemset(
-      dy_dem,
-      0, sizeof(FPTYPE) * nloc * nnei));
-
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_r_grad_fifth_order_polynomial), nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size, 0, 
-      dy_dem,
-      table, em, dy,  table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
+template 
+void tabulate_fusion_se_r_grad_gpu_rocm(FPTYPE* dy_dem,
+                                        const FPTYPE* table,
+                                        const FPTYPE* table_info,
+                                        const FPTYPE* em,
+                                        const FPTYPE* dy,
+                                        const int nloc,
+                                        const int nnei,
+                                        const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  DPErrcheck(hipMemset(dy_dem, 0, sizeof(FPTYPE) * nloc * nnei));
+
+  hipLaunchKernelGGL(
+      HIP_KERNEL_NAME(
+          tabulate_fusion_se_r_grad_fifth_order_polynomial),
+      nloc, KK * WARP_SIZE, sizeof(FPTYPE) * MM * last_layer_size, 0, dy_dem,
+      table, em, dy, table_info[0], table_info[1], table_info[2], table_info[3],
+      table_info[4], nnei, last_layer_size);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template
-void tabulate_fusion_se_r_grad_grad_gpu_rocm(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei,
-    const int last_layer_size)
-{
-  if(nloc <= 0) {return;}
-  DPErrcheck(hipMemset(
-    dz_dy,
-    0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(tabulate_fusion_se_r_grad_grad_fifth_order_polynomial), nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0, 
-    dz_dy,
-    table, em, dz_dy_dem, table_info[0], table_info[1], table_info[2], table_info[3], table_info[4], nnei, last_layer_size);
+template 
+void tabulate_fusion_se_r_grad_grad_gpu_rocm(FPTYPE* dz_dy,
+                                             const FPTYPE* table,
+                                             const FPTYPE* table_info,
+                                             const FPTYPE* em,
+                                             const FPTYPE* dz_dy_dem,
+                                             const int nloc,
+                                             const int nnei,
+                                             const int last_layer_size) {
+  if (nloc <= 0) {
+    return;
+  }
+  DPErrcheck(
+      hipMemset(dz_dy, 0, sizeof(FPTYPE) * nloc * nnei * last_layer_size));
+  hipLaunchKernelGGL(
+      HIP_KERNEL_NAME(
+          tabulate_fusion_se_r_grad_grad_fifth_order_polynomial),
+      nloc, last_layer_size, sizeof(FPTYPE) * MM * last_layer_size, 0, dz_dy,
+      table, em, dz_dy_dem, table_info[0], table_info[1], table_info[2],
+      table_info[3], table_info[4], nnei, last_layer_size);
   DPErrcheck(hipGetLastError());
   DPErrcheck(hipDeviceSynchronize());
 }
 
-template void tabulate_fusion_se_a_gpu_rocm(float * out, const float * table, const float * table_info, const float * em_x, const float * em, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_a_gpu_rocm(double * out, const double * table, const double * table_info, const double * em_x, const double * em, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_a_grad_gpu_rocm (float * dy_dem_x, float * dy_dem, const float * table, const float * table_info, const float * em_x, const float * em, const float * dy, const int nloc, const int nnei, const int last_layer_size); 
-template void tabulate_fusion_se_a_grad_gpu_rocm (double * dy_dem_x, double * dy_dem, const double * table, const double * table_info, const double * em_x, const double * em, const double * dy, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_a_grad_grad_gpu_rocm (float * dz_dy, const float * table, const float * table_info, const float * em_x, const float * em, const float * dz_dy_dem_x, const float * dz_dy_dem, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_a_grad_grad_gpu_rocm (double * dz_dy, const double * table, const double * table_info, const double * em_x, const double * em, const double * dz_dy_dem_x, const double * dz_dy_dem, const int nloc, const int nnei, const int last_layer_size);
-
-template void tabulate_fusion_se_t_gpu_rocm(float * out, const float * table, const float * table_info, const float * em_x, const float * em, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
-template void tabulate_fusion_se_t_gpu_rocm(double * out, const double * table, const double * table_info, const double * em_x, const double * em, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
-template void tabulate_fusion_se_t_grad_gpu_rocm (float * dy_dem_x, float * dy_dem, const float * table, const float * table_info, const float * em_x, const float * em, const float * dy, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size); 
-template void tabulate_fusion_se_t_grad_gpu_rocm (double * dy_dem_x, double * dy_dem, const double * table, const double * table_info, const double * em_x, const double * em, const double * dy, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
-template void tabulate_fusion_se_t_grad_grad_gpu_rocm (float * dz_dy, const float * table, const float * table_info, const float * em_x, const float * em, const float * dz_dy_dem_x, const float * dz_dy_dem, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
-template void tabulate_fusion_se_t_grad_grad_gpu_rocm (double * dz_dy, const double * table, const double * table_info, const double * em_x, const double * em, const double * dz_dy_dem_x, const double * dz_dy_dem, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
-
-template void tabulate_fusion_se_r_gpu_rocm(float * out, const float * table, const float * table_info, const float * em, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_r_gpu_rocm(double * out, const double * table, const double * table_info,const double * em, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_r_grad_gpu_rocm (float * dy_dem, const float * table, const float * table_info, const float * em, const float * dy, const int nloc, const int nnei, const int last_layer_size); 
-template void tabulate_fusion_se_r_grad_gpu_rocm (double * dy_dem, const double * table, const double * table_info, const double * em, const double * dy, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_r_grad_grad_gpu_rocm (float * dz_dy, const float * table, const float * table_info, const float * em, const float * dz_dy_dem, const int nloc, const int nnei, const int last_layer_size);
-template void tabulate_fusion_se_r_grad_grad_gpu_rocm (double * dz_dy, const double * table, const double * table_info, const double * em, const double * dz_dy_dem, const int nloc, const int nnei, const int last_layer_size);
-
-}
\ No newline at end of file
+template void tabulate_fusion_se_a_gpu_rocm(float* out,
+                                                   const float* table,
+                                                   const float* table_info,
+                                                   const float* em_x,
+                                                   const float* em,
+                                                   const int nloc,
+                                                   const int nnei,
+                                                   const int last_layer_size);
+template void tabulate_fusion_se_a_gpu_rocm(double* out,
+                                                    const double* table,
+                                                    const double* table_info,
+                                                    const double* em_x,
+                                                    const double* em,
+                                                    const int nloc,
+                                                    const int nnei,
+                                                    const int last_layer_size);
+template void tabulate_fusion_se_a_grad_gpu_rocm(
+    float* dy_dem_x,
+    float* dy_dem,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dy,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void tabulate_fusion_se_a_grad_gpu_rocm(
+    double* dy_dem_x,
+    double* dy_dem,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dy,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void tabulate_fusion_se_a_grad_grad_gpu_rocm(
+    float* dz_dy,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dz_dy_dem_x,
+    const float* dz_dy_dem,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void tabulate_fusion_se_a_grad_grad_gpu_rocm(
+    double* dz_dy,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dz_dy_dem_x,
+    const double* dz_dy_dem,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+
+template void tabulate_fusion_se_t_gpu_rocm(float* out,
+                                                   const float* table,
+                                                   const float* table_info,
+                                                   const float* em_x,
+                                                   const float* em,
+                                                   const int nloc,
+                                                   const int nnei_i,
+                                                   const int nnei_j,
+                                                   const int last_layer_size);
+template void tabulate_fusion_se_t_gpu_rocm(double* out,
+                                                    const double* table,
+                                                    const double* table_info,
+                                                    const double* em_x,
+                                                    const double* em,
+                                                    const int nloc,
+                                                    const int nnei_i,
+                                                    const int nnei_j,
+                                                    const int last_layer_size);
+template void tabulate_fusion_se_t_grad_gpu_rocm(
+    float* dy_dem_x,
+    float* dy_dem,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dy,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void tabulate_fusion_se_t_grad_gpu_rocm(
+    double* dy_dem_x,
+    double* dy_dem,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dy,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void tabulate_fusion_se_t_grad_grad_gpu_rocm(
+    float* dz_dy,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dz_dy_dem_x,
+    const float* dz_dy_dem,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void tabulate_fusion_se_t_grad_grad_gpu_rocm(
+    double* dz_dy,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dz_dy_dem_x,
+    const double* dz_dy_dem,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+
+template void tabulate_fusion_se_r_gpu_rocm(float* out,
+                                                   const float* table,
+                                                   const float* table_info,
+                                                   const float* em,
+                                                   const int nloc,
+                                                   const int nnei,
+                                                   const int last_layer_size);
+template void tabulate_fusion_se_r_gpu_rocm(double* out,
+                                                    const double* table,
+                                                    const double* table_info,
+                                                    const double* em,
+                                                    const int nloc,
+                                                    const int nnei,
+                                                    const int last_layer_size);
+template void tabulate_fusion_se_r_grad_gpu_rocm(
+    float* dy_dem,
+    const float* table,
+    const float* table_info,
+    const float* em,
+    const float* dy,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void tabulate_fusion_se_r_grad_gpu_rocm(
+    double* dy_dem,
+    const double* table,
+    const double* table_info,
+    const double* em,
+    const double* dy,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void tabulate_fusion_se_r_grad_grad_gpu_rocm(
+    float* dz_dy,
+    const float* table,
+    const float* table_info,
+    const float* em,
+    const float* dz_dy_dem,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void tabulate_fusion_se_r_grad_grad_gpu_rocm(
+    double* dz_dy,
+    const double* table,
+    const double* table_info,
+    const double* em,
+    const double* dz_dy_dem,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+
+}  // namespace deepmd
diff --git a/source/lib/src/soft_min_switch.cc b/source/lib/src/soft_min_switch.cc
index 9b37b29cde..f46f9ad9f2 100644
--- a/source/lib/src/soft_min_switch.cc
+++ b/source/lib/src/soft_min_switch.cc
@@ -1,43 +1,40 @@
-#include 
-#include 
 #include "soft_min_switch.h"
+
+#include 
+#include 
+
 #include "switcher.h"
 
 template 
-void deepmd::soft_min_switch_cpu(
-    FPTYPE * sw_value,
-    FPTYPE * sw_deriv,
-    const FPTYPE * rij,
-    const int * nlist,
-    const int & nloc,
-    const int & nnei, 
-    const FPTYPE & alpha,
-    const FPTYPE & rmin,
-    const FPTYPE & rmax)
-{
+void deepmd::soft_min_switch_cpu(FPTYPE* sw_value,
+                                 FPTYPE* sw_deriv,
+                                 const FPTYPE* rij,
+                                 const int* nlist,
+                                 const int& nloc,
+                                 const int& nnei,
+                                 const FPTYPE& alpha,
+                                 const FPTYPE& rmin,
+                                 const FPTYPE& rmax) {
   // fill results with 0
-  for (int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     sw_value[ii] = (FPTYPE)0.;
   }
-  for (int ii = 0; ii < nloc * nnei; ++ii){
+  for (int ii = 0; ii < nloc * nnei; ++ii) {
     sw_deriv[ii * 3 + 0] = (FPTYPE)0.;
     sw_deriv[ii * 3 + 1] = (FPTYPE)0.;
     sw_deriv[ii * 3 + 2] = (FPTYPE)0.;
   }
-  // compute force of a frame      
-  for (int ii = 0; ii < nloc; ++ii){
+  // compute force of a frame
+  for (int ii = 0; ii < nloc; ++ii) {
     int i_idx = ii;
     FPTYPE aa = 0;
     FPTYPE bb = 0;
-    for (int jj = 0; jj < nnei; ++jj){
-      int j_idx = nlist [i_idx * nnei + jj];
+    for (int jj = 0; jj < nnei; ++jj) {
+      int j_idx = nlist[i_idx * nnei + jj];
       if (j_idx < 0) continue;
       int rij_idx_shift = (i_idx * nnei + jj) * 3;
-      FPTYPE dr[3] = {
-	rij[rij_idx_shift + 0],
-	rij[rij_idx_shift + 1],
-	rij[rij_idx_shift + 2]
-      };
+      FPTYPE dr[3] = {rij[rij_idx_shift + 0], rij[rij_idx_shift + 1],
+                      rij[rij_idx_shift + 2]};
       FPTYPE rr2 = dr[0] * dr[0] + dr[1] * dr[1] + dr[2] * dr[2];
       FPTYPE rr = sqrt(rr2);
       FPTYPE ee = exp(-rr / alpha);
@@ -50,56 +47,48 @@ void deepmd::soft_min_switch_cpu(
     // value of switch
     sw_value[i_idx] = vv;
     // deriv of switch distributed as force
-    for (int jj = 0; jj < nnei; ++jj){
-      int j_idx = nlist [i_idx * nnei + jj];
+    for (int jj = 0; jj < nnei; ++jj) {
+      int j_idx = nlist[i_idx * nnei + jj];
       if (j_idx < 0) continue;
       int rij_idx_shift = (ii * nnei + jj) * 3;
-      FPTYPE dr[3] = {
-	rij[rij_idx_shift + 0],
-	rij[rij_idx_shift + 1],
-	rij[rij_idx_shift + 2]
-      };
+      FPTYPE dr[3] = {rij[rij_idx_shift + 0], rij[rij_idx_shift + 1],
+                      rij[rij_idx_shift + 2]};
       FPTYPE rr2 = dr[0] * dr[0] + dr[1] * dr[1] + dr[2] * dr[2];
       FPTYPE rr = sqrt(rr2);
       FPTYPE ee = exp(-rr / alpha);
-      FPTYPE pref_c = ((FPTYPE)1./rr - (FPTYPE)1./alpha) * ee ;
-      FPTYPE pref_d = (FPTYPE)1./(rr * alpha) * ee;
+      FPTYPE pref_c = ((FPTYPE)1. / rr - (FPTYPE)1. / alpha) * ee;
+      FPTYPE pref_d = (FPTYPE)1. / (rr * alpha) * ee;
       FPTYPE ts;
       ts = dd / (aa * aa) * (aa * pref_c + bb * pref_d);
       sw_deriv[rij_idx_shift + 0] += ts * dr[0];
       sw_deriv[rij_idx_shift + 1] += ts * dr[1];
       sw_deriv[rij_idx_shift + 2] += ts * dr[2];
       // std::cout << ii << " "  << jj << " " << j_idx << "   "
-      //      << vv << " " 
-      //      << sw_deriv[rij_idx_shift+0) << " " 
-      //      << sw_deriv[rij_idx_shift+1) << " " 
-      //      << sw_deriv[rij_idx_shift+2) << " " 
+      //      << vv << " "
+      //      << sw_deriv[rij_idx_shift+0) << " "
+      //      << sw_deriv[rij_idx_shift+1) << " "
+      //      << sw_deriv[rij_idx_shift+2) << " "
       //      << std::endl;
     }
   }
 }
 
-template
-void deepmd::soft_min_switch_cpu(
-    double * sw_value,
-    double * sw_deriv,
-    const double * rij,
-    const int * nlist,
-    const int & nloc,
-    const int & nnei, 
-    const double & alpha,
-    const double & rmin,
-    const double & rmax);
-
-template
-void deepmd::soft_min_switch_cpu(
-    float * sw_value,
-    float * sw_deriv,
-    const float * rij,
-    const int * nlist,
-    const int & nloc,
-    const int & nnei, 
-    const float & alpha,
-    const float & rmin,
-    const float & rmax);
+template void deepmd::soft_min_switch_cpu(double* sw_value,
+                                                  double* sw_deriv,
+                                                  const double* rij,
+                                                  const int* nlist,
+                                                  const int& nloc,
+                                                  const int& nnei,
+                                                  const double& alpha,
+                                                  const double& rmin,
+                                                  const double& rmax);
 
+template void deepmd::soft_min_switch_cpu(float* sw_value,
+                                                 float* sw_deriv,
+                                                 const float* rij,
+                                                 const int* nlist,
+                                                 const int& nloc,
+                                                 const int& nnei,
+                                                 const float& alpha,
+                                                 const float& rmin,
+                                                 const float& rmax);
diff --git a/source/lib/src/soft_min_switch_force.cc b/source/lib/src/soft_min_switch_force.cc
index 9a6633daa0..8ae10ae11c 100644
--- a/source/lib/src/soft_min_switch_force.cc
+++ b/source/lib/src/soft_min_switch_force.cc
@@ -1,15 +1,15 @@
 #include "soft_min_switch_force.h"
+
 #include 
 
-template
-void deepmd::soft_min_switch_force_cpu(
-    FPTYPE * force, 
-    const FPTYPE * du, 
-    const FPTYPE * sw_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei)
+template 
+void deepmd::soft_min_switch_force_cpu(FPTYPE* force,
+                                       const FPTYPE* du,
+                                       const FPTYPE* sw_deriv,
+                                       const int* nlist,
+                                       const int nloc,
+                                       const int nall,
+                                       const int nnei)
 //
 //	force :		nall * 3
 //	du :		nloc
@@ -17,16 +17,16 @@ void deepmd::soft_min_switch_force_cpu(
 //
 {
   // set zeros
-  for (int ii = 0; ii < nall; ++ii){
+  for (int ii = 0; ii < nall; ++ii) {
     int i_idx = ii;
     force[i_idx * 3 + 0] = (FPTYPE)0.;
     force[i_idx * 3 + 1] = (FPTYPE)0.;
     force[i_idx * 3 + 2] = (FPTYPE)0.;
   }
   // compute force of a frame
-  for (int ii = 0; ii < nloc; ++ii){
-    int i_idx = ii;	
-    for (int jj = 0; jj < nnei; ++jj){	  
+  for (int ii = 0; ii < nloc; ++ii) {
+    int i_idx = ii;
+    for (int jj = 0; jj < nnei; ++jj) {
       int j_idx = nlist[i_idx * nnei + jj];
       if (j_idx < 0) continue;
       int rij_idx_shift = (ii * nnei + jj) * 3;
@@ -37,25 +37,21 @@ void deepmd::soft_min_switch_force_cpu(
       force[j_idx * 3 + 1] -= du[i_idx] * sw_deriv[rij_idx_shift + 1];
       force[j_idx * 3 + 2] -= du[i_idx] * sw_deriv[rij_idx_shift + 2];
     }
-  }  
+  }
 }
 
-template
-void deepmd::soft_min_switch_force_cpu(
-    double * force, 
-    const double * du, 
-    const double * sw_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
+template void deepmd::soft_min_switch_force_cpu(double* force,
+                                                        const double* du,
+                                                        const double* sw_deriv,
+                                                        const int* nlist,
+                                                        const int nloc,
+                                                        const int nall,
+                                                        const int nnei);
 
-template
-void deepmd::soft_min_switch_force_cpu(
-    float * force, 
-    const float * du, 
-    const float * sw_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
+template void deepmd::soft_min_switch_force_cpu(float* force,
+                                                       const float* du,
+                                                       const float* sw_deriv,
+                                                       const int* nlist,
+                                                       const int nloc,
+                                                       const int nall,
+                                                       const int nnei);
diff --git a/source/lib/src/soft_min_switch_force_grad.cc b/source/lib/src/soft_min_switch_force_grad.cc
index 138d20d93c..3976a04a57 100644
--- a/source/lib/src/soft_min_switch_force_grad.cc
+++ b/source/lib/src/soft_min_switch_force_grad.cc
@@ -1,14 +1,14 @@
 #include "soft_min_switch_force_grad.h"
+
 #include 
 
-template
-void deepmd::soft_min_switch_force_grad_cpu(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad,
-    const FPTYPE * sw_deriv, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei)
+template 
+void deepmd::soft_min_switch_force_grad_cpu(FPTYPE* grad_net,
+                                            const FPTYPE* grad,
+                                            const FPTYPE* sw_deriv,
+                                            const int* nlist,
+                                            const int nloc,
+                                            const int nnei)
 //
 //	grad_net :	nloc
 //	grad :		nloc * 3
@@ -17,16 +17,16 @@ void deepmd::soft_min_switch_force_grad_cpu(
 //
 {
   // reset the frame to 0
-  for (int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     grad_net[ii] = (FPTYPE)0.;
-  }      
+  }
 
   // compute grad of one frame
-  for (int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     int i_idx = ii;
-    // deriv wrt center atom	
-    for (int jj = 0; jj < nnei; ++jj){
-      int j_idx = nlist [i_idx * nnei + jj];
+    // deriv wrt center atom
+    for (int jj = 0; jj < nnei; ++jj) {
+      int j_idx = nlist[i_idx * nnei + jj];
       if (j_idx >= nloc) j_idx = j_idx % nloc;
       if (j_idx < 0) continue;
       int rij_idx_shift = (ii * nnei + jj) * 3;
@@ -40,20 +40,18 @@ void deepmd::soft_min_switch_force_grad_cpu(
   }
 }
 
-template
-void deepmd::soft_min_switch_force_grad_cpu(
-    double * grad_net, 
-    const double * grad,
-    const double * sw_deriv, 
-    const int * nlist, 
-    const int nloc, 
+template void deepmd::soft_min_switch_force_grad_cpu(
+    double* grad_net,
+    const double* grad,
+    const double* sw_deriv,
+    const int* nlist,
+    const int nloc,
     const int nnei);
 
-template
-void deepmd::soft_min_switch_force_grad_cpu(
-    float * grad_net, 
-    const float * grad,
-    const float * sw_deriv, 
-    const int * nlist, 
-    const int nloc, 
+template void deepmd::soft_min_switch_force_grad_cpu(
+    float* grad_net,
+    const float* grad,
+    const float* sw_deriv,
+    const int* nlist,
+    const int nloc,
     const int nnei);
diff --git a/source/lib/src/soft_min_switch_virial.cc b/source/lib/src/soft_min_switch_virial.cc
index 634f6b6d49..6b1bbe4659 100644
--- a/source/lib/src/soft_min_switch_virial.cc
+++ b/source/lib/src/soft_min_switch_virial.cc
@@ -1,17 +1,17 @@
 #include "soft_min_switch_virial.h"
+
 #include 
 
-template
-void deepmd::soft_min_switch_virial_cpu(
-    FPTYPE * virial, 
-    FPTYPE * atom_virial, 
-    const FPTYPE * du, 
-    const FPTYPE * sw_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei)
+template 
+void deepmd::soft_min_switch_virial_cpu(FPTYPE* virial,
+                                        FPTYPE* atom_virial,
+                                        const FPTYPE* du,
+                                        const FPTYPE* sw_deriv,
+                                        const FPTYPE* rij,
+                                        const int* nlist,
+                                        const int nloc,
+                                        const int nall,
+                                        const int nnei)
 //
 //	virial :	9
 //	atom_virial :	nall * 9
@@ -19,56 +19,49 @@ void deepmd::soft_min_switch_virial_cpu(
 //	sw_deriv :	nloc * nnei * 3
 //
 {
-  for (int ii = 0; ii < 9; ++ ii){
+  for (int ii = 0; ii < 9; ++ii) {
     virial[ii] = (FPTYPE)0.;
   }
-  for (int ii = 0; ii < 9 * nall; ++ ii){
+  for (int ii = 0; ii < 9 * nall; ++ii) {
     atom_virial[ii] = (FPTYPE)0.;
   }
 
   // compute virial of a frame
-  for (int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     int i_idx = ii;
     // loop over neighbors
-    for (int jj = 0; jj < nnei; ++jj){	  
+    for (int jj = 0; jj < nnei; ++jj) {
       int j_idx = nlist[i_idx * nnei + jj];
       if (j_idx < 0) continue;
       int rij_idx_shift = (ii * nnei + jj) * 3;
-      for (int dd0 = 0; dd0 < 3; ++dd0){
-	for (int dd1 = 0; dd1 < 3; ++dd1){
-	  FPTYPE tmp_v = du[i_idx] * sw_deriv[rij_idx_shift + dd0] * rij[rij_idx_shift + dd1];
-	  virial[dd0 * 3 + dd1] -= tmp_v;		  
-	  atom_virial[j_idx * 9 + dd0 * 3 + dd1] -= tmp_v;
-	}
+      for (int dd0 = 0; dd0 < 3; ++dd0) {
+        for (int dd1 = 0; dd1 < 3; ++dd1) {
+          FPTYPE tmp_v = du[i_idx] * sw_deriv[rij_idx_shift + dd0] *
+                         rij[rij_idx_shift + dd1];
+          virial[dd0 * 3 + dd1] -= tmp_v;
+          atom_virial[j_idx * 9 + dd0 * 3 + dd1] -= tmp_v;
+        }
       }
     }
-  }  
+  }
 }
 
+template void deepmd::soft_min_switch_virial_cpu(double* virial,
+                                                         double* atom_virial,
+                                                         const double* du,
+                                                         const double* sw_deriv,
+                                                         const double* rij,
+                                                         const int* nlist,
+                                                         const int nloc,
+                                                         const int nall,
+                                                         const int nnei);
 
-template
-void deepmd::soft_min_switch_virial_cpu(
-    double * virial, 
-    double * atom_virial, 
-    const double * du, 
-    const double * sw_deriv, 
-    const double * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
-
-template
-void deepmd::soft_min_switch_virial_cpu(
-    float * virial, 
-    float * atom_virial, 
-    const float * du, 
-    const float * sw_deriv, 
-    const float * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nall, 
-    const int nnei);
-
-
-
+template void deepmd::soft_min_switch_virial_cpu(float* virial,
+                                                        float* atom_virial,
+                                                        const float* du,
+                                                        const float* sw_deriv,
+                                                        const float* rij,
+                                                        const int* nlist,
+                                                        const int nloc,
+                                                        const int nall,
+                                                        const int nnei);
diff --git a/source/lib/src/soft_min_switch_virial_grad.cc b/source/lib/src/soft_min_switch_virial_grad.cc
index 5c86376737..c94332d4ca 100644
--- a/source/lib/src/soft_min_switch_virial_grad.cc
+++ b/source/lib/src/soft_min_switch_virial_grad.cc
@@ -1,14 +1,13 @@
 #include "soft_min_switch_virial_grad.h"
 
-template
-void deepmd::soft_min_switch_virial_grad_cpu(
-    FPTYPE * grad_net, 
-    const FPTYPE * grad,
-    const FPTYPE * sw_deriv, 
-    const FPTYPE * rij, 
-    const int * nlist, 
-    const int nloc, 
-    const int nnei)
+template 
+void deepmd::soft_min_switch_virial_grad_cpu(FPTYPE* grad_net,
+                                             const FPTYPE* grad,
+                                             const FPTYPE* sw_deriv,
+                                             const FPTYPE* rij,
+                                             const int* nlist,
+                                             const int nloc,
+                                             const int nnei)
 //
 //	grad_net:	nloc
 //	grad:		9
@@ -18,47 +17,43 @@ void deepmd::soft_min_switch_virial_grad_cpu(
 //
 {
   // reset the frame to 0
-  for (int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     grad_net[ii] = (FPTYPE)0.;
-  }      
+  }
 
   // compute grad of one frame
-  for (int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     int i_idx = ii;
     // loop over neighbors
-    for (int jj = 0; jj < nnei; ++jj){
+    for (int jj = 0; jj < nnei; ++jj) {
       int j_idx = nlist[i_idx * nnei + jj];
       if (j_idx < 0) continue;
       int rij_idx_shift = (ii * nnei + jj) * 3;
-      for (int dd0 = 0; dd0 < 3; ++dd0){
-	for (int dd1 = 0; dd1 < 3; ++dd1){
-	  grad_net[i_idx] -= 
-	      grad[dd0 * 3 + dd1] * sw_deriv[rij_idx_shift + dd0] * rij[rij_idx_shift + dd1];
-	}
+      for (int dd0 = 0; dd0 < 3; ++dd0) {
+        for (int dd1 = 0; dd1 < 3; ++dd1) {
+          grad_net[i_idx] -= grad[dd0 * 3 + dd1] *
+                             sw_deriv[rij_idx_shift + dd0] *
+                             rij[rij_idx_shift + dd1];
+        }
       }
     }
   }
 }
 
-template
-void deepmd::soft_min_switch_virial_grad_cpu(
-    double * grad_net, 
-    const double * grad,
-    const double * sw_deriv, 
-    const double * rij, 
-    const int * nlist, 
-    const int nloc, 
+template void deepmd::soft_min_switch_virial_grad_cpu(
+    double* grad_net,
+    const double* grad,
+    const double* sw_deriv,
+    const double* rij,
+    const int* nlist,
+    const int nloc,
     const int nnei);
 
-template
-void deepmd::soft_min_switch_virial_grad_cpu(
-    float * grad_net, 
-    const float * grad,
-    const float * sw_deriv, 
-    const float * rij, 
-    const int * nlist, 
-    const int nloc, 
+template void deepmd::soft_min_switch_virial_grad_cpu(
+    float* grad_net,
+    const float* grad,
+    const float* sw_deriv,
+    const float* rij,
+    const int* nlist,
+    const int nloc,
     const int nnei);
-
-
-
diff --git a/source/lib/src/tabulate.cc b/source/lib/src/tabulate.cc
index b75e9155f9..dcc17e07e9 100644
--- a/source/lib/src/tabulate.cc
+++ b/source/lib/src/tabulate.cc
@@ -1,145 +1,141 @@
-#include 
+#include "tabulate.h"
+
+#include 
+
 #include 
 #include 
-#include 
-#include "tabulate.h"
+#include 
 /*
-    This inline function was designed to get the table info and bias value for current input xx!
-    lower:      indicate the lower boundary of the first table;
-    upper:      indicate the upper boundary of the first table as well as the lower boundary of the second table;
-    max:        indicate the upper boundary of the second table;
-    stride0:    indicate the stride of the first table;
+    This inline function was designed to get the table info and bias value for
+   current input xx! lower:      indicate the lower boundary of the first table;
+    upper:      indicate the upper boundary of the first table as well as the
+   lower boundary of the second table; max:        indicate the upper boundary
+   of the second table; stride0:    indicate the stride of the first table;
     stride1:    indicate the stride of the second table;
     xx:         indicate the inputs value;
     table_idx:  indicate the location of table info of input value xx;
 */
 template 
-inline void locate_xx(
-    const FPTYPE& lower, 
-    const FPTYPE& upper,
-    const FPTYPE& max, 
-    const FPTYPE& stride0, 
-    const FPTYPE& stride1, 
-    FPTYPE& xx, 
-    int& table_idx) 
-{
+inline void locate_xx(const FPTYPE& lower,
+                      const FPTYPE& upper,
+                      const FPTYPE& max,
+                      const FPTYPE& stride0,
+                      const FPTYPE& stride1,
+                      FPTYPE& xx,
+                      int& table_idx) {
   if (xx < lower) {
     table_idx = 0;
     xx = (FPTYPE)0.;
-  }
-  else if (xx < upper) {
+  } else if (xx < upper) {
     table_idx = (int)((xx - lower) / stride0);
     xx -= (table_idx * stride0 + lower);
-  }
-  else if (xx < max) {
+  } else if (xx < max) {
     int first_stride = int((upper - lower) / stride0);
     table_idx = first_stride + (int)((xx - upper) / stride1);
     xx -= ((table_idx - first_stride) * stride1 + upper);
-  }
-  else {
-    table_idx = int((upper - lower) / stride0) + (int)((max - upper) / stride1) - 1;
+  } else {
+    table_idx =
+        int((upper - lower) / stride0) + (int)((max - upper) / stride1) - 1;
     xx = (FPTYPE)0.;
   }
 }
 
-
 template 
-inline void locate_xx_se_t(
-    const FPTYPE& lower, 
-    const FPTYPE& upper,  
-    const FPTYPE& min, 
-    const FPTYPE& max, 
-    const FPTYPE& stride0, 
-    const FPTYPE& stride1,
-    FPTYPE& xx, 
-    int& table_idx)
-{
+inline void locate_xx_se_t(const FPTYPE& lower,
+                           const FPTYPE& upper,
+                           const FPTYPE& min,
+                           const FPTYPE& max,
+                           const FPTYPE& stride0,
+                           const FPTYPE& stride1,
+                           FPTYPE& xx,
+                           int& table_idx) {
   if (xx < min) {
     table_idx = 0;
     xx = (FPTYPE)0.;
-  }
-  else if (xx < lower) {
+  } else if (xx < lower) {
     table_idx = (int)((xx - min) / stride1);
     xx -= (table_idx * stride1 + min);
-  }
-  else if (xx < upper) {
+  } else if (xx < upper) {
     int first_stride = int((lower - min) / stride1);
     table_idx = first_stride + (int)((xx - lower) / stride0);
     xx -= ((table_idx - first_stride) * stride0 + lower);
-  }
-  else if (xx < max) {
-    int first_stride = int((lower - min) / stride1) + int((upper - lower) / stride0);
+  } else if (xx < max) {
+    int first_stride =
+        int((lower - min) / stride1) + int((upper - lower) / stride0);
     table_idx = first_stride + (int)((xx - upper) / stride1);
     xx -= ((table_idx - first_stride) * stride1 + upper);
-  }
-  else {
-    table_idx = int((lower - min) / stride1) + int((upper - lower) / stride0) + (int)((max - upper) / stride1) - 1;
+  } else {
+    table_idx = int((lower - min) / stride1) + int((upper - lower) / stride0) +
+                (int)((max - upper) / stride1) - 1;
     xx = (FPTYPE)0.;
   }
 }
 
 template 
-inline FPTYPE dot(
-    FPTYPE a[4], 
-    FPTYPE b[4]) 
-{
-  return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]; 
+inline FPTYPE dot(FPTYPE a[4], FPTYPE b[4]) {
+  return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
 }
 
-template
-void deepmd::tabulate_fusion_se_a_cpu(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size)
-{
+template 
+void deepmd::tabulate_fusion_se_a_cpu(FPTYPE* out,
+                                      const FPTYPE* table,
+                                      const FPTYPE* table_info,
+                                      const FPTYPE* em_x,
+                                      const FPTYPE* em,
+                                      const int nloc,
+                                      const int nnei,
+                                      const int last_layer_size) {
   memset(out, 0, sizeof(FPTYPE) * nloc * 4 * last_layer_size);
-  const FPTYPE lower   = table_info[0];
-  const FPTYPE upper   = table_info[1];
-  const FPTYPE _max    = table_info[2];
+  const FPTYPE lower = table_info[0];
+  const FPTYPE upper = table_info[1];
+  const FPTYPE _max = table_info[2];
   const FPTYPE stride0 = table_info[3];
   const FPTYPE stride1 = table_info[4];
-  // for every atom, execute a small manual gemm ~
-  // FPTYPE * res = new FPTYPE[4 * last_layer_size];
-  #pragma omp parallel for
+// for every atom, execute a small manual gemm ~
+// FPTYPE * res = new FPTYPE[4 * last_layer_size];
+#pragma omp parallel for
   for (int ii = 0; ii < nloc; ii++) {
     FPTYPE ll[4] = {0};
     FPTYPE ago = em_x[ii * nnei + nnei - 1];
-    bool unloop = false; 
-    for (int jj = 0; jj < nnei; jj++) { 
+    bool unloop = false;
+    for (int jj = 0; jj < nnei; jj++) {
       ll[0] = em[ii * nnei * 4 + jj * 4 + 0];
       ll[1] = em[ii * nnei * 4 + jj * 4 + 1];
       ll[2] = em[ii * nnei * 4 + jj * 4 + 2];
       ll[3] = em[ii * nnei * 4 + jj * 4 + 3];
-      FPTYPE xx = em_x[ii * nnei + jj]; 
+      FPTYPE xx = em_x[ii * nnei + jj];
       if (ago == xx) {
         unloop = true;
       }
       int table_idx = 0;
       locate_xx(lower, upper, _max, stride0, stride1, xx, table_idx);
       for (int kk = 0; kk < last_layer_size; kk++) {
-        FPTYPE a0  = table[table_idx * last_layer_size * 6 + 6 * kk + 0]; 
-        FPTYPE a1  = table[table_idx * last_layer_size * 6 + 6 * kk + 1]; 
-        FPTYPE a2  = table[table_idx * last_layer_size * 6 + 6 * kk + 2]; 
-        FPTYPE a3  = table[table_idx * last_layer_size * 6 + 6 * kk + 3];
-        FPTYPE a4  = table[table_idx * last_layer_size * 6 + 6 * kk + 4];
-        FPTYPE a5  = table[table_idx * last_layer_size * 6 + 6 * kk + 5];
-        FPTYPE var = a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx;
+        FPTYPE a0 = table[table_idx * last_layer_size * 6 + 6 * kk + 0];
+        FPTYPE a1 = table[table_idx * last_layer_size * 6 + 6 * kk + 1];
+        FPTYPE a2 = table[table_idx * last_layer_size * 6 + 6 * kk + 2];
+        FPTYPE a3 = table[table_idx * last_layer_size * 6 + 6 * kk + 3];
+        FPTYPE a4 = table[table_idx * last_layer_size * 6 + 6 * kk + 4];
+        FPTYPE a5 = table[table_idx * last_layer_size * 6 + 6 * kk + 5];
+        FPTYPE var =
+            a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx;
         if (unloop) {
-          out[ii * last_layer_size * 4 + 0 * last_layer_size + kk] += (nnei - jj) * var * ll[0];
-          out[ii * last_layer_size * 4 + 1 * last_layer_size + kk] += (nnei - jj) * var * ll[1];
-          out[ii * last_layer_size * 4 + 2 * last_layer_size + kk] += (nnei - jj) * var * ll[2];
-          out[ii * last_layer_size * 4 + 3 * last_layer_size + kk] += (nnei - jj) * var * ll[3];
-        }
-        else {
-          out[ii * last_layer_size * 4 + 0 * last_layer_size + kk] += var * ll[0];
-          out[ii * last_layer_size * 4 + 1 * last_layer_size + kk] += var * ll[1];
-          out[ii * last_layer_size * 4 + 2 * last_layer_size + kk] += var * ll[2];
-          out[ii * last_layer_size * 4 + 3 * last_layer_size + kk] += var * ll[3];
+          out[ii * last_layer_size * 4 + 0 * last_layer_size + kk] +=
+              (nnei - jj) * var * ll[0];
+          out[ii * last_layer_size * 4 + 1 * last_layer_size + kk] +=
+              (nnei - jj) * var * ll[1];
+          out[ii * last_layer_size * 4 + 2 * last_layer_size + kk] +=
+              (nnei - jj) * var * ll[2];
+          out[ii * last_layer_size * 4 + 3 * last_layer_size + kk] +=
+              (nnei - jj) * var * ll[3];
+        } else {
+          out[ii * last_layer_size * 4 + 0 * last_layer_size + kk] +=
+              var * ll[0];
+          out[ii * last_layer_size * 4 + 1 * last_layer_size + kk] +=
+              var * ll[1];
+          out[ii * last_layer_size * 4 + 2 * last_layer_size + kk] +=
+              var * ll[2];
+          out[ii * last_layer_size * 4 + 3 * last_layer_size + kk] +=
+              var * ll[3];
         }
       }
       if (unloop) break;
@@ -147,29 +143,27 @@ void deepmd::tabulate_fusion_se_a_cpu(
   }
 }
 
-template
-void deepmd::tabulate_fusion_se_a_grad_cpu(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size) 
-{
+template 
+void deepmd::tabulate_fusion_se_a_grad_cpu(FPTYPE* dy_dem_x,
+                                           FPTYPE* dy_dem,
+                                           const FPTYPE* table,
+                                           const FPTYPE* table_info,
+                                           const FPTYPE* em_x,
+                                           const FPTYPE* em,
+                                           const FPTYPE* dy,
+                                           const int nloc,
+                                           const int nnei,
+                                           const int last_layer_size) {
   memset(dy_dem_x, 0, sizeof(FPTYPE) * nloc * nnei);
   memset(dy_dem, 0, sizeof(FPTYPE) * nloc * nnei * 4);
-  FPTYPE const lower   = table_info[0];
-  FPTYPE const upper   = table_info[1];
-  FPTYPE const _max    = table_info[2];
+  FPTYPE const lower = table_info[0];
+  FPTYPE const upper = table_info[1];
+  FPTYPE const _max = table_info[2];
   FPTYPE const stride0 = table_info[3];
   FPTYPE const stride1 = table_info[4];
-  // for every atom, execute a small gemm~
-  // FPTYPE * res = new FPTYPE[4 * last_layer_size];
-  #pragma omp parallel for
+// for every atom, execute a small gemm~
+// FPTYPE * res = new FPTYPE[4 * last_layer_size];
+#pragma omp parallel for
   for (int ii = 0; ii < nloc; ii++) {
     FPTYPE ll[4];
     FPTYPE rr[4];
@@ -181,7 +175,7 @@ void deepmd::tabulate_fusion_se_a_grad_cpu(
       ll[1] = em[ii * nnei * 4 + jj * 4 + 1];
       ll[2] = em[ii * nnei * 4 + jj * 4 + 2];
       ll[3] = em[ii * nnei * 4 + jj * 4 + 3];
-      FPTYPE xx = em_x[ii * nnei + jj]; 
+      FPTYPE xx = em_x[ii * nnei + jj];
       if (ago == xx) {
         unloop = true;
       }
@@ -193,23 +187,27 @@ void deepmd::tabulate_fusion_se_a_grad_cpu(
         rr[1] = dy[ii * last_layer_size * 4 + 1 * last_layer_size + kk];
         rr[2] = dy[ii * last_layer_size * 4 + 2 * last_layer_size + kk];
         rr[3] = dy[ii * last_layer_size * 4 + 3 * last_layer_size + kk];
-        FPTYPE a0  = table[table_idx * last_layer_size * 6 + 6 * kk + 0]; 
-        FPTYPE a1  = table[table_idx * last_layer_size * 6 + 6 * kk + 1]; 
-        FPTYPE a2  = table[table_idx * last_layer_size * 6 + 6 * kk + 2]; 
-        FPTYPE a3  = table[table_idx * last_layer_size * 6 + 6 * kk + 3];
-        FPTYPE a4  = table[table_idx * last_layer_size * 6 + 6 * kk + 4];
-        FPTYPE a5  = table[table_idx * last_layer_size * 6 + 6 * kk + 5];
-        FPTYPE res = a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx;
+        FPTYPE a0 = table[table_idx * last_layer_size * 6 + 6 * kk + 0];
+        FPTYPE a1 = table[table_idx * last_layer_size * 6 + 6 * kk + 1];
+        FPTYPE a2 = table[table_idx * last_layer_size * 6 + 6 * kk + 2];
+        FPTYPE a3 = table[table_idx * last_layer_size * 6 + 6 * kk + 3];
+        FPTYPE a4 = table[table_idx * last_layer_size * 6 + 6 * kk + 4];
+        FPTYPE a5 = table[table_idx * last_layer_size * 6 + 6 * kk + 5];
+        FPTYPE res =
+            a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx;
 
         if (unloop) {
-          grad += (a1 + (2 * a2 + (3 * a3 + (4 * a4 + 5 * a5 * xx) * xx) * xx) * xx) * dot(ll, rr) * (nnei - jj);
+          grad += (a1 + (2 * a2 + (3 * a3 + (4 * a4 + 5 * a5 * xx) * xx) * xx) *
+                            xx) *
+                  dot(ll, rr) * (nnei - jj);
           dy_dem[ii * nnei * 4 + jj * 4 + 0] += res * rr[0] * (nnei - jj);
           dy_dem[ii * nnei * 4 + jj * 4 + 1] += res * rr[1] * (nnei - jj);
           dy_dem[ii * nnei * 4 + jj * 4 + 2] += res * rr[2] * (nnei - jj);
           dy_dem[ii * nnei * 4 + jj * 4 + 3] += res * rr[3] * (nnei - jj);
-        }
-        else {
-          grad += (a1 + (2 * a2 + (3 * a3 + (4 * a4 + 5 * a5 * xx) * xx) * xx) * xx) * dot(ll, rr);
+        } else {
+          grad += (a1 + (2 * a2 + (3 * a3 + (4 * a4 + 5 * a5 * xx) * xx) * xx) *
+                            xx) *
+                  dot(ll, rr);
           dy_dem[ii * nnei * 4 + jj * 4 + 0] += res * rr[0];
           dy_dem[ii * nnei * 4 + jj * 4 + 1] += res * rr[1];
           dy_dem[ii * nnei * 4 + jj * 4 + 2] += res * rr[2];
@@ -222,28 +220,26 @@ void deepmd::tabulate_fusion_se_a_grad_cpu(
   }
 }
 
-template
-void deepmd::tabulate_fusion_se_a_grad_grad_cpu(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei,
-    const int last_layer_size)
-{
+template 
+void deepmd::tabulate_fusion_se_a_grad_grad_cpu(FPTYPE* dz_dy,
+                                                const FPTYPE* table,
+                                                const FPTYPE* table_info,
+                                                const FPTYPE* em_x,
+                                                const FPTYPE* em,
+                                                const FPTYPE* dz_dy_dem_x,
+                                                const FPTYPE* dz_dy_dem,
+                                                const int nloc,
+                                                const int nnei,
+                                                const int last_layer_size) {
   memset(dz_dy, 0, sizeof(FPTYPE) * nloc * 4 * last_layer_size);
-  const FPTYPE lower   = table_info[0];
-  const FPTYPE upper   = table_info[1];
-  const FPTYPE _max    = table_info[2];
+  const FPTYPE lower = table_info[0];
+  const FPTYPE upper = table_info[1];
+  const FPTYPE _max = table_info[2];
   const FPTYPE stride0 = table_info[3];
   const FPTYPE stride1 = table_info[4];
-  // for every atom, execute a small manual gemm ~
-  // FPTYPE * res = new FPTYPE[4 * last_layer_size];
-  #pragma omp parallel for
+// for every atom, execute a small manual gemm ~
+// FPTYPE * res = new FPTYPE[4 * last_layer_size];
+#pragma omp parallel for
   for (int ii = 0; ii < nloc; ii++) {
     FPTYPE ll[4];
     FPTYPE hh[4];
@@ -266,25 +262,38 @@ void deepmd::tabulate_fusion_se_a_grad_grad_cpu(
       int table_idx = 0;
       locate_xx(lower, upper, _max, stride0, stride1, xx, table_idx);
       for (int kk = 0; kk < last_layer_size; kk++) {
-        FPTYPE a0  = table[table_idx * last_layer_size * 6 + 6 * kk + 0];
-        FPTYPE a1  = table[table_idx * last_layer_size * 6 + 6 * kk + 1];
-        FPTYPE a2  = table[table_idx * last_layer_size * 6 + 6 * kk + 2];
-        FPTYPE a3  = table[table_idx * last_layer_size * 6 + 6 * kk + 3];
-        FPTYPE a4  = table[table_idx * last_layer_size * 6 + 6 * kk + 4];
-        FPTYPE a5  = table[table_idx * last_layer_size * 6 + 6 * kk + 5];
-        FPTYPE var = a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx;
-        FPTYPE var_grad = a1 + ((FPTYPE)2. * a2 + ((FPTYPE)3. * a3 + ((FPTYPE)4. * a4 + (FPTYPE)5. * a5 * xx) * xx) * xx) * xx;
+        FPTYPE a0 = table[table_idx * last_layer_size * 6 + 6 * kk + 0];
+        FPTYPE a1 = table[table_idx * last_layer_size * 6 + 6 * kk + 1];
+        FPTYPE a2 = table[table_idx * last_layer_size * 6 + 6 * kk + 2];
+        FPTYPE a3 = table[table_idx * last_layer_size * 6 + 6 * kk + 3];
+        FPTYPE a4 = table[table_idx * last_layer_size * 6 + 6 * kk + 4];
+        FPTYPE a5 = table[table_idx * last_layer_size * 6 + 6 * kk + 5];
+        FPTYPE var =
+            a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx;
+        FPTYPE var_grad =
+            a1 +
+            ((FPTYPE)2. * a2 +
+             ((FPTYPE)3. * a3 + ((FPTYPE)4. * a4 + (FPTYPE)5. * a5 * xx) * xx) *
+                 xx) *
+                xx;
         if (unloop) {
-          dz_dy[ii * last_layer_size * 4 + 0 * last_layer_size + kk] += (nnei - jj) * (var * hh[0] + dz_xx * var_grad * ll[0]);
-          dz_dy[ii * last_layer_size * 4 + 1 * last_layer_size + kk] += (nnei - jj) * (var * hh[1] + dz_xx * var_grad * ll[1]);
-          dz_dy[ii * last_layer_size * 4 + 2 * last_layer_size + kk] += (nnei - jj) * (var * hh[2] + dz_xx * var_grad * ll[2]);
-          dz_dy[ii * last_layer_size * 4 + 3 * last_layer_size + kk] += (nnei - jj) * (var * hh[3] + dz_xx * var_grad * ll[3]);
-        }
-        else {
-          dz_dy[ii * last_layer_size * 4 + 0 * last_layer_size + kk] += var * hh[0] + dz_xx * var_grad * ll[0];
-          dz_dy[ii * last_layer_size * 4 + 1 * last_layer_size + kk] += var * hh[1] + dz_xx * var_grad * ll[1];
-          dz_dy[ii * last_layer_size * 4 + 2 * last_layer_size + kk] += var * hh[2] + dz_xx * var_grad * ll[2];
-          dz_dy[ii * last_layer_size * 4 + 3 * last_layer_size + kk] += var * hh[3] + dz_xx * var_grad * ll[3];
+          dz_dy[ii * last_layer_size * 4 + 0 * last_layer_size + kk] +=
+              (nnei - jj) * (var * hh[0] + dz_xx * var_grad * ll[0]);
+          dz_dy[ii * last_layer_size * 4 + 1 * last_layer_size + kk] +=
+              (nnei - jj) * (var * hh[1] + dz_xx * var_grad * ll[1]);
+          dz_dy[ii * last_layer_size * 4 + 2 * last_layer_size + kk] +=
+              (nnei - jj) * (var * hh[2] + dz_xx * var_grad * ll[2]);
+          dz_dy[ii * last_layer_size * 4 + 3 * last_layer_size + kk] +=
+              (nnei - jj) * (var * hh[3] + dz_xx * var_grad * ll[3]);
+        } else {
+          dz_dy[ii * last_layer_size * 4 + 0 * last_layer_size + kk] +=
+              var * hh[0] + dz_xx * var_grad * ll[0];
+          dz_dy[ii * last_layer_size * 4 + 1 * last_layer_size + kk] +=
+              var * hh[1] + dz_xx * var_grad * ll[1];
+          dz_dy[ii * last_layer_size * 4 + 2 * last_layer_size + kk] +=
+              var * hh[2] + dz_xx * var_grad * ll[2];
+          dz_dy[ii * last_layer_size * 4 + 3 * last_layer_size + kk] +=
+              var * hh[3] + dz_xx * var_grad * ll[3];
         }
       }
       if (unloop) break;
@@ -292,51 +301,50 @@ void deepmd::tabulate_fusion_se_a_grad_grad_cpu(
   }
 }
 
-template
-void deepmd::tabulate_fusion_se_t_cpu(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const int nloc, 
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size)
-{
+template 
+void deepmd::tabulate_fusion_se_t_cpu(FPTYPE* out,
+                                      const FPTYPE* table,
+                                      const FPTYPE* table_info,
+                                      const FPTYPE* em_x,
+                                      const FPTYPE* em,
+                                      const int nloc,
+                                      const int nnei_i,
+                                      const int nnei_j,
+                                      const int last_layer_size) {
   memset(out, 0, sizeof(FPTYPE) * nloc * last_layer_size);
-  const FPTYPE lower   = table_info[0];
-  const FPTYPE upper   = table_info[1];
-  const FPTYPE _max    = table_info[2];
+  const FPTYPE lower = table_info[0];
+  const FPTYPE upper = table_info[1];
+  const FPTYPE _max = table_info[2];
   const FPTYPE stride0 = table_info[3];
   const FPTYPE stride1 = table_info[4];
-  // for every atom, execute a small manual gemm ~
-  // FPTYPE * res = new FPTYPE[4 * last_layer_size];
-  #pragma omp parallel for
+// for every atom, execute a small manual gemm ~
+// FPTYPE * res = new FPTYPE[4 * last_layer_size];
+#pragma omp parallel for
   for (int ii = 0; ii < nloc; ii++) {
     for (int jj = 0; jj < nnei_i; jj++) {
       FPTYPE ago = em_x[ii * nnei_i * nnei_j + jj * nnei_j + nnei_j - 1];
-      bool unloop = false; 
-      for (int kk = 0; kk < nnei_j; kk++) { 
+      bool unloop = false;
+      for (int kk = 0; kk < nnei_j; kk++) {
         FPTYPE xx = em_x[ii * nnei_i * nnei_j + jj * nnei_j + kk];
         FPTYPE ll = xx;
         if (ago == xx) {
           unloop = true;
         }
         int table_idx = 0;
-        locate_xx_se_t(lower, upper, -_max, _max, stride0, stride1, xx, table_idx);
+        locate_xx_se_t(lower, upper, -_max, _max, stride0, stride1, xx,
+                       table_idx);
         for (int mm = 0; mm < last_layer_size; mm++) {
-          FPTYPE a0  = table[table_idx * last_layer_size * 6 + 6 * mm + 0]; 
-          FPTYPE a1  = table[table_idx * last_layer_size * 6 + 6 * mm + 1]; 
-          FPTYPE a2  = table[table_idx * last_layer_size * 6 + 6 * mm + 2]; 
-          FPTYPE a3  = table[table_idx * last_layer_size * 6 + 6 * mm + 3];
-          FPTYPE a4  = table[table_idx * last_layer_size * 6 + 6 * mm + 4];
-          FPTYPE a5  = table[table_idx * last_layer_size * 6 + 6 * mm + 5];
-          FPTYPE var = a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx;
+          FPTYPE a0 = table[table_idx * last_layer_size * 6 + 6 * mm + 0];
+          FPTYPE a1 = table[table_idx * last_layer_size * 6 + 6 * mm + 1];
+          FPTYPE a2 = table[table_idx * last_layer_size * 6 + 6 * mm + 2];
+          FPTYPE a3 = table[table_idx * last_layer_size * 6 + 6 * mm + 3];
+          FPTYPE a4 = table[table_idx * last_layer_size * 6 + 6 * mm + 4];
+          FPTYPE a5 = table[table_idx * last_layer_size * 6 + 6 * mm + 5];
+          FPTYPE var =
+              a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx;
           if (unloop) {
             out[ii * last_layer_size + mm] += (nnei_j - kk) * var * ll;
-          }
-          else {
+          } else {
             out[ii * last_layer_size + mm] += var * ll;
           }
         }
@@ -346,30 +354,28 @@ void deepmd::tabulate_fusion_se_t_cpu(
   }
 }
 
-template
-void deepmd::tabulate_fusion_se_t_grad_cpu(
-    FPTYPE * dy_dem_x, 
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em_x, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei_i, 
-    const int nnei_j, 
-    const int last_layer_size) 
-{
+template 
+void deepmd::tabulate_fusion_se_t_grad_cpu(FPTYPE* dy_dem_x,
+                                           FPTYPE* dy_dem,
+                                           const FPTYPE* table,
+                                           const FPTYPE* table_info,
+                                           const FPTYPE* em_x,
+                                           const FPTYPE* em,
+                                           const FPTYPE* dy,
+                                           const int nloc,
+                                           const int nnei_i,
+                                           const int nnei_j,
+                                           const int last_layer_size) {
   memset(dy_dem_x, 0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j);
-  memset(dy_dem,   0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j);
-  FPTYPE const lower   = table_info[0];
-  FPTYPE const upper   = table_info[1];
-  FPTYPE const _max    = table_info[2];
+  memset(dy_dem, 0, sizeof(FPTYPE) * nloc * nnei_i * nnei_j);
+  FPTYPE const lower = table_info[0];
+  FPTYPE const upper = table_info[1];
+  FPTYPE const _max = table_info[2];
   FPTYPE const stride0 = table_info[3];
   FPTYPE const stride1 = table_info[4];
-  // for every atom, execute a small gemm~
-  // FPTYPE * res = new FPTYPE[4 * last_layer_size];
-  #pragma omp parallel for
+// for every atom, execute a small gemm~
+// FPTYPE * res = new FPTYPE[4 * last_layer_size];
+#pragma omp parallel for
   for (int ii = 0; ii < nloc; ii++) {
     FPTYPE ll = (FPTYPE)0.;
     FPTYPE rr = (FPTYPE)0.;
@@ -384,24 +390,36 @@ void deepmd::tabulate_fusion_se_t_grad_cpu(
           unloop = true;
         }
         int table_idx = 0;
-        locate_xx_se_t(lower, upper, -_max, _max, stride0, stride1, xx, table_idx);
+        locate_xx_se_t(lower, upper, -_max, _max, stride0, stride1, xx,
+                       table_idx);
         FPTYPE grad = (FPTYPE)0.0;
         for (int mm = 0; mm < last_layer_size; mm++) {
           rr = dy[ii * last_layer_size + mm];
-          FPTYPE a0  = table[table_idx * last_layer_size * 6 + 6 * mm + 0]; 
-          FPTYPE a1  = table[table_idx * last_layer_size * 6 + 6 * mm + 1]; 
-          FPTYPE a2  = table[table_idx * last_layer_size * 6 + 6 * mm + 2]; 
-          FPTYPE a3  = table[table_idx * last_layer_size * 6 + 6 * mm + 3];
-          FPTYPE a4  = table[table_idx * last_layer_size * 6 + 6 * mm + 4];
-          FPTYPE a5  = table[table_idx * last_layer_size * 6 + 6 * mm + 5];
-          FPTYPE res = a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx;
+          FPTYPE a0 = table[table_idx * last_layer_size * 6 + 6 * mm + 0];
+          FPTYPE a1 = table[table_idx * last_layer_size * 6 + 6 * mm + 1];
+          FPTYPE a2 = table[table_idx * last_layer_size * 6 + 6 * mm + 2];
+          FPTYPE a3 = table[table_idx * last_layer_size * 6 + 6 * mm + 3];
+          FPTYPE a4 = table[table_idx * last_layer_size * 6 + 6 * mm + 4];
+          FPTYPE a5 = table[table_idx * last_layer_size * 6 + 6 * mm + 5];
+          FPTYPE res =
+              a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx;
 
           if (unloop) {
-            grad += (a1 + ((FPTYPE)2. * a2 + ((FPTYPE)3. * a3 + ((FPTYPE)4. * a4 + (FPTYPE)5. * a5 * xx) * xx) * xx) * xx) * ll * rr * (nnei_j - kk);
-            dy_dem[ii * nnei_i * nnei_j + jj * nnei_j + kk] += res * rr * (nnei_j - kk);
-          }
-          else {
-            grad += (a1 + ((FPTYPE)2. * a2 + ((FPTYPE)3. * a3 + ((FPTYPE)4. * a4 + (FPTYPE)5. * a5 * xx) * xx) * xx) * xx) * ll * rr;
+            grad += (a1 + ((FPTYPE)2. * a2 +
+                           ((FPTYPE)3. * a3 +
+                            ((FPTYPE)4. * a4 + (FPTYPE)5. * a5 * xx) * xx) *
+                               xx) *
+                              xx) *
+                    ll * rr * (nnei_j - kk);
+            dy_dem[ii * nnei_i * nnei_j + jj * nnei_j + kk] +=
+                res * rr * (nnei_j - kk);
+          } else {
+            grad += (a1 + ((FPTYPE)2. * a2 +
+                           ((FPTYPE)3. * a3 +
+                            ((FPTYPE)4. * a4 + (FPTYPE)5. * a5 * xx) * xx) *
+                               xx) *
+                              xx) *
+                    ll * rr;
             dy_dem[ii * nnei_i * nnei_j + jj * nnei_j + kk] += res * rr;
           }
         }
@@ -412,55 +430,61 @@ void deepmd::tabulate_fusion_se_t_grad_cpu(
   }
 }
 
-template
-void deepmd::tabulate_fusion_se_t_grad_grad_cpu(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em_x,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem_x,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei_i,
-    const int nnei_j,
-    const int last_layer_size)
-{
+template 
+void deepmd::tabulate_fusion_se_t_grad_grad_cpu(FPTYPE* dz_dy,
+                                                const FPTYPE* table,
+                                                const FPTYPE* table_info,
+                                                const FPTYPE* em_x,
+                                                const FPTYPE* em,
+                                                const FPTYPE* dz_dy_dem_x,
+                                                const FPTYPE* dz_dy_dem,
+                                                const int nloc,
+                                                const int nnei_i,
+                                                const int nnei_j,
+                                                const int last_layer_size) {
   memset(dz_dy, 0, sizeof(FPTYPE) * nloc * last_layer_size);
-  const FPTYPE lower   = table_info[0];
-  const FPTYPE upper   = table_info[1];
-  const FPTYPE _max    = table_info[2];
+  const FPTYPE lower = table_info[0];
+  const FPTYPE upper = table_info[1];
+  const FPTYPE _max = table_info[2];
   const FPTYPE stride0 = table_info[3];
   const FPTYPE stride1 = table_info[4];
-  // for every atom, execute a small manual gemm ~
-  // FPTYPE * res = new FPTYPE[4 * last_layer_size];
-  #pragma omp parallel for
+// for every atom, execute a small manual gemm ~
+// FPTYPE * res = new FPTYPE[4 * last_layer_size];
+#pragma omp parallel for
   for (int ii = 0; ii < nloc; ii++) {
     for (int jj = 0; jj < nnei_i; jj++) {
       FPTYPE ago = em_x[ii * nnei_i * nnei_j + jj * nnei_j + nnei_j - 1];
-      bool unloop = false; 
-      for (int kk = 0; kk < nnei_j; kk++) { 
+      bool unloop = false;
+      for (int kk = 0; kk < nnei_j; kk++) {
         FPTYPE xx = em_x[ii * nnei_i * nnei_j + jj * nnei_j + kk];
         FPTYPE tmp = xx;
-        FPTYPE dz_em = dz_dy_dem  [ii * nnei_i * nnei_j + jj * nnei_j + kk];
+        FPTYPE dz_em = dz_dy_dem[ii * nnei_i * nnei_j + jj * nnei_j + kk];
         FPTYPE dz_xx = dz_dy_dem_x[ii * nnei_i * nnei_j + jj * nnei_j + kk];
 
         if (ago == xx) {
           unloop = true;
         }
         int table_idx = 0;
-        locate_xx_se_t(lower, upper, -_max, _max, stride0, stride1, xx, table_idx);
+        locate_xx_se_t(lower, upper, -_max, _max, stride0, stride1, xx,
+                       table_idx);
         for (int mm = 0; mm < last_layer_size; mm++) {
-          FPTYPE a0  = table[table_idx * last_layer_size * 6 + 6 * mm + 0]; 
-          FPTYPE a1  = table[table_idx * last_layer_size * 6 + 6 * mm + 1]; 
-          FPTYPE a2  = table[table_idx * last_layer_size * 6 + 6 * mm + 2]; 
-          FPTYPE a3  = table[table_idx * last_layer_size * 6 + 6 * mm + 3];
-          FPTYPE a4  = table[table_idx * last_layer_size * 6 + 6 * mm + 4];
-          FPTYPE a5  = table[table_idx * last_layer_size * 6 + 6 * mm + 5];
-          FPTYPE var = a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx;
-          FPTYPE var_grad = a1 + ((FPTYPE)2. * a2 + ((FPTYPE)3. * a3 + ((FPTYPE)4. * a4 + (FPTYPE)5. * a5 * xx) * xx) * xx) * xx;
-          
-          dz_dy[ii * last_layer_size + mm] += var * dz_em + dz_xx * var_grad * tmp;
+          FPTYPE a0 = table[table_idx * last_layer_size * 6 + 6 * mm + 0];
+          FPTYPE a1 = table[table_idx * last_layer_size * 6 + 6 * mm + 1];
+          FPTYPE a2 = table[table_idx * last_layer_size * 6 + 6 * mm + 2];
+          FPTYPE a3 = table[table_idx * last_layer_size * 6 + 6 * mm + 3];
+          FPTYPE a4 = table[table_idx * last_layer_size * 6 + 6 * mm + 4];
+          FPTYPE a5 = table[table_idx * last_layer_size * 6 + 6 * mm + 5];
+          FPTYPE var =
+              a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx;
+          FPTYPE var_grad =
+              a1 + ((FPTYPE)2. * a2 +
+                    ((FPTYPE)3. * a3 +
+                     ((FPTYPE)4. * a4 + (FPTYPE)5. * a5 * xx) * xx) *
+                        xx) *
+                       xx;
+
+          dz_dy[ii * last_layer_size + mm] +=
+              var * dz_em + dz_xx * var_grad * tmp;
         }
         if (unloop) break;
       }
@@ -468,140 +492,310 @@ void deepmd::tabulate_fusion_se_t_grad_grad_cpu(
   }
 }
 
-template
-void deepmd::tabulate_fusion_se_r_cpu(
-    FPTYPE * out,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size)
-{
+template 
+void deepmd::tabulate_fusion_se_r_cpu(FPTYPE* out,
+                                      const FPTYPE* table,
+                                      const FPTYPE* table_info,
+                                      const FPTYPE* em,
+                                      const int nloc,
+                                      const int nnei,
+                                      const int last_layer_size) {
   memset(out, 0, sizeof(FPTYPE) * nloc * nnei * last_layer_size);
-  const FPTYPE lower   = table_info[0];
-  const FPTYPE upper   = table_info[1];
-  const FPTYPE _max    = table_info[2];
+  const FPTYPE lower = table_info[0];
+  const FPTYPE upper = table_info[1];
+  const FPTYPE _max = table_info[2];
   const FPTYPE stride0 = table_info[3];
   const FPTYPE stride1 = table_info[4];
-  // for every atom, execute a small manual gemm ~
-  // FPTYPE * res = new FPTYPE[4 * last_layer_size];
-  #pragma omp parallel for
+// for every atom, execute a small manual gemm ~
+// FPTYPE * res = new FPTYPE[4 * last_layer_size];
+#pragma omp parallel for
   for (int ii = 0; ii < nloc; ii++) {
-    for (int jj = 0; jj < nnei; jj++) { 
-      FPTYPE xx = em[ii * nnei + jj]; 
+    for (int jj = 0; jj < nnei; jj++) {
+      FPTYPE xx = em[ii * nnei + jj];
       int table_idx = 0;
       locate_xx(lower, upper, _max, stride0, stride1, xx, table_idx);
       for (int kk = 0; kk < last_layer_size; kk++) {
-        FPTYPE a0  = table[table_idx * last_layer_size * 6 + 6 * kk + 0]; 
-        FPTYPE a1  = table[table_idx * last_layer_size * 6 + 6 * kk + 1]; 
-        FPTYPE a2  = table[table_idx * last_layer_size * 6 + 6 * kk + 2]; 
-        FPTYPE a3  = table[table_idx * last_layer_size * 6 + 6 * kk + 3];
-        FPTYPE a4  = table[table_idx * last_layer_size * 6 + 6 * kk + 4];
-        FPTYPE a5  = table[table_idx * last_layer_size * 6 + 6 * kk + 5];
-        out[ii * last_layer_size * nnei + jj * last_layer_size + kk] = a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx;
+        FPTYPE a0 = table[table_idx * last_layer_size * 6 + 6 * kk + 0];
+        FPTYPE a1 = table[table_idx * last_layer_size * 6 + 6 * kk + 1];
+        FPTYPE a2 = table[table_idx * last_layer_size * 6 + 6 * kk + 2];
+        FPTYPE a3 = table[table_idx * last_layer_size * 6 + 6 * kk + 3];
+        FPTYPE a4 = table[table_idx * last_layer_size * 6 + 6 * kk + 4];
+        FPTYPE a5 = table[table_idx * last_layer_size * 6 + 6 * kk + 5];
+        out[ii * last_layer_size * nnei + jj * last_layer_size + kk] =
+            a0 + (a1 + (a2 + (a3 + (a4 + a5 * xx) * xx) * xx) * xx) * xx;
       }
     }
   }
 }
 
-template
-void deepmd::tabulate_fusion_se_r_grad_cpu(
-    FPTYPE * dy_dem,
-    const FPTYPE * table, 
-    const FPTYPE * table_info, 
-    const FPTYPE * em, 
-    const FPTYPE * dy, 
-    const int nloc, 
-    const int nnei, 
-    const int last_layer_size) 
-{
+template 
+void deepmd::tabulate_fusion_se_r_grad_cpu(FPTYPE* dy_dem,
+                                           const FPTYPE* table,
+                                           const FPTYPE* table_info,
+                                           const FPTYPE* em,
+                                           const FPTYPE* dy,
+                                           const int nloc,
+                                           const int nnei,
+                                           const int last_layer_size) {
   memset(dy_dem, 0, sizeof(FPTYPE) * nloc * nnei);
-  FPTYPE const lower   = table_info[0];
-  FPTYPE const upper   = table_info[1];
-  FPTYPE const _max    = table_info[2];
+  FPTYPE const lower = table_info[0];
+  FPTYPE const upper = table_info[1];
+  FPTYPE const _max = table_info[2];
   FPTYPE const stride0 = table_info[3];
   FPTYPE const stride1 = table_info[4];
-  // for every atom, execute a small gemm~
-  // FPTYPE * res = new FPTYPE[4 * last_layer_size];
-  #pragma omp parallel for
+// for every atom, execute a small gemm~
+// FPTYPE * res = new FPTYPE[4 * last_layer_size];
+#pragma omp parallel for
   for (int ii = 0; ii < nloc; ii++) {
     for (int jj = 0; jj < nnei; jj++) {
       // construct the dy/dx
-      FPTYPE xx = em[ii * nnei + jj]; 
+      FPTYPE xx = em[ii * nnei + jj];
       int table_idx = 0;
       locate_xx(lower, upper, _max, stride0, stride1, xx, table_idx);
       FPTYPE grad = (FPTYPE)0.0;
       for (int kk = 0; kk < last_layer_size; kk++) {
-        FPTYPE a0  = table[table_idx * last_layer_size * 6 + 6 * kk + 0]; 
-        FPTYPE a1  = table[table_idx * last_layer_size * 6 + 6 * kk + 1]; 
-        FPTYPE a2  = table[table_idx * last_layer_size * 6 + 6 * kk + 2]; 
-        FPTYPE a3  = table[table_idx * last_layer_size * 6 + 6 * kk + 3];
-        FPTYPE a4  = table[table_idx * last_layer_size * 6 + 6 * kk + 4];
-        FPTYPE a5  = table[table_idx * last_layer_size * 6 + 6 * kk + 5];
-        grad += (a1 + ((FPTYPE)2. * a2 + ((FPTYPE)3. * a3 + ((FPTYPE)4. * a4 + (FPTYPE)5. * a5 * xx) * xx) * xx) * xx) * dy[ii * last_layer_size * nnei + jj * last_layer_size + kk];
+        FPTYPE a0 = table[table_idx * last_layer_size * 6 + 6 * kk + 0];
+        FPTYPE a1 = table[table_idx * last_layer_size * 6 + 6 * kk + 1];
+        FPTYPE a2 = table[table_idx * last_layer_size * 6 + 6 * kk + 2];
+        FPTYPE a3 = table[table_idx * last_layer_size * 6 + 6 * kk + 3];
+        FPTYPE a4 = table[table_idx * last_layer_size * 6 + 6 * kk + 4];
+        FPTYPE a5 = table[table_idx * last_layer_size * 6 + 6 * kk + 5];
+        grad += (a1 + ((FPTYPE)2. * a2 +
+                       ((FPTYPE)3. * a3 +
+                        ((FPTYPE)4. * a4 + (FPTYPE)5. * a5 * xx) * xx) *
+                           xx) *
+                          xx) *
+                dy[ii * last_layer_size * nnei + jj * last_layer_size + kk];
       }
       dy_dem[ii * nnei + jj] = grad;
     }
   }
 }
 
-template
-void deepmd::tabulate_fusion_se_r_grad_grad_cpu(
-    FPTYPE * dz_dy,
-    const FPTYPE * table,
-    const FPTYPE * table_info,
-    const FPTYPE * em,
-    const FPTYPE * dz_dy_dem,
-    const int nloc,
-    const int nnei,
-    const int last_layer_size)
-{
+template 
+void deepmd::tabulate_fusion_se_r_grad_grad_cpu(FPTYPE* dz_dy,
+                                                const FPTYPE* table,
+                                                const FPTYPE* table_info,
+                                                const FPTYPE* em,
+                                                const FPTYPE* dz_dy_dem,
+                                                const int nloc,
+                                                const int nnei,
+                                                const int last_layer_size) {
   memset(dz_dy, 0, sizeof(FPTYPE) * nloc * nnei * last_layer_size);
-  const FPTYPE lower   = table_info[0];
-  const FPTYPE upper   = table_info[1];
-  const FPTYPE _max    = table_info[2];
+  const FPTYPE lower = table_info[0];
+  const FPTYPE upper = table_info[1];
+  const FPTYPE _max = table_info[2];
   const FPTYPE stride0 = table_info[3];
   const FPTYPE stride1 = table_info[4];
-  // for every atom, execute a small manual gemm ~
-  // FPTYPE * res = new FPTYPE[4 * last_layer_size];
-  #pragma omp parallel for
+// for every atom, execute a small manual gemm ~
+// FPTYPE * res = new FPTYPE[4 * last_layer_size];
+#pragma omp parallel for
   for (int ii = 0; ii < nloc; ii++) {
     for (int jj = 0; jj < nnei; jj++) {
       FPTYPE xx = em[ii * nnei + jj];
       int table_idx = 0;
       locate_xx(lower, upper, _max, stride0, stride1, xx, table_idx);
       for (int kk = 0; kk < last_layer_size; kk++) {
-        FPTYPE a0  = table[table_idx * last_layer_size * 6 + 6 * kk + 0];
-        FPTYPE a1  = table[table_idx * last_layer_size * 6 + 6 * kk + 1];
-        FPTYPE a2  = table[table_idx * last_layer_size * 6 + 6 * kk + 2];
-        FPTYPE a3  = table[table_idx * last_layer_size * 6 + 6 * kk + 3];
-        FPTYPE a4  = table[table_idx * last_layer_size * 6 + 6 * kk + 4];
-        FPTYPE a5  = table[table_idx * last_layer_size * 6 + 6 * kk + 5];
-        FPTYPE var_grad = a1 + ((FPTYPE)2. * a2 + ((FPTYPE)3. * a3 + ((FPTYPE)4. * a4 + (FPTYPE)5. * a5 * xx) * xx) * xx) * xx;
-        dz_dy[ii * last_layer_size * nnei + jj * last_layer_size + kk] = dz_dy_dem[ii * nnei + jj] * var_grad;
+        FPTYPE a0 = table[table_idx * last_layer_size * 6 + 6 * kk + 0];
+        FPTYPE a1 = table[table_idx * last_layer_size * 6 + 6 * kk + 1];
+        FPTYPE a2 = table[table_idx * last_layer_size * 6 + 6 * kk + 2];
+        FPTYPE a3 = table[table_idx * last_layer_size * 6 + 6 * kk + 3];
+        FPTYPE a4 = table[table_idx * last_layer_size * 6 + 6 * kk + 4];
+        FPTYPE a5 = table[table_idx * last_layer_size * 6 + 6 * kk + 5];
+        FPTYPE var_grad =
+            a1 +
+            ((FPTYPE)2. * a2 +
+             ((FPTYPE)3. * a3 + ((FPTYPE)4. * a4 + (FPTYPE)5. * a5 * xx) * xx) *
+                 xx) *
+                xx;
+        dz_dy[ii * last_layer_size * nnei + jj * last_layer_size + kk] =
+            dz_dy_dem[ii * nnei + jj] * var_grad;
       }
     }
   }
 }
 
-template void deepmd::tabulate_fusion_se_a_cpu(float * out, const float * table, const float * table_info, const float * em_x, const float * em, const int nloc, const int nnei, const int last_layer_size);
-template void deepmd::tabulate_fusion_se_a_cpu(double * out, const double * table, const double * table_info, const double * em_x, const double * em, const int nloc, const int nnei, const int last_layer_size);
-template void deepmd::tabulate_fusion_se_a_grad_cpu (float * dy_dem_x, float * dy_dem, const float * table, const float * table_info, const float * em_x, const float * em, const float * dy, const int nloc, const int nnei, const int last_layer_size); 
-template void deepmd::tabulate_fusion_se_a_grad_cpu (double * dy_dem_x, double * dy_dem, const double * table, const double * table_info, const double * em_x, const double * em, const double * dy, const int nloc, const int nnei, const int last_layer_size);
-template void deepmd::tabulate_fusion_se_a_grad_grad_cpu(float * dz_dy, const float * table, const float * table_info, const float * em_x, const float * em, const float * dz_dy_dem_x, const float * dz_dy_dem, const int nloc, const int nnei, const int last_layer_size);
-template void deepmd::tabulate_fusion_se_a_grad_grad_cpu(double * dz_dy, const double * table, const double * table_info, const double * em_x, const double * em, const double * dz_dy_dem_x, const double * dz_dy_dem, const int nloc, const int nnei, const int last_layer_size);
+template void deepmd::tabulate_fusion_se_a_cpu(
+    float* out,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_a_cpu(
+    double* out,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_a_grad_cpu(
+    float* dy_dem_x,
+    float* dy_dem,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dy,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_a_grad_cpu(
+    double* dy_dem_x,
+    double* dy_dem,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dy,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_a_grad_grad_cpu(
+    float* dz_dy,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dz_dy_dem_x,
+    const float* dz_dy_dem,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_a_grad_grad_cpu(
+    double* dz_dy,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dz_dy_dem_x,
+    const double* dz_dy_dem,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
 
-template void deepmd::tabulate_fusion_se_t_cpu(float * out, const float * table, const float * table_info, const float * em_x, const float * em, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
-template void deepmd::tabulate_fusion_se_t_cpu(double * out, const double * table, const double * table_info, const double * em_x, const double * em, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
-template void deepmd::tabulate_fusion_se_t_grad_cpu (float * dy_dem_x, float * dy_dem, const float * table, const float * table_info, const float * em_x, const float * em, const float * dy, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size); 
-template void deepmd::tabulate_fusion_se_t_grad_cpu (double * dy_dem_x, double * dy_dem, const double * table, const double * table_info, const double * em_x, const double * em, const double * dy, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
-template void deepmd::tabulate_fusion_se_t_grad_grad_cpu(float * dz_dy, const float * table, const float * table_info, const float * em_x, const float * em, const float * dz_dy_dem_x, const float * dz_dy_dem, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
-template void deepmd::tabulate_fusion_se_t_grad_grad_cpu(double * dz_dy, const double * table, const double * table_info, const double * em_x, const double * em, const double * dz_dy_dem_x, const double * dz_dy_dem, const int nloc, const int nnei_i, const int nnei_j, const int last_layer_size);
+template void deepmd::tabulate_fusion_se_t_cpu(
+    float* out,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_t_cpu(
+    double* out,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_t_grad_cpu(
+    float* dy_dem_x,
+    float* dy_dem,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dy,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_t_grad_cpu(
+    double* dy_dem_x,
+    double* dy_dem,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dy,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_t_grad_grad_cpu(
+    float* dz_dy,
+    const float* table,
+    const float* table_info,
+    const float* em_x,
+    const float* em,
+    const float* dz_dy_dem_x,
+    const float* dz_dy_dem,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_t_grad_grad_cpu(
+    double* dz_dy,
+    const double* table,
+    const double* table_info,
+    const double* em_x,
+    const double* em,
+    const double* dz_dy_dem_x,
+    const double* dz_dy_dem,
+    const int nloc,
+    const int nnei_i,
+    const int nnei_j,
+    const int last_layer_size);
 
-template void deepmd::tabulate_fusion_se_r_cpu(float * out, const float * table, const float * table_info, const float * em, const int nloc, const int nnei, const int last_layer_size);
-template void deepmd::tabulate_fusion_se_r_cpu(double * out, const double * table, const double * table_info, const double * em, const int nloc, const int nnei, const int last_layer_size);
-template void deepmd::tabulate_fusion_se_r_grad_cpu (float * dy_dem, const float * table, const float * table_info, const float * em, const float * dy, const int nloc, const int nnei, const int last_layer_size); 
-template void deepmd::tabulate_fusion_se_r_grad_cpu (double * dy_dem, const double * table, const double * table_info, const double * em, const double * dy, const int nloc, const int nnei, const int last_layer_size);
-template void deepmd::tabulate_fusion_se_r_grad_grad_cpu(float * dz_dy, const float * table, const float * table_info, const float * em, const float * dz_dy_dem, const int nloc, const int nnei, const int last_layer_size);
-template void deepmd::tabulate_fusion_se_r_grad_grad_cpu(double * dz_dy, const double * table, const double * table_info, const double * em, const double * dz_dy_dem, const int nloc, const int nnei, const int last_layer_size);
\ No newline at end of file
+template void deepmd::tabulate_fusion_se_r_cpu(
+    float* out,
+    const float* table,
+    const float* table_info,
+    const float* em,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_r_cpu(
+    double* out,
+    const double* table,
+    const double* table_info,
+    const double* em,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_r_grad_cpu(
+    float* dy_dem,
+    const float* table,
+    const float* table_info,
+    const float* em,
+    const float* dy,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_r_grad_cpu(
+    double* dy_dem,
+    const double* table,
+    const double* table_info,
+    const double* em,
+    const double* dy,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_r_grad_grad_cpu(
+    float* dz_dy,
+    const float* table,
+    const float* table_info,
+    const float* em,
+    const float* dz_dy_dem,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
+template void deepmd::tabulate_fusion_se_r_grad_grad_cpu(
+    double* dz_dy,
+    const double* table,
+    const double* table_info,
+    const double* em,
+    const double* dz_dy_dem,
+    const int nloc,
+    const int nnei,
+    const int last_layer_size);
diff --git a/source/lib/src/utilities.cc b/source/lib/src/utilities.cc
index df7bac98f5..01e87b0441 100644
--- a/source/lib/src/utilities.cc
+++ b/source/lib/src/utilities.cc
@@ -1,13 +1,10 @@
 #include "utilities.h"
 
 // functions used in custom ops
-void deepmd::cum_sum(
-    std::vector & sec, 
-    const std::vector & n_sel) 
-{
-  sec.resize (n_sel.size() + 1);
+void deepmd::cum_sum(std::vector& sec, const std::vector& n_sel) {
+  sec.resize(n_sel.size() + 1);
   sec[0] = 0;
   for (int ii = 1; ii < sec.size(); ++ii) {
-    sec[ii] = sec[ii-1] + n_sel[ii-1];
+    sec[ii] = sec[ii - 1] + n_sel[ii - 1];
   }
 }
diff --git a/source/lib/tests/CMakeLists.txt b/source/lib/tests/CMakeLists.txt
index 975b0a1458..542588535d 100644
--- a/source/lib/tests/CMakeLists.txt
+++ b/source/lib/tests/CMakeLists.txt
@@ -1,17 +1,14 @@
 cmake_minimum_required(VERSION 3.9)
 project(libdeepmd_test)
 
-
 file(GLOB TEST_SRC test_*.cc)
-add_executable( runUnitTests_lib ${TEST_SRC} )
+add_executable(runUnitTests_lib ${TEST_SRC})
 
-target_link_libraries(runUnitTests_lib GTest::gtest_main ${LIB_DEEPMD} coverage_config)
-add_test( runUnitTests_lib runUnitTests_lib )
+target_link_libraries(runUnitTests_lib GTest::gtest_main ${LIB_DEEPMD}
+                      coverage_config)
+add_test(runUnitTests_lib runUnitTests_lib)
 
-set_target_properties(
-  runUnitTests_lib
-  PROPERTIES 
-  INSTALL_RPATH "$ORIGIN/../lib"
-)
+set_target_properties(runUnitTests_lib PROPERTIES INSTALL_RPATH
+                                                  "$ORIGIN/../lib")
 
 install(TARGETS runUnitTests_lib DESTINATION bin/)
diff --git a/source/lib/tests/test_coord.cc b/source/lib/tests/test_coord.cc
index 54c58c8a11..b0c41b1dcb 100644
--- a/source/lib/tests/test_coord.cc
+++ b/source/lib/tests/test_coord.cc
@@ -1,89 +1,77 @@
 #include 
-#include 
+
 #include 
+#include 
+
 #include "coord.h"
 #include "device.h"
 
-class TestNormCoord : public ::testing::Test
-{
-protected:
-  std::vector posi = {
-    1.83, 1.56, 1.18, 
-    1.09, 1.87, 1.74,
-  };
-  std::vector boxt = {
-    3.27785716,  0.09190842,  0.14751448,  0.02331264,  3.36482777, -0.2999871 , -0.47510999, -0.38123489,  3.33561809
+class TestNormCoord : public ::testing::Test {
+ protected:
+  std::vector posi = {
+      1.83, 1.56, 1.18, 1.09, 1.87, 1.74,
   };
+  std::vector boxt = {3.27785716,  0.09190842,  0.14751448,
+                              0.02331264,  3.36482777,  -0.2999871,
+                              -0.47510999, -0.38123489, 3.33561809};
   // 10, 11, 12
-  std::vector r0 ={
-    29.16369076, 34.91737099, 39.38270378,
-    28.42369076, 35.22737099, 39.94270378
-  };
+  std::vector r0 = {29.16369076, 34.91737099, 39.38270378,
+                            28.42369076, 35.22737099, 39.94270378};
   // -10, 11, -12
-  std::vector r1 ={
-    -24.990812680000005,  42.22883995, -43.622419980000004,
-    -25.730812680000003,  42.538839949999996, -43.06241998       
-  };
+  std::vector r1 = {-24.990812680000005, 42.22883995,
+                            -43.622419980000004, -25.730812680000003,
+                            42.538839949999996,  -43.06241998};
   // 10, -11, 12
-  std::vector r2 ={
-    28.65081268, -39.10883995,  45.98241998,
-    27.91081268, -38.79883995,  46.54241998
-  };
+  std::vector r2 = {28.65081268, -39.10883995, 45.98241998,
+                            27.91081268, -38.79883995, 46.54241998};
   int natoms;
-  void SetUp() override {
-    natoms = posi.size()/3;
-  };
+  void SetUp() override { natoms = posi.size() / 3; };
 };
 
-
-TEST_F(TestNormCoord, cpu_case0)
-{
+TEST_F(TestNormCoord, cpu_case0) {
   deepmd::Region region;
   init_region_cpu(region, &boxt[0]);
-  std::vector out_c(r0);
+  std::vector out_c(r0);
   normalize_coord_cpu(&out_c[0], natoms, region);
-  for(int ii = 0; ii < posi.size(); ++ii){
+  for (int ii = 0; ii < posi.size(); ++ii) {
     EXPECT_LT(fabs(out_c[ii] - posi[ii]), 1e-12);
   }
 }
 
-TEST_F(TestNormCoord, cpu_case1)
-{
+TEST_F(TestNormCoord, cpu_case1) {
   deepmd::Region region;
   init_region_cpu(region, &boxt[0]);
-  std::vector out_c(r1);
+  std::vector out_c(r1);
   normalize_coord_cpu(&out_c[0], natoms, region);
-  for(int ii = 0; ii < posi.size(); ++ii){
+  for (int ii = 0; ii < posi.size(); ++ii) {
     EXPECT_LT(fabs(out_c[ii] - posi[ii]), 1e-12);
   }
 }
 
-TEST_F(TestNormCoord, cpu_case2)
-{
+TEST_F(TestNormCoord, cpu_case2) {
   deepmd::Region region;
   init_region_cpu(region, &boxt[0]);
-  std::vector out_c(r2);
+  std::vector out_c(r2);
   normalize_coord_cpu(&out_c[0], natoms, region);
-  for(int ii = 0; ii < posi.size(); ++ii){
+  for (int ii = 0; ii < posi.size(); ++ii) {
     EXPECT_LT(fabs(out_c[ii] - posi[ii]), 1e-12);
   }
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestNormCoord, gpu_case0)
-{
+TEST_F(TestNormCoord, gpu_case0) {
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   std::vector box_info;
   box_info.resize(18);
-  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
-  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
-  double * box_info_dev=NULL;
-  double * out_c_dev=NULL;
-  std::vector out_c(r0);
+  memcpy(&box_info[0], &boxt[0], sizeof(double) * 9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9);
+  double* box_info_dev = NULL;
+  double* out_c_dev = NULL;
+  std::vector out_c(r0);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(out_c_dev, out_c);
   region_dev.boxt = box_info_dev;
@@ -94,25 +82,24 @@ TEST_F(TestNormCoord, gpu_case0)
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
   deepmd::delete_device_memory(box_info_dev);
   deepmd::delete_device_memory(out_c_dev);
-  for(int ii = 0; ii < posi.size(); ++ii){
+  for (int ii = 0; ii < posi.size(); ++ii) {
     EXPECT_LT(fabs(out_c[ii] - posi[ii]), 1e-12);
   }
 }
 
-TEST_F(TestNormCoord, gpu_case1)
-{
+TEST_F(TestNormCoord, gpu_case1) {
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   std::vector box_info;
   box_info.resize(18);
-  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
-  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
-  double * box_info_dev=NULL;
-  double * out_c_dev=NULL;
-  std::vector out_c(r1);
+  memcpy(&box_info[0], &boxt[0], sizeof(double) * 9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9);
+  double* box_info_dev = NULL;
+  double* out_c_dev = NULL;
+  std::vector out_c(r1);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(out_c_dev, out_c);
   region_dev.boxt = box_info_dev;
@@ -123,25 +110,24 @@ TEST_F(TestNormCoord, gpu_case1)
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
   deepmd::delete_device_memory(box_info_dev);
   deepmd::delete_device_memory(out_c_dev);
-  for(int ii = 0; ii < posi.size(); ++ii){
+  for (int ii = 0; ii < posi.size(); ++ii) {
     EXPECT_LT(fabs(out_c[ii] - posi[ii]), 1e-12);
   }
 }
 
-TEST_F(TestNormCoord, gpu_case2)
-{
+TEST_F(TestNormCoord, gpu_case2) {
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   std::vector box_info;
   box_info.resize(18);
-  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
-  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
-  double * box_info_dev=NULL;
-  double * out_c_dev=NULL;
-  std::vector out_c(r2);
+  memcpy(&box_info[0], &boxt[0], sizeof(double) * 9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9);
+  double* box_info_dev = NULL;
+  double* out_c_dev = NULL;
+  std::vector out_c(r2);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(out_c_dev, out_c);
   region_dev.boxt = box_info_dev;
@@ -152,28 +138,27 @@ TEST_F(TestNormCoord, gpu_case2)
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
   deepmd::delete_device_memory(box_info_dev);
   deepmd::delete_device_memory(out_c_dev);
-  for(int ii = 0; ii < posi.size(); ++ii){
+  for (int ii = 0; ii < posi.size(); ++ii) {
     EXPECT_LT(fabs(out_c[ii] - posi[ii]), 1e-12);
   }
 }
 
-#endif //GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestNormCoord, gpu_case0)
-{
+TEST_F(TestNormCoord, gpu_case0) {
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   std::vector box_info;
   box_info.resize(18);
-  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
-  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
-  double * box_info_dev=NULL;
-  double * out_c_dev=NULL;
-  std::vector out_c(r0);
+  memcpy(&box_info[0], &boxt[0], sizeof(double) * 9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9);
+  double* box_info_dev = NULL;
+  double* out_c_dev = NULL;
+  std::vector out_c(r0);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(out_c_dev, out_c);
   region_dev.boxt = box_info_dev;
@@ -184,25 +169,24 @@ TEST_F(TestNormCoord, gpu_case0)
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
   deepmd::delete_device_memory(box_info_dev);
   deepmd::delete_device_memory(out_c_dev);
-  for(int ii = 0; ii < posi.size(); ++ii){
+  for (int ii = 0; ii < posi.size(); ++ii) {
     EXPECT_LT(fabs(out_c[ii] - posi[ii]), 1e-12);
   }
 }
 
-TEST_F(TestNormCoord, gpu_case1)
-{
+TEST_F(TestNormCoord, gpu_case1) {
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   std::vector box_info;
   box_info.resize(18);
-  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
-  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
-  double * box_info_dev=NULL;
-  double * out_c_dev=NULL;
-  std::vector out_c(r1);
+  memcpy(&box_info[0], &boxt[0], sizeof(double) * 9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9);
+  double* box_info_dev = NULL;
+  double* out_c_dev = NULL;
+  std::vector out_c(r1);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(out_c_dev, out_c);
   region_dev.boxt = box_info_dev;
@@ -213,25 +197,24 @@ TEST_F(TestNormCoord, gpu_case1)
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
   deepmd::delete_device_memory(box_info_dev);
   deepmd::delete_device_memory(out_c_dev);
-  for(int ii = 0; ii < posi.size(); ++ii){
+  for (int ii = 0; ii < posi.size(); ++ii) {
     EXPECT_LT(fabs(out_c[ii] - posi[ii]), 1e-12);
   }
 }
 
-TEST_F(TestNormCoord, gpu_case2)
-{
+TEST_F(TestNormCoord, gpu_case2) {
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   std::vector box_info;
   box_info.resize(18);
-  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
-  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
-  double * box_info_dev=NULL;
-  double * out_c_dev=NULL;
-  std::vector out_c(r2);
+  memcpy(&box_info[0], &boxt[0], sizeof(double) * 9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9);
+  double* box_info_dev = NULL;
+  double* out_c_dev = NULL;
+  std::vector out_c(r2);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(out_c_dev, out_c);
   region_dev.boxt = box_info_dev;
@@ -242,78 +225,84 @@ TEST_F(TestNormCoord, gpu_case2)
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
   deepmd::delete_device_memory(box_info_dev);
   deepmd::delete_device_memory(out_c_dev);
-  for(int ii = 0; ii < posi.size(); ++ii){
+  for (int ii = 0; ii < posi.size(); ++ii) {
     EXPECT_LT(fabs(out_c[ii] - posi[ii]), 1e-12);
   }
 }
 
-#endif //TENSORFLOW_USE_ROCM
+#endif  // TENSORFLOW_USE_ROCM
 
-typedef std::pair,std::vector> atom;
+typedef std::pair, std::vector> atom;
 
-static void
-sort_atoms(
-    std::vector & coord,
-    std::vector & atype,
-    std::vector & mapping,
-    const std::vector & icoord,
-    const std::vector & iatype,
-    const std::vector & imapping,
-    const int start,
-    const int end
-    )
-{
+static void sort_atoms(std::vector& coord,
+                       std::vector& atype,
+                       std::vector& mapping,
+                       const std::vector& icoord,
+                       const std::vector& iatype,
+                       const std::vector& imapping,
+                       const int start,
+                       const int end) {
   int natoms = end - start;
   std::vector atoms(natoms);
-  for(int ii = start; ii < end; ++ii){
+  for (int ii = start; ii < end; ++ii) {
     atom tmp_atom;
     tmp_atom.first.resize(3);
-    for(int dd = 0; dd < 3; ++dd){
-      tmp_atom.first[dd] = icoord[ii*3+dd];
+    for (int dd = 0; dd < 3; ++dd) {
+      tmp_atom.first[dd] = icoord[ii * 3 + dd];
     }
     tmp_atom.second.resize(2);
     tmp_atom.second[0] = iatype[ii];
     tmp_atom.second[1] = imapping[ii];
-    atoms[ii-start] = tmp_atom;
+    atoms[ii - start] = tmp_atom;
   }
   std::sort(atoms.begin(), atoms.end());
   coord = icoord;
   atype = iatype;
   mapping = imapping;
-  for(int ii = start; ii < end; ++ii){
-    for(int dd = 0; dd < 3; ++dd){
-      coord[ii*3+dd] = atoms[ii-start].first[dd];
+  for (int ii = start; ii < end; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      coord[ii * 3 + dd] = atoms[ii - start].first[dd];
     }
-    atype[ii] = atoms[ii-start].second[0];
-    mapping[ii] = atoms[ii-start].second[1];
+    atype[ii] = atoms[ii - start].second[0];
+    mapping[ii] = atoms[ii - start].second[1];
   }
 }
 
-class TestCopyCoord : public ::testing::Test
-{
-protected:
-  std::vector posi = {
-    12.83, 2.56, 2.18, 
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector _expected_posi_cpy = {
-    12.83, 2.56, 2.18, 12.09, 2.87, 2.74, 0.25, 3.32, 1.68, 3.36, 3.00, 1.81, 3.51, 2.51, 2.60, 4.27, 3.22, 1.56, -0.17, 2.56, 2.18, -0.91, 2.87, 2.74, -0.17, 2.56, 15.18, -0.91, 2.87, 15.74, -0.17, 15.56, 2.18, -0.91, 15.87, 2.74, -0.17, 15.56, 15.18, -0.91, 15.87, 15.74, 0.25, 3.32, 14.68, 3.36, 3.00, 14.81, 3.51, 2.51, 15.60, 4.27, 3.22, 14.56, 0.25, 16.32, 1.68, 3.36, 16.00, 1.81, 3.51, 15.51, 2.60, 4.27, 16.22, 1.56, 0.25, 16.32, 14.68, 3.36, 16.00, 14.81, 3.51, 15.51, 15.60, 4.27, 16.22, 14.56, 12.83, 2.56, 15.18, 12.09, 2.87, 15.74, 12.83, 15.56, 2.18, 12.09, 15.87, 2.74, 12.83, 15.56, 15.18, 12.09, 15.87, 15.74, 13.25, 3.32, 1.68, 16.36, 3.00, 1.81, 16.51, 2.51, 2.60, 17.27, 3.22, 1.56, 13.25, 3.32, 14.68, 16.36, 3.00, 14.81, 16.51, 2.51, 15.60, 17.27, 3.22, 14.56, 13.25, 16.32, 1.68, 16.36, 16.00, 1.81, 16.51, 15.51, 2.60, 17.27, 16.22, 1.56, 13.25, 16.32, 14.68, 16.36, 16.00, 14.81, 16.51, 15.51, 15.60, 17.27, 16.22, 14.56, 
+class TestCopyCoord : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector _expected_posi_cpy = {
+      12.83, 2.56,  2.18,  12.09, 2.87,  2.74,  0.25,  3.32,  1.68,  3.36,
+      3.00,  1.81,  3.51,  2.51,  2.60,  4.27,  3.22,  1.56,  -0.17, 2.56,
+      2.18,  -0.91, 2.87,  2.74,  -0.17, 2.56,  15.18, -0.91, 2.87,  15.74,
+      -0.17, 15.56, 2.18,  -0.91, 15.87, 2.74,  -0.17, 15.56, 15.18, -0.91,
+      15.87, 15.74, 0.25,  3.32,  14.68, 3.36,  3.00,  14.81, 3.51,  2.51,
+      15.60, 4.27,  3.22,  14.56, 0.25,  16.32, 1.68,  3.36,  16.00, 1.81,
+      3.51,  15.51, 2.60,  4.27,  16.22, 1.56,  0.25,  16.32, 14.68, 3.36,
+      16.00, 14.81, 3.51,  15.51, 15.60, 4.27,  16.22, 14.56, 12.83, 2.56,
+      15.18, 12.09, 2.87,  15.74, 12.83, 15.56, 2.18,  12.09, 15.87, 2.74,
+      12.83, 15.56, 15.18, 12.09, 15.87, 15.74, 13.25, 3.32,  1.68,  16.36,
+      3.00,  1.81,  16.51, 2.51,  2.60,  17.27, 3.22,  1.56,  13.25, 3.32,
+      14.68, 16.36, 3.00,  14.81, 16.51, 2.51,  15.60, 17.27, 3.22,  14.56,
+      13.25, 16.32, 1.68,  16.36, 16.00, 1.81,  16.51, 15.51, 2.60,  17.27,
+      16.22, 1.56,  13.25, 16.32, 14.68, 16.36, 16.00, 14.81, 16.51, 15.51,
+      15.60, 17.27, 16.22, 14.56,
   };
-  std::vector expected_posi_cpy;
-  std::vector _expected_atype_cpy = {
-    0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 
+  std::vector expected_posi_cpy;
+  std::vector _expected_atype_cpy = {
+      0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
+      1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
   };
-  std::vector expected_atype_cpy;
-  std::vector _expected_mapping = {
-    0, 1, 2, 3, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 0, 1, 0, 1, 0, 1, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 
+  std::vector expected_atype_cpy;
+  std::vector _expected_mapping = {
+      0, 1, 2, 3, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3,
+      4, 5, 0, 1, 0, 1, 0, 1, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5,
   };
-  std::vector expected_mapping;
-  int ntypes = 2;  
+  std::vector expected_mapping;
+  int ntypes = 2;
   int nloc, expected_nall;
   double rc = 6;
   std::vector boxt = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
@@ -324,120 +313,93 @@ class TestCopyCoord : public ::testing::Test
     expected_nall = _expected_posi_cpy.size() / 3;
     EXPECT_EQ(expected_nall, _expected_atype_cpy.size());
     EXPECT_EQ(expected_nall, _expected_mapping.size());
-    // sort the atoms between nloc and nall, to remove the uncertainty of the ordering
-    sort_atoms(
-	expected_posi_cpy,
-	expected_atype_cpy,
-	expected_mapping,
-	_expected_posi_cpy,
-	_expected_atype_cpy,
-	_expected_mapping,
-	nloc,
-	expected_nall);
-  }  
+    // sort the atoms between nloc and nall, to remove the uncertainty of the
+    // ordering
+    sort_atoms(expected_posi_cpy, expected_atype_cpy, expected_mapping,
+               _expected_posi_cpy, _expected_atype_cpy, _expected_mapping, nloc,
+               expected_nall);
+  }
 };
 
-
-
-
-TEST_F(TestCopyCoord, cpu)
-{
+TEST_F(TestCopyCoord, cpu) {
   int mem_size = 1000;
-  std::vector out_c(mem_size * 3);
-  std::vector out_t(mem_size);
-  std::vector mapping(mem_size);
+  std::vector out_c(mem_size * 3);
+  std::vector out_t(mem_size);
+  std::vector mapping(mem_size);
   int nall;
   deepmd::Region region;
   init_region_cpu(region, &boxt[0]);
-  
-  int ret = copy_coord_cpu(
-      &out_c[0],
-      &out_t[0],
-      &mapping[0],
-      &nall,
-      &posi[0],
-      &atype[0],
-      nloc,
-      mem_size,
-      rc,
-      region);
+
+  int ret = copy_coord_cpu(&out_c[0], &out_t[0], &mapping[0], &nall, &posi[0],
+                           &atype[0], nloc, mem_size, rc, region);
   EXPECT_EQ(ret, 0);
   EXPECT_EQ(nall, expected_nall);
-  // std::cout << "---------------------" 
-  // 	    << nloc << " " 
+  // std::cout << "---------------------"
+  // 	    << nloc << " "
   // 	    << nall << std::endl;
-  
-  out_c.resize(nall*3);
+
+  out_c.resize(nall * 3);
   out_t.resize(nall);
   mapping.resize(nall);
-  
-  std::vector out_c_1(mem_size * 3);
-  std::vector out_t_1(mem_size);
-  std::vector mapping_1(mem_size);
+
+  std::vector out_c_1(mem_size * 3);
+  std::vector out_t_1(mem_size);
+  std::vector mapping_1(mem_size);
   sort_atoms(out_c_1, out_t_1, mapping_1, out_c, out_t, mapping, nloc, nall);
-  for(int ii = 0; ii < expected_nall; ++ii){
-    for(int dd = 0; dd < 3; ++dd){
-      EXPECT_LT(fabs(out_c_1[ii*3+dd] - expected_posi_cpy[ii*3+dd]), 1e-12);
+  for (int ii = 0; ii < expected_nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      EXPECT_LT(fabs(out_c_1[ii * 3 + dd] - expected_posi_cpy[ii * 3 + dd]),
+                1e-12);
     }
     EXPECT_EQ(out_t_1[ii], expected_atype_cpy[ii]);
     EXPECT_EQ(mapping_1[ii], expected_mapping[ii]);
-  }  
+  }
 }
 
-TEST_F(TestCopyCoord, cpu_lessmem)
-{
+TEST_F(TestCopyCoord, cpu_lessmem) {
   int mem_size = 40;
-  std::vector out_c(mem_size * 3);
-  std::vector out_t(mem_size);
-  std::vector mapping(mem_size);
+  std::vector out_c(mem_size * 3);
+  std::vector out_t(mem_size);
+  std::vector mapping(mem_size);
   int nall;
   deepmd::Region region;
   init_region_cpu(region, &boxt[0]);
-  
-  int ret = copy_coord_cpu(
-      &out_c[0],
-      &out_t[0],
-      &mapping[0],
-      &nall,
-      &posi[0],
-      &atype[0],
-      nloc,
-      mem_size,
-      rc,
-      region);
+
+  int ret = copy_coord_cpu(&out_c[0], &out_t[0], &mapping[0], &nall, &posi[0],
+                           &atype[0], nloc, mem_size, rc, region);
   EXPECT_EQ(ret, 1);
   // EXPECT_EQ(nall, expected_nall);
-  // std::cout << "---------------------" 
-  // 	    << nloc << " " 
+  // std::cout << "---------------------"
+  // 	    << nloc << " "
   // 	    << nall << std::endl;
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestCopyCoord, gpu)
-{
+TEST_F(TestCopyCoord, gpu) {
   int mem_size = 1000;
-  std::vector out_c(mem_size * 3);
-  std::vector out_t(mem_size);
-  std::vector mapping(mem_size);
+  std::vector out_c(mem_size * 3);
+  std::vector out_t(mem_size);
+  std::vector mapping(mem_size);
   int nall;
   std::vector cell_info;
   cell_info.resize(23);
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   deepmd::compute_cell_info(&cell_info[0], rc, region);
   std::vector box_info;
   box_info.resize(18);
-  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
-  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
-  const int loc_cellnum=cell_info[21];
-  const int total_cellnum=cell_info[22];
-  int * cell_info_dev=NULL;
-  double * box_info_dev=NULL;
-  double * out_c_dev=NULL, * in_c_dev=NULL;
-  int * out_t_dev=NULL, * in_t_dev=NULL, * mapping_dev=NULL, * int_data_dev=NULL;
+  memcpy(&box_info[0], &boxt[0], sizeof(double) * 9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9);
+  const int loc_cellnum = cell_info[21];
+  const int total_cellnum = cell_info[22];
+  int* cell_info_dev = NULL;
+  double* box_info_dev = NULL;
+  double *out_c_dev = NULL, *in_c_dev = NULL;
+  int *out_t_dev = NULL, *in_t_dev = NULL, *mapping_dev = NULL,
+      *int_data_dev = NULL;
   deepmd::malloc_device_memory_sync(cell_info_dev, cell_info);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(in_c_dev, posi);
@@ -445,23 +407,16 @@ TEST_F(TestCopyCoord, gpu)
   deepmd::malloc_device_memory(out_c_dev, mem_size * 3);
   deepmd::malloc_device_memory(out_t_dev, mem_size);
   deepmd::malloc_device_memory(mapping_dev, mem_size);
-  deepmd::malloc_device_memory(int_data_dev, nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1+nloc);
+  deepmd::malloc_device_memory(
+      int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 +
+                        total_cellnum * 3 + loc_cellnum + 1 + total_cellnum +
+                        1 + nloc);
   region_dev.boxt = box_info_dev;
   region_dev.rec_boxt = box_info_dev + 9;
-  int ret = deepmd::copy_coord_gpu(
-      out_c_dev, 
-      out_t_dev, 
-      mapping_dev, 
-      &nall,
-      int_data_dev,
-      in_c_dev,
-      in_t_dev,
-      nloc,
-      mem_size,
-      loc_cellnum,
-      total_cellnum,
-      cell_info_dev,
-      region_dev);
+  int ret = deepmd::copy_coord_gpu(out_c_dev, out_t_dev, mapping_dev, &nall,
+                                   int_data_dev, in_c_dev, in_t_dev, nloc,
+                                   mem_size, loc_cellnum, total_cellnum,
+                                   cell_info_dev, region_dev);
   region_dev.boxt = new_boxt;
   region_dev.rec_boxt = new_rec_boxt;
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
@@ -477,48 +432,49 @@ TEST_F(TestCopyCoord, gpu)
   deepmd::delete_device_memory(int_data_dev);
   EXPECT_EQ(ret, 0);
   EXPECT_EQ(nall, expected_nall);
-  out_c.resize(nall*3);
+  out_c.resize(nall * 3);
   out_t.resize(nall);
   mapping.resize(nall);
 
-  std::vector out_c_1(mem_size * 3);
-  std::vector out_t_1(mem_size);
-  std::vector mapping_1(mem_size);
+  std::vector out_c_1(mem_size * 3);
+  std::vector out_t_1(mem_size);
+  std::vector mapping_1(mem_size);
   sort_atoms(out_c_1, out_t_1, mapping_1, out_c, out_t, mapping, nloc, nall);
-  for(int ii = 0; ii < expected_nall; ++ii){
-    for(int dd = 0; dd < 3; ++dd){
-      EXPECT_LT(fabs(out_c_1[ii*3+dd] - expected_posi_cpy[ii*3+dd]), 1e-12);
+  for (int ii = 0; ii < expected_nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      EXPECT_LT(fabs(out_c_1[ii * 3 + dd] - expected_posi_cpy[ii * 3 + dd]),
+                1e-12);
     }
     EXPECT_EQ(out_t_1[ii], expected_atype_cpy[ii]);
     EXPECT_EQ(mapping_1[ii], expected_mapping[ii]);
-  }  
+  }
 }
 
-TEST_F(TestCopyCoord, gpu_lessmem)
-{
+TEST_F(TestCopyCoord, gpu_lessmem) {
   int mem_size = 40;
-  std::vector out_c(mem_size * 3);
-  std::vector out_t(mem_size);
-  std::vector mapping(mem_size);
+  std::vector out_c(mem_size * 3);
+  std::vector out_t(mem_size);
+  std::vector mapping(mem_size);
   int nall;
   std::vector cell_info;
   cell_info.resize(23);
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   deepmd::compute_cell_info(&cell_info[0], rc, region);
   std::vector box_info;
   box_info.resize(18);
-  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
-  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
-  const int loc_cellnum=cell_info[21];
-  const int total_cellnum=cell_info[22];
-  int * cell_info_dev=NULL;
-  double * box_info_dev=NULL;
-  double * out_c_dev=NULL, * in_c_dev=NULL;
-  int * out_t_dev=NULL, * in_t_dev=NULL, * mapping_dev=NULL, * int_data_dev=NULL;
+  memcpy(&box_info[0], &boxt[0], sizeof(double) * 9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9);
+  const int loc_cellnum = cell_info[21];
+  const int total_cellnum = cell_info[22];
+  int* cell_info_dev = NULL;
+  double* box_info_dev = NULL;
+  double *out_c_dev = NULL, *in_c_dev = NULL;
+  int *out_t_dev = NULL, *in_t_dev = NULL, *mapping_dev = NULL,
+      *int_data_dev = NULL;
   deepmd::malloc_device_memory_sync(cell_info_dev, cell_info);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(in_c_dev, posi);
@@ -526,23 +482,16 @@ TEST_F(TestCopyCoord, gpu_lessmem)
   deepmd::malloc_device_memory(out_c_dev, mem_size * 3);
   deepmd::malloc_device_memory(out_t_dev, mem_size);
   deepmd::malloc_device_memory(mapping_dev, mem_size);
-  deepmd::malloc_device_memory(int_data_dev, nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1+nloc);
+  deepmd::malloc_device_memory(
+      int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 +
+                        total_cellnum * 3 + loc_cellnum + 1 + total_cellnum +
+                        1 + nloc);
   region_dev.boxt = box_info_dev;
   region_dev.rec_boxt = box_info_dev + 9;
-  int ret = deepmd::copy_coord_gpu(
-      out_c_dev, 
-      out_t_dev, 
-      mapping_dev, 
-      &nall,
-      int_data_dev,
-      in_c_dev,
-      in_t_dev,
-      nloc,
-      mem_size,
-      loc_cellnum,
-      total_cellnum,
-      cell_info_dev,
-      region_dev);
+  int ret = deepmd::copy_coord_gpu(out_c_dev, out_t_dev, mapping_dev, &nall,
+                                   int_data_dev, in_c_dev, in_t_dev, nloc,
+                                   mem_size, loc_cellnum, total_cellnum,
+                                   cell_info_dev, region_dev);
   region_dev.boxt = new_boxt;
   region_dev.rec_boxt = new_rec_boxt;
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
@@ -558,38 +507,38 @@ TEST_F(TestCopyCoord, gpu_lessmem)
   deepmd::delete_device_memory(int_data_dev);
   EXPECT_EQ(ret, 1);
   // EXPECT_EQ(nall, expected_nall);
-  // std::cout << "---------------------" 
-  // 	    << nloc << " " 
+  // std::cout << "---------------------"
+  // 	    << nloc << " "
   // 	    << nall << std::endl;
 }
-#endif //GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestCopyCoord, gpu)
-{
+TEST_F(TestCopyCoord, gpu) {
   int mem_size = 1000;
-  std::vector out_c(mem_size * 3);
-  std::vector out_t(mem_size);
-  std::vector mapping(mem_size);
+  std::vector out_c(mem_size * 3);
+  std::vector out_t(mem_size);
+  std::vector mapping(mem_size);
   int nall;
   std::vector cell_info;
   cell_info.resize(23);
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   deepmd::compute_cell_info(&cell_info[0], rc, region);
   std::vector box_info;
   box_info.resize(18);
-  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
-  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
-  const int loc_cellnum=cell_info[21];
-  const int total_cellnum=cell_info[22];
-  int * cell_info_dev=NULL;
-  double * box_info_dev=NULL;
-  double * out_c_dev=NULL, * in_c_dev=NULL;
-  int * out_t_dev=NULL, * in_t_dev=NULL, * mapping_dev=NULL, * int_data_dev=NULL;
+  memcpy(&box_info[0], &boxt[0], sizeof(double) * 9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9);
+  const int loc_cellnum = cell_info[21];
+  const int total_cellnum = cell_info[22];
+  int* cell_info_dev = NULL;
+  double* box_info_dev = NULL;
+  double *out_c_dev = NULL, *in_c_dev = NULL;
+  int *out_t_dev = NULL, *in_t_dev = NULL, *mapping_dev = NULL,
+      *int_data_dev = NULL;
   deepmd::malloc_device_memory_sync(cell_info_dev, cell_info);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(in_c_dev, posi);
@@ -597,22 +546,15 @@ TEST_F(TestCopyCoord, gpu)
   deepmd::malloc_device_memory(out_c_dev, mem_size * 3);
   deepmd::malloc_device_memory(out_t_dev, mem_size);
   deepmd::malloc_device_memory(mapping_dev, mem_size);
-  deepmd::malloc_device_memory(int_data_dev, nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1+nloc);
+  deepmd::malloc_device_memory(
+      int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 +
+                        total_cellnum * 3 + loc_cellnum + 1 + total_cellnum +
+                        1 + nloc);
   region_dev.boxt = box_info_dev;
   region_dev.rec_boxt = box_info_dev + 9;
   int ret = deepmd::copy_coord_gpu_rocm(
-      out_c_dev, 
-      out_t_dev, 
-      mapping_dev, 
-      &nall,
-      int_data_dev,
-      in_c_dev,
-      in_t_dev,
-      nloc,
-      mem_size,
-      loc_cellnum,
-      total_cellnum,
-      cell_info_dev,
+      out_c_dev, out_t_dev, mapping_dev, &nall, int_data_dev, in_c_dev,
+      in_t_dev, nloc, mem_size, loc_cellnum, total_cellnum, cell_info_dev,
       region_dev);
   region_dev.boxt = new_boxt;
   region_dev.rec_boxt = new_rec_boxt;
@@ -629,48 +571,49 @@ TEST_F(TestCopyCoord, gpu)
   deepmd::delete_device_memory(int_data_dev);
   EXPECT_EQ(ret, 0);
   EXPECT_EQ(nall, expected_nall);
-  out_c.resize(nall*3);
+  out_c.resize(nall * 3);
   out_t.resize(nall);
   mapping.resize(nall);
 
-  std::vector out_c_1(mem_size * 3);
-  std::vector out_t_1(mem_size);
-  std::vector mapping_1(mem_size);
+  std::vector out_c_1(mem_size * 3);
+  std::vector out_t_1(mem_size);
+  std::vector mapping_1(mem_size);
   sort_atoms(out_c_1, out_t_1, mapping_1, out_c, out_t, mapping, nloc, nall);
-  for(int ii = 0; ii < expected_nall; ++ii){
-    for(int dd = 0; dd < 3; ++dd){
-      EXPECT_LT(fabs(out_c_1[ii*3+dd] - expected_posi_cpy[ii*3+dd]), 1e-12);
+  for (int ii = 0; ii < expected_nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      EXPECT_LT(fabs(out_c_1[ii * 3 + dd] - expected_posi_cpy[ii * 3 + dd]),
+                1e-12);
     }
     EXPECT_EQ(out_t_1[ii], expected_atype_cpy[ii]);
     EXPECT_EQ(mapping_1[ii], expected_mapping[ii]);
-  }  
+  }
 }
 
-TEST_F(TestCopyCoord, gpu_lessmem)
-{
+TEST_F(TestCopyCoord, gpu_lessmem) {
   int mem_size = 40;
-  std::vector out_c(mem_size * 3);
-  std::vector out_t(mem_size);
-  std::vector mapping(mem_size);
+  std::vector out_c(mem_size * 3);
+  std::vector out_t(mem_size);
+  std::vector mapping(mem_size);
   int nall;
   std::vector cell_info;
   cell_info.resize(23);
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   deepmd::compute_cell_info(&cell_info[0], rc, region);
   std::vector box_info;
   box_info.resize(18);
-  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
-  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
-  const int loc_cellnum=cell_info[21];
-  const int total_cellnum=cell_info[22];
-  int * cell_info_dev=NULL;
-  double * box_info_dev=NULL;
-  double * out_c_dev=NULL, * in_c_dev=NULL;
-  int * out_t_dev=NULL, * in_t_dev=NULL, * mapping_dev=NULL, * int_data_dev=NULL;
+  memcpy(&box_info[0], &boxt[0], sizeof(double) * 9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9);
+  const int loc_cellnum = cell_info[21];
+  const int total_cellnum = cell_info[22];
+  int* cell_info_dev = NULL;
+  double* box_info_dev = NULL;
+  double *out_c_dev = NULL, *in_c_dev = NULL;
+  int *out_t_dev = NULL, *in_t_dev = NULL, *mapping_dev = NULL,
+      *int_data_dev = NULL;
   deepmd::malloc_device_memory_sync(cell_info_dev, cell_info);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(in_c_dev, posi);
@@ -678,22 +621,15 @@ TEST_F(TestCopyCoord, gpu_lessmem)
   deepmd::malloc_device_memory(out_c_dev, mem_size * 3);
   deepmd::malloc_device_memory(out_t_dev, mem_size);
   deepmd::malloc_device_memory(mapping_dev, mem_size);
-  deepmd::malloc_device_memory(int_data_dev, nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1+nloc);
+  deepmd::malloc_device_memory(
+      int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 +
+                        total_cellnum * 3 + loc_cellnum + 1 + total_cellnum +
+                        1 + nloc);
   region_dev.boxt = box_info_dev;
   region_dev.rec_boxt = box_info_dev + 9;
   int ret = deepmd::copy_coord_gpu_rocm(
-      out_c_dev, 
-      out_t_dev, 
-      mapping_dev, 
-      &nall,
-      int_data_dev,
-      in_c_dev,
-      in_t_dev,
-      nloc,
-      mem_size,
-      loc_cellnum,
-      total_cellnum,
-      cell_info_dev,
+      out_c_dev, out_t_dev, mapping_dev, &nall, int_data_dev, in_c_dev,
+      in_t_dev, nloc, mem_size, loc_cellnum, total_cellnum, cell_info_dev,
       region_dev);
   region_dev.boxt = new_boxt;
   region_dev.rec_boxt = new_rec_boxt;
@@ -710,38 +646,48 @@ TEST_F(TestCopyCoord, gpu_lessmem)
   deepmd::delete_device_memory(int_data_dev);
   EXPECT_EQ(ret, 1);
   // EXPECT_EQ(nall, expected_nall);
-  // std::cout << "---------------------" 
-  // 	    << nloc << " " 
+  // std::cout << "---------------------"
+  // 	    << nloc << " "
   // 	    << nall << std::endl;
 }
-#endif //TENSORFLOW_USE_ROCM
+#endif  // TENSORFLOW_USE_ROCM
 
-class TestCopyCoordMoreCell : public ::testing::Test
-{
-protected:
-  std::vector posi = {
-    0.041, 0.072, 0.100,
-    4.053, 0.041, 0.068,
-    1.130, 0.014, 0.109,
-    0.018, 1.134, 0.139,
-    5.120, 0.087, 0.002,
-    4.067, 1.141, 0.055
+class TestCopyCoordMoreCell : public ::testing::Test {
+ protected:
+  std::vector posi = {0.041, 0.072, 0.100, 4.053, 0.041, 0.068,
+                              1.130, 0.014, 0.109, 0.018, 1.134, 0.139,
+                              5.120, 0.087, 0.002, 4.067, 1.141, 0.055};
+  std::vector atype = {0, 0, 1, 1, 1, 1};
+  std::vector _expected_posi_cpy = {
+      0.041,  0.072,  0.1,    4.053,  0.041,  0.068,  1.13,   0.014,  0.109,
+      0.018,  1.134,  0.139,  5.12,   0.087,  0.002,  4.067,  1.141,  0.055,
+      0.018,  1.134,  20.139, 0.018,  21.134, 0.139,  0.018,  21.134, 20.139,
+      0.041,  0.072,  20.1,   0.041,  20.072, 0.1,    0.041,  20.072, 20.1,
+      1.13,   0.014,  20.109, 1.13,   20.014, 0.109,  1.13,   20.014, 20.109,
+      4.053,  0.041,  20.068, 4.053,  20.041, 0.068,  4.053,  20.041, 20.068,
+      4.067,  1.141,  20.055, 4.067,  21.141, 0.055,  4.067,  21.141, 20.055,
+      5.12,   0.087,  20.002, 5.12,   20.087, 0.002,  5.12,   20.087, 20.002,
+      20.018, 1.134,  0.139,  20.018, 1.134,  20.139, 20.018, 21.134, 0.139,
+      20.018, 21.134, 20.139, 20.041, 0.072,  0.1,    20.041, 0.072,  20.1,
+      20.041, 20.072, 0.1,    20.041, 20.072, 20.1,   21.13,  0.014,  0.109,
+      21.13,  0.014,  20.109, 21.13,  20.014, 0.109,  21.13,  20.014, 20.109,
+      24.053, 0.041,  0.068,  24.053, 0.041,  20.068, 24.053, 20.041, 0.068,
+      24.053, 20.041, 20.068, 24.067, 1.141,  0.055,  24.067, 1.141,  20.055,
+      24.067, 21.141, 0.055,  24.067, 21.141, 20.055, 25.12,  0.087,  0.002,
+      25.12,  0.087,  20.002, 25.12,  20.087, 0.002,  25.12,  20.087, 20.002,
   };
-  std::vector atype = {0, 0, 1, 1, 1, 1};
-  std::vector _expected_posi_cpy = {0.041, 0.072, 0.1, 4.053, 0.041, 0.068, 1.13, 0.014, 0.109, 0.018, 1.134, 0.139, 5.12, 0.087, 0.002, 4.067, 1.141, 0.055, 0.018, 1.134, 20.139, 0.018, 21.134, 0.139, 0.018, 21.134, 20.139, 0.041, 0.072, 20.1, 0.041, 20.072, 0.1, 0.041, 20.072, 20.1, 1.13, 0.014, 20.109, 1.13, 20.014, 0.109, 1.13, 20.014, 20.109, 4.053, 0.041, 20.068,
-  4.053, 20.041, 0.068, 4.053, 20.041, 20.068, 4.067, 1.141, 20.055, 4.067, 21.141, 0.055, 4.067, 21.141, 20.055, 5.12, 0.087, 20.002, 5.12, 20.087, 0.002, 5.12, 20.087, 20.002, 20.018, 1.134, 0.139, 20.018, 1.134, 20.139, 20.018, 21.134, 0.139, 20.018, 21.134, 20.139, 20.041, 0.072, 0.1, 20.041, 0.072, 20.1, 20.041, 20.072, 0.1, 20.041, 20.072, 20.1,
-  21.13, 0.014, 0.109, 21.13, 0.014, 20.109, 21.13, 20.014, 0.109, 21.13, 20.014, 20.109, 24.053, 0.041, 0.068, 24.053, 0.041, 20.068, 24.053, 20.041, 0.068, 24.053, 20.041, 20.068, 24.067,1.141, 0.055, 24.067, 1.141, 20.055, 24.067, 21.141, 0.055, 24.067, 21.141, 20.055, 25.12, 0.087, 0.002, 25.12, 0.087, 20.002, 25.12, 20.087, 0.002, 25.12, 20.087, 20.002,
+  std::vector expected_posi_cpy;
+  std::vector _expected_atype_cpy = {
+      0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
   };
-  std::vector expected_posi_cpy;
-  std::vector _expected_atype_cpy = {
-    0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 
+  std::vector expected_atype_cpy;
+  std::vector _expected_mapping = {
+      0, 1, 2, 3, 4, 5, 3, 3, 3, 0, 0, 0, 2, 2, 2, 1, 1, 1, 5, 5, 5, 4, 4, 4,
+      3, 3, 3, 3, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 5, 5, 5, 5, 4, 4, 4, 4,
   };
-  std::vector expected_atype_cpy;
-  std::vector _expected_mapping = {
-    0, 1, 2, 3, 4, 5, 3, 3, 3, 0, 0, 0, 2, 2, 2, 1, 1, 1, 5, 5, 5, 4, 4, 4, 3, 3, 3, 3, 0, 0, 0, 0, 2, 2, 2, 2, 1, 1, 1, 1, 5, 5, 5, 5, 4, 4, 4, 4,
-  };
-  std::vector expected_mapping;
-  int ntypes = 2;  
+  std::vector expected_mapping;
+  int ntypes = 2;
   int nloc, expected_nall;
   double rc = 4;
   std::vector boxt = {20., 0., 0., 0., 20., 0., 0., 0., 20.};
@@ -752,117 +698,93 @@ class TestCopyCoordMoreCell : public ::testing::Test
     expected_nall = _expected_posi_cpy.size() / 3;
     EXPECT_EQ(expected_nall, _expected_atype_cpy.size());
     EXPECT_EQ(expected_nall, _expected_mapping.size());
-    // sort the atoms between nloc and nall, to remove the uncertainty of the ordering
-    sort_atoms(
-	expected_posi_cpy,
-	expected_atype_cpy,
-	expected_mapping,
-	_expected_posi_cpy,
-	_expected_atype_cpy,
-	_expected_mapping,
-	nloc,
-	expected_nall);
-  }  
+    // sort the atoms between nloc and nall, to remove the uncertainty of the
+    // ordering
+    sort_atoms(expected_posi_cpy, expected_atype_cpy, expected_mapping,
+               _expected_posi_cpy, _expected_atype_cpy, _expected_mapping, nloc,
+               expected_nall);
+  }
 };
 
-TEST_F(TestCopyCoordMoreCell, cpu)
-{
+TEST_F(TestCopyCoordMoreCell, cpu) {
   int mem_size = 1000;
-  std::vector out_c(mem_size * 3);
-  std::vector out_t(mem_size);
-  std::vector mapping(mem_size);
+  std::vector out_c(mem_size * 3);
+  std::vector out_t(mem_size);
+  std::vector mapping(mem_size);
   int nall;
   deepmd::Region region;
   init_region_cpu(region, &boxt[0]);
-  
-  int ret = copy_coord_cpu(
-      &out_c[0],
-      &out_t[0],
-      &mapping[0],
-      &nall,
-      &posi[0],
-      &atype[0],
-      nloc,
-      mem_size,
-      rc,
-      region);
+
+  int ret = copy_coord_cpu(&out_c[0], &out_t[0], &mapping[0], &nall, &posi[0],
+                           &atype[0], nloc, mem_size, rc, region);
   EXPECT_EQ(ret, 0);
   EXPECT_EQ(nall, expected_nall);
-  // std::cout << "---------------------" 
-  // 	    << nloc << " " 
+  // std::cout << "---------------------"
+  // 	    << nloc << " "
   // 	    << nall << std::endl;
-  
-  out_c.resize(nall*3);
+
+  out_c.resize(nall * 3);
   out_t.resize(nall);
   mapping.resize(nall);
-  
-  std::vector out_c_1(mem_size * 3);
-  std::vector out_t_1(mem_size);
-  std::vector mapping_1(mem_size);
+
+  std::vector out_c_1(mem_size * 3);
+  std::vector out_t_1(mem_size);
+  std::vector mapping_1(mem_size);
   sort_atoms(out_c_1, out_t_1, mapping_1, out_c, out_t, mapping, nloc, nall);
-  for(int ii = 0; ii < expected_nall; ++ii){
-    for(int dd = 0; dd < 3; ++dd){
-      EXPECT_LT(fabs(out_c_1[ii*3+dd] - expected_posi_cpy[ii*3+dd]), 1e-12);
+  for (int ii = 0; ii < expected_nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      EXPECT_LT(fabs(out_c_1[ii * 3 + dd] - expected_posi_cpy[ii * 3 + dd]),
+                1e-12);
     }
     EXPECT_EQ(out_t_1[ii], expected_atype_cpy[ii]);
     EXPECT_EQ(mapping_1[ii], expected_mapping[ii]);
-  }  
+  }
 }
 
-TEST_F(TestCopyCoordMoreCell, cpu_lessmem)
-{
+TEST_F(TestCopyCoordMoreCell, cpu_lessmem) {
   int mem_size = 40;
-  std::vector out_c(mem_size * 3);
-  std::vector out_t(mem_size);
-  std::vector mapping(mem_size);
+  std::vector out_c(mem_size * 3);
+  std::vector out_t(mem_size);
+  std::vector mapping(mem_size);
   int nall;
   deepmd::Region region;
   init_region_cpu(region, &boxt[0]);
-  
-  int ret = copy_coord_cpu(
-      &out_c[0],
-      &out_t[0],
-      &mapping[0],
-      &nall,
-      &posi[0],
-      &atype[0],
-      nloc,
-      mem_size,
-      rc,
-      region);
+
+  int ret = copy_coord_cpu(&out_c[0], &out_t[0], &mapping[0], &nall, &posi[0],
+                           &atype[0], nloc, mem_size, rc, region);
   EXPECT_EQ(ret, 1);
   // EXPECT_EQ(nall, expected_nall);
-  // std::cout << "---------------------" 
-  // 	    << nloc << " " 
+  // std::cout << "---------------------"
+  // 	    << nloc << " "
   // 	    << nall << std::endl;
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestCopyCoordMoreCell, gpu)
-{
+TEST_F(TestCopyCoordMoreCell, gpu) {
   int mem_size = 1000;
-  std::vector out_c(mem_size * 3);
-  std::vector out_t(mem_size);
-  std::vector mapping(mem_size);
+  std::vector out_c(mem_size * 3);
+  std::vector out_t(mem_size);
+  std::vector mapping(mem_size);
   int nall;
   std::vector cell_info;
   cell_info.resize(23);
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   deepmd::compute_cell_info(&cell_info[0], rc, region);
   std::vector box_info;
   box_info.resize(18);
-  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
-  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
-  const int loc_cellnum=cell_info[21];
-  const int total_cellnum=cell_info[22];
-  int * cell_info_dev=NULL;
-  double * box_info_dev=NULL;
-  double * out_c_dev=NULL, * in_c_dev=NULL;
-  int * out_t_dev=NULL, * in_t_dev=NULL, * mapping_dev=NULL, * int_data_dev=NULL;
+  memcpy(&box_info[0], &boxt[0], sizeof(double) * 9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9);
+  const int loc_cellnum = cell_info[21];
+  const int total_cellnum = cell_info[22];
+  int* cell_info_dev = NULL;
+  double* box_info_dev = NULL;
+  double *out_c_dev = NULL, *in_c_dev = NULL;
+  int *out_t_dev = NULL, *in_t_dev = NULL, *mapping_dev = NULL,
+      *int_data_dev = NULL;
   deepmd::malloc_device_memory_sync(cell_info_dev, cell_info);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(in_c_dev, posi);
@@ -870,23 +792,16 @@ TEST_F(TestCopyCoordMoreCell, gpu)
   deepmd::malloc_device_memory(out_c_dev, mem_size * 3);
   deepmd::malloc_device_memory(out_t_dev, mem_size);
   deepmd::malloc_device_memory(mapping_dev, mem_size);
-  deepmd::malloc_device_memory(int_data_dev, nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1+nloc);
+  deepmd::malloc_device_memory(
+      int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 +
+                        total_cellnum * 3 + loc_cellnum + 1 + total_cellnum +
+                        1 + nloc);
   region_dev.boxt = box_info_dev;
   region_dev.rec_boxt = box_info_dev + 9;
-  int ret = deepmd::copy_coord_gpu(
-      out_c_dev, 
-      out_t_dev, 
-      mapping_dev, 
-      &nall,
-      int_data_dev,
-      in_c_dev,
-      in_t_dev,
-      nloc,
-      mem_size,
-      loc_cellnum,
-      total_cellnum,
-      cell_info_dev,
-      region_dev);
+  int ret = deepmd::copy_coord_gpu(out_c_dev, out_t_dev, mapping_dev, &nall,
+                                   int_data_dev, in_c_dev, in_t_dev, nloc,
+                                   mem_size, loc_cellnum, total_cellnum,
+                                   cell_info_dev, region_dev);
   region_dev.boxt = new_boxt;
   region_dev.rec_boxt = new_rec_boxt;
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
@@ -902,48 +817,49 @@ TEST_F(TestCopyCoordMoreCell, gpu)
   deepmd::delete_device_memory(int_data_dev);
   EXPECT_EQ(ret, 0);
   EXPECT_EQ(nall, expected_nall);
-  out_c.resize(nall*3);
+  out_c.resize(nall * 3);
   out_t.resize(nall);
   mapping.resize(nall);
 
-  std::vector out_c_1(mem_size * 3);
-  std::vector out_t_1(mem_size);
-  std::vector mapping_1(mem_size);
+  std::vector out_c_1(mem_size * 3);
+  std::vector out_t_1(mem_size);
+  std::vector mapping_1(mem_size);
   sort_atoms(out_c_1, out_t_1, mapping_1, out_c, out_t, mapping, nloc, nall);
-  for(int ii = 0; ii < expected_nall; ++ii){
-    for(int dd = 0; dd < 3; ++dd){
-      EXPECT_LT(fabs(out_c_1[ii*3+dd] - expected_posi_cpy[ii*3+dd]), 1e-12);
+  for (int ii = 0; ii < expected_nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      EXPECT_LT(fabs(out_c_1[ii * 3 + dd] - expected_posi_cpy[ii * 3 + dd]),
+                1e-12);
     }
     EXPECT_EQ(out_t_1[ii], expected_atype_cpy[ii]);
     EXPECT_EQ(mapping_1[ii], expected_mapping[ii]);
-  }  
+  }
 }
 
-TEST_F(TestCopyCoordMoreCell, gpu_lessmem)
-{
+TEST_F(TestCopyCoordMoreCell, gpu_lessmem) {
   int mem_size = 40;
-  std::vector out_c(mem_size * 3);
-  std::vector out_t(mem_size);
-  std::vector mapping(mem_size);
+  std::vector out_c(mem_size * 3);
+  std::vector out_t(mem_size);
+  std::vector mapping(mem_size);
   int nall;
   std::vector cell_info;
   cell_info.resize(23);
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   deepmd::compute_cell_info(&cell_info[0], rc, region);
   std::vector box_info;
   box_info.resize(18);
-  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
-  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
-  const int loc_cellnum=cell_info[21];
-  const int total_cellnum=cell_info[22];
-  int * cell_info_dev=NULL;
-  double * box_info_dev=NULL;
-  double * out_c_dev=NULL, * in_c_dev=NULL;
-  int * out_t_dev=NULL, * in_t_dev=NULL, * mapping_dev=NULL, * int_data_dev=NULL;
+  memcpy(&box_info[0], &boxt[0], sizeof(double) * 9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9);
+  const int loc_cellnum = cell_info[21];
+  const int total_cellnum = cell_info[22];
+  int* cell_info_dev = NULL;
+  double* box_info_dev = NULL;
+  double *out_c_dev = NULL, *in_c_dev = NULL;
+  int *out_t_dev = NULL, *in_t_dev = NULL, *mapping_dev = NULL,
+      *int_data_dev = NULL;
   deepmd::malloc_device_memory_sync(cell_info_dev, cell_info);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(in_c_dev, posi);
@@ -951,23 +867,16 @@ TEST_F(TestCopyCoordMoreCell, gpu_lessmem)
   deepmd::malloc_device_memory(out_c_dev, mem_size * 3);
   deepmd::malloc_device_memory(out_t_dev, mem_size);
   deepmd::malloc_device_memory(mapping_dev, mem_size);
-  deepmd::malloc_device_memory(int_data_dev, nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1+nloc);
+  deepmd::malloc_device_memory(
+      int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 +
+                        total_cellnum * 3 + loc_cellnum + 1 + total_cellnum +
+                        1 + nloc);
   region_dev.boxt = box_info_dev;
   region_dev.rec_boxt = box_info_dev + 9;
-  int ret = deepmd::copy_coord_gpu(
-      out_c_dev, 
-      out_t_dev, 
-      mapping_dev, 
-      &nall,
-      int_data_dev,
-      in_c_dev,
-      in_t_dev,
-      nloc,
-      mem_size,
-      loc_cellnum,
-      total_cellnum,
-      cell_info_dev,
-      region_dev);
+  int ret = deepmd::copy_coord_gpu(out_c_dev, out_t_dev, mapping_dev, &nall,
+                                   int_data_dev, in_c_dev, in_t_dev, nloc,
+                                   mem_size, loc_cellnum, total_cellnum,
+                                   cell_info_dev, region_dev);
   region_dev.boxt = new_boxt;
   region_dev.rec_boxt = new_rec_boxt;
   deepmd::memcpy_device_to_host(out_c_dev, out_c);
@@ -983,38 +892,38 @@ TEST_F(TestCopyCoordMoreCell, gpu_lessmem)
   deepmd::delete_device_memory(int_data_dev);
   EXPECT_EQ(ret, 1);
   // EXPECT_EQ(nall, expected_nall);
-  // std::cout << "---------------------" 
-  // 	    << nloc << " " 
+  // std::cout << "---------------------"
+  // 	    << nloc << " "
   // 	    << nall << std::endl;
 }
-#endif //GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestCopyCoordMoreCell, gpu)
-{
+TEST_F(TestCopyCoordMoreCell, gpu) {
   int mem_size = 1000;
-  std::vector out_c(mem_size * 3);
-  std::vector out_t(mem_size);
-  std::vector mapping(mem_size);
+  std::vector out_c(mem_size * 3);
+  std::vector out_t(mem_size);
+  std::vector mapping(mem_size);
   int nall;
   std::vector cell_info;
   cell_info.resize(23);
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   deepmd::compute_cell_info(&cell_info[0], rc, region);
   std::vector box_info;
   box_info.resize(18);
-  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
-  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
-  const int loc_cellnum=cell_info[21];
-  const int total_cellnum=cell_info[22];
-  int * cell_info_dev=NULL;
-  double * box_info_dev=NULL;
-  double * out_c_dev=NULL, * in_c_dev=NULL;
-  int * out_t_dev=NULL, * in_t_dev=NULL, * mapping_dev=NULL, * int_data_dev=NULL;
+  memcpy(&box_info[0], &boxt[0], sizeof(double) * 9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9);
+  const int loc_cellnum = cell_info[21];
+  const int total_cellnum = cell_info[22];
+  int* cell_info_dev = NULL;
+  double* box_info_dev = NULL;
+  double *out_c_dev = NULL, *in_c_dev = NULL;
+  int *out_t_dev = NULL, *in_t_dev = NULL, *mapping_dev = NULL,
+      *int_data_dev = NULL;
   deepmd::malloc_device_memory_sync(cell_info_dev, cell_info);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(in_c_dev, posi);
@@ -1022,22 +931,15 @@ TEST_F(TestCopyCoordMoreCell, gpu)
   deepmd::malloc_device_memory(out_c_dev, mem_size * 3);
   deepmd::malloc_device_memory(out_t_dev, mem_size);
   deepmd::malloc_device_memory(mapping_dev, mem_size);
-  deepmd::malloc_device_memory(int_data_dev, nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1+nloc);
+  deepmd::malloc_device_memory(
+      int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 +
+                        total_cellnum * 3 + loc_cellnum + 1 + total_cellnum +
+                        1 + nloc);
   region_dev.boxt = box_info_dev;
   region_dev.rec_boxt = box_info_dev + 9;
   int ret = deepmd::copy_coord_gpu_rocm(
-      out_c_dev, 
-      out_t_dev, 
-      mapping_dev, 
-      &nall,
-      int_data_dev,
-      in_c_dev,
-      in_t_dev,
-      nloc,
-      mem_size,
-      loc_cellnum,
-      total_cellnum,
-      cell_info_dev,
+      out_c_dev, out_t_dev, mapping_dev, &nall, int_data_dev, in_c_dev,
+      in_t_dev, nloc, mem_size, loc_cellnum, total_cellnum, cell_info_dev,
       region_dev);
   region_dev.boxt = new_boxt;
   region_dev.rec_boxt = new_rec_boxt;
@@ -1054,48 +956,49 @@ TEST_F(TestCopyCoordMoreCell, gpu)
   deepmd::delete_device_memory(int_data_dev);
   EXPECT_EQ(ret, 0);
   EXPECT_EQ(nall, expected_nall);
-  out_c.resize(nall*3);
+  out_c.resize(nall * 3);
   out_t.resize(nall);
   mapping.resize(nall);
 
-  std::vector out_c_1(mem_size * 3);
-  std::vector out_t_1(mem_size);
-  std::vector mapping_1(mem_size);
+  std::vector out_c_1(mem_size * 3);
+  std::vector out_t_1(mem_size);
+  std::vector mapping_1(mem_size);
   sort_atoms(out_c_1, out_t_1, mapping_1, out_c, out_t, mapping, nloc, nall);
-  for(int ii = 0; ii < expected_nall; ++ii){
-    for(int dd = 0; dd < 3; ++dd){
-      EXPECT_LT(fabs(out_c_1[ii*3+dd] - expected_posi_cpy[ii*3+dd]), 1e-12);
+  for (int ii = 0; ii < expected_nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      EXPECT_LT(fabs(out_c_1[ii * 3 + dd] - expected_posi_cpy[ii * 3 + dd]),
+                1e-12);
     }
     EXPECT_EQ(out_t_1[ii], expected_atype_cpy[ii]);
     EXPECT_EQ(mapping_1[ii], expected_mapping[ii]);
-  }  
+  }
 }
 
-TEST_F(TestCopyCoordMoreCell, gpu_lessmem)
-{
+TEST_F(TestCopyCoordMoreCell, gpu_lessmem) {
   int mem_size = 40;
-  std::vector out_c(mem_size * 3);
-  std::vector out_t(mem_size);
-  std::vector mapping(mem_size);
+  std::vector out_c(mem_size * 3);
+  std::vector out_t(mem_size);
+  std::vector mapping(mem_size);
   int nall;
   std::vector cell_info;
   cell_info.resize(23);
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
   init_region_cpu(region, &boxt[0]);
   deepmd::compute_cell_info(&cell_info[0], rc, region);
   std::vector box_info;
   box_info.resize(18);
-  memcpy(&box_info[0], &boxt[0], sizeof(double)*9);
-  memcpy(&box_info[9], region.rec_boxt, sizeof(double)*9);
-  const int loc_cellnum=cell_info[21];
-  const int total_cellnum=cell_info[22];
-  int * cell_info_dev=NULL;
-  double * box_info_dev=NULL;
-  double * out_c_dev=NULL, * in_c_dev=NULL;
-  int * out_t_dev=NULL, * in_t_dev=NULL, * mapping_dev=NULL, * int_data_dev=NULL;
+  memcpy(&box_info[0], &boxt[0], sizeof(double) * 9);
+  memcpy(&box_info[9], region.rec_boxt, sizeof(double) * 9);
+  const int loc_cellnum = cell_info[21];
+  const int total_cellnum = cell_info[22];
+  int* cell_info_dev = NULL;
+  double* box_info_dev = NULL;
+  double *out_c_dev = NULL, *in_c_dev = NULL;
+  int *out_t_dev = NULL, *in_t_dev = NULL, *mapping_dev = NULL,
+      *int_data_dev = NULL;
   deepmd::malloc_device_memory_sync(cell_info_dev, cell_info);
   deepmd::malloc_device_memory_sync(box_info_dev, box_info);
   deepmd::malloc_device_memory_sync(in_c_dev, posi);
@@ -1103,22 +1006,15 @@ TEST_F(TestCopyCoordMoreCell, gpu_lessmem)
   deepmd::malloc_device_memory(out_c_dev, mem_size * 3);
   deepmd::malloc_device_memory(out_t_dev, mem_size);
   deepmd::malloc_device_memory(mapping_dev, mem_size);
-  deepmd::malloc_device_memory(int_data_dev, nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1+nloc);
+  deepmd::malloc_device_memory(
+      int_data_dev, nloc * 3 + loc_cellnum + total_cellnum * 3 +
+                        total_cellnum * 3 + loc_cellnum + 1 + total_cellnum +
+                        1 + nloc);
   region_dev.boxt = box_info_dev;
   region_dev.rec_boxt = box_info_dev + 9;
   int ret = deepmd::copy_coord_gpu_rocm(
-      out_c_dev, 
-      out_t_dev, 
-      mapping_dev, 
-      &nall,
-      int_data_dev,
-      in_c_dev,
-      in_t_dev,
-      nloc,
-      mem_size,
-      loc_cellnum,
-      total_cellnum,
-      cell_info_dev,
+      out_c_dev, out_t_dev, mapping_dev, &nall, int_data_dev, in_c_dev,
+      in_t_dev, nloc, mem_size, loc_cellnum, total_cellnum, cell_info_dev,
       region_dev);
   region_dev.boxt = new_boxt;
   region_dev.rec_boxt = new_rec_boxt;
@@ -1135,8 +1031,8 @@ TEST_F(TestCopyCoordMoreCell, gpu_lessmem)
   deepmd::delete_device_memory(int_data_dev);
   EXPECT_EQ(ret, 1);
   // EXPECT_EQ(nall, expected_nall);
-  // std::cout << "---------------------" 
-  // 	    << nloc << " " 
+  // std::cout << "---------------------"
+  // 	    << nloc << " "
   // 	    << nall << std::endl;
 }
-#endif //TENSORFLOW_USE_ROCM
\ No newline at end of file
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_env_mat_a.cc b/source/lib/tests/test_env_mat_a.cc
index df7b5f38a0..3cac18a494 100644
--- a/source/lib/tests/test_env_mat_a.cc
+++ b/source/lib/tests/test_env_mat_a.cc
@@ -1,399 +1,496 @@
-#include 
 #include 
-#include "fmt_nlist.h"
+
+#include 
+
+#include "device.h"
 #include "env_mat.h"
-#include "prod_env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
-#include "device.h"
+#include "prod_env_mat.h"
 
-class TestEnvMatA : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
+class TestEnvMatA : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
   int nloc, nall;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 10, 20};
   std::vector sec_r = {0, 0, 0};
   std::vector nat_stt, ext_stt, ext_end;
   std::vector> nlist_a, nlist_r;
   std::vector> nlist_a_cpy, nlist_r_cpy;
-  int ntypes = sec_a.size()-1;
+  int ntypes = sec_a.size() - 1;
   int nnei = sec_a.back();
   int ndescrpt = nnei * 4;
-  std::vector expected_env = {
-    0.12206, 0.12047, 0.01502, -0.01263, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 1.02167, -0.77271, 0.32370, 0.58475, 0.99745, 0.41810, 0.75655, -0.49773, 0.10564, 0.10495, -0.00143, 0.01198, 0.03103, 0.03041, 0.00452, -0.00425, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-    1.02167, 0.77271, -0.32370, -0.58475, 0.04135, 0.04039, 0.00123, -0.00880, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.59220, 0.42028, 0.16304, -0.38405, 0.03694, 0.03680, -0.00300, -0.00117, 0.00336, 0.00327, 0.00022, -0.00074, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-    0.99745, -0.41810, -0.75655, 0.49773, 0.19078, 0.18961, -0.01951, 0.00793, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.59220, -0.42028, -0.16304, 0.38405, 0.13499, 0.12636, -0.03140, 0.03566, 0.07054, 0.07049, -0.00175, -0.00210, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-    0.12206, -0.12047, -0.01502, 0.01263, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 1.06176, 0.16913, -0.55250, 0.89077, 1.03163, 0.96880, 0.23422, -0.26615, 0.19078, -0.18961, 0.01951, -0.00793, 0.04135, -0.04039, -0.00123, 0.00880, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-    1.06176, -0.16913, 0.55250, -0.89077, 0.10564, -0.10495, 0.00143, -0.01198, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.66798, 0.34516, 0.32245, -0.47232, 0.13499, -0.12636, 0.03140, -0.03566, 0.03694, -0.03680, 0.00300, 0.00117, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-    1.03163, -0.96880, -0.23422, 0.26615, 0.03103, -0.03041, -0.00452, 0.00425, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.66798, -0.34516, -0.32245, 0.47232, 0.07054, -0.07049, 0.00175, 0.00210, 0.00336, -0.00327, -0.00022, 0.00074, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-  };  
-  
+  std::vector expected_env = {
+      0.12206,  0.12047,  0.01502,  -0.01263, 0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  1.02167,  -0.77271,
+      0.32370,  0.58475,  0.99745,  0.41810,  0.75655,  -0.49773, 0.10564,
+      0.10495,  -0.00143, 0.01198,  0.03103,  0.03041,  0.00452,  -0.00425,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  1.02167,  0.77271,  -0.32370, -0.58475,
+      0.04135,  0.04039,  0.00123,  -0.00880, 0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.59220,  0.42028,  0.16304,  -0.38405, 0.03694,  0.03680,
+      -0.00300, -0.00117, 0.00336,  0.00327,  0.00022,  -0.00074, 0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.99745,
+      -0.41810, -0.75655, 0.49773,  0.19078,  0.18961,  -0.01951, 0.00793,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.59220,  -0.42028, -0.16304,
+      0.38405,  0.13499,  0.12636,  -0.03140, 0.03566,  0.07054,  0.07049,
+      -0.00175, -0.00210, 0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.12206,  -0.12047, -0.01502, 0.01263,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      1.06176,  0.16913,  -0.55250, 0.89077,  1.03163,  0.96880,  0.23422,
+      -0.26615, 0.19078,  -0.18961, 0.01951,  -0.00793, 0.04135,  -0.04039,
+      -0.00123, 0.00880,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  1.06176,  -0.16913,
+      0.55250,  -0.89077, 0.10564,  -0.10495, 0.00143,  -0.01198, 0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.66798,  0.34516,  0.32245,  -0.47232,
+      0.13499,  -0.12636, 0.03140,  -0.03566, 0.03694,  -0.03680, 0.00300,
+      0.00117,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  1.03163,  -0.96880, -0.23422, 0.26615,  0.03103,  -0.03041,
+      -0.00452, 0.00425,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.66798,
+      -0.34516, -0.32245, 0.47232,  0.07054,  -0.07049, 0.00175,  0.00210,
+      0.00336,  -0.00327, -0.00022, 0.00074,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,
+  };
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
     build_nlist(nlist_a, nlist_r, posi, rc, rc, ncell, region);
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-  }
-  void TearDown() override {
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
   }
+  void TearDown() override {}
 };
 
-
-class TestEnvMatAShortSel : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
+class TestEnvMatAShortSel : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
   int nloc, nall;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 2, 4};
   std::vector sec_r = {0, 0, 0};
   std::vector nat_stt, ext_stt, ext_end;
   std::vector> nlist_a, nlist_r;
   std::vector> nlist_a_cpy, nlist_r_cpy;
-  int ntypes = sec_a.size()-1;
+  int ntypes = sec_a.size() - 1;
   int nnei = sec_a.back();
   int ndescrpt = nnei * 4;
-  std::vector expected_env = {
-    0.12206,  0.12047,  0.01502, -0.01263,  0.00000,  0.00000,  0.00000,  0.00000,  1.02167, -0.77271,  0.32370,  0.58475,  0.99745,  0.41810,  0.75655, -0.49773, 
-    1.02167,  0.77271, -0.32370, -0.58475,  0.04135,  0.04039,  0.00123, -0.00880,  0.59220,  0.42028,  0.16304, -0.38405,  0.03694,  0.03680, -0.00300, -0.00117, 
-    0.99745, -0.41810, -0.75655,  0.49773,  0.19078,  0.18961, -0.01951,  0.00793,  0.59220, -0.42028, -0.16304,  0.38405,  0.13499,  0.12636, -0.03140,  0.03566, 
-    0.12206, -0.12047, -0.01502,  0.01263,  0.00000,  0.00000,  0.00000,  0.00000,  1.06176,  0.16913, -0.55250,  0.89077,  1.03163,  0.96880,  0.23422, -0.26615, 
-    1.06176, -0.16913,  0.55250, -0.89077,  0.10564, -0.10495,  0.00143, -0.01198,  0.66798,  0.34516,  0.32245, -0.47232,  0.13499, -0.12636,  0.03140, -0.03566, 
-    1.03163, -0.96880, -0.23422,  0.26615,  0.03103, -0.03041, -0.00452,  0.00425,  0.66798, -0.34516, -0.32245,  0.47232,  0.07054, -0.07049,  0.00175,  0.00210, 
-  };  
-  
+  std::vector expected_env = {
+      0.12206,  0.12047,  0.01502,  -0.01263, 0.00000,  0.00000,  0.00000,
+      0.00000,  1.02167,  -0.77271, 0.32370,  0.58475,  0.99745,  0.41810,
+      0.75655,  -0.49773, 1.02167,  0.77271,  -0.32370, -0.58475, 0.04135,
+      0.04039,  0.00123,  -0.00880, 0.59220,  0.42028,  0.16304,  -0.38405,
+      0.03694,  0.03680,  -0.00300, -0.00117, 0.99745,  -0.41810, -0.75655,
+      0.49773,  0.19078,  0.18961,  -0.01951, 0.00793,  0.59220,  -0.42028,
+      -0.16304, 0.38405,  0.13499,  0.12636,  -0.03140, 0.03566,  0.12206,
+      -0.12047, -0.01502, 0.01263,  0.00000,  0.00000,  0.00000,  0.00000,
+      1.06176,  0.16913,  -0.55250, 0.89077,  1.03163,  0.96880,  0.23422,
+      -0.26615, 1.06176,  -0.16913, 0.55250,  -0.89077, 0.10564,  -0.10495,
+      0.00143,  -0.01198, 0.66798,  0.34516,  0.32245,  -0.47232, 0.13499,
+      -0.12636, 0.03140,  -0.03566, 1.03163,  -0.96880, -0.23422, 0.26615,
+      0.03103,  -0.03041, -0.00452, 0.00425,  0.66798,  -0.34516, -0.32245,
+      0.47232,  0.07054,  -0.07049, 0.00175,  0.00210,
+  };
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
     build_nlist(nlist_a, nlist_r, posi, rc, rc, ncell, region);
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-  }
-  void TearDown() override {
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
   }
+  void TearDown() override {}
 };
 
-
-TEST_F(TestEnvMatA, orig_cpy)
-{
+TEST_F(TestEnvMatA, orig_cpy) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = false;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, -1);
-    env_mat_a(env, env_deriv, rij_a, posi_cpy, ntypes, atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a[2]*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a[2]*3);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(env[jj*4+dd] - expected_env[ii*sec_a[2]*4 + jj*4 + dd]) , 1e-5);
+    env_mat_a(env, env_deriv, rij_a, posi_cpy, ntypes, atype_cpy, region, pbc,
+              ii, fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a[2] * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a[2] * 3);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(env[jj * 4 + dd] -
+                       expected_env[ii * sec_a[2] * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
     // for (int jj = 0; jj < sec_a[2]; ++jj){
-    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1], env[jj*4+2], env[jj*4+3]);
+    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1],
+    //   env[jj*4+2], env[jj*4+3]);
     // }
     // printf("\n");
   }
 }
 
-TEST_F(TestEnvMatA, orig_pbc)
-{
+TEST_F(TestEnvMatA, orig_pbc) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = true;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, posi, ntypes, atype, region, pbc, ii, nlist_a[ii], nlist_r[ii], rc, sec_a, sec_r);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, posi, ntypes,
+                                    atype, region, pbc, ii, nlist_a[ii],
+                                    nlist_r[ii], rc, sec_a, sec_r);
     EXPECT_EQ(ret, -1);
-    env_mat_a(env, env_deriv, rij_a, posi, ntypes, atype, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a[2]*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a[2]*3);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(env[jj*4+dd] - expected_env[ii*sec_a[2]*4 + jj*4 + dd]) , 1e-5);
+    env_mat_a(env, env_deriv, rij_a, posi, ntypes, atype, region, pbc, ii,
+              fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a[2] * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a[2] * 3);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(env[jj * 4 + dd] -
+                       expected_env[ii * sec_a[2] * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-
-TEST_F(TestEnvMatA, orig_cpy_equal_pbc)
-{
+TEST_F(TestEnvMatA, orig_cpy_equal_pbc) {
   std::vector fmt_nlist_a_0, fmt_nlist_r_0;
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_0, env_deriv_0, rij_a_0;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_0 = format_nlist_i_cpu(fmt_nlist_a_0, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_0 = format_nlist_i_cpu(fmt_nlist_a_0, posi_cpy, atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_0, -1);
-    env_mat_a(env_0, env_deriv_0, rij_a_0, posi_cpy, ntypes, atype_cpy, region, false, ii, fmt_nlist_a_0, sec_a, rc_smth, rc);
-    int ret_1 = format_nlist_i_fill_a(fmt_nlist_a_1, fmt_nlist_r_1, posi, ntypes, atype, region, true, ii, nlist_a[ii], nlist_r[ii], rc, sec_a, sec_r);    
+    env_mat_a(env_0, env_deriv_0, rij_a_0, posi_cpy, ntypes, atype_cpy, region,
+              false, ii, fmt_nlist_a_0, sec_a, rc_smth, rc);
+    int ret_1 = format_nlist_i_fill_a(
+        fmt_nlist_a_1, fmt_nlist_r_1, posi, ntypes, atype, region, true, ii,
+        nlist_a[ii], nlist_r[ii], rc, sec_a, sec_r);
     EXPECT_EQ(ret_1, -1);
-    env_mat_a(env_1, env_deriv_1, rij_a_1, posi, ntypes, atype, region, true, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);    
+    env_mat_a(env_1, env_deriv_1, rij_a_1, posi, ntypes, atype, region, true,
+              ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
     EXPECT_EQ(env_0.size(), env_1.size());
     EXPECT_EQ(env_deriv_0.size(), env_deriv_1.size());
     EXPECT_EQ(rij_a_0.size(), rij_a_1.size());
-    for (unsigned jj = 0; jj < env_0.size(); ++jj){
+    for (unsigned jj = 0; jj < env_0.size(); ++jj) {
       EXPECT_LT(fabs(env_0[jj] - env_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_0.size(); ++jj){
-      EXPECT_LT(fabs(env_deriv_0[jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_0.size(); ++jj){
+    for (unsigned jj = 0; jj < env_deriv_0.size(); ++jj) {
+      EXPECT_LT(fabs(env_deriv_0[jj] - env_deriv_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < rij_a_0.size(); ++jj) {
       EXPECT_LT(fabs(rij_a_0[jj] - rij_a_1[jj]), 1e-10);
     }
   }
 }
 
-
-TEST_F(TestEnvMatA, orig_cpy_num_deriv)
-{
+TEST_F(TestEnvMatA, orig_cpy_num_deriv) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_0, env_1, env_deriv, env_deriv_tmp, rij_a;
   bool pbc = false;
   double hh = 1e-5;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, -1);
-    env_mat_a(env, env_deriv, rij_a, posi_cpy, ntypes, atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a[2]*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a[2]*3);
+    env_mat_a(env, env_deriv, rij_a, posi_cpy, ntypes, atype_cpy, region, pbc,
+              ii, fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a[2] * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a[2] * 3);
 
-    for (int jj = 0; jj < sec_a[2]; ++jj){
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
       int j_idx = fmt_nlist_a[jj];
       if (j_idx < 0) continue;
-      for (int kk = 0; kk < 4; ++kk){
-	for (int dd = 0; dd < 3; ++dd){
-	  std::vector posi_0 = posi_cpy;
-	  std::vector posi_1 = posi_cpy;
-	  posi_0[j_idx*3+dd] -= hh;
-	  posi_1[j_idx*3+dd] += hh;
-	  env_mat_a(env_0, env_deriv_tmp, rij_a, posi_0, ntypes, atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
-	  env_mat_a(env_1, env_deriv_tmp, rij_a, posi_1, ntypes, atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
-	  double num_deriv = (env_1[jj*4+kk] - env_0[jj*4+kk])/(2.*hh);
-	  double ana_deriv = -env_deriv[jj*12+kk*3+dd];
-	  EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-5);
-	}
+      for (int kk = 0; kk < 4; ++kk) {
+        for (int dd = 0; dd < 3; ++dd) {
+          std::vector posi_0 = posi_cpy;
+          std::vector posi_1 = posi_cpy;
+          posi_0[j_idx * 3 + dd] -= hh;
+          posi_1[j_idx * 3 + dd] += hh;
+          env_mat_a(env_0, env_deriv_tmp, rij_a, posi_0, ntypes, atype_cpy,
+                    region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+          env_mat_a(env_1, env_deriv_tmp, rij_a, posi_1, ntypes, atype_cpy,
+                    region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+          double num_deriv =
+              (env_1[jj * 4 + kk] - env_0[jj * 4 + kk]) / (2. * hh);
+          double ana_deriv = -env_deriv[jj * 12 + kk * 3 + dd];
+          EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-5);
+        }
       }
     }
     // for (int jj = 0; jj < sec_a[2]; ++jj){
-    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1], env[jj*4+2], env[jj*4+3]);
+    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1],
+    //   env[jj*4+2], env[jj*4+3]);
     // }
     // printf("\n");
   }
 }
 
-
-TEST_F(TestEnvMatA, cpu)
-{
+TEST_F(TestEnvMatA, cpu) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = false;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, -1);
-    deepmd::env_mat_a_cpu(env, env_deriv, rij_a, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a[2]*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a[2]*3);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(env[jj*4+dd] - expected_env[ii*sec_a[2]*4 + jj*4 + dd]) , 1e-5);
+    deepmd::env_mat_a_cpu(env, env_deriv, rij_a, posi_cpy, atype_cpy,
+                                  ii, fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a[2] * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a[2] * 3);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(env[jj * 4 + dd] -
+                       expected_env[ii * sec_a[2] * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-TEST_F(TestEnvMatA, cpu_equal_orig_cpy)
-{
+TEST_F(TestEnvMatA, cpu_equal_orig_cpy) {
   std::vector fmt_nlist_a_0, fmt_nlist_r_0;
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_0, env_deriv_0, rij_a_0;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_0 = format_nlist_i_cpu(fmt_nlist_a_0, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_0 = format_nlist_i_cpu(fmt_nlist_a_0, posi_cpy, atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_0, -1);
-    env_mat_a(env_0, env_deriv_0, rij_a_0, posi_cpy, ntypes, atype_cpy, region, false, ii, fmt_nlist_a_0, sec_a, rc_smth, rc);
+    env_mat_a(env_0, env_deriv_0, rij_a_0, posi_cpy, ntypes, atype_cpy, region,
+              false, ii, fmt_nlist_a_0, sec_a, rc_smth, rc);
+
+    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
 
-    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);  
-  
     EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
+    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy,
+                                  atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth,
+                                  rc);
 
     EXPECT_EQ(env_0.size(), env_1.size());
     EXPECT_EQ(env_deriv_0.size(), env_deriv_1.size());
     EXPECT_EQ(rij_a_0.size(), rij_a_1.size());
-    for (unsigned jj = 0; jj < env_0.size(); ++jj){
+    for (unsigned jj = 0; jj < env_0.size(); ++jj) {
       EXPECT_LT(fabs(env_0[jj] - env_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_0.size(); ++jj){
-      EXPECT_LT(fabs(env_deriv_0[jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_0.size(); ++jj){
+    for (unsigned jj = 0; jj < env_deriv_0.size(); ++jj) {
+      EXPECT_LT(fabs(env_deriv_0[jj] - env_deriv_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < rij_a_0.size(); ++jj) {
       EXPECT_LT(fabs(rij_a_0[jj] - rij_a_1[jj]), 1e-10);
     }
   }
 }
 
-TEST_F(TestEnvMatA, cpu_num_deriv)
-{
+TEST_F(TestEnvMatA, cpu_num_deriv) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_0, env_1, env_deriv, env_deriv_tmp, rij_a;
   bool pbc = false;
   double hh = 1e-5;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, -1);
-    deepmd::env_mat_a_cpu(env, env_deriv, rij_a, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a[2]*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a[2]*3);
+    deepmd::env_mat_a_cpu(env, env_deriv, rij_a, posi_cpy, atype_cpy,
+                                  ii, fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a[2] * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a[2] * 3);
 
-    for (int jj = 0; jj < sec_a[2]; ++jj){
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
       int j_idx = fmt_nlist_a[jj];
       if (j_idx < 0) continue;
-      for (int kk = 0; kk < 4; ++kk){
-	for (int dd = 0; dd < 3; ++dd){
-	  std::vector posi_0 = posi_cpy;
-	  std::vector posi_1 = posi_cpy;
-	  posi_0[j_idx*3+dd] -= hh;
-	  posi_1[j_idx*3+dd] += hh;
-	  env_mat_a(env_0, env_deriv_tmp, rij_a, posi_0, ntypes, atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
-	  env_mat_a(env_1, env_deriv_tmp, rij_a, posi_1, ntypes, atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
-	  double num_deriv = (env_1[jj*4+kk] - env_0[jj*4+kk])/(2.*hh);
-	  double ana_deriv = -env_deriv[jj*12+kk*3+dd];
-	  EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-5);
-	}
+      for (int kk = 0; kk < 4; ++kk) {
+        for (int dd = 0; dd < 3; ++dd) {
+          std::vector posi_0 = posi_cpy;
+          std::vector posi_1 = posi_cpy;
+          posi_0[j_idx * 3 + dd] -= hh;
+          posi_1[j_idx * 3 + dd] += hh;
+          env_mat_a(env_0, env_deriv_tmp, rij_a, posi_0, ntypes, atype_cpy,
+                    region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+          env_mat_a(env_1, env_deriv_tmp, rij_a, posi_1, ntypes, atype_cpy,
+                    region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+          double num_deriv =
+              (env_1[jj * 4 + kk] - env_0[jj * 4 + kk]) / (2. * hh);
+          double ana_deriv = -env_deriv[jj * 12 + kk * 3 + dd];
+          EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-5);
+        }
       }
     }
     // for (int jj = 0; jj < sec_a[2]; ++jj){
-    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1], env[jj*4+2], env[jj*4+3]);
+    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1],
+    //   env[jj*4+2], env[jj*4+3]);
     // }
     // printf("\n");
   }
 }
 
-
-TEST_F(TestEnvMatAShortSel, orig_cpy)
-{
+TEST_F(TestEnvMatAShortSel, orig_cpy) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = false;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, 1);
-    env_mat_a(env, env_deriv, rij_a, posi_cpy, ntypes, atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a[2]*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a[2]*3);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(env[jj*4+dd] - expected_env[ii*sec_a[2]*4 + jj*4 + dd]) , 1e-5);
+    env_mat_a(env, env_deriv, rij_a, posi_cpy, ntypes, atype_cpy, region, pbc,
+              ii, fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a[2] * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a[2] * 3);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(env[jj * 4 + dd] -
+                       expected_env[ii * sec_a[2] * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
     // for (int jj = 0; jj < sec_a[2]; ++jj){
-    //   printf("%8.5f, %8.5f, %8.5f, %8.5f, ", env[jj*4+0], env[jj*4+1], env[jj*4+2], env[jj*4+3]);
+    //   printf("%8.5f, %8.5f, %8.5f, %8.5f, ", env[jj*4+0], env[jj*4+1],
+    //   env[jj*4+2], env[jj*4+3]);
     // }
     // printf("\n");
   }
 }
 
-TEST_F(TestEnvMatAShortSel, orig_pbc)
-{
+TEST_F(TestEnvMatAShortSel, orig_pbc) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = true;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, posi, ntypes, atype, region, pbc, ii, nlist_a[ii], nlist_r[ii], rc, sec_a, sec_r);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, posi, ntypes,
+                                    atype, region, pbc, ii, nlist_a[ii],
+                                    nlist_r[ii], rc, sec_a, sec_r);
     EXPECT_EQ(ret, 1);
-    env_mat_a(env, env_deriv, rij_a, posi, ntypes, atype, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a[2]*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a[2]*3);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(env[jj*4+dd] - expected_env[ii*sec_a[2]*4 + jj*4 + dd]) , 1e-5);
+    env_mat_a(env, env_deriv, rij_a, posi, ntypes, atype, region, pbc, ii,
+              fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a[2] * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a[2] * 3);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(env[jj * 4 + dd] -
+                       expected_env[ii * sec_a[2] * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-TEST_F(TestEnvMatAShortSel, cpu)
-{
+TEST_F(TestEnvMatAShortSel, cpu) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = false;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, 1);
-    deepmd::env_mat_a_cpu(env, env_deriv, rij_a, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a[2]*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a[2]*3);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(env[jj*4+dd] - expected_env[ii*sec_a[2]*4 + jj*4 + dd]) , 1e-5);
+    deepmd::env_mat_a_cpu(env, env_deriv, rij_a, posi_cpy, atype_cpy,
+                                  ii, fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a[2] * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a[2] * 3);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(env[jj * 4 + dd] -
+                       expected_env[ii * sec_a[2] * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-
-TEST_F(TestEnvMatA, prod_cpu)
-{
+TEST_F(TestEnvMatA, prod_cpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
@@ -401,48 +498,34 @@ TEST_F(TestEnvMatA, prod_cpu)
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   deepmd::convert_nlist(inlist, nlist_a_cpy);
-  
-  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3), rij(nloc * nnei * 3);
+
+  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
+      rij(nloc * nnei * 3);
   std::vector nlist(nloc * nnei);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
-  deepmd::prod_env_mat_a_cpu(
-      &em[0],
-      &em_deriv[0],
-      &rij[0],
-      &nlist[0],
-      &posi_cpy[0],
-      &atype_cpy[0],
-      inlist,
-      max_nbor_size,
-      &avg[0],
-      &std[0],
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a);
-
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] - 
-		       expected_env[ii*nnei*4 + jj*4 + dd]) , 
-		  1e-5);
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
+  deepmd::prod_env_mat_a_cpu(&em[0], &em_deriv[0], &rij[0], &nlist[0],
+                             &posi_cpy[0], &atype_cpy[0], inlist, max_nbor_size,
+                             &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
+
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
+                       expected_env[ii * nnei * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-
-TEST_F(TestEnvMatA, prod_cpu_equal_cpu)
-{
+TEST_F(TestEnvMatA, prod_cpu_equal_cpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
@@ -450,33 +533,24 @@ TEST_F(TestEnvMatA, prod_cpu_equal_cpu)
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3), rij(nloc * nnei * 3);
+  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
+      rij(nloc * nnei * 3);
   std::vector nlist(nloc * nnei);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
-  deepmd::prod_env_mat_a_cpu(
-      &em[0],
-      &em_deriv[0],
-      &rij[0],
-      &nlist[0],
-      &posi_cpy[0],
-      &atype_cpy[0],
-      inlist,
-      max_nbor_size,
-      &avg[0],
-      &std[0],
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a);
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
+  deepmd::prod_env_mat_a_cpu(&em[0], &em_deriv[0], &rij[0], &nlist[0],
+                             &posi_cpy[0], &atype_cpy[0], inlist, max_nbor_size,
+                             &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
 
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);  
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
+    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy,
+                                  atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth,
+                                  rc);
     EXPECT_EQ(env_1.size(), nnei * 4);
     EXPECT_EQ(env_deriv_1.size(), nnei * 4 * 3);
     EXPECT_EQ(rij_a_1.size(), nnei * 3);
@@ -485,67 +559,67 @@ TEST_F(TestEnvMatA, prod_cpu_equal_cpu)
     EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size());
     EXPECT_EQ(rij_a_1.size() * nloc, rij.size());
     EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size());
-    for (unsigned jj = 0; jj < env_1.size(); ++jj){
-      EXPECT_LT(fabs(em[ii*nnei*4+jj] - env_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < env_1.size(); ++jj) {
+      EXPECT_LT(fabs(em[ii * nnei * 4 + jj] - env_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) {
+      EXPECT_LT(fabs(em_deriv[ii * nnei * 4 * 3 + jj] - env_deriv_1[jj]),
+                1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj){
-      EXPECT_LT(fabs(em_deriv[ii*nnei*4*3+jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj){
-      EXPECT_LT(fabs(rij[ii*nnei*3+jj] - rij_a_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) {
+      EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj){
-      EXPECT_EQ(nlist[ii*nnei+jj], fmt_nlist_a_1[jj]);
+    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) {
+      EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]);
     }
   }
 
   // for(int ii = 0; ii < nloc; ++ii){
   //   for (int jj = 0; jj < nnei; ++jj){
   //     for (int dd = 0; dd < 4; ++dd){
-  //   	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] - 
-  // 		       expected_env[ii*nnei*4 + jj*4 + dd]) , 
+  //   	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] -
+  // 		       expected_env[ii*nnei*4 + jj*4 + dd]) ,
   // 		  1e-5);
   //     }
-  //   }    
+  //   }
   // }
 }
 
-
 #if GOOGLE_CUDA
-TEST_F(TestEnvMatA, prod_gpu_cuda)
-{
+TEST_F(TestEnvMatA, prod_gpu_cuda) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt, 0.0), em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
+  std::vector em(nloc * ndescrpt, 0.0),
+      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
   std::vector nlist(nloc * nnei, 0);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
-
-  double * em_dev = NULL, * em_deriv_dev = NULL, * rij_dev = NULL;
-  double * posi_cpy_dev = NULL, * avg_dev = NULL, * std_dev = NULL;
-  int * atype_cpy_dev = NULL, * nlist_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
+
+  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
+  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
+  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
+      *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -554,29 +628,18 @@ TEST_F(TestEnvMatA, prod_gpu_cuda)
   deepmd::malloc_device_memory_sync(std_dev, std);
   deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, max_nbor_size);
-
-  deepmd::prod_env_mat_a_gpu_cuda(    
-      em_dev, 
-      em_deriv_dev, 
-      rij_dev, 
-      nlist_dev, 
-      posi_cpy_dev, 
-      atype_cpy_dev, 
-      gpu_inlist,
-      array_int_dev, 
-      array_longlong_dev,
-      max_nbor_size,
-      avg_dev, 
-      std_dev, 
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
+                                   max_nbor_size);
+
+  deepmd::prod_env_mat_a_gpu_cuda(
+      em_dev, em_deriv_dev, rij_dev, nlist_dev, posi_cpy_dev, atype_cpy_dev,
+      gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, avg_dev,
+      std_dev, nloc, nall, rc, rc_smth, sec_a);
   deepmd::memcpy_device_to_host(em_dev, em);
   deepmd::delete_device_memory(em_dev);
   deepmd::delete_device_memory(em_deriv_dev);
@@ -590,52 +653,51 @@ TEST_F(TestEnvMatA, prod_gpu_cuda)
   deepmd::delete_device_memory(memory_dev);
   deepmd::free_nlist_gpu_device(gpu_inlist);
 
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] - 
-		       expected_env[ii*nnei*4 + jj*4 + dd]) , 
-		  1e-5);
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
+                       expected_env[ii * nnei * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-
-TEST_F(TestEnvMatA, prod_gpu_cuda_equal_cpu)
-{
+TEST_F(TestEnvMatA, prod_gpu_cuda_equal_cpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt, 0.0), em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
+  std::vector em(nloc * ndescrpt, 0.0),
+      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
   std::vector nlist(nloc * nnei, 0);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
-
-  double * em_dev = NULL, * em_deriv_dev = NULL, * rij_dev = NULL;
-  double * posi_cpy_dev = NULL, * avg_dev = NULL, * std_dev = NULL;
-  int * atype_cpy_dev = NULL, * nlist_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
+
+  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
+  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
+  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
+      *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -645,29 +707,18 @@ TEST_F(TestEnvMatA, prod_gpu_cuda_equal_cpu)
 
   deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, max_nbor_size);
-
-  deepmd::prod_env_mat_a_gpu_cuda(    
-      em_dev, 
-      em_deriv_dev, 
-      rij_dev, 
-      nlist_dev, 
-      posi_cpy_dev, 
-      atype_cpy_dev, 
-      gpu_inlist,
-      array_int_dev, 
-      array_longlong_dev,
-      max_nbor_size,
-      avg_dev, 
-      std_dev, 
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
+                                   max_nbor_size);
+
+  deepmd::prod_env_mat_a_gpu_cuda(
+      em_dev, em_deriv_dev, rij_dev, nlist_dev, posi_cpy_dev, atype_cpy_dev,
+      gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, avg_dev,
+      std_dev, nloc, nall, rc, rc_smth, sec_a);
   deepmd::memcpy_device_to_host(em_dev, em);
   deepmd::memcpy_device_to_host(em_deriv_dev, em_deriv);
   deepmd::memcpy_device_to_host(rij_dev, rij);
@@ -686,10 +737,13 @@ TEST_F(TestEnvMatA, prod_gpu_cuda_equal_cpu)
 
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);  
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
+    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy,
+                                  atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth,
+                                  rc);
     EXPECT_EQ(env_1.size(), nnei * 4);
     EXPECT_EQ(env_deriv_1.size(), nnei * 4 * 3);
     EXPECT_EQ(rij_a_1.size(), nnei * 3);
@@ -698,67 +752,68 @@ TEST_F(TestEnvMatA, prod_gpu_cuda_equal_cpu)
     EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size());
     EXPECT_EQ(rij_a_1.size() * nloc, rij.size());
     EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size());
-    for (unsigned jj = 0; jj < env_1.size(); ++jj){
-      EXPECT_LT(fabs(em[ii*nnei*4+jj] - env_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < env_1.size(); ++jj) {
+      EXPECT_LT(fabs(em[ii * nnei * 4 + jj] - env_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) {
+      EXPECT_LT(fabs(em_deriv[ii * nnei * 4 * 3 + jj] - env_deriv_1[jj]),
+                1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj){
-      EXPECT_LT(fabs(em_deriv[ii*nnei*4*3+jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj){
-      EXPECT_LT(fabs(rij[ii*nnei*3+jj] - rij_a_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) {
+      EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj){
-      EXPECT_EQ(nlist[ii*nnei+jj], fmt_nlist_a_1[jj]);
+    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) {
+      EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]);
     }
   }
 
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] - 
-  		       expected_env[ii*nnei*4 + jj*4 + dd]) , 
-  		  1e-5);
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
+                       expected_env[ii * nnei * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
-#endif //GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestEnvMatA, prod_gpu_rocm)
-{
+TEST_F(TestEnvMatA, prod_gpu_rocm) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt, 0.0), em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
+  std::vector em(nloc * ndescrpt, 0.0),
+      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
   std::vector nlist(nloc * nnei, 0);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
-
-  double * em_dev = NULL, * em_deriv_dev = NULL, * rij_dev = NULL;
-  double * posi_cpy_dev = NULL, * avg_dev = NULL, * std_dev = NULL;
-  int * atype_cpy_dev = NULL, * nlist_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
+
+  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
+  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
+  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
+      *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -767,29 +822,18 @@ TEST_F(TestEnvMatA, prod_gpu_rocm)
   deepmd::malloc_device_memory_sync(std_dev, std);
   deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, max_nbor_size);
-
-  deepmd::prod_env_mat_a_gpu_rocm(    
-      em_dev, 
-      em_deriv_dev, 
-      rij_dev, 
-      nlist_dev, 
-      posi_cpy_dev, 
-      atype_cpy_dev, 
-      gpu_inlist,
-      array_int_dev, 
-      array_longlong_dev,
-      max_nbor_size,
-      avg_dev, 
-      std_dev, 
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
+                                   max_nbor_size);
+
+  deepmd::prod_env_mat_a_gpu_rocm(
+      em_dev, em_deriv_dev, rij_dev, nlist_dev, posi_cpy_dev, atype_cpy_dev,
+      gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, avg_dev,
+      std_dev, nloc, nall, rc, rc_smth, sec_a);
   deepmd::memcpy_device_to_host(em_dev, em);
   deepmd::delete_device_memory(em_dev);
   deepmd::delete_device_memory(em_deriv_dev);
@@ -803,52 +847,51 @@ TEST_F(TestEnvMatA, prod_gpu_rocm)
   deepmd::delete_device_memory(memory_dev);
   deepmd::free_nlist_gpu_device(gpu_inlist);
 
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] - 
-		       expected_env[ii*nnei*4 + jj*4 + dd]) , 
-		  1e-5);
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
+                       expected_env[ii * nnei * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-
-TEST_F(TestEnvMatA, prod_gpu_rocm_equal_cpu)
-{
+TEST_F(TestEnvMatA, prod_gpu_rocm_equal_cpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt, 0.0), em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
+  std::vector em(nloc * ndescrpt, 0.0),
+      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
   std::vector nlist(nloc * nnei, 0);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
-
-  double * em_dev = NULL, * em_deriv_dev = NULL, * rij_dev = NULL;
-  double * posi_cpy_dev = NULL, * avg_dev = NULL, * std_dev = NULL;
-  int * atype_cpy_dev = NULL, * nlist_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
+
+  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
+  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
+  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
+      *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -858,29 +901,18 @@ TEST_F(TestEnvMatA, prod_gpu_rocm_equal_cpu)
 
   deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, max_nbor_size);
-
-  deepmd::prod_env_mat_a_gpu_rocm(    
-      em_dev, 
-      em_deriv_dev, 
-      rij_dev, 
-      nlist_dev, 
-      posi_cpy_dev, 
-      atype_cpy_dev, 
-      gpu_inlist,
-      array_int_dev, 
-      array_longlong_dev,
-      max_nbor_size,
-      avg_dev, 
-      std_dev, 
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
+                                   max_nbor_size);
+
+  deepmd::prod_env_mat_a_gpu_rocm(
+      em_dev, em_deriv_dev, rij_dev, nlist_dev, posi_cpy_dev, atype_cpy_dev,
+      gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, avg_dev,
+      std_dev, nloc, nall, rc, rc_smth, sec_a);
   deepmd::memcpy_device_to_host(em_dev, em);
   deepmd::memcpy_device_to_host(em_deriv_dev, em_deriv);
   deepmd::memcpy_device_to_host(rij_dev, rij);
@@ -899,10 +931,13 @@ TEST_F(TestEnvMatA, prod_gpu_rocm_equal_cpu)
 
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);  
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
+    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy,
+                                  atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth,
+                                  rc);
     EXPECT_EQ(env_1.size(), nnei * 4);
     EXPECT_EQ(env_deriv_1.size(), nnei * 4 * 3);
     EXPECT_EQ(rij_a_1.size(), nnei * 3);
@@ -911,28 +946,29 @@ TEST_F(TestEnvMatA, prod_gpu_rocm_equal_cpu)
     EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size());
     EXPECT_EQ(rij_a_1.size() * nloc, rij.size());
     EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size());
-    for (unsigned jj = 0; jj < env_1.size(); ++jj){
-      EXPECT_LT(fabs(em[ii*nnei*4+jj] - env_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < env_1.size(); ++jj) {
+      EXPECT_LT(fabs(em[ii * nnei * 4 + jj] - env_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) {
+      EXPECT_LT(fabs(em_deriv[ii * nnei * 4 * 3 + jj] - env_deriv_1[jj]),
+                1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj){
-      EXPECT_LT(fabs(em_deriv[ii*nnei*4*3+jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj){
-      EXPECT_LT(fabs(rij[ii*nnei*3+jj] - rij_a_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) {
+      EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj){
-      EXPECT_EQ(nlist[ii*nnei+jj], fmt_nlist_a_1[jj]);
+    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) {
+      EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]);
     }
   }
 
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] - 
-  		       expected_env[ii*nnei*4 + jj*4 + dd]) , 
-  		  1e-5);
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
+                       expected_env[ii * nnei * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
-#endif //TENSORFLOW_USE_ROCM
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_env_mat_a_mix.cc b/source/lib/tests/test_env_mat_a_mix.cc
index 03d01fe01d..e257c66dc6 100644
--- a/source/lib/tests/test_env_mat_a_mix.cc
+++ b/source/lib/tests/test_env_mat_a_mix.cc
@@ -1,30 +1,27 @@
-#include 
 #include 
-#include "fmt_nlist.h"
+
+#include 
+
+#include "device.h"
 #include "env_mat.h"
-#include "prod_env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
-#include "device.h"
+#include "prod_env_mat.h"
 
-class TestEnvMatAMix : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector f_atype = {0, 0, 0, 0, 0, 0};
-  std::vector posi_cpy;
-//   std::vector atype_cpy;
-  std::vector f_atype_cpy;
+class TestEnvMatAMix : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector f_atype = {0, 0, 0, 0, 0, 0};
+  std::vector posi_cpy;
+  //   std::vector atype_cpy;
+  std::vector f_atype_cpy;
   int nloc, nall;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 20};
   std::vector sec_r = {0, 0};
@@ -33,74 +30,130 @@ class TestEnvMatAMix : public ::testing::Test
   std::vector> nlist_a_cpy, nlist_r_cpy;
   std::vector> ntype;
   int f_ntypes = 1;
-  int ntypes = 2; // this information normally comes from natoms or avg/std
+  int ntypes = 2;  // this information normally comes from natoms or avg/std
   int nnei = sec_a.back();
   int ndescrpt = nnei * 4;
-  std::vector expected_env = {
-    1.02167, -0.77271, 0.32370, 0.58475, 0.99745, 0.41810, 0.75655, -0.49773, 0.12206, 0.12047, 0.01502, -0.01263, 0.10564, 0.10495, -0.00143, 0.01198, 0.03103, 0.03041, 0.00452, -0.00425, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-    1.02167, 0.77271, -0.32370, -0.58475, 0.59220, 0.42028, 0.16304, -0.38405, 0.04135, 0.04039, 0.00123, -0.00880, 0.03694, 0.03680, -0.00300, -0.00117, 0.00336, 0.00327, 0.00022, -0.00074, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-    0.99745, -0.41810, -0.75655, 0.49773, 0.59220, -0.42028, -0.16304, 0.38405, 0.19078, 0.18961, -0.01951, 0.00793, 0.13499, 0.12636, -0.03140, 0.03566, 0.07054, 0.07049, -0.00175, -0.00210, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,  
-    1.06176, 0.16913, -0.55250, 0.89077, 1.03163, 0.96880, 0.23422, -0.26615, 0.19078, -0.18961, 0.01951, -0.00793, 0.12206, -0.12047, -0.01502, 0.01263, 0.04135, -0.04039, -0.00123, 0.00880, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-    1.06176, -0.16913, 0.55250, -0.89077, 0.66798, 0.34516, 0.32245, -0.47232, 0.13499, -0.12636, 0.03140, -0.03566, 0.10564, -0.10495, 0.00143, -0.01198, 0.03694, -0.03680, 0.00300, 0.00117, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-    1.03163, -0.96880, -0.23422, 0.26615, 0.66798, -0.34516, -0.32245, 0.47232, 0.07054, -0.07049, 0.00175, 0.00210, 0.03103, -0.03041, -0.00452, 0.00425, 0.00336, -0.00327, -0.00022, 0.00074, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
+  std::vector expected_env = {
+      1.02167,  -0.77271, 0.32370,  0.58475,  0.99745,  0.41810,  0.75655,
+      -0.49773, 0.12206,  0.12047,  0.01502,  -0.01263, 0.10564,  0.10495,
+      -0.00143, 0.01198,  0.03103,  0.03041,  0.00452,  -0.00425, 0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  1.02167,  0.77271,  -0.32370, -0.58475,
+      0.59220,  0.42028,  0.16304,  -0.38405, 0.04135,  0.04039,  0.00123,
+      -0.00880, 0.03694,  0.03680,  -0.00300, -0.00117, 0.00336,  0.00327,
+      0.00022,  -0.00074, 0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.99745,
+      -0.41810, -0.75655, 0.49773,  0.59220,  -0.42028, -0.16304, 0.38405,
+      0.19078,  0.18961,  -0.01951, 0.00793,  0.13499,  0.12636,  -0.03140,
+      0.03566,  0.07054,  0.07049,  -0.00175, -0.00210, 0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  1.06176,  0.16913,  -0.55250, 0.89077,  1.03163,
+      0.96880,  0.23422,  -0.26615, 0.19078,  -0.18961, 0.01951,  -0.00793,
+      0.12206,  -0.12047, -0.01502, 0.01263,  0.04135,  -0.04039, -0.00123,
+      0.00880,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  1.06176,  -0.16913,
+      0.55250,  -0.89077, 0.66798,  0.34516,  0.32245,  -0.47232, 0.13499,
+      -0.12636, 0.03140,  -0.03566, 0.10564,  -0.10495, 0.00143,  -0.01198,
+      0.03694,  -0.03680, 0.00300,  0.00117,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  1.03163,  -0.96880, -0.23422, 0.26615,  0.66798,  -0.34516,
+      -0.32245, 0.47232,  0.07054,  -0.07049, 0.00175,  0.00210,  0.03103,
+      -0.03041, -0.00452, 0.00425,  0.00336,  -0.00327, -0.00022, 0.00074,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,
+  };
+  std::vector expected_ntype = {
+      1, 1, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1,
+      1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1, 1, 2, 2, 2,
+      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2,
+      2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+      2, 2, 2, 2, 0, 1, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  };
+  std::vector expected_nmask = {
+      1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   };
-  std::vector expected_ntype = {
-    1, 1, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    0, 1, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    0, 1, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    1, 1, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    0, 1, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    0, 1, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-  }; 
-  std::vector expected_nmask = {
-    1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-  }; 
-  
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, f_atype_cpy, mapping, ncell, ngcell, posi, f_atype, rc, region);
+    copy_coord(posi_cpy, f_atype_cpy, mapping, ncell, ngcell, posi, f_atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
     build_nlist(nlist_a, nlist_r, posi, rc, rc, ncell, region);
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-  }
-  void TearDown() override {
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
   }
+  void TearDown() override {}
 };
 
-
-class TestEnvMatAMixShortSel : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector f_atype = {0, 0, 0, 0, 0, 0};
-  std::vector posi_cpy;
-//   std::vector atype_cpy;
-  std::vector f_atype_cpy;
+class TestEnvMatAMixShortSel : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector f_atype = {0, 0, 0, 0, 0, 0};
+  std::vector posi_cpy;
+  //   std::vector atype_cpy;
+  std::vector f_atype_cpy;
   int nloc, nall;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 4};
   std::vector sec_r = {0, 0};
@@ -109,326 +162,363 @@ class TestEnvMatAMixShortSel : public ::testing::Test
   std::vector> nlist_a_cpy, nlist_r_cpy;
   std::vector> ntype;
   int f_ntypes = 1;
-  int ntypes = 2; // this information normally comes from natoms or avg/std
+  int ntypes = 2;  // this information normally comes from natoms or avg/std
   int nnei = sec_a.back();
   int ndescrpt = nnei * 4;
-  std::vector expected_env = {
-    1.02167, -0.77271, 0.32370, 0.58475, 0.99745, 0.41810, 0.75655, -0.49773, 0.12206, 0.12047, 0.01502, -0.01263, 0.10564, 0.10495, -0.00143, 0.01198,
-    1.02167, 0.77271, -0.32370, -0.58475, 0.59220, 0.42028, 0.16304, -0.38405, 0.04135, 0.04039, 0.00123, -0.00880, 0.03694, 0.03680, -0.00300, -0.00117,
-    0.99745, -0.41810, -0.75655, 0.49773, 0.59220, -0.42028, -0.16304, 0.38405, 0.19078, 0.18961, -0.01951, 0.00793, 0.13499, 0.12636, -0.03140, 0.03566,
-    1.06176, 0.16913, -0.55250, 0.89077, 1.03163, 0.96880, 0.23422, -0.26615, 0.19078, -0.18961, 0.01951, -0.00793, 0.12206, -0.12047, -0.01502, 0.01263,
-    1.06176, -0.16913, 0.55250, -0.89077, 0.66798, 0.34516, 0.32245, -0.47232, 0.13499, -0.12636, 0.03140, -0.03566, 0.10564, -0.10495, 0.00143, -0.01198,
-    1.03163, -0.96880, -0.23422, 0.26615, 0.66798, -0.34516, -0.32245, 0.47232, 0.07054, -0.07049, 0.00175, 0.00210, 0.03103, -0.03041, -0.00452, 0.00425,
+  std::vector expected_env = {
+      1.02167,  -0.77271, 0.32370,  0.58475,  0.99745,  0.41810,  0.75655,
+      -0.49773, 0.12206,  0.12047,  0.01502,  -0.01263, 0.10564,  0.10495,
+      -0.00143, 0.01198,  1.02167,  0.77271,  -0.32370, -0.58475, 0.59220,
+      0.42028,  0.16304,  -0.38405, 0.04135,  0.04039,  0.00123,  -0.00880,
+      0.03694,  0.03680,  -0.00300, -0.00117, 0.99745,  -0.41810, -0.75655,
+      0.49773,  0.59220,  -0.42028, -0.16304, 0.38405,  0.19078,  0.18961,
+      -0.01951, 0.00793,  0.13499,  0.12636,  -0.03140, 0.03566,  1.06176,
+      0.16913,  -0.55250, 0.89077,  1.03163,  0.96880,  0.23422,  -0.26615,
+      0.19078,  -0.18961, 0.01951,  -0.00793, 0.12206,  -0.12047, -0.01502,
+      0.01263,  1.06176,  -0.16913, 0.55250,  -0.89077, 0.66798,  0.34516,
+      0.32245,  -0.47232, 0.13499,  -0.12636, 0.03140,  -0.03566, 0.10564,
+      -0.10495, 0.00143,  -0.01198, 1.03163,  -0.96880, -0.23422, 0.26615,
+      0.66798,  -0.34516, -0.32245, 0.47232,  0.07054,  -0.07049, 0.00175,
+      0.00210,  0.03103,  -0.03041, -0.00452, 0.00425,
   };
-  std::vector expected_ntype = {
-    1, 1, 0, 1,
-    1, 1, 1, 0,
-    0, 1, 0, 1,
-    0, 1, 0, 1,
-    0, 1, 1, 0,
-    0, 1, 1, 0,
-  }; 
-  std::vector expected_nmask = {
-    1, 1, 1, 1,
-    1, 1, 1, 1,
-    1, 1, 1, 1,
-    1, 1, 1, 1,
-    1, 1, 1, 1,
-    1, 1, 1, 1,
-  }; 
-  
+  std::vector expected_ntype = {
+      1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0,
+  };
+  std::vector expected_nmask = {
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  };
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, f_atype_cpy, mapping, ncell, ngcell, posi, f_atype, rc, region);
+    copy_coord(posi_cpy, f_atype_cpy, mapping, ncell, ngcell, posi, f_atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
     build_nlist(nlist_a, nlist_r, posi, rc, rc, ncell, region);
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-  }
-  void TearDown() override {
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
   }
+  void TearDown() override {}
 };
 
-
-TEST_F(TestEnvMatAMix, orig_cpy)
-{
+TEST_F(TestEnvMatAMix, orig_cpy) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = false;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, f_atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, f_atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, -1);
-    env_mat_a(env, env_deriv, rij_a, posi_cpy, f_ntypes, f_atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a.back()*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a.back()*3);
-    for (int jj = 0; jj < sec_a.back(); ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(env[jj*4+dd] - expected_env[ii*sec_a.back()*4 + jj*4 + dd]) , 1e-5);
+    env_mat_a(env, env_deriv, rij_a, posi_cpy, f_ntypes, f_atype_cpy, region,
+              pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a.back() * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a.back() * 3);
+    for (int jj = 0; jj < sec_a.back(); ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(env[jj * 4 + dd] -
+                       expected_env[ii * sec_a.back() * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
     // for (int jj = 0; jj < sec_a.back(); ++jj){
-    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1], env[jj*4+2], env[jj*4+3]);
+    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1],
+    //   env[jj*4+2], env[jj*4+3]);
     // }
     // printf("\n");
   }
 }
 
-TEST_F(TestEnvMatAMix, orig_pbc)
-{
+TEST_F(TestEnvMatAMix, orig_pbc) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = true;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, posi, f_ntypes, f_atype, region, pbc, ii, nlist_a[ii], nlist_r[ii], rc, sec_a, sec_r);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, posi, f_ntypes,
+                                    f_atype, region, pbc, ii, nlist_a[ii],
+                                    nlist_r[ii], rc, sec_a, sec_r);
     EXPECT_EQ(ret, -1);
-    env_mat_a(env, env_deriv, rij_a, posi, f_ntypes, f_atype, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a.back()*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a.back()*3);
-    for (int jj = 0; jj < sec_a.back(); ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(env[jj*4+dd] - expected_env[ii*sec_a.back()*4 + jj*4 + dd]) , 1e-5);
+    env_mat_a(env, env_deriv, rij_a, posi, f_ntypes, f_atype, region, pbc, ii,
+              fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a.back() * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a.back() * 3);
+    for (int jj = 0; jj < sec_a.back(); ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(env[jj * 4 + dd] -
+                       expected_env[ii * sec_a.back() * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-TEST_F(TestEnvMatAMix, orig_cpy_equal_pbc)
-{
+TEST_F(TestEnvMatAMix, orig_cpy_equal_pbc) {
   std::vector fmt_nlist_a_0, fmt_nlist_r_0;
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_0, env_deriv_0, rij_a_0;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_0 = format_nlist_i_cpu(fmt_nlist_a_0, posi_cpy, f_atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_0 = format_nlist_i_cpu(fmt_nlist_a_0, posi_cpy, f_atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_0, -1);
-    env_mat_a(env_0, env_deriv_0, rij_a_0, posi_cpy, f_ntypes, f_atype_cpy, region, false, ii, fmt_nlist_a_0, sec_a, rc_smth, rc);
-    int ret_1 = format_nlist_i_fill_a(fmt_nlist_a_1, fmt_nlist_r_1, posi, f_ntypes, f_atype, region, true, ii, nlist_a[ii], nlist_r[ii], rc, sec_a, sec_r);    
+    env_mat_a(env_0, env_deriv_0, rij_a_0, posi_cpy, f_ntypes, f_atype_cpy,
+              region, false, ii, fmt_nlist_a_0, sec_a, rc_smth, rc);
+    int ret_1 = format_nlist_i_fill_a(
+        fmt_nlist_a_1, fmt_nlist_r_1, posi, f_ntypes, f_atype, region, true, ii,
+        nlist_a[ii], nlist_r[ii], rc, sec_a, sec_r);
     EXPECT_EQ(ret_1, -1);
-    env_mat_a(env_1, env_deriv_1, rij_a_1, posi, f_ntypes, f_atype, region, true, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);    
+    env_mat_a(env_1, env_deriv_1, rij_a_1, posi, f_ntypes, f_atype, region,
+              true, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
     EXPECT_EQ(env_0.size(), env_1.size());
     EXPECT_EQ(env_deriv_0.size(), env_deriv_1.size());
     EXPECT_EQ(rij_a_0.size(), rij_a_1.size());
-    for (unsigned jj = 0; jj < env_0.size(); ++jj){
+    for (unsigned jj = 0; jj < env_0.size(); ++jj) {
       EXPECT_LT(fabs(env_0[jj] - env_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_0.size(); ++jj){
-      EXPECT_LT(fabs(env_deriv_0[jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_0.size(); ++jj){
+    for (unsigned jj = 0; jj < env_deriv_0.size(); ++jj) {
+      EXPECT_LT(fabs(env_deriv_0[jj] - env_deriv_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < rij_a_0.size(); ++jj) {
       EXPECT_LT(fabs(rij_a_0[jj] - rij_a_1[jj]), 1e-10);
     }
   }
 }
 
-TEST_F(TestEnvMatAMix, orig_cpy_num_deriv)
-{
+TEST_F(TestEnvMatAMix, orig_cpy_num_deriv) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_0, env_1, env_deriv, env_deriv_tmp, rij_a;
   bool pbc = false;
   double hh = 1e-5;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, f_atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, f_atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, -1);
-    env_mat_a(env, env_deriv, rij_a, posi_cpy, f_ntypes, f_atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a.back()*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a.back()*3);
+    env_mat_a(env, env_deriv, rij_a, posi_cpy, f_ntypes, f_atype_cpy, region,
+              pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a.back() * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a.back() * 3);
 
-    for (int jj = 0; jj < sec_a.back(); ++jj){
+    for (int jj = 0; jj < sec_a.back(); ++jj) {
       int j_idx = fmt_nlist_a[jj];
       if (j_idx < 0) continue;
-      for (int kk = 0; kk < 4; ++kk){
-	for (int dd = 0; dd < 3; ++dd){
-	  std::vector posi_0 = posi_cpy;
-	  std::vector posi_1 = posi_cpy;
-	  posi_0[j_idx*3+dd] -= hh;
-	  posi_1[j_idx*3+dd] += hh;
-	  env_mat_a(env_0, env_deriv_tmp, rij_a, posi_0, f_ntypes, f_atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
-	  env_mat_a(env_1, env_deriv_tmp, rij_a, posi_1, f_ntypes, f_atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
-	  double num_deriv = (env_1[jj*4+kk] - env_0[jj*4+kk])/(2.*hh);
-	  double ana_deriv = -env_deriv[jj*12+kk*3+dd];
-	  EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-5);
-	}
+      for (int kk = 0; kk < 4; ++kk) {
+        for (int dd = 0; dd < 3; ++dd) {
+          std::vector posi_0 = posi_cpy;
+          std::vector posi_1 = posi_cpy;
+          posi_0[j_idx * 3 + dd] -= hh;
+          posi_1[j_idx * 3 + dd] += hh;
+          env_mat_a(env_0, env_deriv_tmp, rij_a, posi_0, f_ntypes, f_atype_cpy,
+                    region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+          env_mat_a(env_1, env_deriv_tmp, rij_a, posi_1, f_ntypes, f_atype_cpy,
+                    region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+          double num_deriv =
+              (env_1[jj * 4 + kk] - env_0[jj * 4 + kk]) / (2. * hh);
+          double ana_deriv = -env_deriv[jj * 12 + kk * 3 + dd];
+          EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-5);
+        }
       }
     }
     // for (int jj = 0; jj < sec_a.back(); ++jj){
-    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1], env[jj*4+2], env[jj*4+3]);
+    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1],
+    //   env[jj*4+2], env[jj*4+3]);
     // }
     // printf("\n");
   }
 }
 
-TEST_F(TestEnvMatAMix, cpu)
-{
+TEST_F(TestEnvMatAMix, cpu) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = false;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, f_atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, f_atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, -1);
-    deepmd::env_mat_a_cpu(env, env_deriv, rij_a, posi_cpy, f_atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a.back()*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a.back()*3);
-    for (int jj = 0; jj < sec_a.back(); ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(env[jj*4+dd] - expected_env[ii*sec_a.back()*4 + jj*4 + dd]) , 1e-5);
+    deepmd::env_mat_a_cpu(env, env_deriv, rij_a, posi_cpy, f_atype_cpy,
+                                  ii, fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a.back() * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a.back() * 3);
+    for (int jj = 0; jj < sec_a.back(); ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(env[jj * 4 + dd] -
+                       expected_env[ii * sec_a.back() * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-TEST_F(TestEnvMatAMix, cpu_equal_orig_cpy)
-{
+TEST_F(TestEnvMatAMix, cpu_equal_orig_cpy) {
   std::vector fmt_nlist_a_0, fmt_nlist_r_0;
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_0, env_deriv_0, rij_a_0;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_0 = format_nlist_i_cpu(fmt_nlist_a_0, posi_cpy, f_atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_0 = format_nlist_i_cpu(fmt_nlist_a_0, posi_cpy, f_atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_0, -1);
-    env_mat_a(env_0, env_deriv_0, rij_a_0, posi_cpy, f_ntypes, f_atype_cpy, region, false, ii, fmt_nlist_a_0, sec_a, rc_smth, rc);
+    env_mat_a(env_0, env_deriv_0, rij_a_0, posi_cpy, f_ntypes, f_atype_cpy,
+              region, false, ii, fmt_nlist_a_0, sec_a, rc_smth, rc);
+
+    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, f_atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
 
-    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, f_atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);  
-  
     EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, f_atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
+    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy,
+                                  f_atype_cpy, ii, fmt_nlist_a_1, sec_a,
+                                  rc_smth, rc);
 
     EXPECT_EQ(env_0.size(), env_1.size());
     EXPECT_EQ(env_deriv_0.size(), env_deriv_1.size());
     EXPECT_EQ(rij_a_0.size(), rij_a_1.size());
-    for (unsigned jj = 0; jj < env_0.size(); ++jj){
+    for (unsigned jj = 0; jj < env_0.size(); ++jj) {
       EXPECT_LT(fabs(env_0[jj] - env_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_0.size(); ++jj){
-      EXPECT_LT(fabs(env_deriv_0[jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_0.size(); ++jj){
+    for (unsigned jj = 0; jj < env_deriv_0.size(); ++jj) {
+      EXPECT_LT(fabs(env_deriv_0[jj] - env_deriv_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < rij_a_0.size(); ++jj) {
       EXPECT_LT(fabs(rij_a_0[jj] - rij_a_1[jj]), 1e-10);
     }
   }
 }
 
-TEST_F(TestEnvMatAMix, cpu_num_deriv)
-{
+TEST_F(TestEnvMatAMix, cpu_num_deriv) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_0, env_1, env_deriv, env_deriv_tmp, rij_a;
   bool pbc = false;
   double hh = 1e-5;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, f_atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, f_atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, -1);
-    deepmd::env_mat_a_cpu(env, env_deriv, rij_a, posi_cpy, f_atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a.back()*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a.back()*3);
+    deepmd::env_mat_a_cpu(env, env_deriv, rij_a, posi_cpy, f_atype_cpy,
+                                  ii, fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a.back() * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a.back() * 3);
 
-    for (int jj = 0; jj < sec_a.back(); ++jj){
+    for (int jj = 0; jj < sec_a.back(); ++jj) {
       int j_idx = fmt_nlist_a[jj];
       if (j_idx < 0) continue;
-      for (int kk = 0; kk < 4; ++kk){
-	for (int dd = 0; dd < 3; ++dd){
-	  std::vector posi_0 = posi_cpy;
-	  std::vector posi_1 = posi_cpy;
-	  posi_0[j_idx*3+dd] -= hh;
-	  posi_1[j_idx*3+dd] += hh;
-	  env_mat_a(env_0, env_deriv_tmp, rij_a, posi_0, f_ntypes, f_atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
-	  env_mat_a(env_1, env_deriv_tmp, rij_a, posi_1, f_ntypes, f_atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
-	  double num_deriv = (env_1[jj*4+kk] - env_0[jj*4+kk])/(2.*hh);
-	  double ana_deriv = -env_deriv[jj*12+kk*3+dd];
-	  EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-5);
-	}
+      for (int kk = 0; kk < 4; ++kk) {
+        for (int dd = 0; dd < 3; ++dd) {
+          std::vector posi_0 = posi_cpy;
+          std::vector posi_1 = posi_cpy;
+          posi_0[j_idx * 3 + dd] -= hh;
+          posi_1[j_idx * 3 + dd] += hh;
+          env_mat_a(env_0, env_deriv_tmp, rij_a, posi_0, f_ntypes, f_atype_cpy,
+                    region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+          env_mat_a(env_1, env_deriv_tmp, rij_a, posi_1, f_ntypes, f_atype_cpy,
+                    region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+          double num_deriv =
+              (env_1[jj * 4 + kk] - env_0[jj * 4 + kk]) / (2. * hh);
+          double ana_deriv = -env_deriv[jj * 12 + kk * 3 + dd];
+          EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-5);
+        }
       }
     }
     // for (int jj = 0; jj < sec_a.back(); ++jj){
-    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1], env[jj*4+2], env[jj*4+3]);
+    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1],
+    //   env[jj*4+2], env[jj*4+3]);
     // }
     // printf("\n");
   }
 }
 
-TEST_F(TestEnvMatAMixShortSel, orig_cpy)
-{
+TEST_F(TestEnvMatAMixShortSel, orig_cpy) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = false;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, f_atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, f_atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, 0);
-    env_mat_a(env, env_deriv, rij_a, posi_cpy, f_ntypes, f_atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a.back()*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a.back()*3);
-    for (int jj = 0; jj < sec_a.back(); ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(env[jj*4+dd] - expected_env[ii*sec_a.back()*4 + jj*4 + dd]) , 1e-5);
+    env_mat_a(env, env_deriv, rij_a, posi_cpy, f_ntypes, f_atype_cpy, region,
+              pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a.back() * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a.back() * 3);
+    for (int jj = 0; jj < sec_a.back(); ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(env[jj * 4 + dd] -
+                       expected_env[ii * sec_a.back() * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
     // for (int jj = 0; jj < sec_a.back(); ++jj){
-    //   printf("%8.5f, %8.5f, %8.5f, %8.5f, ", env[jj*4+0], env[jj*4+1], env[jj*4+2], env[jj*4+3]);
+    //   printf("%8.5f, %8.5f, %8.5f, %8.5f, ", env[jj*4+0], env[jj*4+1],
+    //   env[jj*4+2], env[jj*4+3]);
     // }
     // printf("\n");
   }
 }
 
-TEST_F(TestEnvMatAMixShortSel, orig_pbc)
-{
+TEST_F(TestEnvMatAMixShortSel, orig_pbc) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = true;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, posi, f_ntypes, f_atype, region, pbc, ii, nlist_a[ii], nlist_r[ii], rc, sec_a, sec_r);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, posi, f_ntypes,
+                                    f_atype, region, pbc, ii, nlist_a[ii],
+                                    nlist_r[ii], rc, sec_a, sec_r);
     EXPECT_EQ(ret, 0);
-    env_mat_a(env, env_deriv, rij_a, posi, f_ntypes, f_atype, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a.back()*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a.back()*3);
-    for (int jj = 0; jj < sec_a.back(); ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(env[jj*4+dd] - expected_env[ii*sec_a.back()*4 + jj*4 + dd]) , 1e-5);
+    env_mat_a(env, env_deriv, rij_a, posi, f_ntypes, f_atype, region, pbc, ii,
+              fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a.back() * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a.back() * 3);
+    for (int jj = 0; jj < sec_a.back(); ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(env[jj * 4 + dd] -
+                       expected_env[ii * sec_a.back() * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-TEST_F(TestEnvMatAMixShortSel, cpu)
-{
+TEST_F(TestEnvMatAMixShortSel, cpu) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = false;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, f_atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, f_atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, 0);
-    deepmd::env_mat_a_cpu(env, env_deriv, rij_a, posi_cpy, f_atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a.back()*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a.back()*3);
-    for (int jj = 0; jj < sec_a.back(); ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(env[jj*4+dd] - expected_env[ii*sec_a.back()*4 + jj*4 + dd]) , 1e-5);
+    deepmd::env_mat_a_cpu(env, env_deriv, rij_a, posi_cpy, f_atype_cpy,
+                                  ii, fmt_nlist_a, sec_a, rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a.back() * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a.back() * 3);
+    for (int jj = 0; jj < sec_a.back(); ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(env[jj * 4 + dd] -
+                       expected_env[ii * sec_a.back() * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-TEST_F(TestEnvMatAMix, prod_cpu)
-{
+TEST_F(TestEnvMatAMix, prod_cpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
@@ -436,64 +526,43 @@ TEST_F(TestEnvMatAMix, prod_cpu)
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   deepmd::convert_nlist(inlist, nlist_a_cpy);
-  
-  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3), rij(nloc * nnei * 3);
+
+  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
+      rij(nloc * nnei * 3);
   std::vector nlist(nloc * nnei);
   std::vector ntype(nloc * nnei);
-  bool * nmask = new bool [nloc * nnei];
+  bool* nmask = new bool[nloc * nnei];
   memset(nmask, 0, sizeof(bool) * nloc * nnei);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
-  deepmd::prod_env_mat_a_cpu(
-      &em[0],
-      &em_deriv[0],
-      &rij[0],
-      &nlist[0],
-      &posi_cpy[0],
-      &atype[0],
-      inlist,
-      max_nbor_size,
-      &avg[0],
-      &std[0],
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a,
-      &f_atype_cpy[0]);
-  deepmd::use_nei_info_cpu(
-      &nlist[0],
-      &ntype[0],
-      nmask,
-      &atype[0],
-      &mapping[0],
-      nloc,
-      nnei,
-      ntypes,
-      true);
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
+  deepmd::prod_env_mat_a_cpu(&em[0], &em_deriv[0], &rij[0], &nlist[0],
+                             &posi_cpy[0], &atype[0], inlist, max_nbor_size,
+                             &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a,
+                             &f_atype_cpy[0]);
+  deepmd::use_nei_info_cpu(&nlist[0], &ntype[0], nmask, &atype[0], &mapping[0],
+                           nloc, nnei, ntypes, true);
 
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] - 
-		       expected_env[ii*nnei*4 + jj*4 + dd]) , 
-		  1e-5);
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
+                       expected_env[ii * nnei * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-      EXPECT_EQ(ntype[ii*nnei+jj], expected_ntype[ii*nnei+jj]);
-      EXPECT_EQ(nmask[ii*nnei+jj], expected_nmask[ii*nnei+jj]);
-    }    
+      EXPECT_EQ(ntype[ii * nnei + jj], expected_ntype[ii * nnei + jj]);
+      EXPECT_EQ(nmask[ii * nnei + jj], expected_nmask[ii * nnei + jj]);
+    }
   }
   free(nmask);
 }
 
-TEST_F(TestEnvMatAMix, prod_cpu_equal_cpu)
-{
+TEST_F(TestEnvMatAMix, prod_cpu_equal_cpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
@@ -501,34 +570,25 @@ TEST_F(TestEnvMatAMix, prod_cpu_equal_cpu)
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3), rij(nloc * nnei * 3);
+  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
+      rij(nloc * nnei * 3);
   std::vector nlist(nloc * nnei);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
-  deepmd::prod_env_mat_a_cpu(
-      &em[0],
-      &em_deriv[0],
-      &rij[0],
-      &nlist[0],
-      &posi_cpy[0],
-      &atype[0],
-      inlist,
-      max_nbor_size,
-      &avg[0],
-      &std[0],
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a,
-      &f_atype_cpy[0]);
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
+  deepmd::prod_env_mat_a_cpu(&em[0], &em_deriv[0], &rij[0], &nlist[0],
+                             &posi_cpy[0], &atype[0], inlist, max_nbor_size,
+                             &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a,
+                             &f_atype_cpy[0]);
 
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, f_atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);  
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, f_atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, f_atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
+    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy,
+                                  f_atype_cpy, ii, fmt_nlist_a_1, sec_a,
+                                  rc_smth, rc);
     EXPECT_EQ(env_1.size(), nnei * 4);
     EXPECT_EQ(env_deriv_1.size(), nnei * 4 * 3);
     EXPECT_EQ(rij_a_1.size(), nnei * 3);
@@ -537,71 +597,72 @@ TEST_F(TestEnvMatAMix, prod_cpu_equal_cpu)
     EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size());
     EXPECT_EQ(rij_a_1.size() * nloc, rij.size());
     EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size());
-    for (unsigned jj = 0; jj < env_1.size(); ++jj){
-      EXPECT_LT(fabs(em[ii*nnei*4+jj] - env_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < env_1.size(); ++jj) {
+      EXPECT_LT(fabs(em[ii * nnei * 4 + jj] - env_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj){
-      EXPECT_LT(fabs(em_deriv[ii*nnei*4*3+jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj){
-      EXPECT_LT(fabs(rij[ii*nnei*3+jj] - rij_a_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) {
+      EXPECT_LT(fabs(em_deriv[ii * nnei * 4 * 3 + jj] - env_deriv_1[jj]),
+                1e-10);
     }
-    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj){
-      EXPECT_EQ(nlist[ii*nnei+jj], fmt_nlist_a_1[jj]);
+    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) {
+      EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) {
+      EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]);
     }
   }
 
   // for(int ii = 0; ii < nloc; ++ii){
   //   for (int jj = 0; jj < nnei; ++jj){
   //     for (int dd = 0; dd < 4; ++dd){
-  //   	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] - 
-  // 		       expected_env[ii*nnei*4 + jj*4 + dd]) , 
+  //   	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] -
+  // 		       expected_env[ii*nnei*4 + jj*4 + dd]) ,
   // 		  1e-5);
   //     }
-  //   }    
+  //   }
   // }
 }
 
-
 #if GOOGLE_CUDA
-TEST_F(TestEnvMatAMix, prod_gpu_cuda)
-{
+TEST_F(TestEnvMatAMix, prod_gpu_cuda) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt, 0.0), em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
+  std::vector em(nloc * ndescrpt, 0.0),
+      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
   std::vector nlist(nloc * nnei, 0);
   std::vector ntype(nloc * nnei, 0);
-  bool * nmask = new bool [nloc * nnei];
+  bool* nmask = new bool[nloc * nnei];
   memset(nmask, 0, sizeof(bool) * nloc * nnei);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
 
-  double * em_dev = NULL, * em_deriv_dev = NULL, * rij_dev = NULL;
-  bool * nmask_dev = NULL;
-  double * posi_cpy_dev = NULL, * avg_dev = NULL, * std_dev = NULL;
-  int * f_atype_cpy_dev = NULL, * atype_dev = NULL, * nlist_dev = NULL, * ntype_dev = NULL, * mapping_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
+  bool* nmask_dev = NULL;
+  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
+  int *f_atype_cpy_dev = NULL, *atype_dev = NULL, *nlist_dev = NULL,
+      *ntype_dev = NULL, *mapping_dev = NULL, *array_int_dev = NULL,
+      *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -614,41 +675,21 @@ TEST_F(TestEnvMatAMix, prod_gpu_cuda)
   deepmd::malloc_device_memory_sync(ntype_dev, ntype);
   deepmd::malloc_device_memory_sync(mapping_dev, mapping);
   deepmd::malloc_device_memory_sync(nmask_dev, nmask, nloc * nnei);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, max_nbor_size);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
+                                   max_nbor_size);
 
-  deepmd::prod_env_mat_a_gpu_cuda(    
-      em_dev, 
-      em_deriv_dev, 
-      rij_dev, 
-      nlist_dev, 
-      posi_cpy_dev, 
-      atype_dev,
-      gpu_inlist,
-      array_int_dev, 
-      array_longlong_dev,
-      max_nbor_size,
-      avg_dev, 
-      std_dev, 
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a,
-      f_atype_cpy_dev);
+  deepmd::prod_env_mat_a_gpu_cuda(
+      em_dev, em_deriv_dev, rij_dev, nlist_dev, posi_cpy_dev, atype_dev,
+      gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, avg_dev,
+      std_dev, nloc, nall, rc, rc_smth, sec_a, f_atype_cpy_dev);
 
-  deepmd::use_nei_info_gpu(
-      nlist_dev,
-      ntype_dev,
-      nmask_dev,
-      atype_dev,
-      mapping_dev,
-      nloc,
-      nnei,
-      ntypes,
-      true);
+  deepmd::use_nei_info_gpu(nlist_dev, ntype_dev, nmask_dev, atype_dev,
+                           mapping_dev, nloc, nnei, ntypes, true);
   deepmd::memcpy_device_to_host(em_dev, em);
   deepmd::memcpy_device_to_host(ntype_dev, ntype);
   deepmd::memcpy_device_to_host(nmask_dev, nmask, nloc * nnei);
@@ -669,55 +710,54 @@ TEST_F(TestEnvMatAMix, prod_gpu_cuda)
   deepmd::delete_device_memory(memory_dev);
   deepmd::free_nlist_gpu_device(gpu_inlist);
 
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] - 
-		       expected_env[ii*nnei*4 + jj*4 + dd]) , 
-		  1e-5);
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
+                       expected_env[ii * nnei * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-      EXPECT_EQ(ntype[ii*nnei+jj], expected_ntype[ii*nnei+jj]);
-      EXPECT_EQ(nmask[ii*nnei+jj], expected_nmask[ii*nnei+jj]);
-    }    
+      EXPECT_EQ(ntype[ii * nnei + jj], expected_ntype[ii * nnei + jj]);
+      EXPECT_EQ(nmask[ii * nnei + jj], expected_nmask[ii * nnei + jj]);
+    }
   }
   free(nmask);
 }
 
-
-TEST_F(TestEnvMatAMix, prod_gpu_cuda_equal_cpu)
-{
+TEST_F(TestEnvMatAMix, prod_gpu_cuda_equal_cpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt, 0.0), em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
+  std::vector em(nloc * ndescrpt, 0.0),
+      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
   std::vector nlist(nloc * nnei, 0);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
 
-  double * em_dev = NULL, * em_deriv_dev = NULL, * rij_dev = NULL;
-  double * posi_cpy_dev = NULL, * avg_dev = NULL, * std_dev = NULL;
-  int * f_atype_cpy_dev = NULL, * atype_dev = NULL, * nlist_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
+  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
+  int *f_atype_cpy_dev = NULL, *atype_dev = NULL, *nlist_dev = NULL,
+      *array_int_dev = NULL, *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -728,30 +768,18 @@ TEST_F(TestEnvMatAMix, prod_gpu_cuda_equal_cpu)
   deepmd::malloc_device_memory_sync(f_atype_cpy_dev, f_atype_cpy);
   deepmd::malloc_device_memory_sync(atype_dev, atype);
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, max_nbor_size);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
+                                   max_nbor_size);
 
-  deepmd::prod_env_mat_a_gpu_cuda(    
-      em_dev, 
-      em_deriv_dev, 
-      rij_dev, 
-      nlist_dev, 
-      posi_cpy_dev,  
-      atype_dev,
-      gpu_inlist,
-      array_int_dev, 
-      array_longlong_dev,
-      max_nbor_size,
-      avg_dev, 
-      std_dev, 
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a,
-      f_atype_cpy_dev);
+  deepmd::prod_env_mat_a_gpu_cuda(
+      em_dev, em_deriv_dev, rij_dev, nlist_dev, posi_cpy_dev, atype_dev,
+      gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, avg_dev,
+      std_dev, nloc, nall, rc, rc_smth, sec_a, f_atype_cpy_dev);
   deepmd::memcpy_device_to_host(em_dev, em);
   deepmd::memcpy_device_to_host(em_deriv_dev, em_deriv);
   deepmd::memcpy_device_to_host(rij_dev, rij);
@@ -771,10 +799,13 @@ TEST_F(TestEnvMatAMix, prod_gpu_cuda_equal_cpu)
 
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, f_atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);  
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, f_atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, f_atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
+    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy,
+                                  f_atype_cpy, ii, fmt_nlist_a_1, sec_a,
+                                  rc_smth, rc);
     EXPECT_EQ(env_1.size(), nnei * 4);
     EXPECT_EQ(env_deriv_1.size(), nnei * 4 * 3);
     EXPECT_EQ(rij_a_1.size(), nnei * 3);
@@ -783,70 +814,73 @@ TEST_F(TestEnvMatAMix, prod_gpu_cuda_equal_cpu)
     EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size());
     EXPECT_EQ(rij_a_1.size() * nloc, rij.size());
     EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size());
-    for (unsigned jj = 0; jj < env_1.size(); ++jj){
-      EXPECT_LT(fabs(em[ii*nnei*4+jj] - env_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < env_1.size(); ++jj) {
+      EXPECT_LT(fabs(em[ii * nnei * 4 + jj] - env_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) {
+      EXPECT_LT(fabs(em_deriv[ii * nnei * 4 * 3 + jj] - env_deriv_1[jj]),
+                1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj){
-      EXPECT_LT(fabs(em_deriv[ii*nnei*4*3+jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj){
-      EXPECT_LT(fabs(rij[ii*nnei*3+jj] - rij_a_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) {
+      EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj){
-      EXPECT_EQ(nlist[ii*nnei+jj], fmt_nlist_a_1[jj]);
+    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) {
+      EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]);
     }
   }
 
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] - 
-  		       expected_env[ii*nnei*4 + jj*4 + dd]) , 
-  		  1e-5);
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
+                       expected_env[ii * nnei * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
-#endif //GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestEnvMatAMix, prod_gpu_rocm)
-{
+TEST_F(TestEnvMatAMix, prod_gpu_rocm) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt, 0.0), em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
+  std::vector em(nloc * ndescrpt, 0.0),
+      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
   std::vector nlist(nloc * nnei, 0);
   std::vector ntype(nloc * nnei, 0);
-  bool * nmask = new bool [nloc * nnei];
+  bool* nmask = new bool[nloc * nnei];
   memset(nmask, 0, sizeof(bool) * nloc * nnei);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
 
-  double * em_dev = NULL, * em_deriv_dev = NULL, * rij_dev = NULL, * nmask_dev = NULL;
-  double * posi_cpy_dev = NULL, * avg_dev = NULL, * std_dev = NULL;
-  int * f_atype_cpy_dev = NULL, * atype_dev = NULL, * nlist_dev = NULL, * ntype_dev = NULL, * mapping_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL,
+         *nmask_dev = NULL;
+  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
+  int *f_atype_cpy_dev = NULL, *atype_dev = NULL, *nlist_dev = NULL,
+      *ntype_dev = NULL, *mapping_dev = NULL, *array_int_dev = NULL,
+      *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -859,41 +893,21 @@ TEST_F(TestEnvMatAMix, prod_gpu_rocm)
   deepmd::malloc_device_memory_sync(ntype_dev, ntype);
   deepmd::malloc_device_memory_sync(mapping_dev, mapping);
   deepmd::malloc_device_memory_sync(nmask_dev, nmask, nloc * nnei);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, max_nbor_size);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
+                                   max_nbor_size);
 
-  deepmd::prod_env_mat_a_gpu_rocm(    
-      em_dev, 
-      em_deriv_dev, 
-      rij_dev, 
-      nlist_dev, 
-      posi_cpy_dev, 
-      atype_dev,
-      gpu_inlist,
-      array_int_dev, 
-      array_longlong_dev,
-      max_nbor_size,
-      avg_dev, 
-      std_dev, 
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a,
-      f_atype_cpy_dev);
+  deepmd::prod_env_mat_a_gpu_rocm(
+      em_dev, em_deriv_dev, rij_dev, nlist_dev, posi_cpy_dev, atype_dev,
+      gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, avg_dev,
+      std_dev, nloc, nall, rc, rc_smth, sec_a, f_atype_cpy_dev);
 
-  deepmd::use_nei_info_gpu_rocm(
-      nlist_dev,
-      ntype_dev,
-      nmask_dev,
-      atype_dev,
-      mapping_dev,
-      nloc,
-      nnei,
-      ntypes,
-      true);
+  deepmd::use_nei_info_gpu_rocm(nlist_dev, ntype_dev, nmask_dev, atype_dev,
+                                mapping_dev, nloc, nnei, ntypes, true);
   deepmd::memcpy_device_to_host(em_dev, em);
   deepmd::memcpy_device_to_host(ntype_dev, ntype);
   deepmd::memcpy_device_to_host(nmask_dev, nmask, nloc * nnei);
@@ -914,55 +928,54 @@ TEST_F(TestEnvMatAMix, prod_gpu_rocm)
   deepmd::delete_device_memory(memory_dev);
   deepmd::free_nlist_gpu_device(gpu_inlist);
 
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] - 
-		       expected_env[ii*nnei*4 + jj*4 + dd]) , 
-		  1e-5);
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
+                       expected_env[ii * nnei * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-      EXPECT_EQ(ntype[ii*nnei+jj], expected_ntype[ii*nnei+jj]);
-      EXPECT_EQ(nmask[ii*nnei+jj], expected_nmask[ii*nnei+jj]);
-    }    
+      EXPECT_EQ(ntype[ii * nnei + jj], expected_ntype[ii * nnei + jj]);
+      EXPECT_EQ(nmask[ii * nnei + jj], expected_nmask[ii * nnei + jj]);
+    }
   }
   free(nmask);
 }
 
-
-TEST_F(TestEnvMatAMix, prod_gpu_rocm_equal_cpu)
-{
+TEST_F(TestEnvMatAMix, prod_gpu_rocm_equal_cpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt, 0.0), em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
+  std::vector em(nloc * ndescrpt, 0.0),
+      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
   std::vector nlist(nloc * nnei, 0);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
 
-  double * em_dev = NULL, * em_deriv_dev = NULL, * rij_dev = NULL;
-  double * posi_cpy_dev = NULL, * avg_dev = NULL, * std_dev = NULL;
-  int * f_atype_cpy_dev = NULL, * atype_dev = NULL, * nlist_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
+  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
+  int *f_atype_cpy_dev = NULL, *atype_dev = NULL, *nlist_dev = NULL,
+      *array_int_dev = NULL, *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -973,30 +986,18 @@ TEST_F(TestEnvMatAMix, prod_gpu_rocm_equal_cpu)
   deepmd::malloc_device_memory_sync(f_atype_cpy_dev, f_atype_cpy);
   deepmd::malloc_device_memory_sync(atype_dev, atype);
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, max_nbor_size);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
+                                   max_nbor_size);
 
-  deepmd::prod_env_mat_a_gpu_rocm(    
-      em_dev, 
-      em_deriv_dev, 
-      rij_dev, 
-      nlist_dev, 
-      posi_cpy_dev, 
-      atype_dev,
-      gpu_inlist,
-      array_int_dev, 
-      array_longlong_dev,
-      max_nbor_size,
-      avg_dev, 
-      std_dev, 
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a,
-      f_atype_cpy_dev);
+  deepmd::prod_env_mat_a_gpu_rocm(
+      em_dev, em_deriv_dev, rij_dev, nlist_dev, posi_cpy_dev, atype_dev,
+      gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, avg_dev,
+      std_dev, nloc, nall, rc, rc_smth, sec_a, f_atype_cpy_dev);
   deepmd::memcpy_device_to_host(em_dev, em);
   deepmd::memcpy_device_to_host(em_deriv_dev, em_deriv);
   deepmd::memcpy_device_to_host(rij_dev, rij);
@@ -1016,10 +1017,13 @@ TEST_F(TestEnvMatAMix, prod_gpu_rocm_equal_cpu)
 
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, f_atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);  
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, f_atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, f_atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
+    deepmd::env_mat_a_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy,
+                                  f_atype_cpy, ii, fmt_nlist_a_1, sec_a,
+                                  rc_smth, rc);
     EXPECT_EQ(env_1.size(), nnei * 4);
     EXPECT_EQ(env_deriv_1.size(), nnei * 4 * 3);
     EXPECT_EQ(rij_a_1.size(), nnei * 3);
@@ -1028,28 +1032,29 @@ TEST_F(TestEnvMatAMix, prod_gpu_rocm_equal_cpu)
     EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size());
     EXPECT_EQ(rij_a_1.size() * nloc, rij.size());
     EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size());
-    for (unsigned jj = 0; jj < env_1.size(); ++jj){
-      EXPECT_LT(fabs(em[ii*nnei*4+jj] - env_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < env_1.size(); ++jj) {
+      EXPECT_LT(fabs(em[ii * nnei * 4 + jj] - env_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) {
+      EXPECT_LT(fabs(em_deriv[ii * nnei * 4 * 3 + jj] - env_deriv_1[jj]),
+                1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj){
-      EXPECT_LT(fabs(em_deriv[ii*nnei*4*3+jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj){
-      EXPECT_LT(fabs(rij[ii*nnei*3+jj] - rij_a_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) {
+      EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj){
-      EXPECT_EQ(nlist[ii*nnei+jj], fmt_nlist_a_1[jj]);
+    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) {
+      EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]);
     }
   }
 
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] - 
-  		       expected_env[ii*nnei*4 + jj*4 + dd]) , 
-  		  1e-5);
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
+                       expected_env[ii * nnei * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
-#endif //TENSORFLOW_USE_ROCM
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_env_mat_a_nvnmd.cc b/source/lib/tests/test_env_mat_a_nvnmd.cc
index eaeb1b4e41..b56c2bdf40 100644
--- a/source/lib/tests/test_env_mat_a_nvnmd.cc
+++ b/source/lib/tests/test_env_mat_a_nvnmd.cc
@@ -1,149 +1,198 @@
-#include 
 #include 
-#include "fmt_nlist.h"
+
+#include 
+
+#include "device.h"
 #include "env_mat_nvnmd.h"
-#include "prod_env_mat_nvnmd.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
-#include "device.h"
-
+#include "prod_env_mat_nvnmd.h"
 
-class TestEnvMatANvnmd : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
+class TestEnvMatANvnmd : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
   int nloc, nall;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 10, 20};
   std::vector sec_r = {0, 0, 0};
   std::vector nat_stt, ext_stt, ext_end;
   std::vector> nlist_a, nlist_r;
   std::vector> nlist_a_cpy, nlist_r_cpy;
-  int ntypes = sec_a.size()-1;
+  int ntypes = sec_a.size() - 1;
   int nnei = sec_a.back();
   int ndescrpt = nnei * 4;
   /* r_ij^2, x_ij, y_ij, z_ij */
-  std::vector expected_env = {
-    12.791382, 3.529999, 0.440000, -0.370000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.957299, -0.740000, 0.310000, 0.560000, 1.003999, 0.420000, 0.760000, -0.500000, 
-    13.721283, 3.679998, -0.050000, 0.420000, 20.533585, 4.439999, 0.660000, -0.620000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.957299, 0.740000, -0.310000, -0.560000, 19.114655, 4.269997, 0.130000, -0.930000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 2.671698, 1.160000, 0.450000, -1.059999, 19.685577, 4.419998, -0.360000, -0.140000, 
-    28.347244, 5.179996, 0.350000, -1.179999, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    1.003999, -0.420000, -0.760000, 0.500000, 9.791389, 3.109999, -0.320000, 0.130000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 2.671698, -1.160000, -0.450000, 1.059999, 12.130081, 3.259998, -0.810000, 0.920000, 
-    16.184769, 4.019997, -0.100000, -0.120000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    12.791382, -3.529999, -0.440000, 0.370000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.886699, 0.150000, -0.490000, 0.790000, 0.938999, 0.910000, 0.220000, -0.250000, 
-    9.791389, -3.109999, 0.320000, -0.130000, 19.114655, -4.269997, -0.130000, 0.930000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.886699, -0.150000, 0.490000, -0.790000, 13.721283, -3.679998, 0.050000, -0.420000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 2.163296, 0.760000, 0.710000, -1.040000, 12.130081, -3.259998, 0.810000, -0.920000, 
-    19.685577, -4.419998, 0.360000, 0.140000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.938999, -0.910000, -0.220000, 0.250000, 20.533585, -4.439999, -0.660000, 0.620000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 2.163296, -0.760000, -0.710000, 1.040000, 16.184769, -4.019997, 0.100000, 0.120000, 
-    28.347244, -5.179996, -0.350000, 1.179999, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 
-    0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000
-  };
-  
+  std::vector expected_env = {
+      12.791382, 3.529999,  0.440000,  -0.370000, 0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.957299,  -0.740000,
+      0.310000,  0.560000,  1.003999,  0.420000,  0.760000,  -0.500000,
+      13.721283, 3.679998,  -0.050000, 0.420000,  20.533585, 4.439999,
+      0.660000,  -0.620000, 0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.957299,  0.740000,  -0.310000, -0.560000,
+      19.114655, 4.269997,  0.130000,  -0.930000, 0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      2.671698,  1.160000,  0.450000,  -1.059999, 19.685577, 4.419998,
+      -0.360000, -0.140000, 28.347244, 5.179996,  0.350000,  -1.179999,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  1.003999,  -0.420000,
+      -0.760000, 0.500000,  9.791389,  3.109999,  -0.320000, 0.130000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  2.671698,  -1.160000, -0.450000, 1.059999,
+      12.130081, 3.259998,  -0.810000, 0.920000,  16.184769, 4.019997,
+      -0.100000, -0.120000, 0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      12.791382, -3.529999, -0.440000, 0.370000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.886699,  0.150000,
+      -0.490000, 0.790000,  0.938999,  0.910000,  0.220000,  -0.250000,
+      9.791389,  -3.109999, 0.320000,  -0.130000, 19.114655, -4.269997,
+      -0.130000, 0.930000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.886699,  -0.150000, 0.490000,  -0.790000,
+      13.721283, -3.679998, 0.050000,  -0.420000, 0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      2.163296,  0.760000,  0.710000,  -1.040000, 12.130081, -3.259998,
+      0.810000,  -0.920000, 19.685577, -4.419998, 0.360000,  0.140000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.938999,  -0.910000,
+      -0.220000, 0.250000,  20.533585, -4.439999, -0.660000, 0.620000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  2.163296,  -0.760000, -0.710000, 1.040000,
+      16.184769, -4.019997, 0.100000,  0.120000,  28.347244, -5.179996,
+      -0.350000, 1.179999,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.000000,  0.000000,  0.000000,  0.000000};
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
     build_nlist(nlist_a, nlist_r, posi, rc, rc, ncell, region);
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-  }
-  void TearDown() override {
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
   }
+  void TearDown() override {}
 };
 
-
-class TestEnvMatANvnmdShortSel : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
+class TestEnvMatANvnmdShortSel : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
   int nloc, nall;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 2, 4};
   std::vector sec_r = {0, 0, 0};
   std::vector nat_stt, ext_stt, ext_end;
   std::vector> nlist_a, nlist_r;
   std::vector> nlist_a_cpy, nlist_r_cpy;
-  int ntypes = sec_a.size()-1;
+  int ntypes = sec_a.size() - 1;
   int nnei = sec_a.back();
   int ndescrpt = nnei * 4;
-  std::vector expected_env = {
-    12.791382, 3.529999, 0.440000, -0.370000, 0.000000, 0.000000, 0.000000, 0.000000, 0.957299, -0.740000, 0.310000, 0.560000, 1.003999, 0.420000, 0.760000, -0.500000, 
-    0.957299, 0.740000, -0.310000, -0.560000, 19.114655, 4.269997, 0.130000, -0.930000, 2.671698, 1.160000, 0.450000, -1.059999, 19.685577, 4.419998, -0.360000, -0.140000, 
-    1.003999, -0.420000, -0.760000, 0.500000, 9.791389, 3.109999, -0.320000, 0.130000, 2.671698, -1.160000, -0.450000, 1.059999, 12.130081, 3.259998, -0.810000, 0.920000, 
-    12.791382, -3.529999, -0.440000, 0.370000, 0.000000, 0.000000, 0.000000, 0.000000, 0.886699, 0.150000, -0.490000, 0.790000, 0.938999, 0.910000, 0.220000, -0.250000, 
-    0.886699, -0.150000, 0.490000, -0.790000, 13.721283, -3.679998, 0.050000, -0.420000, 2.163296, 0.760000, 0.710000, -1.040000, 12.130081, -3.259998, 0.810000, -0.920000, 
-    0.938999, -0.910000, -0.220000, 0.250000, 20.533585, -4.439999, -0.660000, 0.620000, 2.163296, -0.760000, -0.710000, 1.040000, 16.184769, -4.019997, 0.100000, 0.120000
-  };  
-  
+  std::vector expected_env = {
+      12.791382, 3.529999,  0.440000,  -0.370000, 0.000000,  0.000000,
+      0.000000,  0.000000,  0.957299,  -0.740000, 0.310000,  0.560000,
+      1.003999,  0.420000,  0.760000,  -0.500000, 0.957299,  0.740000,
+      -0.310000, -0.560000, 19.114655, 4.269997,  0.130000,  -0.930000,
+      2.671698,  1.160000,  0.450000,  -1.059999, 19.685577, 4.419998,
+      -0.360000, -0.140000, 1.003999,  -0.420000, -0.760000, 0.500000,
+      9.791389,  3.109999,  -0.320000, 0.130000,  2.671698,  -1.160000,
+      -0.450000, 1.059999,  12.130081, 3.259998,  -0.810000, 0.920000,
+      12.791382, -3.529999, -0.440000, 0.370000,  0.000000,  0.000000,
+      0.000000,  0.000000,  0.886699,  0.150000,  -0.490000, 0.790000,
+      0.938999,  0.910000,  0.220000,  -0.250000, 0.886699,  -0.150000,
+      0.490000,  -0.790000, 13.721283, -3.679998, 0.050000,  -0.420000,
+      2.163296,  0.760000,  0.710000,  -1.040000, 12.130081, -3.259998,
+      0.810000,  -0.920000, 0.938999,  -0.910000, -0.220000, 0.250000,
+      20.533585, -4.439999, -0.660000, 0.620000,  2.163296,  -0.760000,
+      -0.710000, 1.040000,  16.184769, -4.019997, 0.100000,  0.120000};
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
     build_nlist(nlist_a, nlist_r, posi, rc, rc, ncell, region);
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-  }
-  void TearDown() override {
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
   }
+  void TearDown() override {}
 };
 
-
 /*  env_mat_a_nvnmd_quantize_cpu is not same as env_mat_a.
 remove some tests:
 TEST_F(TestEnvMatANvnmd, orig_cpy)
@@ -152,23 +201,27 @@ TEST_F(TestEnvMatANvnmd, orig_cpy_equal_pbc)
 TEST_F(TestEnvMatANvnmd, orig_cpy_num_deriv)
 */
 
-TEST_F(TestEnvMatANvnmd, cpu)
-{
+TEST_F(TestEnvMatANvnmd, cpu) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = false;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, -1);
-    deepmd::env_mat_a_nvnmd_quantize_cpu(env, env_deriv, rij_a, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a[2]*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a[2]*3);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	  EXPECT_LT(fabs(env[jj*4+dd] - expected_env[ii*sec_a[2]*4 + jj*4 + dd]) , 1e-5);
+    deepmd::env_mat_a_nvnmd_quantize_cpu(
+        env, env_deriv, rij_a, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a,
+        rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a[2] * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a[2] * 3);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(env[jj * 4 + dd] -
+                       expected_env[ii * sec_a[2] * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
@@ -180,36 +233,37 @@ TEST_F(TestEnvMatANvnmdShortSel, orig_cpy)
 TEST_F(TestEnvMatANvnmdShortSel, orig_pbc)
 */
 
-
-TEST_F(TestEnvMatANvnmdShortSel, cpu)
-{
+TEST_F(TestEnvMatANvnmdShortSel, cpu) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = false;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, 1);
-    deepmd::env_mat_a_nvnmd_quantize_cpu(env, env_deriv, rij_a, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    EXPECT_EQ(env.size(), sec_a[2]*4);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    EXPECT_EQ(rij_a.size(), sec_a[2]*3);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(env[jj*4+dd] - expected_env[ii*sec_a[2]*4 + jj*4 + dd]) , 1e-5);
+    deepmd::env_mat_a_nvnmd_quantize_cpu(
+        env, env_deriv, rij_a, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a,
+        rc_smth, rc);
+    EXPECT_EQ(env.size(), sec_a[2] * 4);
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    EXPECT_EQ(rij_a.size(), sec_a[2] * 3);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(env[jj * 4 + dd] -
+                       expected_env[ii * sec_a[2] * 4 + jj * 4 + dd]),
+                  1e-5);
       }
     }
   }
 }
 
-
-TEST_F(TestEnvMatANvnmd, prod_cpu)
-{
+TEST_F(TestEnvMatANvnmd, prod_cpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
@@ -217,48 +271,34 @@ TEST_F(TestEnvMatANvnmd, prod_cpu)
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   deepmd::convert_nlist(inlist, nlist_a_cpy);
-  
-  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3), rij(nloc * nnei * 3);
+
+  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
+      rij(nloc * nnei * 3);
   std::vector nlist(nloc * nnei);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
   deepmd::prod_env_mat_a_nvnmd_quantize_cpu(
-      &em[0],
-      &em_deriv[0],
-      &rij[0],
-      &nlist[0],
-      &posi_cpy[0],
-      &atype_cpy[0],
-      inlist,
-      max_nbor_size,
-      &avg[0],
-      &std[0],
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a);
+      &em[0], &em_deriv[0], &rij[0], &nlist[0], &posi_cpy[0], &atype_cpy[0],
+      inlist, max_nbor_size, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
 
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] - 
-		       expected_env[ii*nnei*4 + jj*4 + dd]) , 
-		  1e-5);
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
+                       expected_env[ii * nnei * 4 + jj * 4 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-
-TEST_F(TestEnvMatANvnmd, prod_cpu_equal_cpu)
-{
+TEST_F(TestEnvMatANvnmd, prod_cpu_equal_cpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
@@ -266,33 +306,24 @@ TEST_F(TestEnvMatANvnmd, prod_cpu_equal_cpu)
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3), rij(nloc * nnei * 3);
+  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
+      rij(nloc * nnei * 3);
   std::vector nlist(nloc * nnei);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
   deepmd::prod_env_mat_a_nvnmd_quantize_cpu(
-      &em[0],
-      &em_deriv[0],
-      &rij[0],
-      &nlist[0],
-      &posi_cpy[0],
-      &atype_cpy[0],
-      inlist,
-      max_nbor_size,
-      &avg[0],
-      &std[0],
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a);
+      &em[0], &em_deriv[0], &rij[0], &nlist[0], &posi_cpy[0], &atype_cpy[0],
+      inlist, max_nbor_size, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
 
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);  
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_a_nvnmd_quantize_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
+    deepmd::env_mat_a_nvnmd_quantize_cpu(
+        env_1, env_deriv_1, rij_a_1, posi_cpy, atype_cpy, ii, fmt_nlist_a_1,
+        sec_a, rc_smth, rc);
     EXPECT_EQ(env_1.size(), nnei * 4);
     EXPECT_EQ(env_deriv_1.size(), nnei * 4 * 3);
     EXPECT_EQ(rij_a_1.size(), nnei * 3);
@@ -301,28 +332,28 @@ TEST_F(TestEnvMatANvnmd, prod_cpu_equal_cpu)
     EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size());
     EXPECT_EQ(rij_a_1.size() * nloc, rij.size());
     EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size());
-    for (unsigned jj = 0; jj < env_1.size(); ++jj){
-      EXPECT_LT(fabs(em[ii*nnei*4+jj] - env_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < env_1.size(); ++jj) {
+      EXPECT_LT(fabs(em[ii * nnei * 4 + jj] - env_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) {
+      EXPECT_LT(fabs(em_deriv[ii * nnei * 4 * 3 + jj] - env_deriv_1[jj]),
+                1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj){
-      EXPECT_LT(fabs(em_deriv[ii*nnei*4*3+jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj){
-      EXPECT_LT(fabs(rij[ii*nnei*3+jj] - rij_a_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) {
+      EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj){
-      EXPECT_EQ(nlist[ii*nnei+jj], fmt_nlist_a_1[jj]);
+    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) {
+      EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]);
     }
   }
 
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 4; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*4 + jj*4 + dd] - 
-  		       expected_env[ii*nnei*4 + jj*4 + dd]) , 
-  		  1e-5);
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 4; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 4 + jj * 4 + dd] -
+                       expected_env[ii * nnei * 4 + jj * 4 + dd]),
+                  1e-5);
       }
     }
   }
 }
-
diff --git a/source/lib/tests/test_env_mat_r.cc b/source/lib/tests/test_env_mat_r.cc
index 39d36be42d..a889d89749 100644
--- a/source/lib/tests/test_env_mat_r.cc
+++ b/source/lib/tests/test_env_mat_r.cc
@@ -1,28 +1,25 @@
-#include 
 #include 
+
+#include 
+
 #include "env_mat.h"
 #include "fmt_nlist.h"
-#include "prod_env_mat.h"
 #include "neighbor_list.h"
+#include "prod_env_mat.h"
 
-class TestEnvMatR : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestEnvMatR : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 10, 20};
   std::vector sec_r = {0, 0, 0};
@@ -31,49 +28,60 @@ class TestEnvMatR : public ::testing::Test
   std::vector> nlist_a_cpy, nlist_r_cpy;
   int nnei = sec_a.back();
   int ndescrpt = nnei * 1;
-  std::vector expected_env = {
-    0.12206, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 1.02167, 0.99745, 0.10564, 0.03103, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-    1.02167, 0.04135, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.59220, 0.03694, 0.00336, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-    0.99745, 0.19078, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.59220, 0.13499, 0.07054, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-    0.12206, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 1.06176, 1.03163, 0.19078, 0.04135, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-    1.06176, 0.10564, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.66798, 0.13499, 0.03694, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-    1.03163, 0.03103, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.66798, 0.07054, 0.00336, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-  };  
-  
+  std::vector expected_env = {
+      0.12206, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
+      0.00000, 0.00000, 1.02167, 0.99745, 0.10564, 0.03103, 0.00000, 0.00000,
+      0.00000, 0.00000, 0.00000, 0.00000, 1.02167, 0.04135, 0.00000, 0.00000,
+      0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.59220, 0.03694,
+      0.00336, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
+      0.99745, 0.19078, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
+      0.00000, 0.00000, 0.59220, 0.13499, 0.07054, 0.00000, 0.00000, 0.00000,
+      0.00000, 0.00000, 0.00000, 0.00000, 0.12206, 0.00000, 0.00000, 0.00000,
+      0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 1.06176, 1.03163,
+      0.19078, 0.04135, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
+      1.06176, 0.10564, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
+      0.00000, 0.00000, 0.66798, 0.13499, 0.03694, 0.00000, 0.00000, 0.00000,
+      0.00000, 0.00000, 0.00000, 0.00000, 1.03163, 0.03103, 0.00000, 0.00000,
+      0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.66798, 0.07054,
+      0.00336, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000,
+  };
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
     build_nlist(nlist_a, nlist_r, posi, rc, rc, ncell, region);
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-  }
-  void TearDown() override {
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
   }
+  void TearDown() override {}
 };
 
-TEST_F(TestEnvMatR, orig_cpy)
-{
+TEST_F(TestEnvMatR, orig_cpy) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = false;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, -1);
-    env_mat_r(env, env_deriv, rij_a, posi_cpy, ntypes, atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
+    env_mat_r(env, env_deriv, rij_a, posi_cpy, ntypes, atype_cpy, region, pbc,
+              ii, fmt_nlist_a, sec_a, rc_smth, rc);
     EXPECT_EQ(env.size(), sec_a[2]);
-    EXPECT_EQ(env.size(), env_deriv.size()/3);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      EXPECT_LT(fabs(env[jj] - expected_env[ii*sec_a[2] + jj]) , 1e-5);
-    }    
+    EXPECT_EQ(env.size(), env_deriv.size() / 3);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      EXPECT_LT(fabs(env[jj] - expected_env[ii * sec_a[2] + jj]), 1e-5);
+    }
     // for (int jj = 0; jj < sec_a[2]; ++jj){
     //   printf("%7.5f, ", env[jj]);
     // }
@@ -81,171 +89,186 @@ TEST_F(TestEnvMatR, orig_cpy)
   }
 }
 
-TEST_F(TestEnvMatR, orig_pbc)
-{
+TEST_F(TestEnvMatR, orig_pbc) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = true;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, posi, ntypes, atype, region, pbc, ii, nlist_a[ii], nlist_r[ii], rc, sec_a, sec_r);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, posi, ntypes,
+                                    atype, region, pbc, ii, nlist_a[ii],
+                                    nlist_r[ii], rc, sec_a, sec_r);
     EXPECT_EQ(ret, -1);
-    env_mat_r(env, env_deriv, rij_a, posi, ntypes, atype, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      EXPECT_LT(fabs(env[jj] - expected_env[ii*sec_a[2] + jj]) , 1e-5);
-    }    
+    env_mat_r(env, env_deriv, rij_a, posi, ntypes, atype, region, pbc, ii,
+              fmt_nlist_a, sec_a, rc_smth, rc);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      EXPECT_LT(fabs(env[jj] - expected_env[ii * sec_a[2] + jj]), 1e-5);
+    }
   }
 }
 
-
-TEST_F(TestEnvMatR, orig_cpy_equal_pbc)
-{
+TEST_F(TestEnvMatR, orig_cpy_equal_pbc) {
   std::vector fmt_nlist_a_0, fmt_nlist_r_0;
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_0, env_deriv_0, rij_a_0;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_0 = format_nlist_i_cpu(fmt_nlist_a_0, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_0 = format_nlist_i_cpu(fmt_nlist_a_0, posi_cpy, atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_0, -1);
-    env_mat_r(env_0, env_deriv_0, rij_a_0, posi_cpy, ntypes, atype_cpy, region, false, ii, fmt_nlist_a_0, sec_a, rc_smth, rc);
-    int ret_1 = format_nlist_i_fill_a(fmt_nlist_a_1, fmt_nlist_r_1, posi, ntypes, atype, region, true, ii, nlist_a[ii], nlist_r[ii], rc, sec_a, sec_r);    
+    env_mat_r(env_0, env_deriv_0, rij_a_0, posi_cpy, ntypes, atype_cpy, region,
+              false, ii, fmt_nlist_a_0, sec_a, rc_smth, rc);
+    int ret_1 = format_nlist_i_fill_a(
+        fmt_nlist_a_1, fmt_nlist_r_1, posi, ntypes, atype, region, true, ii,
+        nlist_a[ii], nlist_r[ii], rc, sec_a, sec_r);
     EXPECT_EQ(ret_1, -1);
-    env_mat_r(env_1, env_deriv_1, rij_a_1, posi, ntypes, atype, region, true, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);    
+    env_mat_r(env_1, env_deriv_1, rij_a_1, posi, ntypes, atype, region, true,
+              ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
     EXPECT_EQ(env_0.size(), env_1.size());
     EXPECT_EQ(env_deriv_0.size(), env_deriv_1.size());
     EXPECT_EQ(rij_a_0.size(), rij_a_1.size());
-    for (unsigned jj = 0; jj < env_0.size(); ++jj){
+    for (unsigned jj = 0; jj < env_0.size(); ++jj) {
       EXPECT_LT(fabs(env_0[jj] - env_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_0.size(); ++jj){
-      EXPECT_LT(fabs(env_deriv_0[jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_0.size(); ++jj){
+    for (unsigned jj = 0; jj < env_deriv_0.size(); ++jj) {
+      EXPECT_LT(fabs(env_deriv_0[jj] - env_deriv_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < rij_a_0.size(); ++jj) {
       EXPECT_LT(fabs(rij_a_0[jj] - rij_a_1[jj]), 1e-10);
     }
   }
 }
 
-
-TEST_F(TestEnvMatR, orig_cpy_num_deriv)
-{
+TEST_F(TestEnvMatR, orig_cpy_num_deriv) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_0, env_1, env_deriv, env_deriv_tmp, rij_a;
   bool pbc = false;
   double hh = 1e-5;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, -1);
-    env_mat_r(env, env_deriv, rij_a, posi_cpy, ntypes, atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
+    env_mat_r(env, env_deriv, rij_a, posi_cpy, ntypes, atype_cpy, region, pbc,
+              ii, fmt_nlist_a, sec_a, rc_smth, rc);
 
-    for (int jj = 0; jj < sec_a[2]; ++jj){
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
       int j_idx = fmt_nlist_a[jj];
       if (j_idx < 0) continue;
-      for (int dd = 0; dd < 3; ++dd){
-	std::vector posi_0 = posi_cpy;
-	std::vector posi_1 = posi_cpy;
-	posi_0[j_idx*3+dd] -= hh;
-	posi_1[j_idx*3+dd] += hh;
-	env_mat_r(env_0, env_deriv_tmp, rij_a, posi_0, ntypes, atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
-	env_mat_r(env_1, env_deriv_tmp, rij_a, posi_1, ntypes, atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
-	double num_deriv = (env_1[jj] - env_0[jj])/(2.*hh);
-	double ana_deriv = -env_deriv[jj*3+dd];
-	EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-5);
+      for (int dd = 0; dd < 3; ++dd) {
+        std::vector posi_0 = posi_cpy;
+        std::vector posi_1 = posi_cpy;
+        posi_0[j_idx * 3 + dd] -= hh;
+        posi_1[j_idx * 3 + dd] += hh;
+        env_mat_r(env_0, env_deriv_tmp, rij_a, posi_0, ntypes, atype_cpy,
+                  region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+        env_mat_r(env_1, env_deriv_tmp, rij_a, posi_1, ntypes, atype_cpy,
+                  region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+        double num_deriv = (env_1[jj] - env_0[jj]) / (2. * hh);
+        double ana_deriv = -env_deriv[jj * 3 + dd];
+        EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-5);
       }
     }
     // for (int jj = 0; jj < sec_a[2]; ++jj){
-    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1], env[jj*4+2], env[jj*4+3]);
+    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1],
+    //   env[jj*4+2], env[jj*4+3]);
     // }
     // printf("\n");
   }
 }
 
-
-TEST_F(TestEnvMatR, cpu)
-{
+TEST_F(TestEnvMatR, cpu) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_deriv, rij_a;
   bool pbc = false;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, -1);
-    deepmd::env_mat_r_cpu(env, env_deriv, rij_a, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      EXPECT_LT(fabs(env[jj] - expected_env[ii*sec_a[2] + jj]) , 1e-5);
-    }    
+    deepmd::env_mat_r_cpu(env, env_deriv, rij_a, posi_cpy, atype_cpy,
+                                  ii, fmt_nlist_a, sec_a, rc_smth, rc);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      EXPECT_LT(fabs(env[jj] - expected_env[ii * sec_a[2] + jj]), 1e-5);
+    }
   }
 }
 
-TEST_F(TestEnvMatR, cpu_equal_orig_cpy)
-{
+TEST_F(TestEnvMatR, cpu_equal_orig_cpy) {
   std::vector fmt_nlist_a_0, fmt_nlist_r_0;
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_0, env_deriv_0, rij_a_0;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_0 = format_nlist_i_cpu(fmt_nlist_a_0, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_0 = format_nlist_i_cpu(fmt_nlist_a_0, posi_cpy, atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_0, -1);
-    env_mat_r(env_0, env_deriv_0, rij_a_0, posi_cpy, ntypes, atype_cpy, region, false, ii, fmt_nlist_a_0, sec_a, rc_smth, rc);
+    env_mat_r(env_0, env_deriv_0, rij_a_0, posi_cpy, ntypes, atype_cpy, region,
+              false, ii, fmt_nlist_a_0, sec_a, rc_smth, rc);
 
-    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);  
+    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_r_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
+    deepmd::env_mat_r_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy,
+                                  atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth,
+                                  rc);
 
     EXPECT_EQ(env_0.size(), env_1.size());
     EXPECT_EQ(env_deriv_0.size(), env_deriv_1.size());
     EXPECT_EQ(rij_a_0.size(), rij_a_1.size());
-    for (unsigned jj = 0; jj < env_0.size(); ++jj){
+    for (unsigned jj = 0; jj < env_0.size(); ++jj) {
       EXPECT_LT(fabs(env_0[jj] - env_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_0.size(); ++jj){
-      EXPECT_LT(fabs(env_deriv_0[jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_0.size(); ++jj){
+    for (unsigned jj = 0; jj < env_deriv_0.size(); ++jj) {
+      EXPECT_LT(fabs(env_deriv_0[jj] - env_deriv_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < rij_a_0.size(); ++jj) {
       EXPECT_LT(fabs(rij_a_0[jj] - rij_a_1[jj]), 1e-10);
     }
   }
 }
 
-TEST_F(TestEnvMatR, cpu_num_deriv)
-{
+TEST_F(TestEnvMatR, cpu_num_deriv) {
   std::vector fmt_nlist_a, fmt_nlist_r;
   std::vector env, env_0, env_1, env_deriv, env_deriv_tmp, rij_a;
   bool pbc = false;
   double hh = 1e-5;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, -1);
-    deepmd::env_mat_r_cpu(env, env_deriv, rij_a, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
+    deepmd::env_mat_r_cpu(env, env_deriv, rij_a, posi_cpy, atype_cpy,
+                                  ii, fmt_nlist_a, sec_a, rc_smth, rc);
 
-    for (int jj = 0; jj < sec_a[2]; ++jj){
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
       int j_idx = fmt_nlist_a[jj];
       if (j_idx < 0) continue;
-	for (int dd = 0; dd < 3; ++dd){
-	  std::vector posi_0 = posi_cpy;
-	  std::vector posi_1 = posi_cpy;
-	  posi_0[j_idx*3+dd] -= hh;
-	  posi_1[j_idx*3+dd] += hh;
-	  env_mat_r(env_0, env_deriv_tmp, rij_a, posi_0, ntypes, atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
-	  env_mat_r(env_1, env_deriv_tmp, rij_a, posi_1, ntypes, atype_cpy, region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
-	  double num_deriv = (env_1[jj] - env_0[jj])/(2.*hh);
-	  double ana_deriv = -env_deriv[jj*3+dd];
-	  EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-5);
-	}
+      for (int dd = 0; dd < 3; ++dd) {
+        std::vector posi_0 = posi_cpy;
+        std::vector posi_1 = posi_cpy;
+        posi_0[j_idx * 3 + dd] -= hh;
+        posi_1[j_idx * 3 + dd] += hh;
+        env_mat_r(env_0, env_deriv_tmp, rij_a, posi_0, ntypes, atype_cpy,
+                  region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+        env_mat_r(env_1, env_deriv_tmp, rij_a, posi_1, ntypes, atype_cpy,
+                  region, pbc, ii, fmt_nlist_a, sec_a, rc_smth, rc);
+        double num_deriv = (env_1[jj] - env_0[jj]) / (2. * hh);
+        double ana_deriv = -env_deriv[jj * 3 + dd];
+        EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-5);
+      }
     }
     // for (int jj = 0; jj < sec_a[2]; ++jj){
-    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1], env[jj*4+2], env[jj*4+3]);
+    //   printf("%7.5f, %7.5f, %7.5f, %7.5f, ", env[jj*4+0], env[jj*4+1],
+    //   env[jj*4+2], env[jj*4+3]);
     // }
     // printf("\n");
   }
 }
 
-TEST_F(TestEnvMatR, prod_cpu)
-{
+TEST_F(TestEnvMatR, prod_cpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
@@ -253,48 +276,34 @@ TEST_F(TestEnvMatR, prod_cpu)
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   convert_nlist(inlist, nlist_a_cpy);
-  
-  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3), rij(nloc * nnei * 3);
+
+  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
+      rij(nloc * nnei * 3);
   std::vector nlist(nloc * nnei);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
-  deepmd::prod_env_mat_r_cpu(
-      &em[0],
-      &em_deriv[0],
-      &rij[0],
-      &nlist[0],
-      &posi_cpy[0],
-      &atype_cpy[0],
-      inlist,
-      max_nbor_size,
-      &avg[0],
-      &std[0],
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a);
-
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 1; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*1 + jj*1 + dd] - 
-		       expected_env[ii*nnei*1 + jj*1 + dd]) , 
-		  1e-5);
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
+  deepmd::prod_env_mat_r_cpu(&em[0], &em_deriv[0], &rij[0], &nlist[0],
+                             &posi_cpy[0], &atype_cpy[0], inlist, max_nbor_size,
+                             &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
+
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 1; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 1 + jj * 1 + dd] -
+                       expected_env[ii * nnei * 1 + jj * 1 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-
-TEST_F(TestEnvMatR, prod_cpu_equal_cpu)
-{
+TEST_F(TestEnvMatR, prod_cpu_equal_cpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
@@ -302,33 +311,24 @@ TEST_F(TestEnvMatR, prod_cpu_equal_cpu)
   std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3), rij(nloc * nnei * 3);
+  std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
+      rij(nloc * nnei * 3);
   std::vector nlist(nloc * nnei);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
-  deepmd::prod_env_mat_r_cpu(
-      &em[0],
-      &em_deriv[0],
-      &rij[0],
-      &nlist[0],
-      &posi_cpy[0],
-      &atype_cpy[0],
-      inlist,
-      max_nbor_size,
-      &avg[0],
-      &std[0],
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a);
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
+  deepmd::prod_env_mat_r_cpu(&em[0], &em_deriv[0], &rij[0], &nlist[0],
+                             &posi_cpy[0], &atype_cpy[0], inlist, max_nbor_size,
+                             &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
 
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_r_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
+    deepmd::env_mat_r_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy,
+                                  atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth,
+                                  rc);
     EXPECT_EQ(env_1.size(), nnei * 1);
     EXPECT_EQ(env_deriv_1.size(), nnei * 1 * 3);
     EXPECT_EQ(rij_a_1.size(), nnei * 3);
@@ -337,57 +337,57 @@ TEST_F(TestEnvMatR, prod_cpu_equal_cpu)
     EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size());
     EXPECT_EQ(rij_a_1.size() * nloc, rij.size());
     EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size());
-    for (unsigned jj = 0; jj < env_1.size(); ++jj){
-      EXPECT_LT(fabs(em[ii*nnei*1+jj] - env_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < env_1.size(); ++jj) {
+      EXPECT_LT(fabs(em[ii * nnei * 1 + jj] - env_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj){
-      EXPECT_LT(fabs(em_deriv[ii*nnei*1*3+jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj){
-      EXPECT_LT(fabs(rij[ii*nnei*3+jj] - rij_a_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) {
+      EXPECT_LT(fabs(em_deriv[ii * nnei * 1 * 3 + jj] - env_deriv_1[jj]),
+                1e-10);
     }
-    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj){
-      EXPECT_EQ(nlist[ii*nnei+jj], fmt_nlist_a_1[jj]);
+    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) {
+      EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) {
+      EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]);
     }
   }
 }
 
-
 #if GOOGLE_CUDA
-TEST_F(TestEnvMatR, prod_gpu_cuda)
-{
+TEST_F(TestEnvMatR, prod_gpu_cuda) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt, 0.0), em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
+  std::vector em(nloc * ndescrpt, 0.0),
+      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
   std::vector nlist(nloc * nnei, 0);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
-
-  double * em_dev = NULL, * em_deriv_dev = NULL, * rij_dev = NULL;
-  double * posi_cpy_dev = NULL, * avg_dev = NULL, * std_dev = NULL;
-  int * atype_cpy_dev = NULL, * nlist_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
+
+  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
+  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
+  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
+      *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -397,29 +397,18 @@ TEST_F(TestEnvMatR, prod_gpu_cuda)
 
   deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, max_nbor_size);
-
-  deepmd::prod_env_mat_r_gpu_cuda(    
-      em_dev, 
-      em_deriv_dev, 
-      rij_dev, 
-      nlist_dev, 
-      posi_cpy_dev, 
-      atype_cpy_dev, 
-      gpu_inlist,
-      array_int_dev, 
-      array_longlong_dev,
-      max_nbor_size,
-      avg_dev, 
-      std_dev, 
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
+                                   max_nbor_size);
+
+  deepmd::prod_env_mat_r_gpu_cuda(
+      em_dev, em_deriv_dev, rij_dev, nlist_dev, posi_cpy_dev, atype_cpy_dev,
+      gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, avg_dev,
+      std_dev, nloc, nall, rc, rc_smth, sec_a);
   deepmd::memcpy_device_to_host(em_dev, em);
   deepmd::delete_device_memory(em_dev);
   deepmd::delete_device_memory(em_deriv_dev);
@@ -433,51 +422,51 @@ TEST_F(TestEnvMatR, prod_gpu_cuda)
   deepmd::delete_device_memory(memory_dev);
   deepmd::free_nlist_gpu_device(gpu_inlist);
 
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 1; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*1 + jj*1 + dd] - 
-		       expected_env[ii*nnei*1 + jj*1 + dd]) , 
-		  1e-5);
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 1; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 1 + jj * 1 + dd] -
+                       expected_env[ii * nnei * 1 + jj * 1 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-TEST_F(TestEnvMatR, prod_gpu_cuda_equal_cpu)
-{
+TEST_F(TestEnvMatR, prod_gpu_cuda_equal_cpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt, 0.0), em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
+  std::vector em(nloc * ndescrpt, 0.0),
+      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
   std::vector nlist(nloc * nnei, 0);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
-
-  double * em_dev = NULL, * em_deriv_dev = NULL, * rij_dev = NULL;
-  double * posi_cpy_dev = NULL, * avg_dev = NULL, * std_dev = NULL;
-  int * atype_cpy_dev = NULL, * nlist_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
+
+  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
+  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
+  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
+      *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -487,29 +476,18 @@ TEST_F(TestEnvMatR, prod_gpu_cuda_equal_cpu)
 
   deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, max_nbor_size);
-
-  deepmd::prod_env_mat_r_gpu_cuda(    
-      em_dev, 
-      em_deriv_dev, 
-      rij_dev, 
-      nlist_dev, 
-      posi_cpy_dev, 
-      atype_cpy_dev, 
-      gpu_inlist,
-      array_int_dev, 
-      array_longlong_dev,
-      max_nbor_size,
-      avg_dev, 
-      std_dev, 
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
+                                   max_nbor_size);
+
+  deepmd::prod_env_mat_r_gpu_cuda(
+      em_dev, em_deriv_dev, rij_dev, nlist_dev, posi_cpy_dev, atype_cpy_dev,
+      gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, avg_dev,
+      std_dev, nloc, nall, rc, rc_smth, sec_a);
   deepmd::memcpy_device_to_host(em_dev, em);
   deepmd::memcpy_device_to_host(em_deriv_dev, em_deriv);
   deepmd::memcpy_device_to_host(rij_dev, rij);
@@ -528,10 +506,13 @@ TEST_F(TestEnvMatR, prod_gpu_cuda_equal_cpu)
 
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);  
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_r_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
+    deepmd::env_mat_r_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy,
+                                  atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth,
+                                  rc);
     EXPECT_EQ(env_1.size(), nnei * 1);
     EXPECT_EQ(env_deriv_1.size(), nnei * 1 * 3);
     EXPECT_EQ(rij_a_1.size(), nnei * 3);
@@ -540,57 +521,58 @@ TEST_F(TestEnvMatR, prod_gpu_cuda_equal_cpu)
     EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size());
     EXPECT_EQ(rij_a_1.size() * nloc, rij.size());
     EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size());
-    for (unsigned jj = 0; jj < env_1.size(); ++jj){
-      EXPECT_LT(fabs(em[ii*nnei*1+jj] - env_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < env_1.size(); ++jj) {
+      EXPECT_LT(fabs(em[ii * nnei * 1 + jj] - env_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj){
-      EXPECT_LT(fabs(em_deriv[ii*nnei*1*3+jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj){
-      EXPECT_LT(fabs(rij[ii*nnei*3+jj] - rij_a_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) {
+      EXPECT_LT(fabs(em_deriv[ii * nnei * 1 * 3 + jj] - env_deriv_1[jj]),
+                1e-10);
     }
-    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj){
-      EXPECT_EQ(nlist[ii*nnei+jj], fmt_nlist_a_1[jj]);
+    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) {
+      EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) {
+      EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]);
     }
   }
 }
-#endif //GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestEnvMatR, prod_gpu_rocm)
-{
+TEST_F(TestEnvMatR, prod_gpu_rocm) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt, 0.0), em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
+  std::vector em(nloc * ndescrpt, 0.0),
+      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
   std::vector nlist(nloc * nnei, 0);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
-
-  double * em_dev = NULL, * em_deriv_dev = NULL, * rij_dev = NULL;
-  double * posi_cpy_dev = NULL, * avg_dev = NULL, * std_dev = NULL;
-  int * atype_cpy_dev = NULL, * nlist_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
+
+  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
+  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
+  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
+      *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -600,29 +582,18 @@ TEST_F(TestEnvMatR, prod_gpu_rocm)
 
   deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, max_nbor_size);
-
-  deepmd::prod_env_mat_r_gpu_rocm(    
-      em_dev, 
-      em_deriv_dev, 
-      rij_dev, 
-      nlist_dev, 
-      posi_cpy_dev, 
-      atype_cpy_dev, 
-      gpu_inlist,
-      array_int_dev, 
-      array_longlong_dev,
-      max_nbor_size,
-      avg_dev, 
-      std_dev, 
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
+                                   max_nbor_size);
+
+  deepmd::prod_env_mat_r_gpu_rocm(
+      em_dev, em_deriv_dev, rij_dev, nlist_dev, posi_cpy_dev, atype_cpy_dev,
+      gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, avg_dev,
+      std_dev, nloc, nall, rc, rc_smth, sec_a);
   deepmd::memcpy_device_to_host(em_dev, em);
   deepmd::delete_device_memory(em_dev);
   deepmd::delete_device_memory(em_deriv_dev);
@@ -636,51 +607,51 @@ TEST_F(TestEnvMatR, prod_gpu_rocm)
   deepmd::delete_device_memory(memory_dev);
   deepmd::free_nlist_gpu_device(gpu_inlist);
 
-  for(int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      for (int dd = 0; dd < 1; ++dd){
-    	EXPECT_LT(fabs(em[ii*nnei*1 + jj*1 + dd] - 
-		       expected_env[ii*nnei*1 + jj*1 + dd]) , 
-		  1e-5);
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      for (int dd = 0; dd < 1; ++dd) {
+        EXPECT_LT(fabs(em[ii * nnei * 1 + jj * 1 + dd] -
+                       expected_env[ii * nnei * 1 + jj * 1 + dd]),
+                  1e-5);
       }
-    }    
+    }
   }
 }
 
-TEST_F(TestEnvMatR, prod_gpu_rocm_equal_cpu)
-{
+TEST_F(TestEnvMatR, prod_gpu_rocm_equal_cpu) {
   EXPECT_EQ(nlist_r_cpy.size(), nloc);
   int tot_nnei = 0;
   int max_nbor_size = 0;
-  for(int ii = 0; ii < nlist_a_cpy.size(); ++ii){
+  for (int ii = 0; ii < nlist_a_cpy.size(); ++ii) {
     tot_nnei += nlist_a_cpy[ii].size();
-    if (nlist_a_cpy[ii].size() > max_nbor_size){
+    if (nlist_a_cpy[ii].size() > max_nbor_size) {
       max_nbor_size = nlist_a_cpy[ii].size();
     }
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
   std::vector firstneigh(nloc);
-  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
+  deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
-  std::vector em(nloc * ndescrpt, 0.0), em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
+  std::vector em(nloc * ndescrpt, 0.0),
+      em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
   std::vector nlist(nloc * nnei, 0);
-  std::vector avg(ntypes * ndescrpt, 0);
-  std::vector std(ntypes * ndescrpt, 1);
-
-  double * em_dev = NULL, * em_deriv_dev = NULL, * rij_dev = NULL;
-  double * posi_cpy_dev = NULL, * avg_dev = NULL, * std_dev = NULL;
-  int * atype_cpy_dev = NULL, * nlist_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  std::vector avg(ntypes * ndescrpt, 0);
+  std::vector std(ntypes * ndescrpt, 1);
+
+  double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
+  double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
+  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
+      *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -690,29 +661,18 @@ TEST_F(TestEnvMatR, prod_gpu_rocm_equal_cpu)
 
   deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev, max_nbor_size);
-
-  deepmd::prod_env_mat_r_gpu_rocm(    
-      em_dev, 
-      em_deriv_dev, 
-      rij_dev, 
-      nlist_dev, 
-      posi_cpy_dev, 
-      atype_cpy_dev, 
-      gpu_inlist,
-      array_int_dev, 
-      array_longlong_dev,
-      max_nbor_size,
-      avg_dev, 
-      std_dev, 
-      nloc,
-      nall,
-      rc, 
-      rc_smth,
-      sec_a);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, inlist, memory_dev,
+                                   max_nbor_size);
+
+  deepmd::prod_env_mat_r_gpu_rocm(
+      em_dev, em_deriv_dev, rij_dev, nlist_dev, posi_cpy_dev, atype_cpy_dev,
+      gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, avg_dev,
+      std_dev, nloc, nall, rc, rc_smth, sec_a);
   deepmd::memcpy_device_to_host(em_dev, em);
   deepmd::memcpy_device_to_host(em_deriv_dev, em_deriv);
   deepmd::memcpy_device_to_host(rij_dev, rij);
@@ -731,10 +691,13 @@ TEST_F(TestEnvMatR, prod_gpu_rocm_equal_cpu)
 
   std::vector fmt_nlist_a_1, fmt_nlist_r_1;
   std::vector env_1, env_deriv_1, rij_a_1;
-  for(int ii = 0; ii < nloc; ++ii){
-    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);  
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_1 = format_nlist_i_cpu(fmt_nlist_a_1, posi_cpy, atype_cpy,
+                                           ii, nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret_1, -1);
-    deepmd::env_mat_r_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy, atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth, rc);
+    deepmd::env_mat_r_cpu(env_1, env_deriv_1, rij_a_1, posi_cpy,
+                                  atype_cpy, ii, fmt_nlist_a_1, sec_a, rc_smth,
+                                  rc);
     EXPECT_EQ(env_1.size(), nnei * 1);
     EXPECT_EQ(env_deriv_1.size(), nnei * 1 * 3);
     EXPECT_EQ(rij_a_1.size(), nnei * 3);
@@ -743,18 +706,19 @@ TEST_F(TestEnvMatR, prod_gpu_rocm_equal_cpu)
     EXPECT_EQ(env_deriv_1.size() * nloc, em_deriv.size());
     EXPECT_EQ(rij_a_1.size() * nloc, rij.size());
     EXPECT_EQ(fmt_nlist_a_1.size() * nloc, nlist.size());
-    for (unsigned jj = 0; jj < env_1.size(); ++jj){
-      EXPECT_LT(fabs(em[ii*nnei*1+jj] - env_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < env_1.size(); ++jj) {
+      EXPECT_LT(fabs(em[ii * nnei * 1 + jj] - env_1[jj]), 1e-10);
+    }
+    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj) {
+      EXPECT_LT(fabs(em_deriv[ii * nnei * 1 * 3 + jj] - env_deriv_1[jj]),
+                1e-10);
     }
-    for (unsigned jj = 0; jj < env_deriv_1.size(); ++jj){
-      EXPECT_LT(fabs(em_deriv[ii*nnei*1*3+jj] - env_deriv_1[jj]), 1e-10);      
-    }    
-    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj){
-      EXPECT_LT(fabs(rij[ii*nnei*3+jj] - rij_a_1[jj]), 1e-10);
+    for (unsigned jj = 0; jj < rij_a_1.size(); ++jj) {
+      EXPECT_LT(fabs(rij[ii * nnei * 3 + jj] - rij_a_1[jj]), 1e-10);
     }
-    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj){
-      EXPECT_EQ(nlist[ii*nnei+jj], fmt_nlist_a_1[jj]);
+    for (unsigned jj = 0; jj < fmt_nlist_a_1.size(); ++jj) {
+      EXPECT_EQ(nlist[ii * nnei + jj], fmt_nlist_a_1[jj]);
     }
   }
 }
-#endif //TENSORFLOW_USE_ROCM
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_ewald.cc b/source/lib/tests/test_ewald.cc
index cb8e59e1d9..ee81ba7dfd 100644
--- a/source/lib/tests/test_ewald.cc
+++ b/source/lib/tests/test_ewald.cc
@@ -1,52 +1,48 @@
 #include 
-#include 
+
 #include 
+#include 
+
 #include "ewald.h"
 
-class TestEwald : public ::testing::Test
-{
-protected:
-  std::vector coord = {
-    12.83, 2.56, 2.18,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56 
-  };
-  std::vector charge = {
-    -2, 1, 1, -2, 1, 1,
-  };
-  std::vector boxt = {
-    13., 0., 0., 0., 13., 0., 0., 0., 13.
+class TestEwald : public ::testing::Test {
+ protected:
+  std::vector coord = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                               00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                               3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector charge = {
+      -2, 1, 1, -2, 1, 1,
   };
+  std::vector boxt = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   deepmd::EwaldParameters eparam;
   double expected_e = 4.7215808340392229e+00;
   std::vector expected_f = {
-    -5.4937025715874448e+00,5.6659817006308417e+00,3.8059426028301313e-01,2.5210962791915938e+00,-2.6383552457553545e+00,-4.8998411247787405e-01,2.7390037416771147e+00,-3.2890571945143514e+00,3.8057620258450320e-01,6.7561832843578351e+00,-1.3707287681111919e+00,2.7733203842981604e+00,-3.3297964389679557e+00,1.0404967238120841e+00,-1.8035649784287722e+00,-3.1927842946711418e+00,5.9166278393797123e-01,-1.2409417562590299e+00,
+      -5.4937025715874448e+00, 5.6659817006308417e+00,  3.8059426028301313e-01,
+      2.5210962791915938e+00,  -2.6383552457553545e+00, -4.8998411247787405e-01,
+      2.7390037416771147e+00,  -3.2890571945143514e+00, 3.8057620258450320e-01,
+      6.7561832843578351e+00,  -1.3707287681111919e+00, 2.7733203842981604e+00,
+      -3.3297964389679557e+00, 1.0404967238120841e+00,  -1.8035649784287722e+00,
+      -3.1927842946711418e+00, 5.9166278393797123e-01,  -1.2409417562590299e+00,
   };
   std::vector expected_v = {
-    6.5088081157418898e-01,1.9076542856278367e+00,-9.8010077026955389e-01,1.9076542856278367e+00,1.3101841366497322e+00,1.9794445391572657e-01,-9.8010077026955389e-01,1.9794445391572657e-01,1.9232614011636004e+00
-  };
-  
-  void SetUp() override {    
-  };
-};
+      6.5088081157418898e-01,  1.9076542856278367e+00, -9.8010077026955389e-01,
+      1.9076542856278367e+00,  1.3101841366497322e+00, 1.9794445391572657e-01,
+      -9.8010077026955389e-01, 1.9794445391572657e-01, 1.9232614011636004e+00};
 
+  void SetUp() override{};
+};
 
-TEST_F(TestEwald, cpu)
-{
+TEST_F(TestEwald, cpu) {
   double ener;
-  std::vector force, virial;
+  std::vector force, virial;
   deepmd::Region region;
   init_region_cpu(region, &boxt[0]);
   ewald_recp(ener, force, virial, coord, charge, region, eparam);
   EXPECT_LT(fabs(ener - expected_e), 1e-10);
-  for(int ii = 0; ii < force.size(); ++ii){
+  for (int ii = 0; ii < force.size(); ++ii) {
     EXPECT_LT(fabs(force[ii] - expected_f[ii]), 1e-10);
   }
-  for(int ii = 0; ii < virial.size(); ++ii){
+  for (int ii = 0; ii < virial.size(); ++ii) {
     EXPECT_LT(fabs(virial[ii] - expected_v[ii]), 1e-10);
   }
 }
-
diff --git a/source/lib/tests/test_fmt_nlist.cc b/source/lib/tests/test_fmt_nlist.cc
index 844d110de5..9db1cacd63 100644
--- a/source/lib/tests/test_fmt_nlist.cc
+++ b/source/lib/tests/test_fmt_nlist.cc
@@ -1,391 +1,364 @@
 #include 
+
 #include "fmt_nlist.h"
 #include "neighbor_list.h"
 
-class TestFormatNlist : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestFormatNlist : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall;
   double rc = 6;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 10, 20};
   std::vector sec_r = {0, 0, 0};
   std::vector nat_stt, ext_stt, ext_end;
   std::vector expect_nlist_cpy = {
-    33, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1 , 32, 34, 35, -1, -1, -1, -1, -1, -1, 
-    0 , 33, -1, -1, -1, -1, -1, -1, -1, -1, 32, 34, 35, -1, -1, -1, -1, -1, -1, -1, 
-    6 , 3 , -1, -1, -1, -1, -1, -1, -1, -1, 7 , 4 , 5 , -1, -1, -1, -1, -1, -1, -1, 
-    6 , -1, -1, -1, -1, -1, -1, -1, -1, -1, 4 , 5 , 2 , 7 , -1, -1, -1, -1, -1, -1, 
-    3 , 6 , -1, -1, -1, -1, -1, -1, -1, -1, 5 , 2 , 7 , -1, -1, -1, -1, -1, -1, -1, 
-    3 , 6 , -1, -1, -1, -1, -1, -1, -1, -1, 4 , 2 , 7 , -1, -1, -1, -1, -1, -1, -1
-  };      
+      33, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1,  32, 34, 35, -1, -1, -1, -1,
+      -1, -1, 0,  33, -1, -1, -1, -1, -1, -1, -1, -1, 32, 34, 35, -1, -1, -1,
+      -1, -1, -1, -1, 6,  3,  -1, -1, -1, -1, -1, -1, -1, -1, 7,  4,  5,  -1,
+      -1, -1, -1, -1, -1, -1, 6,  -1, -1, -1, -1, -1, -1, -1, -1, -1, 4,  5,
+      2,  7,  -1, -1, -1, -1, -1, -1, 3,  6,  -1, -1, -1, -1, -1, -1, -1, -1,
+      5,  2,  7,  -1, -1, -1, -1, -1, -1, -1, 3,  6,  -1, -1, -1, -1, -1, -1,
+      -1, -1, 4,  2,  7,  -1, -1, -1, -1, -1, -1, -1};
   std::vector expect_nlist;
   int max_nbor_size;
 
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    for (unsigned ii = 0; ii < expect_nlist_cpy.size(); ++ii){
-      if (expect_nlist_cpy[ii] >= 0){
-	expect_nlist.push_back(mapping[expect_nlist_cpy[ii]]);
-      }
-      else{
-	expect_nlist.push_back(-1);
+    for (unsigned ii = 0; ii < expect_nlist_cpy.size(); ++ii) {
+      if (expect_nlist_cpy[ii] >= 0) {
+        expect_nlist.push_back(mapping[expect_nlist_cpy[ii]]);
+      } else {
+        expect_nlist.push_back(-1);
       }
-    } 
-    max_nbor_size = 0;   
-  }
-  void TearDown() override {
+    }
+    max_nbor_size = 0;
   }
+  void TearDown() override {}
 };
 
-
-class TestFormatNlistShortSel : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestFormatNlistShortSel : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall;
   double rc = 6;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 2, 4};
   std::vector sec_r = {0, 0, 0};
   std::vector nat_stt, ext_stt, ext_end;
   std::vector expect_nlist_cpy = {
-    33, -1,  1, 32, 
-    0, 33, 32, 34, 
-    6,  3,  7,  4, 
-    6, -1,  4,  5, 
-    3,  6,  5,  2, 
-    3,  6,  4,  2, 
-  };      
+      33, -1, 1, 32, 0, 33, 32, 34, 6, 3, 7, 4,
+      6,  -1, 4, 5,  3, 6,  5,  2,  3, 6, 4, 2,
+  };
   std::vector expect_nlist;
   int max_nbor_size;
 
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    for (unsigned ii = 0; ii < expect_nlist_cpy.size(); ++ii){
-      if (expect_nlist_cpy[ii] >= 0){
-	expect_nlist.push_back(mapping[expect_nlist_cpy[ii]]);
-      }
-      else{
-	expect_nlist.push_back(-1);
+    for (unsigned ii = 0; ii < expect_nlist_cpy.size(); ++ii) {
+      if (expect_nlist_cpy[ii] >= 0) {
+        expect_nlist.push_back(mapping[expect_nlist_cpy[ii]]);
+      } else {
+        expect_nlist.push_back(-1);
       }
-    } 
-    max_nbor_size = 0;      
-  }
-  void TearDown() override {
+    }
+    max_nbor_size = 0;
   }
+  void TearDown() override {}
 };
 
-class TestEncodingDecodingNborInfo : public ::testing::Test
-{
-protected:
-  std::vector valid_type = {
-    0, 1, 127, 77, 47, 9, 11
-  };
-  std::vector valid_dist = {
-    23.3333, 0.001234, 1.456, 127.7, 2.021, 0.409, 11.2
-  };
-  std::vector valid_index = {
-    0, 16777215, 1000000, 10000000, 202149, 478910, 5006
-  };
-  std::vector expect_key = {
-    26270960290103296UL, 144116577447444479UL, 18304268195882549824UL, 11240646899941283456UL, 6775689283274741157UL, 1297497185738772158UL, 1597877147777635214UL
-  };
-
-  std::vector invalid_type = {
-    0, 256, 128, 77, 47, 126, 1100
-  };
-  std::vector invalid_dist = {
-    128.0, 0.001234, 1.456, 130.7, 2.021, 0.409, 11.2
-  };
-  std::vector invalid_index = {
-    0, 16777215, 1 << 24, 10000000, 20210409, 478910, 5006
-  };
-  std::vector expect_cuda_error_check = {
-    false, false, false, false, false, true, false
-  };
-
-  std::vector expect_type = valid_type;
-  std::vector expect_index = valid_index;
+class TestEncodingDecodingNborInfo : public ::testing::Test {
+ protected:
+  std::vector valid_type = {0, 1, 127, 77, 47, 9, 11};
+  std::vector valid_dist = {23.3333, 0.001234, 1.456, 127.7,
+                                    2.021,   0.409,    11.2};
+  std::vector valid_index = {0,      16777215, 1000000, 10000000,
+                                  202149, 478910,   5006};
+  std::vector expect_key = {
+      26270960290103296UL,    144116577447444479UL,  18304268195882549824UL,
+      11240646899941283456UL, 6775689283274741157UL, 1297497185738772158UL,
+      1597877147777635214UL};
+
+  std::vector invalid_type = {0, 256, 128, 77, 47, 126, 1100};
+  std::vector invalid_dist = {128.0, 0.001234, 1.456, 130.7,
+                                      2.021, 0.409,    11.2};
+  std::vector invalid_index = {0,        16777215, 1 << 24, 10000000,
+                                    20210409, 478910,   5006};
+  std::vector expect_cuda_error_check = {false, false, false, false,
+                                               false, true,  false};
+
+  std::vector expect_type = valid_type;
+  std::vector expect_index = valid_index;
   int size_of_array = valid_type.size();
 
-  void SetUp() override {
-  }
-  void TearDown() override {
-  }
+  void SetUp() override {}
+  void TearDown() override {}
 };
 
 // orginal implementation. copy ghost
-TEST_F(TestFormatNlist, orig_cpy)
-{
+TEST_F(TestFormatNlist, orig_cpy) {
   std::vector> nlist_a, nlist_r;
   std::vector fmt_nlist_a, fmt_nlist_r;
-  build_nlist(nlist_a, nlist_r, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+  build_nlist(nlist_a, nlist_r, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt,
+              ext_end, region, ncell);
 
   bool pbc = false;
   int ii = 0;
-  for (ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, posi_cpy, ntypes, atype_cpy, region, pbc, ii, nlist_a[ii], nlist_r[ii], rc, sec_a, sec_r);
-  
+  for (ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, posi_cpy, ntypes,
+                                    atype_cpy, region, pbc, ii, nlist_a[ii],
+                                    nlist_r[ii], rc, sec_a, sec_r);
+
     EXPECT_EQ(ret, -1);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      EXPECT_EQ(fmt_nlist_a[jj], expect_nlist_cpy[ii*sec_a[2]+jj]);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      EXPECT_EQ(fmt_nlist_a[jj], expect_nlist_cpy[ii * sec_a[2] + jj]);
     }
   }
 }
 
 // orginal implementation. copy ghost should be equal to pbc
-TEST_F(TestFormatNlist, orig_pbc)
-{
+TEST_F(TestFormatNlist, orig_pbc) {
   std::vector<std::vector<int>> nlist_a_1, nlist_r_1;
   build_nlist(nlist_a_1, nlist_r_1, posi, rc, rc, ncell, region);
-  
+
+  std::vector<int> fmt_nlist_a_1, fmt_nlist_r_1;
 
-  for (int ii = 0; ii < nloc; ++ii){
-    int ret_1 = format_nlist_i_fill_a(fmt_nlist_a_1, fmt_nlist_r_1, posi, ntypes, atype, region, true, ii, nlist_a_1[ii], nlist_r_1[ii], rc, sec_a, sec_r);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_1 = format_nlist_i_fill_a(
+        fmt_nlist_a_1, fmt_nlist_r_1, posi, ntypes, atype, region, true, ii,
+        nlist_a_1[ii], nlist_r_1[ii], rc, sec_a, sec_r);
 
     EXPECT_EQ(ret_1, -1);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      EXPECT_EQ(fmt_nlist_a_1[jj], expect_nlist[ii*sec_a[2]+jj]);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      EXPECT_EQ(fmt_nlist_a_1[jj], expect_nlist[ii * sec_a[2] + jj]);
     }
-  }  
+  }
 }
 
 // orginal implementation. copy ghost should be equal to pbc
-TEST_F(TestFormatNlist, orig_cpy_equal_pbc)
-{
+TEST_F(TestFormatNlist, orig_cpy_equal_pbc) {
   std::vector<std::vector<int>> nlist_a_0, nlist_r_0;
-  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell,
+              ext_stt, ext_end, region, ncell);
   std::vector<std::vector<int>> nlist_a_1, nlist_r_1;
   build_nlist(nlist_a_1, nlist_r_1, posi, rc, rc, ncell, region);
-  
+
   std::vector<int> fmt_nlist_a_0, fmt_nlist_r_0;
   std::vector<int> fmt_nlist_a_1, fmt_nlist_r_1;
 
-  for (int ii = 0; ii < nloc; ++ii){
-    int ret_0 = format_nlist_i_fill_a(fmt_nlist_a_0, fmt_nlist_r_0, posi_cpy, ntypes, atype_cpy, region, false, ii, nlist_a_0[ii], nlist_r_0[ii], rc, sec_a, sec_r);
-    int ret_1 = format_nlist_i_fill_a(fmt_nlist_a_1, fmt_nlist_r_1, posi, ntypes, atype, region, true, ii, nlist_a_1[ii], nlist_r_1[ii], rc, sec_a, sec_r);
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_0 = format_nlist_i_fill_a(
+        fmt_nlist_a_0, fmt_nlist_r_0, posi_cpy, ntypes, atype_cpy, region,
+        false, ii, nlist_a_0[ii], nlist_r_0[ii], rc, sec_a, sec_r);
+    int ret_1 = format_nlist_i_fill_a(
+        fmt_nlist_a_1, fmt_nlist_r_1, posi, ntypes, atype, region, true, ii,
+        nlist_a_1[ii], nlist_r_1[ii], rc, sec_a, sec_r);
 
     EXPECT_EQ(ret_0, -1);
     EXPECT_EQ(ret_1, -1);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      if (fmt_nlist_a_0[jj] == -1){
-	// null record
-	EXPECT_EQ(fmt_nlist_a_1[jj], -1);
-      }
-      else{
-	EXPECT_EQ(fmt_nlist_a_1[jj], mapping[fmt_nlist_a_0[jj]]);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      if (fmt_nlist_a_0[jj] == -1) {
+        // null record
+        EXPECT_EQ(fmt_nlist_a_1[jj], -1);
+      } else {
+        EXPECT_EQ(fmt_nlist_a_1[jj], mapping[fmt_nlist_a_0[jj]]);
       }
     }
-  }  
+  }
 }
 
-TEST_F(TestFormatNlist, cpu_i_equal_orig)
-{
+TEST_F(TestFormatNlist, cpu_i_equal_orig) {
   std::vector<std::vector<int>> nlist_a_0, nlist_r_0;
-  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell,
+              ext_stt, ext_end, region, ncell);
 
   std::vector<int> fmt_nlist_a_0, fmt_nlist_r_0;
   std::vector<int> fmt_nlist_a_1;
-  
-  for (int ii = 0; ii < nloc; ++ii){
-    int ret_0 = format_nlist_i_fill_a(fmt_nlist_a_0, fmt_nlist_r_0, posi_cpy, ntypes, atype_cpy, region, false, ii, nlist_a_0[ii], nlist_r_0[ii], rc, sec_a, sec_r);
-    int ret_1 = format_nlist_i_cpu<double>(fmt_nlist_a_1, posi_cpy, atype_cpy, ii, nlist_a_0[ii], rc, sec_a);
+
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_0 = format_nlist_i_fill_a(
+        fmt_nlist_a_0, fmt_nlist_r_0, posi_cpy, ntypes, atype_cpy, region,
+        false, ii, nlist_a_0[ii], nlist_r_0[ii], rc, sec_a, sec_r);
+    int ret_1 = format_nlist_i_cpu<double>(fmt_nlist_a_1, posi_cpy, atype_cpy,
+                                           ii, nlist_a_0[ii], rc, sec_a);
     EXPECT_EQ(ret_0, -1);
     EXPECT_EQ(ret_1, -1);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
       EXPECT_EQ(fmt_nlist_a_1[jj], fmt_nlist_a_0[jj]);
     }
   }
 }
 
-TEST_F(TestFormatNlist, cpu)
-{
+TEST_F(TestFormatNlist, cpu) {
   std::vector<std::vector<int>> nlist_a_0, nlist_r_0;
-  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);  
+  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell,
+              ext_stt, ext_end, region, ncell);
   // make a input nlist
   int inum = nlist_a_0.size();
-  std::vector<int > ilist(inum);
-  std::vector<int > numneigh(inum);
-  std::vector<int *> firstneigh(inum);
+  std::vector<int> ilist(inum);
+  std::vector<int> numneigh(inum);
+  std::vector<int*> firstneigh(inum);
   deepmd::InputNlist in_nlist(inum, &ilist[0], &numneigh[0], &firstneigh[0]);
   convert_nlist(in_nlist, nlist_a_0);
   // allocate the mem for the result
   std::vector<int> nlist(inum * sec_a.back());
   EXPECT_EQ(nlist.size(), expect_nlist_cpy.size());
   // format nlist
-  format_nlist_cpu(
-      &nlist[0], 
-      in_nlist,
-      &posi_cpy[0],
-      &atype_cpy[0],
-      nloc,
-      nall,
-      rc,
-      sec_a);
+  format_nlist_cpu(&nlist[0], in_nlist, &posi_cpy[0], &atype_cpy[0], nloc, nall,
+                   rc, sec_a);
   // validate
-  for(int ii = 0; ii < nlist.size(); ++ii){
+  for (int ii = 0; ii < nlist.size(); ++ii) {
     EXPECT_EQ(nlist[ii], expect_nlist_cpy[ii]);
   }
 }
 
-
 // orginal implementation. copy ghost
-TEST_F(TestFormatNlistShortSel, orig_cpy)
-{
+TEST_F(TestFormatNlistShortSel, orig_cpy) {
   std::vector<std::vector<int>> nlist_a, nlist_r;
   std::vector<int> fmt_nlist_a, fmt_nlist_r;
-  build_nlist(nlist_a, nlist_r, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+  build_nlist(nlist_a, nlist_r, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt,
+              ext_end, region, ncell);
 
   bool pbc = false;
   int ii = 0;
-  for (ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, posi_cpy, ntypes, atype_cpy, region, pbc, ii, nlist_a[ii], nlist_r[ii], rc, sec_a, sec_r);  
+  for (ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, posi_cpy, ntypes,
+                                    atype_cpy, region, pbc, ii, nlist_a[ii],
+                                    nlist_r[ii], rc, sec_a, sec_r);
     EXPECT_EQ(ret, 1);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      EXPECT_EQ(fmt_nlist_a[jj], expect_nlist_cpy[ii*sec_a[2]+jj]);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      EXPECT_EQ(fmt_nlist_a[jj], expect_nlist_cpy[ii * sec_a[2] + jj]);
       // printf("%2d ", fmt_nlist_a[jj]);
     }
     // printf("\n");
   }
 }
 
-
-TEST_F(TestFormatNlistShortSel, cpu_equal_orig)
-{
+TEST_F(TestFormatNlistShortSel, cpu_equal_orig) {
   std::vector<std::vector<int>> nlist_a_0, nlist_r_0;
-  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell,
+              ext_stt, ext_end, region, ncell);
 
   std::vector<int> fmt_nlist_a_1;
-  
-  for (int ii = 0; ii < nloc; ++ii){
-    int ret_1 = format_nlist_i_cpu<double>(fmt_nlist_a_1, posi_cpy, atype_cpy, ii, nlist_a_0[ii], rc, sec_a);
+
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret_1 = format_nlist_i_cpu<double>(fmt_nlist_a_1, posi_cpy, atype_cpy,
+                                           ii, nlist_a_0[ii], rc, sec_a);
     EXPECT_EQ(ret_1, 1);
-    for (int jj = 0; jj < sec_a[2]; ++jj){
-      EXPECT_EQ(fmt_nlist_a_1[jj], expect_nlist_cpy[ii*sec_a[2]+jj]);
+    for (int jj = 0; jj < sec_a[2]; ++jj) {
+      EXPECT_EQ(fmt_nlist_a_1[jj], expect_nlist_cpy[ii * sec_a[2] + jj]);
     }
   }
 }
 
-TEST_F(TestFormatNlistShortSel, cpu)
-{
+TEST_F(TestFormatNlistShortSel, cpu) {
   std::vector<std::vector<int>> nlist_a_0, nlist_r_0;
-  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);  
+  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell,
+              ext_stt, ext_end, region, ncell);
   // make a input nlist
   int inum = nlist_a_0.size();
-  std::vector<int > ilist(inum);
-  std::vector<int > numneigh(inum);
-  std::vector<int *> firstneigh(inum);
+  std::vector<int> ilist(inum);
+  std::vector<int> numneigh(inum);
+  std::vector<int*> firstneigh(inum);
   deepmd::InputNlist in_nlist(inum, &ilist[0], &numneigh[0], &firstneigh[0]);
-  convert_nlist(in_nlist, nlist_a_0);  
+  convert_nlist(in_nlist, nlist_a_0);
   // mem
   std::vector<int> nlist(inum * sec_a.back());
   EXPECT_EQ(nlist.size(), expect_nlist_cpy.size());
   // format nlist
-  format_nlist_cpu(
-      &nlist[0], 
-      in_nlist,
-      &posi_cpy[0],
-      &atype_cpy[0],
-      nloc,
-      nall,
-      rc,
-      sec_a);
+  format_nlist_cpu(&nlist[0], in_nlist, &posi_cpy[0], &atype_cpy[0], nloc, nall,
+                   rc, sec_a);
   // validate
-  for(int ii = 0; ii < nlist.size(); ++ii){
+  for (int ii = 0; ii < nlist.size(); ++ii) {
     EXPECT_EQ(nlist[ii], expect_nlist_cpy[ii]);
   }
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestFormatNlist, gpu_cuda)
-{
+TEST_F(TestFormatNlist, gpu_cuda) {
   std::vector<std::vector<int>> nlist_a_0, nlist_r_0;
-  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);  
+  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell,
+              ext_stt, ext_end, region, ncell);
   // make a input nlist
   int inum = nlist_a_0.size();
-  std::vector<int > ilist(inum);
-  std::vector<int > numneigh(inum);
-  std::vector<int *> firstneigh(inum);
-  deepmd::InputNlist in_nlist(inum, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
+  std::vector<int> ilist(inum);
+  std::vector<int> numneigh(inum);
+  std::vector<int*> firstneigh(inum);
+  deepmd::InputNlist in_nlist(inum, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
   convert_nlist(in_nlist, nlist_a_0);
   // allocate the mem for the result
   std::vector<int> nlist(inum * sec_a.back());
   EXPECT_EQ(nlist.size(), expect_nlist_cpy.size());
 
-  double * posi_cpy_dev = NULL;
-  int * atype_cpy_dev = NULL, * nlist_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  double* posi_cpy_dev = NULL;
+  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
+      *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   for (int ii = 0; ii < inum; ii++) {
-    max_nbor_size = max_nbor_size >= numneigh[ii] ? max_nbor_size : numneigh[ii];
+    max_nbor_size =
+        max_nbor_size >= numneigh[ii] ? max_nbor_size : numneigh[ii];
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy);
   deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, in_nlist, memory_dev, max_nbor_size);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, in_nlist, memory_dev,
+                                   max_nbor_size);
   // format nlist
-  format_nbor_list_gpu_cuda(
-      nlist_dev, 
-      posi_cpy_dev, atype_cpy_dev, gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, nloc, nall, rc, sec_a);
+  format_nbor_list_gpu_cuda(nlist_dev, posi_cpy_dev, atype_cpy_dev, gpu_inlist,
+                            array_int_dev, array_longlong_dev, max_nbor_size,
+                            nloc, nall, rc, sec_a);
   deepmd::memcpy_device_to_host(nlist_dev, nlist);
   deepmd::delete_device_memory(nlist_dev);
   deepmd::delete_device_memory(posi_cpy_dev);
@@ -395,53 +368,57 @@ TEST_F(TestFormatNlist, gpu_cuda)
   deepmd::delete_device_memory(memory_dev);
   deepmd::free_nlist_gpu_device(gpu_inlist);
   // validate
-  for(int ii = 0; ii < nlist.size(); ++ii){
+  for (int ii = 0; ii < nlist.size(); ++ii) {
     EXPECT_EQ(nlist[ii], expect_nlist_cpy[ii]);
   }
 }
 
-TEST_F(TestFormatNlistShortSel, gpu_cuda)
-{
+TEST_F(TestFormatNlistShortSel, gpu_cuda) {
   std::vector<std::vector<int>> nlist_a_0, nlist_r_0;
-  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);  
+  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell,
+              ext_stt, ext_end, region, ncell);
   // make a input nlist
   int inum = nlist_a_0.size();
-  std::vector<int > ilist(inum);
-  std::vector<int > numneigh(inum);
-  std::vector<int *> firstneigh(inum);
-  deepmd::InputNlist in_nlist(inum, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
-  convert_nlist(in_nlist, nlist_a_0);  
+  std::vector<int> ilist(inum);
+  std::vector<int> numneigh(inum);
+  std::vector<int*> firstneigh(inum);
+  deepmd::InputNlist in_nlist(inum, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
+  convert_nlist(in_nlist, nlist_a_0);
   // mem
   std::vector<int> nlist(inum * sec_a.back());
   EXPECT_EQ(nlist.size(), expect_nlist_cpy.size());
   // format nlist
-  double * posi_cpy_dev = NULL;
-  int * atype_cpy_dev = NULL, * nlist_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  double* posi_cpy_dev = NULL;
+  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
+      *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   for (int ii = 0; ii < inum; ii++) {
-    max_nbor_size = max_nbor_size >= numneigh[ii] ? max_nbor_size : numneigh[ii];
+    max_nbor_size =
+        max_nbor_size >= numneigh[ii] ? max_nbor_size : numneigh[ii];
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy);
   deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, in_nlist, memory_dev, max_nbor_size);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, in_nlist, memory_dev,
+                                   max_nbor_size);
   // format nlist
-  format_nbor_list_gpu_cuda(
-      nlist_dev, 
-      posi_cpy_dev, atype_cpy_dev, gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, nloc, nall, rc, sec_a);
+  format_nbor_list_gpu_cuda(nlist_dev, posi_cpy_dev, atype_cpy_dev, gpu_inlist,
+                            array_int_dev, array_longlong_dev, max_nbor_size,
+                            nloc, nall, rc, sec_a);
   deepmd::memcpy_device_to_host(nlist_dev, nlist);
   deepmd::delete_device_memory(nlist_dev);
   deepmd::delete_device_memory(posi_cpy_dev);
@@ -451,16 +428,16 @@ TEST_F(TestFormatNlistShortSel, gpu_cuda)
   deepmd::delete_device_memory(memory_dev);
   deepmd::free_nlist_gpu_device(gpu_inlist);
   // validate
-  for(int ii = 0; ii < nlist.size(); ++ii){
+  for (int ii = 0; ii < nlist.size(); ++ii) {
     EXPECT_EQ(nlist[ii], expect_nlist_cpy[ii]);
   }
 }
 
-TEST_F(TestEncodingDecodingNborInfo, valid_nbor_info_gpu_cuda) 
-{
-  int * valid_type_dev = NULL, * valid_index_dev = NULL, * out_type_dev = NULL, * out_index_dev = NULL;
-  double * valid_dist_dev = NULL;
-  uint_64 * key_dev = NULL;
+TEST_F(TestEncodingDecodingNborInfo, valid_nbor_info_gpu_cuda) {
+  int *valid_type_dev = NULL, *valid_index_dev = NULL, *out_type_dev = NULL,
+      *out_index_dev = NULL;
+  double* valid_dist_dev = NULL;
+  uint_64* key_dev = NULL;
   std::vector<int> out_type(size_of_array, 0);
   std::vector<int> out_index(size_of_array, 0);
   std::vector<uint_64> key(size_of_array, 0);
@@ -472,9 +449,8 @@ TEST_F(TestEncodingDecodingNborInfo, valid_nbor_info_gpu_cuda)
   deepmd::malloc_device_memory_sync(key_dev, key);
 
   deepmd::test_encoding_decoding_nbor_info_gpu_cuda(
-      key_dev, out_type_dev, out_index_dev,
-      valid_type_dev, valid_dist_dev, valid_index_dev, size_of_array
-  );
+      key_dev, out_type_dev, out_index_dev, valid_type_dev, valid_dist_dev,
+      valid_index_dev, size_of_array);
 
   deepmd::memcpy_device_to_host(key_dev, key);
   deepmd::memcpy_device_to_host(out_type_dev, out_type);
@@ -486,19 +462,18 @@ TEST_F(TestEncodingDecodingNborInfo, valid_nbor_info_gpu_cuda)
   deepmd::delete_device_memory(out_index_dev);
   deepmd::delete_device_memory(key_dev);
   // validate
-  for(int ii = 0; ii < size_of_array; ii++) {
+  for (int ii = 0; ii < size_of_array; ii++) {
     EXPECT_EQ(key[ii], expect_key[ii]);
     EXPECT_EQ(out_type[ii], expect_type[ii]);
     EXPECT_EQ(out_index[ii], expect_index[ii]);
   }
 }
 
-// TEST_F(TestEncodingDecodingNborInfo, invalid_nbor_info_gpu_cuda) 
+// TEST_F(TestEncodingDecodingNborInfo, invalid_nbor_info_gpu_cuda)
 // {
-//   int * invalid_type_dev = NULL, * invalid_index_dev = NULL, * out_type_dev = NULL, * out_index_dev = NULL;
-//   double * invalid_dist_dev = NULL;
-//   uint_64 * key_dev = NULL;
-//   std::vector out_type(size_of_array, 0);
+//   int * invalid_type_dev = NULL, * invalid_index_dev = NULL, * out_type_dev =
+//   NULL, * out_index_dev = NULL; double * invalid_dist_dev = NULL; uint_64 *
+//   key_dev = NULL; std::vector out_type(size_of_array, 0);
 //   std::vector out_index(size_of_array, 0);
 //   std::vector key(size_of_array, 0);
 //   deepmd::malloc_device_memory_sync(invalid_type_dev, invalid_type);
@@ -507,14 +482,14 @@ TEST_F(TestEncodingDecodingNborInfo, valid_nbor_info_gpu_cuda)
 //   deepmd::malloc_device_memory_sync(out_type_dev, out_type);
 //   deepmd::malloc_device_memory_sync(out_index_dev, out_index);
 //   deepmd::malloc_device_memory_sync(key_dev, key);
-  
-//   EXPECT_EQ(cudaGetLastError() == cudaSuccess && cudaDeviceSynchronize() == cudaSuccess, true);
-//   deepmd::test_encoding_decoding_nbor_info_gpu_cuda(
+
+//   EXPECT_EQ(cudaGetLastError() == cudaSuccess && cudaDeviceSynchronize() ==
+//   cudaSuccess, true); deepmd::test_encoding_decoding_nbor_info_gpu_cuda(
 //       key_dev, out_type_dev, out_index_dev,
 //       invalid_type_dev, invalid_dist_dev, invalid_index_dev, size_of_array
 //   );
-//   EXPECT_EQ(cudaGetLastError() == cudaSuccess && cudaDeviceSynchronize() == cudaSuccess, false);
-//   cudaErrcheck(cudaDeviceReset());
+//   EXPECT_EQ(cudaGetLastError() == cudaSuccess && cudaDeviceSynchronize() ==
+//   cudaSuccess, false); cudaErrcheck(cudaDeviceReset());
 //   deepmd::memcpy_device_to_host(key_dev, key);
 //   deepmd::memcpy_device_to_host(out_type_dev, out_type);
 //   deepmd::memcpy_device_to_host(out_index_dev, out_index);
@@ -525,51 +500,55 @@ TEST_F(TestEncodingDecodingNborInfo, valid_nbor_info_gpu_cuda)
 //   deepmd::delete_device_memory(out_index_dev);
 //   deepmd::delete_device_memory(key_dev);
 // }
-#endif // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestFormatNlist, gpu_rocm)
-{
+TEST_F(TestFormatNlist, gpu_rocm) {
   std::vector<std::vector<int>> nlist_a_0, nlist_r_0;
-  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);  
+  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell,
+              ext_stt, ext_end, region, ncell);
   // make a input nlist
   int inum = nlist_a_0.size();
-  std::vector<int > ilist(inum);
-  std::vector<int > numneigh(inum);
-  std::vector<int *> firstneigh(inum);
-  deepmd::InputNlist in_nlist(inum, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
+  std::vector<int> ilist(inum);
+  std::vector<int> numneigh(inum);
+  std::vector<int*> firstneigh(inum);
+  deepmd::InputNlist in_nlist(inum, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
   convert_nlist(in_nlist, nlist_a_0);
   // allocate the mem for the result
   std::vector<int> nlist(inum * sec_a.back());
   EXPECT_EQ(nlist.size(), expect_nlist_cpy.size());
 
-  double * posi_cpy_dev = NULL;
-  int * atype_cpy_dev = NULL, * nlist_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  double* posi_cpy_dev = NULL;
+  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
+      *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   for (int ii = 0; ii < inum; ii++) {
-    max_nbor_size = max_nbor_size >= numneigh[ii] ? max_nbor_size : numneigh[ii];
+    max_nbor_size =
+        max_nbor_size >= numneigh[ii] ? max_nbor_size : numneigh[ii];
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy);
   deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, in_nlist, memory_dev, max_nbor_size);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, in_nlist, memory_dev,
+                                   max_nbor_size);
   // format nlist
-  format_nbor_list_gpu_rocm(
-      nlist_dev, 
-      posi_cpy_dev, atype_cpy_dev, gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, nloc, nall, rc, sec_a);
+  format_nbor_list_gpu_rocm(nlist_dev, posi_cpy_dev, atype_cpy_dev, gpu_inlist,
+                            array_int_dev, array_longlong_dev, max_nbor_size,
+                            nloc, nall, rc, sec_a);
   deepmd::memcpy_device_to_host(nlist_dev, nlist);
   deepmd::delete_device_memory(nlist_dev);
   deepmd::delete_device_memory(posi_cpy_dev);
@@ -579,53 +558,57 @@ TEST_F(TestFormatNlist, gpu_rocm)
   deepmd::delete_device_memory(memory_dev);
   deepmd::free_nlist_gpu_device(gpu_inlist);
   // validate
-  for(int ii = 0; ii < nlist.size(); ++ii){
+  for (int ii = 0; ii < nlist.size(); ++ii) {
     EXPECT_EQ(nlist[ii], expect_nlist_cpy[ii]);
   }
 }
 
-TEST_F(TestFormatNlistShortSel, gpu_rocm)
-{
+TEST_F(TestFormatNlistShortSel, gpu_rocm) {
   std::vector<std::vector<int>> nlist_a_0, nlist_r_0;
-  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);  
+  build_nlist(nlist_a_0, nlist_r_0, posi_cpy, nloc, rc, rc, nat_stt, ncell,
+              ext_stt, ext_end, region, ncell);
   // make a input nlist
   int inum = nlist_a_0.size();
-  std::vector<int > ilist(inum);
-  std::vector<int > numneigh(inum);
-  std::vector<int *> firstneigh(inum);
-  deepmd::InputNlist in_nlist(inum, &ilist[0], &numneigh[0], &firstneigh[0]), gpu_inlist;
-  convert_nlist(in_nlist, nlist_a_0);  
+  std::vector<int> ilist(inum);
+  std::vector<int> numneigh(inum);
+  std::vector<int*> firstneigh(inum);
+  deepmd::InputNlist in_nlist(inum, &ilist[0], &numneigh[0], &firstneigh[0]),
+      gpu_inlist;
+  convert_nlist(in_nlist, nlist_a_0);
   // mem
   std::vector<int> nlist(inum * sec_a.back());
   EXPECT_EQ(nlist.size(), expect_nlist_cpy.size());
   // format nlist
-  double * posi_cpy_dev = NULL;
-  int * atype_cpy_dev = NULL, * nlist_dev = NULL, * array_int_dev = NULL, * memory_dev = NULL;
-  uint_64 * array_longlong_dev = NULL;
+  double* posi_cpy_dev = NULL;
+  int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
+      *memory_dev = NULL;
+  uint_64* array_longlong_dev = NULL;
   for (int ii = 0; ii < inum; ii++) {
-    max_nbor_size = max_nbor_size >= numneigh[ii] ? max_nbor_size : numneigh[ii];
+    max_nbor_size =
+        max_nbor_size >= numneigh[ii] ? max_nbor_size : numneigh[ii];
   }
   assert(max_nbor_size <= GPU_MAX_NBOR_SIZE);
   if (max_nbor_size <= 1024) {
     max_nbor_size = 1024;
-  }
-  else if (max_nbor_size <= 2048) {
+  } else if (max_nbor_size <= 2048) {
     max_nbor_size = 2048;
-  }
-  else {
+  } else {
     max_nbor_size = 4096;
   }
   deepmd::malloc_device_memory_sync(posi_cpy_dev, posi_cpy);
   deepmd::malloc_device_memory_sync(atype_cpy_dev, atype_cpy);
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
-  deepmd::malloc_device_memory(array_int_dev, sec_a.size() + nloc * sec_a.size() + nloc);
-  deepmd::malloc_device_memory(array_longlong_dev, nloc * GPU_MAX_NBOR_SIZE * 2);
+  deepmd::malloc_device_memory(array_int_dev,
+                               sec_a.size() + nloc * sec_a.size() + nloc);
+  deepmd::malloc_device_memory(array_longlong_dev,
+                               nloc * GPU_MAX_NBOR_SIZE * 2);
   deepmd::malloc_device_memory(memory_dev, nloc * max_nbor_size);
-  deepmd::convert_nlist_gpu_device(gpu_inlist, in_nlist, memory_dev, max_nbor_size);
+  deepmd::convert_nlist_gpu_device(gpu_inlist, in_nlist, memory_dev,
+                                   max_nbor_size);
   // format nlist
-  format_nbor_list_gpu_rocm(
-      nlist_dev, 
-      posi_cpy_dev, atype_cpy_dev, gpu_inlist, array_int_dev, array_longlong_dev, max_nbor_size, nloc, nall, rc, sec_a);
+  format_nbor_list_gpu_rocm(nlist_dev, posi_cpy_dev, atype_cpy_dev, gpu_inlist,
+                            array_int_dev, array_longlong_dev, max_nbor_size,
+                            nloc, nall, rc, sec_a);
   deepmd::memcpy_device_to_host(nlist_dev, nlist);
   deepmd::delete_device_memory(nlist_dev);
   deepmd::delete_device_memory(posi_cpy_dev);
@@ -635,16 +618,16 @@ TEST_F(TestFormatNlistShortSel, gpu_rocm)
   deepmd::delete_device_memory(memory_dev);
   deepmd::free_nlist_gpu_device(gpu_inlist);
   // validate
-  for(int ii = 0; ii < nlist.size(); ++ii){
+  for (int ii = 0; ii < nlist.size(); ++ii) {
     EXPECT_EQ(nlist[ii], expect_nlist_cpy[ii]);
   }
 }
 
-TEST_F(TestEncodingDecodingNborInfo, valid_nbor_info_gpu_rocm) 
-{
-  int * valid_type_dev = NULL, * valid_index_dev = NULL, * out_type_dev = NULL, * out_index_dev = NULL;
-  double * valid_dist_dev = NULL;
-  uint_64 * key_dev = NULL;
+TEST_F(TestEncodingDecodingNborInfo, valid_nbor_info_gpu_rocm) {
+  int *valid_type_dev = NULL, *valid_index_dev = NULL, *out_type_dev = NULL,
+      *out_index_dev = NULL;
+  double* valid_dist_dev = NULL;
+  uint_64* key_dev = NULL;
   std::vector<int> out_type(size_of_array, 0);
   std::vector<int> out_index(size_of_array, 0);
   std::vector<uint_64> key(size_of_array, 0);
@@ -656,9 +639,8 @@ TEST_F(TestEncodingDecodingNborInfo, valid_nbor_info_gpu_rocm)
   deepmd::malloc_device_memory_sync(key_dev, key);
 
   deepmd::test_encoding_decoding_nbor_info_gpu_rocm(
-      key_dev, out_type_dev, out_index_dev,
-      valid_type_dev, valid_dist_dev, valid_index_dev, size_of_array
-  );
+      key_dev, out_type_dev, out_index_dev, valid_type_dev, valid_dist_dev,
+      valid_index_dev, size_of_array);
 
   deepmd::memcpy_device_to_host(key_dev, key);
   deepmd::memcpy_device_to_host(out_type_dev, out_type);
@@ -670,12 +652,11 @@ TEST_F(TestEncodingDecodingNborInfo, valid_nbor_info_gpu_rocm)
   deepmd::delete_device_memory(out_index_dev);
   deepmd::delete_device_memory(key_dev);
   // validate
-  for(int ii = 0; ii < size_of_array; ii++) {
+  for (int ii = 0; ii < size_of_array; ii++) {
     EXPECT_EQ(key[ii], expect_key[ii]);
     EXPECT_EQ(out_type[ii], expect_type[ii]);
     EXPECT_EQ(out_index[ii], expect_index[ii]);
   }
 }
 
-
-#endif // TENSORFLOW_USE_ROCM
\ No newline at end of file
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_gelu.cc b/source/lib/tests/test_gelu.cc
index 46539f7334..c138339e2d 100644
--- a/source/lib/tests/test_gelu.cc
+++ b/source/lib/tests/test_gelu.cc
@@ -1,180 +1,177 @@
-#include "gelu.h"
-#include "device.h"
-#include 
 #include 
+
+#include 
+
+#include "device.h"
+#include "gelu.h"
 #include "utilities.h"
 
-class TestGelu : public ::testing::Test
-{
-protected:
+class TestGelu : public ::testing::Test {
+ protected:
   // xx = tf.random.uniform([100], minval=-4, maxval=4, dtype = tf.float64)
-  std::vector xx = {
-    -0.65412617, -0.74208893, -2.21731157,  0.42540039, -0.20889174, -1.37948692,
-     3.36373004, -2.51647562, -2.985111  , -0.53251562,  0.36373729, -3.19052875,
-     0.37908265,  0.81605825,  1.66281318,  2.71761869, -0.89313006,  0.11503315,
-     2.2010268 ,  0.65498149,  1.51153638,  0.71501482, -1.27392131, -2.89503271,
-     0.53546578,  1.4564317 , -2.36701143,  1.23009056,  0.20264839,  2.06037292,
-     3.41302551, -2.3175205 , -0.27628221, -1.35701656,  2.13781656, -0.52921087,
-    -1.56774526,  2.92475766, -3.17376756, -2.61726505, -0.89399621, -1.30025318,
-    -3.98310127,  0.0378038 , -0.59195525, -0.71764632,  2.83774732, -1.83266476,
-    -3.52590216,  2.7735313 , -1.52387184, -3.57984618,  3.44036277, -1.52546413,
-     3.34095372,  1.12462909,  0.64319821, -3.94019443, -0.4976394 , -3.84744725,
-     3.32683062, -1.95707363, -1.73538352, -3.32614596, -1.46374614,  3.32600174,
-     1.56399235,  3.42035556,  3.029476  , -3.99135473,  2.22774417, -2.27991379,
-    -0.12769364,  3.27522847, -1.18421457, -2.65598248,  0.37112235,  1.27091438,
-     1.82646907,  2.06702457,  2.87834558,  0.63158531, -1.76016732, -0.85704887,
-     1.07093632, -2.55155726,  0.60068505, -0.36984068, -1.75685256,  1.2808404,
-     3.07005843,  1.11521146,  2.3648244 , -2.79509595,  2.4611316 ,  2.95155864,
-     3.45913518,  2.71155262,  0.49731474,  0.89416884
-  };
-  std::vector expected_gelu = {
-    -1.67837557e-01, -1.70017454e-01, -2.92128115e-02,  2.82765887e-01,
-    -8.71641062e-02, -1.15934278e-01,  3.36269232e+00, -1.44692661e-02,
-    -3.81342874e-03, -1.58276988e-01,  2.33504238e-01, -1.93195840e-03,
-     2.45520665e-01,  6.46854620e-01,  1.58255340e+00,  2.70915699e+00,
-    -1.66142311e-01,  6.27839507e-02,  2.17077193e+00,  4.87104731e-01,
-     1.41257916e+00,  5.45282609e-01, -1.29333636e-01, -5.04228492e-03,
-     3.76858089e-01,  1.35041498e+00, -2.08435518e-02,  1.09538283e+00,
-     1.17595324e-01,  2.01997211e+00,  3.41216324e+00, -2.33762781e-02,
-    -1.08073931e-01, -1.18820554e-01,  2.10325928e+00, -1.57900404e-01,
-    -9.18635121e-02,  2.92015365e+00, -2.04685946e-03, -1.11316220e-02,
-    -1.66096393e-01, -1.26039117e-01, -7.61243780e-05,  1.94719045e-02,
-    -1.63967673e-01, -1.69774465e-01,  2.83175981e+00, -6.13003406e-02,
-    -5.56239423e-04,  2.76631042e+00, -9.73891862e-02, -4.47898619e-04,
-     3.43958593e+00, -9.71871941e-02,  3.33982471e+00,  9.77813916e-01,
-     4.75894192e-01, -9.31449548e-05, -1.53971187e-01, -1.42502838e-04,
-     3.32564148e+00, -4.91974866e-02, -7.18453399e-02, -1.19212505e-03,
-    -1.05075731e-01,  3.32480898e+00,  1.47165971e+00,  3.41951698e+00,
-     3.02616656e+00, -7.31989124e-05,  2.19918490e+00, -2.54527487e-02,
-    -5.73595208e-02,  3.27379481e+00, -1.40141518e-01, -1.00297107e-02,
-     2.39266857e-01,  1.14120736e+00,  1.76452433e+00,  2.02715079e+00,
-     2.87304205e+00,  4.64915551e-01, -6.90746681e-02, -1.67834746e-01,
-     9.18580320e-01, -1.32266764e-02,  4.36049337e-01, -1.31576637e-01,
-    -6.94420205e-02,  1.15236826e+00,  3.06715854e+00,  9.67388918e-01,
-     2.34387360e+00, -6.78489313e-03,  2.44451366e+00,  2.94732148e+00,
-     3.45841254e+00,  2.70294624e+00,  3.43387119e-01,  7.28081631e-01
-  };  
-  std::vector expected_gelu_grad = {
-     4.60449412e-02,  4.50599718e-03, -6.31675690e-02,  8.19672783e-01,
-     3.35740612e-01, -1.28672776e-01,  1.00385962e+00, -3.67087198e-02,
-    -1.20649335e-02,  1.12970546e-01,  7.77739313e-01, -6.68287407e-03,
-     7.88372245e-01,  1.02580252e+00,  1.11883773e+00,  1.02368320e+00,
-    -5.28753300e-02,  5.91377555e-01,  1.06481455e+00,  9.54387332e-01,
-     1.12733634e+00,  9.83340637e-01, -1.24278352e-01, -1.53213139e-02,
-     8.88777668e-01,  1.12870635e+00, -4.89412880e-02,  1.12076578e+00,
-     6.59486509e-01,  1.07959136e+00,  1.00327260e+00, -5.34451698e-02,
-     2.85098027e-01, -1.28185394e-01,  1.07135120e+00,  1.14935972e-01,
-    -1.24907600e-01,  1.01417860e+00, -7.02992824e-03, -2.96883138e-02,
-    -5.31536587e-02, -1.25891871e-01, -3.60887857e-04,  5.30148635e-01,
-     7.89229070e-02,  1.54532953e-02,  1.01772418e+00, -1.03699345e-01,
-    -2.20978811e-03,  1.02074891e+00, -1.26887416e-01, -1.81810307e-03,
-     1.00298141e+00, -1.26825892e-01,  1.00415984e+00,  1.10771369e+00,
-     9.48383787e-01, -4.34543288e-04,  1.34084905e-01, -6.41896044e-04,
-     1.00435580e+00, -9.07188185e-02, -1.12900642e-01, -4.36549981e-03,
-    -1.28587248e-01,  1.00436754e+00,  1.12509925e+00,  1.00319222e+00,
-     1.01067764e+00, -3.48088477e-04,  1.06212145e+00, -5.70046708e-02,
-     3.98669653e-01,  1.00513974e+00, -1.15918449e-01, -2.72566889e-02,
-     7.82872230e-01,  1.12407088e+00,  1.10431752e+00,  1.07887693e+00,
-     1.01599352e+00,  9.42365429e-01, -1.10671686e-01, -4.07709087e-02,
-     1.09835643e+00, -3.41514078e-02,  9.25865272e-01,  2.18016504e-01,
-    -1.10974960e-01,  1.12473750e+00,  1.00952394e+00,  1.10621038e+00,
-     1.04913579e+00, -1.96928674e-02,  1.04098891e+00,  1.01320664e+00,
-     1.00279462e+00,  1.02401893e+00,  8.65714727e-01,  1.05320906e+00
-  };  
-  std::vector expected_gelu_grad_grad = {
-     0.50571564,  0.43843355, -0.10061714,  0.66240156,  0.76347293,  0.01663728,
-    -0.01276427, -0.07462592, -0.03281601,  0.59359609,  0.69699731, -0.02027303,
-     0.68877611,  0.38103445, -0.07490714, -0.05506099,  0.32166971,  0.78732762,
-    -0.10164498,  0.5050721 , -0.03437986,  0.4593321 ,  0.06819688, -0.0396137,
-     0.59156916, -0.01491246, -0.08883747,  0.09234241,  0.7654675 , -0.10747085,
-    -0.01108326, -0.09311994,  0.7384317 ,  0.02681523, -0.10499993,  0.59585922,
-    -0.05160053, -0.03728553, -0.02114303, -0.06469217,  0.32100942,  0.05445215,
-    -0.00157624,  0.79673926,  0.55165186,  0.45730633, -0.04432554, -0.10006544,
-    -0.00789302, -0.04993342, -0.03838479, -0.00665763, -0.01022962, -0.03889244,
-    -0.01360533,  0.1565692 ,  0.51391336, -0.00186407,  0.61706551, -0.00264692,
-    -0.01414804, -0.10721734, -0.08807903, -0.01417477, -0.01764587, -0.0141804,
-    -0.05053224, -0.01084901, -0.02975642, -0.00152558, -0.09992717, -0.09614436,
-     0.7848912 , -0.01627357,  0.11925413, -0.06092512,  0.69307417,  0.06980313,
-    -0.09948152, -0.10733955, -0.04095624,  0.52257218, -0.09172304,  0.34933309,
-     0.19229249, -0.07116424,  0.54531393,  0.6937595 , -0.09125988,  0.06452926,
-    -0.02712756,  0.16269737, -0.08903289, -0.04801427, -0.08003338, -0.03525758,
-    -0.00967444, -0.05562922,  0.61727952,  0.32087784
-  }; 
+  std::vector xx = {
+      -0.65412617, -0.74208893, -2.21731157, 0.42540039,  -0.20889174,
+      -1.37948692, 3.36373004,  -2.51647562, -2.985111,   -0.53251562,
+      0.36373729,  -3.19052875, 0.37908265,  0.81605825,  1.66281318,
+      2.71761869,  -0.89313006, 0.11503315,  2.2010268,   0.65498149,
+      1.51153638,  0.71501482,  -1.27392131, -2.89503271, 0.53546578,
+      1.4564317,   -2.36701143, 1.23009056,  0.20264839,  2.06037292,
+      3.41302551,  -2.3175205,  -0.27628221, -1.35701656, 2.13781656,
+      -0.52921087, -1.56774526, 2.92475766,  -3.17376756, -2.61726505,
+      -0.89399621, -1.30025318, -3.98310127, 0.0378038,   -0.59195525,
+      -0.71764632, 2.83774732,  -1.83266476, -3.52590216, 2.7735313,
+      -1.52387184, -3.57984618, 3.44036277,  -1.52546413, 3.34095372,
+      1.12462909,  0.64319821,  -3.94019443, -0.4976394,  -3.84744725,
+      3.32683062,  -1.95707363, -1.73538352, -3.32614596, -1.46374614,
+      3.32600174,  1.56399235,  3.42035556,  3.029476,    -3.99135473,
+      2.22774417,  -2.27991379, -0.12769364, 3.27522847,  -1.18421457,
+      -2.65598248, 0.37112235,  1.27091438,  1.82646907,  2.06702457,
+      2.87834558,  0.63158531,  -1.76016732, -0.85704887, 1.07093632,
+      -2.55155726, 0.60068505,  -0.36984068, -1.75685256, 1.2808404,
+      3.07005843,  1.11521146,  2.3648244,   -2.79509595, 2.4611316,
+      2.95155864,  3.45913518,  2.71155262,  0.49731474,  0.89416884};
+  std::vector expected_gelu = {
+      -1.67837557e-01, -1.70017454e-01, -2.92128115e-02, 2.82765887e-01,
+      -8.71641062e-02, -1.15934278e-01, 3.36269232e+00,  -1.44692661e-02,
+      -3.81342874e-03, -1.58276988e-01, 2.33504238e-01,  -1.93195840e-03,
+      2.45520665e-01,  6.46854620e-01,  1.58255340e+00,  2.70915699e+00,
+      -1.66142311e-01, 6.27839507e-02,  2.17077193e+00,  4.87104731e-01,
+      1.41257916e+00,  5.45282609e-01,  -1.29333636e-01, -5.04228492e-03,
+      3.76858089e-01,  1.35041498e+00,  -2.08435518e-02, 1.09538283e+00,
+      1.17595324e-01,  2.01997211e+00,  3.41216324e+00,  -2.33762781e-02,
+      -1.08073931e-01, -1.18820554e-01, 2.10325928e+00,  -1.57900404e-01,
+      -9.18635121e-02, 2.92015365e+00,  -2.04685946e-03, -1.11316220e-02,
+      -1.66096393e-01, -1.26039117e-01, -7.61243780e-05, 1.94719045e-02,
+      -1.63967673e-01, -1.69774465e-01, 2.83175981e+00,  -6.13003406e-02,
+      -5.56239423e-04, 2.76631042e+00,  -9.73891862e-02, -4.47898619e-04,
+      3.43958593e+00,  -9.71871941e-02, 3.33982471e+00,  9.77813916e-01,
+      4.75894192e-01,  -9.31449548e-05, -1.53971187e-01, -1.42502838e-04,
+      3.32564148e+00,  -4.91974866e-02, -7.18453399e-02, -1.19212505e-03,
+      -1.05075731e-01, 3.32480898e+00,  1.47165971e+00,  3.41951698e+00,
+      3.02616656e+00,  -7.31989124e-05, 2.19918490e+00,  -2.54527487e-02,
+      -5.73595208e-02, 3.27379481e+00,  -1.40141518e-01, -1.00297107e-02,
+      2.39266857e-01,  1.14120736e+00,  1.76452433e+00,  2.02715079e+00,
+      2.87304205e+00,  4.64915551e-01,  -6.90746681e-02, -1.67834746e-01,
+      9.18580320e-01,  -1.32266764e-02, 4.36049337e-01,  -1.31576637e-01,
+      -6.94420205e-02, 1.15236826e+00,  3.06715854e+00,  9.67388918e-01,
+      2.34387360e+00,  -6.78489313e-03, 2.44451366e+00,  2.94732148e+00,
+      3.45841254e+00,  2.70294624e+00,  3.43387119e-01,  7.28081631e-01};
+  std::vector expected_gelu_grad = {
+      4.60449412e-02,  4.50599718e-03,  -6.31675690e-02, 8.19672783e-01,
+      3.35740612e-01,  -1.28672776e-01, 1.00385962e+00,  -3.67087198e-02,
+      -1.20649335e-02, 1.12970546e-01,  7.77739313e-01,  -6.68287407e-03,
+      7.88372245e-01,  1.02580252e+00,  1.11883773e+00,  1.02368320e+00,
+      -5.28753300e-02, 5.91377555e-01,  1.06481455e+00,  9.54387332e-01,
+      1.12733634e+00,  9.83340637e-01,  -1.24278352e-01, -1.53213139e-02,
+      8.88777668e-01,  1.12870635e+00,  -4.89412880e-02, 1.12076578e+00,
+      6.59486509e-01,  1.07959136e+00,  1.00327260e+00,  -5.34451698e-02,
+      2.85098027e-01,  -1.28185394e-01, 1.07135120e+00,  1.14935972e-01,
+      -1.24907600e-01, 1.01417860e+00,  -7.02992824e-03, -2.96883138e-02,
+      -5.31536587e-02, -1.25891871e-01, -3.60887857e-04, 5.30148635e-01,
+      7.89229070e-02,  1.54532953e-02,  1.01772418e+00,  -1.03699345e-01,
+      -2.20978811e-03, 1.02074891e+00,  -1.26887416e-01, -1.81810307e-03,
+      1.00298141e+00,  -1.26825892e-01, 1.00415984e+00,  1.10771369e+00,
+      9.48383787e-01,  -4.34543288e-04, 1.34084905e-01,  -6.41896044e-04,
+      1.00435580e+00,  -9.07188185e-02, -1.12900642e-01, -4.36549981e-03,
+      -1.28587248e-01, 1.00436754e+00,  1.12509925e+00,  1.00319222e+00,
+      1.01067764e+00,  -3.48088477e-04, 1.06212145e+00,  -5.70046708e-02,
+      3.98669653e-01,  1.00513974e+00,  -1.15918449e-01, -2.72566889e-02,
+      7.82872230e-01,  1.12407088e+00,  1.10431752e+00,  1.07887693e+00,
+      1.01599352e+00,  9.42365429e-01,  -1.10671686e-01, -4.07709087e-02,
+      1.09835643e+00,  -3.41514078e-02, 9.25865272e-01,  2.18016504e-01,
+      -1.10974960e-01, 1.12473750e+00,  1.00952394e+00,  1.10621038e+00,
+      1.04913579e+00,  -1.96928674e-02, 1.04098891e+00,  1.01320664e+00,
+      1.00279462e+00,  1.02401893e+00,  8.65714727e-01,  1.05320906e+00};
+  std::vector expected_gelu_grad_grad = {
+      0.50571564,  0.43843355,  -0.10061714, 0.66240156,  0.76347293,
+      0.01663728,  -0.01276427, -0.07462592, -0.03281601, 0.59359609,
+      0.69699731,  -0.02027303, 0.68877611,  0.38103445,  -0.07490714,
+      -0.05506099, 0.32166971,  0.78732762,  -0.10164498, 0.5050721,
+      -0.03437986, 0.4593321,   0.06819688,  -0.0396137,  0.59156916,
+      -0.01491246, -0.08883747, 0.09234241,  0.7654675,   -0.10747085,
+      -0.01108326, -0.09311994, 0.7384317,   0.02681523,  -0.10499993,
+      0.59585922,  -0.05160053, -0.03728553, -0.02114303, -0.06469217,
+      0.32100942,  0.05445215,  -0.00157624, 0.79673926,  0.55165186,
+      0.45730633,  -0.04432554, -0.10006544, -0.00789302, -0.04993342,
+      -0.03838479, -0.00665763, -0.01022962, -0.03889244, -0.01360533,
+      0.1565692,   0.51391336,  -0.00186407, 0.61706551,  -0.00264692,
+      -0.01414804, -0.10721734, -0.08807903, -0.01417477, -0.01764587,
+      -0.0141804,  -0.05053224, -0.01084901, -0.02975642, -0.00152558,
+      -0.09992717, -0.09614436, 0.7848912,   -0.01627357, 0.11925413,
+      -0.06092512, 0.69307417,  0.06980313,  -0.09948152, -0.10733955,
+      -0.04095624, 0.52257218,  -0.09172304, 0.34933309,  0.19229249,
+      -0.07116424, 0.54531393,  0.6937595,   -0.09125988, 0.06452926,
+      -0.02712756, 0.16269737,  -0.08903289, -0.04801427, -0.08003338,
+      -0.03525758, -0.00967444, -0.05562922, 0.61727952,  0.32087784};
 
   const int nloc = xx.size();
 
-  void SetUp() override {
-  }
-  void TearDown() override {
-  }
+  void SetUp() override {}
+  void TearDown() override {}
 };
 
-TEST_F(TestGelu, gelu_cpu)
-{
+TEST_F(TestGelu, gelu_cpu) {
   std::vector gelu(nloc);
-  deepmd::gelu_cpu (&gelu[0], &xx[0], nloc);
+  deepmd::gelu_cpu(&gelu[0], &xx[0], nloc);
   EXPECT_EQ(gelu.size(), nloc);
   EXPECT_EQ(gelu.size(), expected_gelu.size());
-  for (int jj = 0; jj < gelu.size(); ++jj){
-    EXPECT_LT(fabs(gelu[jj] - expected_gelu[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < gelu.size(); ++jj) {
+    EXPECT_LT(fabs(gelu[jj] - expected_gelu[jj]), 1e-5);
+  }
 }
 
-TEST_F(TestGelu, gelu_grad_cpu)
-{
+TEST_F(TestGelu, gelu_grad_cpu) {
   std::vector dy(100, 1.0);
   std::vector gelu_grad(nloc);
-  deepmd::gelu_grad_cpu (&gelu_grad[0], &xx[0], &dy[0], nloc);
+  deepmd::gelu_grad_cpu(&gelu_grad[0], &xx[0], &dy[0], nloc);
   EXPECT_EQ(gelu_grad.size(), nloc);
   EXPECT_EQ(gelu_grad.size(), expected_gelu_grad.size());
-  for (int jj = 0; jj < gelu_grad.size(); ++jj){
-    EXPECT_LT(fabs(gelu_grad[jj] - expected_gelu_grad[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < gelu_grad.size(); ++jj) {
+    EXPECT_LT(fabs(gelu_grad[jj] - expected_gelu_grad[jj]), 1e-5);
+  }
 }
 
-TEST_F(TestGelu, gelu_grad_grad_cpu)
-{
+TEST_F(TestGelu, gelu_grad_grad_cpu) {
   std::vector dy(100, 1.0);
   std::vector dy_2(100, 1.0);
   std::vector gelu_grad_grad(nloc);
-  deepmd::gelu_grad_grad_cpu (&gelu_grad_grad[0], &xx[0], &dy[0], &dy_2[0], nloc);
+  deepmd::gelu_grad_grad_cpu(&gelu_grad_grad[0], &xx[0], &dy[0],
+                                     &dy_2[0], nloc);
   EXPECT_EQ(gelu_grad_grad.size(), nloc);
   EXPECT_EQ(gelu_grad_grad.size(), expected_gelu_grad_grad.size());
-  for (int jj = 0; jj < gelu_grad_grad.size(); ++jj){
-    EXPECT_LT(fabs(gelu_grad_grad[jj] - expected_gelu_grad_grad[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < gelu_grad_grad.size(); ++jj) {
+    EXPECT_LT(fabs(gelu_grad_grad[jj] - expected_gelu_grad_grad[jj]), 1e-5);
+  }
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestGelu, gelu_gpu_cuda)
-{
+TEST_F(TestGelu, gelu_gpu_cuda) {
   std::vector gelu(nloc, 0.0);
-  
-  double * gelu_dev = NULL, * xx_dev = NULL;
+
+  double *gelu_dev = NULL, *xx_dev = NULL;
   deepmd::malloc_device_memory_sync(gelu_dev, gelu);
   deepmd::malloc_device_memory_sync(xx_dev, xx);
-  deepmd::gelu_gpu_cuda (gelu_dev, xx_dev, nloc);
+  deepmd::gelu_gpu_cuda(gelu_dev, xx_dev, nloc);
   deepmd::memcpy_device_to_host(gelu_dev, gelu);
   deepmd::delete_device_memory(gelu_dev);
   deepmd::delete_device_memory(xx_dev);
 
   EXPECT_EQ(gelu.size(), nloc);
   EXPECT_EQ(gelu.size(), expected_gelu.size());
-  for (int jj = 0; jj < gelu.size(); ++jj){
-    EXPECT_LT(fabs(gelu[jj] - expected_gelu[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < gelu.size(); ++jj) {
+    EXPECT_LT(fabs(gelu[jj] - expected_gelu[jj]), 1e-5);
+  }
 }
 
-TEST_F(TestGelu, gelu_grad_gpu_cuda)
-{
+TEST_F(TestGelu, gelu_grad_gpu_cuda) {
   std::vector dy(100, 1.0);
   std::vector gelu_grad(nloc, 0.0);
 
-  double * gelu_grad_dev = NULL, * xx_dev = NULL, * dy_dev = NULL;
+  double *gelu_grad_dev = NULL, *xx_dev = NULL, *dy_dev = NULL;
   deepmd::malloc_device_memory_sync(gelu_grad_dev, gelu_grad);
   deepmd::malloc_device_memory_sync(xx_dev, xx);
   deepmd::malloc_device_memory_sync(dy_dev, dy);
-  deepmd::gelu_grad_gpu_cuda (gelu_grad_dev, xx_dev, dy_dev, nloc);
+  deepmd::gelu_grad_gpu_cuda(gelu_grad_dev, xx_dev, dy_dev, nloc);
   deepmd::memcpy_device_to_host(gelu_grad_dev, gelu_grad);
   deepmd::delete_device_memory(gelu_grad_dev);
   deepmd::delete_device_memory(xx_dev);
@@ -182,23 +179,24 @@ TEST_F(TestGelu, gelu_grad_gpu_cuda)
 
   EXPECT_EQ(gelu_grad.size(), nloc);
   EXPECT_EQ(gelu_grad.size(), expected_gelu_grad.size());
-  for (int jj = 0; jj < gelu_grad.size(); ++jj){
-    EXPECT_LT(fabs(gelu_grad[jj] - expected_gelu_grad[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < gelu_grad.size(); ++jj) {
+    EXPECT_LT(fabs(gelu_grad[jj] - expected_gelu_grad[jj]), 1e-5);
+  }
 }
 
-TEST_F(TestGelu, gelu_grad_grad_gpu_cuda)
-{
+TEST_F(TestGelu, gelu_grad_grad_gpu_cuda) {
   std::vector dy(100, 1.0);
   std::vector dy_2(100, 1.0);
   std::vector gelu_grad_grad(nloc, 0.0);
 
-  double * gelu_grad_grad_dev = NULL, * xx_dev = NULL, * dy_dev = NULL, * dy_2_dev = NULL;
+  double *gelu_grad_grad_dev = NULL, *xx_dev = NULL, *dy_dev = NULL,
+         *dy_2_dev = NULL;
   deepmd::malloc_device_memory_sync(gelu_grad_grad_dev, gelu_grad_grad);
   deepmd::malloc_device_memory_sync(xx_dev, xx);
   deepmd::malloc_device_memory_sync(dy_dev, dy);
   deepmd::malloc_device_memory_sync(dy_2_dev, dy_2);
-  deepmd::gelu_grad_grad_gpu_cuda (gelu_grad_grad_dev, xx_dev, dy_dev, dy_2_dev, nloc);
+  deepmd::gelu_grad_grad_gpu_cuda(gelu_grad_grad_dev, xx_dev, dy_dev,
+                                          dy_2_dev, nloc);
   deepmd::memcpy_device_to_host(gelu_grad_grad_dev, gelu_grad_grad);
   deepmd::delete_device_memory(gelu_grad_grad_dev);
   deepmd::delete_device_memory(xx_dev);
@@ -207,44 +205,40 @@ TEST_F(TestGelu, gelu_grad_grad_gpu_cuda)
 
   EXPECT_EQ(gelu_grad_grad.size(), nloc);
   EXPECT_EQ(gelu_grad_grad.size(), expected_gelu_grad_grad.size());
-  for (int jj = 0; jj < gelu_grad_grad.size(); ++jj){
-    EXPECT_LT(fabs(gelu_grad_grad[jj] - expected_gelu_grad_grad[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < gelu_grad_grad.size(); ++jj) {
+    EXPECT_LT(fabs(gelu_grad_grad[jj] - expected_gelu_grad_grad[jj]), 1e-5);
+  }
 }
-#endif // GOOGLE_CUDA
-
-
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestGelu, gelu_gpu_rocm)
-{
+TEST_F(TestGelu, gelu_gpu_rocm) {
   std::vector gelu(nloc, 0.0);
-  
-  double * gelu_dev = NULL, * xx_dev = NULL;
+
+  double *gelu_dev = NULL, *xx_dev = NULL;
   deepmd::malloc_device_memory_sync(gelu_dev, gelu);
   deepmd::malloc_device_memory_sync(xx_dev, xx);
-  deepmd::gelu_gpu_rocm (gelu_dev, xx_dev, nloc);
+  deepmd::gelu_gpu_rocm(gelu_dev, xx_dev, nloc);
   deepmd::memcpy_device_to_host(gelu_dev, gelu);
   deepmd::delete_device_memory(gelu_dev);
   deepmd::delete_device_memory(xx_dev);
 
   EXPECT_EQ(gelu.size(), nloc);
   EXPECT_EQ(gelu.size(), expected_gelu.size());
-  for (int jj = 0; jj < gelu.size(); ++jj){
-    EXPECT_LT(fabs(gelu[jj] - expected_gelu[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < gelu.size(); ++jj) {
+    EXPECT_LT(fabs(gelu[jj] - expected_gelu[jj]), 1e-5);
+  }
 }
 
-TEST_F(TestGelu, gelu_grad_gpu_rocm)
-{
+TEST_F(TestGelu, gelu_grad_gpu_rocm) {
   std::vector dy(100, 1.0);
   std::vector gelu_grad(nloc, 0.0);
 
-  double * gelu_grad_dev = NULL, * xx_dev = NULL, * dy_dev = NULL;
+  double *gelu_grad_dev = NULL, *xx_dev = NULL, *dy_dev = NULL;
   deepmd::malloc_device_memory_sync(gelu_grad_dev, gelu_grad);
   deepmd::malloc_device_memory_sync(xx_dev, xx);
   deepmd::malloc_device_memory_sync(dy_dev, dy);
-  deepmd::gelu_grad_gpu_rocm (gelu_grad_dev, xx_dev, dy_dev, nloc);
+  deepmd::gelu_grad_gpu_rocm(gelu_grad_dev, xx_dev, dy_dev, nloc);
   deepmd::memcpy_device_to_host(gelu_grad_dev, gelu_grad);
   deepmd::delete_device_memory(gelu_grad_dev);
   deepmd::delete_device_memory(xx_dev);
@@ -252,23 +246,24 @@ TEST_F(TestGelu, gelu_grad_gpu_rocm)
 
   EXPECT_EQ(gelu_grad.size(), nloc);
   EXPECT_EQ(gelu_grad.size(), expected_gelu_grad.size());
-  for (int jj = 0; jj < gelu_grad.size(); ++jj){
-    EXPECT_LT(fabs(gelu_grad[jj] - expected_gelu_grad[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < gelu_grad.size(); ++jj) {
+    EXPECT_LT(fabs(gelu_grad[jj] - expected_gelu_grad[jj]), 1e-5);
+  }
 }
 
-TEST_F(TestGelu, gelu_grad_grad_gpu_rocm)
-{
+TEST_F(TestGelu, gelu_grad_grad_gpu_rocm) {
   std::vector dy(100, 1.0);
   std::vector dy_2(100, 1.0);
   std::vector gelu_grad_grad(nloc, 0.0);
 
-  double * gelu_grad_grad_dev = NULL, * xx_dev = NULL, * dy_dev = NULL, * dy_2_dev = NULL;
+  double *gelu_grad_grad_dev = NULL, *xx_dev = NULL, *dy_dev = NULL,
+         *dy_2_dev = NULL;
   deepmd::malloc_device_memory_sync(gelu_grad_grad_dev, gelu_grad_grad);
   deepmd::malloc_device_memory_sync(xx_dev, xx);
   deepmd::malloc_device_memory_sync(dy_dev, dy);
   deepmd::malloc_device_memory_sync(dy_2_dev, dy_2);
-  deepmd::gelu_grad_grad_gpu_rocm (gelu_grad_grad_dev, xx_dev, dy_dev, dy_2_dev, nloc);
+  deepmd::gelu_grad_grad_gpu_rocm(gelu_grad_grad_dev, xx_dev, dy_dev,
+                                          dy_2_dev, nloc);
   deepmd::memcpy_device_to_host(gelu_grad_grad_dev, gelu_grad_grad);
   deepmd::delete_device_memory(gelu_grad_grad_dev);
   deepmd::delete_device_memory(xx_dev);
@@ -277,8 +272,8 @@ TEST_F(TestGelu, gelu_grad_grad_gpu_rocm)
 
   EXPECT_EQ(gelu_grad_grad.size(), nloc);
   EXPECT_EQ(gelu_grad_grad.size(), expected_gelu_grad_grad.size());
-  for (int jj = 0; jj < gelu_grad_grad.size(); ++jj){
-    EXPECT_LT(fabs(gelu_grad_grad[jj] - expected_gelu_grad_grad[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < gelu_grad_grad.size(); ++jj) {
+    EXPECT_LT(fabs(gelu_grad_grad[jj] - expected_gelu_grad_grad[jj]), 1e-5);
+  }
 }
-#endif // TENSORFLOW_USE_ROCM
\ No newline at end of file
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_map_aparam.cc b/source/lib/tests/test_map_aparam.cc
index a393345eb0..8c5afeea66 100644
--- a/source/lib/tests/test_map_aparam.cc
+++ b/source/lib/tests/test_map_aparam.cc
@@ -1,28 +1,25 @@
-#include 
 #include 
+
+#include 
+
 #include "fmt_nlist.h"
-#include "neighbor_list.h"
 #include "map_aparam.h"
+#include "neighbor_list.h"
 
-class TestMapAparam : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestMapAparam : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   int numb_aparam = 2;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 5, 10};
   std::vector sec_r = {0, 0, 0};
@@ -31,54 +28,64 @@ class TestMapAparam : public ::testing::Test
   std::vector nlist;
   std::vector fmt_nlist_a;
   std::vector aparam;
-  std::vector expected_output = {
-    3.40000,  3.30000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  9.80000,  9.70000,  3.60000,  3.50000,  3.20000,  3.10000,  3.00000,  2.90000,  0.00000,  0.00000, 10.00000,  9.90000,  3.40000,  3.30000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  3.60000,  3.50000,  3.20000,  3.10000,  3.00000,  2.90000,  0.00000,  0.00000,  0.00000,  0.00000,  8.80000,  8.70000,  9.40000,  9.30000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  8.60000,  8.50000,  9.20000,  9.10000,  9.00000,  8.90000,  0.00000,  0.00000,  0.00000,  0.00000,  8.80000,  8.70000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  9.20000,  9.10000,  9.00000,  8.90000,  9.60000,  9.50000,  8.60000,  8.50000,  0.00000,  0.00000,  9.40000,  9.30000,  8.80000,  8.70000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  9.00000,  8.90000,  9.60000,  9.50000,  8.60000,  8.50000,  0.00000,  0.00000,  0.00000,  0.00000,  9.40000,  9.30000,  8.80000,  8.70000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  9.20000,  9.10000,  9.60000,  9.50000,  8.60000,  8.50000,  0.00000,  0.00000,  0.00000,  0.00000, 
-  };  
-  
+  std::vector expected_output = {
+      3.40000, 3.30000, 0.00000, 0.00000, 0.00000,  0.00000, 0.00000, 0.00000,
+      0.00000, 0.00000, 9.80000, 9.70000, 3.60000,  3.50000, 3.20000, 3.10000,
+      3.00000, 2.90000, 0.00000, 0.00000, 10.00000, 9.90000, 3.40000, 3.30000,
+      0.00000, 0.00000, 0.00000, 0.00000, 0.00000,  0.00000, 3.60000, 3.50000,
+      3.20000, 3.10000, 3.00000, 2.90000, 0.00000,  0.00000, 0.00000, 0.00000,
+      8.80000, 8.70000, 9.40000, 9.30000, 0.00000,  0.00000, 0.00000, 0.00000,
+      0.00000, 0.00000, 8.60000, 8.50000, 9.20000,  9.10000, 9.00000, 8.90000,
+      0.00000, 0.00000, 0.00000, 0.00000, 8.80000,  8.70000, 0.00000, 0.00000,
+      0.00000, 0.00000, 0.00000, 0.00000, 0.00000,  0.00000, 9.20000, 9.10000,
+      9.00000, 8.90000, 9.60000, 9.50000, 8.60000,  8.50000, 0.00000, 0.00000,
+      9.40000, 9.30000, 8.80000, 8.70000, 0.00000,  0.00000, 0.00000, 0.00000,
+      0.00000, 0.00000, 9.00000, 8.90000, 9.60000,  9.50000, 8.60000, 8.50000,
+      0.00000, 0.00000, 0.00000, 0.00000, 9.40000,  9.30000, 8.80000, 8.70000,
+      0.00000, 0.00000, 0.00000, 0.00000, 0.00000,  0.00000, 9.20000, 9.10000,
+      9.60000, 9.50000, 8.60000, 8.50000, 0.00000,  0.00000, 0.00000, 0.00000,
+  };
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
-    ndescrpt = nnei * 4;    
+    ndescrpt = nnei * 4;
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
     }
     aparam.resize(nall * numb_aparam);
-    for(int ii = 0; ii < nall * numb_aparam; ++ii){
+    for (int ii = 0; ii < nall * numb_aparam; ++ii) {
       aparam[ii] = 10 - 0.1 * ii;
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-TEST_F(TestMapAparam, cpu)
-{
+TEST_F(TestMapAparam, cpu) {
   std::vector output(nloc * nnei * numb_aparam);
-  deepmd::map_aparam_cpu(
-      &output[0],
-      &aparam[0],
-      &nlist[0],
-      nloc,
-      nnei,
-      numb_aparam);  
-  for (int jj = 0; jj < nloc * nnei * numb_aparam; ++jj){
+  deepmd::map_aparam_cpu(&output[0], &aparam[0], &nlist[0], nloc, nnei,
+                         numb_aparam);
+  for (int jj = 0; jj < nloc * nnei * numb_aparam; ++jj) {
     EXPECT_LT(fabs(output[jj] - expected_output[jj]), 1e-10);
   }
   // for (int jj = 0; jj < nloc * nnei * numb_aparam; ++jj){
diff --git a/source/lib/tests/test_neighbor_list.cc b/source/lib/tests/test_neighbor_list.cc
index 193279070f..cc4202f18d 100644
--- a/source/lib/tests/test_neighbor_list.cc
+++ b/source/lib/tests/test_neighbor_list.cc
@@ -1,178 +1,188 @@
 #include 
+
+#include "device.h"
 #include "fmt_nlist.h"
 #include "neighbor_list.h"
-#include "device.h"
 
-class TestNeighborList : public ::testing::Test
-{
-protected:
-  std::vector posi = {
-    12.83, 2.56, 2.18, 
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.36, 3.00, 1.81,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestNeighborList : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall;
   double rc = 6;
   std::vector boxt = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
   std::vector mapping, ncell, ngcell;
   std::vector> expect_nlist_cpy = {
-    std::vector({33, 1 , 32, 34, 35,}), 
-    std::vector({0 , 33, 32, 34, 35,}),
-    std::vector({6 , 3 , 7 , 4 , 5 ,}),
-    std::vector({6 , 4 , 5 , 2 , 7 ,}),
-    std::vector({3 , 6 , 5 , 2 , 7 ,}),
-    std::vector({3 , 6 , 4 , 2 , 7 ,}),
+      std::vector({
+          33,
+          1,
+          32,
+          34,
+          35,
+      }),
+      std::vector({
+          0,
+          33,
+          32,
+          34,
+          35,
+      }),
+      std::vector({
+          6,
+          3,
+          7,
+          4,
+          5,
+      }),
+      std::vector({
+          6,
+          4,
+          5,
+          2,
+          7,
+      }),
+      std::vector({
+          3,
+          6,
+          5,
+          2,
+          7,
+      }),
+      std::vector({
+          3,
+          6,
+          4,
+          2,
+          7,
+      }),
   };
 
   void SetUp() override {
     SimulationRegion region;
     region.reinitBox(&boxt[0]);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     EXPECT_EQ(expect_nlist_cpy.size(), nloc);
-    for(int ii = 0; ii < nloc; ++ii){
+    for (int ii = 0; ii < nloc; ++ii) {
       std::sort(expect_nlist_cpy[ii].begin(), expect_nlist_cpy[ii].end());
     }
-  }  
+  }
 };
 
-
-TEST_F(TestNeighborList, cpu)
-{
+TEST_F(TestNeighborList, cpu) {
   int mem_size = 10;
-  int * ilist = new int[nloc];
-  int * numneigh = new int[nloc];  
-  int ** firstneigh = new int*[nloc];  
-  for(int ii = 0; ii < nloc; ++ii){
+  int* ilist = new int[nloc];
+  int* numneigh = new int[nloc];
+  int** firstneigh = new int*[nloc];
+  for (int ii = 0; ii < nloc; ++ii) {
     firstneigh[ii] = new int[mem_size];
   }
 
   deepmd::InputNlist nlist(nloc, ilist, numneigh, firstneigh);
   int max_list_size;
-  int ret = build_nlist_cpu(
-      nlist,
-      &max_list_size,
-      &posi_cpy[0],
-      nloc,
-      nall,
-      mem_size,
-      rc);
+  int ret = build_nlist_cpu(nlist, &max_list_size, &posi_cpy[0], nloc, nall,
+                            mem_size, rc);
   EXPECT_EQ(ret, 0);
   EXPECT_EQ(nlist.inum, nloc);
   EXPECT_EQ(max_list_size, 5);
-  for(int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     EXPECT_EQ(nlist.ilist[ii], ii);
     EXPECT_EQ(nlist.numneigh[ii], expect_nlist_cpy[ii].size());
     std::sort(nlist.firstneigh[ii], nlist.firstneigh[ii] + nlist.numneigh[ii]);
-    for(int jj = 0; jj < nlist.numneigh[ii]; ++jj){
+    for (int jj = 0; jj < nlist.numneigh[ii]; ++jj) {
       EXPECT_EQ(nlist.firstneigh[ii][jj], expect_nlist_cpy[ii][jj]);
     }
-  }  
-  
+  }
+
   delete[] ilist;
   delete[] numneigh;
-  for(int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     delete[] firstneigh[ii];
   }
   delete[] firstneigh;
 }
 
-TEST_F(TestNeighborList, cpu_lessmem)
-{
+TEST_F(TestNeighborList, cpu_lessmem) {
   int mem_size = 2;
-  int * ilist = new int[nloc];
-  int * numneigh = new int[nloc];  
-  int ** firstneigh = new int*[nloc];  
-  for(int ii = 0; ii < nloc; ++ii){
+  int* ilist = new int[nloc];
+  int* numneigh = new int[nloc];
+  int** firstneigh = new int*[nloc];
+  for (int ii = 0; ii < nloc; ++ii) {
     firstneigh[ii] = new int[mem_size];
   }
 
   deepmd::InputNlist nlist(nloc, ilist, numneigh, firstneigh);
   int max_list_size;
-  int ret = build_nlist_cpu(
-      nlist,
-      &max_list_size,
-      &posi_cpy[0],
-      nloc,
-      nall,
-      mem_size,
-      rc);
+  int ret = build_nlist_cpu(nlist, &max_list_size, &posi_cpy[0], nloc, nall,
+                            mem_size, rc);
   EXPECT_EQ(ret, 1);
   EXPECT_EQ(nlist.inum, nloc);
   EXPECT_EQ(max_list_size, 5);
-  
+
   delete[] ilist;
   delete[] numneigh;
-  for(int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     delete[] firstneigh[ii];
   }
   delete[] firstneigh;
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestNeighborList, gpu)
-{
+TEST_F(TestNeighborList, gpu) {
   int mem_size = 48;
 
-  int * nlist_data_dev=NULL, * jlist_dev=NULL, * ilist_dev=NULL, * numneigh_dev=NULL;
-  int ** firstneigh_dev=NULL;
-  std::vector temp_firstneigh(nloc);
-  double * c_cpy_dev=NULL;
+  int *nlist_data_dev = NULL, *jlist_dev = NULL, *ilist_dev = NULL,
+      *numneigh_dev = NULL;
+  int** firstneigh_dev = NULL;
+  std::vector temp_firstneigh(nloc);
+  double* c_cpy_dev = NULL;
 
   deepmd::malloc_device_memory(nlist_data_dev, 2 * nloc * mem_size);
   deepmd::malloc_device_memory(jlist_dev, nloc * mem_size);
   deepmd::malloc_device_memory(ilist_dev, nloc);
   deepmd::malloc_device_memory(numneigh_dev, nloc);
-  for(int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     temp_firstneigh[ii] = jlist_dev + ii * mem_size;
   }
   deepmd::malloc_device_memory_sync(firstneigh_dev, temp_firstneigh);
   deepmd::malloc_device_memory_sync(c_cpy_dev, posi_cpy);
   deepmd::InputNlist nlist_dev(nloc, ilist_dev, numneigh_dev, firstneigh_dev);
-  
+
   int max_list_size;
-  int ret = deepmd::build_nlist_gpu(
-      nlist_dev,
-      &max_list_size,
-      nlist_data_dev,
-      c_cpy_dev, 
-      nloc, 
-      nall, 
-      mem_size,
-      rc);
-  
+  int ret = deepmd::build_nlist_gpu(nlist_dev, &max_list_size, nlist_data_dev,
+                                    c_cpy_dev, nloc, nall, mem_size, rc);
+
   EXPECT_EQ(ret, 0);
-  int * ilist = new int[nloc];
-  int * numneigh = new int[nloc];  
-  int ** firstneigh = new int*[nloc];
-  int * jlist = new int[nloc * mem_size];
+  int* ilist = new int[nloc];
+  int* numneigh = new int[nloc];
+  int** firstneigh = new int*[nloc];
+  int* jlist = new int[nloc * mem_size];
   deepmd::memcpy_device_to_host(jlist_dev, jlist, nloc * mem_size);
   deepmd::memcpy_device_to_host(ilist_dev, ilist, nloc);
   deepmd::memcpy_device_to_host(numneigh_dev, numneigh, nloc);
-  for(int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     firstneigh[ii] = jlist + ii * mem_size;
   }
 
   deepmd::InputNlist nlist(nlist_dev.inum, ilist, numneigh, firstneigh);
   EXPECT_EQ(nlist.inum, nloc);
   EXPECT_EQ(max_list_size, 5);
-  for(int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     EXPECT_EQ(nlist.ilist[ii], ii);
     EXPECT_EQ(nlist.numneigh[ii], expect_nlist_cpy[ii].size());
     std::sort(nlist.firstneigh[ii], nlist.firstneigh[ii] + nlist.numneigh[ii]);
-    for(int jj = 0; jj < nlist.numneigh[ii]; ++jj){
+    for (int jj = 0; jj < nlist.numneigh[ii]; ++jj) {
       EXPECT_EQ(nlist.firstneigh[ii][jj], expect_nlist_cpy[ii][jj]);
     }
-  }  
-  
+  }
+
   delete[] ilist;
   delete[] numneigh;
   delete[] jlist;
@@ -185,37 +195,30 @@ TEST_F(TestNeighborList, gpu)
   deepmd::delete_device_memory(c_cpy_dev);
 }
 
-TEST_F(TestNeighborList, gpu_lessmem)
-{
+TEST_F(TestNeighborList, gpu_lessmem) {
   int mem_size = 47;
 
-  int * nlist_data_dev=NULL, * jlist_dev=NULL, * ilist_dev=NULL, * numneigh_dev=NULL;
-  int ** firstneigh_dev=NULL;
-  std::vector temp_firstneigh(nloc);
-  double * c_cpy_dev=NULL;
+  int *nlist_data_dev = NULL, *jlist_dev = NULL, *ilist_dev = NULL,
+      *numneigh_dev = NULL;
+  int** firstneigh_dev = NULL;
+  std::vector temp_firstneigh(nloc);
+  double* c_cpy_dev = NULL;
 
   deepmd::malloc_device_memory(nlist_data_dev, 2 * nloc * mem_size);
   deepmd::malloc_device_memory(jlist_dev, nloc * mem_size);
   deepmd::malloc_device_memory(ilist_dev, nloc);
   deepmd::malloc_device_memory(numneigh_dev, nloc);
-  for(int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     temp_firstneigh[ii] = jlist_dev + ii * mem_size;
   }
   deepmd::malloc_device_memory_sync(firstneigh_dev, temp_firstneigh);
   deepmd::malloc_device_memory_sync(c_cpy_dev, posi_cpy);
   deepmd::InputNlist nlist_dev(nloc, ilist_dev, numneigh_dev, firstneigh_dev);
-  
+
   int max_list_size;
-  int ret = deepmd::build_nlist_gpu(
-      nlist_dev,
-      &max_list_size,
-      nlist_data_dev,
-      c_cpy_dev, 
-      nloc, 
-      nall, 
-      mem_size,
-      rc);
-  
+  int ret = deepmd::build_nlist_gpu(nlist_dev, &max_list_size, nlist_data_dev,
+                                    c_cpy_dev, nloc, nall, mem_size, rc);
+
   EXPECT_EQ(ret, 1);
   deepmd::delete_device_memory(nlist_data_dev);
   deepmd::delete_device_memory(jlist_dev);
@@ -225,65 +228,58 @@ TEST_F(TestNeighborList, gpu_lessmem)
   deepmd::delete_device_memory(c_cpy_dev);
 }
 
-#endif //GOOGLE_CUDA
-
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestNeighborList, gpu)
-{
+TEST_F(TestNeighborList, gpu) {
   int mem_size = 48;
 
-  int * nlist_data_dev=NULL, * jlist_dev=NULL, * ilist_dev=NULL, * numneigh_dev=NULL;
-  int ** firstneigh_dev=NULL;
-  std::vector temp_firstneigh(nloc);
-  double * c_cpy_dev=NULL;
+  int *nlist_data_dev = NULL, *jlist_dev = NULL, *ilist_dev = NULL,
+      *numneigh_dev = NULL;
+  int** firstneigh_dev = NULL;
+  std::vector temp_firstneigh(nloc);
+  double* c_cpy_dev = NULL;
 
   deepmd::malloc_device_memory(nlist_data_dev, 2 * nloc * mem_size);
   deepmd::malloc_device_memory(jlist_dev, nloc * mem_size);
   deepmd::malloc_device_memory(ilist_dev, nloc);
   deepmd::malloc_device_memory(numneigh_dev, nloc);
-  for(int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     temp_firstneigh[ii] = jlist_dev + ii * mem_size;
   }
   deepmd::malloc_device_memory_sync(firstneigh_dev, temp_firstneigh);
   deepmd::malloc_device_memory_sync(c_cpy_dev, posi_cpy);
   deepmd::InputNlist nlist_dev(nloc, ilist_dev, numneigh_dev, firstneigh_dev);
-  
+
   int max_list_size;
-  int ret = deepmd::build_nlist_gpu_rocm(
-      nlist_dev,
-      &max_list_size,
-      nlist_data_dev,
-      c_cpy_dev, 
-      nloc, 
-      nall, 
-      mem_size,
-      rc);
-  
+  int ret =
+      deepmd::build_nlist_gpu_rocm(nlist_dev, &max_list_size, nlist_data_dev,
+                                   c_cpy_dev, nloc, nall, mem_size, rc);
+
   EXPECT_EQ(ret, 0);
-  int * ilist = new int[nloc];
-  int * numneigh = new int[nloc];  
-  int ** firstneigh = new int*[nloc];
-  int * jlist = new int[nloc * mem_size];
+  int* ilist = new int[nloc];
+  int* numneigh = new int[nloc];
+  int** firstneigh = new int*[nloc];
+  int* jlist = new int[nloc * mem_size];
   deepmd::memcpy_device_to_host(jlist_dev, jlist, nloc * mem_size);
   deepmd::memcpy_device_to_host(ilist_dev, ilist, nloc);
   deepmd::memcpy_device_to_host(numneigh_dev, numneigh, nloc);
-  for(int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     firstneigh[ii] = jlist + ii * mem_size;
   }
 
   deepmd::InputNlist nlist(nlist_dev.inum, ilist, numneigh, firstneigh);
   EXPECT_EQ(nlist.inum, nloc);
   EXPECT_EQ(max_list_size, 5);
-  for(int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     EXPECT_EQ(nlist.ilist[ii], ii);
     EXPECT_EQ(nlist.numneigh[ii], expect_nlist_cpy[ii].size());
     std::sort(nlist.firstneigh[ii], nlist.firstneigh[ii] + nlist.numneigh[ii]);
-    for(int jj = 0; jj < nlist.numneigh[ii]; ++jj){
+    for (int jj = 0; jj < nlist.numneigh[ii]; ++jj) {
       EXPECT_EQ(nlist.firstneigh[ii][jj], expect_nlist_cpy[ii][jj]);
     }
-  }  
-  
+  }
+
   delete[] ilist;
   delete[] numneigh;
   delete[] jlist;
@@ -296,37 +292,31 @@ TEST_F(TestNeighborList, gpu)
   deepmd::delete_device_memory(c_cpy_dev);
 }
 
-TEST_F(TestNeighborList, gpu_lessmem)
-{
+TEST_F(TestNeighborList, gpu_lessmem) {
   int mem_size = 47;
 
-  int * nlist_data_dev=NULL, * jlist_dev=NULL, * ilist_dev=NULL, * numneigh_dev=NULL;
-  int ** firstneigh_dev=NULL;
-  std::vector temp_firstneigh(nloc);
-  double * c_cpy_dev=NULL;
+  int *nlist_data_dev = NULL, *jlist_dev = NULL, *ilist_dev = NULL,
+      *numneigh_dev = NULL;
+  int** firstneigh_dev = NULL;
+  std::vector temp_firstneigh(nloc);
+  double* c_cpy_dev = NULL;
 
   deepmd::malloc_device_memory(nlist_data_dev, 2 * nloc * mem_size);
   deepmd::malloc_device_memory(jlist_dev, nloc * mem_size);
   deepmd::malloc_device_memory(ilist_dev, nloc);
   deepmd::malloc_device_memory(numneigh_dev, nloc);
-  for(int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     temp_firstneigh[ii] = jlist_dev + ii * mem_size;
   }
   deepmd::malloc_device_memory_sync(firstneigh_dev, temp_firstneigh);
   deepmd::malloc_device_memory_sync(c_cpy_dev, posi_cpy);
   deepmd::InputNlist nlist_dev(nloc, ilist_dev, numneigh_dev, firstneigh_dev);
-  
+
   int max_list_size;
-  int ret = deepmd::build_nlist_gpu_rocm(
-      nlist_dev,
-      &max_list_size,
-      nlist_data_dev,
-      c_cpy_dev, 
-      nloc, 
-      nall, 
-      mem_size,
-      rc);
-  
+  int ret =
+      deepmd::build_nlist_gpu_rocm(nlist_dev, &max_list_size, nlist_data_dev,
+                                   c_cpy_dev, nloc, nall, mem_size, rc);
+
   EXPECT_EQ(ret, 1);
   deepmd::delete_device_memory(nlist_data_dev);
   deepmd::delete_device_memory(jlist_dev);
@@ -336,4 +326,4 @@ TEST_F(TestNeighborList, gpu_lessmem)
   deepmd::delete_device_memory(c_cpy_dev);
 }
 
-#endif //TENSORFLOW_USE_ROCM
\ No newline at end of file
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_pair_tab.cc b/source/lib/tests/test_pair_tab.cc
index c55e96369f..e9586f39cb 100644
--- a/source/lib/tests/test_pair_tab.cc
+++ b/source/lib/tests/test_pair_tab.cc
@@ -1,47 +1,40 @@
-#include 
-#include 
 #include 
-#include "fmt_nlist.h"
+
+#include 
+#include 
+
 #include "env_mat.h"
-#include "prod_env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
 #include "pair_tab.h"
+#include "prod_env_mat.h"
 
-inline void
-_cum_sum (
-    std::vector & sec,
-    const std::vector & n_sel) {
-  sec.resize (n_sel.size() + 1);
+inline void _cum_sum(std::vector& sec, const std::vector& n_sel) {
+  sec.resize(n_sel.size() + 1);
   sec[0] = 0;
-  for (int ii = 1; ii < sec.size(); ++ii){
-    sec[ii] = sec[ii-1] + n_sel[ii-1];
+  for (int ii = 1; ii < sec.size(); ++ii) {
+    sec[ii] = sec[ii - 1] + n_sel[ii - 1];
   }
 }
 
-class TestPairTab : public ::testing::Test
-{
-protected:
-  std::vector posi = {
-    12.83, 2.56, 2.18, 
-    3.36, 3.00, 1.81,
-    12.09, 2.87, 2.74,
-    00.25, 3.32, 1.68,
-    3.51, 2.51, 2.60,
-    4.27, 3.22, 1.56
-  };
+class TestPairTab : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 3.36,  3.00, 1.81,
+                              12.09, 2.87, 2.74, 00.25, 3.32, 1.68,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
   std::vector box = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
-  std::vector atype = {0, 0, 1, 1, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+  std::vector atype = {0, 0, 1, 1, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   std::vector natoms;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sel_a = {5, 5};
-  std::vector sel_r = {0, 0};  
+  std::vector sel_r = {0, 0};
   std::vector sec_a, sec_r;
   // std::vector sec_a = {0, 5, 10};
   // std::vector sec_r = {0, 0, 0};
@@ -50,116 +43,257 @@ class TestPairTab : public ::testing::Test
   std::vector env, env_deriv, rij;
   std::vector nlist;
   std::vector fmt_nlist_a;
-  std::vector tab_info = {
-    0.000000000000000000e+00,5.000000000000000000e-01,1.900000000000000000e+01,2.000000000000000000e+00,
+  std::vector tab_info = {
+      0.000000000000000000e+00,
+      5.000000000000000000e-01,
+      1.900000000000000000e+01,
+      2.000000000000000000e+00,
   };
-  std::vector tab_data = {
-    -1.645731450145992980e-02,-9.318102695874852515e-03,5.052009558015377877e-01,0.000000000000000000e+00,-1.645731450146042940e-02,-5.869004620025458641e-02,4.371928069054082933e-01,4.794255386042030054e-01,-6.354779499724752534e-03,-1.080619897046354722e-01,2.704407710005182763e-01,8.414709848078965049e-01,3.676315333364321702e-03,-1.271263282038100351e-01,3.525245309207285238e-02,9.974949866040544455e-01,1.324335679744598204e-02,-1.160973822037167924e-01,-2.079712573154540167e-01,9.092974268256817094e-01,1.945112709784019289e-02,-7.636731181137940139e-02,-4.004359513305503215e-01,5.984721441039565493e-01,2.092788842815984651e-02,-1.801393051785876720e-02,-4.948171936597881015e-01,1.411200080598672135e-01,1.727238702609701360e-02,4.476973476662088336e-02,-4.680613894110260964e-01,-3.507832276896198365e-01,9.390240597413182511e-03,9.658689584491141067e-02,-3.267047587994936220e-01,-7.568024953079282025e-01,-7.915293177619131537e-04,1.247576176371514023e-01,-1.053602453174308090e-01,-9.775301176650970092e-01,-1.077948259470512538e-02,1.223830296838656073e-01,1.417804020035860202e-01,-9.589242746631384540e-01,-1.812776811548627576e-02,9.004458189975028670e-02,3.542080135872019420e-01,-7.055403255703919241e-01,-2.103966430679454769e-02,3.566127755329190352e-02,4.799138730402439101e-01,-2.794154981989258602e-01,-1.879310922837906794e-02,-2.745771536709157301e-02,4.881174352264442962e-01,2.151199880878155168e-01,-1.197225569894497244e-02,-8.383704305222891562e-02,3.768226768071237243e-01,6.569865987187890610e-01,-2.119743608124211032e-03,-1.197538101490639856e-01,1.732318236058309757e-01,9.379999767747388972e-01,7.876934490214426710e-03,-1.261130409734364521e-01,-7.263502751666940649e-02,9.893582466233817874e-01,1.734391611395907917e-02,-1.024822375027934496e-01,-3.012303059928992388e-01,7.984871126234902583e-01,1.734391611395902366e-02,-5.045048916091599001e-02,-4.541630326566090115e-01,4.121184852417565936e-01,-2.857038556912050442e-03,-1.088944917710427673e-01,3.641
943791261976759e-01,7.071067811865474617e-01,-2.857038556912189220e-03,-1.174656074417785578e-01,1.378342799133762120e-01,9.595496299847904309e-01,9.997734062661967069e-03,-1.260367231125156806e-01,-1.056680506409179154e-01,9.770612638994756738e-01,1.714771792589186994e-02,-9.604352092452944634e-02,-3.277482946779627926e-01,7.553542242087043501e-01,2.097205155371145713e-02,-4.460036714685383652e-02,-4.683921827493462975e-01,3.487101265321038701e-01,1.942785366828164717e-02,1.831578751428042384e-02,-4.946767623819197657e-01,-1.433103718103848900e-01,1.318969794248442406e-02,7.659934851912514331e-02,-3.997616263485141985e-01,-6.002434930097426680e-01,3.705452789754024034e-03,1.161684423465786514e-01,-2.069938354828104177e-01,-9.102160728966471881e-01,-6.681509290541898238e-03,1.272848007158404737e-01,3.645940757960873524e-02,-9.973360132431250413e-01,-1.543384209711462507e-02,1.072402728442147790e-01,2.709844811396642239e-01,-8.402733142382176057e-01,-2.040699032626563936e-02,6.093874655287040421e-02,4.391635005367497957e-01,-4.774824023514532834e-01,-2.038432541044177260e-02,-2.822244259263473332e-04,4.998200226636939081e-01,2.212854411901365656e-03,-1.536918924167118838e-02,-6.143520065725210921e-02,4.381025975805153405e-01,4.813663272392268988e-01,-6.597319578834796860e-03,-1.075427683822652303e-01,2.691246285409980010e-01,8.426645349208191638e-01,3.812846748232762151e-03,-1.273347271187698360e-01,3.424713303996269192e-02,9.976490755007169087e-01,1.320350429809030723e-02,-1.158961868740716050e-01,-2.089837809528787005e-01,9.083743281701425198e-01,1.968240442794899625e-02,-7.628567397980012821e-02,-4.011656418067505725e-01,5.966978646412827159e-01,2.014467273961967342e-02,-1.723846069595319497e-02,-4.946897764825041177e-01,1.389289532826809004e-01,2.014467273961895177e-02,4.319555752290565875e-02,-4.687326796555513764e-01,-3.528546111561566834e-01,-2.857038556912050442e-03,-1.088944917710427673e-01,3.641943791261976759e-01,7.071067811865474617e-01,-2.857038556912189
220e-03,-1.174656074417785578e-01,1.378342799133762120e-01,9.595496299847904309e-01,9.997734062661967069e-03,-1.260367231125156806e-01,-1.056680506409179154e-01,9.770612638994756738e-01,1.714771792589186994e-02,-9.604352092452944634e-02,-3.277482946779627926e-01,7.553542242087043501e-01,2.097205155371145713e-02,-4.460036714685383652e-02,-4.683921827493462975e-01,3.487101265321038701e-01,1.942785366828164717e-02,1.831578751428042384e-02,-4.946767623819197657e-01,-1.433103718103848900e-01,1.318969794248442406e-02,7.659934851912514331e-02,-3.997616263485141985e-01,-6.002434930097426680e-01,3.705452789754024034e-03,1.161684423465786514e-01,-2.069938354828104177e-01,-9.102160728966471881e-01,-6.681509290541898238e-03,1.272848007158404737e-01,3.645940757960873524e-02,-9.973360132431250413e-01,-1.543384209711462507e-02,1.072402728442147790e-01,2.709844811396642239e-01,-8.402733142382176057e-01,-2.040699032626563936e-02,6.093874655287040421e-02,4.391635005367497957e-01,-4.774824023514532834e-01,-2.038432541044177260e-02,-2.822244259263473332e-04,4.998200226636939081e-01,2.212854411901365656e-03,-1.536918924167118838e-02,-6.143520065725210921e-02,4.381025975805153405e-01,4.813663272392268988e-01,-6.597319578834796860e-03,-1.075427683822652303e-01,2.691246285409980010e-01,8.426645349208191638e-01,3.812846748232762151e-03,-1.273347271187698360e-01,3.424713303996269192e-02,9.976490755007169087e-01,1.320350429809030723e-02,-1.158961868740716050e-01,-2.089837809528787005e-01,9.083743281701425198e-01,1.968240442794899625e-02,-7.628567397980012821e-02,-4.011656418067505725e-01,5.966978646412827159e-01,2.014467273961967342e-02,-1.723846069595319497e-02,-4.946897764825041177e-01,1.389289532826809004e-01,2.014467273961895177e-02,4.319555752290565875e-02,-4.687326796555513764e-01,-3.528546111561566834e-01,1.241685182605223314e-02,-1.446819644344595757e-01,9.847674498780101260e-03,1.000000000000000000e+00,1.241685182605223314e-02,-1.074314089563028762e-01,-2.422656988919823506e-01,8.775
825618903727587e-01,2.049371060414068024e-02,-7.018085347814612129e-02,-4.198779613264314037e-01,5.403023058681397650e-01,2.057421992118041443e-02,-8.699721665724191588e-03,-4.987585364703014945e-01,7.073720166770297579e-02,1.641560294060040448e-02,5.302293809781688516e-02,-4.544353200382087454e-01,-4.161468365471423514e-01,8.024007047643694213e-03,1.022697469196180153e-01,-2.991426350207735396e-01,-8.011436155469336962e-01,-2.274838714293875297e-03,1.263417680625494866e-01,-7.053112003860620427e-02,-9.899924966004454152e-01,-1.203208543609352033e-02,1.195172519196672223e-01,1.753278999436104768e-01,-9.364566872907963413e-01,-1.883932165321944296e-02,8.342099561138682784e-02,3.782661474746648045e-01,-6.536436208636119405e-01,-2.103521949550246628e-02,2.690303065172838792e-02,4.885901737377801313e-01,-2.107957994307797789e-01,-1.808035989191653092e-02,-3.620262783477895541e-02,4.792905765547292862e-01,2.836621854632261908e-01,-1.070002133978709136e-02,-9.044370751052860369e-02,3.526442412094217826e-01,7.086697742912599907e-01,-6.956515614555680571e-04,-1.225437715298900998e-01,1.396567621690030792e-01,9.601702866503659672e-01,9.463090404681517853e-03,-1.246307262142566930e-01,-1.075177355751436026e-01,9.765876257280234896e-01,1.736443528154563154e-02,-9.624145500021202837e-02,-3.283899167896124349e-01,7.539022543433047119e-01,2.079231845733497952e-02,-4.414814915557463415e-02,-4.687795209453995970e-01,3.466353178350258801e-01,1.995818879190347506e-02,1.822880621643041543e-02,-4.946988638845440933e-01,-1.455000338086134548e-01,1.114495328397846485e-02,7.810337259214095162e-02,-3.983666850759726152e-01,-6.020119026848235189e-01,1.114495328397829832e-02,1.115382324440758466e-01,-2.087250800397557615e-01,-9.111302618846769397e-01,
-  };  
-  std::vector expected_energy = {
-    -0.1306167788188060, -0.0255597250848064, 0.1587325724681873, -0.6817885971798407, -0.5510062343672764, 0.0991809936197377, 
+  std::vector tab_data = {
+      -1.645731450145992980e-02, -9.318102695874852515e-03,
+      5.052009558015377877e-01,  0.000000000000000000e+00,
+      -1.645731450146042940e-02, -5.869004620025458641e-02,
+      4.371928069054082933e-01,  4.794255386042030054e-01,
+      -6.354779499724752534e-03, -1.080619897046354722e-01,
+      2.704407710005182763e-01,  8.414709848078965049e-01,
+      3.676315333364321702e-03,  -1.271263282038100351e-01,
+      3.525245309207285238e-02,  9.974949866040544455e-01,
+      1.324335679744598204e-02,  -1.160973822037167924e-01,
+      -2.079712573154540167e-01, 9.092974268256817094e-01,
+      1.945112709784019289e-02,  -7.636731181137940139e-02,
+      -4.004359513305503215e-01, 5.984721441039565493e-01,
+      2.092788842815984651e-02,  -1.801393051785876720e-02,
+      -4.948171936597881015e-01, 1.411200080598672135e-01,
+      1.727238702609701360e-02,  4.476973476662088336e-02,
+      -4.680613894110260964e-01, -3.507832276896198365e-01,
+      9.390240597413182511e-03,  9.658689584491141067e-02,
+      -3.267047587994936220e-01, -7.568024953079282025e-01,
+      -7.915293177619131537e-04, 1.247576176371514023e-01,
+      -1.053602453174308090e-01, -9.775301176650970092e-01,
+      -1.077948259470512538e-02, 1.223830296838656073e-01,
+      1.417804020035860202e-01,  -9.589242746631384540e-01,
+      -1.812776811548627576e-02, 9.004458189975028670e-02,
+      3.542080135872019420e-01,  -7.055403255703919241e-01,
+      -2.103966430679454769e-02, 3.566127755329190352e-02,
+      4.799138730402439101e-01,  -2.794154981989258602e-01,
+      -1.879310922837906794e-02, -2.745771536709157301e-02,
+      4.881174352264442962e-01,  2.151199880878155168e-01,
+      -1.197225569894497244e-02, -8.383704305222891562e-02,
+      3.768226768071237243e-01,  6.569865987187890610e-01,
+      -2.119743608124211032e-03, -1.197538101490639856e-01,
+      1.732318236058309757e-01,  9.379999767747388972e-01,
+      7.876934490214426710e-03,  -1.261130409734364521e-01,
+      -7.263502751666940649e-02, 9.893582466233817874e-01,
+      1.734391611395907917e-02,  -1.024822375027934496e-01,
+      -3.012303059928992388e-01, 7.984871126234902583e-01,
+      1.734391611395902366e-02,  -5.045048916091599001e-02,
+      -4.541630326566090115e-01, 4.121184852417565936e-01,
+      -2.857038556912050442e-03, -1.088944917710427673e-01,
+      3.641943791261976759e-01,  7.071067811865474617e-01,
+      -2.857038556912189220e-03, -1.174656074417785578e-01,
+      1.378342799133762120e-01,  9.595496299847904309e-01,
+      9.997734062661967069e-03,  -1.260367231125156806e-01,
+      -1.056680506409179154e-01, 9.770612638994756738e-01,
+      1.714771792589186994e-02,  -9.604352092452944634e-02,
+      -3.277482946779627926e-01, 7.553542242087043501e-01,
+      2.097205155371145713e-02,  -4.460036714685383652e-02,
+      -4.683921827493462975e-01, 3.487101265321038701e-01,
+      1.942785366828164717e-02,  1.831578751428042384e-02,
+      -4.946767623819197657e-01, -1.433103718103848900e-01,
+      1.318969794248442406e-02,  7.659934851912514331e-02,
+      -3.997616263485141985e-01, -6.002434930097426680e-01,
+      3.705452789754024034e-03,  1.161684423465786514e-01,
+      -2.069938354828104177e-01, -9.102160728966471881e-01,
+      -6.681509290541898238e-03, 1.272848007158404737e-01,
+      3.645940757960873524e-02,  -9.973360132431250413e-01,
+      -1.543384209711462507e-02, 1.072402728442147790e-01,
+      2.709844811396642239e-01,  -8.402733142382176057e-01,
+      -2.040699032626563936e-02, 6.093874655287040421e-02,
+      4.391635005367497957e-01,  -4.774824023514532834e-01,
+      -2.038432541044177260e-02, -2.822244259263473332e-04,
+      4.998200226636939081e-01,  2.212854411901365656e-03,
+      -1.536918924167118838e-02, -6.143520065725210921e-02,
+      4.381025975805153405e-01,  4.813663272392268988e-01,
+      -6.597319578834796860e-03, -1.075427683822652303e-01,
+      2.691246285409980010e-01,  8.426645349208191638e-01,
+      3.812846748232762151e-03,  -1.273347271187698360e-01,
+      3.424713303996269192e-02,  9.976490755007169087e-01,
+      1.320350429809030723e-02,  -1.158961868740716050e-01,
+      -2.089837809528787005e-01, 9.083743281701425198e-01,
+      1.968240442794899625e-02,  -7.628567397980012821e-02,
+      -4.011656418067505725e-01, 5.966978646412827159e-01,
+      2.014467273961967342e-02,  -1.723846069595319497e-02,
+      -4.946897764825041177e-01, 1.389289532826809004e-01,
+      2.014467273961895177e-02,  4.319555752290565875e-02,
+      -4.687326796555513764e-01, -3.528546111561566834e-01,
+      -2.857038556912050442e-03, -1.088944917710427673e-01,
+      3.641943791261976759e-01,  7.071067811865474617e-01,
+      -2.857038556912189220e-03, -1.174656074417785578e-01,
+      1.378342799133762120e-01,  9.595496299847904309e-01,
+      9.997734062661967069e-03,  -1.260367231125156806e-01,
+      -1.056680506409179154e-01, 9.770612638994756738e-01,
+      1.714771792589186994e-02,  -9.604352092452944634e-02,
+      -3.277482946779627926e-01, 7.553542242087043501e-01,
+      2.097205155371145713e-02,  -4.460036714685383652e-02,
+      -4.683921827493462975e-01, 3.487101265321038701e-01,
+      1.942785366828164717e-02,  1.831578751428042384e-02,
+      -4.946767623819197657e-01, -1.433103718103848900e-01,
+      1.318969794248442406e-02,  7.659934851912514331e-02,
+      -3.997616263485141985e-01, -6.002434930097426680e-01,
+      3.705452789754024034e-03,  1.161684423465786514e-01,
+      -2.069938354828104177e-01, -9.102160728966471881e-01,
+      -6.681509290541898238e-03, 1.272848007158404737e-01,
+      3.645940757960873524e-02,  -9.973360132431250413e-01,
+      -1.543384209711462507e-02, 1.072402728442147790e-01,
+      2.709844811396642239e-01,  -8.402733142382176057e-01,
+      -2.040699032626563936e-02, 6.093874655287040421e-02,
+      4.391635005367497957e-01,  -4.774824023514532834e-01,
+      -2.038432541044177260e-02, -2.822244259263473332e-04,
+      4.998200226636939081e-01,  2.212854411901365656e-03,
+      -1.536918924167118838e-02, -6.143520065725210921e-02,
+      4.381025975805153405e-01,  4.813663272392268988e-01,
+      -6.597319578834796860e-03, -1.075427683822652303e-01,
+      2.691246285409980010e-01,  8.426645349208191638e-01,
+      3.812846748232762151e-03,  -1.273347271187698360e-01,
+      3.424713303996269192e-02,  9.976490755007169087e-01,
+      1.320350429809030723e-02,  -1.158961868740716050e-01,
+      -2.089837809528787005e-01, 9.083743281701425198e-01,
+      1.968240442794899625e-02,  -7.628567397980012821e-02,
+      -4.011656418067505725e-01, 5.966978646412827159e-01,
+      2.014467273961967342e-02,  -1.723846069595319497e-02,
+      -4.946897764825041177e-01, 1.389289532826809004e-01,
+      2.014467273961895177e-02,  4.319555752290565875e-02,
+      -4.687326796555513764e-01, -3.528546111561566834e-01,
+      1.241685182605223314e-02,  -1.446819644344595757e-01,
+      9.847674498780101260e-03,  1.000000000000000000e+00,
+      1.241685182605223314e-02,  -1.074314089563028762e-01,
+      -2.422656988919823506e-01, 8.775825618903727587e-01,
+      2.049371060414068024e-02,  -7.018085347814612129e-02,
+      -4.198779613264314037e-01, 5.403023058681397650e-01,
+      2.057421992118041443e-02,  -8.699721665724191588e-03,
+      -4.987585364703014945e-01, 7.073720166770297579e-02,
+      1.641560294060040448e-02,  5.302293809781688516e-02,
+      -4.544353200382087454e-01, -4.161468365471423514e-01,
+      8.024007047643694213e-03,  1.022697469196180153e-01,
+      -2.991426350207735396e-01, -8.011436155469336962e-01,
+      -2.274838714293875297e-03, 1.263417680625494866e-01,
+      -7.053112003860620427e-02, -9.899924966004454152e-01,
+      -1.203208543609352033e-02, 1.195172519196672223e-01,
+      1.753278999436104768e-01,  -9.364566872907963413e-01,
+      -1.883932165321944296e-02, 8.342099561138682784e-02,
+      3.782661474746648045e-01,  -6.536436208636119405e-01,
+      -2.103521949550246628e-02, 2.690303065172838792e-02,
+      4.885901737377801313e-01,  -2.107957994307797789e-01,
+      -1.808035989191653092e-02, -3.620262783477895541e-02,
+      4.792905765547292862e-01,  2.836621854632261908e-01,
+      -1.070002133978709136e-02, -9.044370751052860369e-02,
+      3.526442412094217826e-01,  7.086697742912599907e-01,
+      -6.956515614555680571e-04, -1.225437715298900998e-01,
+      1.396567621690030792e-01,  9.601702866503659672e-01,
+      9.463090404681517853e-03,  -1.246307262142566930e-01,
+      -1.075177355751436026e-01, 9.765876257280234896e-01,
+      1.736443528154563154e-02,  -9.624145500021202837e-02,
+      -3.283899167896124349e-01, 7.539022543433047119e-01,
+      2.079231845733497952e-02,  -4.414814915557463415e-02,
+      -4.687795209453995970e-01, 3.466353178350258801e-01,
+      1.995818879190347506e-02,  1.822880621643041543e-02,
+      -4.946988638845440933e-01, -1.455000338086134548e-01,
+      1.114495328397846485e-02,  7.810337259214095162e-02,
+      -3.983666850759726152e-01, -6.020119026848235189e-01,
+      1.114495328397829832e-02,  1.115382324440758466e-01,
+      -2.087250800397557615e-01, -9.111302618846769397e-01,
   };
-  
-  void SetUp() override {
-    do_setup();
+  std::vector expected_energy = {
+      -0.1306167788188060, -0.0255597250848064, 0.1587325724681873,
+      -0.6817885971798407, -0.5510062343672764, 0.0991809936197377,
   };
-  void do_setup(){
+
+  void SetUp() override { do_setup(); };
+  void do_setup() {
     _cum_sum(sec_a, sel_a);
-    _cum_sum(sec_r, sel_r);    
+    _cum_sum(sec_r, sel_r);
     region.reinitBox(&box[0]);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
     ndescrpt = nnei * 4;
-    natoms.resize(ntypes+2, 0);
+    natoms.resize(ntypes + 2, 0);
     natoms[0] = nloc;
     natoms[1] = nall;
-    for (int ii = 0; ii < nloc; ++ii){
-      natoms[atype[ii]+2] ++;
+    for (int ii = 0; ii < nloc; ++ii) {
+      natoms[atype[ii] + 2]++;
     }
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
     env.resize(nloc * ndescrpt);
     env_deriv.resize(nloc * ndescrpt * 3);
     rij.resize(nloc * nnei * 3);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
-      std::vector t_env, t_env_deriv, t_rij;
+      std::vector t_env, t_env_deriv, t_rij;
       // compute env_mat and its deriv, record
-      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-      for (int jj = 0; jj < ndescrpt; ++jj){
-	env[ii*ndescrpt+jj] = t_env[jj];
-	for (int dd = 0; dd < 3; ++dd){
-	  env_deriv[ii*ndescrpt*3+jj*3+dd] = t_env_deriv[jj*3+dd];
-	}
+      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij, posi_cpy,
+                                    atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth,
+                                    rc);
+      for (int jj = 0; jj < ndescrpt; ++jj) {
+        env[ii * ndescrpt + jj] = t_env[jj];
+        for (int dd = 0; dd < 3; ++dd) {
+          env_deriv[ii * ndescrpt * 3 + jj * 3 + dd] = t_env_deriv[jj * 3 + dd];
+        }
       }
-      for (int jj = 0; jj < nnei * 3; ++jj){
-	rij[ii*nnei*3 + jj] = t_rij[jj];
+      for (int jj = 0; jj < nnei * 3; ++jj) {
+        rij[ii * nnei * 3 + jj] = t_rij[jj];
       }
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-
-class TestPairTabTriBox : public TestPairTab
-{
-protected:  
+class TestPairTabTriBox : public TestPairTab {
+ protected:
   void SetUp() override {
     box = std::vector({13., 0.3, 0.1, 0., 13., 0.2, 0., 0., 13.});
     do_setup();
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
+TEST_F(TestPairTab, cpu) {
+  std::vector energy(nloc);
+  std::vector force(nall * 3);
+  std::vector virial(nall * 9);
+  std::vector scale(nloc, 1.0);
 
-TEST_F(TestPairTab, cpu)
-{
-  std::vector energy(nloc);
-  std::vector force(nall * 3);
-  std::vector virial(nall * 9);
-  std::vector scale(nloc, 1.0);
-
-  deepmd::pair_tab_cpu(
-      &energy[0],
-      &force[0],
-      &virial[0],
-      &tab_info[0],
-      &tab_data[0],
-      &rij[0],
-      &scale[0],
-      &atype_cpy[0],
-      &nlist[0],
-      &natoms[0],
-      sel_a, 
-      sel_r);  
+  deepmd::pair_tab_cpu(&energy[0], &force[0], &virial[0], &tab_info[0],
+                       &tab_data[0], &rij[0], &scale[0], &atype_cpy[0],
+                       &nlist[0], &natoms[0], sel_a, sel_r);
 
   EXPECT_EQ(energy.size(), expected_energy.size());
   EXPECT_EQ(energy.size(), nloc);
-  for (int ii = 0; ii < nloc; ++ii){
+  for (int ii = 0; ii < nloc; ++ii) {
     EXPECT_LT(fabs(energy[ii] - expected_energy[ii]), 1e-8);
   }
   // for (int ii = 0; ii < nloc; ++ii){
-  //   printf("%.16f, ", energy[ii]);    
+  //   printf("%.16f, ", energy[ii]);
   // }
   // printf("\n");
 }
 
-
 // int make_inter_nlist(
 //     std::vector &ilist,
 //     std::vector &jrange,
@@ -189,612 +323,544 @@ TEST_F(TestPairTab, cpu)
 //   return max_nbor_size;
 // }
 
-
-TEST_F(TestPairTab, cpu_f_num_deriv)
-{  
-  std::vector energy(nloc);
-  std::vector force(nall * 3);
-  std::vector virial(9, 0.);
-  std::vector atom_virial(nall * 9);
-  std::vector scale(nloc, 1.0);
-  deepmd::pair_tab_cpu(
-      &energy[0],
-      &force[0],
-      &atom_virial[0],
-      &tab_info[0],
-      &tab_data[0],
-      &rij[0],
-      &scale[0],
-      &atype_cpy[0],
-      &nlist[0],
-      &natoms[0],
-      sel_a, 
-      sel_r);  
-  for (int ii = nloc; ii < nall; ++ii){
-    for (int dd = 0; dd < 3; ++dd){
+TEST_F(TestPairTab, cpu_f_num_deriv) {
+  std::vector energy(nloc);
+  std::vector force(nall * 3);
+  std::vector virial(9, 0.);
+  std::vector atom_virial(nall * 9);
+  std::vector scale(nloc, 1.0);
+  deepmd::pair_tab_cpu(&energy[0], &force[0], &atom_virial[0], &tab_info[0],
+                       &tab_data[0], &rij[0], &scale[0], &atype_cpy[0],
+                       &nlist[0], &natoms[0], sel_a, sel_r);
+  for (int ii = nloc; ii < nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
       int orig_idx = mapping[ii];
-      force[orig_idx*3+dd] += force[ii*3+dd];
+      force[orig_idx * 3 + dd] += force[ii * 3 + dd];
     }
   }
-  for (int ii = 0; ii < nall; ++ii){
-    for (int dd = 0; dd < 9; ++dd){
-      virial[dd] += atom_virial[ii*9+dd];
+  for (int ii = 0; ii < nall; ++ii) {
+    for (int dd = 0; dd < 9; ++dd) {
+      virial[dd] += atom_virial[ii * 9 + dd];
     }
   }
   double hh = 1e-4;
-  for(int ii = 0; ii < nloc; ++ii){
-    for(int dd = 0; dd < 3; ++dd){
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
       std::vector posi_0(posi);
       std::vector posi_1(posi);
-      posi_0[ii*3+dd] -= hh;
-      posi_1[ii*3+dd] += hh;
+      posi_0[ii * 3 + dd] -= hh;
+      posi_1[ii * 3 + dd] += hh;
       std::vector posi_cpy_0, posi_cpy_1;
       std::vector atype_cpy_0, atype_cpy_1;
       std::vector t_mapping;
-      copy_coord(posi_cpy_0, atype_cpy_0, t_mapping, ncell, ngcell, posi_0, atype, rc, region);
-      copy_coord(posi_cpy_1, atype_cpy_1, t_mapping, ncell, ngcell, posi_1, atype, rc, region);
+      copy_coord(posi_cpy_0, atype_cpy_0, t_mapping, ncell, ngcell, posi_0,
+                 atype, rc, region);
+      copy_coord(posi_cpy_1, atype_cpy_1, t_mapping, ncell, ngcell, posi_1,
+                 atype, rc, region);
       EXPECT_EQ(atype_cpy_0, atype_cpy_1);
-      for (int jj = 0; jj < atype_cpy_0.size(); ++jj){
-	EXPECT_EQ(atype_cpy_0[jj], atype_cpy_1[jj]);
+      for (int jj = 0; jj < atype_cpy_0.size(); ++jj) {
+        EXPECT_EQ(atype_cpy_0[jj], atype_cpy_1[jj]);
       }
       std::vector> nlist_cpy_0, nlist_cpy_1, t_nlist;
-      build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-      build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-      std::vector ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc), numneigh_1(nloc);;
+      build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt,
+                  ncell, ext_stt, ext_end, region, ncell);
+      build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt,
+                  ncell, ext_stt, ext_end, region, ncell);
+      std::vector ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc),
+          numneigh_1(nloc);
+      ;
       std::vector firstneigh_0(nloc), firstneigh_1(nloc);
-      deepmd::InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0], &firstneigh_0[0]);
-      deepmd::InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0], &firstneigh_1[0]);
+      deepmd::InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0],
+                                  &firstneigh_0[0]);
+      deepmd::InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0],
+                                  &firstneigh_1[0]);
       convert_nlist(inlist_0, nlist_cpy_0);
       convert_nlist(inlist_1, nlist_cpy_1);
       int max_nnei_0 = max_numneigh(inlist_0);
       int max_nnei_1 = max_numneigh(inlist_1);
       EXPECT_EQ(max_nnei_0, max_nnei_1);
-      std::vector t_em(nloc * ndescrpt), t_em_deriv(nloc * ndescrpt * 3);
+      std::vector t_em(nloc * ndescrpt),
+          t_em_deriv(nloc * ndescrpt * 3);
       std::vector rij_0(nloc * nnei * 3), rij_1(nloc * nnei * 3);
       std::vector nlist_0(nloc * nnei), nlist_1(nloc * nnei);
-      std::vector avg(ntypes * ndescrpt, 0);
-      std::vector std(ntypes * ndescrpt, 1);
-      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], inlist_0, max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
-      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], inlist_1, max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
+      std::vector avg(ntypes * ndescrpt, 0);
+      std::vector std(ntypes * ndescrpt, 1);
+      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0],
+                                 &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0],
+                                 inlist_0, max_nnei_0, &avg[0], &std[0], nloc,
+                                 nall, rc, rc_smth, sec_a);
+      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0],
+                                 &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0],
+                                 inlist_1, max_nnei_1, &avg[0], &std[0], nloc,
+                                 nall, rc, rc_smth, sec_a);
       std::vector energy_0(nloc), energy_1(nloc);
       std::vector t_force(nall * 3), t_virial(nall * 9);
-      deepmd::pair_tab_cpu(
-	  &energy_0[0],
-	  &t_force[0],
-	  &t_virial[0],
-	  &tab_info[0],
-	  &tab_data[0],
-	  &rij_0[0],
-	  &scale[0],
-	  &atype_cpy_0[0],
-	  &nlist_0[0],
-	  &natoms[0],
-	  sel_a,
-	  sel_r);  
-      deepmd::pair_tab_cpu(
-	  &energy_1[0],
-	  &t_force[0],
-	  &t_virial[0],
-	  &tab_info[0],
-	  &tab_data[0],
-	  &rij_1[0],
-	  &scale[0],
-	  &atype_cpy_1[0],
-	  &nlist_1[0],
-	  &natoms[0],
-	  sel_a,
-	  sel_r); 
+      deepmd::pair_tab_cpu(&energy_0[0], &t_force[0], &t_virial[0],
+                           &tab_info[0], &tab_data[0], &rij_0[0], &scale[0],
+                           &atype_cpy_0[0], &nlist_0[0], &natoms[0], sel_a,
+                           sel_r);
+      deepmd::pair_tab_cpu(&energy_1[0], &t_force[0], &t_virial[0],
+                           &tab_info[0], &tab_data[0], &rij_1[0], &scale[0],
+                           &atype_cpy_1[0], &nlist_1[0], &natoms[0], sel_a,
+                           sel_r);
       double tot_e_0(0), tot_e_1(0);
-      for(int ii = 0; ii < nloc; ++ii){
-	tot_e_0 += energy_0[ii];
-	tot_e_1 += energy_1[ii];
+      for (int ii = 0; ii < nloc; ++ii) {
+        tot_e_0 += energy_0[ii];
+        tot_e_1 += energy_1[ii];
       }
-      double num_deriv = - (tot_e_1 - tot_e_0) / (2. * hh);
-      double ana_deriv = force[ii*3+dd];
+      double num_deriv = -(tot_e_1 - tot_e_0) / (2. * hh);
+      double ana_deriv = force[ii * 3 + dd];
       EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-8);
-    }  
+    }
   }
 }
 
-
-TEST_F(TestPairTab, cpu_f_num_deriv_scale)
-{  
+TEST_F(TestPairTab, cpu_f_num_deriv_scale) {
   double new_scale = 0.3;
-  std::vector energy(nloc);
-  std::vector force(nall * 3);
-  std::vector virial(9, 0.);
-  std::vector atom_virial(nall * 9);
-  std::vector scale(nloc, new_scale);
-  deepmd::pair_tab_cpu(
-      &energy[0],
-      &force[0],
-      &atom_virial[0],
-      &tab_info[0],
-      &tab_data[0],
-      &rij[0],
-      &scale[0],
-      &atype_cpy[0],
-      &nlist[0],
-      &natoms[0],
-      sel_a, 
-      sel_r);  
-  for (int ii = nloc; ii < nall; ++ii){
-    for (int dd = 0; dd < 3; ++dd){
+  std::vector energy(nloc);
+  std::vector force(nall * 3);
+  std::vector virial(9, 0.);
+  std::vector atom_virial(nall * 9);
+  std::vector scale(nloc, new_scale);
+  deepmd::pair_tab_cpu(&energy[0], &force[0], &atom_virial[0], &tab_info[0],
+                       &tab_data[0], &rij[0], &scale[0], &atype_cpy[0],
+                       &nlist[0], &natoms[0], sel_a, sel_r);
+  for (int ii = nloc; ii < nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
       int orig_idx = mapping[ii];
-      force[orig_idx*3+dd] += force[ii*3+dd];
+      force[orig_idx * 3 + dd] += force[ii * 3 + dd];
     }
   }
-  for (int ii = 0; ii < nall; ++ii){
-    for (int dd = 0; dd < 9; ++dd){
-      virial[dd] += atom_virial[ii*9+dd];
+  for (int ii = 0; ii < nall; ++ii) {
+    for (int dd = 0; dd < 9; ++dd) {
+      virial[dd] += atom_virial[ii * 9 + dd];
     }
   }
   double hh = 1e-4;
-  for(int ii = 0; ii < nloc; ++ii){
-    for(int dd = 0; dd < 3; ++dd){
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
       std::vector posi_0(posi);
       std::vector posi_1(posi);
-      posi_0[ii*3+dd] -= hh;
-      posi_1[ii*3+dd] += hh;
+      posi_0[ii * 3 + dd] -= hh;
+      posi_1[ii * 3 + dd] += hh;
       std::vector posi_cpy_0, posi_cpy_1;
       std::vector atype_cpy_0, atype_cpy_1;
       std::vector t_mapping;
-      copy_coord(posi_cpy_0, atype_cpy_0, t_mapping, ncell, ngcell, posi_0, atype, rc, region);
-      copy_coord(posi_cpy_1, atype_cpy_1, t_mapping, ncell, ngcell, posi_1, atype, rc, region);
+      copy_coord(posi_cpy_0, atype_cpy_0, t_mapping, ncell, ngcell, posi_0,
+                 atype, rc, region);
+      copy_coord(posi_cpy_1, atype_cpy_1, t_mapping, ncell, ngcell, posi_1,
+                 atype, rc, region);
       EXPECT_EQ(atype_cpy_0, atype_cpy_1);
-      for (int jj = 0; jj < atype_cpy_0.size(); ++jj){
-	EXPECT_EQ(atype_cpy_0[jj], atype_cpy_1[jj]);
+      for (int jj = 0; jj < atype_cpy_0.size(); ++jj) {
+        EXPECT_EQ(atype_cpy_0[jj], atype_cpy_1[jj]);
       }
       std::vector> nlist_cpy_0, nlist_cpy_1, t_nlist;
-      build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-      build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-      std::vector ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc), numneigh_1(nloc);;
+      build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt,
+                  ncell, ext_stt, ext_end, region, ncell);
+      build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt,
+                  ncell, ext_stt, ext_end, region, ncell);
+      std::vector ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc),
+          numneigh_1(nloc);
+      ;
       std::vector firstneigh_0(nloc), firstneigh_1(nloc);
-      deepmd::InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0], &firstneigh_0[0]);
-      deepmd::InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0], &firstneigh_1[0]);
+      deepmd::InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0],
+                                  &firstneigh_0[0]);
+      deepmd::InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0],
+                                  &firstneigh_1[0]);
       convert_nlist(inlist_0, nlist_cpy_0);
       convert_nlist(inlist_1, nlist_cpy_1);
       int max_nnei_0 = max_numneigh(inlist_0);
       int max_nnei_1 = max_numneigh(inlist_1);
       EXPECT_EQ(max_nnei_0, max_nnei_1);
-      std::vector t_em(nloc * ndescrpt), t_em_deriv(nloc * ndescrpt * 3);
+      std::vector t_em(nloc * ndescrpt),
+          t_em_deriv(nloc * ndescrpt * 3);
       std::vector rij_0(nloc * nnei * 3), rij_1(nloc * nnei * 3);
       std::vector nlist_0(nloc * nnei), nlist_1(nloc * nnei);
-      std::vector avg(ntypes * ndescrpt, 0);
-      std::vector std(ntypes * ndescrpt, 1);
-      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], inlist_0, max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
-      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], inlist_1, max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
+      std::vector avg(ntypes * ndescrpt, 0);
+      std::vector std(ntypes * ndescrpt, 1);
+      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0],
+                                 &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0],
+                                 inlist_0, max_nnei_0, &avg[0], &std[0], nloc,
+                                 nall, rc, rc_smth, sec_a);
+      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0],
+                                 &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0],
+                                 inlist_1, max_nnei_1, &avg[0], &std[0], nloc,
+                                 nall, rc, rc_smth, sec_a);
       std::vector energy_0(nloc), energy_1(nloc);
       std::vector t_force(nall * 3), t_virial(nall * 9);
-      deepmd::pair_tab_cpu(
-	  &energy_0[0],
-	  &t_force[0],
-	  &t_virial[0],
-	  &tab_info[0],
-	  &tab_data[0],
-	  &rij_0[0],
-	  &scale[0],
-	  &atype_cpy_0[0],
-	  &nlist_0[0],
-	  &natoms[0],
-	  sel_a,
-	  sel_r);  
-      deepmd::pair_tab_cpu(
-	  &energy_1[0],
-	  &t_force[0],
-	  &t_virial[0],
-	  &tab_info[0],
-	  &tab_data[0],
-	  &rij_1[0],
-	  &scale[0],
-	  &atype_cpy_1[0],
-	  &nlist_1[0],
-	  &natoms[0],
-	  sel_a,
-	  sel_r); 
+      deepmd::pair_tab_cpu(&energy_0[0], &t_force[0], &t_virial[0],
+                           &tab_info[0], &tab_data[0], &rij_0[0], &scale[0],
+                           &atype_cpy_0[0], &nlist_0[0], &natoms[0], sel_a,
+                           sel_r);
+      deepmd::pair_tab_cpu(&energy_1[0], &t_force[0], &t_virial[0],
+                           &tab_info[0], &tab_data[0], &rij_1[0], &scale[0],
+                           &atype_cpy_1[0], &nlist_1[0], &natoms[0], sel_a,
+                           sel_r);
       double tot_e_0(0), tot_e_1(0);
-      for(int ii = 0; ii < nloc; ++ii){
-	tot_e_0 += energy_0[ii];
-	tot_e_1 += energy_1[ii];
+      for (int ii = 0; ii < nloc; ++ii) {
+        tot_e_0 += energy_0[ii];
+        tot_e_1 += energy_1[ii];
       }
-      double num_deriv = - (tot_e_1 - tot_e_0) / (2. * hh);
-      double ana_deriv = force[ii*3+dd];
+      double num_deriv = -(tot_e_1 - tot_e_0) / (2. * hh);
+      double ana_deriv = force[ii * 3 + dd];
       EXPECT_LT(fabs(new_scale * num_deriv - ana_deriv), 1e-8);
-    }  
+    }
   }
 }
 
-TEST_F(TestPairTab, cpu_v_num_deriv)
-{
-  std::vector energy(nloc);
-  std::vector force(nall * 3);
-  std::vector virial(9, 0.);
-  std::vector atom_virial(nall * 9);
-  std::vector scale(nloc, 1.0);
-  deepmd::pair_tab_cpu(
-      &energy[0],
-      &force[0],
-      &atom_virial[0],
-      &tab_info[0],
-      &tab_data[0],
-      &rij[0],
-      &scale[0],
-      &atype_cpy[0],
-      &nlist[0],
-      &natoms[0],
-      sel_a, 
-      sel_r);  
-  for (int ii = nloc; ii < nall; ++ii){
-    for (int dd = 0; dd < 3; ++dd){
+TEST_F(TestPairTab, cpu_v_num_deriv) {
+  std::vector energy(nloc);
+  std::vector force(nall * 3);
+  std::vector virial(9, 0.);
+  std::vector atom_virial(nall * 9);
+  std::vector scale(nloc, 1.0);
+  deepmd::pair_tab_cpu(&energy[0], &force[0], &atom_virial[0], &tab_info[0],
+                       &tab_data[0], &rij[0], &scale[0], &atype_cpy[0],
+                       &nlist[0], &natoms[0], sel_a, sel_r);
+  for (int ii = nloc; ii < nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
       int orig_idx = mapping[ii];
-      force[orig_idx*3+dd] += force[ii*3+dd];
+      force[orig_idx * 3 + dd] += force[ii * 3 + dd];
     }
   }
-  for (int ii = 0; ii < nall; ++ii){
-    for (int dd = 0; dd < 9; ++dd){
-      virial[dd] += atom_virial[ii*9+dd];
+  for (int ii = 0; ii < nall; ++ii) {
+    for (int dd = 0; dd < 9; ++dd) {
+      virial[dd] += atom_virial[ii * 9 + dd];
     }
   }
   double hh = 1e-4;
   std::vector num_deriv(9);
-  for(int dd0 = 0; dd0 < 3; ++dd0){
-    for(int dd1 = 0; dd1 < 3; ++dd1){
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
       std::vector box_0(9);
       std::vector box_1(9);
       std::copy(box.begin(), box.end(), box_0.begin());
       std::copy(box.begin(), box.end(), box_1.begin());
-      box_0[dd0*3+dd1] -= hh;
-      box_1[dd0*3+dd1] += hh;
+      box_0[dd0 * 3 + dd1] -= hh;
+      box_1[dd0 * 3 + dd1] += hh;
       SimulationRegion region_0, region_1;
       region_0.reinitBox(&box_0[0]);
       region_1.reinitBox(&box_1[0]);
       std::vector posi_0(nloc * 3), posi_1(nloc * 3);
-      for(int jj = 0; jj < nloc; ++jj){
-	double ci[3], co[3];
-	region.phys2Inter(ci, &posi[jj*3]);
-	region_0.inter2Phys(co, ci);
-	std::copy(co, co+3, posi_0.begin() + jj*3);
-	region_1.inter2Phys(co, ci);
-	std::copy(co, co+3, posi_1.begin() + jj*3);	
+      for (int jj = 0; jj < nloc; ++jj) {
+        double ci[3], co[3];
+        region.phys2Inter(ci, &posi[jj * 3]);
+        region_0.inter2Phys(co, ci);
+        std::copy(co, co + 3, posi_0.begin() + jj * 3);
+        region_1.inter2Phys(co, ci);
+        std::copy(co, co + 3, posi_1.begin() + jj * 3);
       }
       std::vector posi_cpy_0, posi_cpy_1;
       std::vector atype_cpy_0, atype_cpy_1;
       std::vector t_mapping;
-      copy_coord(posi_cpy_0, atype_cpy_0, t_mapping, ncell, ngcell, posi_0, atype, rc, region_0);
-      copy_coord(posi_cpy_1, atype_cpy_1, t_mapping, ncell, ngcell, posi_1, atype, rc, region_1);
+      copy_coord(posi_cpy_0, atype_cpy_0, t_mapping, ncell, ngcell, posi_0,
+                 atype, rc, region_0);
+      copy_coord(posi_cpy_1, atype_cpy_1, t_mapping, ncell, ngcell, posi_1,
+                 atype, rc, region_1);
       EXPECT_EQ(atype_cpy_0, atype_cpy_1);
-      for (int jj = 0; jj < atype_cpy_0.size(); ++jj){
-	EXPECT_EQ(atype_cpy_0[jj], atype_cpy_1[jj]);
+      for (int jj = 0; jj < atype_cpy_0.size(); ++jj) {
+        EXPECT_EQ(atype_cpy_0[jj], atype_cpy_1[jj]);
       }
       std::vector> nlist_cpy_0, nlist_cpy_1, t_nlist;
-      build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region_0, ncell);
-      build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region_1, ncell);
-      std::vector ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc), numneigh_1(nloc);;
+      build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt,
+                  ncell, ext_stt, ext_end, region_0, ncell);
+      build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt,
+                  ncell, ext_stt, ext_end, region_1, ncell);
+      std::vector ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc),
+          numneigh_1(nloc);
+      ;
       std::vector firstneigh_0(nloc), firstneigh_1(nloc);
-      deepmd::InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0], &firstneigh_0[0]);
-      deepmd::InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0], &firstneigh_1[0]);
+      deepmd::InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0],
+                                  &firstneigh_0[0]);
+      deepmd::InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0],
+                                  &firstneigh_1[0]);
       convert_nlist(inlist_0, nlist_cpy_0);
       convert_nlist(inlist_1, nlist_cpy_1);
       int max_nnei_0 = max_numneigh(inlist_0);
       int max_nnei_1 = max_numneigh(inlist_1);
       EXPECT_EQ(max_nnei_0, max_nnei_1);
-      std::vector t_em(nloc * ndescrpt), t_em_deriv(nloc * ndescrpt * 3);
+      std::vector t_em(nloc * ndescrpt),
+          t_em_deriv(nloc * ndescrpt * 3);
       std::vector rij_0(nloc * nnei * 3), rij_1(nloc * nnei * 3);
       std::vector nlist_0(nloc * nnei), nlist_1(nloc * nnei);
-      std::vector avg(ntypes * ndescrpt, 0);
-      std::vector std(ntypes * ndescrpt, 1);
-      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], inlist_0, max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
-      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], inlist_1, max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
+      std::vector avg(ntypes * ndescrpt, 0);
+      std::vector std(ntypes * ndescrpt, 1);
+      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0],
+                                 &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0],
+                                 inlist_0, max_nnei_0, &avg[0], &std[0], nloc,
+                                 nall, rc, rc_smth, sec_a);
+      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0],
+                                 &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0],
+                                 inlist_1, max_nnei_1, &avg[0], &std[0], nloc,
+                                 nall, rc, rc_smth, sec_a);
       std::vector energy_0(nloc), energy_1(nloc);
       std::vector t_force(nall * 3), t_virial(nall * 9);
-      deepmd::pair_tab_cpu(
-	  &energy_0[0],
-	  &t_force[0],
-	  &t_virial[0],
-	  &tab_info[0],
-	  &tab_data[0],
-	  &rij_0[0],
-	  &scale[0],
-	  &atype_cpy_0[0],
-	  &nlist_0[0],
-	  &natoms[0],
-	  sel_a,
-	  sel_r);  
-      deepmd::pair_tab_cpu(
-	  &energy_1[0],
-	  &t_force[0],
-	  &t_virial[0],
-	  &tab_info[0],
-	  &tab_data[0],
-	  &rij_1[0],
-	  &scale[0],
-	  &atype_cpy_1[0],
-	  &nlist_1[0],
-	  &natoms[0],
-	  sel_a,
-	  sel_r); 
+      deepmd::pair_tab_cpu(&energy_0[0], &t_force[0], &t_virial[0],
+                           &tab_info[0], &tab_data[0], &rij_0[0], &scale[0],
+                           &atype_cpy_0[0], &nlist_0[0], &natoms[0], sel_a,
+                           sel_r);
+      deepmd::pair_tab_cpu(&energy_1[0], &t_force[0], &t_virial[0],
+                           &tab_info[0], &tab_data[0], &rij_1[0], &scale[0],
+                           &atype_cpy_1[0], &nlist_1[0], &natoms[0], sel_a,
+                           sel_r);
       double tot_e_0(0), tot_e_1(0);
-      for(int ii = 0; ii < nloc; ++ii){
-	tot_e_0 += energy_0[ii];
-	tot_e_1 += energy_1[ii];
+      for (int ii = 0; ii < nloc; ++ii) {
+        tot_e_0 += energy_0[ii];
+        tot_e_1 += energy_1[ii];
       }
-      num_deriv[dd0*3+dd1] = - (tot_e_1 - tot_e_0) / (2. * hh);
+      num_deriv[dd0 * 3 + dd1] = -(tot_e_1 - tot_e_0) / (2. * hh);
       // std::cout << num_deriv[dd0*3+dd1] << std::endl;
-    }  
+    }
   }
   std::vector num_vir(9, 0);
-  for (int dd0 = 0; dd0 < 3; ++dd0){
-    for (int dd1 = 0; dd1 < 3; ++dd1){
-      num_vir[dd0*3+dd1] = 0;
-      for (int dd = 0; dd < 3; ++dd){
-	num_vir[dd0*3+dd1] += num_deriv[dd*3+dd0] * box[dd*3+dd1];
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
+      num_vir[dd0 * 3 + dd1] = 0;
+      for (int dd = 0; dd < 3; ++dd) {
+        num_vir[dd0 * 3 + dd1] += num_deriv[dd * 3 + dd0] * box[dd * 3 + dd1];
       }
-      // std::cout << num_vir[dd0*3+dd1] << " " << virial[dd0*3+dd1] << std::endl;
-      EXPECT_LT(fabs(num_vir[dd0*3+dd1] - virial[dd0*3+dd1]), 1e-8);
+      // std::cout << num_vir[dd0*3+dd1] << " " << virial[dd0*3+dd1] <<
+      // std::endl;
+      EXPECT_LT(fabs(num_vir[dd0 * 3 + dd1] - virial[dd0 * 3 + dd1]), 1e-8);
     }
   }
 }
 
-TEST_F(TestPairTab, cpu_v_num_deriv_scale)
-{
+TEST_F(TestPairTab, cpu_v_num_deriv_scale) {
   double new_scale = 0.3;
-  std::vector energy(nloc);
-  std::vector force(nall * 3);
-  std::vector virial(9, 0.);
-  std::vector atom_virial(nall * 9);
-  std::vector scale(nloc, new_scale);
-  deepmd::pair_tab_cpu(
-      &energy[0],
-      &force[0],
-      &atom_virial[0],
-      &tab_info[0],
-      &tab_data[0],
-      &rij[0],
-      &scale[0],
-      &atype_cpy[0],
-      &nlist[0],
-      &natoms[0],
-      sel_a, 
-      sel_r);  
-  for (int ii = nloc; ii < nall; ++ii){
-    for (int dd = 0; dd < 3; ++dd){
+  std::vector energy(nloc);
+  std::vector force(nall * 3);
+  std::vector virial(9, 0.);
+  std::vector atom_virial(nall * 9);
+  std::vector scale(nloc, new_scale);
+  deepmd::pair_tab_cpu(&energy[0], &force[0], &atom_virial[0], &tab_info[0],
+                       &tab_data[0], &rij[0], &scale[0], &atype_cpy[0],
+                       &nlist[0], &natoms[0], sel_a, sel_r);
+  for (int ii = nloc; ii < nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
       int orig_idx = mapping[ii];
-      force[orig_idx*3+dd] += force[ii*3+dd];
+      force[orig_idx * 3 + dd] += force[ii * 3 + dd];
     }
   }
-  for (int ii = 0; ii < nall; ++ii){
-    for (int dd = 0; dd < 9; ++dd){
-      virial[dd] += atom_virial[ii*9+dd];
+  for (int ii = 0; ii < nall; ++ii) {
+    for (int dd = 0; dd < 9; ++dd) {
+      virial[dd] += atom_virial[ii * 9 + dd];
     }
   }
   double hh = 1e-4;
   std::vector num_deriv(9);
-  for(int dd0 = 0; dd0 < 3; ++dd0){
-    for(int dd1 = 0; dd1 < 3; ++dd1){
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
       std::vector box_0(9);
       std::vector box_1(9);
       std::copy(box.begin(), box.end(), box_0.begin());
       std::copy(box.begin(), box.end(), box_1.begin());
-      box_0[dd0*3+dd1] -= hh;
-      box_1[dd0*3+dd1] += hh;
+      box_0[dd0 * 3 + dd1] -= hh;
+      box_1[dd0 * 3 + dd1] += hh;
       SimulationRegion region_0, region_1;
       region_0.reinitBox(&box_0[0]);
       region_1.reinitBox(&box_1[0]);
       std::vector<double> posi_0(nloc * 3), posi_1(nloc * 3);
-      for(int jj = 0; jj < nloc; ++jj){
-	double ci[3], co[3];
-	region.phys2Inter(ci, &posi[jj*3]);
-	region_0.inter2Phys(co, ci);
-	std::copy(co, co+3, posi_0.begin() + jj*3);
-	region_1.inter2Phys(co, ci);
-	std::copy(co, co+3, posi_1.begin() + jj*3);	
+      for (int jj = 0; jj < nloc; ++jj) {
+        double ci[3], co[3];
+        region.phys2Inter(ci, &posi[jj * 3]);
+        region_0.inter2Phys(co, ci);
+        std::copy(co, co + 3, posi_0.begin() + jj * 3);
+        region_1.inter2Phys(co, ci);
+        std::copy(co, co + 3, posi_1.begin() + jj * 3);
       }
       std::vector<double> posi_cpy_0, posi_cpy_1;
       std::vector<int> atype_cpy_0, atype_cpy_1;
       std::vector<int> t_mapping;
-      copy_coord(posi_cpy_0, atype_cpy_0, t_mapping, ncell, ngcell, posi_0, atype, rc, region_0);
-      copy_coord(posi_cpy_1, atype_cpy_1, t_mapping, ncell, ngcell, posi_1, atype, rc, region_1);
+      copy_coord(posi_cpy_0, atype_cpy_0, t_mapping, ncell, ngcell, posi_0,
+                 atype, rc, region_0);
+      copy_coord(posi_cpy_1, atype_cpy_1, t_mapping, ncell, ngcell, posi_1,
+                 atype, rc, region_1);
       EXPECT_EQ(atype_cpy_0, atype_cpy_1);
-      for (int jj = 0; jj < atype_cpy_0.size(); ++jj){
-	EXPECT_EQ(atype_cpy_0[jj], atype_cpy_1[jj]);
+      for (int jj = 0; jj < atype_cpy_0.size(); ++jj) {
+        EXPECT_EQ(atype_cpy_0[jj], atype_cpy_1[jj]);
       }
       std::vector<std::vector<int>> nlist_cpy_0, nlist_cpy_1, t_nlist;
-      build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region_0, ncell);
-      build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region_1, ncell);
-      std::vector ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc), numneigh_1(nloc);;
+      build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt,
+                  ncell, ext_stt, ext_end, region_0, ncell);
+      build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt,
+                  ncell, ext_stt, ext_end, region_1, ncell);
+      std::vector<int> ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc),
+          numneigh_1(nloc);
+      ;
       std::vector<int*> firstneigh_0(nloc), firstneigh_1(nloc);
-      deepmd::InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0], &firstneigh_0[0]);
-      deepmd::InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0], &firstneigh_1[0]);
+      deepmd::InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0],
+                                  &firstneigh_0[0]);
+      deepmd::InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0],
+                                  &firstneigh_1[0]);
       convert_nlist(inlist_0, nlist_cpy_0);
       convert_nlist(inlist_1, nlist_cpy_1);
       int max_nnei_0 = max_numneigh(inlist_0);
       int max_nnei_1 = max_numneigh(inlist_1);
       EXPECT_EQ(max_nnei_0, max_nnei_1);
-      std::vector<double> t_em(nloc * ndescrpt), t_em_deriv(nloc * ndescrpt * 3);
+      std::vector<double> t_em(nloc * ndescrpt),
+          t_em_deriv(nloc * ndescrpt * 3);
       std::vector<double> rij_0(nloc * nnei * 3), rij_1(nloc * nnei * 3);
       std::vector<int> nlist_0(nloc * nnei), nlist_1(nloc * nnei);
-      std::vector<double> avg(ntypes * ndescrpt, 0);
-      std::vector<double> std(ntypes * ndescrpt, 1);
-      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], inlist_0, max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
-      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], inlist_1, max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
+      std::vector<double> avg(ntypes * ndescrpt, 0);
+      std::vector<double> std(ntypes * ndescrpt, 1);
+      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0],
+                                 &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0],
+                                 inlist_0, max_nnei_0, &avg[0], &std[0], nloc,
+                                 nall, rc, rc_smth, sec_a);
+      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0],
+                                 &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0],
+                                 inlist_1, max_nnei_1, &avg[0], &std[0], nloc,
+                                 nall, rc, rc_smth, sec_a);
       std::vector<double> energy_0(nloc), energy_1(nloc);
       std::vector<double> t_force(nall * 3), t_virial(nall * 9);
-      deepmd::pair_tab_cpu(
-	  &energy_0[0],
-	  &t_force[0],
-	  &t_virial[0],
-	  &tab_info[0],
-	  &tab_data[0],
-	  &rij_0[0],
-	  &scale[0],
-	  &atype_cpy_0[0],
-	  &nlist_0[0],
-	  &natoms[0],
-	  sel_a,
-	  sel_r);  
-      deepmd::pair_tab_cpu(
-	  &energy_1[0],
-	  &t_force[0],
-	  &t_virial[0],
-	  &tab_info[0],
-	  &tab_data[0],
-	  &rij_1[0],
-	  &scale[0],
-	  &atype_cpy_1[0],
-	  &nlist_1[0],
-	  &natoms[0],
-	  sel_a,
-	  sel_r); 
+      deepmd::pair_tab_cpu(&energy_0[0], &t_force[0], &t_virial[0],
+                           &tab_info[0], &tab_data[0], &rij_0[0], &scale[0],
+                           &atype_cpy_0[0], &nlist_0[0], &natoms[0], sel_a,
+                           sel_r);
+      deepmd::pair_tab_cpu(&energy_1[0], &t_force[0], &t_virial[0],
+                           &tab_info[0], &tab_data[0], &rij_1[0], &scale[0],
+                           &atype_cpy_1[0], &nlist_1[0], &natoms[0], sel_a,
+                           sel_r);
       double tot_e_0(0), tot_e_1(0);
-      for(int ii = 0; ii < nloc; ++ii){
-	tot_e_0 += energy_0[ii];
-	tot_e_1 += energy_1[ii];
+      for (int ii = 0; ii < nloc; ++ii) {
+        tot_e_0 += energy_0[ii];
+        tot_e_1 += energy_1[ii];
       }
-      num_deriv[dd0*3+dd1] = - (tot_e_1 - tot_e_0) / (2. * hh);
+      num_deriv[dd0 * 3 + dd1] = -(tot_e_1 - tot_e_0) / (2. * hh);
       // std::cout << num_deriv[dd0*3+dd1] << std::endl;
-    }  
+    }
   }
   std::vector<double> num_vir(9, 0);
-  for (int dd0 = 0; dd0 < 3; ++dd0){
-    for (int dd1 = 0; dd1 < 3; ++dd1){
-      num_vir[dd0*3+dd1] = 0;
-      for (int dd = 0; dd < 3; ++dd){
-	num_vir[dd0*3+dd1] += num_deriv[dd*3+dd0] * box[dd*3+dd1];
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
+      num_vir[dd0 * 3 + dd1] = 0;
+      for (int dd = 0; dd < 3; ++dd) {
+        num_vir[dd0 * 3 + dd1] += num_deriv[dd * 3 + dd0] * box[dd * 3 + dd1];
       }
-      // std::cout << num_vir[dd0*3+dd1] << " " << virial[dd0*3+dd1] << std::endl;
-      EXPECT_LT(fabs(new_scale * num_vir[dd0*3+dd1] - virial[dd0*3+dd1]), 1e-8);
+      // std::cout << num_vir[dd0*3+dd1] << " " << virial[dd0*3+dd1] <<
+      // std::endl;
+      EXPECT_LT(
+          fabs(new_scale * num_vir[dd0 * 3 + dd1] - virial[dd0 * 3 + dd1]),
+          1e-8);
     }
   }
 }
 
-
-TEST_F(TestPairTabTriBox, cpu_v_num_deriv)
-{  
-  std::vector<double> energy(nloc);
-  std::vector<double> force(nall * 3);
-  std::vector<double> virial(9, 0.);
-  std::vector<double> atom_virial(nall * 9);
-  std::vector<double> scale(nloc, 1.0);
-  deepmd::pair_tab_cpu(
-      &energy[0],
-      &force[0],
-      &atom_virial[0],
-      &tab_info[0],
-      &tab_data[0],
-      &rij[0],
-      &scale[0],
-      &atype_cpy[0],
-      &nlist[0],
-      &natoms[0],
-      sel_a, 
-      sel_r);  
-  for (int ii = nloc; ii < nall; ++ii){
-    for (int dd = 0; dd < 3; ++dd){
+TEST_F(TestPairTabTriBox, cpu_v_num_deriv) {
+  std::vector<double> energy(nloc);
+  std::vector<double> force(nall * 3);
+  std::vector<double> virial(9, 0.);
+  std::vector<double> atom_virial(nall * 9);
+  std::vector<double> scale(nloc, 1.0);
+  deepmd::pair_tab_cpu(&energy[0], &force[0], &atom_virial[0], &tab_info[0],
+                       &tab_data[0], &rij[0], &scale[0], &atype_cpy[0],
+                       &nlist[0], &natoms[0], sel_a, sel_r);
+  for (int ii = nloc; ii < nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
       int orig_idx = mapping[ii];
-      force[orig_idx*3+dd] += force[ii*3+dd];
+      force[orig_idx * 3 + dd] += force[ii * 3 + dd];
     }
   }
-  for (int ii = 0; ii < nall; ++ii){
-    for (int dd = 0; dd < 9; ++dd){
-      virial[dd] += atom_virial[ii*9+dd];
+  for (int ii = 0; ii < nall; ++ii) {
+    for (int dd = 0; dd < 9; ++dd) {
+      virial[dd] += atom_virial[ii * 9 + dd];
     }
   }
   double hh = 1e-4;
   std::vector<double> num_deriv(9);
-  for(int dd0 = 0; dd0 < 3; ++dd0){
-    for(int dd1 = 0; dd1 < 3; ++dd1){
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
       std::vector<double> box_0(9);
       std::vector<double> box_1(9);
       std::copy(box.begin(), box.end(), box_0.begin());
       std::copy(box.begin(), box.end(), box_1.begin());
-      box_0[dd0*3+dd1] -= hh;
-      box_1[dd0*3+dd1] += hh;
+      box_0[dd0 * 3 + dd1] -= hh;
+      box_1[dd0 * 3 + dd1] += hh;
       SimulationRegion region_0, region_1;
       region_0.reinitBox(&box_0[0]);
       region_1.reinitBox(&box_1[0]);
       std::vector<double> posi_0(nloc * 3), posi_1(nloc * 3);
-      for(int jj = 0; jj < nloc; ++jj){
-	double ci[3], co[3];
-	region.phys2Inter(ci, &posi[jj*3]);
-	region_0.inter2Phys(co, ci);
-	std::copy(co, co+3, posi_0.begin() + jj*3);
-	region_1.inter2Phys(co, ci);
-	std::copy(co, co+3, posi_1.begin() + jj*3);	
+      for (int jj = 0; jj < nloc; ++jj) {
+        double ci[3], co[3];
+        region.phys2Inter(ci, &posi[jj * 3]);
+        region_0.inter2Phys(co, ci);
+        std::copy(co, co + 3, posi_0.begin() + jj * 3);
+        region_1.inter2Phys(co, ci);
+        std::copy(co, co + 3, posi_1.begin() + jj * 3);
       }
       std::vector<double> posi_cpy_0, posi_cpy_1;
       std::vector<int> atype_cpy_0, atype_cpy_1;
       std::vector<int> t_mapping;
-      copy_coord(posi_cpy_0, atype_cpy_0, t_mapping, ncell, ngcell, posi_0, atype, rc, region_0);
-      copy_coord(posi_cpy_1, atype_cpy_1, t_mapping, ncell, ngcell, posi_1, atype, rc, region_1);
+      copy_coord(posi_cpy_0, atype_cpy_0, t_mapping, ncell, ngcell, posi_0,
+                 atype, rc, region_0);
+      copy_coord(posi_cpy_1, atype_cpy_1, t_mapping, ncell, ngcell, posi_1,
+                 atype, rc, region_1);
       EXPECT_EQ(atype_cpy_0, atype_cpy_1);
-      for (int jj = 0; jj < atype_cpy_0.size(); ++jj){
-	EXPECT_EQ(atype_cpy_0[jj], atype_cpy_1[jj]);
+      for (int jj = 0; jj < atype_cpy_0.size(); ++jj) {
+        EXPECT_EQ(atype_cpy_0[jj], atype_cpy_1[jj]);
       }
       std::vector<std::vector<int>> nlist_cpy_0, nlist_cpy_1, t_nlist;
-      build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region_0, ncell);
-      build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region_1, ncell);
-      std::vector ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc), numneigh_1(nloc);;
+      build_nlist(nlist_cpy_0, t_nlist, posi_cpy_0, nloc, rc, rc, nat_stt,
+                  ncell, ext_stt, ext_end, region_0, ncell);
+      build_nlist(nlist_cpy_1, t_nlist, posi_cpy_1, nloc, rc, rc, nat_stt,
+                  ncell, ext_stt, ext_end, region_1, ncell);
+      std::vector<int> ilist_0(nloc), numneigh_0(nloc), ilist_1(nloc),
+          numneigh_1(nloc);
+      ;
       std::vector<int*> firstneigh_0(nloc), firstneigh_1(nloc);
-      deepmd::InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0], &firstneigh_0[0]);
-      deepmd::InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0], &firstneigh_1[0]);
+      deepmd::InputNlist inlist_0(nloc, &ilist_0[0], &numneigh_0[0],
+                                  &firstneigh_0[0]);
+      deepmd::InputNlist inlist_1(nloc, &ilist_1[0], &numneigh_1[0],
+                                  &firstneigh_1[0]);
       convert_nlist(inlist_0, nlist_cpy_0);
       convert_nlist(inlist_1, nlist_cpy_1);
       int max_nnei_0 = max_numneigh(inlist_0);
       int max_nnei_1 = max_numneigh(inlist_1);
       EXPECT_EQ(max_nnei_0, max_nnei_1);
-      std::vector<double> t_em(nloc * ndescrpt), t_em_deriv(nloc * ndescrpt * 3);
+      std::vector<double> t_em(nloc * ndescrpt),
+          t_em_deriv(nloc * ndescrpt * 3);
       std::vector<double> rij_0(nloc * nnei * 3), rij_1(nloc * nnei * 3);
       std::vector<int> nlist_0(nloc * nnei), nlist_1(nloc * nnei);
-      std::vector<double> avg(ntypes * ndescrpt, 0);
-      std::vector<double> std(ntypes * ndescrpt, 1);
-      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0], &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0], inlist_0, max_nnei_0, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
-      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0], &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0], inlist_1, max_nnei_1, &avg[0], &std[0], nloc, nall, rc, rc_smth, sec_a);
+      std::vector<double> avg(ntypes * ndescrpt, 0);
+      std::vector<double> std(ntypes * ndescrpt, 1);
+      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_0[0],
+                                 &nlist_0[0], &posi_cpy_0[0], &atype_cpy_0[0],
+                                 inlist_0, max_nnei_0, &avg[0], &std[0], nloc,
+                                 nall, rc, rc_smth, sec_a);
+      deepmd::prod_env_mat_a_cpu(&t_em[0], &t_em_deriv[0], &rij_1[0],
+                                 &nlist_1[0], &posi_cpy_1[0], &atype_cpy_1[0],
+                                 inlist_1, max_nnei_1, &avg[0], &std[0], nloc,
+                                 nall, rc, rc_smth, sec_a);
       std::vector<double> energy_0(nloc), energy_1(nloc);
       std::vector<double> t_force(nall * 3), t_virial(nall * 9);
-      deepmd::pair_tab_cpu(
-	  &energy_0[0],
-	  &t_force[0],
-	  &t_virial[0],
-	  &tab_info[0],
-	  &tab_data[0],
-	  &rij_0[0],
-	  &scale[0],
-	  &atype_cpy_0[0],
-	  &nlist_0[0],
-	  &natoms[0],
-	  sel_a,
-	  sel_r);  
-      deepmd::pair_tab_cpu(
-	  &energy_1[0],
-	  &t_force[0],
-	  &t_virial[0],
-	  &tab_info[0],
-	  &tab_data[0],
-	  &rij_1[0],
-	  &scale[0],
-	  &atype_cpy_1[0],
-	  &nlist_1[0],
-	  &natoms[0],
-	  sel_a,
-	  sel_r); 
+      deepmd::pair_tab_cpu(&energy_0[0], &t_force[0], &t_virial[0],
+                           &tab_info[0], &tab_data[0], &rij_0[0], &scale[0],
+                           &atype_cpy_0[0], &nlist_0[0], &natoms[0], sel_a,
+                           sel_r);
+      deepmd::pair_tab_cpu(&energy_1[0], &t_force[0], &t_virial[0],
+                           &tab_info[0], &tab_data[0], &rij_1[0], &scale[0],
+                           &atype_cpy_1[0], &nlist_1[0], &natoms[0], sel_a,
+                           sel_r);
       double tot_e_0(0), tot_e_1(0);
-      for(int ii = 0; ii < nloc; ++ii){
-	tot_e_0 += energy_0[ii];
-	tot_e_1 += energy_1[ii];
+      for (int ii = 0; ii < nloc; ++ii) {
+        tot_e_0 += energy_0[ii];
+        tot_e_1 += energy_1[ii];
       }
-      num_deriv[dd0*3+dd1] = - (tot_e_1 - tot_e_0) / (2. * hh);
+      num_deriv[dd0 * 3 + dd1] = -(tot_e_1 - tot_e_0) / (2. * hh);
       // std::cout << num_deriv[dd0*3+dd1] << std::endl;
-    }  
+    }
   }
   std::vector<double> num_vir(9, 0);
-  for (int dd0 = 0; dd0 < 3; ++dd0){
-    for (int dd1 = 0; dd1 < 3; ++dd1){
-      num_vir[dd0*3+dd1] = 0;
-      for (int dd = 0; dd < 3; ++dd){
-	num_vir[dd0*3+dd1] += num_deriv[dd*3+dd0] * box[dd*3+dd1];
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
+      num_vir[dd0 * 3 + dd1] = 0;
+      for (int dd = 0; dd < 3; ++dd) {
+        num_vir[dd0 * 3 + dd1] += num_deriv[dd * 3 + dd0] * box[dd * 3 + dd1];
       }
-      // std::cout << num_vir[dd0*3+dd1] << " " << virial[dd0*3+dd1] << std::endl;
-      EXPECT_LT(fabs(num_vir[dd0*3+dd1] - virial[dd0*3+dd1]), 1e-8);
+      // std::cout << num_vir[dd0*3+dd1] << " " << virial[dd0*3+dd1] <<
+      // std::endl;
+      EXPECT_LT(fabs(num_vir[dd0 * 3 + dd1] - virial[dd0 * 3 + dd1]), 1e-8);
     }
   }
 }
diff --git a/source/lib/tests/test_prod_force_a.cc b/source/lib/tests/test_prod_force_a.cc
index 71685de2aa..03ef65fa3f 100644
--- a/source/lib/tests/test_prod_force_a.cc
+++ b/source/lib/tests/test_prod_force_a.cc
@@ -1,29 +1,26 @@
-#include <iostream>
 #include <gtest/gtest.h>
-#include "fmt_nlist.h"
+
+#include <iostream>
+
+#include "device.h"
 #include "env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
 #include "prod_force.h"
-#include "device.h"
 
-class TestProdForceA : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestProdForceA : public ::testing::Test {
+ protected:
+  std::vector<double> posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector<int> atype = {0, 1, 1, 0, 1, 1};
+  std::vector<double> posi_cpy;
+  std::vector<int> atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion<double> region;
   std::vector<int> mapping, ncell, ngcell;
   std::vector<int> sec_a = {0, 5, 10};
   std::vector<int> sec_r = {0, 0, 0};
@@ -33,14 +30,38 @@ class TestProdForceA : public ::testing::Test
   std::vector<double> env, env_deriv, rij_a;
   std::vector<int> nlist;
   std::vector<int> fmt_nlist_a;
-  std::vector expected_force = {
-    9.44498, -13.86254, 10.52884, -19.42688,  8.09273, 19.64478,  4.81771, 11.39255, 12.38830, -16.65832,  6.65153, -10.15585,  1.16660, -14.43259, 22.97076, 22.86479,  7.42726, -11.41943, -7.67893, -7.23287, -11.33442, -4.51184, -3.80588, -2.44935,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  1.16217,  6.16192, -28.79094,  3.81076, -0.01986, -1.01629,  3.65869, -0.49195, -0.07437,  1.35028,  0.11969, -0.29201,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
-  };  
-  
+  std::vector<double> expected_force = {
+      9.44498,  -13.86254, 10.52884,  -19.42688, 8.09273,  19.64478,
+      4.81771,  11.39255,  12.38830,  -16.65832, 6.65153,  -10.15585,
+      1.16660,  -14.43259, 22.97076,  22.86479,  7.42726,  -11.41943,
+      -7.67893, -7.23287,  -11.33442, -4.51184,  -3.80588, -2.44935,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      1.16217,  6.16192,   -28.79094, 3.81076,   -0.01986, -1.01629,
+      3.65869,  -0.49195,  -0.07437,  1.35028,   0.11969,  -0.29201,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+  };
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
@@ -48,50 +69,53 @@ class TestProdForceA : public ::testing::Test
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
     env.resize(nloc * ndescrpt);
     env_deriv.resize(nloc * ndescrpt * 3);
     rij_a.resize(nloc * nnei * 3);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
-      std::vector t_env, t_env_deriv, t_rij_a;
+      std::vector<double> t_env, t_env_deriv, t_rij_a;
       // compute env_mat and its deriv, record
-      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij_a, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-      for (int jj = 0; jj < ndescrpt; ++jj){
-	env[ii*ndescrpt+jj] = t_env[jj];
-	for (int dd = 0; dd < 3; ++dd){
-	  env_deriv[ii*ndescrpt*3+jj*3+dd] = t_env_deriv[jj*3+dd];
-	}
+      deepmd::env_mat_a_cpu<double>(t_env, t_env_deriv, t_rij_a, posi_cpy,
+                                    atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth,
+                                    rc);
+      for (int jj = 0; jj < ndescrpt; ++jj) {
+        env[ii * ndescrpt + jj] = t_env[jj];
+        for (int dd = 0; dd < 3; ++dd) {
+          env_deriv[ii * ndescrpt * 3 + jj * 3 + dd] = t_env_deriv[jj * 3 + dd];
+        }
       }
     }
     net_deriv.resize(nloc * ndescrpt);
-    for (int ii = 0; ii < nloc * ndescrpt; ++ii){
+    for (int ii = 0; ii < nloc * ndescrpt; ++ii) {
       net_deriv[ii] = 10 - ii * 0.01;
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-TEST_F(TestProdForceA, cpu)
-{
+TEST_F(TestProdForceA, cpu) {
   std::vector<double> force(nall * 3);
   int n_a_sel = nnei;
-  deepmd::prod_force_a_cpu (&force[0], &net_deriv[0], &env_deriv[0], &nlist[0], nloc, nall, nnei);
+  deepmd::prod_force_a_cpu<double>(&force[0], &net_deriv[0], &env_deriv[0],
+                                   &nlist[0], nloc, nall, nnei);
   EXPECT_EQ(force.size(), nall * 3);
   EXPECT_EQ(force.size(), expected_force.size());
-  for (int jj = 0; jj < force.size(); ++jj){
-    EXPECT_LT(fabs(force[jj] - expected_force[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < force.size(); ++jj) {
+    EXPECT_LT(fabs(force[jj] - expected_force[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nall * 3; ++jj){
   //   printf("%8.5f, ", force[jj]);
   // }
@@ -99,21 +123,21 @@ TEST_F(TestProdForceA, cpu)
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestProdForceA, gpu_cuda)
-{
+TEST_F(TestProdForceA, gpu_cuda) {
   std::vector<double> force(nall * 3, 0.0);
   int n_a_sel = nnei;
 
-  int * nlist_dev = NULL;
-  double * force_dev = NULL, * net_deriv_dev = NULL, * env_deriv_dev = NULL;
+  int* nlist_dev = NULL;
+  double *force_dev = NULL, *net_deriv_dev = NULL, *env_deriv_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(force_dev, force);
   deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv);
   deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
 
-  deepmd::prod_force_a_gpu_cuda (force_dev, net_deriv_dev, env_deriv_dev, nlist_dev, nloc, nall, nnei);
-  
+  deepmd::prod_force_a_gpu_cuda<double>(force_dev, net_deriv_dev, env_deriv_dev,
+                                        nlist_dev, nloc, nall, nnei);
+
   deepmd::memcpy_device_to_host(force_dev, force);
   deepmd::delete_device_memory(nlist_dev);
   deepmd::delete_device_memory(force_dev);
@@ -122,28 +146,28 @@ TEST_F(TestProdForceA, gpu_cuda)
 
   EXPECT_EQ(force.size(), nall * 3);
   EXPECT_EQ(force.size(), expected_force.size());
-  for (int jj = 0; jj < force.size(); ++jj){
-    EXPECT_LT(fabs(force[jj] - expected_force[jj]) , 1e-5);
+  for (int jj = 0; jj < force.size(); ++jj) {
+    EXPECT_LT(fabs(force[jj] - expected_force[jj]), 1e-5);
   }
 }
-#endif // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestProdForceA, gpu_rocm)
-{
+TEST_F(TestProdForceA, gpu_rocm) {
   std::vector<double> force(nall * 3, 0.0);
   int n_a_sel = nnei;
 
-  int * nlist_dev = NULL;
-  double * force_dev = NULL, * net_deriv_dev = NULL, * env_deriv_dev = NULL;
+  int* nlist_dev = NULL;
+  double *force_dev = NULL, *net_deriv_dev = NULL, *env_deriv_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(force_dev, force);
   deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv);
   deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
 
-  deepmd::prod_force_a_gpu_rocm (force_dev, net_deriv_dev, env_deriv_dev, nlist_dev, nloc, nall, nnei);
-  
+  deepmd::prod_force_a_gpu_rocm<double>(force_dev, net_deriv_dev, env_deriv_dev,
+                                        nlist_dev, nloc, nall, nnei);
+
   deepmd::memcpy_device_to_host(force_dev, force);
   deepmd::delete_device_memory(nlist_dev);
   deepmd::delete_device_memory(force_dev);
@@ -152,8 +176,8 @@ TEST_F(TestProdForceA, gpu_rocm)
 
   EXPECT_EQ(force.size(), nall * 3);
   EXPECT_EQ(force.size(), expected_force.size());
-  for (int jj = 0; jj < force.size(); ++jj){
-    EXPECT_LT(fabs(force[jj] - expected_force[jj]) , 1e-5);
+  for (int jj = 0; jj < force.size(); ++jj) {
+    EXPECT_LT(fabs(force[jj] - expected_force[jj]), 1e-5);
   }
 }
-#endif // TENSORFLOW_USE_ROCM
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_prod_force_grad_a.cc b/source/lib/tests/test_prod_force_grad_a.cc
index 45f576a667..bd88a86c77 100644
--- a/source/lib/tests/test_prod_force_grad_a.cc
+++ b/source/lib/tests/test_prod_force_grad_a.cc
@@ -1,29 +1,26 @@
-#include <iostream>
 #include <gtest/gtest.h>
-#include "fmt_nlist.h"
+
+#include <iostream>
+
+#include "device.h"
 #include "env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
 #include "prod_force_grad.h"
-#include "device.h"
 
-class TestProdForceGradA : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestProdForceGradA : public ::testing::Test {
+ protected:
+  std::vector<double> posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector<int> atype = {0, 1, 1, 0, 1, 1};
+  std::vector<double> posi_cpy;
+  std::vector<int> atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion<double> region;
   std::vector<int> mapping, ncell, ngcell;
   std::vector<int> sec_a = {0, 5, 10};
   std::vector<int> sec_r = {0, 0, 0};
@@ -33,14 +30,49 @@ class TestProdForceGradA : public ::testing::Test
   std::vector env, env_deriv, rij_a;
   std::vector nlist;
   std::vector fmt_nlist_a;
-  std::vector expected_grad_net = {
-    -0.12141, -0.11963,  0.01198,  0.04647,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.04188,  0.37642,  0.28680,  0.26547, -0.40861,  0.25610, -0.02009,  1.00344, -0.16166, -0.16355,  0.03691,  0.01165, -0.08770, -0.08561, -0.00398,  0.02366,  0.00000,  0.00000,  0.00000,  0.00000, -0.04188, -0.37642, -0.28680, -0.26547, -0.03357, -0.03151,  0.00454,  0.01377,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.04304,  0.05219,  0.08677,  0.16032, -0.05232, -0.05123,  0.01227,  0.00935, -0.01420, -0.01366, -0.00022,  0.00404,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.40861, -0.25610,  0.02009, -1.00344, -0.04863, -0.04701,  0.02501,  0.01556,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.04304, -0.05219, -0.08677, -0.16032, -0.08249, -0.07502,  0.04767, -0.00448, -0.08260, -0.08165,  0.01821,  0.01869,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.12141,  0.11963, -0.01198, -0.04647,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.16227,  0.28667,  0.50683,  0.06651, -0.58330, -0.45376,  0.37464,  0.93891, -0.04863,  0.04701, -0.02501, -0.01556, -0.03357,  0.03151, -0.00454, -0.01377,  0.00000,  0.00000,  0.00000,  0.00000, -0.16227, -0.28667, -0.50683, -0.06651, -0.16166,  0.16355, -0.03691, -0.01165,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.04418,  0.09284,  0.09569,  0.19565, -0.08249,  0.07502, -0.04767,  0.00448, -0.05232,  0.05123, -0.01227, -0.00935,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
0.00000, -0.58330,  0.45376, -0.37464, -0.93891, -0.08770,  0.08561,  0.00398, -0.02366,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.04418, -0.09284, -0.09569, -0.19565, -0.08260,  0.08165, -0.01821, -0.01869, -0.01420,  0.01366,  0.00022, -0.00404,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
-  };  
-  
+  std::vector expected_grad_net = {
+      -0.12141, -0.11963, 0.01198,  0.04647,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  -0.04188,
+      0.37642,  0.28680,  0.26547,  -0.40861, 0.25610,  -0.02009, 1.00344,
+      -0.16166, -0.16355, 0.03691,  0.01165,  -0.08770, -0.08561, -0.00398,
+      0.02366,  0.00000,  0.00000,  0.00000,  0.00000,  -0.04188, -0.37642,
+      -0.28680, -0.26547, -0.03357, -0.03151, 0.00454,  0.01377,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  -0.04304, 0.05219,  0.08677,
+      0.16032,  -0.05232, -0.05123, 0.01227,  0.00935,  -0.01420, -0.01366,
+      -0.00022, 0.00404,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  -0.40861, -0.25610, 0.02009,  -1.00344,
+      -0.04863, -0.04701, 0.02501,  0.01556,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  -0.04304, -0.05219, -0.08677, -0.16032, -0.08249,
+      -0.07502, 0.04767,  -0.00448, -0.08260, -0.08165, 0.01821,  0.01869,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  -0.12141, 0.11963,  -0.01198, -0.04647, 0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      -0.16227, 0.28667,  0.50683,  0.06651,  -0.58330, -0.45376, 0.37464,
+      0.93891,  -0.04863, 0.04701,  -0.02501, -0.01556, -0.03357, 0.03151,
+      -0.00454, -0.01377, 0.00000,  0.00000,  0.00000,  0.00000,  -0.16227,
+      -0.28667, -0.50683, -0.06651, -0.16166, 0.16355,  -0.03691, -0.01165,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  -0.04418, 0.09284,
+      0.09569,  0.19565,  -0.08249, 0.07502,  -0.04767, 0.00448,  -0.05232,
+      0.05123,  -0.01227, -0.00935, 0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  -0.58330, 0.45376,  -0.37464,
+      -0.93891, -0.08770, 0.08561,  0.00398,  -0.02366, 0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  -0.04418, -0.09284, -0.09569, -0.19565,
+      -0.08260, 0.08165,  -0.01821, -0.01869, -0.01420, 0.01366,  0.00022,
+      -0.00404, 0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,
+  };
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
@@ -48,49 +80,52 @@ class TestProdForceGradA : public ::testing::Test
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
     env.resize(nloc * ndescrpt);
     env_deriv.resize(nloc * ndescrpt * 3);
     rij_a.resize(nloc * nnei * 3);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
-      std::vector t_env, t_env_deriv, t_rij_a;
+      std::vector t_env, t_env_deriv, t_rij_a;
       // compute env_mat and its deriv, record
-      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij_a, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-      for (int jj = 0; jj < ndescrpt; ++jj){
-	env[ii*ndescrpt+jj] = t_env[jj];
-	for (int dd = 0; dd < 3; ++dd){
-	  env_deriv[ii*ndescrpt*3+jj*3+dd] = t_env_deriv[jj*3+dd];
-	}
+      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij_a, posi_cpy,
+                                    atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth,
+                                    rc);
+      for (int jj = 0; jj < ndescrpt; ++jj) {
+        env[ii * ndescrpt + jj] = t_env[jj];
+        for (int dd = 0; dd < 3; ++dd) {
+          env_deriv[ii * ndescrpt * 3 + jj * 3 + dd] = t_env_deriv[jj * 3 + dd];
+        }
       }
     }
     grad.resize(nloc * 3);
-    for (int ii = 0; ii < nloc * 3; ++ii){
+    for (int ii = 0; ii < nloc * 3; ++ii) {
       grad[ii] = 10 - ii * 0.1;
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-TEST_F(TestProdForceGradA, cpu)
-{
+TEST_F(TestProdForceGradA, cpu) {
   std::vector grad_net(nloc * ndescrpt);
-  deepmd::prod_force_grad_a_cpu(&grad_net[0], &grad[0], &env_deriv[0], &nlist[0], nloc, nnei);
+  deepmd::prod_force_grad_a_cpu(&grad_net[0], &grad[0], &env_deriv[0],
+                                        &nlist[0], nloc, nnei);
   EXPECT_EQ(grad_net.size(), nloc * ndescrpt);
   EXPECT_EQ(grad_net.size(), expected_grad_net.size());
-  for (int jj = 0; jj < grad_net.size(); ++jj){
-    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < grad_net.size(); ++jj) {
+    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nloc * ndescrpt; ++jj){
   //   printf("%8.5f, ", grad_net[jj]);
   // }
@@ -98,17 +133,17 @@ TEST_F(TestProdForceGradA, cpu)
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestProdForceGradA, gpu)
-{
+TEST_F(TestProdForceGradA, gpu) {
   std::vector grad_net(nloc * ndescrpt);
-  int * nlist_dev = NULL;
-  double * grad_net_dev = NULL, * grad_dev = NULL, * env_deriv_dev = NULL;
+  int* nlist_dev = NULL;
+  double *grad_net_dev = NULL, *grad_dev = NULL, *env_deriv_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(grad_dev, grad);
   deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
   deepmd::malloc_device_memory(grad_net_dev, nloc * ndescrpt);
-  deepmd::prod_force_grad_a_gpu_cuda(grad_net_dev, grad_dev, env_deriv_dev, nlist_dev, nloc, nnei);
+  deepmd::prod_force_grad_a_gpu_cuda(
+      grad_net_dev, grad_dev, env_deriv_dev, nlist_dev, nloc, nnei);
   deepmd::memcpy_device_to_host(grad_net_dev, grad_net);
   deepmd::delete_device_memory(nlist_dev);
   deepmd::delete_device_memory(grad_dev);
@@ -117,28 +152,28 @@ TEST_F(TestProdForceGradA, gpu)
 
   EXPECT_EQ(grad_net.size(), nloc * ndescrpt);
   EXPECT_EQ(grad_net.size(), expected_grad_net.size());
-  for (int jj = 0; jj < grad_net.size(); ++jj){
-    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < grad_net.size(); ++jj) {
+    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nloc * ndescrpt; ++jj){
   //   printf("%8.5f, ", grad_net[jj]);
   // }
   // printf("\n");
 }
-#endif // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestProdForceGradA, gpu)
-{
+TEST_F(TestProdForceGradA, gpu) {
   std::vector grad_net(nloc * ndescrpt);
-  int * nlist_dev = NULL;
-  double * grad_net_dev = NULL, * grad_dev = NULL, * env_deriv_dev = NULL;
+  int* nlist_dev = NULL;
+  double *grad_net_dev = NULL, *grad_dev = NULL, *env_deriv_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(grad_dev, grad);
   deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
   deepmd::malloc_device_memory(grad_net_dev, nloc * ndescrpt);
-  deepmd::prod_force_grad_a_gpu_rocm(grad_net_dev, grad_dev, env_deriv_dev, nlist_dev, nloc, nnei);
+  deepmd::prod_force_grad_a_gpu_rocm(
+      grad_net_dev, grad_dev, env_deriv_dev, nlist_dev, nloc, nnei);
   deepmd::memcpy_device_to_host(grad_net_dev, grad_net);
   deepmd::delete_device_memory(nlist_dev);
   deepmd::delete_device_memory(grad_dev);
@@ -147,12 +182,12 @@ TEST_F(TestProdForceGradA, gpu)
 
   EXPECT_EQ(grad_net.size(), nloc * ndescrpt);
   EXPECT_EQ(grad_net.size(), expected_grad_net.size());
-  for (int jj = 0; jj < grad_net.size(); ++jj){
-    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < grad_net.size(); ++jj) {
+    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nloc * ndescrpt; ++jj){
   //   printf("%8.5f, ", grad_net[jj]);
   // }
   // printf("\n");
 }
-#endif // TENSORFLOW_USE_ROCM
\ No newline at end of file
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_prod_force_grad_r.cc b/source/lib/tests/test_prod_force_grad_r.cc
index 5db405931d..b0c062902f 100644
--- a/source/lib/tests/test_prod_force_grad_r.cc
+++ b/source/lib/tests/test_prod_force_grad_r.cc
@@ -1,29 +1,26 @@
-#include 
 #include 
-#include "fmt_nlist.h"
+
+#include 
+
+#include "device.h"
 #include "env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
 #include "prod_force_grad.h"
-#include "device.h"
 
-class TestProdForceGradR : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestProdForceGradR : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 5, 10};
   std::vector sec_r = {0, 0, 0};
@@ -33,14 +30,23 @@ class TestProdForceGradR : public ::testing::Test
   std::vector env, env_deriv, rij_a;
   std::vector nlist;
   std::vector fmt_nlist_a;
-  std::vector expected_grad_net = {
-    -0.12141, -1.33062,  0.12948,  0.50970,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.04188,  0.75283,  9.26593,  8.54987, -0.54546, -0.01575,  0.00681,  0.02755,  0.00000,  0.00000, -0.40861,  0.12805, -0.45057, 15.54539, -1.52411,  0.04701,  0.05002,  0.04668,  0.00000,  0.00000, -0.12141, -1.21099,  0.11750,  0.46323,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.16227, -1.14667, 14.61804,  1.94488,  1.18285, -0.04089, -0.01845, -0.00874,  0.00000,  0.00000, -0.58330,  1.13439,  5.18696, 13.10426,  0.49773,  0.01712,  0.00239, -0.01893,  0.00000,  0.00000,
-  };  
-  
+  std::vector expected_grad_net = {
+      -0.12141, -1.33062, 0.12948,  0.50970,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  -0.04188, 0.75283,  9.26593,  8.54987,
+      -0.54546, -0.01575, 0.00681,  0.02755,  0.00000,  0.00000,  -0.40861,
+      0.12805,  -0.45057, 15.54539, -1.52411, 0.04701,  0.05002,  0.04668,
+      0.00000,  0.00000,  -0.12141, -1.21099, 0.11750,  0.46323,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  -0.16227, -1.14667,
+      14.61804, 1.94488,  1.18285,  -0.04089, -0.01845, -0.00874, 0.00000,
+      0.00000,  -0.58330, 1.13439,  5.18696,  13.10426, 0.49773,  0.01712,
+      0.00239,  -0.01893, 0.00000,  0.00000,
+  };
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
@@ -48,49 +54,52 @@ class TestProdForceGradR : public ::testing::Test
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
     env.resize(nloc * ndescrpt);
     env_deriv.resize(nloc * ndescrpt * 3);
     rij_a.resize(nloc * nnei * 3);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
-      std::vector t_env, t_env_deriv, t_rij_a;
+      std::vector t_env, t_env_deriv, t_rij_a;
       // compute env_mat and its deriv, record
-      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij_a, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-      for (int jj = 0; jj < ndescrpt; ++jj){
-	env[ii*ndescrpt+jj] = t_env[jj];
-	for (int dd = 0; dd < 3; ++dd){
-	  env_deriv[ii*ndescrpt*3+jj*3+dd] = t_env_deriv[jj*3+dd];
-	}
+      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij_a, posi_cpy,
+                                    atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth,
+                                    rc);
+      for (int jj = 0; jj < ndescrpt; ++jj) {
+        env[ii * ndescrpt + jj] = t_env[jj];
+        for (int dd = 0; dd < 3; ++dd) {
+          env_deriv[ii * ndescrpt * 3 + jj * 3 + dd] = t_env_deriv[jj * 3 + dd];
+        }
       }
     }
     grad.resize(nloc * 3);
-    for (int ii = 0; ii < nloc * 3; ++ii){
+    for (int ii = 0; ii < nloc * 3; ++ii) {
       grad[ii] = 10 - ii * 0.1;
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-TEST_F(TestProdForceGradR, cpu)
-{
+TEST_F(TestProdForceGradR, cpu) {
   std::vector grad_net(nloc * ndescrpt);
-  deepmd::prod_force_grad_r_cpu(&grad_net[0], &grad[0], &env_deriv[0], &nlist[0], nloc, nnei);
+  deepmd::prod_force_grad_r_cpu(&grad_net[0], &grad[0], &env_deriv[0],
+                                        &nlist[0], nloc, nnei);
   EXPECT_EQ(grad_net.size(), nloc * ndescrpt);
   EXPECT_EQ(grad_net.size(), expected_grad_net.size());
-  for (int jj = 0; jj < grad_net.size(); ++jj){
-    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < grad_net.size(); ++jj) {
+    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nloc * ndescrpt; ++jj){
   //   printf("%8.5f, ", grad_net[jj]);
   // }
@@ -98,17 +107,17 @@ TEST_F(TestProdForceGradR, cpu)
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestProdForceGradR, gpu)
-{
+TEST_F(TestProdForceGradR, gpu) {
   std::vector grad_net(nloc * ndescrpt);
-  int * nlist_dev = NULL;
-  double * grad_net_dev = NULL, * grad_dev = NULL, * env_deriv_dev = NULL;
+  int* nlist_dev = NULL;
+  double *grad_net_dev = NULL, *grad_dev = NULL, *env_deriv_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(grad_dev, grad);
   deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
   deepmd::malloc_device_memory(grad_net_dev, nloc * ndescrpt);
-  deepmd::prod_force_grad_r_gpu_cuda(grad_net_dev, grad_dev, env_deriv_dev, nlist_dev, nloc, nnei);
+  deepmd::prod_force_grad_r_gpu_cuda(
+      grad_net_dev, grad_dev, env_deriv_dev, nlist_dev, nloc, nnei);
   deepmd::memcpy_device_to_host(grad_net_dev, grad_net);
   deepmd::delete_device_memory(nlist_dev);
   deepmd::delete_device_memory(grad_dev);
@@ -117,28 +126,28 @@ TEST_F(TestProdForceGradR, gpu)
 
   EXPECT_EQ(grad_net.size(), nloc * ndescrpt);
   EXPECT_EQ(grad_net.size(), expected_grad_net.size());
-  for (int jj = 0; jj < grad_net.size(); ++jj){
-    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < grad_net.size(); ++jj) {
+    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nloc * ndescrpt; ++jj){
   //   printf("%8.5f, ", grad_net[jj]);
   // }
   // printf("\n");
 }
-#endif // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestProdForceGradR, gpu)
-{
+TEST_F(TestProdForceGradR, gpu) {
   std::vector grad_net(nloc * ndescrpt);
-  int * nlist_dev = NULL;
-  double * grad_net_dev = NULL, * grad_dev = NULL, * env_deriv_dev = NULL;
+  int* nlist_dev = NULL;
+  double *grad_net_dev = NULL, *grad_dev = NULL, *env_deriv_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(grad_dev, grad);
   deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
   deepmd::malloc_device_memory(grad_net_dev, nloc * ndescrpt);
-  deepmd::prod_force_grad_r_gpu_rocm(grad_net_dev, grad_dev, env_deriv_dev, nlist_dev, nloc, nnei);
+  deepmd::prod_force_grad_r_gpu_rocm(
+      grad_net_dev, grad_dev, env_deriv_dev, nlist_dev, nloc, nnei);
   deepmd::memcpy_device_to_host(grad_net_dev, grad_net);
   deepmd::delete_device_memory(nlist_dev);
   deepmd::delete_device_memory(grad_dev);
@@ -147,12 +156,12 @@ TEST_F(TestProdForceGradR, gpu)
 
   EXPECT_EQ(grad_net.size(), nloc * ndescrpt);
   EXPECT_EQ(grad_net.size(), expected_grad_net.size());
-  for (int jj = 0; jj < grad_net.size(); ++jj){
-    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < grad_net.size(); ++jj) {
+    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nloc * ndescrpt; ++jj){
   //   printf("%8.5f, ", grad_net[jj]);
   // }
   // printf("\n");
 }
-#endif // TENSORFLOW_USE_ROCM
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_prod_force_r.cc b/source/lib/tests/test_prod_force_r.cc
index 6577d184a0..a1b5392355 100644
--- a/source/lib/tests/test_prod_force_r.cc
+++ b/source/lib/tests/test_prod_force_r.cc
@@ -1,29 +1,26 @@
-#include 
 #include 
-#include "fmt_nlist.h"
+
+#include 
+
+#include "device.h"
 #include "env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
 #include "prod_force.h"
-#include "device.h"
 
-class TestProdForceR : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestProdForceR : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 5, 10};
   std::vector sec_r = {0, 0, 0};
@@ -33,14 +30,35 @@ class TestProdForceR : public ::testing::Test
   std::vector env, env_deriv, rij_a;
   std::vector nlist;
   std::vector fmt_nlist_a;
-  std::vector expected_force = {
-    8.47974, -14.40744, -6.87937, -20.21879,  5.47417, 14.86084, -0.70576,  9.63198, -8.41144, -17.41399,  6.52118, -13.13187,  2.88846, -16.66137, 25.88393, 26.73044,  9.31580, -12.21548, -7.12901, -7.68483,  4.98461, -4.37014, -1.12277,  2.89025,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  7.16157,  8.72191, -7.70101,  2.00363,  0.18561, -0.28694,  1.86052, -0.06905,  0.11829,  0.71335,  0.09481, -0.11182,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
-  };  
-  
+  std::vector expected_force = {
+      8.47974,  -14.40744, -6.87937,  -20.21879, 5.47417,   14.86084, -0.70576,
+      9.63198,  -8.41144,  -17.41399, 6.52118,   -13.13187, 2.88846,  -16.66137,
+      25.88393, 26.73044,  9.31580,   -12.21548, -7.12901,  -7.68483, 4.98461,
+      -4.37014, -1.12277,  2.89025,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,   7.16157,  8.72191,
+      -7.70101, 2.00363,   0.18561,   -0.28694,  1.86052,   -0.06905, 0.11829,
+      0.71335,  0.09481,   -0.11182,  0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+      0.00000,  0.00000,   0.00000,   0.00000,
+  };
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
@@ -48,50 +66,53 @@ class TestProdForceR : public ::testing::Test
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
     env.resize(nloc * ndescrpt);
     env_deriv.resize(nloc * ndescrpt * 3);
     rij_a.resize(nloc * nnei * 3);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
-      std::vector t_env, t_env_deriv, t_rij_a;
+      std::vector t_env, t_env_deriv, t_rij_a;
       // compute env_mat and its deriv, record
-      deepmd::env_mat_r_cpu(t_env, t_env_deriv, t_rij_a, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-      for (int jj = 0; jj < ndescrpt; ++jj){
-	env[ii*ndescrpt+jj] = t_env[jj];
-	for (int dd = 0; dd < 3; ++dd){
-	  env_deriv[ii*ndescrpt*3+jj*3+dd] = t_env_deriv[jj*3+dd];
-	}
+      deepmd::env_mat_r_cpu(t_env, t_env_deriv, t_rij_a, posi_cpy,
+                                    atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth,
+                                    rc);
+      for (int jj = 0; jj < ndescrpt; ++jj) {
+        env[ii * ndescrpt + jj] = t_env[jj];
+        for (int dd = 0; dd < 3; ++dd) {
+          env_deriv[ii * ndescrpt * 3 + jj * 3 + dd] = t_env_deriv[jj * 3 + dd];
+        }
       }
     }
     net_deriv.resize(nloc * ndescrpt);
-    for (int ii = 0; ii < nloc * ndescrpt; ++ii){
+    for (int ii = 0; ii < nloc * ndescrpt; ++ii) {
       net_deriv[ii] = 10 - ii * 0.01;
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-TEST_F(TestProdForceR, cpu)
-{
+TEST_F(TestProdForceR, cpu) {
   std::vector force(nall * 3);
   int n_a_sel = nnei;
-  deepmd::prod_force_r_cpu (&force[0], &net_deriv[0], &env_deriv[0], &nlist[0], nloc, nall, nnei);
+  deepmd::prod_force_r_cpu(&force[0], &net_deriv[0], &env_deriv[0],
+                                   &nlist[0], nloc, nall, nnei);
   EXPECT_EQ(force.size(), nall * 3);
   EXPECT_EQ(force.size(), expected_force.size());
-  for (int jj = 0; jj < force.size(); ++jj){
-    EXPECT_LT(fabs(force[jj] - expected_force[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < force.size(); ++jj) {
+    EXPECT_LT(fabs(force[jj] - expected_force[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nall * 3; ++jj){
   //   printf("%8.5f, ", force[jj]);
   // }
@@ -99,20 +120,20 @@ TEST_F(TestProdForceR, cpu)
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestProdForceR, gpu_cuda)
-{
+TEST_F(TestProdForceR, gpu_cuda) {
   std::vector force(nall * 3, 0.0);
   int n_a_sel = nnei;
 
-  int * nlist_dev = NULL;
-  double * force_dev = NULL, * net_deriv_dev = NULL, * env_deriv_dev = NULL;
+  int* nlist_dev = NULL;
+  double *force_dev = NULL, *net_deriv_dev = NULL, *env_deriv_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(force_dev, force);
   deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv);
   deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
 
-  deepmd::prod_force_r_gpu_cuda (force_dev, net_deriv_dev, env_deriv_dev, nlist_dev, nloc, nall, nnei);
+  deepmd::prod_force_r_gpu_cuda(force_dev, net_deriv_dev, env_deriv_dev,
+                                        nlist_dev, nloc, nall, nnei);
 
   deepmd::memcpy_device_to_host(force_dev, force);
   deepmd::delete_device_memory(nlist_dev);
@@ -122,27 +143,27 @@ TEST_F(TestProdForceR, gpu_cuda)
 
   EXPECT_EQ(force.size(), nall * 3);
   EXPECT_EQ(force.size(), expected_force.size());
-  for (int jj = 0; jj < force.size(); ++jj){
-    EXPECT_LT(fabs(force[jj] - expected_force[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < force.size(); ++jj) {
+    EXPECT_LT(fabs(force[jj] - expected_force[jj]), 1e-5);
+  }
 }
-#endif // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestProdForceR, gpu_rocm)
-{
+TEST_F(TestProdForceR, gpu_rocm) {
   std::vector force(nall * 3, 0.0);
   int n_a_sel = nnei;
 
-  int * nlist_dev = NULL;
-  double * force_dev = NULL, * net_deriv_dev = NULL, * env_deriv_dev = NULL;
+  int* nlist_dev = NULL;
+  double *force_dev = NULL, *net_deriv_dev = NULL, *env_deriv_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(force_dev, force);
   deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv);
   deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
 
-  deepmd::prod_force_r_gpu_rocm (force_dev, net_deriv_dev, env_deriv_dev, nlist_dev, nloc, nall, nnei);
+  deepmd::prod_force_r_gpu_rocm(force_dev, net_deriv_dev, env_deriv_dev,
+                                        nlist_dev, nloc, nall, nnei);
 
   deepmd::memcpy_device_to_host(force_dev, force);
   deepmd::delete_device_memory(nlist_dev);
@@ -152,8 +173,8 @@ TEST_F(TestProdForceR, gpu_rocm)
 
   EXPECT_EQ(force.size(), nall * 3);
   EXPECT_EQ(force.size(), expected_force.size());
-  for (int jj = 0; jj < force.size(); ++jj){
-    EXPECT_LT(fabs(force[jj] - expected_force[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < force.size(); ++jj) {
+    EXPECT_LT(fabs(force[jj] - expected_force[jj]), 1e-5);
+  }
 }
-#endif // TENSORFLOW_USE_ROCM
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_prod_virial_a.cc b/source/lib/tests/test_prod_virial_a.cc
index fcea02891d..2013ee4980 100644
--- a/source/lib/tests/test_prod_virial_a.cc
+++ b/source/lib/tests/test_prod_virial_a.cc
@@ -1,29 +1,26 @@
-#include 
 #include 
-#include "fmt_nlist.h"
+
+#include 
+
+#include "device.h"
 #include "env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
 #include "prod_virial.h"
-#include "device.h"
 
-class TestProdVirialA : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestProdVirialA : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 5, 10};
   std::vector sec_r = {0, 0, 0};
@@ -33,17 +30,80 @@ class TestProdVirialA : public ::testing::Test
   std::vector env, env_deriv, rij;
   std::vector nlist;
   std::vector fmt_nlist_a;
-  std::vector expected_virial = {
-    100.14628,  7.21146, -24.62874,  6.19651, 23.31547, -19.77773, -26.79150, -20.92554, 38.84203,
-  };  
-  std::vector expected_atom_virial = {
-    -3.24191,  1.35810,  2.45333, -9.14879,  3.83260,  6.92341, -10.54930,  4.41930,  7.98326, 14.83563, -6.21493, -11.22697,  4.51124, -1.88984, -3.41391,  2.04717, -0.85760, -1.54921,  0.84708, -0.10308,  0.07324,  3.51825, -0.49788,  0.40314,  2.91345, -0.37264,  0.27386, 12.62246, -5.19874,  7.42677,  4.80217, -2.69029,  5.41896,  9.55811, -2.42899,  5.14893,  9.90295,  4.54279, -7.75115, -2.89155, 13.50055, -20.91993,  4.00314, -1.76293,  2.92724, 20.15105,  2.86856, -3.55868, -4.22796, -1.12700,  1.46999, -21.43180, -9.30194, 12.54538,  2.86811,  5.92934, -3.94618,  4.83313,  5.21197, -3.36488,  6.67852,  8.34225, -5.44992,  5.97941,  1.92669, -4.70211,  4.91215,  1.63145, -3.96250,  3.27415,  1.02612, -2.52585,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  1.38833,  0.50613, -1.26233,  1.39901,  5.18116, -2.18118, -17.72748, -19.52039, 18.66001, 14.31034,  1.31715, -2.05955, -0.10872,  0.00743,  0.03656, -3.85572, -0.33481,  0.57900, 14.31190, -0.53814,  0.89498, -1.94166,  0.07960, -0.10726, -0.35985,  0.03981,  0.03397,  6.17091,  0.81760, -0.97011,  0.53923,  0.07572, -0.08012, -1.34189, -0.17373,  0.21536,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, 
-  };  
-  
+  std::vector expected_virial = {
+      100.14628, 7.21146,   -24.62874, 6.19651,  23.31547,
+      -19.77773, -26.79150, -20.92554, 38.84203,
+  };
+  std::vector expected_atom_virial = {
+      -3.24191,  1.35810,   2.45333,   -9.14879, 3.83260,   6.92341,  -10.54930,
+      4.41930,   7.98326,   14.83563,  -6.21493, -11.22697, 4.51124,  -1.88984,
+      -3.41391,  2.04717,   -0.85760,  -1.54921, 0.84708,   -0.10308, 0.07324,
+      3.51825,   -0.49788,  0.40314,   2.91345,  -0.37264,  0.27386,  12.62246,
+      -5.19874,  7.42677,   4.80217,   -2.69029, 5.41896,   9.55811,  -2.42899,
+      5.14893,   9.90295,   4.54279,   -7.75115, -2.89155,  13.50055, -20.91993,
+      4.00314,   -1.76293,  2.92724,   20.15105, 2.86856,   -3.55868, -4.22796,
+      -1.12700,  1.46999,   -21.43180, -9.30194, 12.54538,  2.86811,  5.92934,
+      -3.94618,  4.83313,   5.21197,   -3.36488, 6.67852,   8.34225,  -5.44992,
+      5.97941,   1.92669,   -4.70211,  4.91215,  1.63145,   -3.96250, 3.27415,
+      1.02612,   -2.52585,  0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   1.38833,   0.50613,   -1.26233, 1.39901,   5.18116,  -2.18118,
+      -17.72748, -19.52039, 18.66001,  14.31034, 1.31715,   -2.05955, -0.10872,
+      0.00743,   0.03656,   -3.85572,  -0.33481, 0.57900,   14.31190, -0.53814,
+      0.89498,   -1.94166,  0.07960,   -0.10726, -0.35985,  0.03981,  0.03397,
+      6.17091,   0.81760,   -0.97011,  0.53923,  0.07572,   -0.08012, -1.34189,
+      -0.17373,  0.21536,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000,   0.00000,   0.00000,  0.00000,
+  };
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
@@ -51,59 +111,63 @@ class TestProdVirialA : public ::testing::Test
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
     env.resize(nloc * ndescrpt);
     env_deriv.resize(nloc * ndescrpt * 3);
     rij.resize(nloc * nnei * 3);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
-      std::vector t_env, t_env_deriv, t_rij;
+      std::vector t_env, t_env_deriv, t_rij;
       // compute env_mat and its deriv, record
-      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-      for (int jj = 0; jj < ndescrpt; ++jj){
-	env[ii*ndescrpt+jj] = t_env[jj];
-	for (int dd = 0; dd < 3; ++dd){
-	  env_deriv[ii*ndescrpt*3+jj*3+dd] = t_env_deriv[jj*3+dd];
-	}
+      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij, posi_cpy,
+                                    atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth,
+                                    rc);
+      for (int jj = 0; jj < ndescrpt; ++jj) {
+        env[ii * ndescrpt + jj] = t_env[jj];
+        for (int dd = 0; dd < 3; ++dd) {
+          env_deriv[ii * ndescrpt * 3 + jj * 3 + dd] = t_env_deriv[jj * 3 + dd];
+        }
       }
-      for (int jj = 0; jj < nnei * 3; ++jj){
-	rij[ii*nnei*3 + jj] = t_rij[jj];
+      for (int jj = 0; jj < nnei * 3; ++jj) {
+        rij[ii * nnei * 3 + jj] = t_rij[jj];
       }
     }
     net_deriv.resize(nloc * ndescrpt);
-    for (int ii = 0; ii < nloc * ndescrpt; ++ii){
+    for (int ii = 0; ii < nloc * ndescrpt; ++ii) {
       net_deriv[ii] = 10 - ii * 0.01;
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-TEST_F(TestProdVirialA, cpu)
-{
+TEST_F(TestProdVirialA, cpu) {
   std::vector virial(9);
   std::vector atom_virial(nall * 9);
   int n_a_sel = nnei;
-  deepmd::prod_virial_a_cpu (&virial[0], &atom_virial[0], &net_deriv[0], &env_deriv[0], &rij[0], &nlist[0], nloc, nall, nnei);
+  deepmd::prod_virial_a_cpu(&virial[0], &atom_virial[0], &net_deriv[0],
+                                    &env_deriv[0], &rij[0], &nlist[0], nloc,
+                                    nall, nnei);
   EXPECT_EQ(virial.size(), 9);
   EXPECT_EQ(virial.size(), expected_virial.size());
-  EXPECT_EQ(atom_virial.size(), nall * 9);  
-  EXPECT_EQ(atom_virial.size(), expected_atom_virial.size());  
-  for (int jj = 0; jj < virial.size(); ++jj){
-    EXPECT_LT(fabs(virial[jj] - expected_virial[jj]) , 1e-5);
-  }  
-  for (int jj = 0; jj < atom_virial.size(); ++jj){
-    EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]) , 1e-5);
-  }  
+  EXPECT_EQ(atom_virial.size(), nall * 9);
+  EXPECT_EQ(atom_virial.size(), expected_atom_virial.size());
+  for (int jj = 0; jj < virial.size(); ++jj) {
+    EXPECT_LT(fabs(virial[jj] - expected_virial[jj]), 1e-5);
+  }
+  for (int jj = 0; jj < atom_virial.size(); ++jj) {
+    EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < 9; ++jj){
   //   printf("%8.5f, ", virial[jj]);
   // }
@@ -114,24 +178,26 @@ TEST_F(TestProdVirialA, cpu)
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestProdVirialA, gpu_cuda)
-{
+TEST_F(TestProdVirialA, gpu_cuda) {
   std::vector virial(9, 0.0);
   std::vector atom_virial(nall * 9, 0.0);
   int n_a_sel = nnei;
 
-  int * nlist_dev = NULL;
-  double * virial_dev = NULL, *atom_virial_dev = NULL, * net_deriv_dev = NULL, * env_deriv_dev = NULL, * rij_dev = NULL;
+  int* nlist_dev = NULL;
+  double *virial_dev = NULL, *atom_virial_dev = NULL, *net_deriv_dev = NULL,
+         *env_deriv_dev = NULL, *rij_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(virial_dev, virial);
   deepmd::malloc_device_memory_sync(atom_virial_dev, atom_virial);
-  deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv);  
-  deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);  
-  deepmd::malloc_device_memory_sync(rij_dev, rij);  
+  deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv);
+  deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
+  deepmd::malloc_device_memory_sync(rij_dev, rij);
+
+  deepmd::prod_virial_a_gpu_cuda(virial_dev, atom_virial_dev,
+                                         net_deriv_dev, env_deriv_dev, rij_dev,
+                                         nlist_dev, nloc, nall, nnei);
 
-  deepmd::prod_virial_a_gpu_cuda (virial_dev, atom_virial_dev, net_deriv_dev, env_deriv_dev, rij_dev, nlist_dev, nloc, nall, nnei);
-  
   deepmd::memcpy_device_to_host(virial_dev, virial);
   deepmd::memcpy_device_to_host(atom_virial_dev, atom_virial);
   deepmd::delete_device_memory(nlist_dev);
@@ -149,36 +215,38 @@ TEST_F(TestProdVirialA, gpu_cuda)
   // }
   EXPECT_EQ(virial.size(), 9);
   EXPECT_EQ(virial.size(), expected_virial.size());
-  EXPECT_EQ(atom_virial.size(), nall * 9);  
-  EXPECT_EQ(atom_virial.size(), expected_atom_virial.size());  
-  for (int jj = 0; jj < virial.size(); ++jj){
-    EXPECT_LT(fabs(virial[jj] - expected_virial[jj]) , 1e-5);
-  }  
-  for (int jj = 0; jj < atom_virial.size(); ++jj){
-    EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]) , 1e-5);
-  }  
+  EXPECT_EQ(atom_virial.size(), nall * 9);
+  EXPECT_EQ(atom_virial.size(), expected_atom_virial.size());
+  for (int jj = 0; jj < virial.size(); ++jj) {
+    EXPECT_LT(fabs(virial[jj] - expected_virial[jj]), 1e-5);
+  }
+  for (int jj = 0; jj < atom_virial.size(); ++jj) {
+    EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]), 1e-5);
+  }
 }
-#endif // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestProdVirialA, gpu_rocm)
-{
+TEST_F(TestProdVirialA, gpu_rocm) {
   std::vector virial(9, 0.0);
   std::vector atom_virial(nall * 9, 0.0);
   int n_a_sel = nnei;
 
-  int * nlist_dev = NULL;
-  double * virial_dev = NULL, *atom_virial_dev = NULL, * net_deriv_dev = NULL, * env_deriv_dev = NULL, * rij_dev = NULL;
+  int* nlist_dev = NULL;
+  double *virial_dev = NULL, *atom_virial_dev = NULL, *net_deriv_dev = NULL,
+         *env_deriv_dev = NULL, *rij_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(virial_dev, virial);
   deepmd::malloc_device_memory_sync(atom_virial_dev, atom_virial);
-  deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv);  
-  deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);  
-  deepmd::malloc_device_memory_sync(rij_dev, rij);  
+  deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv);
+  deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
+  deepmd::malloc_device_memory_sync(rij_dev, rij);
+
+  deepmd::prod_virial_a_gpu_rocm(virial_dev, atom_virial_dev,
+                                         net_deriv_dev, env_deriv_dev, rij_dev,
+                                         nlist_dev, nloc, nall, nnei);
 
-  deepmd::prod_virial_a_gpu_rocm (virial_dev, atom_virial_dev, net_deriv_dev, env_deriv_dev, rij_dev, nlist_dev, nloc, nall, nnei);
-  
   deepmd::memcpy_device_to_host(virial_dev, virial);
   deepmd::memcpy_device_to_host(atom_virial_dev, atom_virial);
   deepmd::delete_device_memory(nlist_dev);
@@ -196,13 +264,13 @@ TEST_F(TestProdVirialA, gpu_rocm)
   // }
   EXPECT_EQ(virial.size(), 9);
   EXPECT_EQ(virial.size(), expected_virial.size());
-  EXPECT_EQ(atom_virial.size(), nall * 9);  
-  EXPECT_EQ(atom_virial.size(), expected_atom_virial.size());  
-  for (int jj = 0; jj < virial.size(); ++jj){
-    EXPECT_LT(fabs(virial[jj] - expected_virial[jj]) , 1e-5);
-  }  
-  for (int jj = 0; jj < atom_virial.size(); ++jj){
-    EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]) , 1e-5);
-  }  
+  EXPECT_EQ(atom_virial.size(), nall * 9);
+  EXPECT_EQ(atom_virial.size(), expected_atom_virial.size());
+  for (int jj = 0; jj < virial.size(); ++jj) {
+    EXPECT_LT(fabs(virial[jj] - expected_virial[jj]), 1e-5);
+  }
+  for (int jj = 0; jj < atom_virial.size(); ++jj) {
+    EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]), 1e-5);
+  }
 }
-#endif //TENSORFLOW_USE_ROCM
\ No newline at end of file
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_prod_virial_grad_a.cc b/source/lib/tests/test_prod_virial_grad_a.cc
index 469c7d4027..979ed0d4b4 100644
--- a/source/lib/tests/test_prod_virial_grad_a.cc
+++ b/source/lib/tests/test_prod_virial_grad_a.cc
@@ -1,29 +1,26 @@
-#include 
 #include 
-#include "fmt_nlist.h"
+
+#include 
+
+#include "device.h"
 #include "env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
 #include "prod_virial_grad.h"
-#include "device.h"
 
-class TestProdVirialGradA : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestProdVirialGradA : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 5, 10};
   std::vector sec_r = {0, 0, 0};
@@ -33,14 +30,49 @@ class TestProdVirialGradA : public ::testing::Test
   std::vector env, env_deriv, rij;
   std::vector nlist;
   std::vector fmt_nlist_a;
-  std::vector expected_grad_net = {
-    5.01828,  4.97546, -0.09569, -1.15305,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.61704,  1.06623,  0.15319,  0.24608,  5.28467, -2.59553,  3.00729, -8.19962,  5.03021,  5.02151, -0.86956,  0.26289,  2.75500,  2.70125,  0.22900, -0.54729,  0.00000,  0.00000,  0.00000,  0.00000, -0.61704, -1.06623, -0.15319, -0.24608,  2.32844,  2.23467, -0.16758, -0.70940,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  1.74748, -0.30379, -1.11004, -3.49833,  2.42774,  2.39284, -0.45567, -0.22216,  0.60993,  0.59054,  0.02135, -0.15332,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  5.28467,  2.59553, -3.00729,  8.19962,  4.77234,  4.62396, -1.90919, -0.44792,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  1.74748,  0.30379,  1.11004,  3.49833,  4.06655,  3.57849, -2.07817,  0.88468,  3.61241,  3.58881, -0.57839, -0.39969,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  5.01828, -4.97546,  0.09569,  1.15305,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.07573, -3.82089, -2.40143, -0.67375,  9.64382,  8.39638, -2.48922, -9.00792,  4.77234, -4.62396,  1.90919,  0.44792,  2.32844, -2.23467,  0.16758,  0.70940,  0.00000,  0.00000,  0.00000,  0.00000,  0.07573,  3.82089,  2.40143,  0.67375,  5.03021, -5.02151,  0.86956, -0.26289,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  1.44012, -1.15994, -0.66718, -3.33981,  4.06655, -3.57849,  2.07817, -0.88468,  2.42774, -2.39284,  0.45567,  0.22216,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
0.00000,  9.64382, -8.39638,  2.48922,  9.00792,  2.75500, -2.70125, -0.22900,  0.54729,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  1.44012,  1.15994,  0.66718,  3.33981,  3.61241, -3.58881,  0.57839,  0.39969,  0.60993, -0.59054, -0.02135,  0.15332,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, 
-  };  
-  
+  std::vector expected_grad_net = {
+      5.01828,  4.97546,  -0.09569, -1.15305, 0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  -0.61704,
+      1.06623,  0.15319,  0.24608,  5.28467,  -2.59553, 3.00729,  -8.19962,
+      5.03021,  5.02151,  -0.86956, 0.26289,  2.75500,  2.70125,  0.22900,
+      -0.54729, 0.00000,  0.00000,  0.00000,  0.00000,  -0.61704, -1.06623,
+      -0.15319, -0.24608, 2.32844,  2.23467,  -0.16758, -0.70940, 0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  1.74748,  -0.30379, -1.11004,
+      -3.49833, 2.42774,  2.39284,  -0.45567, -0.22216, 0.60993,  0.59054,
+      0.02135,  -0.15332, 0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  5.28467,  2.59553,  -3.00729, 8.19962,
+      4.77234,  4.62396,  -1.90919, -0.44792, 0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  1.74748,  0.30379,  1.11004,  3.49833,  4.06655,
+      3.57849,  -2.07817, 0.88468,  3.61241,  3.58881,  -0.57839, -0.39969,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  5.01828,  -4.97546, 0.09569,  1.15305,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.07573,  -3.82089, -2.40143, -0.67375, 9.64382,  8.39638,  -2.48922,
+      -9.00792, 4.77234,  -4.62396, 1.90919,  0.44792,  2.32844,  -2.23467,
+      0.16758,  0.70940,  0.00000,  0.00000,  0.00000,  0.00000,  0.07573,
+      3.82089,  2.40143,  0.67375,  5.03021,  -5.02151, 0.86956,  -0.26289,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  1.44012,  -1.15994,
+      -0.66718, -3.33981, 4.06655,  -3.57849, 2.07817,  -0.88468, 2.42774,
+      -2.39284, 0.45567,  0.22216,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  9.64382,  -8.39638, 2.48922,
+      9.00792,  2.75500,  -2.70125, -0.22900, 0.54729,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  1.44012,  1.15994,  0.66718,  3.33981,
+      3.61241,  -3.58881, 0.57839,  0.39969,  0.60993,  -0.59054, -0.02135,
+      0.15332,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,
+  };
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
@@ -48,53 +80,56 @@ class TestProdVirialGradA : public ::testing::Test
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
     env.resize(nloc * ndescrpt);
     env_deriv.resize(nloc * ndescrpt * 3);
     rij.resize(nloc * nnei * 3);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
-      std::vector t_env, t_env_deriv, t_rij;
+      std::vector t_env, t_env_deriv, t_rij;
       // compute env_mat and its deriv, record
-      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-      for (int jj = 0; jj < ndescrpt; ++jj){
-	env[ii*ndescrpt+jj] = t_env[jj];
-	for (int dd = 0; dd < 3; ++dd){
-	  env_deriv[ii*ndescrpt*3+jj*3+dd] = t_env_deriv[jj*3+dd];
-	}
+      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij, posi_cpy,
+                                    atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth,
+                                    rc);
+      for (int jj = 0; jj < ndescrpt; ++jj) {
+        env[ii * ndescrpt + jj] = t_env[jj];
+        for (int dd = 0; dd < 3; ++dd) {
+          env_deriv[ii * ndescrpt * 3 + jj * 3 + dd] = t_env_deriv[jj * 3 + dd];
+        }
       }
-      for (int jj = 0; jj < nnei * 3; ++jj){
-	rij[ii*nnei*3 + jj] = t_rij[jj];
+      for (int jj = 0; jj < nnei * 3; ++jj) {
+        rij[ii * nnei * 3 + jj] = t_rij[jj];
       }
     }
     grad.resize(9);
-    for (int ii = 0; ii < 9; ++ii){
+    for (int ii = 0; ii < 9; ++ii) {
       grad[ii] = 10 - ii * 1.;
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-TEST_F(TestProdVirialGradA, cpu)
-{
+TEST_F(TestProdVirialGradA, cpu) {
   std::vector grad_net(nloc * ndescrpt);
   int n_a_sel = nnei;
-  deepmd::prod_virial_grad_a_cpu (&grad_net[0], &grad[0], &env_deriv[0], &rij[0], &nlist[0], nloc, nnei);
+  deepmd::prod_virial_grad_a_cpu(&grad_net[0], &grad[0], &env_deriv[0],
+                                         &rij[0], &nlist[0], nloc, nnei);
   EXPECT_EQ(grad_net.size(), nloc * ndescrpt);
   EXPECT_EQ(grad_net.size(), expected_grad_net.size());
-  for (int jj = 0; jj < grad_net.size(); ++jj){
-    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < grad_net.size(); ++jj) {
+    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nloc * ndescrpt; ++jj){
   //   printf("%8.5f, ", grad_net[jj]);
   // }
@@ -102,19 +137,20 @@ TEST_F(TestProdVirialGradA, cpu)
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestProdVirialGradA, gpu)
-{
+TEST_F(TestProdVirialGradA, gpu) {
   std::vector grad_net(nloc * ndescrpt);
   int n_a_sel = nnei;
-  int * nlist_dev = NULL;
-  double * grad_net_dev = NULL, * grad_dev = NULL, * env_deriv_dev = NULL, * rij_dev = NULL;
+  int* nlist_dev = NULL;
+  double *grad_net_dev = NULL, *grad_dev = NULL, *env_deriv_dev = NULL,
+         *rij_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(grad_dev, grad);
   deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
   deepmd::malloc_device_memory(grad_net_dev, nloc * ndescrpt);
-  deepmd::prod_virial_grad_a_gpu_cuda(grad_net_dev, grad_dev, env_deriv_dev, rij_dev, nlist_dev, nloc, nnei);
+  deepmd::prod_virial_grad_a_gpu_cuda(
+      grad_net_dev, grad_dev, env_deriv_dev, rij_dev, nlist_dev, nloc, nnei);
   deepmd::memcpy_device_to_host(grad_net_dev, grad_net);
   deepmd::delete_device_memory(nlist_dev);
   deepmd::delete_device_memory(grad_dev);
@@ -124,31 +160,31 @@ TEST_F(TestProdVirialGradA, gpu)
 
   EXPECT_EQ(grad_net.size(), nloc * ndescrpt);
   EXPECT_EQ(grad_net.size(), expected_grad_net.size());
-  for (int jj = 0; jj < grad_net.size(); ++jj){
-    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < grad_net.size(); ++jj) {
+    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nloc * ndescrpt; ++jj){
   //   printf("%8.5f, ", grad_net[jj]);
   // }
   // printf("\n");
 }
-#endif // GOOGLE_CUDA
-
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestProdVirialGradA, gpu)
-{
+TEST_F(TestProdVirialGradA, gpu) {
   std::vector grad_net(nloc * ndescrpt);
   int n_a_sel = nnei;
-  int * nlist_dev = NULL;
-  double * grad_net_dev = NULL, * grad_dev = NULL, * env_deriv_dev = NULL, * rij_dev = NULL;
+  int* nlist_dev = NULL;
+  double *grad_net_dev = NULL, *grad_dev = NULL, *env_deriv_dev = NULL,
+         *rij_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(grad_dev, grad);
   deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
   deepmd::malloc_device_memory(grad_net_dev, nloc * ndescrpt);
-  deepmd::prod_virial_grad_a_gpu_rocm(grad_net_dev, grad_dev, env_deriv_dev, rij_dev, nlist_dev, nloc, nnei);
+  deepmd::prod_virial_grad_a_gpu_rocm(
+      grad_net_dev, grad_dev, env_deriv_dev, rij_dev, nlist_dev, nloc, nnei);
   deepmd::memcpy_device_to_host(grad_net_dev, grad_net);
   deepmd::delete_device_memory(nlist_dev);
   deepmd::delete_device_memory(grad_dev);
@@ -158,12 +194,12 @@ TEST_F(TestProdVirialGradA, gpu)
 
   EXPECT_EQ(grad_net.size(), nloc * ndescrpt);
   EXPECT_EQ(grad_net.size(), expected_grad_net.size());
-  for (int jj = 0; jj < grad_net.size(); ++jj){
-    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < grad_net.size(); ++jj) {
+    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nloc * ndescrpt; ++jj){
   //   printf("%8.5f, ", grad_net[jj]);
   // }
   // printf("\n");
 }
-#endif // TENSORFLOW_USE_ROCM
\ No newline at end of file
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_prod_virial_grad_r.cc b/source/lib/tests/test_prod_virial_grad_r.cc
index c517cd60ef..c19875a314 100644
--- a/source/lib/tests/test_prod_virial_grad_r.cc
+++ b/source/lib/tests/test_prod_virial_grad_r.cc
@@ -1,29 +1,26 @@
-#include 
 #include 
-#include "fmt_nlist.h"
+
+#include 
+
+#include "device.h"
 #include "env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
 #include "prod_virial_grad.h"
-#include "device.h"
 
-class TestProdVirialGradR : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestProdVirialGradR : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 5, 10};
   std::vector sec_r = {0, 0, 0};
@@ -33,14 +30,23 @@ class TestProdVirialGradR : public ::testing::Test
   std::vector env, env_deriv, rij;
   std::vector nlist;
   std::vector fmt_nlist_a;
-  std::vector expected_grad_net = {
-    5.01828,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.61704, -21.60321,  0.00000,  0.00000,  0.00000,  0.42750, -0.17937, -0.88567,  0.00000,  0.00000,  5.28467, -10.73121,  0.00000,  0.00000,  0.00000, -1.13085, -2.11178, -0.59649,  0.00000,  0.00000,  5.01828,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.07573, 43.00720,  0.00000,  0.00000,  0.00000,  0.73130,  0.71210, -0.24082,  0.00000,  0.00000,  9.64382, -42.10583,  0.00000,  0.00000,  0.00000, -0.33429, -0.19384,  0.55423,  0.00000,  0.00000,    
-  };  
-  
+  std::vector expected_grad_net = {
+      5.01828,   0.00000, 0.00000,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000, 0.00000,   -0.61704, -21.60321, 0.00000,  0.00000,
+      0.00000,   0.42750, -0.17937,  -0.88567, 0.00000,   0.00000,  5.28467,
+      -10.73121, 0.00000, 0.00000,   0.00000,  -1.13085,  -2.11178, -0.59649,
+      0.00000,   0.00000, 5.01828,   0.00000,  0.00000,   0.00000,  0.00000,
+      0.00000,   0.00000, 0.00000,   0.00000,  0.00000,   0.07573,  43.00720,
+      0.00000,   0.00000, 0.00000,   0.73130,  0.71210,   -0.24082, 0.00000,
+      0.00000,   9.64382, -42.10583, 0.00000,  0.00000,   0.00000,  -0.33429,
+      -0.19384,  0.55423, 0.00000,   0.00000,
+  };
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
@@ -48,53 +54,56 @@ class TestProdVirialGradR : public ::testing::Test
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
     env.resize(nloc * ndescrpt);
     env_deriv.resize(nloc * ndescrpt * 3);
     rij.resize(nloc * nnei * 3);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
-      std::vector t_env, t_env_deriv, t_rij;
+      std::vector t_env, t_env_deriv, t_rij;
       // compute env_mat and its deriv, record
-      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-      for (int jj = 0; jj < ndescrpt; ++jj){
-	env[ii*ndescrpt+jj] = t_env[jj];
-	for (int dd = 0; dd < 3; ++dd){
-	  env_deriv[ii*ndescrpt*3+jj*3+dd] = t_env_deriv[jj*3+dd];
-	}
+      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij, posi_cpy,
+                                    atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth,
+                                    rc);
+      for (int jj = 0; jj < ndescrpt; ++jj) {
+        env[ii * ndescrpt + jj] = t_env[jj];
+        for (int dd = 0; dd < 3; ++dd) {
+          env_deriv[ii * ndescrpt * 3 + jj * 3 + dd] = t_env_deriv[jj * 3 + dd];
+        }
       }
-      for (int jj = 0; jj < nnei * 3; ++jj){
-	rij[ii*nnei*3 + jj] = t_rij[jj];
+      for (int jj = 0; jj < nnei * 3; ++jj) {
+        rij[ii * nnei * 3 + jj] = t_rij[jj];
       }
     }
     grad.resize(9);
-    for (int ii = 0; ii < 9; ++ii){
+    for (int ii = 0; ii < 9; ++ii) {
       grad[ii] = 10 - ii * 1.;
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-TEST_F(TestProdVirialGradR, cpu)
-{
+TEST_F(TestProdVirialGradR, cpu) {
   std::vector grad_net(nloc * ndescrpt);
   int n_a_sel = nnei;
-  deepmd::prod_virial_grad_r_cpu (&grad_net[0], &grad[0], &env_deriv[0], &rij[0], &nlist[0], nloc, nnei);
+  deepmd::prod_virial_grad_r_cpu(&grad_net[0], &grad[0], &env_deriv[0],
+                                         &rij[0], &nlist[0], nloc, nnei);
   EXPECT_EQ(grad_net.size(), nloc * ndescrpt);
   EXPECT_EQ(grad_net.size(), expected_grad_net.size());
-  for (int jj = 0; jj < grad_net.size(); ++jj){
-    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < grad_net.size(); ++jj) {
+    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nloc * ndescrpt; ++jj){
   //   printf("%8.5f, ", grad_net[jj]);
   // }
@@ -102,19 +111,20 @@ TEST_F(TestProdVirialGradR, cpu)
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestProdVirialGradR, gpu)
-{
+TEST_F(TestProdVirialGradR, gpu) {
   std::vector grad_net(nloc * ndescrpt);
   int n_a_sel = nnei;
-  int * nlist_dev = NULL;
-  double * grad_net_dev = NULL, * grad_dev = NULL, * env_deriv_dev = NULL, * rij_dev = NULL;
+  int* nlist_dev = NULL;
+  double *grad_net_dev = NULL, *grad_dev = NULL, *env_deriv_dev = NULL,
+         *rij_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(grad_dev, grad);
   deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
   deepmd::malloc_device_memory(grad_net_dev, nloc * ndescrpt);
-  deepmd::prod_virial_grad_r_gpu_cuda(grad_net_dev, grad_dev, env_deriv_dev, rij_dev, nlist_dev, nloc, nnei);
+  deepmd::prod_virial_grad_r_gpu_cuda(
+      grad_net_dev, grad_dev, env_deriv_dev, rij_dev, nlist_dev, nloc, nnei);
   deepmd::memcpy_device_to_host(grad_net_dev, grad_net);
   deepmd::delete_device_memory(nlist_dev);
   deepmd::delete_device_memory(grad_dev);
@@ -124,30 +134,31 @@ TEST_F(TestProdVirialGradR, gpu)
 
   EXPECT_EQ(grad_net.size(), nloc * ndescrpt);
   EXPECT_EQ(grad_net.size(), expected_grad_net.size());
-  for (int jj = 0; jj < grad_net.size(); ++jj){
-    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < grad_net.size(); ++jj) {
+    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nloc * ndescrpt; ++jj){
   //   printf("%8.5f, ", grad_net[jj]);
   // }
   // printf("\n");
 }
-#endif // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestProdVirialGradR, gpu)
-{
+TEST_F(TestProdVirialGradR, gpu) {
   std::vector grad_net(nloc * ndescrpt);
   int n_a_sel = nnei;
-  int * nlist_dev = NULL;
-  double * grad_net_dev = NULL, * grad_dev = NULL, * env_deriv_dev = NULL, * rij_dev = NULL;
+  int* nlist_dev = NULL;
+  double *grad_net_dev = NULL, *grad_dev = NULL, *env_deriv_dev = NULL,
+         *rij_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(grad_dev, grad);
   deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
   deepmd::malloc_device_memory(grad_net_dev, nloc * ndescrpt);
-  deepmd::prod_virial_grad_r_gpu_rocm(grad_net_dev, grad_dev, env_deriv_dev, rij_dev, nlist_dev, nloc, nnei);
+  deepmd::prod_virial_grad_r_gpu_rocm(
+      grad_net_dev, grad_dev, env_deriv_dev, rij_dev, nlist_dev, nloc, nnei);
   deepmd::memcpy_device_to_host(grad_net_dev, grad_net);
   deepmd::delete_device_memory(nlist_dev);
   deepmd::delete_device_memory(grad_dev);
@@ -157,12 +168,12 @@ TEST_F(TestProdVirialGradR, gpu)
 
   EXPECT_EQ(grad_net.size(), nloc * ndescrpt);
   EXPECT_EQ(grad_net.size(), expected_grad_net.size());
-  for (int jj = 0; jj < grad_net.size(); ++jj){
-    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < grad_net.size(); ++jj) {
+    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nloc * ndescrpt; ++jj){
   //   printf("%8.5f, ", grad_net[jj]);
   // }
   // printf("\n");
 }
-#endif // TENSORFLOW_USE_ROCM
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_prod_virial_r.cc b/source/lib/tests/test_prod_virial_r.cc
index 33cf9a9ed8..6696fe87bd 100644
--- a/source/lib/tests/test_prod_virial_r.cc
+++ b/source/lib/tests/test_prod_virial_r.cc
@@ -1,29 +1,26 @@
-#include 
 #include 
-#include "fmt_nlist.h"
+
+#include 
+
+#include "device.h"
 #include "env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
 #include "prod_virial.h"
-#include "device.h"
 
-class TestProdVirialR : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestProdVirialR : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   double rc = 6;
   double rc_smth = 0.8;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 5, 10};
   std::vector sec_r = {0, 0, 0};
@@ -33,17 +30,80 @@ class TestProdVirialR : public ::testing::Test
   std::vector env, env_deriv, rij;
   std::vector nlist;
   std::vector fmt_nlist_a;
-  std::vector expected_virial = {
-    105.83531,  8.37873, -26.31645,  8.37873, 25.29640, -22.08303, -26.31645, -22.08303, 41.52565,
-  };  
-  std::vector expected_atom_virial = {
-    5.82162, -2.43879, -4.40555, -2.43879,  1.02165,  1.84557, -4.40555,  1.84557,  3.33393,  5.85102, -2.45110, -4.42780, -2.45110,  1.02681,  1.85489, -4.42780,  1.85489,  3.35077, 12.99134, -1.65136,  1.27337, -1.65136,  0.31236, -0.30952,  1.27337, -0.30952,  0.34172, 14.20717,  0.71207, -0.80046,  0.71207,  3.33417, -5.06665, -0.80046, -5.06665,  7.86673,  6.35288, -0.15554,  0.00838, -0.15554,  4.67701, -7.17573,  0.00838, -7.17573, 11.07561, 14.50559,  3.80226, -5.12103,  3.80226,  2.16638, -2.99774, -5.12103, -2.99774,  4.20621, 13.02204,  4.00163, -2.38372,  4.00163,  5.79404, -3.84611, -2.38372, -3.84611,  2.60729,  9.69976,  1.23534, -3.98748,  1.23534,  0.53911, -1.23540, -3.98748, -1.23540,  3.03034,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  5.21346,  4.51882, -5.24989,  4.51882,  6.27021, -5.00845, -5.24989, -5.00845,  5.37572,  7.57664,  0.67053, -1.12262,  0.67053,  0.07524, -0.08028, -1.12262, -0.08028,  0.18921,  7.32402, -0.29298,  0.42021, -0.29298,  0.01974,  0.00042,  0.42021,  0.00042,  0.06112,  3.26976,  0.42787, -0.51985,  0.42787,  0.05967, -0.06402, -0.51985, -0.06402,  0.08700,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
-  };  
-  
+  std::vector expected_virial = {
+      105.83531, 8.37873,   -26.31645, 8.37873,  25.29640,
+      -22.08303, -26.31645, -22.08303, 41.52565,
+  };
+  std::vector expected_atom_virial = {
+      5.82162,  -2.43879, -4.40555, -2.43879, 1.02165,  1.84557,  -4.40555,
+      1.84557,  3.33393,  5.85102,  -2.45110, -4.42780, -2.45110, 1.02681,
+      1.85489,  -4.42780, 1.85489,  3.35077,  12.99134, -1.65136, 1.27337,
+      -1.65136, 0.31236,  -0.30952, 1.27337,  -0.30952, 0.34172,  14.20717,
+      0.71207,  -0.80046, 0.71207,  3.33417,  -5.06665, -0.80046, -5.06665,
+      7.86673,  6.35288,  -0.15554, 0.00838,  -0.15554, 4.67701,  -7.17573,
+      0.00838,  -7.17573, 11.07561, 14.50559, 3.80226,  -5.12103, 3.80226,
+      2.16638,  -2.99774, -5.12103, -2.99774, 4.20621,  13.02204, 4.00163,
+      -2.38372, 4.00163,  5.79404,  -3.84611, -2.38372, -3.84611, 2.60729,
+      9.69976,  1.23534,  -3.98748, 1.23534,  0.53911,  -1.23540, -3.98748,
+      -1.23540, 3.03034,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  5.21346,  4.51882,  -5.24989, 4.51882,  6.27021,  -5.00845,
+      -5.24989, -5.00845, 5.37572,  7.57664,  0.67053,  -1.12262, 0.67053,
+      0.07524,  -0.08028, -1.12262, -0.08028, 0.18921,  7.32402,  -0.29298,
+      0.42021,  -0.29298, 0.01974,  0.00042,  0.42021,  0.00042,  0.06112,
+      3.26976,  0.42787,  -0.51985, 0.42787,  0.05967,  -0.06402, -0.51985,
+      -0.06402, 0.08700,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+  };
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
@@ -51,59 +111,63 @@ class TestProdVirialR : public ::testing::Test
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
     env.resize(nloc * ndescrpt);
     env_deriv.resize(nloc * ndescrpt * 3);
     rij.resize(nloc * nnei * 3);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
-      std::vector t_env, t_env_deriv, t_rij;
+      std::vector t_env, t_env_deriv, t_rij;
       // compute env_mat and its deriv, record
-      deepmd::env_mat_r_cpu(t_env, t_env_deriv, t_rij, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-      for (int jj = 0; jj < ndescrpt; ++jj){
-	env[ii*ndescrpt+jj] = t_env[jj];
-	for (int dd = 0; dd < 3; ++dd){
-	  env_deriv[ii*ndescrpt*3+jj*3+dd] = t_env_deriv[jj*3+dd];
-	}
+      deepmd::env_mat_r_cpu(t_env, t_env_deriv, t_rij, posi_cpy,
+                                    atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth,
+                                    rc);
+      for (int jj = 0; jj < ndescrpt; ++jj) {
+        env[ii * ndescrpt + jj] = t_env[jj];
+        for (int dd = 0; dd < 3; ++dd) {
+          env_deriv[ii * ndescrpt * 3 + jj * 3 + dd] = t_env_deriv[jj * 3 + dd];
+        }
       }
-      for (int jj = 0; jj < nnei * 3; ++jj){
-	rij[ii*nnei*3 + jj] = t_rij[jj];
+      for (int jj = 0; jj < nnei * 3; ++jj) {
+        rij[ii * nnei * 3 + jj] = t_rij[jj];
       }
     }
     net_deriv.resize(nloc * ndescrpt);
-    for (int ii = 0; ii < nloc * ndescrpt; ++ii){
+    for (int ii = 0; ii < nloc * ndescrpt; ++ii) {
       net_deriv[ii] = 10 - ii * 0.01;
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-TEST_F(TestProdVirialR, cpu)
-{
+TEST_F(TestProdVirialR, cpu) {
   std::vector virial(9);
   std::vector atom_virial(nall * 9);
   int n_a_sel = nnei;
-  deepmd::prod_virial_r_cpu (&virial[0], &atom_virial[0], &net_deriv[0], &env_deriv[0], &rij[0], &nlist[0], nloc, nall, nnei);
+  deepmd::prod_virial_r_cpu(&virial[0], &atom_virial[0], &net_deriv[0],
+                                    &env_deriv[0], &rij[0], &nlist[0], nloc,
+                                    nall, nnei);
   EXPECT_EQ(virial.size(), 9);
-  EXPECT_EQ(atom_virial.size(), nall * 9);  
+  EXPECT_EQ(atom_virial.size(), nall * 9);
   EXPECT_EQ(virial.size(), expected_virial.size());
   EXPECT_EQ(atom_virial.size(), expected_atom_virial.size());
-  for (int jj = 0; jj < virial.size(); ++jj){
-    EXPECT_LT(fabs(virial[jj] - expected_virial[jj]) , 1e-5);
-  }  
-  for (int jj = 0; jj < atom_virial.size(); ++jj){
-    EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < virial.size(); ++jj) {
+    EXPECT_LT(fabs(virial[jj] - expected_virial[jj]), 1e-5);
+  }
+  for (int jj = 0; jj < atom_virial.size(); ++jj) {
+    EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < 9; ++jj){
   //   printf("%8.5f, ", virial[jj]);
   // }
@@ -114,24 +178,26 @@ TEST_F(TestProdVirialR, cpu)
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestProdVirialR, gpu_cuda)
-{
+TEST_F(TestProdVirialR, gpu_cuda) {
   std::vector virial(9, 0.0);
   std::vector atom_virial(nall * 9, 0.0);
   int n_a_sel = nnei;
 
-  int * nlist_dev = NULL;
-  double * virial_dev = NULL, *atom_virial_dev = NULL, * net_deriv_dev = NULL, * env_deriv_dev = NULL, * rij_dev = NULL;
+  int* nlist_dev = NULL;
+  double *virial_dev = NULL, *atom_virial_dev = NULL, *net_deriv_dev = NULL,
+         *env_deriv_dev = NULL, *rij_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(virial_dev, virial);
   deepmd::malloc_device_memory_sync(atom_virial_dev, atom_virial);
-  deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv);  
-  deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);  
-  deepmd::malloc_device_memory_sync(rij_dev, rij);  
+  deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv);
+  deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
+  deepmd::malloc_device_memory_sync(rij_dev, rij);
+
+  deepmd::prod_virial_r_gpu_cuda(virial_dev, atom_virial_dev,
+                                         net_deriv_dev, env_deriv_dev, rij_dev,
+                                         nlist_dev, nloc, nall, nnei);
 
-  deepmd::prod_virial_r_gpu_cuda (virial_dev, atom_virial_dev, net_deriv_dev, env_deriv_dev, rij_dev, nlist_dev, nloc, nall, nnei);
-  
   deepmd::memcpy_device_to_host(virial_dev, virial);
   deepmd::memcpy_device_to_host(atom_virial_dev, atom_virial);
   deepmd::delete_device_memory(nlist_dev);
@@ -149,36 +215,38 @@ TEST_F(TestProdVirialR, gpu_cuda)
   // }
   EXPECT_EQ(virial.size(), 9);
   EXPECT_EQ(virial.size(), expected_virial.size());
-  EXPECT_EQ(atom_virial.size(), nall * 9);  
-  EXPECT_EQ(atom_virial.size(), expected_atom_virial.size());  
-  for (int jj = 0; jj < virial.size(); ++jj){
-    EXPECT_LT(fabs(virial[jj] - expected_virial[jj]) , 1e-5);
-  }  
-  for (int jj = 0; jj < atom_virial.size(); ++jj){
-    EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]) , 1e-5);
+  EXPECT_EQ(atom_virial.size(), nall * 9);
+  EXPECT_EQ(atom_virial.size(), expected_atom_virial.size());
+  for (int jj = 0; jj < virial.size(); ++jj) {
+    EXPECT_LT(fabs(virial[jj] - expected_virial[jj]), 1e-5);
+  }
+  for (int jj = 0; jj < atom_virial.size(); ++jj) {
+    EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]), 1e-5);
   }
 }
-#endif // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestProdVirialR, gpu_rocm)
-{
+TEST_F(TestProdVirialR, gpu_rocm) {
   std::vector virial(9, 0.0);
   std::vector atom_virial(nall * 9, 0.0);
   int n_a_sel = nnei;
 
-  int * nlist_dev = NULL;
-  double * virial_dev = NULL, *atom_virial_dev = NULL, * net_deriv_dev = NULL, * env_deriv_dev = NULL, * rij_dev = NULL;
+  int* nlist_dev = NULL;
+  double *virial_dev = NULL, *atom_virial_dev = NULL, *net_deriv_dev = NULL,
+         *env_deriv_dev = NULL, *rij_dev = NULL;
 
   deepmd::malloc_device_memory_sync(nlist_dev, nlist);
   deepmd::malloc_device_memory_sync(virial_dev, virial);
   deepmd::malloc_device_memory_sync(atom_virial_dev, atom_virial);
-  deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv);  
-  deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);  
-  deepmd::malloc_device_memory_sync(rij_dev, rij);  
+  deepmd::malloc_device_memory_sync(net_deriv_dev, net_deriv);
+  deepmd::malloc_device_memory_sync(env_deriv_dev, env_deriv);
+  deepmd::malloc_device_memory_sync(rij_dev, rij);
+
+  deepmd::prod_virial_r_gpu_rocm(virial_dev, atom_virial_dev,
+                                         net_deriv_dev, env_deriv_dev, rij_dev,
+                                         nlist_dev, nloc, nall, nnei);
 
-  deepmd::prod_virial_r_gpu_rocm (virial_dev, atom_virial_dev, net_deriv_dev, env_deriv_dev, rij_dev, nlist_dev, nloc, nall, nnei);
-  
   deepmd::memcpy_device_to_host(virial_dev, virial);
   deepmd::memcpy_device_to_host(atom_virial_dev, atom_virial);
   deepmd::delete_device_memory(nlist_dev);
@@ -196,13 +264,13 @@ TEST_F(TestProdVirialR, gpu_rocm)
   // }
   EXPECT_EQ(virial.size(), 9);
   EXPECT_EQ(virial.size(), expected_virial.size());
-  EXPECT_EQ(atom_virial.size(), nall * 9);  
-  EXPECT_EQ(atom_virial.size(), expected_atom_virial.size());  
-  for (int jj = 0; jj < virial.size(); ++jj){
-    EXPECT_LT(fabs(virial[jj] - expected_virial[jj]) , 1e-5);
-  }  
-  for (int jj = 0; jj < atom_virial.size(); ++jj){
-    EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]) , 1e-5);
+  EXPECT_EQ(atom_virial.size(), nall * 9);
+  EXPECT_EQ(atom_virial.size(), expected_atom_virial.size());
+  for (int jj = 0; jj < virial.size(); ++jj) {
+    EXPECT_LT(fabs(virial[jj] - expected_virial[jj]), 1e-5);
+  }
+  for (int jj = 0; jj < atom_virial.size(); ++jj) {
+    EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]), 1e-5);
   }
 }
-#endif // TENSORFLOW_USE_ROCM
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_simulation_region.cc b/source/lib/tests/test_simulation_region.cc
index e90ec55783..3305d93733 100644
--- a/source/lib/tests/test_simulation_region.cc
+++ b/source/lib/tests/test_simulation_region.cc
@@ -1,50 +1,50 @@
 #include 
+
 #include 
 #include 
-#include "region.h"
+
 #include "SimulationRegion.h"
 #include "device.h"
+#include "region.h"
 
-class TestRegion : public ::testing::Test
-{
-protected:
-  std::vector ref_boxt = {
-    3.27785716,  0.09190842,  0.14751448,  0.02331264,  4.36482777, -0.2999871 , -0.47510999, -0.38123489,  5.33561809
-  };
+class TestRegion : public ::testing::Test {
+ protected:
+  std::vector ref_boxt = {3.27785716,  0.09190842,  0.14751448,
+                                  0.02331264,  4.36482777,  -0.2999871,
+                                  -0.47510999, -0.38123489, 5.33561809};
   double expected_vol = 76.26958621360133;
   // rec_boxt = boxt^{-T}
-  std::vector ref_rec_boxt = {
-    3.0385229041853185e-01,  2.3783430948044884e-04, 2.7073513689027690e-02, -7.1670232142159460e-03, 2.3022911797728179e-01,  1.5811897837543720e-02, -8.8035961973365381e-03,  1.2937710358702505e-02, 1.8756020637229892e-01
-  };
-  std::vector ref_rp = {
-    1.5, 2.5, 3.5
-  };
-  std::vector ref_ri = {
-    0.5511303193130958, 0.6201639025532836, 0.6755996039037975, 
+  std::vector ref_rec_boxt = {
+      3.0385229041853185e-01,  2.3783430948044884e-04, 2.7073513689027690e-02,
+      -7.1670232142159460e-03, 2.3022911797728179e-01, 1.5811897837543720e-02,
+      -8.8035961973365381e-03, 1.2937710358702505e-02, 1.8756020637229892e-01};
+  std::vector ref_rp = {1.5, 2.5, 3.5};
+  std::vector ref_ri = {
+      0.5511303193130958,
+      0.6201639025532836,
+      0.6755996039037975,
   };
 };
 
-TEST_F(TestRegion, orig)
-{
+TEST_F(TestRegion, orig) {
   SimulationRegion region;
   region.reinitBox(&ref_boxt[0]);
-  const double * rec_boxt = region.getRecBoxTensor();
-  for(int ii = 0; ii < 9; ++ii){
+  const double* rec_boxt = region.getRecBoxTensor();
+  for (int ii = 0; ii < 9; ++ii) {
     EXPECT_LT(fabs(rec_boxt[ii] - ref_rec_boxt[ii]), 1e-10);
   }
   double ri[3];
   region.phys2Inter(ri, &ref_rp[0]);
-  for(int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     EXPECT_LT(fabs(ri[ii] - ref_ri[ii]), 1e-10);
   }
 }
 
-TEST_F(TestRegion, cpu)
-{
+TEST_F(TestRegion, cpu) {
   // check rec_box
   deepmd::Region region;
   init_region_cpu(region, &ref_boxt[0]);
-  for(int ii = 0; ii < 9; ++ii){
+  for (int ii = 0; ii < 9; ++ii) {
     EXPECT_LT(fabs(region.rec_boxt[ii] - ref_rec_boxt[ii]), 1e-10);
   }
   // check volume
@@ -53,37 +53,36 @@ TEST_F(TestRegion, cpu)
   // check conversion between phys and inter coords.
   double ri[3];
   convert_to_inter_cpu(ri, region, &ref_rp[0]);
-  for(int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     EXPECT_LT(fabs(ri[ii] - ref_ri[ii]), 1e-10);
   }
   double rp2[3];
   convert_to_phys_cpu(rp2, region, ri);
-  for(int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     EXPECT_LT(fabs(rp2[ii] - ref_rp[ii]), 1e-10);
   }
   double rp[3];
   convert_to_phys_cpu(rp, region, &ref_ri[0]);
-  for(int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     EXPECT_LT(fabs(rp[ii] - ref_rp[ii]), 1e-10);
   }
   double ri2[3];
   convert_to_inter_cpu(ri2, region, rp);
-  for(int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     EXPECT_LT(fabs(ri2[ii] - ref_ri[ii]), 1e-10);
   }
 }
 #if GOOGLE_CUDA
-TEST_F(TestRegion, gpu)
-{
+TEST_F(TestRegion, gpu) {
   // check rec_box
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
-  double * boxt_dev = NULL, * rec_boxt_dev = NULL;
-  double * ref_rp_dev = NULL, * ref_ri_dev = NULL;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
+  double *boxt_dev = NULL, *rec_boxt_dev = NULL;
+  double *ref_rp_dev = NULL, *ref_ri_dev = NULL;
   init_region_cpu(region, &ref_boxt[0]);
-  for(int ii = 0; ii < 9; ++ii){
+  for (int ii = 0; ii < 9; ++ii) {
     EXPECT_LT(fabs(region.rec_boxt[ii] - ref_rec_boxt[ii]), 1e-10);
   }
   deepmd::malloc_device_memory_sync(boxt_dev, region.boxt, 9);
@@ -94,42 +93,42 @@ TEST_F(TestRegion, gpu)
   region_dev.rec_boxt = rec_boxt_dev;
   // check volume
   double vol[1];
-  double * vol_dev = NULL;
+  double* vol_dev = NULL;
   deepmd::malloc_device_memory(vol_dev, 1);
   deepmd::volume_gpu(vol_dev, region_dev);
   deepmd::memcpy_device_to_host(vol_dev, vol, 1);
   EXPECT_LT(fabs(vol[0] - expected_vol), 1e-10);
   // check conversion between phys and inter coords.
   double ri[3];
-  double * ri_dev = NULL;
+  double* ri_dev = NULL;
   deepmd::malloc_device_memory(ri_dev, 3);
   deepmd::convert_to_inter_gpu(ri_dev, region_dev, ref_rp_dev);
   deepmd::memcpy_device_to_host(ri_dev, ri, 3);
-  for(int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     EXPECT_LT(fabs(ri[ii] - ref_ri[ii]), 1e-10);
   }
   double rp2[3];
-  double * rp2_dev = NULL;
+  double* rp2_dev = NULL;
   deepmd::malloc_device_memory(rp2_dev, 3);
   deepmd::convert_to_phys_gpu(rp2_dev, region_dev, ri_dev);
   deepmd::memcpy_device_to_host(rp2_dev, rp2, 3);
-  for(int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     EXPECT_LT(fabs(rp2[ii] - ref_rp[ii]), 1e-10);
   }
   double rp[3];
-  double * rp_dev = NULL;
+  double* rp_dev = NULL;
   deepmd::malloc_device_memory(rp_dev, 3);
   deepmd::convert_to_phys_gpu(rp_dev, region_dev, ref_ri_dev);
   deepmd::memcpy_device_to_host(rp_dev, rp, 3);
-  for(int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     EXPECT_LT(fabs(rp[ii] - ref_rp[ii]), 1e-10);
   }
   double ri2[3];
-  double * ri2_dev = NULL;
+  double* ri2_dev = NULL;
   deepmd::malloc_device_memory(ri2_dev, 3);
   deepmd::convert_to_inter_gpu(ri2_dev, region_dev, rp_dev);
   deepmd::memcpy_device_to_host(ri2_dev, ri2, 3);
-  for(int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     EXPECT_LT(fabs(ri2[ii] - ref_ri[ii]), 1e-10);
   }
   deepmd::delete_device_memory(boxt_dev);
@@ -144,32 +143,30 @@ TEST_F(TestRegion, gpu)
   region_dev.boxt = new_boxt;
   region_dev.rec_boxt = new_rec_boxt;
 }
-#endif // GOOGLE_CUDA
-    
+#endif  // GOOGLE_CUDA
 
 // double square_root (const double xx)
 // {
 //   return sqrt(xx);
 // }
 
-// TEST (SquareRootTest, PositiveNos) { 
+// TEST (SquareRootTest, PositiveNos) {
 //     EXPECT_EQ (18.0, square_root (324.0));
 //     EXPECT_EQ (25.4, square_root (645.16));
 //     EXPECT_EQ (50.332, square_root (2533.310224));
 // }
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestRegion, gpu)
-{
+TEST_F(TestRegion, gpu) {
   // check rec_box
   deepmd::Region region;
   deepmd::Region region_dev;
-  double * new_boxt = region_dev.boxt;
-  double * new_rec_boxt = region_dev.rec_boxt;
-  double * boxt_dev = NULL, * rec_boxt_dev = NULL;
-  double * ref_rp_dev = NULL, * ref_ri_dev = NULL;
+  double* new_boxt = region_dev.boxt;
+  double* new_rec_boxt = region_dev.rec_boxt;
+  double *boxt_dev = NULL, *rec_boxt_dev = NULL;
+  double *ref_rp_dev = NULL, *ref_ri_dev = NULL;
   init_region_cpu(region, &ref_boxt[0]);
-  for(int ii = 0; ii < 9; ++ii){
+  for (int ii = 0; ii < 9; ++ii) {
     EXPECT_LT(fabs(region.rec_boxt[ii] - ref_rec_boxt[ii]), 1e-10);
   }
   deepmd::malloc_device_memory_sync(boxt_dev, region.boxt, 9);
@@ -180,42 +177,42 @@ TEST_F(TestRegion, gpu)
   region_dev.rec_boxt = rec_boxt_dev;
   // check volume
   double vol[1];
-  double * vol_dev = NULL;
+  double* vol_dev = NULL;
   deepmd::malloc_device_memory(vol_dev, 1);
   deepmd::volume_gpu_rocm(vol_dev, region_dev);
   deepmd::memcpy_device_to_host(vol_dev, vol, 1);
   EXPECT_LT(fabs(vol[0] - expected_vol), 1e-10);
   // check conversion between phys and inter coords.
   double ri[3];
-  double * ri_dev = NULL;
+  double* ri_dev = NULL;
   deepmd::malloc_device_memory(ri_dev, 3);
   deepmd::convert_to_inter_gpu_rocm(ri_dev, region_dev, ref_rp_dev);
   deepmd::memcpy_device_to_host(ri_dev, ri, 3);
-  for(int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     EXPECT_LT(fabs(ri[ii] - ref_ri[ii]), 1e-10);
   }
   double rp2[3];
-  double * rp2_dev = NULL;
+  double* rp2_dev = NULL;
   deepmd::malloc_device_memory(rp2_dev, 3);
   deepmd::convert_to_phys_gpu_rocm(rp2_dev, region_dev, ri_dev);
   deepmd::memcpy_device_to_host(rp2_dev, rp2, 3);
-  for(int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     EXPECT_LT(fabs(rp2[ii] - ref_rp[ii]), 1e-10);
   }
   double rp[3];
-  double * rp_dev = NULL;
+  double* rp_dev = NULL;
   deepmd::malloc_device_memory(rp_dev, 3);
   deepmd::convert_to_phys_gpu_rocm(rp_dev, region_dev, ref_ri_dev);
   deepmd::memcpy_device_to_host(rp_dev, rp, 3);
-  for(int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     EXPECT_LT(fabs(rp[ii] - ref_rp[ii]), 1e-10);
   }
   double ri2[3];
-  double * ri2_dev = NULL;
+  double* ri2_dev = NULL;
   deepmd::malloc_device_memory(ri2_dev, 3);
   deepmd::convert_to_inter_gpu_rocm(ri2_dev, region_dev, rp_dev);
   deepmd::memcpy_device_to_host(ri2_dev, ri2, 3);
-  for(int ii = 0; ii < 3; ++ii){
+  for (int ii = 0; ii < 3; ++ii) {
     EXPECT_LT(fabs(ri2[ii] - ref_ri[ii]), 1e-10);
   }
   deepmd::delete_device_memory(boxt_dev);
@@ -230,4 +227,4 @@ TEST_F(TestRegion, gpu)
   region_dev.boxt = new_boxt;
   region_dev.rec_boxt = new_rec_boxt;
 }
-#endif // TENSORFLOW_USE_ROCM
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_soft_min_switch.cc b/source/lib/tests/test_soft_min_switch.cc
index e8e1a0eddc..c87c086bb6 100644
--- a/source/lib/tests/test_soft_min_switch.cc
+++ b/source/lib/tests/test_soft_min_switch.cc
@@ -1,31 +1,28 @@
-#include 
 #include 
-#include "fmt_nlist.h"
+
+#include 
+
 #include "env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
 #include "soft_min_switch.h"
 
-class TestSoftMinSwitch : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestSoftMinSwitch : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   double rc = 6;
   double rc_smth = 0.8;
   double alpha = 0.5;
   double rmin = 0.8;
   double rmax = 1.5;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 5, 10};
   std::vector sec_r = {0, 0, 0};
@@ -35,14 +32,15 @@ class TestSoftMinSwitch : public ::testing::Test
   std::vector rij;
   std::vector nlist;
   std::vector fmt_nlist_a;
-  std::vector expected_value = {
-     0.84693,  0.57040,  0.41834,  0.89258,  0.63482,  0.60391, 
-  };  
-  
+  std::vector expected_value = {
+      0.84693, 0.57040, 0.41834, 0.89258, 0.63482, 0.60391,
+  };
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
@@ -50,94 +48,106 @@ class TestSoftMinSwitch : public ::testing::Test
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
     rij.resize(nloc * nnei * 3);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
-      std::vector t_env, t_env_deriv, t_rij;
+      std::vector t_env, t_env_deriv, t_rij;
       // compute env_mat and its deriv, record
-      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-      for (int jj = 0; jj < nnei * 3; ++jj){
-	rij[ii*nnei*3 + jj] = t_rij[jj];
-      }      
+      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij, posi_cpy,
+                                    atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth,
+                                    rc);
+      for (int jj = 0; jj < nnei * 3; ++jj) {
+        rij[ii * nnei * 3 + jj] = t_rij[jj];
+      }
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-TEST_F(TestSoftMinSwitch, cpu)
-{
+TEST_F(TestSoftMinSwitch, cpu) {
   std::vector sw_value(nloc);
   std::vector sw_deriv(nloc * nnei * 3);
-  deepmd::soft_min_switch_cpu (&sw_value[0], &sw_deriv[0], &rij[0], &nlist[0], nloc, nnei, alpha, rmin, rmax);
+  deepmd::soft_min_switch_cpu(&sw_value[0], &sw_deriv[0], &rij[0],
+                                      &nlist[0], nloc, nnei, alpha, rmin, rmax);
   EXPECT_EQ(sw_value.size(), nloc);
   EXPECT_EQ(sw_value.size(), expected_value.size());
   EXPECT_EQ(sw_deriv.size(), nloc * nnei * 3);
-  for (int jj = 0; jj < nloc; ++jj){
-    EXPECT_LT(fabs(sw_value[jj] - expected_value[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < nloc; ++jj) {
+    EXPECT_LT(fabs(sw_value[jj] - expected_value[jj]), 1e-5);
+  }
   // for (int jj = 0; jj < nloc; ++jj){
   //   printf("%8.5f, ", sw_value[jj]);
   // }
   // printf("\n");
 }
 
-TEST_F(TestSoftMinSwitch, cpu_num_deriv)
-{
+TEST_F(TestSoftMinSwitch, cpu_num_deriv) {
   std::vector sw_value(nloc);
   std::vector sw_deriv(nloc * nnei * 3);
   std::vector sw_value_0(nloc);
   std::vector sw_deriv_0(nloc * nnei * 3);
   std::vector sw_value_1(nloc);
   std::vector sw_deriv_1(nloc * nnei * 3);
-  std::vector env, env_deriv;
+  std::vector env, env_deriv;
   std::vector t_rij_0, t_rij_1;
   std::vector rij_0, rij_1;
   std::vector fmt_nlist_a;
   double hh = 1e-5;
-  
-  deepmd::soft_min_switch_cpu (&sw_value[0], &sw_deriv[0], &rij[0], &nlist[0], nloc, nnei, alpha, rmin, rmax);
+
+  deepmd::soft_min_switch_cpu(&sw_value[0], &sw_deriv[0], &rij[0],
+                                      &nlist[0], nloc, nnei, alpha, rmin, rmax);
   EXPECT_EQ(sw_value.size(), nloc);
   EXPECT_EQ(sw_deriv.size(), nloc * nnei * 3);
 
-  for (int ii = 0; ii < nloc; ++ii){
-    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);    
+  for (int ii = 0; ii < nloc; ++ii) {
+    int ret = format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                         nlist_a_cpy[ii], rc, sec_a);
     EXPECT_EQ(ret, -1);
-    
+
     int i_idx = ii;
-    for (int jj = 0; jj < nnei; ++jj){
-      int j_idx = nlist[ii*nnei + jj];
+    for (int jj = 0; jj < nnei; ++jj) {
+      int j_idx = nlist[ii * nnei + jj];
       if (j_idx < 0) continue;
-      for (int dd = 0; dd < 3; ++dd){
-	std::vector posi_0 = posi_cpy;
-	std::vector posi_1 = posi_cpy;
-	posi_0[j_idx*3+dd] -= hh;
-	posi_1[j_idx*3+dd] += hh;
-	deepmd::env_mat_a_cpu(env, env_deriv, t_rij_0, posi_0, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);
-	deepmd::env_mat_a_cpu(env, env_deriv, t_rij_1, posi_1, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);
-	EXPECT_EQ(t_rij_0.size(), nnei * 3);
-	EXPECT_EQ(t_rij_1.size(), nnei * 3);
-	rij_0 = rij;
-	rij_1 = rij;
-	for (int dd1 = 0; dd1 < 3; ++dd1){
-	  rij_0[ii*nnei*3 + jj*3 + dd] = t_rij_0[jj*3 + dd];
-	  rij_1[ii*nnei*3 + jj*3 + dd] = t_rij_1[jj*3 + dd];
-	}      
-	deepmd::soft_min_switch_cpu (&sw_value_0[0], &sw_deriv_0[0], &rij_0[0], &nlist[0], nloc, nnei, alpha, rmin, rmax);
-	deepmd::soft_min_switch_cpu (&sw_value_1[0], &sw_deriv_1[0], &rij_1[0], &nlist[0], nloc, nnei, alpha, rmin, rmax);
-	double ana_deriv = sw_deriv[ii*nnei*3 + jj*3 + dd];
-	double num_deriv = (sw_value_1[ii] - sw_value_0[ii]) / (2. * hh);
-	EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-5);
+      for (int dd = 0; dd < 3; ++dd) {
+        std::vector posi_0 = posi_cpy;
+        std::vector posi_1 = posi_cpy;
+        posi_0[j_idx * 3 + dd] -= hh;
+        posi_1[j_idx * 3 + dd] += hh;
+        deepmd::env_mat_a_cpu(env, env_deriv, t_rij_0, posi_0,
+                                      atype_cpy, ii, fmt_nlist_a, sec_a,
+                                      rc_smth, rc);
+        deepmd::env_mat_a_cpu(env, env_deriv, t_rij_1, posi_1,
+                                      atype_cpy, ii, fmt_nlist_a, sec_a,
+                                      rc_smth, rc);
+        EXPECT_EQ(t_rij_0.size(), nnei * 3);
+        EXPECT_EQ(t_rij_1.size(), nnei * 3);
+        rij_0 = rij;
+        rij_1 = rij;
+        for (int dd1 = 0; dd1 < 3; ++dd1) {
+          rij_0[ii * nnei * 3 + jj * 3 + dd] = t_rij_0[jj * 3 + dd];
+          rij_1[ii * nnei * 3 + jj * 3 + dd] = t_rij_1[jj * 3 + dd];
+        }
+        deepmd::soft_min_switch_cpu(&sw_value_0[0], &sw_deriv_0[0],
+                                            &rij_0[0], &nlist[0], nloc, nnei,
+                                            alpha, rmin, rmax);
+        deepmd::soft_min_switch_cpu(&sw_value_1[0], &sw_deriv_1[0],
+                                            &rij_1[0], &nlist[0], nloc, nnei,
+                                            alpha, rmin, rmax);
+        double ana_deriv = sw_deriv[ii * nnei * 3 + jj * 3 + dd];
+        double num_deriv = (sw_value_1[ii] - sw_value_0[ii]) / (2. * hh);
+        EXPECT_LT(fabs(num_deriv - ana_deriv), 1e-5);
       }
     }
   }
diff --git a/source/lib/tests/test_soft_min_switch_force.cc b/source/lib/tests/test_soft_min_switch_force.cc
index da40ab662b..fa75a9a693 100644
--- a/source/lib/tests/test_soft_min_switch_force.cc
+++ b/source/lib/tests/test_soft_min_switch_force.cc
@@ -1,32 +1,29 @@
-#include 
 #include 
-#include "fmt_nlist.h"
+
+#include 
+
 #include "env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
 #include "soft_min_switch.h"
 #include "soft_min_switch_force.h"
 
-class TestSoftMinSwitchForce : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestSoftMinSwitchForce : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   double rc = 6;
   double rc_smth = 0.8;
   double alpha = .5;
   double rmin = 0.8;
   double rmax = 1.5;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 5, 10};
   std::vector sec_r = {0, 0, 0};
@@ -36,14 +33,35 @@ class TestSoftMinSwitchForce : public ::testing::Test
   std::vector rij;
   std::vector nlist;
   std::vector fmt_nlist_a;
-  std::vector expected_force = {
-    2.24044, -1.75363, -1.50088, -2.54065,  1.08035,  1.93630,  1.12909,  1.64972, -1.10112, -2.07854,  0.69062, -1.29217,  0.14032, -1.16008,  1.86286,  1.71311,  0.49339, -0.59049, -0.88441, -1.66176,  1.09480, -0.01957, -0.01188,  0.02612,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.36743,  0.67600, -0.43959, -0.03250, -0.00298,  0.00469, -0.02791,  0.00109, -0.00167, -0.00681, -0.00083,  0.00115,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, 
+  std::vector expected_force = {
+      2.24044,  -1.75363, -1.50088, -2.54065, 1.08035,  1.93630,  1.12909,
+      1.64972,  -1.10112, -2.07854, 0.69062,  -1.29217, 0.14032,  -1.16008,
+      1.86286,  1.71311,  0.49339,  -0.59049, -0.88441, -1.66176, 1.09480,
+      -0.01957, -0.01188, 0.02612,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.36743,  0.67600,
+      -0.43959, -0.03250, -0.00298, 0.00469,  -0.02791, 0.00109,  -0.00167,
+      -0.00681, -0.00083, 0.00115,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,
   };
-  
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
@@ -51,57 +69,53 @@ class TestSoftMinSwitchForce : public ::testing::Test
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
     rij.resize(nloc * nnei * 3);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
-      std::vector t_env, t_env_deriv, t_rij;
+      std::vector t_env, t_env_deriv, t_rij;
       // compute env_mat and its deriv, record
-      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-      for (int jj = 0; jj < nnei * 3; ++jj){
-	rij[ii*nnei*3 + jj] = t_rij[jj];
-      }      
+      deepmd::env_mat_a_cpu(t_env, t_env_deriv, t_rij, posi_cpy,
+                                    atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth,
+                                    rc);
+      for (int jj = 0; jj < nnei * 3; ++jj) {
+        rij[ii * nnei * 3 + jj] = t_rij[jj];
+      }
     }
     sw_value.resize(nloc);
     sw_deriv.resize(nloc * nnei * 3);
-    deepmd::soft_min_switch_cpu (&sw_value[0], &sw_deriv[0], &rij[0], &nlist[0], nloc, 
-				 nnei, alpha, rmin, rmax);
+    deepmd::soft_min_switch_cpu(&sw_value[0], &sw_deriv[0], &rij[0],
+                                        &nlist[0], nloc, nnei, alpha, rmin,
+                                        rmax);
     du.resize(nloc);
-    for (int ii = 0; ii < nloc; ++ii){
+    for (int ii = 0; ii < nloc; ++ii) {
       du[ii] = 1.0 - ii * 0.1;
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-TEST_F(TestSoftMinSwitchForce, cpu)
-{
+TEST_F(TestSoftMinSwitchForce, cpu) {
   std::vector force(nall * 3);
-  deepmd::soft_min_switch_force_cpu(
-      &force[0],
-      &du[0],
-      &sw_deriv[0],
-      &nlist[0],
-      nloc,
-      nall,
-      nnei);
+  deepmd::soft_min_switch_force_cpu(&force[0], &du[0], &sw_deriv[0], &nlist[0],
+                                    nloc, nall, nnei);
   EXPECT_EQ(force.size(), expected_force.size());
-  for (int jj = 0; jj < force.size(); ++jj){
-    EXPECT_LT(fabs(force[jj] - expected_force[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < force.size(); ++jj) {
+    EXPECT_LT(fabs(force[jj] - expected_force[jj]), 1e-5);
+  }
   // for (int ii = 0; ii < nall * 3; ++ii){
   //   printf("%8.5f, ", force[ii]);
-  // }  
+  // }
   // printf("\n");
 }
-
diff --git a/source/lib/tests/test_soft_min_switch_force_grad.cc b/source/lib/tests/test_soft_min_switch_force_grad.cc
index 0591b91e3f..d877b64a15 100644
--- a/source/lib/tests/test_soft_min_switch_force_grad.cc
+++ b/source/lib/tests/test_soft_min_switch_force_grad.cc
@@ -1,32 +1,29 @@
-#include 
 #include 
-#include "fmt_nlist.h"
+
+#include 
+
 #include "env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
 #include "soft_min_switch.h"
 #include "soft_min_switch_force_grad.h"
 
-class TestSoftMinSwitchForceGrad : public ::testing::Test
-{
-protected:
-  std::vector posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector atype = {0, 1, 1, 0, 1, 1};
-  std::vector posi_cpy;
-  std::vector atype_cpy;
-  int ntypes = 2;  
+class TestSoftMinSwitchForceGrad : public ::testing::Test {
+ protected:
+  std::vector posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector atype = {0, 1, 1, 0, 1, 1};
+  std::vector posi_cpy;
+  std::vector atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   double rc = 6;
   double rc_smth = 0.8;
   double alpha = .5;
   double rmin = 0.8;
   double rmax = 1.5;
-  SimulationRegion region;
+  SimulationRegion region;
   std::vector mapping, ncell, ngcell;
   std::vector sec_a = {0, 5, 10};
   std::vector sec_r = {0, 0, 0};
@@ -36,14 +33,15 @@ class TestSoftMinSwitchForceGrad : public ::testing::Test
   std::vector rij;
   std::vector nlist;
   std::vector fmt_nlist_a;
-  std::vector expected_grad_net = {
-    -0.62289, -0.08638, -1.94404,  0.01995,  0.04023,  0.01040,
+  std::vector expected_grad_net = {
+      -0.62289, -0.08638, -1.94404, 0.01995, 0.04023, 0.01040,
   };
-  
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
@@ -51,56 +49,53 @@ class TestSoftMinSwitchForceGrad : public ::testing::Test
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
     rij.resize(nloc * nnei * 3);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
-      std::vector<double> t_env, t_env_deriv, t_rij;
+      std::vector<double> t_env, t_env_deriv, t_rij;
       // compute env_mat and its deriv, record
-      deepmd::env_mat_a_cpu<double>(t_env, t_env_deriv, t_rij, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-      for (int jj = 0; jj < nnei * 3; ++jj){
-	rij[ii*nnei*3 + jj] = t_rij[jj];
-      }      
+      deepmd::env_mat_a_cpu<double>(t_env, t_env_deriv, t_rij, posi_cpy,
+                                    atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth,
+                                    rc);
+      for (int jj = 0; jj < nnei * 3; ++jj) {
+        rij[ii * nnei * 3 + jj] = t_rij[jj];
+      }
     }
     sw_value.resize(nloc);
     sw_deriv.resize(nloc * nnei * 3);
-    deepmd::soft_min_switch_cpu<double> (&sw_value[0], &sw_deriv[0], &rij[0], &nlist[0], nloc, 
-				 nnei, alpha, rmin, rmax);
+    deepmd::soft_min_switch_cpu<double>(&sw_value[0], &sw_deriv[0], &rij[0],
+                                        &nlist[0], nloc, nnei, alpha, rmin,
+                                        rmax);
     grad.resize(nloc * 3);
-    for (int ii = 0; ii < nloc; ++ii){
+    for (int ii = 0; ii < nloc; ++ii) {
       grad[ii] = 1.0 - ii * 0.1;
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-TEST_F(TestSoftMinSwitchForceGrad, cpu)
-{
+TEST_F(TestSoftMinSwitchForceGrad, cpu) {
   std::vector<double> grad_net(nloc);
-  deepmd::soft_min_switch_force_grad_cpu(
-      &grad_net[0],
-      &grad[0],
-      &sw_deriv[0],
-      &nlist[0],
-      nloc,
-      nnei);
+  deepmd::soft_min_switch_force_grad_cpu(&grad_net[0], &grad[0], &sw_deriv[0],
+                                         &nlist[0], nloc, nnei);
   EXPECT_EQ(grad_net.size(), expected_grad_net.size());
-  for (int jj = 0; jj < grad_net.size(); ++jj){
-    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < grad_net.size(); ++jj) {
+    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5);
+  }
   // for (int ii = 0; ii < nloc; ++ii){
   //   printf("%8.5f, ", grad_net[ii]);
-  // }  
+  // }
   // printf("\n");
 }
-
diff --git a/source/lib/tests/test_soft_min_switch_virial.cc b/source/lib/tests/test_soft_min_switch_virial.cc
index 69471eb9ce..fa1ec00ceb 100644
--- a/source/lib/tests/test_soft_min_switch_virial.cc
+++ b/source/lib/tests/test_soft_min_switch_virial.cc
@@ -1,32 +1,29 @@
-#include <iostream>
 #include <gtest/gtest.h>
-#include "fmt_nlist.h"
+
+#include <iostream>
+
 #include "env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
 #include "soft_min_switch.h"
 #include "soft_min_switch_virial.h"
 
-class TestSoftMinSwitchVirial : public ::testing::Test
-{
-protected:
-  std::vector<double> posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector<int> atype = {0, 1, 1, 0, 1, 1};
-  std::vector<double> posi_cpy;
-  std::vector<int> atype_cpy;
-  int ntypes = 2;  
+class TestSoftMinSwitchVirial : public ::testing::Test {
+ protected:
+  std::vector<double> posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector<int> atype = {0, 1, 1, 0, 1, 1};
+  std::vector<double> posi_cpy;
+  std::vector<int> atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   double rc = 6;
   double rc_smth = 0.8;
   double alpha = .5;
   double rmin = 0.8;
   double rmax = 1.5;
-  SimulationRegion<double> region;
+  SimulationRegion<double> region;
   std::vector<int> mapping, ncell, ngcell;
   std::vector<int> sec_a = {0, 5, 10};
   std::vector<int> sec_r = {0, 0, 0};
@@ -36,17 +33,80 @@ class TestSoftMinSwitchVirial : public ::testing::Test
   std::vector<double> rij;
   std::vector<int> nlist;
   std::vector<int> fmt_nlist_a;
-  std::vector<double> expected_virial = {
-    3.06079,  0.53537, -2.41160,  0.53537,  2.78437, -1.61110, -2.41160, -1.61110,  3.49276,
+  std::vector<double> expected_virial = {
+      3.06079,  0.53537,  -2.41160, 0.53537, 2.78437,
+      -1.61110, -2.41160, -1.61110, 3.49276,
   };
-  std::vector<double> expected_atom_virial = {
-    1.35376, -0.56712, -1.02447, -0.56712,  0.23758,  0.42917, -1.02447,  0.42917,  0.77527,  0.54989, -0.23036, -0.41613, -0.23036,  0.09650,  0.17433, -0.41613,  0.17433,  0.31491, -0.19229,  0.02835, -0.02453,  0.02835, -0.00576,  0.00594, -0.02453,  0.00594, -0.00660,  0.87609,  0.15772, -0.10844,  0.15772,  0.43891, -0.68116, -0.10844, -0.68116,  1.06620, -0.07197,  0.02107, -0.01394,  0.02107,  0.17539, -0.28148, -0.01394, -0.28148,  0.45073,  0.41749,  0.14761, -0.17569,  0.14761,  0.06082, -0.08056, -0.17569, -0.08056,  0.10804,  0.25593,  0.69163, -0.46058,  0.69163,  1.26326, -0.83076, -0.46058, -0.83076,  0.54550, -0.01118,  0.01474, -0.02646,  0.01474,  0.00500, -0.01216, -0.02646, -0.01216,  0.02792,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.14854,  0.28167, -0.17934,  0.28167,  0.51470, -0.33630, -0.17934, -0.33630,  0.21579, -0.12311, -0.01078,  0.01838, -0.01078, -0.00120,  0.00130,  0.01838,  0.00130, -0.00312, -0.11042,  0.00462, -0.00589,  0.00462, -0.00032, -0.00002, -0.00589, -0.00002, -0.00089, -0.03191, -0.00379,  0.00549, -0.00379, -0.00050,  0.00060,  0.00549,  0.00060, -0.00100,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, 
+  std::vector<double> expected_atom_virial = {
+      1.35376,  -0.56712, -1.02447, -0.56712, 0.23758,  0.42917,  -1.02447,
+      0.42917,  0.77527,  0.54989,  -0.23036, -0.41613, -0.23036, 0.09650,
+      0.17433,  -0.41613, 0.17433,  0.31491,  -0.19229, 0.02835,  -0.02453,
+      0.02835,  -0.00576, 0.00594,  -0.02453, 0.00594,  -0.00660, 0.87609,
+      0.15772,  -0.10844, 0.15772,  0.43891,  -0.68116, -0.10844, -0.68116,
+      1.06620,  -0.07197, 0.02107,  -0.01394, 0.02107,  0.17539,  -0.28148,
+      -0.01394, -0.28148, 0.45073,  0.41749,  0.14761,  -0.17569, 0.14761,
+      0.06082,  -0.08056, -0.17569, -0.08056, 0.10804,  0.25593,  0.69163,
+      -0.46058, 0.69163,  1.26326,  -0.83076, -0.46058, -0.83076, 0.54550,
+      -0.01118, 0.01474,  -0.02646, 0.01474,  0.00500,  -0.01216, -0.02646,
+      -0.01216, 0.02792,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.14854,  0.28167,  -0.17934, 0.28167,  0.51470,  -0.33630,
+      -0.17934, -0.33630, 0.21579,  -0.12311, -0.01078, 0.01838,  -0.01078,
+      -0.00120, 0.00130,  0.01838,  0.00130,  -0.00312, -0.11042, 0.00462,
+      -0.00589, 0.00462,  -0.00032, -0.00002, -0.00589, -0.00002, -0.00089,
+      -0.03191, -0.00379, 0.00549,  -0.00379, -0.00050, 0.00060,  0.00549,
+      0.00060,  -0.00100, 0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
+      0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
   };
-  
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
@@ -54,67 +114,62 @@ class TestSoftMinSwitchVirial : public ::testing::Test
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
     rij.resize(nloc * nnei * 3);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
-      std::vector<double> t_env, t_env_deriv, t_rij;
+      std::vector<double> t_env, t_env_deriv, t_rij;
       // compute env_mat and its deriv, record
-      deepmd::env_mat_a_cpu<double>(t_env, t_env_deriv, t_rij, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-      for (int jj = 0; jj < nnei * 3; ++jj){
-	rij[ii*nnei*3 + jj] = t_rij[jj];
-      }      
+      deepmd::env_mat_a_cpu<double>(t_env, t_env_deriv, t_rij, posi_cpy,
+                                    atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth,
+                                    rc);
+      for (int jj = 0; jj < nnei * 3; ++jj) {
+        rij[ii * nnei * 3 + jj] = t_rij[jj];
+      }
     }
     sw_value.resize(nloc);
     sw_deriv.resize(nloc * nnei * 3);
-    deepmd::soft_min_switch_cpu<double> (&sw_value[0], &sw_deriv[0], &rij[0], &nlist[0], nloc, 
-				 nnei, alpha, rmin, rmax);
+    deepmd::soft_min_switch_cpu<double>(&sw_value[0], &sw_deriv[0], &rij[0],
+                                        &nlist[0], nloc, nnei, alpha, rmin,
+                                        rmax);
     du.resize(nloc);
-    for (int ii = 0; ii < nloc; ++ii){
+    for (int ii = 0; ii < nloc; ++ii) {
       du[ii] = 1.0 - ii * 0.1;
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-TEST_F(TestSoftMinSwitchVirial, cpu)
-{
+TEST_F(TestSoftMinSwitchVirial, cpu) {
   std::vector<double> virial(9);
   std::vector<double> atom_virial(nall * 9);
-  deepmd::soft_min_switch_virial_cpu(
-      &virial[0],
-      &atom_virial[0],
-      &du[0],
-      &sw_deriv[0],
-      &rij[0],
-      &nlist[0],
-      nloc,
-      nall,
-      nnei);
+  deepmd::soft_min_switch_virial_cpu(&virial[0], &atom_virial[0], &du[0],
+                                     &sw_deriv[0], &rij[0], &nlist[0], nloc,
+                                     nall, nnei);
   EXPECT_EQ(virial.size(), expected_virial.size());
-  for (int jj = 0; jj < virial.size(); ++jj){
-    EXPECT_LT(fabs(virial[jj] - expected_virial[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < virial.size(); ++jj) {
+    EXPECT_LT(fabs(virial[jj] - expected_virial[jj]), 1e-5);
+  }
   EXPECT_EQ(atom_virial.size(), expected_atom_virial.size());
-  for (int jj = 0; jj < atom_virial.size(); ++jj){
-    EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < atom_virial.size(); ++jj) {
+    EXPECT_LT(fabs(atom_virial[jj] - expected_atom_virial[jj]), 1e-5);
+  }
   // for (int ii = 0; ii < 9; ++ii){
   //   printf("%8.5f, ", virial[ii]);
-  // }  
+  // }
   // for (int ii = 0; ii < 9 * nall; ++ii){
   //   printf("%8.5f, ", atom_virial[ii]);
-  // }  
+  // }
   // printf("\n");
 }
-
diff --git a/source/lib/tests/test_soft_min_switch_virial_grad.cc b/source/lib/tests/test_soft_min_switch_virial_grad.cc
index db5b05fe26..6962ab99b6 100644
--- a/source/lib/tests/test_soft_min_switch_virial_grad.cc
+++ b/source/lib/tests/test_soft_min_switch_virial_grad.cc
@@ -1,32 +1,29 @@
-#include <iostream>
 #include <gtest/gtest.h>
-#include "fmt_nlist.h"
+
+#include <iostream>
+
 #include "env_mat.h"
+#include "fmt_nlist.h"
 #include "neighbor_list.h"
 #include "soft_min_switch.h"
 #include "soft_min_switch_virial_grad.h"
 
-class TestSoftMinSwitchVirialGrad : public ::testing::Test
-{
-protected:
-  std::vector<double> posi = {12.83, 2.56, 2.18, 
-			       12.09, 2.87, 2.74,
-			       00.25, 3.32, 1.68,
-			       3.36, 3.00, 1.81,
-			       3.51, 2.51, 2.60,
-			       4.27, 3.22, 1.56
-  };
-  std::vector<int> atype = {0, 1, 1, 0, 1, 1};
-  std::vector<double> posi_cpy;
-  std::vector<int> atype_cpy;
-  int ntypes = 2;  
+class TestSoftMinSwitchVirialGrad : public ::testing::Test {
+ protected:
+  std::vector<double> posi = {12.83, 2.56, 2.18, 12.09, 2.87, 2.74,
+                              00.25, 3.32, 1.68, 3.36,  3.00, 1.81,
+                              3.51,  2.51, 2.60, 4.27,  3.22, 1.56};
+  std::vector<int> atype = {0, 1, 1, 0, 1, 1};
+  std::vector<double> posi_cpy;
+  std::vector<int> atype_cpy;
+  int ntypes = 2;
   int nloc, nall, nnei, ndescrpt;
   double rc = 6;
   double rc_smth = 0.8;
   double alpha = .5;
   double rmin = 0.8;
   double rmax = 1.5;
-  SimulationRegion<double> region;
+  SimulationRegion<double> region;
   std::vector<int> mapping, ncell, ngcell;
   std::vector<int> sec_a = {0, 5, 10};
   std::vector<int> sec_r = {0, 0, 0};
@@ -36,14 +33,15 @@ class TestSoftMinSwitchVirialGrad : public ::testing::Test
   std::vector<double> rij;
   std::vector<int> nlist;
   std::vector<int> fmt_nlist_a;
-  std::vector<double> expected_grad_net = {
-     0.42208, -0.12835,  1.44546,  0.53673, -0.31928,  2.41220, 
+  std::vector<double> expected_grad_net = {
+      0.42208, -0.12835, 1.44546, 0.53673, -0.31928, 2.41220,
   };
-  
+
   void SetUp() override {
     double box[] = {13., 0., 0., 0., 13., 0., 0., 0., 13.};
     region.reinitBox(box);
-    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc, region);
+    copy_coord(posi_cpy, atype_cpy, mapping, ncell, ngcell, posi, atype, rc,
+               region);
     nloc = posi.size() / 3;
     nall = posi_cpy.size() / 3;
     nnei = sec_a.back();
@@ -51,57 +49,53 @@ class TestSoftMinSwitchVirialGrad : public ::testing::Test
     nat_stt.resize(3);
     ext_stt.resize(3);
     ext_end.resize(3);
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       ext_stt[dd] = -ngcell[dd];
       ext_end[dd] = ncell[dd] + ngcell[dd];
     }
-    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt, ncell, ext_stt, ext_end, region, ncell);
+    build_nlist(nlist_a_cpy, nlist_r_cpy, posi_cpy, nloc, rc, rc, nat_stt,
+                ncell, ext_stt, ext_end, region, ncell);
     nlist.resize(nloc * nnei);
     rij.resize(nloc * nnei * 3);
-    for(int ii = 0; ii < nloc; ++ii){      
+    for (int ii = 0; ii < nloc; ++ii) {
       // format nlist and record
-      format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii, nlist_a_cpy[ii], rc, sec_a);
-      for (int jj = 0; jj < nnei; ++jj){
-	nlist[ii*nnei + jj] = fmt_nlist_a[jj];
+      format_nlist_i_cpu<double>(fmt_nlist_a, posi_cpy, atype_cpy, ii,
+                                 nlist_a_cpy[ii], rc, sec_a);
+      for (int jj = 0; jj < nnei; ++jj) {
+        nlist[ii * nnei + jj] = fmt_nlist_a[jj];
       }
-      std::vector<double> t_env, t_env_deriv, t_rij;
+      std::vector<double> t_env, t_env_deriv, t_rij;
       // compute env_mat and its deriv, record
-      deepmd::env_mat_a_cpu<double>(t_env, t_env_deriv, t_rij, posi_cpy, atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth, rc);    
-      for (int jj = 0; jj < nnei * 3; ++jj){
-	rij[ii*nnei*3 + jj] = t_rij[jj];
-      }      
+      deepmd::env_mat_a_cpu<double>(t_env, t_env_deriv, t_rij, posi_cpy,
+                                    atype_cpy, ii, fmt_nlist_a, sec_a, rc_smth,
+                                    rc);
+      for (int jj = 0; jj < nnei * 3; ++jj) {
+        rij[ii * nnei * 3 + jj] = t_rij[jj];
+      }
     }
     sw_value.resize(nloc);
     sw_deriv.resize(nloc * nnei * 3);
-    deepmd::soft_min_switch_cpu<double> (&sw_value[0], &sw_deriv[0], &rij[0], &nlist[0], nloc, 
-				 nnei, alpha, rmin, rmax);
+    deepmd::soft_min_switch_cpu<double>(&sw_value[0], &sw_deriv[0], &rij[0],
+                                        &nlist[0], nloc, nnei, alpha, rmin,
+                                        rmax);
     grad.resize(nloc * 3);
-    for (int ii = 0; ii < nloc; ++ii){
+    for (int ii = 0; ii < nloc; ++ii) {
       grad[ii] = 1.0 - ii * 0.1;
     }
   }
-  void TearDown() override {
-  }
+  void TearDown() override {}
 };
 
-TEST_F(TestSoftMinSwitchVirialGrad, cpu)
-{
+TEST_F(TestSoftMinSwitchVirialGrad, cpu) {
   std::vector<double> grad_net(nloc);
-  deepmd::soft_min_switch_virial_grad_cpu(
-      &grad_net[0],
-      &grad[0],
-      &sw_deriv[0],
-      &rij[0],
-      &nlist[0],
-      nloc,
-      nnei);
+  deepmd::soft_min_switch_virial_grad_cpu(&grad_net[0], &grad[0], &sw_deriv[0],
+                                          &rij[0], &nlist[0], nloc, nnei);
   EXPECT_EQ(grad_net.size(), expected_grad_net.size());
-  for (int jj = 0; jj < grad_net.size(); ++jj){
-    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]) , 1e-5);
-  }  
+  for (int jj = 0; jj < grad_net.size(); ++jj) {
+    EXPECT_LT(fabs(grad_net[jj] - expected_grad_net[jj]), 1e-5);
+  }
   // for (int ii = 0; ii < nloc; ++ii){
   //   printf("%8.5f, ", grad_net[ii]);
-  // }  
+  // }
   // printf("\n");
 }
-
diff --git a/source/lib/tests/test_tabulate_se_a.cc b/source/lib/tests/test_tabulate_se_a.cc
index 8903711904..f4cf9789cf 100644
--- a/source/lib/tests/test_tabulate_se_a.cc
+++ b/source/lib/tests/test_tabulate_se_a.cc
@@ -1,189 +1,659 @@
-#include 
+#include 
+
 #include 
+#include 
+
 #include "device.h"
 #include "tabulate.h"
-#include 
 #include "utilities.h"
 
-class TestTabulateSeA : public ::testing::Test
-{
-protected:
+class TestTabulateSeA : public ::testing::Test {
+ protected:
   // em_x = tf.random.uniform([4, 16], minval=0, maxval=0.2, dtype = tf.float64)
-  std::vector info = {
-    0, 0.2, 0.4, 0.01, 0.1, -1 
-  };  
-  std::vector em_x = {
-    0.0343909 ,
-    0.11357423,
-    0.0858676 ,
-    0.19337772,
-    0.1935728 ,
-    0.0477744 ,
-    0.05845198,
-    0.19080509,
-    0.16111261,
-    0.07179262,
-    0.10078013,
-    0.04640909,
-    0.10433399,
-    0.15650861,
-    0.17527857,
-    0.04249097
-  };
-  std::vector em = {
-    0.0343909 , 0.08394249, 0.06791791, 0.00903334, 0.11357423, 0.10597251,
-    0.05738069, 0.10071109, 0.0858676 , 0.17410445, 0.05390256, 0.09495758,
-    0.19337772, 0.02045487, 0.04095526, 0.18431305,
-    0.1935728 , 0.03930614, 0.0304133 , 0.15261676, 0.0477744,  0.06838737,
-    0.12824902, 0.14125861, 0.05845198, 0.12731053, 0.0315968,  0.14927774,
-    0.19080509, 0.19206871, 0.14361383, 0.04083437,
-    0.16111261, 0.19944826, 0.16563484, 0.00797179, 0.07179262, 0.16993159,
-    0.01834742, 0.08405   , 0.10078013, 0.0773945 , 0.09541813, 0.0042979,
-    0.04640909, 0.07968697, 0.18046262, 0.11724063,
-    0.10433399, 0.16910201, 0.10653732, 0.07434702, 0.15650861, 0.0350976,
-    0.04088021, 0.15753491, 0.17527857, 0.03178642, 0.01599623, 0.08095053,
-    0.04249097, 0.17082205, 0.18275348, 0.02921504
-  }; 
+  std::vector info = {0, 0.2, 0.4, 0.01, 0.1, -1};
+  std::vector em_x = {0.0343909,  0.11357423, 0.0858676,  0.19337772,
+                              0.1935728,  0.0477744,  0.05845198, 0.19080509,
+                              0.16111261, 0.07179262, 0.10078013, 0.04640909,
+                              0.10433399, 0.15650861, 0.17527857, 0.04249097};
+  std::vector em = {
+      0.0343909,  0.08394249, 0.06791791, 0.00903334, 0.11357423, 0.10597251,
+      0.05738069, 0.10071109, 0.0858676,  0.17410445, 0.05390256, 0.09495758,
+      0.19337772, 0.02045487, 0.04095526, 0.18431305, 0.1935728,  0.03930614,
+      0.0304133,  0.15261676, 0.0477744,  0.06838737, 0.12824902, 0.14125861,
+      0.05845198, 0.12731053, 0.0315968,  0.14927774, 0.19080509, 0.19206871,
+      0.14361383, 0.04083437, 0.16111261, 0.19944826, 0.16563484, 0.00797179,
+      0.07179262, 0.16993159, 0.01834742, 0.08405,    0.10078013, 0.0773945,
+      0.09541813, 0.0042979,  0.04640909, 0.07968697, 0.18046262, 0.11724063,
+      0.10433399, 0.16910201, 0.10653732, 0.07434702, 0.15650861, 0.0350976,
+      0.04088021, 0.15753491, 0.17527857, 0.03178642, 0.01599623, 0.08095053,
+      0.04249097, 0.17082205, 0.18275348, 0.02921504};
   std::vector table = {
-    6.348551343037398542e-01, 4.209465843706336474e-04, 6.390862740714405368e-03, -1.544448595628262176e-04, -1.891095227974180087e-04, 2.695025951562175852e-05, -1.317549846042939343e+00, -5.624478206903206490e-02, 1.274284553146523905e-02, -6.836227424141475689e-04, -1.438066096020836407e-04, -1.854932873974712940e-06, -9.996964112615246423e-01, 6.928234423723647617e-02, -4.974719973810486084e-03, -2.019584729176823030e-04, 1.077254539742680247e-04, -8.024209768588029797e-06, 3.552689563657350780e-01, -3.578299775339799371e-02, -1.319946251007718743e-03, 1.016701374495701440e-03, -1.057336720791906388e-04,  5.182678943855506567e-06, 1.227750369557627286e+00, 4.100352079064395472e-02, 3.586869164810712295e-03, -4.304540913340443135e-04, -1.269943482892440004e-04, 1.459465404430219674e-05, -1.472642501673147031e+00, -1.611354921283318364e-01, 1.645427874390196360e-02, 2.107392978135091402e-04, -2.193541011180757461e-04, 1.915392497459551146e-05, -2.855174490181606739e-01, 9.774337856626263976e-02, -2.140891880666230714e-03, -7.148328890055103638e-04, 1.965696332267534503e-05,-4.593489654121371453e-06, -1.468441009949382314e+00, -6.360828127262234399e-02, 4.751283295356955282e-03, 8.711899561753186068e-05, -9.937008678852959884e-06,  4.273569346584811685e-07,
-    6.348599826995243722e-01, 5.487167506364742930e-04, 6.386116198716365253e-03, -1.619832375568118791e-04, -1.877328309473502049e-04, 2.134130914519164856e-05, -1.318111020264137512e+00, -5.599013082054477008e-02, 1.272225054666903735e-02, -6.893710047488201898e-04, -1.434367581078517366e-04, 3.329508890614227371e-05 , -9.990040854920316793e-01, 6.918278968071900348e-02, -4.980714172967731085e-03, -1.976574487947816198e-04, 1.070037204086153902e-04, -7.859875077388093586e-06, 3.549109954092205532e-01, -3.580909209068139365e-02, -1.289508598157979719e-03, 1.012474257117017967e-03, -1.054418924402112718e-04, -1.245498322204730900e-05, 1.228160763020727630e+00, 4.107512853046493134e-02, 3.573879491390910459e-03, -4.355190226638688713e-04, -1.258433981470396103e-04, 1.610862268100766631e-05, -1.474252210958008291e+00, -1.608063442081248406e-01, 1.646046950167207382e-02, 2.019843636566674109e-04, -2.185756589083626730e-04, 1.978479879983412190e-05, -2.845402300363228942e-01, 9.770034635718018168e-02, -2.162325119197382531e-03, -7.140472215558940627e-04, 1.956302663031799223e-05, 1.932584474244053378e-05, -1.469076617546759334e+00, -6.351322951074317436e-02, 4.753890907276497185e-03, 8.672114560243554321e-05, -1.004574434175897967e-05, -4.345700882560937596e-06,
-    6.348661083147921769e-01, 6.763897297752743953e-04, 6.381144275303845745e-03, -1.694690463885140694e-04, -1.868179426353836598e-04, 3.439291082765030046e-05, -1.318669650038090335e+00, -5.573589319299507294e-02, 1.270148368741391351e-02, -6.950749719342792137e-04, -1.422194703304518733e-04, 3.454751241752252323e-05 , -9.983127558632299836e-01, 6.908311652764687061e-02, -4.986579772806746212e-03, -1.933888092529071571e-04, 1.068327546750306073e-04, -2.976978385983384886e-05, 3.545527765488725169e-01, -3.583457894275744043e-02, -1.259197760082061621e-03, 1.008246479193084487e-03, -1.059401869200098984e-04,  1.721968053146218465e-06, 1.228571871257205572e+00, 4.114647496201748883e-02, 3.560738575723638825e-03, -4.405332425718102457e-04, -1.251648759618972115e-04, 3.659080417076460655e-05, -1.475858628153338792e+00, -1.604770750960976822e-01, 1.646639808472218428e-02, 1.932598402043995316e-04, -2.175904819601363058e-04, 1.230256868634094333e-05, -2.835634435191126679e-01, 9.765688571984927624e-02, -2.183734604613508240e-03, -7.132463811570244078e-04, 2.021887442373574272e-05, 1.321401495096886281e-05, -1.469711274366155784e+00, -6.341812571665436660e-02, 4.756486470714936521e-03, 8.631384191910702040e-05, -1.010516500002806932e-05, -1.110874413279218719e-05,
-    6.348735101551836735e-01, 8.039610290153098582e-04, 6.375948457075718626e-03, -1.769074132993461279e-04, -1.855677150383903214e-04, 3.421271436711027645e-05, -1.319225739518145257e+00, -5.548207260888919634e-02, 1.268054645200545304e-02, -7.007297564176242621e-04, -1.408885818822980523e-04, 3.124701885930576017e-05 , -9.976224235482542557e-01, 6.898332734138989952e-02, -4.992317635216104131e-03, -1.891404922064061889e-04, 1.053957535708985289e-04, -1.089286646983666076e-06, 3.541943058468561834e-01, -3.585946084769019160e-02, -1.229013912637771933e-03, 1.004009466262262241e-03, -1.059129033455631863e-04, -4.941663399086282537e-06, 1.228983691638902087e+00, 4.121755707472917613e-02, 3.547447845420277635e-03, -4.455036207721562607e-04, -1.239172256532283074e-04, 3.437341080261359686e-05, -1.477461752073406132e+00, -1.601476900261984693e-01, 1.647206544856073471e-02, 1.845724864086241608e-04, -2.173853638475303177e-04, 3.620505631412716563e-05, -2.825870937484175061e-01, 9.761299713537928413e-02, -2.205119732548723246e-03, -7.124245958910824846e-04, 2.074820558303217398e-05, 1.209381466404663338e-05, -1.470344979888463577e+00, -6.332297013406351649e-02, 4.759069711794740656e-03, 8.589935708505183382e-05, -1.045842324058424788e-05, -6.134254562752213537e-06,
-    6.348821871815598650e-01, 9.314261853726121809e-04, 6.370530236175125580e-03, -1.842978984547447257e-04, -1.840210089691990327e-04, 2.234897510077387526e-05, -1.319779292891724465e+00, -5.522867246076747227e-02, 1.265944033870337014e-02, -7.063360380236871801e-04, -1.393416734992873119e-04, 1.931167378610719847e-05 , -9.969330896946905218e-01, 6.888342466806646192e-02, -4.997928623431705138e-03, -1.849303524006284602e-04, 1.053651633995249134e-04, -2.870133904891753420e-05, 3.538355893399378616e-01, -3.588374034700148041e-02, -1.198957225773849763e-03, 9.997681359810027708e-04, -1.060678155548662341e-04, -4.107776618240329050e-06, 1.229396221507694564e+00, 4.128837188660083868e-02, 3.534008730169808672e-03, -4.504275777948374090e-04, -1.224778886969254976e-04, 2.455513266683544498e-05, -1.479061581584721008e+00, -1.598181942132129441e-01, 1.647747255391585064e-02, 1.759082956613747337e-04, -2.158335508261176197e-04, 6.406725844410341030e-06, -2.816111850012528728e-01, 9.756868109694678826e-02, -2.226479900633348240e-03, -7.115823288942964460e-04, 2.121038517729223415e-05, 1.358027318850170435e-05, -1.470977733597038872e+00, -6.322776301216057049e-02, 4.761640356162846754e-03, 8.547576468445008296e-05, -1.081874527005240631e-05, -8.845528475774308509e-07,
-    6.348921383103013349e-01, 1.058780765759985421e-03, 6.364891110105044131e-03, -1.916363332792569681e-04, -1.827768871456785058e-04, 2.275707291847725182e-05, -1.320330314380025793e+00, -5.497569611120622923e-02, 1.263816684562326688e-02, -7.118908987616576157e-04, -1.380182662155302303e-04, 1.630252530406085050e-05 , -9.962447554247517711e-01, 6.878341103651769428e-02, -5.003413601927745452e-03, -1.807403991329658622e-04, 1.040363362483998831e-04, -4.422604643727719699e-06, 3.534766330394523148e-01, -3.590741998555346121e-02, -1.169027863565602274e-03, 9.955202772264954043e-04, -1.060447700647724903e-04, -1.021743279826507342e-05, 1.229809458175783687e+00, 4.135891644424664892e-02, 3.520422661584679015e-03, -4.553035794622276055e-04, -1.210679214963379874e-04, 1.595827246550979495e-05, -1.480658115605847147e+00, -1.594885928526604546e-01, 1.648262036665308974e-02, 1.672799673730459213e-04, -2.148155690753495697e-04,-1.867405535452657550e-06, -2.806357215496423363e-01, 9.752393810975558408e-02, -2.247814508535729908e-03, -7.107227883497464890e-04, 2.207595560206285042e-05,-1.137331983229785190e-06, -1.471609534977757372e+00, -6.313250460562676303e-02, 4.764198129054059844e-03, 8.503999275315992160e-05, -1.072692568096017848e-05, -1.373273803695183988e-05,
-    6.349033624136081189e-01, 1.186020367092407990e-03, 6.359032581545111251e-03, -1.989262833250400370e-04, -1.812752661309344573e-04, 1.302837915648187095e-05, -1.320878808237722746e+00, -5.472314689282183064e-02, 1.261672747063919374e-02, -7.173917679890315846e-04, -1.373052781380030543e-04, 3.768455339511444900e-05 , -9.955574218354472649e-01, 6.868328895828368363e-02, -5.008773436308684712e-03, -1.765844799686671349e-04, 1.034810966435298563e-04, -1.111176255155353207e-05, 3.531174429312692320e-01, -3.593050231143132822e-02, -1.139225984250480384e-03, 9.912704081392112714e-04, -1.064918174657224404e-04,  2.680738443515978403e-06, 1.230223398925979650e+00, 4.142918782293085467e-02, 3.506691073047987512e-03, -4.601302388532728274e-04, -1.198865987378785417e-04, 1.656386182477533959e-05, -1.482251353107205460e+00, -1.591588911206925361e-01, 1.648750985769346228e-02, 1.586901819247656846e-04, -2.147074421644348298e-04, 2.641762503224190698e-05, -2.796607076604977760e-01, 9.747876869099537933e-02, -2.269122958003529523e-03, -7.098388532529275848e-04, 2.226701915637888804e-05, 1.106237844209756009e-05, -1.472240383519069384e+00, -6.303719517464229094e-02, 4.766742755353862819e-03, 8.459962202271287246e-05, -1.132218730142039535e-05,  8.958476322974335592e-07,
-    6.349158583197994643e-01, 1.313140616388666637e-03, 6.352956158169477396e-03, -2.061601622854974502e-04, -1.806298821034440756e-04, 3.770936817966389514e-05, -1.321424778752664952e+00, -5.447102810827629538e-02, 1.259512371128685033e-02, -7.228490733933210606e-04, -1.356407402355522122e-04, 2.099832634320949299e-05 , -9.948710899987588396e-01, 6.858306092758209571e-02, -5.014008993202081696e-03, -1.724573933478598642e-04, 1.029144894329912032e-04, -1.738522780636760158e-05, 3.527580249757622521e-01, -3.595298987582695727e-02, -1.109551740263377793e-03, 9.870126155001155040e-04, -1.064931456292656029e-04, -2.059910396978558087e-06, 1.230638041011988815e+00, 4.149918312660194619e-02, 3.492815399561766294e-03, -4.649051157564728157e-04, -1.192927614880224277e-04, 4.072077917749542957e-05, -1.483841293110880866e+00, -1.588290941739924356e-01, 1.649214200293154520e-02, 1.501282794678792006e-04, -2.138853834118830831e-04, 2.633111784219914963e-05, -2.786861475954987011e-01, 9.743317336979973042e-02, -2.290404652904617314e-03, -7.089360554728917595e-04, 2.260180638238835256e-05, 1.741828165826791135e-05, -1.472870278712053782e+00, -6.294183498489253070e-02, 4.769273959660644442e-03, 8.414681093302789892e-05, -1.142905205912834352e-05, -4.014065121916994726e-06,
-    6.349296248136164778e-01, 1.440137170869312810e-03, 6.346663352465874847e-03, -2.133510744796659759e-04, -1.788513201196447670e-04, 1.721163944875696416e-05, -1.321968230245579967e+00, -5.421934303028537461e-02, 1.257335706466754244e-02, -7.282542863230233527e-04, -1.343059033644905889e-04, 1.747822893445653714e-05 , -9.941857609618123259e-01, 6.848272942128874607e-02, -5.019121140152461337e-03, -1.683596869525186377e-04, 1.024142382012053007e-04, -2.632719129544749384e-05, 3.523983851077774343e-01, -3.597488523292310947e-02, -1.080005278271846739e-03, 9.827512175914082399e-04, -1.066680880078371994e-04,  3.403258606315080555e-07, 1.231053381658700818e+00, 4.156889948792314576e-02, 3.478797077596604108e-03, -4.696409807358484993e-04, -1.173636798436718986e-04, 1.149931408689037458e-05, -1.485427934690428442e+00, -1.584992071496764965e-01, 1.649651778315383566e-02, 1.415960091521040870e-04, -2.125888038426753843e-04, 7.384582528889821378e-06, -2.777120456109742896e-01, 9.738715268720327112e-02, -2.311658999267464203e-03, -7.080165982958596923e-04, 2.340034491729013294e-05, 5.174033942788913380e-06, -1.473499220050474623e+00, -6.284642430757329812e-02, 4.771791466347353149e-03, 8.368540130389298475e-05, -1.162498575113560591e-05, -5.381585801785509468e-06,
-    6.349446606365225509e-01, 1.567005718051586727e-03, 6.340155681555815353e-03, -2.204854663573854625e-04, -1.779502948888764897e-04, 3.196283450610521294e-05, -1.322509167069771951e+00, -5.396809490162747525e-02, 1.255142902735281209e-02, -7.336077414823606981e-04, -1.332538502428148267e-04, 2.525523713666122703e-05 , -9.935014357470516311e-01, 6.838229689892011409e-02, -5.024110745516051704e-03, -1.642860423419652261e-04, 1.011792892256958577e-04, -5.902237032851650630e-06, 3.520385292366049468e-01, -3.599619093977864809e-02, -1.050586739210998023e-03, 9.784837539753422735e-04, -1.066187407206570670e-04, -6.052991441884039902e-06, 1.231469418062474341e+00, 4.163833406830096812e-02, 3.464637544942418459e-03, -4.743218246565151001e-04, -1.164951133813105271e-04, 2.473911917278243621e-05, -1.487011276970676033e+00, -1.581692351651968476e-01, 1.650063818395723983e-02, 1.331001312464952355e-04, -2.118074389246019866e-04, 9.192428068946771109e-06, -2.767384059577842614e-01, 9.734070719609828892e-02, -2.332885405321092481e-03, -7.070743922828596519e-04, 2.373777250910882265e-05, 1.127700884024945933e-05, -1.474127207030835107e+00, -6.275096341939470634e-02, 4.774294999622533293e-03, 8.321347296773265077e-05, -1.162225195759229858e-05, -1.468175407624093560e-05,
-    6.349609644870094494e-01, 1.693741975839754832e-03, 6.333434667015966531e-03, -2.275719866012916918e-04, -1.766077012712487378e-04, 2.919052022666632077e-05, -1.323047593610823247e+00, -5.371728693515605280e-02, 1.252934109528984138e-02, -7.389107006611626187e-04, -1.322992615601379437e-04, 3.689337377145077536e-05 , -9.928181153524118230e-01, 6.828176580261838269e-02, -5.028978678356570489e-03, -1.602449667799085492e-04, 1.004819833385002965e-04, -7.012859043909368637e-06, 3.516784632459502014e-01, -3.601690955621394963e-02, -1.021296258318379370e-03, 9.742140050919662845e-04, -1.068837890347894775e-04,  3.261791903209577241e-07, 1.231886147391427544e+00, 4.170748405790913882e-02, 3.450338240560582581e-03, -4.789562532735843967e-04, -1.153902983973557932e-04, 2.856018069496295048e-05, -1.488591319127526624e+00, -1.578391833182464787e-01, 1.650450419566778376e-02, 1.246407552546250339e-04, -2.115332183818513349e-04, 3.149345367837511192e-05, -2.757652328811996956e-01, 9.729383746118988596e-02, -2.354083281534554220e-03, -7.061133365182417328e-04, 2.418809213597686327e-05, 1.280494807360028992e-05, -1.474754239152433311e+00, -6.265545260258377491e-02, 4.776784283590801948e-03, 8.273687806363864625e-05, -1.229952261449745124e-05,  3.204146150058887708e-06,
-    6.349785350208994039e-01, 1.820341692612803541e-03, 6.326501834700739083e-03, -2.346100929840904846e-04, -1.748840426396014729e-04, 1.130785525935554482e-05, -1.323583514286295282e+00, -5.346692231381247606e-02, 1.250709476370755191e-02, -7.441705970339035966e-04, -1.303302437099287372e-04, 7.935577538626925858e-06 , -9.921358007514943234e-01, 6.818113855713830995e-02, -5.033725808341922223e-03, -1.562353718150353687e-04, 1.001568149392305130e-04, -2.302258383924021595e-05, 3.513181929939074299e-01, -3.603704364469759169e-02, -9.921339651685744804e-04, 9.699384566370250092e-04, -1.069081013817698415e-04, -2.744679484186812129e-06, 1.232303566785723392e+00, 4.177634667571154814e-02, 3.435900604437185177e-03, -4.835440426346156498e-04, -1.140781768005934266e-04, 2.411509316948267986e-05, -1.490168060387760951e+00, -1.575090566866652331e-01, 1.650811681325956015e-02, 1.162064642248029450e-04, -2.100324946396962247e-04, 4.868837971279583202e-06, -2.747925306207861240e-01, 9.724654405895133413e-02, -2.375252040655950400e-03, -7.051355614741510987e-04, 2.505903781065493165e-05,-2.569082101323676566e-06, -1.475380315917416585e+00, -6.255989214488603956e-02, 4.779259042312647421e-03, 8.224491253736542200e-05, -1.205054378062991984e-05, -1.594987943813344381e-05,
-    6.349973708516511994e-01, 1.946800647308156995e-03, 6.319358714566076195e-03, -2.415904693897710526e-04, -1.741570105122868483e-04, 3.342152683043006766e-05, -1.324116933545430141e+00, -5.321700419064152865e-02, 1.248469152702344660e-02, -7.493727578058629766e-04, -1.295525827398787404e-04, 2.659942231629285135e-05 , -9.914544928937398804e-01, 6.808041756983601589e-02, -5.038353005641925050e-03, -1.522500103683389601e-04, 9.911425811568465554e-05, -1.035676665958809070e-05, 3.509577243129330393e-01, -3.605659577023319351e-02, -9.630999837076988784e-04, 9.656594578503095369e-04, -1.070158919994286978e-04, -2.281503112307771063e-06, 1.232721673357858538e+00, 4.184491916948063911e-02, 3.421326077437690516e-03, -4.880823132679394552e-04, -1.129872290747681817e-04, 2.854952342195995698e-05, -1.491741500028839651e+00, -1.571788603283475749e-01, 1.651147703627379656e-02, 1.078118218043548068e-04, -2.094656285123614196e-04, 1.573608604543182341e-05, -2.738203034102859035e-01, 9.719882757757769554e-02, -2.396391097750961291e-03, -7.041328812172977002e-04, 2.511128111671661627e-05, 1.472819566023977703e-05, -1.476005436830838402e+00, -6.246428233956573262e-02, 4.781718999863710830e-03, 8.175246233396933941e-05, -1.310850420537104008e-05,  1.717274673157189222e-05,
-    6.350174705506670403e-01, 2.073114649501703322e-03, 6.312006840494438151e-03, -2.485262001215581039e-04, -1.724445833892894095e-04, 1.623821996891234705e-05, -1.324647855868849478e+00, -5.296753568880858964e-02, 1.246213287875118370e-02, -7.545274547770323926e-04, -1.284298383236558551e-04, 3.142127009671183137e-05 , -9.907741927046019859e-01, 6.797960523066012839e-02, -5.042861140826992473e-03, -1.482946605870891395e-04, 9.821987974303589589e-05, -3.593831829470692349e-06, 3.505970630098214080e-01, -3.607556850024738748e-02, -9.341944322877257512e-04, 9.613773761737330267e-04, -1.072343182304808093e-04,  2.791451096706449119e-06, 1.233140464192951757e+00, 4.191319881581374862e-02, 3.406616101162745613e-03, -4.925758895926437772e-04, -1.113902906060245713e-04, 1.275308331152581608e-05, -1.493311637378700762e+00, -1.568485992811522733e-01, 1.651458586873823589e-02, 9.944841367174414462e-05, -2.085492230796830474e-04, 1.276456024245067926e-05, -2.728485554775001987e-01, 9.715068861693920699e-02, -2.417499870240937074e-03, -7.031148500958378164e-04, 2.576543833825076558e-05, 7.841889896124507091e-06, -1.476629601400710978e+00, -6.236862348540499201e-02, 4.784163880393361643e-03, 8.124213252544174404e-05, -1.286332078849730127e-05, -1.821996546344873330e-06,
-    6.350388326475970846e-01, 2.199279539485121671e-03, 6.304447750121061969e-03, -2.554047701160370044e-04, -1.716061813901302753e-04, 3.413524324276134592e-05, -1.325176285768258300e+00, -5.271851990161838253e-02, 1.243942031140890699e-02, -7.596346042592860793e-04, -1.269803855069738714e-04, 2.314478643438959578e-05 , -9.900949010857222898e-01, 6.787870391214460841e-02, -5.047251084767826433e-03, -1.443753107913585767e-04, 9.837034053479728221e-05, -3.865274593462701621e-05, 3.502362148656810170e-01, -3.609396440447816545e-02, -9.054174237006253068e-04, 9.570894530963515055e-04, -1.071221722792567601e-04, -5.180134097885568801e-06, 1.233559936349031494e+00, 4.198118292014653419e-02, 3.391772117805412056e-03, -4.970162819604460663e-04, -1.105584293158747960e-04, 2.757032189173095048e-05, -1.494878471815561216e+00, -1.565182785628131401e-01, 1.651744431908664865e-02, 9.112268062696188113e-05, -2.082277461664644284e-04, 3.370820636496137736e-05, -2.718772910441742408e-01, 9.710212778853387350e-02, -2.438577777940475859e-03, -7.020756635958485484e-04, 2.613933618298708639e-05, 1.211520684095310762e-05, -1.477252809138063672e+00, -6.227291588670166161e-02, 4.786593408182711167e-03, 8.072392747742672100e-05, -1.281499371544444526e-05, -1.293175202324119235e-05,
-    6.350614556306495295e-01, 2.325291188338546311e-03, 6.296682984661446623e-03, -2.622362895631248896e-04, -1.701076322674243866e-04, 2.573454296903621253e-05, -1.325702227786145437e+00, -5.246995989253622206e-02, 1.241655531642829255e-02, -7.646904682589584622e-04, -1.257704658362481128e-04, 2.439373356208127567e-05 , -9.894166189151047952e-01, 6.777771596940393439e-02, -5.051523708536139086e-03, -1.404733355821404265e-04, 9.677082285072928253e-05, -3.720510878458014501e-06, 3.498751856359115786e-01, -3.611178605486395354e-02, -8.767690652124425499e-04, 9.527998576480508275e-04, -1.072771816869139909e-04, -2.281376475091892258e-06, 1.233980086857325631e+00, 4.204886881676297983e-02, 3.376795570009583514e-03, -5.014114486109571937e-04, -1.092957353261917852e-04, 2.516456964431257380e-05, -1.496442002767713664e+00, -1.561879031708521548e-01, 1.652005340007862977e-02, 8.282284133744905071e-05, -2.067123325224875000e-04, 7.057486539657783089e-06, -2.709065143258797548e-01, 9.705314571543909030e-02, -2.459624243094573216e-03, -7.010187162791577066e-04, 2.672975399789282626e-05, 7.629793933874534523e-06, -1.477875059556995385e+00, -6.217715985326619649e-02, 4.789007307701962507e-03, 8.019935829649041371e-05, -1.318861260046749971e-05, -7.150339348059032240e-06,
-    6.350853379468965887e-01, 2.451145498001100487e-03, 6.288714088740080324e-03, -2.690159202421790068e-04, -1.686584359429067433e-04, 1.941481480743946700e-05, -1.326225686495484890e+00, -5.222185869521017709e-02, 1.239353938406437261e-02, -7.696964132049412353e-04, -1.246012242240120604e-04, 2.724071141974432252e-05 , -9.887393470472876089e-01, 6.767664374012982709e-02, -5.055679883306329545e-03, -1.366074591188833347e-04, 9.623033677044332457e-05, -1.113456896173822779e-05, 3.495139810501832756e-01, -3.612903602543367232e-02, -8.482494585971035728e-04, 9.485064841097947883e-04, -1.073561607316583907e-04, -2.239996380309942211e-06, 1.234400912722548371e+00, 4.211625386880359784e-02, 3.361687900729734210e-03, -5.057597926077623488e-04, -1.078411892315765344e-04, 1.508800592977199686e-05, -1.498002229713325750e+00, -1.558574780824932282e-01, 1.652241412871961052e-02, 7.456368677257522147e-05, -2.062001731191939454e-04, 2.069621557469772063e-05, -2.699362295319003291e-01, 9.700374303226286243e-02, -2.480638690415259105e-03, -6.999405672986690023e-04, 2.700789474676622474e-05, 1.556143061449123430e-05, -1.478496352174730522e+00, -6.208135570041733303e-02, 4.791405303667145565e-03, 7.966538051836852740e-05, -1.352687841609079228e-05, -2.789411930543395566e-06,
-    6.351104780025849106e-01, 2.576838401336829787e-03, 6.280542610220480118e-03, -2.757414391158645754e-04, -1.675762649448408429e-04, 2.787462665161048641e-05, -1.326746666499438287e+00, -5.197421931349595348e-02, 1.237037400330611749e-02, -7.746541492504023475e-04, -1.232228491818352083e-04, 2.166599538617633252e-05 , -9.880630863135209108e-01, 6.757548954459043078e-02, -5.059720480258220535e-03, -1.327693574508429343e-04, 9.550030312894054513e-05, -1.096549240339310371e-05, 3.491526068124157778e-01, -3.614571689219699124e-02, -8.198587001702131727e-04, 9.442100079790295610e-04, -1.074330339280879455e-04, -2.103241190440061311e-06, 1.234822410923189784e+00, 4.218333546826981417e-02, 3.346450553092000530e-03, -5.100549148199152614e-04, -1.071543306169886722e-04, 3.572075491055831030e-05, -1.499559152180234056e+00, -1.555270082545787691e-01, 1.652452752618108200e-02, 6.633607063542407416e-05, -2.052990867644106118e-04, 1.891505702101457936e-05, -2.689664408651156746e-01, 9.695392038509384469e-02, -2.501620547117759490e-03, -6.988464710389351081e-04, 2.774961528830105395e-05, 4.843681010028069226e-06, -1.479116686511674494e+00, -6.198550374897651011e-02, 4.793787121096219732e-03, 7.912045955652986253e-05, -1.359696279035538403e-05, -9.132339849453571562e-06,
-    6.351368741634448867e-01, 2.702365862198193025e-03, 6.272170100036473551e-03, -2.824171711189519380e-04, -1.661976899287730559e-04, 2.457347650017094835e-05, -1.327265172431057128e+00, -5.172704472148267896e-02, 1.234706066178771662e-02, -7.795630288411945592e-04, -1.217395799935142969e-04, 1.184741714306808905e-05 , -9.873878375219384829e-01, 6.747425568563097942e-02, -5.063646370480812467e-03, -1.289626891970745083e-04, 9.513074838211379970e-05, -2.521433322545949321e-05, 3.487910686007592576e-01, -3.616183123303555458e-02, -7.915968808226425679e-04, 9.399119246579864433e-04, -1.077055728285351480e-04,  6.031191175422362627e-06, 1.235244578411804905e+00, 4.225011103602600848e-02, 3.331084970256580589e-03, -5.143079026275864784e-04, -1.055716785023949844e-04, 2.051193936812822612e-05, -1.501112769745742259e+00, -1.551964986234863897e-01, 1.652639461772111712e-02, 5.814089462644928566e-05, -2.041249358339155683e-04, 6.311073191969795411e-06, -2.679971525218879380e-01, 9.690367843145115956e-02, -2.522569242956208650e-03, -6.977319783847560700e-04, 2.827424678587480721e-05, 2.739673941330651616e-06, -1.479736062091468574e+00, -6.188960432526132566e-02, 4.796152485364500034e-03, 7.856828747830194362e-05, -1.395147193446202365e-05, -4.087221013031299888e-06,
-    6.351645247550001816e-01, 2.827723875485507743e-03, 6.263598112024793517e-03, -2.890409134869928735e-04, -1.648390823803598971e-04, 2.215887759642637032e-05, -1.327781208952985015e+00, -5.148033786352124164e-02, 1.232360084570068709e-02, -7.844171563535663055e-04, -1.210428935521009746e-04, 3.344327592646507844e-05 , -9.867136014577331249e-01, 6.737294444867666932e-02, -5.067458424877044516e-03, -1.251812701937470213e-04, 9.419473244264059593e-05, -1.679002076268449654e-05, 3.484293720675762929e-01, -3.617738162759492893e-02, -7.634640860539731316e-04, 9.356082122653546981e-04, -1.075431084112703954e-04, -3.044614041061100766e-06, 1.235667412115300623e+00, 4.231657802179918798e-02, 3.315592595281378029e-03, -5.185116053649769336e-04, -1.041674655671950871e-04, 1.242766263135090892e-05, -1.502663082036415076e+00, -1.548659541050484978e-01, 1.652801643260504508e-02, 4.998556989557471122e-05, -2.037688261998792680e-04, 2.657243869390409541e-05, -2.670283686919466826e-01, 9.685301784023310490e-02, -2.543484210258855835e-03, -6.965966582328896994e-04, 2.850491087748043708e-05, 1.232179636112698650e-05, -1.480354478441044286e+00, -6.179365776107784841e-02, 4.798501122259496952e-03, 7.800586916120723585e-05, -1.413851691566035862e-05, -5.727587674967719880e-06,
-    6.351934280628791507e-01, 2.952908467203564646e-03, 6.254828202758994093e-03, -2.956111985445306826e-04, -1.636502852942454153e-04, 2.616921494951480123e-05, -1.328294780757159899e+00, -5.123410165425365537e-02, 1.229999603970671068e-02, -7.892274520450543677e-04, -1.195721301312790567e-04, 2.454197033093738297e-05 , -9.860403788833298488e-01, 6.727155810173718331e-02, -5.071157514069617352e-03, -1.214296539729165295e-04, 9.340570341953608358e-05, -1.444050153586573228e-05, 3.480675228394242149e-01, -3.619237065717702262e-02, -7.354603960058733389e-04, 9.313051737393654526e-04, -1.076930273455606579e-04, -7.696053039474192446e-07, 1.236090908935226107e+00, 4.238273390417521269e-02, 3.299974870987111650e-03, -5.226642260988254756e-04, -1.032474625011560351e-04, 2.396475265799989632e-05, -1.504210088727871764e+00, -1.545353795944727493e-01, 1.652939400402650763e-02, 4.186078937618800693e-05, -2.027012231708198600e-04, 1.761148452766873776e-05, -2.660600935582757565e-01, 9.680193929166537592e-02, -2.564364883962782712e-03, -6.954454205710857090e-04, 2.907017700829073683e-05, 9.120785771591908463e-06, -1.480971935090678926e+00, -6.169766439371183325e-02, 4.800832758035045861e-03, 7.743502257440657043e-05, -1.440171540732098418e-05, -4.489324897938611976e-06,
-    6.355509554770921721e-01, 4.194364255265300989e-03, 6.156587518227093006e-03, -3.584539136959086518e-04, -1.505562336471176987e-04, 2.631189526673375584e-05, -1.333295991901433553e+00, -4.879824528740911438e-02, 1.205629889598585497e-02, -8.346035033896359156e-04, -1.072962342948566929e-04, 2.412331753624817981e-05 , -9.793640468817854661e-01, 6.625405011186732973e-02, -5.102126473064734317e-03, -8.551069374443776396e-05, 8.618032279329005427e-05, -1.422030758858379208e-05, 3.444418516979214084e-01, -3.631195473807800889e-02, -4.625381215785304145e-04, 8.881537622047225473e-04, -1.080757789189670570e-04,  5.820590714360855199e-08, 1.240361649325028681e+00, 4.302664794411619614e-02, 3.137220402938139478e-03, -5.615677039256951981e-04, -9.125763978623760322e-05, 2.367398552885374808e-05, -1.519498310980496925e+00, -1.512290469691385253e-01, 1.652996628226939199e-02,-3.745688059096337011e-05, -1.938906911473592626e-04, 1.811217640451412989e-05, -2.564062357251438717e-01, 9.626832379335603651e-02, -2.771163091665611831e-03, -6.829069315554202020e-04, 3.363238372709415958e-05, 8.623099725596635004e-06, -1.487093617252511990e+00, -6.073523464295225993e-02, 4.823154268625621383e-03, 7.122599345182346051e-05, -1.664931178025436733e-05, -4.312450972708557703e-06
-  };
-  std::vector expected_xyz_scatter = {
-    0.2713011,  -0.56606281, -0.42305039,  0.14965803,  0.52695372,
-   -0.63845663, -0.11624505, -0.63103203,
-    0.24412213, -0.50842224, -0.38203148,  0.1353771,   0.47343798,
-   -0.57158622, -0.10647548, -0.56671287,
-    0.13979394, -0.29123603, -0.21862063,  0.07744574,  0.27118433,
-   -0.32761487, -0.06077287, -0.32463492,
-    0.24704819, -0.51555848, -0.38509326,  0.1362072,   0.47992214,
-   -0.58168358, -0.10566162, -0.57473633,
-    0.31158834, -0.65068838, -0.48501479,  0.17143258,  0.60565326,
-   -0.73506803, -0.13233106, -0.72541595,
-    0.27121003, -0.5656669 , -0.42318034,  0.14974857,  0.52662422,
-   -0.637633  , -0.11658482, -0.6305842 ,
-    0.21202135, -0.44212972, -0.33094666,  0.11713047,  0.41162829,
-   -0.4982129 , -0.0913087 , -0.49286515,
-    0.30733526, -0.64054639, -0.48022212,  0.17004692,  0.59640929,
-   -0.72111726, -0.13304347, -0.71402776,
-    0.24135931, -0.50316388, -0.37699907,  0.13347531,  0.46846154,
-   -0.56664651, -0.10429212, -0.56088123,
-    0.33429479, -0.69669061, -0.52246841,  0.18502927,  0.64867706,
-   -0.78417021, -0.14487244, -0.77659533,
-    0.29200237, -0.60840668, -0.45656557,  0.16172246,  0.56650319,
-   -0.68453038, -0.12681616, -0.67817995,
-    0.13559139, -0.28210652, -0.21258614,  0.07539812,  0.26274303,
-   -0.3166084 , -0.05968776, -0.31443544,
-    0.30394432, -0.63428311, -0.47381417,  0.16759396,  0.59043739,
-   -0.71559513, -0.13003802, -0.70708354,
-    0.25830471, -0.53796239, -0.40421268,  0.14323456,  0.50094757,
-   -0.6048126 , -0.11264426, -0.59964242,
-    0.21979687, -0.45763438, -0.34413143,  0.12197404,  0.42616899,
-   -0.51425659, -0.09609854, -0.51009828,
-    0.2172166 , -0.45326447, -0.33866506,  0.11979851,  0.421936,
-   -0.51130404, -0.09300045, -0.50528542
-  }; 
-  std::vector expected_dy_dem_x = {
-    -0.02067741,
-    -0.03787612,
-    -0.04180199,
-    -0.04158797,
-    -0.03938578,
-    -0.04047081,
-    -0.03819692,
-    -0.05383372,
-    -0.05179508,
-    -0.03552708,
-    -0.02812173,
-    -0.04451295,
-    -0.04586229,
-    -0.03794369,
-    -0.02917727,
-    -0.04478649
-  };
-  std::vector expected_dy_dem = {
-    -3.32965609, -3.32965609, -3.32965609, -3.32965609, -3.33781886, -3.33781886,
-    -3.33781886, -3.33781886, -3.33501296, -3.33501296, -3.33501296, -3.33501296,
-    -3.34559974, -3.34559974, -3.34559974, -3.34559974,
-    -3.34561821, -3.34561821, -3.34561821, -3.34561821, -3.33106684, -3.33106684,
-    -3.33106684, -3.33106684, -3.33218328, -3.33218328, -3.33218328, -3.33218328,
-    -3.34535585, -3.34535585, -3.34535585, -3.34535585,
-    -3.34250754, -3.34250754, -3.34250754, -3.34250754, -3.33356685, -3.33356685,
-    -3.33356685, -3.33356685, -3.33652989, -3.33652989, -3.33652989, -3.33652989,
-    -3.3309235 , -3.3309235 , -3.3309235 , -3.3309235 ,
-    -3.33688909, -3.33688909, -3.33688909, -3.33688909, -3.34206038, -3.34206038,
-    -3.34206038, -3.34206038, -3.34387412, -3.34387412, -3.34387412, -3.34387412,
-    -3.33051143, -3.33051143, -3.33051143, -3.33051143
-  };
+      6.348551343037398542e-01,  4.209465843706336474e-04,
+      6.390862740714405368e-03,  -1.544448595628262176e-04,
+      -1.891095227974180087e-04, 2.695025951562175852e-05,
+      -1.317549846042939343e+00, -5.624478206903206490e-02,
+      1.274284553146523905e-02,  -6.836227424141475689e-04,
+      -1.438066096020836407e-04, -1.854932873974712940e-06,
+      -9.996964112615246423e-01, 6.928234423723647617e-02,
+      -4.974719973810486084e-03, -2.019584729176823030e-04,
+      1.077254539742680247e-04,  -8.024209768588029797e-06,
+      3.552689563657350780e-01,  -3.578299775339799371e-02,
+      -1.319946251007718743e-03, 1.016701374495701440e-03,
+      -1.057336720791906388e-04, 5.182678943855506567e-06,
+      1.227750369557627286e+00,  4.100352079064395472e-02,
+      3.586869164810712295e-03,  -4.304540913340443135e-04,
+      -1.269943482892440004e-04, 1.459465404430219674e-05,
+      -1.472642501673147031e+00, -1.611354921283318364e-01,
+      1.645427874390196360e-02,  2.107392978135091402e-04,
+      -2.193541011180757461e-04, 1.915392497459551146e-05,
+      -2.855174490181606739e-01, 9.774337856626263976e-02,
+      -2.140891880666230714e-03, -7.148328890055103638e-04,
+      1.965696332267534503e-05,  -4.593489654121371453e-06,
+      -1.468441009949382314e+00, -6.360828127262234399e-02,
+      4.751283295356955282e-03,  8.711899561753186068e-05,
+      -9.937008678852959884e-06, 4.273569346584811685e-07,
+      6.348599826995243722e-01,  5.487167506364742930e-04,
+      6.386116198716365253e-03,  -1.619832375568118791e-04,
+      -1.877328309473502049e-04, 2.134130914519164856e-05,
+      -1.318111020264137512e+00, -5.599013082054477008e-02,
+      1.272225054666903735e-02,  -6.893710047488201898e-04,
+      -1.434367581078517366e-04, 3.329508890614227371e-05,
+      -9.990040854920316793e-01, 6.918278968071900348e-02,
+      -4.980714172967731085e-03, -1.976574487947816198e-04,
+      1.070037204086153902e-04,  -7.859875077388093586e-06,
+      3.549109954092205532e-01,  -3.580909209068139365e-02,
+      -1.289508598157979719e-03, 1.012474257117017967e-03,
+      -1.054418924402112718e-04, -1.245498322204730900e-05,
+      1.228160763020727630e+00,  4.107512853046493134e-02,
+      3.573879491390910459e-03,  -4.355190226638688713e-04,
+      -1.258433981470396103e-04, 1.610862268100766631e-05,
+      -1.474252210958008291e+00, -1.608063442081248406e-01,
+      1.646046950167207382e-02,  2.019843636566674109e-04,
+      -2.185756589083626730e-04, 1.978479879983412190e-05,
+      -2.845402300363228942e-01, 9.770034635718018168e-02,
+      -2.162325119197382531e-03, -7.140472215558940627e-04,
+      1.956302663031799223e-05,  1.932584474244053378e-05,
+      -1.469076617546759334e+00, -6.351322951074317436e-02,
+      4.753890907276497185e-03,  8.672114560243554321e-05,
+      -1.004574434175897967e-05, -4.345700882560937596e-06,
+      6.348661083147921769e-01,  6.763897297752743953e-04,
+      6.381144275303845745e-03,  -1.694690463885140694e-04,
+      -1.868179426353836598e-04, 3.439291082765030046e-05,
+      -1.318669650038090335e+00, -5.573589319299507294e-02,
+      1.270148368741391351e-02,  -6.950749719342792137e-04,
+      -1.422194703304518733e-04, 3.454751241752252323e-05,
+      -9.983127558632299836e-01, 6.908311652764687061e-02,
+      -4.986579772806746212e-03, -1.933888092529071571e-04,
+      1.068327546750306073e-04,  -2.976978385983384886e-05,
+      3.545527765488725169e-01,  -3.583457894275744043e-02,
+      -1.259197760082061621e-03, 1.008246479193084487e-03,
+      -1.059401869200098984e-04, 1.721968053146218465e-06,
+      1.228571871257205572e+00,  4.114647496201748883e-02,
+      3.560738575723638825e-03,  -4.405332425718102457e-04,
+      -1.251648759618972115e-04, 3.659080417076460655e-05,
+      -1.475858628153338792e+00, -1.604770750960976822e-01,
+      1.646639808472218428e-02,  1.932598402043995316e-04,
+      -2.175904819601363058e-04, 1.230256868634094333e-05,
+      -2.835634435191126679e-01, 9.765688571984927624e-02,
+      -2.183734604613508240e-03, -7.132463811570244078e-04,
+      2.021887442373574272e-05,  1.321401495096886281e-05,
+      -1.469711274366155784e+00, -6.341812571665436660e-02,
+      4.756486470714936521e-03,  8.631384191910702040e-05,
+      -1.010516500002806932e-05, -1.110874413279218719e-05,
+      6.348735101551836735e-01,  8.039610290153098582e-04,
+      6.375948457075718626e-03,  -1.769074132993461279e-04,
+      -1.855677150383903214e-04, 3.421271436711027645e-05,
+      -1.319225739518145257e+00, -5.548207260888919634e-02,
+      1.268054645200545304e-02,  -7.007297564176242621e-04,
+      -1.408885818822980523e-04, 3.124701885930576017e-05,
+      -9.976224235482542557e-01, 6.898332734138989952e-02,
+      -4.992317635216104131e-03, -1.891404922064061889e-04,
+      1.053957535708985289e-04,  -1.089286646983666076e-06,
+      3.541943058468561834e-01,  -3.585946084769019160e-02,
+      -1.229013912637771933e-03, 1.004009466262262241e-03,
+      -1.059129033455631863e-04, -4.941663399086282537e-06,
+      1.228983691638902087e+00,  4.121755707472917613e-02,
+      3.547447845420277635e-03,  -4.455036207721562607e-04,
+      -1.239172256532283074e-04, 3.437341080261359686e-05,
+      -1.477461752073406132e+00, -1.601476900261984693e-01,
+      1.647206544856073471e-02,  1.845724864086241608e-04,
+      -2.173853638475303177e-04, 3.620505631412716563e-05,
+      -2.825870937484175061e-01, 9.761299713537928413e-02,
+      -2.205119732548723246e-03, -7.124245958910824846e-04,
+      2.074820558303217398e-05,  1.209381466404663338e-05,
+      -1.470344979888463577e+00, -6.332297013406351649e-02,
+      4.759069711794740656e-03,  8.589935708505183382e-05,
+      -1.045842324058424788e-05, -6.134254562752213537e-06,
+      6.348821871815598650e-01,  9.314261853726121809e-04,
+      6.370530236175125580e-03,  -1.842978984547447257e-04,
+      -1.840210089691990327e-04, 2.234897510077387526e-05,
+      -1.319779292891724465e+00, -5.522867246076747227e-02,
+      1.265944033870337014e-02,  -7.063360380236871801e-04,
+      -1.393416734992873119e-04, 1.931167378610719847e-05,
+      -9.969330896946905218e-01, 6.888342466806646192e-02,
+      -4.997928623431705138e-03, -1.849303524006284602e-04,
+      1.053651633995249134e-04,  -2.870133904891753420e-05,
+      3.538355893399378616e-01,  -3.588374034700148041e-02,
+      -1.198957225773849763e-03, 9.997681359810027708e-04,
+      -1.060678155548662341e-04, -4.107776618240329050e-06,
+      1.229396221507694564e+00,  4.128837188660083868e-02,
+      3.534008730169808672e-03,  -4.504275777948374090e-04,
+      -1.224778886969254976e-04, 2.455513266683544498e-05,
+      -1.479061581584721008e+00, -1.598181942132129441e-01,
+      1.647747255391585064e-02,  1.759082956613747337e-04,
+      -2.158335508261176197e-04, 6.406725844410341030e-06,
+      -2.816111850012528728e-01, 9.756868109694678826e-02,
+      -2.226479900633348240e-03, -7.115823288942964460e-04,
+      2.121038517729223415e-05,  1.358027318850170435e-05,
+      -1.470977733597038872e+00, -6.322776301216057049e-02,
+      4.761640356162846754e-03,  8.547576468445008296e-05,
+      -1.081874527005240631e-05, -8.845528475774308509e-07,
+      6.348921383103013349e-01,  1.058780765759985421e-03,
+      6.364891110105044131e-03,  -1.916363332792569681e-04,
+      -1.827768871456785058e-04, 2.275707291847725182e-05,
+      -1.320330314380025793e+00, -5.497569611120622923e-02,
+      1.263816684562326688e-02,  -7.118908987616576157e-04,
+      -1.380182662155302303e-04, 1.630252530406085050e-05,
+      -9.962447554247517711e-01, 6.878341103651769428e-02,
+      -5.003413601927745452e-03, -1.807403991329658622e-04,
+      1.040363362483998831e-04,  -4.422604643727719699e-06,
+      3.534766330394523148e-01,  -3.590741998555346121e-02,
+      -1.169027863565602274e-03, 9.955202772264954043e-04,
+      -1.060447700647724903e-04, -1.021743279826507342e-05,
+      1.229809458175783687e+00,  4.135891644424664892e-02,
+      3.520422661584679015e-03,  -4.553035794622276055e-04,
+      -1.210679214963379874e-04, 1.595827246550979495e-05,
+      -1.480658115605847147e+00, -1.594885928526604546e-01,
+      1.648262036665308974e-02,  1.672799673730459213e-04,
+      -2.148155690753495697e-04, -1.867405535452657550e-06,
+      -2.806357215496423363e-01, 9.752393810975558408e-02,
+      -2.247814508535729908e-03, -7.107227883497464890e-04,
+      2.207595560206285042e-05,  -1.137331983229785190e-06,
+      -1.471609534977757372e+00, -6.313250460562676303e-02,
+      4.764198129054059844e-03,  8.503999275315992160e-05,
+      -1.072692568096017848e-05, -1.373273803695183988e-05,
+      6.349033624136081189e-01,  1.186020367092407990e-03,
+      6.359032581545111251e-03,  -1.989262833250400370e-04,
+      -1.812752661309344573e-04, 1.302837915648187095e-05,
+      -1.320878808237722746e+00, -5.472314689282183064e-02,
+      1.261672747063919374e-02,  -7.173917679890315846e-04,
+      -1.373052781380030543e-04, 3.768455339511444900e-05,
+      -9.955574218354472649e-01, 6.868328895828368363e-02,
+      -5.008773436308684712e-03, -1.765844799686671349e-04,
+      1.034810966435298563e-04,  -1.111176255155353207e-05,
+      3.531174429312692320e-01,  -3.593050231143132822e-02,
+      -1.139225984250480384e-03, 9.912704081392112714e-04,
+      -1.064918174657224404e-04, 2.680738443515978403e-06,
+      1.230223398925979650e+00,  4.142918782293085467e-02,
+      3.506691073047987512e-03,  -4.601302388532728274e-04,
+      -1.198865987378785417e-04, 1.656386182477533959e-05,
+      -1.482251353107205460e+00, -1.591588911206925361e-01,
+      1.648750985769346228e-02,  1.586901819247656846e-04,
+      -2.147074421644348298e-04, 2.641762503224190698e-05,
+      -2.796607076604977760e-01, 9.747876869099537933e-02,
+      -2.269122958003529523e-03, -7.098388532529275848e-04,
+      2.226701915637888804e-05,  1.106237844209756009e-05,
+      -1.472240383519069384e+00, -6.303719517464229094e-02,
+      4.766742755353862819e-03,  8.459962202271287246e-05,
+      -1.132218730142039535e-05, 8.958476322974335592e-07,
+      6.349158583197994643e-01,  1.313140616388666637e-03,
+      6.352956158169477396e-03,  -2.061601622854974502e-04,
+      -1.806298821034440756e-04, 3.770936817966389514e-05,
+      -1.321424778752664952e+00, -5.447102810827629538e-02,
+      1.259512371128685033e-02,  -7.228490733933210606e-04,
+      -1.356407402355522122e-04, 2.099832634320949299e-05,
+      -9.948710899987588396e-01, 6.858306092758209571e-02,
+      -5.014008993202081696e-03, -1.724573933478598642e-04,
+      1.029144894329912032e-04,  -1.738522780636760158e-05,
+      3.527580249757622521e-01,  -3.595298987582695727e-02,
+      -1.109551740263377793e-03, 9.870126155001155040e-04,
+      -1.064931456292656029e-04, -2.059910396978558087e-06,
+      1.230638041011988815e+00,  4.149918312660194619e-02,
+      3.492815399561766294e-03,  -4.649051157564728157e-04,
+      -1.192927614880224277e-04, 4.072077917749542957e-05,
+      -1.483841293110880866e+00, -1.588290941739924356e-01,
+      1.649214200293154520e-02,  1.501282794678792006e-04,
+      -2.138853834118830831e-04, 2.633111784219914963e-05,
+      -2.786861475954987011e-01, 9.743317336979973042e-02,
+      -2.290404652904617314e-03, -7.089360554728917595e-04,
+      2.260180638238835256e-05,  1.741828165826791135e-05,
+      -1.472870278712053782e+00, -6.294183498489253070e-02,
+      4.769273959660644442e-03,  8.414681093302789892e-05,
+      -1.142905205912834352e-05, -4.014065121916994726e-06,
+      6.349296248136164778e-01,  1.440137170869312810e-03,
+      6.346663352465874847e-03,  -2.133510744796659759e-04,
+      -1.788513201196447670e-04, 1.721163944875696416e-05,
+      -1.321968230245579967e+00, -5.421934303028537461e-02,
+      1.257335706466754244e-02,  -7.282542863230233527e-04,
+      -1.343059033644905889e-04, 1.747822893445653714e-05,
+      -9.941857609618123259e-01, 6.848272942128874607e-02,
+      -5.019121140152461337e-03, -1.683596869525186377e-04,
+      1.024142382012053007e-04,  -2.632719129544749384e-05,
+      3.523983851077774343e-01,  -3.597488523292310947e-02,
+      -1.080005278271846739e-03, 9.827512175914082399e-04,
+      -1.066680880078371994e-04, 3.403258606315080555e-07,
+      1.231053381658700818e+00,  4.156889948792314576e-02,
+      3.478797077596604108e-03,  -4.696409807358484993e-04,
+      -1.173636798436718986e-04, 1.149931408689037458e-05,
+      -1.485427934690428442e+00, -1.584992071496764965e-01,
+      1.649651778315383566e-02,  1.415960091521040870e-04,
+      -2.125888038426753843e-04, 7.384582528889821378e-06,
+      -2.777120456109742896e-01, 9.738715268720327112e-02,
+      -2.311658999267464203e-03, -7.080165982958596923e-04,
+      2.340034491729013294e-05,  5.174033942788913380e-06,
+      -1.473499220050474623e+00, -6.284642430757329812e-02,
+      4.771791466347353149e-03,  8.368540130389298475e-05,
+      -1.162498575113560591e-05, -5.381585801785509468e-06,
+      6.349446606365225509e-01,  1.567005718051586727e-03,
+      6.340155681555815353e-03,  -2.204854663573854625e-04,
+      -1.779502948888764897e-04, 3.196283450610521294e-05,
+      -1.322509167069771951e+00, -5.396809490162747525e-02,
+      1.255142902735281209e-02,  -7.336077414823606981e-04,
+      -1.332538502428148267e-04, 2.525523713666122703e-05,
+      -9.935014357470516311e-01, 6.838229689892011409e-02,
+      -5.024110745516051704e-03, -1.642860423419652261e-04,
+      1.011792892256958577e-04,  -5.902237032851650630e-06,
+      3.520385292366049468e-01,  -3.599619093977864809e-02,
+      -1.050586739210998023e-03, 9.784837539753422735e-04,
+      -1.066187407206570670e-04, -6.052991441884039902e-06,
+      1.231469418062474341e+00,  4.163833406830096812e-02,
+      3.464637544942418459e-03,  -4.743218246565151001e-04,
+      -1.164951133813105271e-04, 2.473911917278243621e-05,
+      -1.487011276970676033e+00, -1.581692351651968476e-01,
+      1.650063818395723983e-02,  1.331001312464952355e-04,
+      -2.118074389246019866e-04, 9.192428068946771109e-06,
+      -2.767384059577842614e-01, 9.734070719609828892e-02,
+      -2.332885405321092481e-03, -7.070743922828596519e-04,
+      2.373777250910882265e-05,  1.127700884024945933e-05,
+      -1.474127207030835107e+00, -6.275096341939470634e-02,
+      4.774294999622533293e-03,  8.321347296773265077e-05,
+      -1.162225195759229858e-05, -1.468175407624093560e-05,
+      6.349609644870094494e-01,  1.693741975839754832e-03,
+      6.333434667015966531e-03,  -2.275719866012916918e-04,
+      -1.766077012712487378e-04, 2.919052022666632077e-05,
+      -1.323047593610823247e+00, -5.371728693515605280e-02,
+      1.252934109528984138e-02,  -7.389107006611626187e-04,
+      -1.322992615601379437e-04, 3.689337377145077536e-05,
+      -9.928181153524118230e-01, 6.828176580261838269e-02,
+      -5.028978678356570489e-03, -1.602449667799085492e-04,
+      1.004819833385002965e-04,  -7.012859043909368637e-06,
+      3.516784632459502014e-01,  -3.601690955621394963e-02,
+      -1.021296258318379370e-03, 9.742140050919662845e-04,
+      -1.068837890347894775e-04, 3.261791903209577241e-07,
+      1.231886147391427544e+00,  4.170748405790913882e-02,
+      3.450338240560582581e-03,  -4.789562532735843967e-04,
+      -1.153902983973557932e-04, 2.856018069496295048e-05,
+      -1.488591319127526624e+00, -1.578391833182464787e-01,
+      1.650450419566778376e-02,  1.246407552546250339e-04,
+      -2.115332183818513349e-04, 3.149345367837511192e-05,
+      -2.757652328811996956e-01, 9.729383746118988596e-02,
+      -2.354083281534554220e-03, -7.061133365182417328e-04,
+      2.418809213597686327e-05,  1.280494807360028992e-05,
+      -1.474754239152433311e+00, -6.265545260258377491e-02,
+      4.776784283590801948e-03,  8.273687806363864625e-05,
+      -1.229952261449745124e-05, 3.204146150058887708e-06,
+      6.349785350208994039e-01,  1.820341692612803541e-03,
+      6.326501834700739083e-03,  -2.346100929840904846e-04,
+      -1.748840426396014729e-04, 1.130785525935554482e-05,
+      -1.323583514286295282e+00, -5.346692231381247606e-02,
+      1.250709476370755191e-02,  -7.441705970339035966e-04,
+      -1.303302437099287372e-04, 7.935577538626925858e-06,
+      -9.921358007514943234e-01, 6.818113855713830995e-02,
+      -5.033725808341922223e-03, -1.562353718150353687e-04,
+      1.001568149392305130e-04,  -2.302258383924021595e-05,
+      3.513181929939074299e-01,  -3.603704364469759169e-02,
+      -9.921339651685744804e-04, 9.699384566370250092e-04,
+      -1.069081013817698415e-04, -2.744679484186812129e-06,
+      1.232303566785723392e+00,  4.177634667571154814e-02,
+      3.435900604437185177e-03,  -4.835440426346156498e-04,
+      -1.140781768005934266e-04, 2.411509316948267986e-05,
+      -1.490168060387760951e+00, -1.575090566866652331e-01,
+      1.650811681325956015e-02,  1.162064642248029450e-04,
+      -2.100324946396962247e-04, 4.868837971279583202e-06,
+      -2.747925306207861240e-01, 9.724654405895133413e-02,
+      -2.375252040655950400e-03, -7.051355614741510987e-04,
+      2.505903781065493165e-05,  -2.569082101323676566e-06,
+      -1.475380315917416585e+00, -6.255989214488603956e-02,
+      4.779259042312647421e-03,  8.224491253736542200e-05,
+      -1.205054378062991984e-05, -1.594987943813344381e-05,
+      6.349973708516511994e-01,  1.946800647308156995e-03,
+      6.319358714566076195e-03,  -2.415904693897710526e-04,
+      -1.741570105122868483e-04, 3.342152683043006766e-05,
+      -1.324116933545430141e+00, -5.321700419064152865e-02,
+      1.248469152702344660e-02,  -7.493727578058629766e-04,
+      -1.295525827398787404e-04, 2.659942231629285135e-05,
+      -9.914544928937398804e-01, 6.808041756983601589e-02,
+      -5.038353005641925050e-03, -1.522500103683389601e-04,
+      9.911425811568465554e-05,  -1.035676665958809070e-05,
+      3.509577243129330393e-01,  -3.605659577023319351e-02,
+      -9.630999837076988784e-04, 9.656594578503095369e-04,
+      -1.070158919994286978e-04, -2.281503112307771063e-06,
+      1.232721673357858538e+00,  4.184491916948063911e-02,
+      3.421326077437690516e-03,  -4.880823132679394552e-04,
+      -1.129872290747681817e-04, 2.854952342195995698e-05,
+      -1.491741500028839651e+00, -1.571788603283475749e-01,
+      1.651147703627379656e-02,  1.078118218043548068e-04,
+      -2.094656285123614196e-04, 1.573608604543182341e-05,
+      -2.738203034102859035e-01, 9.719882757757769554e-02,
+      -2.396391097750961291e-03, -7.041328812172977002e-04,
+      2.511128111671661627e-05,  1.472819566023977703e-05,
+      -1.476005436830838402e+00, -6.246428233956573262e-02,
+      4.781718999863710830e-03,  8.175246233396933941e-05,
+      -1.310850420537104008e-05, 1.717274673157189222e-05,
+      6.350174705506670403e-01,  2.073114649501703322e-03,
+      6.312006840494438151e-03,  -2.485262001215581039e-04,
+      -1.724445833892894095e-04, 1.623821996891234705e-05,
+      -1.324647855868849478e+00, -5.296753568880858964e-02,
+      1.246213287875118370e-02,  -7.545274547770323926e-04,
+      -1.284298383236558551e-04, 3.142127009671183137e-05,
+      -9.907741927046019859e-01, 6.797960523066012839e-02,
+      -5.042861140826992473e-03, -1.482946605870891395e-04,
+      9.821987974303589589e-05,  -3.593831829470692349e-06,
+      3.505970630098214080e-01,  -3.607556850024738748e-02,
+      -9.341944322877257512e-04, 9.613773761737330267e-04,
+      -1.072343182304808093e-04, 2.791451096706449119e-06,
+      1.233140464192951757e+00,  4.191319881581374862e-02,
+      3.406616101162745613e-03,  -4.925758895926437772e-04,
+      -1.113902906060245713e-04, 1.275308331152581608e-05,
+      -1.493311637378700762e+00, -1.568485992811522733e-01,
+      1.651458586873823589e-02,  9.944841367174414462e-05,
+      -2.085492230796830474e-04, 1.276456024245067926e-05,
+      -2.728485554775001987e-01, 9.715068861693920699e-02,
+      -2.417499870240937074e-03, -7.031148500958378164e-04,
+      2.576543833825076558e-05,  7.841889896124507091e-06,
+      -1.476629601400710978e+00, -6.236862348540499201e-02,
+      4.784163880393361643e-03,  8.124213252544174404e-05,
+      -1.286332078849730127e-05, -1.821996546344873330e-06,
+      6.350388326475970846e-01,  2.199279539485121671e-03,
+      6.304447750121061969e-03,  -2.554047701160370044e-04,
+      -1.716061813901302753e-04, 3.413524324276134592e-05,
+      -1.325176285768258300e+00, -5.271851990161838253e-02,
+      1.243942031140890699e-02,  -7.596346042592860793e-04,
+      -1.269803855069738714e-04, 2.314478643438959578e-05,
+      -9.900949010857222898e-01, 6.787870391214460841e-02,
+      -5.047251084767826433e-03, -1.443753107913585767e-04,
+      9.837034053479728221e-05,  -3.865274593462701621e-05,
+      3.502362148656810170e-01,  -3.609396440447816545e-02,
+      -9.054174237006253068e-04, 9.570894530963515055e-04,
+      -1.071221722792567601e-04, -5.180134097885568801e-06,
+      1.233559936349031494e+00,  4.198118292014653419e-02,
+      3.391772117805412056e-03,  -4.970162819604460663e-04,
+      -1.105584293158747960e-04, 2.757032189173095048e-05,
+      -1.494878471815561216e+00, -1.565182785628131401e-01,
+      1.651744431908664865e-02,  9.112268062696188113e-05,
+      -2.082277461664644284e-04, 3.370820636496137736e-05,
+      -2.718772910441742408e-01, 9.710212778853387350e-02,
+      -2.438577777940475859e-03, -7.020756635958485484e-04,
+      2.613933618298708639e-05,  1.211520684095310762e-05,
+      -1.477252809138063672e+00, -6.227291588670166161e-02,
+      4.786593408182711167e-03,  8.072392747742672100e-05,
+      -1.281499371544444526e-05, -1.293175202324119235e-05,
+      6.350614556306495295e-01,  2.325291188338546311e-03,
+      6.296682984661446623e-03,  -2.622362895631248896e-04,
+      -1.701076322674243866e-04, 2.573454296903621253e-05,
+      -1.325702227786145437e+00, -5.246995989253622206e-02,
+      1.241655531642829255e-02,  -7.646904682589584622e-04,
+      -1.257704658362481128e-04, 2.439373356208127567e-05,
+      -9.894166189151047952e-01, 6.777771596940393439e-02,
+      -5.051523708536139086e-03, -1.404733355821404265e-04,
+      9.677082285072928253e-05,  -3.720510878458014501e-06,
+      3.498751856359115786e-01,  -3.611178605486395354e-02,
+      -8.767690652124425499e-04, 9.527998576480508275e-04,
+      -1.072771816869139909e-04, -2.281376475091892258e-06,
+      1.233980086857325631e+00,  4.204886881676297983e-02,
+      3.376795570009583514e-03,  -5.014114486109571937e-04,
+      -1.092957353261917852e-04, 2.516456964431257380e-05,
+      -1.496442002767713664e+00, -1.561879031708521548e-01,
+      1.652005340007862977e-02,  8.282284133744905071e-05,
+      -2.067123325224875000e-04, 7.057486539657783089e-06,
+      -2.709065143258797548e-01, 9.705314571543909030e-02,
+      -2.459624243094573216e-03, -7.010187162791577066e-04,
+      2.672975399789282626e-05,  7.629793933874534523e-06,
+      -1.477875059556995385e+00, -6.217715985326619649e-02,
+      4.789007307701962507e-03,  8.019935829649041371e-05,
+      -1.318861260046749971e-05, -7.150339348059032240e-06,
+      6.350853379468965887e-01,  2.451145498001100487e-03,
+      6.288714088740080324e-03,  -2.690159202421790068e-04,
+      -1.686584359429067433e-04, 1.941481480743946700e-05,
+      -1.326225686495484890e+00, -5.222185869521017709e-02,
+      1.239353938406437261e-02,  -7.696964132049412353e-04,
+      -1.246012242240120604e-04, 2.724071141974432252e-05,
+      -9.887393470472876089e-01, 6.767664374012982709e-02,
+      -5.055679883306329545e-03, -1.366074591188833347e-04,
+      9.623033677044332457e-05,  -1.113456896173822779e-05,
+      3.495139810501832756e-01,  -3.612903602543367232e-02,
+      -8.482494585971035728e-04, 9.485064841097947883e-04,
+      -1.073561607316583907e-04, -2.239996380309942211e-06,
+      1.234400912722548371e+00,  4.211625386880359784e-02,
+      3.361687900729734210e-03,  -5.057597926077623488e-04,
+      -1.078411892315765344e-04, 1.508800592977199686e-05,
+      -1.498002229713325750e+00, -1.558574780824932282e-01,
+      1.652241412871961052e-02,  7.456368677257522147e-05,
+      -2.062001731191939454e-04, 2.069621557469772063e-05,
+      -2.699362295319003291e-01, 9.700374303226286243e-02,
+      -2.480638690415259105e-03, -6.999405672986690023e-04,
+      2.700789474676622474e-05,  1.556143061449123430e-05,
+      -1.478496352174730522e+00, -6.208135570041733303e-02,
+      4.791405303667145565e-03,  7.966538051836852740e-05,
+      -1.352687841609079228e-05, -2.789411930543395566e-06,
+      6.351104780025849106e-01,  2.576838401336829787e-03,
+      6.280542610220480118e-03,  -2.757414391158645754e-04,
+      -1.675762649448408429e-04, 2.787462665161048641e-05,
+      -1.326746666499438287e+00, -5.197421931349595348e-02,
+      1.237037400330611749e-02,  -7.746541492504023475e-04,
+      -1.232228491818352083e-04, 2.166599538617633252e-05,
+      -9.880630863135209108e-01, 6.757548954459043078e-02,
+      -5.059720480258220535e-03, -1.327693574508429343e-04,
+      9.550030312894054513e-05,  -1.096549240339310371e-05,
+      3.491526068124157778e-01,  -3.614571689219699124e-02,
+      -8.198587001702131727e-04, 9.442100079790295610e-04,
+      -1.074330339280879455e-04, -2.103241190440061311e-06,
+      1.234822410923189784e+00,  4.218333546826981417e-02,
+      3.346450553092000530e-03,  -5.100549148199152614e-04,
+      -1.071543306169886722e-04, 3.572075491055831030e-05,
+      -1.499559152180234056e+00, -1.555270082545787691e-01,
+      1.652452752618108200e-02,  6.633607063542407416e-05,
+      -2.052990867644106118e-04, 1.891505702101457936e-05,
+      -2.689664408651156746e-01, 9.695392038509384469e-02,
+      -2.501620547117759490e-03, -6.988464710389351081e-04,
+      2.774961528830105395e-05,  4.843681010028069226e-06,
+      -1.479116686511674494e+00, -6.198550374897651011e-02,
+      4.793787121096219732e-03,  7.912045955652986253e-05,
+      -1.359696279035538403e-05, -9.132339849453571562e-06,
+      6.351368741634448867e-01,  2.702365862198193025e-03,
+      6.272170100036473551e-03,  -2.824171711189519380e-04,
+      -1.661976899287730559e-04, 2.457347650017094835e-05,
+      -1.327265172431057128e+00, -5.172704472148267896e-02,
+      1.234706066178771662e-02,  -7.795630288411945592e-04,
+      -1.217395799935142969e-04, 1.184741714306808905e-05,
+      -9.873878375219384829e-01, 6.747425568563097942e-02,
+      -5.063646370480812467e-03, -1.289626891970745083e-04,
+      9.513074838211379970e-05,  -2.521433322545949321e-05,
+      3.487910686007592576e-01,  -3.616183123303555458e-02,
+      -7.915968808226425679e-04, 9.399119246579864433e-04,
+      -1.077055728285351480e-04, 6.031191175422362627e-06,
+      1.235244578411804905e+00,  4.225011103602600848e-02,
+      3.331084970256580589e-03,  -5.143079026275864784e-04,
+      -1.055716785023949844e-04, 2.051193936812822612e-05,
+      -1.501112769745742259e+00, -1.551964986234863897e-01,
+      1.652639461772111712e-02,  5.814089462644928566e-05,
+      -2.041249358339155683e-04, 6.311073191969795411e-06,
+      -2.679971525218879380e-01, 9.690367843145115956e-02,
+      -2.522569242956208650e-03, -6.977319783847560700e-04,
+      2.827424678587480721e-05,  2.739673941330651616e-06,
+      -1.479736062091468574e+00, -6.188960432526132566e-02,
+      4.796152485364500034e-03,  7.856828747830194362e-05,
+      -1.395147193446202365e-05, -4.087221013031299888e-06,
+      6.351645247550001816e-01,  2.827723875485507743e-03,
+      6.263598112024793517e-03,  -2.890409134869928735e-04,
+      -1.648390823803598971e-04, 2.215887759642637032e-05,
+      -1.327781208952985015e+00, -5.148033786352124164e-02,
+      1.232360084570068709e-02,  -7.844171563535663055e-04,
+      -1.210428935521009746e-04, 3.344327592646507844e-05,
+      -9.867136014577331249e-01, 6.737294444867666932e-02,
+      -5.067458424877044516e-03, -1.251812701937470213e-04,
+      9.419473244264059593e-05,  -1.679002076268449654e-05,
+      3.484293720675762929e-01,  -3.617738162759492893e-02,
+      -7.634640860539731316e-04, 9.356082122653546981e-04,
+      -1.075431084112703954e-04, -3.044614041061100766e-06,
+      1.235667412115300623e+00,  4.231657802179918798e-02,
+      3.315592595281378029e-03,  -5.185116053649769336e-04,
+      -1.041674655671950871e-04, 1.242766263135090892e-05,
+      -1.502663082036415076e+00, -1.548659541050484978e-01,
+      1.652801643260504508e-02,  4.998556989557471122e-05,
+      -2.037688261998792680e-04, 2.657243869390409541e-05,
+      -2.670283686919466826e-01, 9.685301784023310490e-02,
+      -2.543484210258855835e-03, -6.965966582328896994e-04,
+      2.850491087748043708e-05,  1.232179636112698650e-05,
+      -1.480354478441044286e+00, -6.179365776107784841e-02,
+      4.798501122259496952e-03,  7.800586916120723585e-05,
+      -1.413851691566035862e-05, -5.727587674967719880e-06,
+      6.351934280628791507e-01,  2.952908467203564646e-03,
+      6.254828202758994093e-03,  -2.956111985445306826e-04,
+      -1.636502852942454153e-04, 2.616921494951480123e-05,
+      -1.328294780757159899e+00, -5.123410165425365537e-02,
+      1.229999603970671068e-02,  -7.892274520450543677e-04,
+      -1.195721301312790567e-04, 2.454197033093738297e-05,
+      -9.860403788833298488e-01, 6.727155810173718331e-02,
+      -5.071157514069617352e-03, -1.214296539729165295e-04,
+      9.340570341953608358e-05,  -1.444050153586573228e-05,
+      3.480675228394242149e-01,  -3.619237065717702262e-02,
+      -7.354603960058733389e-04, 9.313051737393654526e-04,
+      -1.076930273455606579e-04, -7.696053039474192446e-07,
+      1.236090908935226107e+00,  4.238273390417521269e-02,
+      3.299974870987111650e-03,  -5.226642260988254756e-04,
+      -1.032474625011560351e-04, 2.396475265799989632e-05,
+      -1.504210088727871764e+00, -1.545353795944727493e-01,
+      1.652939400402650763e-02,  4.186078937618800693e-05,
+      -2.027012231708198600e-04, 1.761148452766873776e-05,
+      -2.660600935582757565e-01, 9.680193929166537592e-02,
+      -2.564364883962782712e-03, -6.954454205710857090e-04,
+      2.907017700829073683e-05,  9.120785771591908463e-06,
+      -1.480971935090678926e+00, -6.169766439371183325e-02,
+      4.800832758035045861e-03,  7.743502257440657043e-05,
+      -1.440171540732098418e-05, -4.489324897938611976e-06,
+      6.355509554770921721e-01,  4.194364255265300989e-03,
+      6.156587518227093006e-03,  -3.584539136959086518e-04,
+      -1.505562336471176987e-04, 2.631189526673375584e-05,
+      -1.333295991901433553e+00, -4.879824528740911438e-02,
+      1.205629889598585497e-02,  -8.346035033896359156e-04,
+      -1.072962342948566929e-04, 2.412331753624817981e-05,
+      -9.793640468817854661e-01, 6.625405011186732973e-02,
+      -5.102126473064734317e-03, -8.551069374443776396e-05,
+      8.618032279329005427e-05,  -1.422030758858379208e-05,
+      3.444418516979214084e-01,  -3.631195473807800889e-02,
+      -4.625381215785304145e-04, 8.881537622047225473e-04,
+      -1.080757789189670570e-04, 5.820590714360855199e-08,
+      1.240361649325028681e+00,  4.302664794411619614e-02,
+      3.137220402938139478e-03,  -5.615677039256951981e-04,
+      -9.125763978623760322e-05, 2.367398552885374808e-05,
+      -1.519498310980496925e+00, -1.512290469691385253e-01,
+      1.652996628226939199e-02,  -3.745688059096337011e-05,
+      -1.938906911473592626e-04, 1.811217640451412989e-05,
+      -2.564062357251438717e-01, 9.626832379335603651e-02,
+      -2.771163091665611831e-03, -6.829069315554202020e-04,
+      3.363238372709415958e-05,  8.623099725596635004e-06,
+      -1.487093617252511990e+00, -6.073523464295225993e-02,
+      4.823154268625621383e-03,  7.122599345182346051e-05,
+      -1.664931178025436733e-05, -4.312450972708557703e-06};
+  std::vector expected_xyz_scatter = {
+      0.2713011,   -0.56606281, -0.42305039, 0.14965803,  0.52695372,
+      -0.63845663, -0.11624505, -0.63103203, 0.24412213,  -0.50842224,
+      -0.38203148, 0.1353771,   0.47343798,  -0.57158622, -0.10647548,
+      -0.56671287, 0.13979394,  -0.29123603, -0.21862063, 0.07744574,
+      0.27118433,  -0.32761487, -0.06077287, -0.32463492, 0.24704819,
+      -0.51555848, -0.38509326, 0.1362072,   0.47992214,  -0.58168358,
+      -0.10566162, -0.57473633, 0.31158834,  -0.65068838, -0.48501479,
+      0.17143258,  0.60565326,  -0.73506803, -0.13233106, -0.72541595,
+      0.27121003,  -0.5656669,  -0.42318034, 0.14974857,  0.52662422,
+      -0.637633,   -0.11658482, -0.6305842,  0.21202135,  -0.44212972,
+      -0.33094666, 0.11713047,  0.41162829,  -0.4982129,  -0.0913087,
+      -0.49286515, 0.30733526,  -0.64054639, -0.48022212, 0.17004692,
+      0.59640929,  -0.72111726, -0.13304347, -0.71402776, 0.24135931,
+      -0.50316388, -0.37699907, 0.13347531,  0.46846154,  -0.56664651,
+      -0.10429212, -0.56088123, 0.33429479,  -0.69669061, -0.52246841,
+      0.18502927,  0.64867706,  -0.78417021, -0.14487244, -0.77659533,
+      0.29200237,  -0.60840668, -0.45656557, 0.16172246,  0.56650319,
+      -0.68453038, -0.12681616, -0.67817995, 0.13559139,  -0.28210652,
+      -0.21258614, 0.07539812,  0.26274303,  -0.3166084,  -0.05968776,
+      -0.31443544, 0.30394432,  -0.63428311, -0.47381417, 0.16759396,
+      0.59043739,  -0.71559513, -0.13003802, -0.70708354, 0.25830471,
+      -0.53796239, -0.40421268, 0.14323456,  0.50094757,  -0.6048126,
+      -0.11264426, -0.59964242, 0.21979687,  -0.45763438, -0.34413143,
+      0.12197404,  0.42616899,  -0.51425659, -0.09609854, -0.51009828,
+      0.2172166,   -0.45326447, -0.33866506, 0.11979851,  0.421936,
+      -0.51130404, -0.09300045, -0.50528542};
+  std::vector expected_dy_dem_x = {
+      -0.02067741, -0.03787612, -0.04180199, -0.04158797,
+      -0.03938578, -0.04047081, -0.03819692, -0.05383372,
+      -0.05179508, -0.03552708, -0.02812173, -0.04451295,
+      -0.04586229, -0.03794369, -0.02917727, -0.04478649};
+  std::vector expected_dy_dem = {
+      -3.32965609, -3.32965609, -3.32965609, -3.32965609, -3.33781886,
+      -3.33781886, -3.33781886, -3.33781886, -3.33501296, -3.33501296,
+      -3.33501296, -3.33501296, -3.34559974, -3.34559974, -3.34559974,
+      -3.34559974, -3.34561821, -3.34561821, -3.34561821, -3.34561821,
+      -3.33106684, -3.33106684, -3.33106684, -3.33106684, -3.33218328,
+      -3.33218328, -3.33218328, -3.33218328, -3.34535585, -3.34535585,
+      -3.34535585, -3.34535585, -3.34250754, -3.34250754, -3.34250754,
+      -3.34250754, -3.33356685, -3.33356685, -3.33356685, -3.33356685,
+      -3.33652989, -3.33652989, -3.33652989, -3.33652989, -3.3309235,
+      -3.3309235,  -3.3309235,  -3.3309235,  -3.33688909, -3.33688909,
+      -3.33688909, -3.33688909, -3.34206038, -3.34206038, -3.34206038,
+      -3.34206038, -3.34387412, -3.34387412, -3.34387412, -3.34387412,
+      -3.33051143, -3.33051143, -3.33051143, -3.33051143};
   const int nloc = 4;
   const int nnei = 4;
   const int last_layer_size = 8;
 
-  void SetUp() override {
-  }
-  void TearDown() override {
-  }
+  void SetUp() override {}
+  void TearDown() override {}
 };
 
-TEST_F(TestTabulateSeA, tabulate_fusion_se_a_cpu)
-{
+TEST_F(TestTabulateSeA, tabulate_fusion_se_a_cpu) {
   std::vector xyz_scatter(nloc * nnei * last_layer_size);
-  deepmd::tabulate_fusion_se_a_cpu(&xyz_scatter[0], &table[0], &info[0], &em_x[0], &em[0], nloc, nnei, last_layer_size);
+  deepmd::tabulate_fusion_se_a_cpu(&xyz_scatter[0], &table[0], &info[0],
+                                           &em_x[0], &em[0], nloc, nnei,
+                                           last_layer_size);
   EXPECT_EQ(xyz_scatter.size(), nloc * nnei * last_layer_size);
   EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size());
-  for (int jj = 0; jj < xyz_scatter.size(); ++jj){
-    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]) , 1e-5);
+  for (int jj = 0; jj < xyz_scatter.size(); ++jj) {
+    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]), 1e-5);
   }
 }
 
-TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_cpu)
-{
+TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_cpu) {
   std::vector dy_dem_x(em_x.size());
   std::vector dy_dem(em.size());
   std::vector dy(nloc * nnei * last_layer_size, 1.0);
-  deepmd::tabulate_fusion_se_a_grad_cpu(&dy_dem_x[0], &dy_dem[0], &table[0], &info[0], &em_x[0], &em[0], &dy[0], nloc, nnei, last_layer_size);
+  deepmd::tabulate_fusion_se_a_grad_cpu(
+      &dy_dem_x[0], &dy_dem[0], &table[0], &info[0], &em_x[0], &em[0], &dy[0],
+      nloc, nnei, last_layer_size);
   EXPECT_EQ(dy_dem_x.size(), nloc * nnei);
   EXPECT_EQ(dy_dem.size(), nloc * nnei * 4);
   EXPECT_EQ(dy_dem_x.size(), expected_dy_dem_x.size());
   EXPECT_EQ(dy_dem.size(), expected_dy_dem.size());
-  for (int jj = 0; jj < dy_dem_x.size(); ++jj){
-    EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]) , 1e-5);
+  for (int jj = 0; jj < dy_dem_x.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]), 1e-5);
   }
-  for (int jj = 0; jj < dy_dem.size(); ++jj){
-    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]) , 1e-5);
+  for (int jj = 0; jj < dy_dem.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]), 1e-5);
   }
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestTabulateSeA, tabulate_fusion_se_a_gpu_cuda)
-{
+TEST_F(TestTabulateSeA, tabulate_fusion_se_a_gpu_cuda) {
   std::vector xyz_scatter(nloc * nnei * last_layer_size, 0.0);
 
-  double * xyz_scatter_dev = NULL, * table_dev = NULL, * em_x_dev = NULL, * em_dev = NULL;
+  double *xyz_scatter_dev = NULL, *table_dev = NULL, *em_x_dev = NULL,
+         *em_dev = NULL;
   deepmd::malloc_device_memory_sync(xyz_scatter_dev, xyz_scatter);
   deepmd::malloc_device_memory_sync(table_dev, table);
   deepmd::malloc_device_memory_sync(em_x_dev, em_x);
   deepmd::malloc_device_memory_sync(em_dev, em);
-  deepmd::tabulate_fusion_se_a_gpu_cuda(xyz_scatter_dev, table_dev, &info[0], em_x_dev, em_dev, nloc, nnei, last_layer_size);
+  deepmd::tabulate_fusion_se_a_gpu_cuda(xyz_scatter_dev, table_dev,
+                                                &info[0], em_x_dev, em_dev,
+                                                nloc, nnei, last_layer_size);
   deepmd::memcpy_device_to_host(xyz_scatter_dev, xyz_scatter);
   deepmd::delete_device_memory(xyz_scatter_dev);
   deepmd::delete_device_memory(table_dev);
@@ -192,25 +662,27 @@ TEST_F(TestTabulateSeA, tabulate_fusion_se_a_gpu_cuda)
 
   EXPECT_EQ(xyz_scatter.size(), nloc * nnei * last_layer_size);
   EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size());
-  for (int jj = 0; jj < xyz_scatter.size(); ++jj){
-    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]) , 1e-5);
+  for (int jj = 0; jj < xyz_scatter.size(); ++jj) {
+    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]), 1e-5);
   }
 }
 
-TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_gpu_cuda)
-{
+TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_gpu_cuda) {
   std::vector dy_dem_x(em_x.size(), 0.0);
   std::vector dy_dem(em.size(), 0.0);
   std::vector dy(nloc * nnei * last_layer_size, 1.0);
 
-  double * dy_dem_x_dev = NULL, * dy_dem_dev = NULL, * table_dev = NULL, * em_x_dev = NULL, * em_dev = NULL, * dy_dev = NULL;
+  double *dy_dem_x_dev = NULL, *dy_dem_dev = NULL, *table_dev = NULL,
+         *em_x_dev = NULL, *em_dev = NULL, *dy_dev = NULL;
   deepmd::malloc_device_memory_sync(dy_dem_x_dev, dy_dem_x);
   deepmd::malloc_device_memory_sync(dy_dem_dev, dy_dem);
   deepmd::malloc_device_memory_sync(table_dev, table);
   deepmd::malloc_device_memory_sync(em_x_dev, em_x);
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(dy_dev, dy);
-  deepmd::tabulate_fusion_se_a_grad_gpu_cuda(dy_dem_x_dev, dy_dem_dev, table_dev, &info[0], em_x_dev, em_dev, dy_dev, nloc, nnei, last_layer_size);
+  deepmd::tabulate_fusion_se_a_grad_gpu_cuda(
+      dy_dem_x_dev, dy_dem_dev, table_dev, &info[0], em_x_dev, em_dev, dy_dev,
+      nloc, nnei, last_layer_size);
   deepmd::memcpy_device_to_host(dy_dem_x_dev, dy_dem_x);
   deepmd::memcpy_device_to_host(dy_dem_dev, dy_dem);
   deepmd::delete_device_memory(dy_dem_x_dev);
@@ -224,26 +696,28 @@ TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_gpu_cuda)
   EXPECT_EQ(dy_dem.size(), nloc * nnei * 4);
   EXPECT_EQ(dy_dem_x.size(), expected_dy_dem_x.size());
   EXPECT_EQ(dy_dem.size(), expected_dy_dem.size());
-  for (int jj = 0; jj < dy_dem_x.size(); ++jj){
-    EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]) , 1e-5);
+  for (int jj = 0; jj < dy_dem_x.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]), 1e-5);
   }
-  for (int jj = 0; jj < dy_dem.size(); ++jj){
-    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]) , 1e-5);
+  for (int jj = 0; jj < dy_dem.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]), 1e-5);
   }
 }
-#endif // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestTabulateSeA, tabulate_fusion_se_a_gpu_rocm)
-{
+TEST_F(TestTabulateSeA, tabulate_fusion_se_a_gpu_rocm) {
   std::vector xyz_scatter(nloc * nnei * last_layer_size, 0.0);
 
-  double * xyz_scatter_dev = NULL, * table_dev = NULL, * em_x_dev = NULL, * em_dev = NULL;
+  double *xyz_scatter_dev = NULL, *table_dev = NULL, *em_x_dev = NULL,
+         *em_dev = NULL;
   deepmd::malloc_device_memory_sync(xyz_scatter_dev, xyz_scatter);
   deepmd::malloc_device_memory_sync(table_dev, table);
   deepmd::malloc_device_memory_sync(em_x_dev, em_x);
   deepmd::malloc_device_memory_sync(em_dev, em);
-  deepmd::tabulate_fusion_se_a_gpu_rocm(xyz_scatter_dev, table_dev, &info[0], em_x_dev, em_dev, nloc, nnei, last_layer_size);
+  deepmd::tabulate_fusion_se_a_gpu_rocm(xyz_scatter_dev, table_dev,
+                                                &info[0], em_x_dev, em_dev,
+                                                nloc, nnei, last_layer_size);
   deepmd::memcpy_device_to_host(xyz_scatter_dev, xyz_scatter);
   deepmd::delete_device_memory(xyz_scatter_dev);
   deepmd::delete_device_memory(table_dev);
@@ -252,25 +726,27 @@ TEST_F(TestTabulateSeA, tabulate_fusion_se_a_gpu_rocm)
 
   EXPECT_EQ(xyz_scatter.size(), nloc * nnei * last_layer_size);
   EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size());
-  for (int jj = 0; jj < xyz_scatter.size(); ++jj){
-    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]) , 1e-5);
+  for (int jj = 0; jj < xyz_scatter.size(); ++jj) {
+    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]), 1e-5);
   }
 }
 
-TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_gpu_rocm)
-{
+TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_gpu_rocm) {
   std::vector dy_dem_x(em_x.size(), 0.0);
   std::vector dy_dem(em.size(), 0.0);
   std::vector dy(nloc * nnei * last_layer_size, 1.0);
 
-  double * dy_dem_x_dev = NULL, * dy_dem_dev = NULL, * table_dev = NULL, * em_x_dev = NULL, * em_dev = NULL, * dy_dev = NULL;
+  double *dy_dem_x_dev = NULL, *dy_dem_dev = NULL, *table_dev = NULL,
+         *em_x_dev = NULL, *em_dev = NULL, *dy_dev = NULL;
   deepmd::malloc_device_memory_sync(dy_dem_x_dev, dy_dem_x);
   deepmd::malloc_device_memory_sync(dy_dem_dev, dy_dem);
   deepmd::malloc_device_memory_sync(table_dev, table);
   deepmd::malloc_device_memory_sync(em_x_dev, em_x);
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(dy_dev, dy);
-  deepmd::tabulate_fusion_se_a_grad_gpu_rocm(dy_dem_x_dev, dy_dem_dev, table_dev, &info[0], em_x_dev, em_dev, dy_dev, nloc, nnei, last_layer_size);
+  deepmd::tabulate_fusion_se_a_grad_gpu_rocm(
+      dy_dem_x_dev, dy_dem_dev, table_dev, &info[0], em_x_dev, em_dev, dy_dev,
+      nloc, nnei, last_layer_size);
   deepmd::memcpy_device_to_host(dy_dem_x_dev, dy_dem_x);
   deepmd::memcpy_device_to_host(dy_dem_dev, dy_dem);
   deepmd::delete_device_memory(dy_dem_x_dev);
@@ -284,11 +760,11 @@ TEST_F(TestTabulateSeA, tabulate_fusion_se_a_grad_gpu_rocm)
   EXPECT_EQ(dy_dem.size(), nloc * nnei * 4);
   EXPECT_EQ(dy_dem_x.size(), expected_dy_dem_x.size());
   EXPECT_EQ(dy_dem.size(), expected_dy_dem.size());
-  for (int jj = 0; jj < dy_dem_x.size(); ++jj){
-    EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]) , 1e-5);
+  for (int jj = 0; jj < dy_dem_x.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]), 1e-5);
   }
-  for (int jj = 0; jj < dy_dem.size(); ++jj){
-    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]) , 1e-5);
+  for (int jj = 0; jj < dy_dem.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]), 1e-5);
   }
 }
-#endif // TENSORFLOW_USE_ROCM
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_tabulate_se_r.cc b/source/lib/tests/test_tabulate_se_r.cc
index ebd4eb2295..3fdda010d5 100644
--- a/source/lib/tests/test_tabulate_se_r.cc
+++ b/source/lib/tests/test_tabulate_se_r.cc
@@ -1,110 +1,621 @@
-#include 
+#include 
+
 #include 
+#include 
+
 #include "device.h"
 #include "tabulate.h"
-#include 
 #include "utilities.h"
 
-class TestTabulateSeR : public ::testing::Test
-{
-protected:
+class TestTabulateSeR : public ::testing::Test {
+ protected:
   // em_x = tf.random.uniform([4, 16], minval=0, maxval=0.2, dtype = tf.float64)
-  std::vector info = {
-    0, 0.2, 0.4, 0.01, 0.1, -1 
-  };  
-  std::vector em = {
-    0.0343909 ,
-    0.11357423,
-    0.0858676 ,
-    0.19337772,
-    0.1935728 ,
-    0.0477744 ,
-    0.05845198,
-    0.19080509,
-    0.16111261,
-    0.07179262,
-    0.10078013,
-    0.04640909,
-    0.10433399,
-    0.15650861,
-    0.17527857,
-    0.04249097
-  };
+  std::vector info = {0, 0.2, 0.4, 0.01, 0.1, -1};
+  std::vector em = {0.0343909,  0.11357423, 0.0858676,  0.19337772,
+                            0.1935728,  0.0477744,  0.05845198, 0.19080509,
+                            0.16111261, 0.07179262, 0.10078013, 0.04640909,
+                            0.10433399, 0.15650861, 0.17527857, 0.04249097};
 
   std::vector table = {
-    6.348551343037398542e-01, 4.209465843706336474e-04, 6.390862740714405368e-03, -1.544448595628262176e-04, -1.891095227974180087e-04, 2.695025951562175852e-05, -1.317549846042939343e+00, -5.624478206903206490e-02, 1.274284553146523905e-02, -6.836227424141475689e-04, -1.438066096020836407e-04, -1.854932873974712940e-06, -9.996964112615246423e-01, 6.928234423723647617e-02, -4.974719973810486084e-03, -2.019584729176823030e-04, 1.077254539742680247e-04, -8.024209768588029797e-06, 3.552689563657350780e-01, -3.578299775339799371e-02, -1.319946251007718743e-03, 1.016701374495701440e-03, -1.057336720791906388e-04,  5.182678943855506567e-06, 1.227750369557627286e+00, 4.100352079064395472e-02, 3.586869164810712295e-03, -4.304540913340443135e-04, -1.269943482892440004e-04, 1.459465404430219674e-05, -1.472642501673147031e+00, -1.611354921283318364e-01, 1.645427874390196360e-02, 2.107392978135091402e-04, -2.193541011180757461e-04, 1.915392497459551146e-05, -2.855174490181606739e-01, 9.774337856626263976e-02, -2.140891880666230714e-03, -7.148328890055103638e-04, 1.965696332267534503e-05,-4.593489654121371453e-06, -1.468441009949382314e+00, -6.360828127262234399e-02, 4.751283295356955282e-03, 8.711899561753186068e-05, -9.937008678852959884e-06,  4.273569346584811685e-07,
-    6.348599826995243722e-01, 5.487167506364742930e-04, 6.386116198716365253e-03, -1.619832375568118791e-04, -1.877328309473502049e-04, 2.134130914519164856e-05, -1.318111020264137512e+00, -5.599013082054477008e-02, 1.272225054666903735e-02, -6.893710047488201898e-04, -1.434367581078517366e-04, 3.329508890614227371e-05 , -9.990040854920316793e-01, 6.918278968071900348e-02, -4.980714172967731085e-03, -1.976574487947816198e-04, 1.070037204086153902e-04, -7.859875077388093586e-06, 3.549109954092205532e-01, -3.580909209068139365e-02, -1.289508598157979719e-03, 1.012474257117017967e-03, -1.054418924402112718e-04, -1.245498322204730900e-05, 1.228160763020727630e+00, 4.107512853046493134e-02, 3.573879491390910459e-03, -4.355190226638688713e-04, -1.258433981470396103e-04, 1.610862268100766631e-05, -1.474252210958008291e+00, -1.608063442081248406e-01, 1.646046950167207382e-02, 2.019843636566674109e-04, -2.185756589083626730e-04, 1.978479879983412190e-05, -2.845402300363228942e-01, 9.770034635718018168e-02, -2.162325119197382531e-03, -7.140472215558940627e-04, 1.956302663031799223e-05, 1.932584474244053378e-05, -1.469076617546759334e+00, -6.351322951074317436e-02, 4.753890907276497185e-03, 8.672114560243554321e-05, -1.004574434175897967e-05, -4.345700882560937596e-06,
-    6.348661083147921769e-01, 6.763897297752743953e-04, 6.381144275303845745e-03, -1.694690463885140694e-04, -1.868179426353836598e-04, 3.439291082765030046e-05, -1.318669650038090335e+00, -5.573589319299507294e-02, 1.270148368741391351e-02, -6.950749719342792137e-04, -1.422194703304518733e-04, 3.454751241752252323e-05 , -9.983127558632299836e-01, 6.908311652764687061e-02, -4.986579772806746212e-03, -1.933888092529071571e-04, 1.068327546750306073e-04, -2.976978385983384886e-05, 3.545527765488725169e-01, -3.583457894275744043e-02, -1.259197760082061621e-03, 1.008246479193084487e-03, -1.059401869200098984e-04,  1.721968053146218465e-06, 1.228571871257205572e+00, 4.114647496201748883e-02, 3.560738575723638825e-03, -4.405332425718102457e-04, -1.251648759618972115e-04, 3.659080417076460655e-05, -1.475858628153338792e+00, -1.604770750960976822e-01, 1.646639808472218428e-02, 1.932598402043995316e-04, -2.175904819601363058e-04, 1.230256868634094333e-05, -2.835634435191126679e-01, 9.765688571984927624e-02, -2.183734604613508240e-03, -7.132463811570244078e-04, 2.021887442373574272e-05, 1.321401495096886281e-05, -1.469711274366155784e+00, -6.341812571665436660e-02, 4.756486470714936521e-03, 8.631384191910702040e-05, -1.010516500002806932e-05, -1.110874413279218719e-05,
-    6.348735101551836735e-01, 8.039610290153098582e-04, 6.375948457075718626e-03, -1.769074132993461279e-04, -1.855677150383903214e-04, 3.421271436711027645e-05, -1.319225739518145257e+00, -5.548207260888919634e-02, 1.268054645200545304e-02, -7.007297564176242621e-04, -1.408885818822980523e-04, 3.124701885930576017e-05 , -9.976224235482542557e-01, 6.898332734138989952e-02, -4.992317635216104131e-03, -1.891404922064061889e-04, 1.053957535708985289e-04, -1.089286646983666076e-06, 3.541943058468561834e-01, -3.585946084769019160e-02, -1.229013912637771933e-03, 1.004009466262262241e-03, -1.059129033455631863e-04, -4.941663399086282537e-06, 1.228983691638902087e+00, 4.121755707472917613e-02, 3.547447845420277635e-03, -4.455036207721562607e-04, -1.239172256532283074e-04, 3.437341080261359686e-05, -1.477461752073406132e+00, -1.601476900261984693e-01, 1.647206544856073471e-02, 1.845724864086241608e-04, -2.173853638475303177e-04, 3.620505631412716563e-05, -2.825870937484175061e-01, 9.761299713537928413e-02, -2.205119732548723246e-03, -7.124245958910824846e-04, 2.074820558303217398e-05, 1.209381466404663338e-05, -1.470344979888463577e+00, -6.332297013406351649e-02, 4.759069711794740656e-03, 8.589935708505183382e-05, -1.045842324058424788e-05, -6.134254562752213537e-06,
-    6.348821871815598650e-01, 9.314261853726121809e-04, 6.370530236175125580e-03, -1.842978984547447257e-04, -1.840210089691990327e-04, 2.234897510077387526e-05, -1.319779292891724465e+00, -5.522867246076747227e-02, 1.265944033870337014e-02, -7.063360380236871801e-04, -1.393416734992873119e-04, 1.931167378610719847e-05 , -9.969330896946905218e-01, 6.888342466806646192e-02, -4.997928623431705138e-03, -1.849303524006284602e-04, 1.053651633995249134e-04, -2.870133904891753420e-05, 3.538355893399378616e-01, -3.588374034700148041e-02, -1.198957225773849763e-03, 9.997681359810027708e-04, -1.060678155548662341e-04, -4.107776618240329050e-06, 1.229396221507694564e+00, 4.128837188660083868e-02, 3.534008730169808672e-03, -4.504275777948374090e-04, -1.224778886969254976e-04, 2.455513266683544498e-05, -1.479061581584721008e+00, -1.598181942132129441e-01, 1.647747255391585064e-02, 1.759082956613747337e-04, -2.158335508261176197e-04, 6.406725844410341030e-06, -2.816111850012528728e-01, 9.756868109694678826e-02, -2.226479900633348240e-03, -7.115823288942964460e-04, 2.121038517729223415e-05, 1.358027318850170435e-05, -1.470977733597038872e+00, -6.322776301216057049e-02, 4.761640356162846754e-03, 8.547576468445008296e-05, -1.081874527005240631e-05, -8.845528475774308509e-07,
-    6.348921383103013349e-01, 1.058780765759985421e-03, 6.364891110105044131e-03, -1.916363332792569681e-04, -1.827768871456785058e-04, 2.275707291847725182e-05, -1.320330314380025793e+00, -5.497569611120622923e-02, 1.263816684562326688e-02, -7.118908987616576157e-04, -1.380182662155302303e-04, 1.630252530406085050e-05 , -9.962447554247517711e-01, 6.878341103651769428e-02, -5.003413601927745452e-03, -1.807403991329658622e-04, 1.040363362483998831e-04, -4.422604643727719699e-06, 3.534766330394523148e-01, -3.590741998555346121e-02, -1.169027863565602274e-03, 9.955202772264954043e-04, -1.060447700647724903e-04, -1.021743279826507342e-05, 1.229809458175783687e+00, 4.135891644424664892e-02, 3.520422661584679015e-03, -4.553035794622276055e-04, -1.210679214963379874e-04, 1.595827246550979495e-05, -1.480658115605847147e+00, -1.594885928526604546e-01, 1.648262036665308974e-02, 1.672799673730459213e-04, -2.148155690753495697e-04,-1.867405535452657550e-06, -2.806357215496423363e-01, 9.752393810975558408e-02, -2.247814508535729908e-03, -7.107227883497464890e-04, 2.207595560206285042e-05,-1.137331983229785190e-06, -1.471609534977757372e+00, -6.313250460562676303e-02, 4.764198129054059844e-03, 8.503999275315992160e-05, -1.072692568096017848e-05, -1.373273803695183988e-05,
-    6.349033624136081189e-01, 1.186020367092407990e-03, 6.359032581545111251e-03, -1.989262833250400370e-04, -1.812752661309344573e-04, 1.302837915648187095e-05, -1.320878808237722746e+00, -5.472314689282183064e-02, 1.261672747063919374e-02, -7.173917679890315846e-04, -1.373052781380030543e-04, 3.768455339511444900e-05 , -9.955574218354472649e-01, 6.868328895828368363e-02, -5.008773436308684712e-03, -1.765844799686671349e-04, 1.034810966435298563e-04, -1.111176255155353207e-05, 3.531174429312692320e-01, -3.593050231143132822e-02, -1.139225984250480384e-03, 9.912704081392112714e-04, -1.064918174657224404e-04,  2.680738443515978403e-06, 1.230223398925979650e+00, 4.142918782293085467e-02, 3.506691073047987512e-03, -4.601302388532728274e-04, -1.198865987378785417e-04, 1.656386182477533959e-05, -1.482251353107205460e+00, -1.591588911206925361e-01, 1.648750985769346228e-02, 1.586901819247656846e-04, -2.147074421644348298e-04, 2.641762503224190698e-05, -2.796607076604977760e-01, 9.747876869099537933e-02, -2.269122958003529523e-03, -7.098388532529275848e-04, 2.226701915637888804e-05, 1.106237844209756009e-05, -1.472240383519069384e+00, -6.303719517464229094e-02, 4.766742755353862819e-03, 8.459962202271287246e-05, -1.132218730142039535e-05,  8.958476322974335592e-07,
-    6.349158583197994643e-01, 1.313140616388666637e-03, 6.352956158169477396e-03, -2.061601622854974502e-04, -1.806298821034440756e-04, 3.770936817966389514e-05, -1.321424778752664952e+00, -5.447102810827629538e-02, 1.259512371128685033e-02, -7.228490733933210606e-04, -1.356407402355522122e-04, 2.099832634320949299e-05 , -9.948710899987588396e-01, 6.858306092758209571e-02, -5.014008993202081696e-03, -1.724573933478598642e-04, 1.029144894329912032e-04, -1.738522780636760158e-05, 3.527580249757622521e-01, -3.595298987582695727e-02, -1.109551740263377793e-03, 9.870126155001155040e-04, -1.064931456292656029e-04, -2.059910396978558087e-06, 1.230638041011988815e+00, 4.149918312660194619e-02, 3.492815399561766294e-03, -4.649051157564728157e-04, -1.192927614880224277e-04, 4.072077917749542957e-05, -1.483841293110880866e+00, -1.588290941739924356e-01, 1.649214200293154520e-02, 1.501282794678792006e-04, -2.138853834118830831e-04, 2.633111784219914963e-05, -2.786861475954987011e-01, 9.743317336979973042e-02, -2.290404652904617314e-03, -7.089360554728917595e-04, 2.260180638238835256e-05, 1.741828165826791135e-05, -1.472870278712053782e+00, -6.294183498489253070e-02, 4.769273959660644442e-03, 8.414681093302789892e-05, -1.142905205912834352e-05, -4.014065121916994726e-06,
-    6.349296248136164778e-01, 1.440137170869312810e-03, 6.346663352465874847e-03, -2.133510744796659759e-04, -1.788513201196447670e-04, 1.721163944875696416e-05, -1.321968230245579967e+00, -5.421934303028537461e-02, 1.257335706466754244e-02, -7.282542863230233527e-04, -1.343059033644905889e-04, 1.747822893445653714e-05 , -9.941857609618123259e-01, 6.848272942128874607e-02, -5.019121140152461337e-03, -1.683596869525186377e-04, 1.024142382012053007e-04, -2.632719129544749384e-05, 3.523983851077774343e-01, -3.597488523292310947e-02, -1.080005278271846739e-03, 9.827512175914082399e-04, -1.066680880078371994e-04,  3.403258606315080555e-07, 1.231053381658700818e+00, 4.156889948792314576e-02, 3.478797077596604108e-03, -4.696409807358484993e-04, -1.173636798436718986e-04, 1.149931408689037458e-05, -1.485427934690428442e+00, -1.584992071496764965e-01, 1.649651778315383566e-02, 1.415960091521040870e-04, -2.125888038426753843e-04, 7.384582528889821378e-06, -2.777120456109742896e-01, 9.738715268720327112e-02, -2.311658999267464203e-03, -7.080165982958596923e-04, 2.340034491729013294e-05, 5.174033942788913380e-06, -1.473499220050474623e+00, -6.284642430757329812e-02, 4.771791466347353149e-03, 8.368540130389298475e-05, -1.162498575113560591e-05, -5.381585801785509468e-06,
-    6.349446606365225509e-01, 1.567005718051586727e-03, 6.340155681555815353e-03, -2.204854663573854625e-04, -1.779502948888764897e-04, 3.196283450610521294e-05, -1.322509167069771951e+00, -5.396809490162747525e-02, 1.255142902735281209e-02, -7.336077414823606981e-04, -1.332538502428148267e-04, 2.525523713666122703e-05 , -9.935014357470516311e-01, 6.838229689892011409e-02, -5.024110745516051704e-03, -1.642860423419652261e-04, 1.011792892256958577e-04, -5.902237032851650630e-06, 3.520385292366049468e-01, -3.599619093977864809e-02, -1.050586739210998023e-03, 9.784837539753422735e-04, -1.066187407206570670e-04, -6.052991441884039902e-06, 1.231469418062474341e+00, 4.163833406830096812e-02, 3.464637544942418459e-03, -4.743218246565151001e-04, -1.164951133813105271e-04, 2.473911917278243621e-05, -1.487011276970676033e+00, -1.581692351651968476e-01, 1.650063818395723983e-02, 1.331001312464952355e-04, -2.118074389246019866e-04, 9.192428068946771109e-06, -2.767384059577842614e-01, 9.734070719609828892e-02, -2.332885405321092481e-03, -7.070743922828596519e-04, 2.373777250910882265e-05, 1.127700884024945933e-05, -1.474127207030835107e+00, -6.275096341939470634e-02, 4.774294999622533293e-03, 8.321347296773265077e-05, -1.162225195759229858e-05, -1.468175407624093560e-05,
-    6.349609644870094494e-01, 1.693741975839754832e-03, 6.333434667015966531e-03, -2.275719866012916918e-04, -1.766077012712487378e-04, 2.919052022666632077e-05, -1.323047593610823247e+00, -5.371728693515605280e-02, 1.252934109528984138e-02, -7.389107006611626187e-04, -1.322992615601379437e-04, 3.689337377145077536e-05 , -9.928181153524118230e-01, 6.828176580261838269e-02, -5.028978678356570489e-03, -1.602449667799085492e-04, 1.004819833385002965e-04, -7.012859043909368637e-06, 3.516784632459502014e-01, -3.601690955621394963e-02, -1.021296258318379370e-03, 9.742140050919662845e-04, -1.068837890347894775e-04,  3.261791903209577241e-07, 1.231886147391427544e+00, 4.170748405790913882e-02, 3.450338240560582581e-03, -4.789562532735843967e-04, -1.153902983973557932e-04, 2.856018069496295048e-05, -1.488591319127526624e+00, -1.578391833182464787e-01, 1.650450419566778376e-02, 1.246407552546250339e-04, -2.115332183818513349e-04, 3.149345367837511192e-05, -2.757652328811996956e-01, 9.729383746118988596e-02, -2.354083281534554220e-03, -7.061133365182417328e-04, 2.418809213597686327e-05, 1.280494807360028992e-05, -1.474754239152433311e+00, -6.265545260258377491e-02, 4.776784283590801948e-03, 8.273687806363864625e-05, -1.229952261449745124e-05,  3.204146150058887708e-06,
-    6.349785350208994039e-01, 1.820341692612803541e-03, 6.326501834700739083e-03, -2.346100929840904846e-04, -1.748840426396014729e-04, 1.130785525935554482e-05, -1.323583514286295282e+00, -5.346692231381247606e-02, 1.250709476370755191e-02, -7.441705970339035966e-04, -1.303302437099287372e-04, 7.935577538626925858e-06 , -9.921358007514943234e-01, 6.818113855713830995e-02, -5.033725808341922223e-03, -1.562353718150353687e-04, 1.001568149392305130e-04, -2.302258383924021595e-05, 3.513181929939074299e-01, -3.603704364469759169e-02, -9.921339651685744804e-04, 9.699384566370250092e-04, -1.069081013817698415e-04, -2.744679484186812129e-06, 1.232303566785723392e+00, 4.177634667571154814e-02, 3.435900604437185177e-03, -4.835440426346156498e-04, -1.140781768005934266e-04, 2.411509316948267986e-05, -1.490168060387760951e+00, -1.575090566866652331e-01, 1.650811681325956015e-02, 1.162064642248029450e-04, -2.100324946396962247e-04, 4.868837971279583202e-06, -2.747925306207861240e-01, 9.724654405895133413e-02, -2.375252040655950400e-03, -7.051355614741510987e-04, 2.505903781065493165e-05,-2.569082101323676566e-06, -1.475380315917416585e+00, -6.255989214488603956e-02, 4.779259042312647421e-03, 8.224491253736542200e-05, -1.205054378062991984e-05, -1.594987943813344381e-05,
-    6.349973708516511994e-01, 1.946800647308156995e-03, 6.319358714566076195e-03, -2.415904693897710526e-04, -1.741570105122868483e-04, 3.342152683043006766e-05, -1.324116933545430141e+00, -5.321700419064152865e-02, 1.248469152702344660e-02, -7.493727578058629766e-04, -1.295525827398787404e-04, 2.659942231629285135e-05 , -9.914544928937398804e-01, 6.808041756983601589e-02, -5.038353005641925050e-03, -1.522500103683389601e-04, 9.911425811568465554e-05, -1.035676665958809070e-05, 3.509577243129330393e-01, -3.605659577023319351e-02, -9.630999837076988784e-04, 9.656594578503095369e-04, -1.070158919994286978e-04, -2.281503112307771063e-06, 1.232721673357858538e+00, 4.184491916948063911e-02, 3.421326077437690516e-03, -4.880823132679394552e-04, -1.129872290747681817e-04, 2.854952342195995698e-05, -1.491741500028839651e+00, -1.571788603283475749e-01, 1.651147703627379656e-02, 1.078118218043548068e-04, -2.094656285123614196e-04, 1.573608604543182341e-05, -2.738203034102859035e-01, 9.719882757757769554e-02, -2.396391097750961291e-03, -7.041328812172977002e-04, 2.511128111671661627e-05, 1.472819566023977703e-05, -1.476005436830838402e+00, -6.246428233956573262e-02, 4.781718999863710830e-03, 8.175246233396933941e-05, -1.310850420537104008e-05,  1.717274673157189222e-05,
-    6.350174705506670403e-01, 2.073114649501703322e-03, 6.312006840494438151e-03, -2.485262001215581039e-04, -1.724445833892894095e-04, 1.623821996891234705e-05, -1.324647855868849478e+00, -5.296753568880858964e-02, 1.246213287875118370e-02, -7.545274547770323926e-04, -1.284298383236558551e-04, 3.142127009671183137e-05 , -9.907741927046019859e-01, 6.797960523066012839e-02, -5.042861140826992473e-03, -1.482946605870891395e-04, 9.821987974303589589e-05, -3.593831829470692349e-06, 3.505970630098214080e-01, -3.607556850024738748e-02, -9.341944322877257512e-04, 9.613773761737330267e-04, -1.072343182304808093e-04,  2.791451096706449119e-06, 1.233140464192951757e+00, 4.191319881581374862e-02, 3.406616101162745613e-03, -4.925758895926437772e-04, -1.113902906060245713e-04, 1.275308331152581608e-05, -1.493311637378700762e+00, -1.568485992811522733e-01, 1.651458586873823589e-02, 9.944841367174414462e-05, -2.085492230796830474e-04, 1.276456024245067926e-05, -2.728485554775001987e-01, 9.715068861693920699e-02, -2.417499870240937074e-03, -7.031148500958378164e-04, 2.576543833825076558e-05, 7.841889896124507091e-06, -1.476629601400710978e+00, -6.236862348540499201e-02, 4.784163880393361643e-03, 8.124213252544174404e-05, -1.286332078849730127e-05, -1.821996546344873330e-06,
-    6.350388326475970846e-01, 2.199279539485121671e-03, 6.304447750121061969e-03, -2.554047701160370044e-04, -1.716061813901302753e-04, 3.413524324276134592e-05, -1.325176285768258300e+00, -5.271851990161838253e-02, 1.243942031140890699e-02, -7.596346042592860793e-04, -1.269803855069738714e-04, 2.314478643438959578e-05 , -9.900949010857222898e-01, 6.787870391214460841e-02, -5.047251084767826433e-03, -1.443753107913585767e-04, 9.837034053479728221e-05, -3.865274593462701621e-05, 3.502362148656810170e-01, -3.609396440447816545e-02, -9.054174237006253068e-04, 9.570894530963515055e-04, -1.071221722792567601e-04, -5.180134097885568801e-06, 1.233559936349031494e+00, 4.198118292014653419e-02, 3.391772117805412056e-03, -4.970162819604460663e-04, -1.105584293158747960e-04, 2.757032189173095048e-05, -1.494878471815561216e+00, -1.565182785628131401e-01, 1.651744431908664865e-02, 9.112268062696188113e-05, -2.082277461664644284e-04, 3.370820636496137736e-05, -2.718772910441742408e-01, 9.710212778853387350e-02, -2.438577777940475859e-03, -7.020756635958485484e-04, 2.613933618298708639e-05, 1.211520684095310762e-05, -1.477252809138063672e+00, -6.227291588670166161e-02, 4.786593408182711167e-03, 8.072392747742672100e-05, -1.281499371544444526e-05, -1.293175202324119235e-05,
-    6.350614556306495295e-01, 2.325291188338546311e-03, 6.296682984661446623e-03, -2.622362895631248896e-04, -1.701076322674243866e-04, 2.573454296903621253e-05, -1.325702227786145437e+00, -5.246995989253622206e-02, 1.241655531642829255e-02, -7.646904682589584622e-04, -1.257704658362481128e-04, 2.439373356208127567e-05 , -9.894166189151047952e-01, 6.777771596940393439e-02, -5.051523708536139086e-03, -1.404733355821404265e-04, 9.677082285072928253e-05, -3.720510878458014501e-06, 3.498751856359115786e-01, -3.611178605486395354e-02, -8.767690652124425499e-04, 9.527998576480508275e-04, -1.072771816869139909e-04, -2.281376475091892258e-06, 1.233980086857325631e+00, 4.204886881676297983e-02, 3.376795570009583514e-03, -5.014114486109571937e-04, -1.092957353261917852e-04, 2.516456964431257380e-05, -1.496442002767713664e+00, -1.561879031708521548e-01, 1.652005340007862977e-02, 8.282284133744905071e-05, -2.067123325224875000e-04, 7.057486539657783089e-06, -2.709065143258797548e-01, 9.705314571543909030e-02, -2.459624243094573216e-03, -7.010187162791577066e-04, 2.672975399789282626e-05, 7.629793933874534523e-06, -1.477875059556995385e+00, -6.217715985326619649e-02, 4.789007307701962507e-03, 8.019935829649041371e-05, -1.318861260046749971e-05, -7.150339348059032240e-06,
-    6.350853379468965887e-01, 2.451145498001100487e-03, 6.288714088740080324e-03, -2.690159202421790068e-04, -1.686584359429067433e-04, 1.941481480743946700e-05, -1.326225686495484890e+00, -5.222185869521017709e-02, 1.239353938406437261e-02, -7.696964132049412353e-04, -1.246012242240120604e-04, 2.724071141974432252e-05 , -9.887393470472876089e-01, 6.767664374012982709e-02, -5.055679883306329545e-03, -1.366074591188833347e-04, 9.623033677044332457e-05, -1.113456896173822779e-05, 3.495139810501832756e-01, -3.612903602543367232e-02, -8.482494585971035728e-04, 9.485064841097947883e-04, -1.073561607316583907e-04, -2.239996380309942211e-06, 1.234400912722548371e+00, 4.211625386880359784e-02, 3.361687900729734210e-03, -5.057597926077623488e-04, -1.078411892315765344e-04, 1.508800592977199686e-05, -1.498002229713325750e+00, -1.558574780824932282e-01, 1.652241412871961052e-02, 7.456368677257522147e-05, -2.062001731191939454e-04, 2.069621557469772063e-05, -2.699362295319003291e-01, 9.700374303226286243e-02, -2.480638690415259105e-03, -6.999405672986690023e-04, 2.700789474676622474e-05, 1.556143061449123430e-05, -1.478496352174730522e+00, -6.208135570041733303e-02, 4.791405303667145565e-03, 7.966538051836852740e-05, -1.352687841609079228e-05, -2.789411930543395566e-06,
-    6.351104780025849106e-01, 2.576838401336829787e-03, 6.280542610220480118e-03, -2.757414391158645754e-04, -1.675762649448408429e-04, 2.787462665161048641e-05, -1.326746666499438287e+00, -5.197421931349595348e-02, 1.237037400330611749e-02, -7.746541492504023475e-04, -1.232228491818352083e-04, 2.166599538617633252e-05 , -9.880630863135209108e-01, 6.757548954459043078e-02, -5.059720480258220535e-03, -1.327693574508429343e-04, 9.550030312894054513e-05, -1.096549240339310371e-05, 3.491526068124157778e-01, -3.614571689219699124e-02, -8.198587001702131727e-04, 9.442100079790295610e-04, -1.074330339280879455e-04, -2.103241190440061311e-06, 1.234822410923189784e+00, 4.218333546826981417e-02, 3.346450553092000530e-03, -5.100549148199152614e-04, -1.071543306169886722e-04, 3.572075491055831030e-05, -1.499559152180234056e+00, -1.555270082545787691e-01, 1.652452752618108200e-02, 6.633607063542407416e-05, -2.052990867644106118e-04, 1.891505702101457936e-05, -2.689664408651156746e-01, 9.695392038509384469e-02, -2.501620547117759490e-03, -6.988464710389351081e-04, 2.774961528830105395e-05, 4.843681010028069226e-06, -1.479116686511674494e+00, -6.198550374897651011e-02, 4.793787121096219732e-03, 7.912045955652986253e-05, -1.359696279035538403e-05, -9.132339849453571562e-06,
-    6.351368741634448867e-01, 2.702365862198193025e-03, 6.272170100036473551e-03, -2.824171711189519380e-04, -1.661976899287730559e-04, 2.457347650017094835e-05, -1.327265172431057128e+00, -5.172704472148267896e-02, 1.234706066178771662e-02, -7.795630288411945592e-04, -1.217395799935142969e-04, 1.184741714306808905e-05 , -9.873878375219384829e-01, 6.747425568563097942e-02, -5.063646370480812467e-03, -1.289626891970745083e-04, 9.513074838211379970e-05, -2.521433322545949321e-05, 3.487910686007592576e-01, -3.616183123303555458e-02, -7.915968808226425679e-04, 9.399119246579864433e-04, -1.077055728285351480e-04,  6.031191175422362627e-06, 1.235244578411804905e+00, 4.225011103602600848e-02, 3.331084970256580589e-03, -5.143079026275864784e-04, -1.055716785023949844e-04, 2.051193936812822612e-05, -1.501112769745742259e+00, -1.551964986234863897e-01, 1.652639461772111712e-02, 5.814089462644928566e-05, -2.041249358339155683e-04, 6.311073191969795411e-06, -2.679971525218879380e-01, 9.690367843145115956e-02, -2.522569242956208650e-03, -6.977319783847560700e-04, 2.827424678587480721e-05, 2.739673941330651616e-06, -1.479736062091468574e+00, -6.188960432526132566e-02, 4.796152485364500034e-03, 7.856828747830194362e-05, -1.395147193446202365e-05, -4.087221013031299888e-06,
-    6.351645247550001816e-01, 2.827723875485507743e-03, 6.263598112024793517e-03, -2.890409134869928735e-04, -1.648390823803598971e-04, 2.215887759642637032e-05, -1.327781208952985015e+00, -5.148033786352124164e-02, 1.232360084570068709e-02, -7.844171563535663055e-04, -1.210428935521009746e-04, 3.344327592646507844e-05 , -9.867136014577331249e-01, 6.737294444867666932e-02, -5.067458424877044516e-03, -1.251812701937470213e-04, 9.419473244264059593e-05, -1.679002076268449654e-05, 3.484293720675762929e-01, -3.617738162759492893e-02, -7.634640860539731316e-04, 9.356082122653546981e-04, -1.075431084112703954e-04, -3.044614041061100766e-06, 1.235667412115300623e+00, 4.231657802179918798e-02, 3.315592595281378029e-03, -5.185116053649769336e-04, -1.041674655671950871e-04, 1.242766263135090892e-05, -1.502663082036415076e+00, -1.548659541050484978e-01, 1.652801643260504508e-02, 4.998556989557471122e-05, -2.037688261998792680e-04, 2.657243869390409541e-05, -2.670283686919466826e-01, 9.685301784023310490e-02, -2.543484210258855835e-03, -6.965966582328896994e-04, 2.850491087748043708e-05, 1.232179636112698650e-05, -1.480354478441044286e+00, -6.179365776107784841e-02, 4.798501122259496952e-03, 7.800586916120723585e-05, -1.413851691566035862e-05, -5.727587674967719880e-06,
-    6.351934280628791507e-01, 2.952908467203564646e-03, 6.254828202758994093e-03, -2.956111985445306826e-04, -1.636502852942454153e-04, 2.616921494951480123e-05, -1.328294780757159899e+00, -5.123410165425365537e-02, 1.229999603970671068e-02, -7.892274520450543677e-04, -1.195721301312790567e-04, 2.454197033093738297e-05 , -9.860403788833298488e-01, 6.727155810173718331e-02, -5.071157514069617352e-03, -1.214296539729165295e-04, 9.340570341953608358e-05, -1.444050153586573228e-05, 3.480675228394242149e-01, -3.619237065717702262e-02, -7.354603960058733389e-04, 9.313051737393654526e-04, -1.076930273455606579e-04, -7.696053039474192446e-07, 1.236090908935226107e+00, 4.238273390417521269e-02, 3.299974870987111650e-03, -5.226642260988254756e-04, -1.032474625011560351e-04, 2.396475265799989632e-05, -1.504210088727871764e+00, -1.545353795944727493e-01, 1.652939400402650763e-02, 4.186078937618800693e-05, -2.027012231708198600e-04, 1.761148452766873776e-05, -2.660600935582757565e-01, 9.680193929166537592e-02, -2.564364883962782712e-03, -6.954454205710857090e-04, 2.907017700829073683e-05, 9.120785771591908463e-06, -1.480971935090678926e+00, -6.169766439371183325e-02, 4.800832758035045861e-03, 7.743502257440657043e-05, -1.440171540732098418e-05, -4.489324897938611976e-06,
-    6.355509554770921721e-01, 4.194364255265300989e-03, 6.156587518227093006e-03, -3.584539136959086518e-04, -1.505562336471176987e-04, 2.631189526673375584e-05, -1.333295991901433553e+00, -4.879824528740911438e-02, 1.205629889598585497e-02, -8.346035033896359156e-04, -1.072962342948566929e-04, 2.412331753624817981e-05 , -9.793640468817854661e-01, 6.625405011186732973e-02, -5.102126473064734317e-03, -8.551069374443776396e-05, 8.618032279329005427e-05, -1.422030758858379208e-05, 3.444418516979214084e-01, -3.631195473807800889e-02, -4.625381215785304145e-04, 8.881537622047225473e-04, -1.080757789189670570e-04,  5.820590714360855199e-08, 1.240361649325028681e+00, 4.302664794411619614e-02, 3.137220402938139478e-03, -5.615677039256951981e-04, -9.125763978623760322e-05, 2.367398552885374808e-05, -1.519498310980496925e+00, -1.512290469691385253e-01, 1.652996628226939199e-02,-3.745688059096337011e-05, -1.938906911473592626e-04, 1.811217640451412989e-05, -2.564062357251438717e-01, 9.626832379335603651e-02, -2.771163091665611831e-03, -6.829069315554202020e-04, 3.363238372709415958e-05, 8.623099725596635004e-06, -1.487093617252511990e+00, -6.073523464295225993e-02, 4.823154268625621383e-03, 7.122599345182346051e-05, -1.664931178025436733e-05, -4.312450972708557703e-06
-  };
-  std::vector expected_xyz_scatter = {
-   0.634877, -1.319469, -0.997320, 0.354037, 1.229165, -1.478165, -0.282159, -1.470623, 0.634985, -1.323774, -0.991892, 0.351189, 1.232453, -1.490731, -0.274445, -1.475604, 0.634938, -1.322286, -0.993784, 0.352187, 1.231297, -1.486357, -0.277141, -1.473868, 0.635174, -1.327955, -0.986486, 0.348307, 1.235810, -1.503186, -0.266701, -1.480563, 0.635175, -1.327965, -0.986473, 0.348300, 1.235819, -1.503216, -0.266682, -1.480575, 0.634890, -1.320208, -0.996398, 0.353557, 1.229717, -1.480303, -0.280853, -1.471469, 0.634902, -1.320794, -0.995664, 0.353173, 1.230159, -1.482005, -0.279812, -1.472143, 0.635167, -1.327823, -0.986659, 0.348400, 1.235701, -1.502788, -0.266950, -1.480404, 0.635088, -1.326284, -0.988664, 0.349474, 1.234448, -1.498176, -0.269828, -1.478565, 0.634918, -1.321522, -0.994748, 0.352694, 1.230712, -1.484126, -0.278511, -1.472983, 0.634962, -1.323089, -0.992765, 0.351650, 1.231919, -1.488714, -0.275689, -1.474803, 0.634888, -1.320133, -0.996492, 0.353606, 1.229661, -1.480085, -0.280986, -1.471383, 0.634968, -1.323280, -0.992522, 0.351522, 1.232067, -1.489275, -0.275344, -1.475026, 0.635077, -1.326043, -0.988976, 0.349640, 1.234254, -1.497458, -0.270275, -1.478280, 0.635124, -1.327021, -0.987707, 0.348962, 1.235045, -1.500380, -0.268455, -1.479444, 0.634885, -1.319917, -0.996762, 0.353746, 1.229499, -1.479460, -0.281368, -1.471135
-  }; 
-  std::vector expected_dy_dem = {
-   -0.105883, -0.100297, -0.102247, -0.094712, -0.094698, -0.104937, -0.104182, -0.094891, -0.096964, -0.103240, -0.101197, -0.105033, -0.100947, -0.097286, -0.095974, -0.105310
-  };
+      6.348551343037398542e-01,  4.209465843706336474e-04,
+      6.390862740714405368e-03,  -1.544448595628262176e-04,
+      -1.891095227974180087e-04, 2.695025951562175852e-05,
+      -1.317549846042939343e+00, -5.624478206903206490e-02,
+      1.274284553146523905e-02,  -6.836227424141475689e-04,
+      -1.438066096020836407e-04, -1.854932873974712940e-06,
+      -9.996964112615246423e-01, 6.928234423723647617e-02,
+      -4.974719973810486084e-03, -2.019584729176823030e-04,
+      1.077254539742680247e-04,  -8.024209768588029797e-06,
+      3.552689563657350780e-01,  -3.578299775339799371e-02,
+      -1.319946251007718743e-03, 1.016701374495701440e-03,
+      -1.057336720791906388e-04, 5.182678943855506567e-06,
+      1.227750369557627286e+00,  4.100352079064395472e-02,
+      3.586869164810712295e-03,  -4.304540913340443135e-04,
+      -1.269943482892440004e-04, 1.459465404430219674e-05,
+      -1.472642501673147031e+00, -1.611354921283318364e-01,
+      1.645427874390196360e-02,  2.107392978135091402e-04,
+      -2.193541011180757461e-04, 1.915392497459551146e-05,
+      -2.855174490181606739e-01, 9.774337856626263976e-02,
+      -2.140891880666230714e-03, -7.148328890055103638e-04,
+      1.965696332267534503e-05,  -4.593489654121371453e-06,
+      -1.468441009949382314e+00, -6.360828127262234399e-02,
+      4.751283295356955282e-03,  8.711899561753186068e-05,
+      -9.937008678852959884e-06, 4.273569346584811685e-07,
+      6.348599826995243722e-01,  5.487167506364742930e-04,
+      6.386116198716365253e-03,  -1.619832375568118791e-04,
+      -1.877328309473502049e-04, 2.134130914519164856e-05,
+      -1.318111020264137512e+00, -5.599013082054477008e-02,
+      1.272225054666903735e-02,  -6.893710047488201898e-04,
+      -1.434367581078517366e-04, 3.329508890614227371e-05,
+      -9.990040854920316793e-01, 6.918278968071900348e-02,
+      -4.980714172967731085e-03, -1.976574487947816198e-04,
+      1.070037204086153902e-04,  -7.859875077388093586e-06,
+      3.549109954092205532e-01,  -3.580909209068139365e-02,
+      -1.289508598157979719e-03, 1.012474257117017967e-03,
+      -1.054418924402112718e-04, -1.245498322204730900e-05,
+      1.228160763020727630e+00,  4.107512853046493134e-02,
+      3.573879491390910459e-03,  -4.355190226638688713e-04,
+      -1.258433981470396103e-04, 1.610862268100766631e-05,
+      -1.474252210958008291e+00, -1.608063442081248406e-01,
+      1.646046950167207382e-02,  2.019843636566674109e-04,
+      -2.185756589083626730e-04, 1.978479879983412190e-05,
+      -2.845402300363228942e-01, 9.770034635718018168e-02,
+      -2.162325119197382531e-03, -7.140472215558940627e-04,
+      1.956302663031799223e-05,  1.932584474244053378e-05,
+      -1.469076617546759334e+00, -6.351322951074317436e-02,
+      4.753890907276497185e-03,  8.672114560243554321e-05,
+      -1.004574434175897967e-05, -4.345700882560937596e-06,
+      6.348661083147921769e-01,  6.763897297752743953e-04,
+      6.381144275303845745e-03,  -1.694690463885140694e-04,
+      -1.868179426353836598e-04, 3.439291082765030046e-05,
+      -1.318669650038090335e+00, -5.573589319299507294e-02,
+      1.270148368741391351e-02,  -6.950749719342792137e-04,
+      -1.422194703304518733e-04, 3.454751241752252323e-05,
+      -9.983127558632299836e-01, 6.908311652764687061e-02,
+      -4.986579772806746212e-03, -1.933888092529071571e-04,
+      1.068327546750306073e-04,  -2.976978385983384886e-05,
+      3.545527765488725169e-01,  -3.583457894275744043e-02,
+      -1.259197760082061621e-03, 1.008246479193084487e-03,
+      -1.059401869200098984e-04, 1.721968053146218465e-06,
+      1.228571871257205572e+00,  4.114647496201748883e-02,
+      3.560738575723638825e-03,  -4.405332425718102457e-04,
+      -1.251648759618972115e-04, 3.659080417076460655e-05,
+      -1.475858628153338792e+00, -1.604770750960976822e-01,
+      1.646639808472218428e-02,  1.932598402043995316e-04,
+      -2.175904819601363058e-04, 1.230256868634094333e-05,
+      -2.835634435191126679e-01, 9.765688571984927624e-02,
+      -2.183734604613508240e-03, -7.132463811570244078e-04,
+      2.021887442373574272e-05,  1.321401495096886281e-05,
+      -1.469711274366155784e+00, -6.341812571665436660e-02,
+      4.756486470714936521e-03,  8.631384191910702040e-05,
+      -1.010516500002806932e-05, -1.110874413279218719e-05,
+      6.348735101551836735e-01,  8.039610290153098582e-04,
+      6.375948457075718626e-03,  -1.769074132993461279e-04,
+      -1.855677150383903214e-04, 3.421271436711027645e-05,
+      -1.319225739518145257e+00, -5.548207260888919634e-02,
+      1.268054645200545304e-02,  -7.007297564176242621e-04,
+      -1.408885818822980523e-04, 3.124701885930576017e-05,
+      -9.976224235482542557e-01, 6.898332734138989952e-02,
+      -4.992317635216104131e-03, -1.891404922064061889e-04,
+      1.053957535708985289e-04,  -1.089286646983666076e-06,
+      3.541943058468561834e-01,  -3.585946084769019160e-02,
+      -1.229013912637771933e-03, 1.004009466262262241e-03,
+      -1.059129033455631863e-04, -4.941663399086282537e-06,
+      1.228983691638902087e+00,  4.121755707472917613e-02,
+      3.547447845420277635e-03,  -4.455036207721562607e-04,
+      -1.239172256532283074e-04, 3.437341080261359686e-05,
+      -1.477461752073406132e+00, -1.601476900261984693e-01,
+      1.647206544856073471e-02,  1.845724864086241608e-04,
+      -2.173853638475303177e-04, 3.620505631412716563e-05,
+      -2.825870937484175061e-01, 9.761299713537928413e-02,
+      -2.205119732548723246e-03, -7.124245958910824846e-04,
+      2.074820558303217398e-05,  1.209381466404663338e-05,
+      -1.470344979888463577e+00, -6.332297013406351649e-02,
+      4.759069711794740656e-03,  8.589935708505183382e-05,
+      -1.045842324058424788e-05, -6.134254562752213537e-06,
+      6.348821871815598650e-01,  9.314261853726121809e-04,
+      6.370530236175125580e-03,  -1.842978984547447257e-04,
+      -1.840210089691990327e-04, 2.234897510077387526e-05,
+      -1.319779292891724465e+00, -5.522867246076747227e-02,
+      1.265944033870337014e-02,  -7.063360380236871801e-04,
+      -1.393416734992873119e-04, 1.931167378610719847e-05,
+      -9.969330896946905218e-01, 6.888342466806646192e-02,
+      -4.997928623431705138e-03, -1.849303524006284602e-04,
+      1.053651633995249134e-04,  -2.870133904891753420e-05,
+      3.538355893399378616e-01,  -3.588374034700148041e-02,
+      -1.198957225773849763e-03, 9.997681359810027708e-04,
+      -1.060678155548662341e-04, -4.107776618240329050e-06,
+      1.229396221507694564e+00,  4.128837188660083868e-02,
+      3.534008730169808672e-03,  -4.504275777948374090e-04,
+      -1.224778886969254976e-04, 2.455513266683544498e-05,
+      -1.479061581584721008e+00, -1.598181942132129441e-01,
+      1.647747255391585064e-02,  1.759082956613747337e-04,
+      -2.158335508261176197e-04, 6.406725844410341030e-06,
+      -2.816111850012528728e-01, 9.756868109694678826e-02,
+      -2.226479900633348240e-03, -7.115823288942964460e-04,
+      2.121038517729223415e-05,  1.358027318850170435e-05,
+      -1.470977733597038872e+00, -6.322776301216057049e-02,
+      4.761640356162846754e-03,  8.547576468445008296e-05,
+      -1.081874527005240631e-05, -8.845528475774308509e-07,
+      6.348921383103013349e-01,  1.058780765759985421e-03,
+      6.364891110105044131e-03,  -1.916363332792569681e-04,
+      -1.827768871456785058e-04, 2.275707291847725182e-05,
+      -1.320330314380025793e+00, -5.497569611120622923e-02,
+      1.263816684562326688e-02,  -7.118908987616576157e-04,
+      -1.380182662155302303e-04, 1.630252530406085050e-05,
+      -9.962447554247517711e-01, 6.878341103651769428e-02,
+      -5.003413601927745452e-03, -1.807403991329658622e-04,
+      1.040363362483998831e-04,  -4.422604643727719699e-06,
+      3.534766330394523148e-01,  -3.590741998555346121e-02,
+      -1.169027863565602274e-03, 9.955202772264954043e-04,
+      -1.060447700647724903e-04, -1.021743279826507342e-05,
+      1.229809458175783687e+00,  4.135891644424664892e-02,
+      3.520422661584679015e-03,  -4.553035794622276055e-04,
+      -1.210679214963379874e-04, 1.595827246550979495e-05,
+      -1.480658115605847147e+00, -1.594885928526604546e-01,
+      1.648262036665308974e-02,  1.672799673730459213e-04,
+      -2.148155690753495697e-04, -1.867405535452657550e-06,
+      -2.806357215496423363e-01, 9.752393810975558408e-02,
+      -2.247814508535729908e-03, -7.107227883497464890e-04,
+      2.207595560206285042e-05,  -1.137331983229785190e-06,
+      -1.471609534977757372e+00, -6.313250460562676303e-02,
+      4.764198129054059844e-03,  8.503999275315992160e-05,
+      -1.072692568096017848e-05, -1.373273803695183988e-05,
+      6.349033624136081189e-01,  1.186020367092407990e-03,
+      6.359032581545111251e-03,  -1.989262833250400370e-04,
+      -1.812752661309344573e-04, 1.302837915648187095e-05,
+      -1.320878808237722746e+00, -5.472314689282183064e-02,
+      1.261672747063919374e-02,  -7.173917679890315846e-04,
+      -1.373052781380030543e-04, 3.768455339511444900e-05,
+      -9.955574218354472649e-01, 6.868328895828368363e-02,
+      -5.008773436308684712e-03, -1.765844799686671349e-04,
+      1.034810966435298563e-04,  -1.111176255155353207e-05,
+      3.531174429312692320e-01,  -3.593050231143132822e-02,
+      -1.139225984250480384e-03, 9.912704081392112714e-04,
+      -1.064918174657224404e-04, 2.680738443515978403e-06,
+      1.230223398925979650e+00,  4.142918782293085467e-02,
+      3.506691073047987512e-03,  -4.601302388532728274e-04,
+      -1.198865987378785417e-04, 1.656386182477533959e-05,
+      -1.482251353107205460e+00, -1.591588911206925361e-01,
+      1.648750985769346228e-02,  1.586901819247656846e-04,
+      -2.147074421644348298e-04, 2.641762503224190698e-05,
+      -2.796607076604977760e-01, 9.747876869099537933e-02,
+      -2.269122958003529523e-03, -7.098388532529275848e-04,
+      2.226701915637888804e-05,  1.106237844209756009e-05,
+      -1.472240383519069384e+00, -6.303719517464229094e-02,
+      4.766742755353862819e-03,  8.459962202271287246e-05,
+      -1.132218730142039535e-05, 8.958476322974335592e-07,
+      6.349158583197994643e-01,  1.313140616388666637e-03,
+      6.352956158169477396e-03,  -2.061601622854974502e-04,
+      -1.806298821034440756e-04, 3.770936817966389514e-05,
+      -1.321424778752664952e+00, -5.447102810827629538e-02,
+      1.259512371128685033e-02,  -7.228490733933210606e-04,
+      -1.356407402355522122e-04, 2.099832634320949299e-05,
+      -9.948710899987588396e-01, 6.858306092758209571e-02,
+      -5.014008993202081696e-03, -1.724573933478598642e-04,
+      1.029144894329912032e-04,  -1.738522780636760158e-05,
+      3.527580249757622521e-01,  -3.595298987582695727e-02,
+      -1.109551740263377793e-03, 9.870126155001155040e-04,
+      -1.064931456292656029e-04, -2.059910396978558087e-06,
+      1.230638041011988815e+00,  4.149918312660194619e-02,
+      3.492815399561766294e-03,  -4.649051157564728157e-04,
+      -1.192927614880224277e-04, 4.072077917749542957e-05,
+      -1.483841293110880866e+00, -1.588290941739924356e-01,
+      1.649214200293154520e-02,  1.501282794678792006e-04,
+      -2.138853834118830831e-04, 2.633111784219914963e-05,
+      -2.786861475954987011e-01, 9.743317336979973042e-02,
+      -2.290404652904617314e-03, -7.089360554728917595e-04,
+      2.260180638238835256e-05,  1.741828165826791135e-05,
+      -1.472870278712053782e+00, -6.294183498489253070e-02,
+      4.769273959660644442e-03,  8.414681093302789892e-05,
+      -1.142905205912834352e-05, -4.014065121916994726e-06,
+      6.349296248136164778e-01,  1.440137170869312810e-03,
+      6.346663352465874847e-03,  -2.133510744796659759e-04,
+      -1.788513201196447670e-04, 1.721163944875696416e-05,
+      -1.321968230245579967e+00, -5.421934303028537461e-02,
+      1.257335706466754244e-02,  -7.282542863230233527e-04,
+      -1.343059033644905889e-04, 1.747822893445653714e-05,
+      -9.941857609618123259e-01, 6.848272942128874607e-02,
+      -5.019121140152461337e-03, -1.683596869525186377e-04,
+      1.024142382012053007e-04,  -2.632719129544749384e-05,
+      3.523983851077774343e-01,  -3.597488523292310947e-02,
+      -1.080005278271846739e-03, 9.827512175914082399e-04,
+      -1.066680880078371994e-04, 3.403258606315080555e-07,
+      1.231053381658700818e+00,  4.156889948792314576e-02,
+      3.478797077596604108e-03,  -4.696409807358484993e-04,
+      -1.173636798436718986e-04, 1.149931408689037458e-05,
+      -1.485427934690428442e+00, -1.584992071496764965e-01,
+      1.649651778315383566e-02,  1.415960091521040870e-04,
+      -2.125888038426753843e-04, 7.384582528889821378e-06,
+      -2.777120456109742896e-01, 9.738715268720327112e-02,
+      -2.311658999267464203e-03, -7.080165982958596923e-04,
+      2.340034491729013294e-05,  5.174033942788913380e-06,
+      -1.473499220050474623e+00, -6.284642430757329812e-02,
+      4.771791466347353149e-03,  8.368540130389298475e-05,
+      -1.162498575113560591e-05, -5.381585801785509468e-06,
+      6.349446606365225509e-01,  1.567005718051586727e-03,
+      6.340155681555815353e-03,  -2.204854663573854625e-04,
+      -1.779502948888764897e-04, 3.196283450610521294e-05,
+      -1.322509167069771951e+00, -5.396809490162747525e-02,
+      1.255142902735281209e-02,  -7.336077414823606981e-04,
+      -1.332538502428148267e-04, 2.525523713666122703e-05,
+      -9.935014357470516311e-01, 6.838229689892011409e-02,
+      -5.024110745516051704e-03, -1.642860423419652261e-04,
+      1.011792892256958577e-04,  -5.902237032851650630e-06,
+      3.520385292366049468e-01,  -3.599619093977864809e-02,
+      -1.050586739210998023e-03, 9.784837539753422735e-04,
+      -1.066187407206570670e-04, -6.052991441884039902e-06,
+      1.231469418062474341e+00,  4.163833406830096812e-02,
+      3.464637544942418459e-03,  -4.743218246565151001e-04,
+      -1.164951133813105271e-04, 2.473911917278243621e-05,
+      -1.487011276970676033e+00, -1.581692351651968476e-01,
+      1.650063818395723983e-02,  1.331001312464952355e-04,
+      -2.118074389246019866e-04, 9.192428068946771109e-06,
+      -2.767384059577842614e-01, 9.734070719609828892e-02,
+      -2.332885405321092481e-03, -7.070743922828596519e-04,
+      2.373777250910882265e-05,  1.127700884024945933e-05,
+      -1.474127207030835107e+00, -6.275096341939470634e-02,
+      4.774294999622533293e-03,  8.321347296773265077e-05,
+      -1.162225195759229858e-05, -1.468175407624093560e-05,
+      6.349609644870094494e-01,  1.693741975839754832e-03,
+      6.333434667015966531e-03,  -2.275719866012916918e-04,
+      -1.766077012712487378e-04, 2.919052022666632077e-05,
+      -1.323047593610823247e+00, -5.371728693515605280e-02,
+      1.252934109528984138e-02,  -7.389107006611626187e-04,
+      -1.322992615601379437e-04, 3.689337377145077536e-05,
+      -9.928181153524118230e-01, 6.828176580261838269e-02,
+      -5.028978678356570489e-03, -1.602449667799085492e-04,
+      1.004819833385002965e-04,  -7.012859043909368637e-06,
+      3.516784632459502014e-01,  -3.601690955621394963e-02,
+      -1.021296258318379370e-03, 9.742140050919662845e-04,
+      -1.068837890347894775e-04, 3.261791903209577241e-07,
+      1.231886147391427544e+00,  4.170748405790913882e-02,
+      3.450338240560582581e-03,  -4.789562532735843967e-04,
+      -1.153902983973557932e-04, 2.856018069496295048e-05,
+      -1.488591319127526624e+00, -1.578391833182464787e-01,
+      1.650450419566778376e-02,  1.246407552546250339e-04,
+      -2.115332183818513349e-04, 3.149345367837511192e-05,
+      -2.757652328811996956e-01, 9.729383746118988596e-02,
+      -2.354083281534554220e-03, -7.061133365182417328e-04,
+      2.418809213597686327e-05,  1.280494807360028992e-05,
+      -1.474754239152433311e+00, -6.265545260258377491e-02,
+      4.776784283590801948e-03,  8.273687806363864625e-05,
+      -1.229952261449745124e-05, 3.204146150058887708e-06,
+      6.349785350208994039e-01,  1.820341692612803541e-03,
+      6.326501834700739083e-03,  -2.346100929840904846e-04,
+      -1.748840426396014729e-04, 1.130785525935554482e-05,
+      -1.323583514286295282e+00, -5.346692231381247606e-02,
+      1.250709476370755191e-02,  -7.441705970339035966e-04,
+      -1.303302437099287372e-04, 7.935577538626925858e-06,
+      -9.921358007514943234e-01, 6.818113855713830995e-02,
+      -5.033725808341922223e-03, -1.562353718150353687e-04,
+      1.001568149392305130e-04,  -2.302258383924021595e-05,
+      3.513181929939074299e-01,  -3.603704364469759169e-02,
+      -9.921339651685744804e-04, 9.699384566370250092e-04,
+      -1.069081013817698415e-04, -2.744679484186812129e-06,
+      1.232303566785723392e+00,  4.177634667571154814e-02,
+      3.435900604437185177e-03,  -4.835440426346156498e-04,
+      -1.140781768005934266e-04, 2.411509316948267986e-05,
+      -1.490168060387760951e+00, -1.575090566866652331e-01,
+      1.650811681325956015e-02,  1.162064642248029450e-04,
+      -2.100324946396962247e-04, 4.868837971279583202e-06,
+      -2.747925306207861240e-01, 9.724654405895133413e-02,
+      -2.375252040655950400e-03, -7.051355614741510987e-04,
+      2.505903781065493165e-05,  -2.569082101323676566e-06,
+      -1.475380315917416585e+00, -6.255989214488603956e-02,
+      4.779259042312647421e-03,  8.224491253736542200e-05,
+      -1.205054378062991984e-05, -1.594987943813344381e-05,
+      6.349973708516511994e-01,  1.946800647308156995e-03,
+      6.319358714566076195e-03,  -2.415904693897710526e-04,
+      -1.741570105122868483e-04, 3.342152683043006766e-05,
+      -1.324116933545430141e+00, -5.321700419064152865e-02,
+      1.248469152702344660e-02,  -7.493727578058629766e-04,
+      -1.295525827398787404e-04, 2.659942231629285135e-05,
+      -9.914544928937398804e-01, 6.808041756983601589e-02,
+      -5.038353005641925050e-03, -1.522500103683389601e-04,
+      9.911425811568465554e-05,  -1.035676665958809070e-05,
+      3.509577243129330393e-01,  -3.605659577023319351e-02,
+      -9.630999837076988784e-04, 9.656594578503095369e-04,
+      -1.070158919994286978e-04, -2.281503112307771063e-06,
+      1.232721673357858538e+00,  4.184491916948063911e-02,
+      3.421326077437690516e-03,  -4.880823132679394552e-04,
+      -1.129872290747681817e-04, 2.854952342195995698e-05,
+      -1.491741500028839651e+00, -1.571788603283475749e-01,
+      1.651147703627379656e-02,  1.078118218043548068e-04,
+      -2.094656285123614196e-04, 1.573608604543182341e-05,
+      -2.738203034102859035e-01, 9.719882757757769554e-02,
+      -2.396391097750961291e-03, -7.041328812172977002e-04,
+      2.511128111671661627e-05,  1.472819566023977703e-05,
+      -1.476005436830838402e+00, -6.246428233956573262e-02,
+      4.781718999863710830e-03,  8.175246233396933941e-05,
+      -1.310850420537104008e-05, 1.717274673157189222e-05,
+      6.350174705506670403e-01,  2.073114649501703322e-03,
+      6.312006840494438151e-03,  -2.485262001215581039e-04,
+      -1.724445833892894095e-04, 1.623821996891234705e-05,
+      -1.324647855868849478e+00, -5.296753568880858964e-02,
+      1.246213287875118370e-02,  -7.545274547770323926e-04,
+      -1.284298383236558551e-04, 3.142127009671183137e-05,
+      -9.907741927046019859e-01, 6.797960523066012839e-02,
+      -5.042861140826992473e-03, -1.482946605870891395e-04,
+      9.821987974303589589e-05,  -3.593831829470692349e-06,
+      3.505970630098214080e-01,  -3.607556850024738748e-02,
+      -9.341944322877257512e-04, 9.613773761737330267e-04,
+      -1.072343182304808093e-04, 2.791451096706449119e-06,
+      1.233140464192951757e+00,  4.191319881581374862e-02,
+      3.406616101162745613e-03,  -4.925758895926437772e-04,
+      -1.113902906060245713e-04, 1.275308331152581608e-05,
+      -1.493311637378700762e+00, -1.568485992811522733e-01,
+      1.651458586873823589e-02,  9.944841367174414462e-05,
+      -2.085492230796830474e-04, 1.276456024245067926e-05,
+      -2.728485554775001987e-01, 9.715068861693920699e-02,
+      -2.417499870240937074e-03, -7.031148500958378164e-04,
+      2.576543833825076558e-05,  7.841889896124507091e-06,
+      -1.476629601400710978e+00, -6.236862348540499201e-02,
+      4.784163880393361643e-03,  8.124213252544174404e-05,
+      -1.286332078849730127e-05, -1.821996546344873330e-06,
+      6.350388326475970846e-01,  2.199279539485121671e-03,
+      6.304447750121061969e-03,  -2.554047701160370044e-04,
+      -1.716061813901302753e-04, 3.413524324276134592e-05,
+      -1.325176285768258300e+00, -5.271851990161838253e-02,
+      1.243942031140890699e-02,  -7.596346042592860793e-04,
+      -1.269803855069738714e-04, 2.314478643438959578e-05,
+      -9.900949010857222898e-01, 6.787870391214460841e-02,
+      -5.047251084767826433e-03, -1.443753107913585767e-04,
+      9.837034053479728221e-05,  -3.865274593462701621e-05,
+      3.502362148656810170e-01,  -3.609396440447816545e-02,
+      -9.054174237006253068e-04, 9.570894530963515055e-04,
+      -1.071221722792567601e-04, -5.180134097885568801e-06,
+      1.233559936349031494e+00,  4.198118292014653419e-02,
+      3.391772117805412056e-03,  -4.970162819604460663e-04,
+      -1.105584293158747960e-04, 2.757032189173095048e-05,
+      -1.494878471815561216e+00, -1.565182785628131401e-01,
+      1.651744431908664865e-02,  9.112268062696188113e-05,
+      -2.082277461664644284e-04, 3.370820636496137736e-05,
+      -2.718772910441742408e-01, 9.710212778853387350e-02,
+      -2.438577777940475859e-03, -7.020756635958485484e-04,
+      2.613933618298708639e-05,  1.211520684095310762e-05,
+      -1.477252809138063672e+00, -6.227291588670166161e-02,
+      4.786593408182711167e-03,  8.072392747742672100e-05,
+      -1.281499371544444526e-05, -1.293175202324119235e-05,
+      6.350614556306495295e-01,  2.325291188338546311e-03,
+      6.296682984661446623e-03,  -2.622362895631248896e-04,
+      -1.701076322674243866e-04, 2.573454296903621253e-05,
+      -1.325702227786145437e+00, -5.246995989253622206e-02,
+      1.241655531642829255e-02,  -7.646904682589584622e-04,
+      -1.257704658362481128e-04, 2.439373356208127567e-05,
+      -9.894166189151047952e-01, 6.777771596940393439e-02,
+      -5.051523708536139086e-03, -1.404733355821404265e-04,
+      9.677082285072928253e-05,  -3.720510878458014501e-06,
+      3.498751856359115786e-01,  -3.611178605486395354e-02,
+      -8.767690652124425499e-04, 9.527998576480508275e-04,
+      -1.072771816869139909e-04, -2.281376475091892258e-06,
+      1.233980086857325631e+00,  4.204886881676297983e-02,
+      3.376795570009583514e-03,  -5.014114486109571937e-04,
+      -1.092957353261917852e-04, 2.516456964431257380e-05,
+      -1.496442002767713664e+00, -1.561879031708521548e-01,
+      1.652005340007862977e-02,  8.282284133744905071e-05,
+      -2.067123325224875000e-04, 7.057486539657783089e-06,
+      -2.709065143258797548e-01, 9.705314571543909030e-02,
+      -2.459624243094573216e-03, -7.010187162791577066e-04,
+      2.672975399789282626e-05,  7.629793933874534523e-06,
+      -1.477875059556995385e+00, -6.217715985326619649e-02,
+      4.789007307701962507e-03,  8.019935829649041371e-05,
+      -1.318861260046749971e-05, -7.150339348059032240e-06,
+      6.350853379468965887e-01,  2.451145498001100487e-03,
+      6.288714088740080324e-03,  -2.690159202421790068e-04,
+      -1.686584359429067433e-04, 1.941481480743946700e-05,
+      -1.326225686495484890e+00, -5.222185869521017709e-02,
+      1.239353938406437261e-02,  -7.696964132049412353e-04,
+      -1.246012242240120604e-04, 2.724071141974432252e-05,
+      -9.887393470472876089e-01, 6.767664374012982709e-02,
+      -5.055679883306329545e-03, -1.366074591188833347e-04,
+      9.623033677044332457e-05,  -1.113456896173822779e-05,
+      3.495139810501832756e-01,  -3.612903602543367232e-02,
+      -8.482494585971035728e-04, 9.485064841097947883e-04,
+      -1.073561607316583907e-04, -2.239996380309942211e-06,
+      1.234400912722548371e+00,  4.211625386880359784e-02,
+      3.361687900729734210e-03,  -5.057597926077623488e-04,
+      -1.078411892315765344e-04, 1.508800592977199686e-05,
+      -1.498002229713325750e+00, -1.558574780824932282e-01,
+      1.652241412871961052e-02,  7.456368677257522147e-05,
+      -2.062001731191939454e-04, 2.069621557469772063e-05,
+      -2.699362295319003291e-01, 9.700374303226286243e-02,
+      -2.480638690415259105e-03, -6.999405672986690023e-04,
+      2.700789474676622474e-05,  1.556143061449123430e-05,
+      -1.478496352174730522e+00, -6.208135570041733303e-02,
+      4.791405303667145565e-03,  7.966538051836852740e-05,
+      -1.352687841609079228e-05, -2.789411930543395566e-06,
+      6.351104780025849106e-01,  2.576838401336829787e-03,
+      6.280542610220480118e-03,  -2.757414391158645754e-04,
+      -1.675762649448408429e-04, 2.787462665161048641e-05,
+      -1.326746666499438287e+00, -5.197421931349595348e-02,
+      1.237037400330611749e-02,  -7.746541492504023475e-04,
+      -1.232228491818352083e-04, 2.166599538617633252e-05,
+      -9.880630863135209108e-01, 6.757548954459043078e-02,
+      -5.059720480258220535e-03, -1.327693574508429343e-04,
+      9.550030312894054513e-05,  -1.096549240339310371e-05,
+      3.491526068124157778e-01,  -3.614571689219699124e-02,
+      -8.198587001702131727e-04, 9.442100079790295610e-04,
+      -1.074330339280879455e-04, -2.103241190440061311e-06,
+      1.234822410923189784e+00,  4.218333546826981417e-02,
+      3.346450553092000530e-03,  -5.100549148199152614e-04,
+      -1.071543306169886722e-04, 3.572075491055831030e-05,
+      -1.499559152180234056e+00, -1.555270082545787691e-01,
+      1.652452752618108200e-02,  6.633607063542407416e-05,
+      -2.052990867644106118e-04, 1.891505702101457936e-05,
+      -2.689664408651156746e-01, 9.695392038509384469e-02,
+      -2.501620547117759490e-03, -6.988464710389351081e-04,
+      2.774961528830105395e-05,  4.843681010028069226e-06,
+      -1.479116686511674494e+00, -6.198550374897651011e-02,
+      4.793787121096219732e-03,  7.912045955652986253e-05,
+      -1.359696279035538403e-05, -9.132339849453571562e-06,
+      6.351368741634448867e-01,  2.702365862198193025e-03,
+      6.272170100036473551e-03,  -2.824171711189519380e-04,
+      -1.661976899287730559e-04, 2.457347650017094835e-05,
+      -1.327265172431057128e+00, -5.172704472148267896e-02,
+      1.234706066178771662e-02,  -7.795630288411945592e-04,
+      -1.217395799935142969e-04, 1.184741714306808905e-05,
+      -9.873878375219384829e-01, 6.747425568563097942e-02,
+      -5.063646370480812467e-03, -1.289626891970745083e-04,
+      9.513074838211379970e-05,  -2.521433322545949321e-05,
+      3.487910686007592576e-01,  -3.616183123303555458e-02,
+      -7.915968808226425679e-04, 9.399119246579864433e-04,
+      -1.077055728285351480e-04, 6.031191175422362627e-06,
+      1.235244578411804905e+00,  4.225011103602600848e-02,
+      3.331084970256580589e-03,  -5.143079026275864784e-04,
+      -1.055716785023949844e-04, 2.051193936812822612e-05,
+      -1.501112769745742259e+00, -1.551964986234863897e-01,
+      1.652639461772111712e-02,  5.814089462644928566e-05,
+      -2.041249358339155683e-04, 6.311073191969795411e-06,
+      -2.679971525218879380e-01, 9.690367843145115956e-02,
+      -2.522569242956208650e-03, -6.977319783847560700e-04,
+      2.827424678587480721e-05,  2.739673941330651616e-06,
+      -1.479736062091468574e+00, -6.188960432526132566e-02,
+      4.796152485364500034e-03,  7.856828747830194362e-05,
+      -1.395147193446202365e-05, -4.087221013031299888e-06,
+      6.351645247550001816e-01,  2.827723875485507743e-03,
+      6.263598112024793517e-03,  -2.890409134869928735e-04,
+      -1.648390823803598971e-04, 2.215887759642637032e-05,
+      -1.327781208952985015e+00, -5.148033786352124164e-02,
+      1.232360084570068709e-02,  -7.844171563535663055e-04,
+      -1.210428935521009746e-04, 3.344327592646507844e-05,
+      -9.867136014577331249e-01, 6.737294444867666932e-02,
+      -5.067458424877044516e-03, -1.251812701937470213e-04,
+      9.419473244264059593e-05,  -1.679002076268449654e-05,
+      3.484293720675762929e-01,  -3.617738162759492893e-02,
+      -7.634640860539731316e-04, 9.356082122653546981e-04,
+      -1.075431084112703954e-04, -3.044614041061100766e-06,
+      1.235667412115300623e+00,  4.231657802179918798e-02,
+      3.315592595281378029e-03,  -5.185116053649769336e-04,
+      -1.041674655671950871e-04, 1.242766263135090892e-05,
+      -1.502663082036415076e+00, -1.548659541050484978e-01,
+      1.652801643260504508e-02,  4.998556989557471122e-05,
+      -2.037688261998792680e-04, 2.657243869390409541e-05,
+      -2.670283686919466826e-01, 9.685301784023310490e-02,
+      -2.543484210258855835e-03, -6.965966582328896994e-04,
+      2.850491087748043708e-05,  1.232179636112698650e-05,
+      -1.480354478441044286e+00, -6.179365776107784841e-02,
+      4.798501122259496952e-03,  7.800586916120723585e-05,
+      -1.413851691566035862e-05, -5.727587674967719880e-06,
+      6.351934280628791507e-01,  2.952908467203564646e-03,
+      6.254828202758994093e-03,  -2.956111985445306826e-04,
+      -1.636502852942454153e-04, 2.616921494951480123e-05,
+      -1.328294780757159899e+00, -5.123410165425365537e-02,
+      1.229999603970671068e-02,  -7.892274520450543677e-04,
+      -1.195721301312790567e-04, 2.454197033093738297e-05,
+      -9.860403788833298488e-01, 6.727155810173718331e-02,
+      -5.071157514069617352e-03, -1.214296539729165295e-04,
+      9.340570341953608358e-05,  -1.444050153586573228e-05,
+      3.480675228394242149e-01,  -3.619237065717702262e-02,
+      -7.354603960058733389e-04, 9.313051737393654526e-04,
+      -1.076930273455606579e-04, -7.696053039474192446e-07,
+      1.236090908935226107e+00,  4.238273390417521269e-02,
+      3.299974870987111650e-03,  -5.226642260988254756e-04,
+      -1.032474625011560351e-04, 2.396475265799989632e-05,
+      -1.504210088727871764e+00, -1.545353795944727493e-01,
+      1.652939400402650763e-02,  4.186078937618800693e-05,
+      -2.027012231708198600e-04, 1.761148452766873776e-05,
+      -2.660600935582757565e-01, 9.680193929166537592e-02,
+      -2.564364883962782712e-03, -6.954454205710857090e-04,
+      2.907017700829073683e-05,  9.120785771591908463e-06,
+      -1.480971935090678926e+00, -6.169766439371183325e-02,
+      4.800832758035045861e-03,  7.743502257440657043e-05,
+      -1.440171540732098418e-05, -4.489324897938611976e-06,
+      6.355509554770921721e-01,  4.194364255265300989e-03,
+      6.156587518227093006e-03,  -3.584539136959086518e-04,
+      -1.505562336471176987e-04, 2.631189526673375584e-05,
+      -1.333295991901433553e+00, -4.879824528740911438e-02,
+      1.205629889598585497e-02,  -8.346035033896359156e-04,
+      -1.072962342948566929e-04, 2.412331753624817981e-05,
+      -9.793640468817854661e-01, 6.625405011186732973e-02,
+      -5.102126473064734317e-03, -8.551069374443776396e-05,
+      8.618032279329005427e-05,  -1.422030758858379208e-05,
+      3.444418516979214084e-01,  -3.631195473807800889e-02,
+      -4.625381215785304145e-04, 8.881537622047225473e-04,
+      -1.080757789189670570e-04, 5.820590714360855199e-08,
+      1.240361649325028681e+00,  4.302664794411619614e-02,
+      3.137220402938139478e-03,  -5.615677039256951981e-04,
+      -9.125763978623760322e-05, 2.367398552885374808e-05,
+      -1.519498310980496925e+00, -1.512290469691385253e-01,
+      1.652996628226939199e-02,  -3.745688059096337011e-05,
+      -1.938906911473592626e-04, 1.811217640451412989e-05,
+      -2.564062357251438717e-01, 9.626832379335603651e-02,
+      -2.771163091665611831e-03, -6.829069315554202020e-04,
+      3.363238372709415958e-05,  8.623099725596635004e-06,
+      -1.487093617252511990e+00, -6.073523464295225993e-02,
+      4.823154268625621383e-03,  7.122599345182346051e-05,
+      -1.664931178025436733e-05, -4.312450972708557703e-06};
+  std::vector<double> expected_xyz_scatter = {
+      0.634877,  -1.319469, -0.997320, 0.354037,  1.229165,  -1.478165,
+      -0.282159, -1.470623, 0.634985,  -1.323774, -0.991892, 0.351189,
+      1.232453,  -1.490731, -0.274445, -1.475604, 0.634938,  -1.322286,
+      -0.993784, 0.352187,  1.231297,  -1.486357, -0.277141, -1.473868,
+      0.635174,  -1.327955, -0.986486, 0.348307,  1.235810,  -1.503186,
+      -0.266701, -1.480563, 0.635175,  -1.327965, -0.986473, 0.348300,
+      1.235819,  -1.503216, -0.266682, -1.480575, 0.634890,  -1.320208,
+      -0.996398, 0.353557,  1.229717,  -1.480303, -0.280853, -1.471469,
+      0.634902,  -1.320794, -0.995664, 0.353173,  1.230159,  -1.482005,
+      -0.279812, -1.472143, 0.635167,  -1.327823, -0.986659, 0.348400,
+      1.235701,  -1.502788, -0.266950, -1.480404, 0.635088,  -1.326284,
+      -0.988664, 0.349474,  1.234448,  -1.498176, -0.269828, -1.478565,
+      0.634918,  -1.321522, -0.994748, 0.352694,  1.230712,  -1.484126,
+      -0.278511, -1.472983, 0.634962,  -1.323089, -0.992765, 0.351650,
+      1.231919,  -1.488714, -0.275689, -1.474803, 0.634888,  -1.320133,
+      -0.996492, 0.353606,  1.229661,  -1.480085, -0.280986, -1.471383,
+      0.634968,  -1.323280, -0.992522, 0.351522,  1.232067,  -1.489275,
+      -0.275344, -1.475026, 0.635077,  -1.326043, -0.988976, 0.349640,
+      1.234254,  -1.497458, -0.270275, -1.478280, 0.635124,  -1.327021,
+      -0.987707, 0.348962,  1.235045,  -1.500380, -0.268455, -1.479444,
+      0.634885,  -1.319917, -0.996762, 0.353746,  1.229499,  -1.479460,
+      -0.281368, -1.471135};
+  std::vector<double> expected_dy_dem = {
+      -0.105883, -0.100297, -0.102247, -0.094712, -0.094698, -0.104937,
+      -0.104182, -0.094891, -0.096964, -0.103240, -0.101197, -0.105033,
+      -0.100947, -0.097286, -0.095974, -0.105310};
 
   const int nloc = 4;
   const int nnei = 4;
   const int last_layer_size = 8;
 
-  void SetUp() override {
-  }
-  void TearDown() override {
-  }
+  void SetUp() override {}
+  void TearDown() override {}
 };
 
-TEST_F(TestTabulateSeR, tabulate_fusion_se_r_cpu)
-{
+TEST_F(TestTabulateSeR, tabulate_fusion_se_r_cpu) {
   std::vector<double> xyz_scatter(nloc * nnei * last_layer_size);
-  deepmd::tabulate_fusion_se_r_cpu<double>(&xyz_scatter[0], &table[0], &info[0], &em[0], nloc, nnei, last_layer_size);
+  deepmd::tabulate_fusion_se_r_cpu<double>(&xyz_scatter[0], &table[0], &info[0],
+                                           &em[0], nloc, nnei, last_layer_size);
   EXPECT_EQ(xyz_scatter.size(), nloc * nnei * last_layer_size);
   EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size());
-  for (int jj = 0; jj < xyz_scatter.size(); ++jj){
-    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]) , 1e-5);
+  for (int jj = 0; jj < xyz_scatter.size(); ++jj) {
+    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]), 1e-5);
   }
 }
 
-TEST_F(TestTabulateSeR, tabulate_fusion_se_r_grad_cpu)
-{
+TEST_F(TestTabulateSeR, tabulate_fusion_se_r_grad_cpu) {
   std::vector<double> dy_dem(em.size());
   std::vector<double> dy(nloc * nnei * last_layer_size, 1.0);
-  deepmd::tabulate_fusion_se_r_grad_cpu<double>(&dy_dem[0], &table[0], &info[0], &em[0], &dy[0], nloc, nnei, last_layer_size);
+  deepmd::tabulate_fusion_se_r_grad_cpu<double>(&dy_dem[0], &table[0], &info[0],
+                                                &em[0], &dy[0], nloc, nnei,
+                                                last_layer_size);
   EXPECT_EQ(dy_dem.size(), nloc * nnei);
   EXPECT_EQ(dy_dem.size(), expected_dy_dem.size());
-  for (int jj = 0; jj < dy_dem.size(); ++jj){
-    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]) , 1e-5);
+  for (int jj = 0; jj < dy_dem.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]), 1e-5);
   }
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestTabulateSeR, tabulate_fusion_se_r_gpu_cuda)
-{
+TEST_F(TestTabulateSeR, tabulate_fusion_se_r_gpu_cuda) {
   std::vector<double> xyz_scatter(nloc * nnei * last_layer_size, 0.0);
 
-  double * xyz_scatter_dev = NULL, * table_dev = NULL,  * em_dev = NULL;
+  double *xyz_scatter_dev = NULL, *table_dev = NULL, *em_dev = NULL;
   deepmd::malloc_device_memory_sync(xyz_scatter_dev, xyz_scatter);
   deepmd::malloc_device_memory_sync(table_dev, table);
   deepmd::malloc_device_memory_sync(em_dev, em);
-  deepmd::tabulate_fusion_se_r_gpu_cuda<double>(xyz_scatter_dev, table_dev, &info[0], em_dev, nloc, nnei, last_layer_size);
+  deepmd::tabulate_fusion_se_r_gpu_cuda<double>(xyz_scatter_dev, table_dev,
+                                                &info[0], em_dev, nloc, nnei,
+                                                last_layer_size);
   deepmd::memcpy_device_to_host(xyz_scatter_dev, xyz_scatter);
   deepmd::delete_device_memory(xyz_scatter_dev);
   deepmd::delete_device_memory(table_dev);
@@ -112,22 +623,23 @@ TEST_F(TestTabulateSeR, tabulate_fusion_se_r_gpu_cuda)
 
   EXPECT_EQ(xyz_scatter.size(), nloc * nnei * last_layer_size);
   EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size());
-  for (int jj = 0; jj < xyz_scatter.size(); ++jj){
-    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]) , 1e-5);
+  for (int jj = 0; jj < xyz_scatter.size(); ++jj) {
+    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]), 1e-5);
   }
 }
 
-TEST_F(TestTabulateSeR, tabulate_fusion_se_r_grad_gpu_cuda)
-{
+TEST_F(TestTabulateSeR, tabulate_fusion_se_r_grad_gpu_cuda) {
   std::vector dy_dem(em.size(), 0.0);
   std::vector dy(nloc * nnei * last_layer_size, 1.0);
 
-  double * dy_dem_dev = NULL, * table_dev = NULL, * em_dev = NULL, * dy_dev = NULL;
+  double *dy_dem_dev = NULL, *table_dev = NULL, *em_dev = NULL, *dy_dev = NULL;
   deepmd::malloc_device_memory_sync(dy_dem_dev, dy_dem);
   deepmd::malloc_device_memory_sync(table_dev, table);
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(dy_dev, dy);
-  deepmd::tabulate_fusion_se_r_grad_gpu_cuda(dy_dem_dev, table_dev, &info[0], em_dev, dy_dev, nloc, nnei, last_layer_size);
+  deepmd::tabulate_fusion_se_r_grad_gpu_cuda(
+      dy_dem_dev, table_dev, &info[0], em_dev, dy_dev, nloc, nnei,
+      last_layer_size);
   deepmd::memcpy_device_to_host(dy_dem_dev, dy_dem);
   deepmd::delete_device_memory(dy_dem_dev);
   deepmd::delete_device_memory(table_dev);
@@ -137,22 +649,23 @@ TEST_F(TestTabulateSeR, tabulate_fusion_se_r_grad_gpu_cuda)
   EXPECT_EQ(dy_dem.size(), nloc * nnei);
   EXPECT_EQ(dy_dem.size(), expected_dy_dem.size());
 
-  for (int jj = 0; jj < dy_dem.size(); ++jj){
-    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]) , 1e-5);
+  for (int jj = 0; jj < dy_dem.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]), 1e-5);
   }
 }
-#endif // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestTabulateSeR, tabulate_fusion_se_r_gpu_rocm)
-{
+TEST_F(TestTabulateSeR, tabulate_fusion_se_r_gpu_rocm) {
   std::vector xyz_scatter(nloc * nnei * last_layer_size, 0.0);
 
-  double * xyz_scatter_dev = NULL, * table_dev = NULL, * em_dev = NULL;
+  double *xyz_scatter_dev = NULL, *table_dev = NULL, *em_dev = NULL;
   deepmd::malloc_device_memory_sync(xyz_scatter_dev, xyz_scatter);
   deepmd::malloc_device_memory_sync(table_dev, table);
   deepmd::malloc_device_memory_sync(em_dev, em);
-  deepmd::tabulate_fusion_se_r_gpu_rocm(xyz_scatter_dev, table_dev, &info[0], em_dev, nloc, nnei, last_layer_size);
+  deepmd::tabulate_fusion_se_r_gpu_rocm(xyz_scatter_dev, table_dev,
+                                                &info[0], em_dev, nloc, nnei,
+                                                last_layer_size);
   deepmd::memcpy_device_to_host(xyz_scatter_dev, xyz_scatter);
   deepmd::delete_device_memory(xyz_scatter_dev);
   deepmd::delete_device_memory(table_dev);
@@ -160,22 +673,23 @@ TEST_F(TestTabulateSeR, tabulate_fusion_se_r_gpu_rocm)
 
   EXPECT_EQ(xyz_scatter.size(), nloc * nnei * last_layer_size);
   EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size());
-  for (int jj = 0; jj < xyz_scatter.size(); ++jj){
-    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]) , 1e-5);
+  for (int jj = 0; jj < xyz_scatter.size(); ++jj) {
+    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]), 1e-5);
   }
 }
 
-TEST_F(TestTabulateSeR, tabulate_fusion_se_r_grad_gpu_rocm)
-{
+TEST_F(TestTabulateSeR, tabulate_fusion_se_r_grad_gpu_rocm) {
   std::vector dy_dem(em.size(), 0.0);
   std::vector dy(nloc * nnei * last_layer_size, 1.0);
 
-  double * dy_dem_dev = NULL, * table_dev = NULL, * em_dev = NULL, * dy_dev = NULL;
+  double *dy_dem_dev = NULL, *table_dev = NULL, *em_dev = NULL, *dy_dev = NULL;
   deepmd::malloc_device_memory_sync(dy_dem_dev, dy_dem);
   deepmd::malloc_device_memory_sync(table_dev, table);
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(dy_dev, dy);
-  deepmd::tabulate_fusion_se_r_grad_gpu_rocm(dy_dem_dev, table_dev, &info[0], em_dev, dy_dev, nloc, nnei, last_layer_size);
+  deepmd::tabulate_fusion_se_r_grad_gpu_rocm(
+      dy_dem_dev, table_dev, &info[0], em_dev, dy_dev, nloc, nnei,
+      last_layer_size);
   deepmd::memcpy_device_to_host(dy_dem_dev, dy_dem);
   deepmd::delete_device_memory(dy_dem_dev);
   deepmd::delete_device_memory(table_dev);
@@ -185,8 +699,8 @@ TEST_F(TestTabulateSeR, tabulate_fusion_se_r_grad_gpu_rocm)
   EXPECT_EQ(dy_dem.size(), nloc * nnei);
   EXPECT_EQ(dy_dem.size(), expected_dy_dem.size());
 
-  for (int jj = 0; jj < dy_dem.size(); ++jj){
-    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]) , 1e-5);
+  for (int jj = 0; jj < dy_dem.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]), 1e-5);
   }
 }
-#endif // TENSORFLOW_USE_ROCM
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lib/tests/test_tabulate_se_t.cc b/source/lib/tests/test_tabulate_se_t.cc
index b4e728a5c6..0bd1641e79 100644
--- a/source/lib/tests/test_tabulate_se_t.cc
+++ b/source/lib/tests/test_tabulate_se_t.cc
@@ -1,87 +1,5278 @@
-#include 
+#include 
+
 #include 
+#include 
+
 #include "device.h"
 #include "tabulate.h"
-#include 
 #include "utilities.h"
 
-class TestTabulateSeT : public ::testing::Test
-{
-protected:
+class TestTabulateSeT : public ::testing::Test {
+ protected:
   // em_x = tf.random.uniform([4, 16], minval=0, maxval=0.2, dtype = tf.float64)
-  std::vector info = {
-    -2.1000000000000000e+01, 2.1000000000000000e+01, 1.0500000000000000e+02, 1.0000000000000000e+00, 1.0000000000000000e+01, -1.0000000000000000e+00
-  };  
-  std::vector em_x = {
-    9.3816147034272368e-01, -1.6703373029862567e-01, -4.4294526064601734e-02, -2.8798505489184573e-01, -1.6703373029862567e-01, 9.2489218226366088e-01, -2.8928196536572048e-01, -4.7833509099876154e-01, -4.4294526064601734e-02, -2.8928196536572048e-01, 5.7034320185695120e-01, 1.8771147911830000e-01, -2.8798505489184573e-01, -4.7833509099876154e-01, 1.8771147911830000e-01, 4.0174654365823070e-01, 8.4370316144902313e-01, -3.7813146789689916e-02, -3.6989397568296523e-01, -4.0554075086539937e-01, -3.7813146789689916e-02, 6.5766402633747112e-01, -4.2312966361682885e-01, 1.2685067374257861e-01, -3.6989397568296523e-01, -4.2312966361682885e-01, 6.0171576901660107e-01, 9.8283160997298613e-02, -4.0554075086539937e-01, 1.2685067374257861e-01, 9.8283160997298613e-02, 2.1324148100625978e-01, 9.7843596341516559e-01, -1.0492833888237871e-01, -1.0538688914576379e-01, -2.0453551592353389e-01, -1.0492833888237871e-01, 7.7943976693565231e-01, -1.5898500035781410e-01, 9.4834209331437741e-02, -1.0538688914576379e-01, -1.5898500035781410e-01, 7.4778071691708869e-01, -6.1895255142095873e-01, -2.0453551592353389e-01, 9.4834209331437741e-02, -6.1895255142095873e-01, 6.0844713798743799e-01, 1.0079020879244640e+00, -2.3855984150631487e-01, -3.4608276043004524e-02, -4.7448768267289088e-01, -2.3855984150631487e-01, 4.9732018171028253e-01, -3.1320787082485729e-01, -1.4528004145602180e-01, -3.4608276043004524e-02, -3.1320787082485729e-01, 4.7696729363954582e-01, 1.1723268074231248e-01, -4.7448768267289088e-01, -1.4528004145602180e-01, 1.1723268074231248e-01, 4.0511515406019899e-01, 6.9317482874286218e-01, 3.8721526993960850e-02, -1.4829415254252801e-01, 1.9079858574793401e-01, 3.8721526993960850e-02, 4.0694636061668399e-01, -1.6669745680958750e-01, -2.9455183336619600e-01, -1.4829415254252801e-01, -1.6669745680958750e-01, 3.5115749833010762e-01, 2.7972274527006624e-02, 1.9079858574793401e-01, -2.9455183336619600e-01, 2.7972274527006624e-02, 2.9261590797274251e-01, 1.0547679530528609e+00, 
-7.5579498870314032e-01, -2.5907966401834215e-01, 2.4868586817732710e-01, -7.5579498870314032e-01, 7.7943976693565231e-01, -2.7840391808748116e-02, -1.2197364072902048e-02, -2.5907966401834215e-01, -2.7840391808748116e-02, 2.9514791871283574e-01, -1.4724344774699041e-01, 2.4868586817732710e-01, -1.2197364072902048e-02, -1.4724344774699041e-01, 2.7333766016385419e-01, 7.1494810971572931e-01, -3.8965690625377569e-01, -2.0579233200987346e-01, 1.5638053130676866e-01, -3.8965690625377569e-01, 5.7034320185695120e-01, -2.4759451701376567e-01, 1.7075608253389668e-01, -2.0579233200987346e-01, -2.4759451701376567e-01, 4.4489736273181785e-01, -3.3903230422862907e-01, 1.5638053130676866e-01, 1.7075608253389668e-01, -3.3903230422862907e-01, 2.7328888249045513e-01, 8.2256346358859145e-01, -3.0201999278197073e-01, -1.0847299712994765e-01, -3.3355086180245408e-01, -3.0201999278197073e-01, 6.2584346461620799e-01, -2.3661062787307036e-02, 2.5559368174587482e-02, -1.0847299712994765e-01, -2.3661062787307036e-02, 6.2471079378938721e-01, -3.9923912886685187e-01, -3.3355086180245408e-01, 2.5559368174587482e-02, -3.9923912886685187e-01, 4.9732018171028253e-01, 7.4300380743635475e-01, -4.4058918654051710e-01, -2.5375568912718455e-01, 5.6029289688609013e-02, -4.4058918654051710e-01, 4.3025970692640908e-01, -1.0172468432781301e-02, -1.2941908849275471e-01, -2.5375568912718455e-01, -1.0172468432781301e-02, 4.2845986148404269e-01, -1.7460159217638957e-01, 5.6029289688609013e-02, -1.2941908849275471e-01, -1.7460159217638957e-01, 3.8138264541081690e-01, 7.4620920788925238e-01, -1.6424881399213448e-01, -2.2361648073503249e-01, -4.0424642786821852e-01, -1.6424881399213448e-01, 6.6730350814323314e-01, -2.1317459925340326e-01, 1.9376435628360764e-01, -2.2361648073503249e-01, -2.1317459925340326e-01, 5.8089947575573275e-01, -1.0277026325170427e-01, -4.0424642786821852e-01, 1.9376435628360764e-01, -1.0277026325170427e-01, 3.1673915325970592e-01, 9.3159182283013242e-01, 2.7352164674733859e-01, 
-3.0194015433346399e-01, -1.6581739923723970e-01, 2.7352164674733859e-01, 8.7811025837608414e-01, -5.3657994020118693e-01, -1.4168666065928759e-02, -3.0194015433346399e-01, -5.3657994020118693e-01, 5.7772908002379919e-01, -2.6766718990342830e-01, -1.6581739923723970e-01, -1.4168666065928759e-02, -2.6766718990342830e-01, 4.3044918224444273e-01, 7.5776646946384441e-01, -2.8544634168978411e-01, -4.6917826735271817e-01, 9.0648108100258265e-02, -2.8544634168978411e-01, 6.3333781479517670e-01, -1.5635743535006455e-01, -3.2225585549698127e-01, -4.6917826735271817e-01, -1.5635743535006455e-01, 5.8894646017949193e-01, -4.7549586643753167e-02, 9.0648108100258265e-02, -3.2225585549698127e-01, -4.7549586643753167e-02, 5.1470686148396338e-01, 8.9120731219577032e-01, -4.8913932884415684e-01, -2.8535162253723745e-01, -3.9481172797096048e-01, -4.8913932884415684e-01, 6.8161288899055572e-01, 5.3745624046182272e-02, -9.6415050020146331e-03, -2.8535162253723745e-01, 5.3745624046182272e-02, 3.7216282686452884e-01, 1.8327616930599899e-01, -3.9481172797096048e-01, -9.6415050020146331e-03, 1.8327616930599899e-01, 2.9889755032428134e-01, 6.6730350814323314e-01, -1.9772856508212072e-01, -1.3214864503616511e-01, 1.9656713013350754e-01, -1.9772856508212072e-01, 6.4107564273521156e-01, -3.6750949174506781e-01, -2.1514707877261979e-01, -1.3214864503616511e-01, -3.6750949174506781e-01, 4.7696729363954582e-01, -1.0605540579882382e-01, 1.9656713013350754e-01, -2.1514707877261979e-01, -1.0605540579882382e-01, 2.8720166306787565e-01, 9.3159182283013242e-01, -3.9105219662031382e-01, -4.6012852922712744e-01, -3.7107529450742310e-01, -3.9105219662031382e-01, 6.4624598213814299e-01, -1.9218513692914521e-01, 8.7864237420793134e-02, -4.6012852922712744e-01, -1.9218513692914521e-01, 5.5411629355023162e-01, 1.5181004010991156e-01, -3.7107529450742310e-01, 8.7864237420793134e-02, 1.5181004010991156e-01, 5.4717905839342551e-01, 5.5411629355023162e-01, -1.3154982153268135e-01, -2.2683561534265623e-02, 
-8.5067568027022145e-02, -1.3154982153268135e-01, 4.6158852919583448e-01, -7.4818007595887706e-02, -2.3446129520432515e-01, -2.2683561534265623e-02, -7.4818007595887706e-02, 4.0511515406019899e-01, -2.4964155806145566e-01, -8.5067568027022145e-02, -2.3446129520432515e-01, -2.4964155806145566e-01, 3.9586735852137039e-01, 9.4162575876886123e-01, 1.6551527461893110e-02, -1.7195190048425002e-01, -8.5321232935839153e-02, 1.6551527461893110e-02, 7.4304915761252677e-01, -1.1678199974326212e-01, -4.3729941122496446e-01, -1.7195190048425002e-01, -1.1678199974326212e-01, 6.9317482874286218e-01, 1.9191976680315659e-01, -8.5321232935839153e-02, -4.3729941122496446e-01, 1.9191976680315659e-01, 2.8206822618179617e-01, 7.5776646946384441e-01, 1.3081288362678634e-02, -3.2517003355282742e-01, -3.5562946132636442e-01, 1.3081288362678634e-02, 7.0400669442030783e-01, -2.7774712576896132e-01, -1.7472226352059492e-01, -3.2517003355282742e-01, -2.7774712576896132e-01, 5.1099339330908866e-01, 7.5918257186359903e-02, -3.5562946132636442e-01, -1.7472226352059492e-01, 7.5918257186359903e-02, 2.8288909669360418e-01, 7.4778071691708869e-01, -3.9041097955700099e-01, -5.2895674526793196e-03, -4.0620032081707269e-01, -3.9041097955700099e-01, 6.0650459602198470e-01, -1.5236577918073632e-01, 1.3182011644234659e-03, -5.2895674526793196e-03, -1.5236577918073632e-01, 5.8972872609679527e-01, -1.4295182509075030e-01, -4.0620032081707269e-01, 1.3182011644234659e-03, -1.4295182509075030e-01, 4.2821165206248513e-01, 6.2471079378938699e-01, 6.0802406816920937e-02, -3.5127267686929931e-01, -3.3963258237386684e-01, 6.0802406816920937e-02, 6.0313886404423811e-01, -2.6977044122265748e-01, -1.6348825415331092e-01, -3.5127267686929931e-01, -2.6977044122265748e-01, 5.4552055268521205e-01, -1.9316799462722023e-02, -3.3963258237386684e-01, -1.6348825415331092e-01, -1.9316799462722023e-02, 4.8171669294486208e-01, 7.0578073898484561e-01, -9.8967741792306413e-02, 1.2502659893584156e-01, -3.7971201998874682e-01, 
-9.8967741792306413e-02, 7.0400669442030783e-01, -6.5060917634508969e-02, -1.0206531635166906e-01, 1.2502659893584156e-01, -6.5060917634508969e-02, 6.0506774685063136e-01, -2.9019848494979322e-01, -3.7971201998874682e-01, -1.0206531635166906e-01, -2.9019848494979322e-01, 3.3340397458978077e-01, 9.7231560474448697e-01, -4.8839770953582246e-02, -5.2649040695832883e-01, 7.6239831325479152e-02, -4.8839770953582246e-02, 8.5066067412859170e-01, -2.8381943351894323e-01, -4.3719342578830916e-01, -5.2649040695832883e-01, -2.8381943351894323e-01, 7.8694914200240895e-01, -6.9042842925044076e-02, 7.6239831325479152e-02, -4.3719342578830916e-01, -6.9042842925044076e-02, 3.1673915325970592e-01, 1.1146471781363385e+00, -4.1950872828895774e-01, -7.5099287814360732e-02, -7.0747093773604913e-02, -4.1950872828895774e-01, 8.7811025837608414e-01, -7.1680109826753424e-01, 1.5469221891377744e-01, -7.5099287814360732e-02, -7.1680109826753424e-01, 7.8157368152678353e-01, -1.8674982324145417e-01, -7.0747093773604913e-02, 1.5469221891377744e-01, -1.8674982324145417e-01, 6.3333781479517670e-01, 9.4162575876886123e-01, -3.5756088285386567e-01, -4.9692587682641537e-02, -1.9259678748208950e-01, -3.5756088285386567e-01, 5.7304538706875918e-01, -1.7263416631604137e-01, 1.7497761971314219e-01, -4.9692587682641537e-02, -1.7263416631604137e-01, 5.4717905839342551e-01, 1.8011611616770615e-01, -1.9259678748208950e-01, 1.7497761971314219e-01, 1.8011611616770615e-01, 1.6303735520554410e-01, 1.1146471781363385e+00, -2.9955742667885321e-01, -4.6620550078880341e-01, -3.1378159981378489e-01, -2.9955742667885321e-01, 8.8424257114190075e-01, -2.5441067597713185e-01, -2.1591071501682862e-01, -4.6620550078880341e-01, -2.5441067597713185e-01, 7.7959145539751795e-01, -1.2407187853083543e-01, -3.1378159981378489e-01, -2.1591071501682862e-01, -1.2407187853083543e-01, 5.8972872609679527e-01, 6.0506774685063081e-01, -7.0161191563646669e-02, -3.3988044762842473e-01, -1.9107644999487627e-01, -7.0161191563646669e-02, 
4.3045264342637135e-01, 2.0824077812721109e-02, -2.5586718835495170e-01, -3.3988044762842473e-01, 2.0824077812721109e-02, 4.0694636061668399e-01, -3.6902580613623392e-02, -1.9107644999487627e-01, -2.5586718835495170e-01, -3.6902580613623392e-02, 3.5713610126062301e-01, 1.0507172480981881e+00, -5.7238843768987546e-01, -8.6532548307818979e-02, -2.4797217908729449e-01, -5.7238843768987546e-01, 6.4845065425155790e-01, -2.3798654086704824e-01, -9.0951164465072995e-02, -8.6532548307818979e-02, -2.3798654086704824e-01, 4.3025970692640908e-01, 5.1916371214171878e-02, -2.4797217908729449e-01, -9.0951164465072995e-02, 5.1916371214171878e-02, 3.5115749833010762e-01, 9.7070728754011626e-01, -1.9667254906106676e-01, 1.3881360342029997e-01, 1.8332147125431175e-01, -1.9667254906106676e-01, 6.4107564273521156e-01, 6.9919471349729312e-02, -2.0671318712854408e-01, 1.3881360342029997e-01, 6.9919471349729312e-02, 1.7842412973820965e-01, -2.8800113566320312e-02, 1.8332147125431175e-01, -2.0671318712854408e-01, -2.8800113566320312e-02, 8.7697980056291452e-02, 9.9699133925078010e-01, -1.9566669897090611e-01, 1.1472638955603826e-02, -3.8137297906451501e-01, -1.9566669897090611e-01, 7.8694914200240895e-01, -4.8668226019268873e-01, -4.3398812393252877e-02, 1.1472638955603826e-02, -4.8668226019268873e-01, 4.4489736273181785e-01, -5.7511361787363047e-02, -3.8137297906451501e-01, -4.3398812393252877e-02, -5.7511361787363047e-02, 2.9261590797274251e-01, 5.8721566479597598e-01, 2.1741339413236024e-01, -4.6310740433823661e-01, 1.5212653882669683e-01, 2.1741339413236024e-01, 5.7772908002379919e-01, -1.1309986042713593e-01, -4.6297902780444065e-02, -4.6310740433823661e-01, -1.1309986042713593e-01, 5.3524434793006614e-01, 7.7916319803791656e-02, 1.5212653882669683e-01, -4.6297902780444065e-02, 7.7916319803791656e-02, 3.3070548705408681e-01, 6.4845065425155790e-01, -1.1573648783331206e-01, -1.1295787224086569e-02, -3.2976819109974104e-01, -1.1573648783331206e-01, 5.7304538706875918e-01, 
-4.4057056903958502e-01, 1.5118880665101278e-01, -1.1295787224086569e-02, -4.4057056903958502e-01, 5.0847535644168274e-01, -2.7508898922066710e-01, -3.2976819109974104e-01, 1.5118880665101278e-01, -2.7508898922066710e-01, 4.6158852919583448e-01, 9.2489218226366110e-01, 1.9293109981997125e-01, -4.6858170396716431e-01, 4.2486849070578564e-02, 1.9293109981997125e-01, 8.8759600097388758e-01, -9.3159319915253995e-03, -2.1166189641553734e-01, -4.6858170396716431e-01, -9.3159319915253995e-03, 5.8089947575573275e-01, -3.8498930024537814e-01, 4.2486849070578564e-02, -2.1166189641553734e-01, -3.8498930024537814e-01, 4.0608640117736616e-01, 9.0810287860174088e-01, -4.5026066211118843e-01, -2.4234797312679049e-01, -1.1635894709323868e-01, -4.5026066211118843e-01, 8.3522648701948565e-01, 7.0167369514544164e-02, -3.0380780947210834e-01, -2.4234797312679049e-01, 7.0167369514544164e-02, 2.8658902506762818e-01, 4.2786039330881952e-02, -1.1635894709323868e-01, -3.0380780947210834e-01, 4.2786039330881952e-02, 2.2990742509977113e-01, 8.7859873068865801e-01, -4.0521290114598696e-01, -3.4537197184419777e-02, -5.1541416274527041e-01, -4.0521290114598696e-01, 7.7073679568840003e-01, -5.2766218816177624e-01, 7.2009349057603700e-02, -3.4537197184419777e-02, -5.2766218816177624e-01, 5.3524434793006614e-01, 1.2774094128859217e-01, -5.1541416274527041e-01, 7.2009349057603700e-02, 1.2774094128859217e-01, 4.2821165206248513e-01, 8.0017606793538776e-01, -3.6273937320056748e-01, -4.8244644816054216e-01, 1.9000791267873704e-01, -3.6273937320056748e-01, 5.6353888423218390e-01, 3.7095579553517985e-02, 1.0807739396133707e-01, -4.8244644816054216e-01, 3.7095579553517985e-02, 4.2845986148404269e-01, -1.2901615367558902e-01, 1.9000791267873704e-01, 1.0807739396133707e-01, -1.2901615367558902e-01, 2.3908528931221024e-01, 8.7859873068865801e-01, -5.7342075379041779e-01, -2.2560792912032243e-01, 1.3840168053297142e-01, -5.7342075379041779e-01, 6.2584346461620799e-01, -1.0147381298988209e-01, 
-1.8792520225770581e-01, -2.2560792912032243e-01, -1.0147381298988209e-01, 4.9480400405457242e-01, -1.3243403061658676e-01, 1.3840168053297142e-01, -1.8792520225770581e-01, -1.3243403061658676e-01, 2.5543672306163845e-01, 9.7843596341516559e-01, -3.0135276617413775e-02, -6.3204064126440895e-01, 7.7858338769721036e-02, -3.0135276617413775e-02, 7.3888148744521942e-01, -2.8105601448216994e-01, -1.4222291428374428e-01, -6.3204064126440895e-01, -2.8105601448216994e-01, 5.3087899217465850e-01, -1.9954226724405721e-03, 7.7858338769721036e-02, -1.4222291428374428e-01, -1.9954226724405721e-03, 3.7962008200748842e-01, 7.3888148744521942e-01, -3.9876854953397894e-01, 7.9344112776459683e-02, 3.0881146913585039e-02, -3.9876854953397894e-01, 5.8894646017949193e-01, -2.9566251514384462e-01, -2.1875193893336647e-01, 7.9344112776459683e-02, -2.9566251514384462e-01, 2.1324148100625978e-01, 1.1337428718710466e-01, 3.0881146913585039e-02, -2.1875193893336647e-01, 1.1337428718710466e-01, 1.3167057692769979e-01, 7.2588662899153644e-01, -4.0491835492648354e-01, 3.8294782214220321e-02, -1.6245502019487457e-01, -4.0491835492648354e-01, 6.9978773302195119e-01, -2.6130189173261692e-01, -1.4963646270159869e-01, 3.8294782214220321e-02, -2.6130189173261692e-01, 4.9396599356834686e-01, -2.0453500637835592e-01, -1.6245502019487457e-01, -1.4963646270159869e-01, -2.0453500637835592e-01, 4.3044918224444273e-01, 5.4829965643085543e-01, -8.6258333909827428e-02, -9.0863809928147868e-02, -2.5335540346466701e-01, -8.6258333909827428e-02, 4.8256071581125803e-01, -1.5824234665042425e-01, -6.2251154159746636e-02, -9.0863809928147868e-02, -1.5824234665042425e-01, 3.3070548705408681e-01, -1.2171470598287978e-01, -2.5335540346466701e-01, -6.2251154159746636e-02, -1.2171470598287978e-01, 2.9993528142734771e-01, 9.9699133925078010e-01, -5.5720924928656934e-02, -3.7563354786645231e-01, -1.7003176037775222e-01, -5.5720924928656934e-02, 7.1322781206304275e-01, -1.4028476150114413e-01, -4.0725518739947821e-02, 
-3.7563354786645231e-01, -1.4028476150114413e-01, 6.5766402633747112e-01, -3.7470308102778649e-01, -1.7003176037775222e-01, -4.0725518739947821e-02, -3.7470308102778649e-01, 4.5518325168409662e-01, 7.2306489407047936e-01, -5.5652304518705331e-01, -1.7548443714837653e-01, -9.7244252488182109e-02, -5.5652304518705331e-01, 5.8721566479597631e-01, 1.5466175158937293e-02, 2.3395338180958331e-01, -1.7548443714837653e-01, 1.5466175158937293e-02, 5.1099339330908866e-01, 1.2309721775318454e-01, -9.7244252488182109e-02, 2.3395338180958331e-01, 1.2309721775318454e-01, 2.9948736341201787e-01, 8.9120731219577032e-01, 1.3828785543997540e-01, -6.5808954835297839e-01, 2.3928953196716229e-02, 1.3828785543997540e-01, 8.8759600097388758e-01, -1.6580688159465298e-01, -1.0200920121572279e-01, -6.5808954835297839e-01, -1.6580688159465298e-01, 6.0844713798743799e-01, -1.8482739703288106e-01, 2.3928953196716229e-02, -1.0200920121572279e-01, -1.8482739703288106e-01, 2.7328888249045513e-01, 9.7231560474448697e-01, -1.5360824814848673e-01, -3.0175556482836241e-01, 1.4427139165494557e-01, -1.5360824814848673e-01, 7.7073679568840003e-01, -4.4616437520485569e-01, -4.3386642987381790e-02, -3.0175556482836241e-01, -4.4616437520485569e-01, 6.8161288899055572e-01, -3.3586190220507495e-01, 1.4427139165494557e-01, -4.3386642987381790e-02, -3.3586190220507495e-01, 3.7736584257608707e-01, 8.4370316144902313e-01, -3.4786444025064711e-01, -4.9627086468803983e-01, -3.0284479791966981e-02, -3.4786444025064711e-01, 7.7959145539751795e-01, 5.4105959194424263e-02, -5.6892292757652424e-02, -4.9627086468803983e-01, 5.4105959194424263e-02, 3.7736584257608707e-01, 1.4233877887889113e-01, -3.0284479791966981e-02, -5.6892292757652424e-02, 1.4233877887889113e-01, 2.4312804865275472e-01, 9.3816147034272368e-01, -2.0860974495653162e-01, -3.7166959734666666e-01, 5.6250602465564153e-03, -2.0860974495653162e-01, 9.0810287860174088e-01, -9.1069253660888416e-02, -6.4962854816729276e-01, -3.7166959734666666e-01, 
-9.1069253660888416e-02, 8.1523507511884086e-01, -6.6856471628686520e-02, 5.6250602465564153e-03, -6.4962854816729276e-01, -6.6856471628686520e-02, 5.4817010902737695e-01, 8.3348751379486785e-01, -5.7386182749623121e-02, -1.2725671252595133e-01, -2.6056306991901751e-01, -5.7386182749623121e-02, 5.2909436738615123e-01, -4.2596706647156624e-01, -1.0919830944239709e-02, -1.2725671252595133e-01, -4.2596706647156624e-01, 3.7962008200748842e-01, 7.0344098964105775e-02, -2.6056306991901751e-01, -1.0919830944239709e-02, 7.0344098964105775e-02, 2.2679728457111192e-01, 1.0079020879244640e+00, -6.4116619730398172e-01, 1.1959784878009472e-02, -3.5036660259057062e-01, -6.4116619730398172e-01, 7.2588662899153644e-01, 1.5457663848526185e-01, -1.3123176159456695e-01, 1.1959784878009472e-02, 1.5457663848526185e-01, 7.2306489407047936e-01, 1.1120004060087613e-01, -3.5036660259057062e-01, -1.3123176159456695e-01, 1.1120004060087613e-01, 6.5291435452372704e-01, 6.5291435452372693e-01, -2.0669331449151485e-01, -1.0009057137401539e-01, -1.0458759498524857e-01, -2.0669331449151485e-01, 4.8256071581125803e-01, -1.0975185704675450e-01, -7.1400789900385064e-02, -1.0009057137401539e-01, -1.0975185704675450e-01, 3.1774836143047369e-01, -1.3420181173883666e-01, -1.0458759498524857e-01, -7.1400789900385064e-02, -1.3420181173883666e-01, 1.7842412973820965e-01, 7.4304915761252677e-01, -3.7474512353359690e-02, -2.0605417465948622e-01, 2.8786202774685965e-02, -3.7474512353359690e-02, 6.4624598213814299e-01, -1.8088533245882277e-01, -2.7294935111478841e-01, -2.0605417465948622e-01, -1.8088533245882277e-01, 4.5625158241504560e-01, -2.5573952710441156e-01, 2.8786202774685965e-02, -2.7294935111478841e-01, -2.5573952710441156e-01, 4.3045264342637135e-01, 8.1523507511884286e-01, -1.1206588973386064e-01, -2.9038245483951325e-01, -9.4044512167176214e-02, -1.1206588973386064e-01, 6.9069450025674883e-01, -3.6666406927828260e-01, -2.6295004305146680e-01, -2.9038245483951325e-01, -3.6666406927828260e-01, 
6.0313886404423811e-01, -1.1364153715298203e-01, -9.4044512167176214e-02, -2.6295004305146680e-01, -1.1364153715298203e-01, 5.0847535644168274e-01, 7.8157368152678375e-01, -3.6689153089953208e-01, -1.6343042599874896e-01, 2.0681260422479955e-01, -3.6689153089953208e-01, 7.5302376148517924e-01, -3.9141809592987054e-01, -4.9792782459804974e-01, -1.6343042599874896e-01, -3.9141809592987054e-01, 4.8171669294486208e-01, 1.8620912874547166e-01, 2.0681260422479955e-01, -4.9792782459804974e-01, 1.8620912874547166e-01, 4.5625158241504560e-01, 8.5066067412859170e-01, -6.6383544595733118e-02, -4.9833493744041324e-01, 3.5985625651793141e-02, -6.6383544595733118e-02, 7.6738392766662211e-01, -1.9489169085491792e-01, -6.4543919151000315e-02, -4.9833493744041324e-01, -1.9489169085491792e-01, 5.4829965643085543e-01, -2.7028089635457442e-01, 3.5985625651793141e-02, -6.4543919151000315e-02, -2.7028089635457442e-01, 3.9586735852137039e-01, 8.8424257114190075e-01, -5.0968059146564315e-01, -9.5512640917474467e-03, -7.2900503108297598e-02, -5.0968059146564315e-01, 7.0578073898484561e-01, -7.9000528792048164e-02, -3.4471532290325835e-01, -9.5512640917474467e-03, -7.9000528792048164e-02, 6.9978773302195119e-01, -2.3508453941888172e-01, -7.2900503108297598e-02, -3.4471532290325835e-01, -2.3508453941888172e-01, 5.1462854065795050e-01, 8.3522648701948565e-01, -4.4716582952086292e-01, 5.4799614348689352e-02, -2.2914542894364148e-02, -4.4716582952086292e-01, 8.3348751379486763e-01, -6.7279352271601633e-01, -3.4916341927887895e-02, 5.4799614348689352e-02, -6.7279352271601633e-01, 7.6738392766662211e-01, -1.6809648395494664e-01, -2.2914542894364148e-02, -3.4916341927887895e-02, -1.6809648395494664e-01, 7.1322781206304275e-01, 9.7070728754011626e-01, -2.5224986479466716e-01, 5.5579119983494740e-02, -3.6768489956717509e-01, -2.5224986479466716e-01, 5.3965948062451941e-01, -4.4192312892226593e-01, 3.1513756645368173e-02, 5.5579119983494740e-02, -4.4192312892226593e-01, 5.3087899217465850e-01, 
7.9885677144886064e-02, -3.6768489956717509e-01, 3.1513756645368173e-02, 7.9885677144886064e-02, 1.6104012331629208e-01, 8.6130335545327363e-01, -9.6129435640763802e-02, -1.4801620948352356e-02, 7.8150102857998158e-02, -9.6129435640763802e-02, 6.7625303314120611e-01, -5.5027301017164576e-02, -1.7520377876997034e-01, -1.4801620948352356e-02, -5.5027301017164576e-02, 3.1774836143047369e-01, -1.2376278479081343e-01, 7.8150102857998158e-02, -1.7520377876997034e-01, -1.2376278479081343e-01, 1.0841353470308304e-01, 7.4620920788925238e-01, -1.6394859485203073e-01, -2.0029713033714130e-01, -1.8645272306202978e-01, -1.6394859485203073e-01, 5.3965948062451941e-01, -3.8230461309700781e-01, 1.3847290923401767e-01, -2.0029713033714130e-01, -3.8230461309700781e-01, 5.1462854065795050e-01, -1.8561874788646704e-01, -1.8645272306202978e-01, 1.3847290923401767e-01, -1.8561874788646704e-01, 2.9993528142734771e-01, 6.7625303314120611e-01, -1.6815361388865593e-01, -3.3930776213055852e-01, -1.2114679754832709e-01, -1.6815361388865593e-01, 5.6353888423218390e-01, -7.5784855072358825e-02, -3.3221823724983218e-01, -3.3930776213055852e-01, -7.5784855072358825e-02, 5.1470686148396338e-01, -4.1819974401314991e-02, -1.2114679754832709e-01, -3.3221823724983218e-01, -4.1819974401314991e-02, 4.2819483757235705e-01, 1.0507172480981881e+00, -1.4246809991945683e-01, 4.1456358442478705e-02, -4.5365891641821365e-01, -1.4246809991945683e-01, 7.1494810971572931e-01, -2.4883057770682826e-01, -2.4395987950943798e-01, 4.1456358442478705e-02, -2.4883057770682826e-01, 5.4552055268521205e-01, 1.2834013149857285e-01, -4.5365891641821365e-01, -2.4395987950943798e-01, 1.2834013149857285e-01, 3.3340397458978077e-01, 6.9069450025674894e-01, -3.8338543582916840e-01, -1.6642138961100816e-01, -3.5535624148913159e-01, -3.8338543582916840e-01, 5.2909436738615123e-01, 9.6172848604107755e-02, 5.5205364851978050e-02, -1.6642138961100816e-01, 9.6172848604107755e-02, 4.2819483757235710e-01, 5.9926260952505861e-03, 
-3.5535624148913159e-01, 5.5205364851978050e-02, 5.9926260952505861e-03, 2.6226647193037689e-01, 8.6130335545327363e-01, -6.2395423965096508e-02, -3.8567287714901394e-01, -1.5408554583685347e-01, -6.2395423965096508e-02, 7.4300380743635475e-01, -4.7285239739268992e-01, -3.2035899709108612e-02, -3.8567287714901394e-01, -4.7285239739268992e-01, 6.0171576901660107e-01, -1.0536966428907621e-01, -1.5408554583685347e-01, -3.2035899709108612e-02, -1.0536966428907621e-01, 4.9396599356834686e-01, 7.5302376148517924e-01, -6.0232285163413626e-02, -2.7132649171672180e-01, -1.4054053948995909e-01, -6.0232285163413626e-02, 6.0650459602198470e-01, -2.4794771537629287e-01, -1.4485909587327089e-01, -2.7132649171672180e-01, -2.4794771537629287e-01, 4.5518325168409662e-01, -1.6092902586109215e-01, -1.4054053948995909e-01, -1.4485909587327089e-01, -1.6092902586109215e-01, 4.0174654365823070e-01, 1.0547679530528609e+00, -6.4439808979974744e-01, 6.9192182950400305e-02, -3.3250796613430167e-01, -6.4439808979974744e-01, 8.2256346358859145e-01, -4.8266166410158140e-01, -7.9980773185718390e-02, 6.9192182950400305e-02, -4.8266166410158140e-01, 8.0017606793538776e-01, -2.7854894491724819e-02, -3.3250796613430167e-01, -7.9980773185718390e-02, -2.7854894491724819e-02, 5.4817010902737695e-01, 1.5502492798962887e+01, -9.4036183520005387e+00, -9.0871843532376817e-01, 1.5458603036460241e+00, -9.4036183520005387e+00, 5.8545618851096108e+00, 8.7584459712876173e-01, -1.0260595565373307e+00, -9.0871843532376817e-01, 8.7584459712876173e-01, 7.5773424105520826e-01, -3.2391012220482440e-01, 1.5458603036460241e+00, -1.0260595565373307e+00, -3.2391012220482440e-01, 6.6152719782725655e-01, 1.5881484684546885e+01, -8.5373015094081293e+00, 1.2910407629253464e+00, 1.2854398517437213e+00, -8.5373015094081293e+00, 5.0892253563939507e+00, -3.8569840009954620e-01, -9.9850386318081996e-01, 1.2910407629253464e+00, -3.8569840009954620e-01, 4.1577040236200724e-01, -9.1388286019611339e-02, 1.2854398517437213e+00, 
-9.9850386318081996e-01, -9.1388286019611339e-02, 2.9351817210899528e-01, 1.8355238388750472e+01, -9.9693288297551721e+00, 1.4065955534477488e+00, 1.5236758321532125e+00, -9.9693288297551721e+00, 5.4788018334752540e+00, -7.5992948757336931e-01, -6.6807507000148814e-01, 1.4065955534477488e+00, -7.5992948757336931e-01, 5.8340911874120316e-01, 9.9856752432820606e-03, 1.5236758321532125e+00, -6.6807507000148814e-01, 9.9856752432820606e-03, 5.5177690536312463e-01, 1.7078778656839585e+01, -6.5205205128653105e+00, 1.5817366283975822e+00, -9.3640888727240412e-01, -6.5205205128653105e+00, 2.9619037115622602e+00, -1.1542576186016471e+00, -8.8584406490304773e-02, 1.5817366283975822e+00, -1.1542576186016471e+00, 8.3454304950111569e-01, 3.1272631908606119e-01, -9.3640888727240412e-01, -8.8584406490304773e-02, 3.1272631908606119e-01, 7.8087115310507782e-01, 1.5669862194862620e+01, -6.2090727600143900e+00, 1.3817794924064011e+00, -1.1475004406408358e+00, -6.2090727600143900e+00, 2.5567265715258070e+00, -5.4440265806779431e-01, 3.8699899553155109e-01, 1.3817794924064011e+00, -5.4440265806779431e-01, 3.6446802134584444e-01, -2.8076757475429592e-01, -1.1475004406408358e+00, 3.8699899553155109e-01, -2.8076757475429592e-01, 2.6130199022294265e-01, 1.7118875747980759e+01, -5.9277226216953718e+00, 1.9346813580902582e+00, -1.6872205590095239e+00, -5.9277226216953718e+00, 2.1086146215067632e+00, -8.0747551757328861e-01, 4.7699095328329716e-01, 1.9346813580902582e+00, -8.0747551757328861e-01, 5.5797080293767543e-01, 8.9832319658025406e-02, -1.6872205590095239e+00, 4.7699095328329716e-01, 8.9832319658025406e-02, 5.5399995944851188e-01, 1.7058565959549352e+01, -5.9263293882909478e+00, 1.5342405590746959e+00, -1.1630302303586997e+00, -5.9263293882909478e+00, 2.6205293378999683e+00, -4.7688305722955537e-01, 3.1804603922241220e-01, 1.5342405590746959e+00, -4.7688305722955537e-01, 3.1296424858530869e-01, -2.8669977614844999e-01, -1.1630302303586997e+00, 3.1804603922241220e-01, 
-2.8669977614844999e-01, 2.7020324134820112e-01, 1.6874725411781530e+01, -6.9150979008264484e+00, 1.0065221329504688e+00, -1.8347878924481371e+00, -6.9150979008264484e+00, 3.5369590566282492e+00, -5.2228937791154206e-01, 2.3672173077240022e-01, 1.0065221329504688e+00, -5.2228937791154206e-01, 6.0329263881086503e-01, 4.6309711033365369e-02, -1.8347878924481371e+00, 2.3672173077240022e-01, 4.6309711033365369e-02, 5.8765944434164141e-01, 1.6856507449690348e+01, -6.8342292657673545e+00, 8.9036581290816785e-01, -4.5904138300777919e-01, -6.8342292657673545e+00, 3.9011240056387244e+00, -4.1077122687276202e-02, 5.6895999928936403e-01, 8.9036581290816785e-01, -4.1077122687276202e-02, 2.7435588945926814e-01, 1.9420750375548933e-02, -4.5904138300777919e-01, 5.6895999928936403e-01, 1.9420750375548933e-02, 1.7277500144155888e-01, 1.7645529606875833e+01, -3.2538877192050002e+00, -5.5012517023538066e-01, 1.0677788759412139e+00, -3.2538877192050002e+00, 1.0546932562360065e+00, -2.3658187113371332e-01, 2.1841816790460902e-01, -5.5012517023538066e-01, -2.3658187113371332e-01, 7.1656120537425561e-01, -1.7040630728823944e-01, 1.0677788759412139e+00, 2.1841816790460902e-01, -1.7040630728823944e-01, 5.0974972717888634e-01, 1.7839842450333954e+01, -5.7379299944719877e+00, -1.9569351707197109e+00, 1.7713682302855058e+00, -5.7379299944719877e+00, 2.4328041444884976e+00, -1.7544854846175517e-02, -1.6864721058949111e-01, -1.9569351707197109e+00, -1.7544854846175517e-02, 1.0677513531326404e+00, -5.9215023869599626e-01, 1.7713682302855058e+00, -1.6864721058949111e-01, -5.9215023869599626e-01, 4.6360784428232726e-01, 1.5732517152453081e+01, -7.4014739000296572e+00, -1.6207000095637598e+00, 1.2383190370614494e+00, -7.4014739000296572e+00, 3.9409851277204022e+00, 7.5740083786654211e-01, -5.5800827056355640e-01, -1.6207000095637598e+00, 7.5740083786654211e-01, 7.7642511193896813e-01, -4.9466228929792039e-01, 1.2383190370614494e+00, -5.5800827056355640e-01, -4.9466228929792039e-01, 
3.1958775176868037e-01, 1.6183696670937064e+01, -8.4210107899133941e+00, -1.4603023231162975e+00, -4.8487004987150362e-01, -8.4210107899133941e+00, 5.1398804721325435e+00, 2.1595570912014417e-01, 8.3208280045450567e-01, -1.4603023231162975e+00, 2.1595570912014417e-01, 5.4394923212798296e-01, -3.4805126389386676e-01, -4.8487004987150362e-01, 8.3208280045450567e-01, -3.4805126389386676e-01, 4.8452486035887499e-01, 1.6451611837149500e+01, -4.2969418428867261e+00, -7.3888024461400414e-01, -2.0815144409407478e+00, -4.2969418428867261e+00, 1.5430243106399666e+00, 1.8729935762291541e-01, 1.4033663037202498e-01, -7.3888024461400414e-01, 1.8729935762291541e-01, 7.4205169442909757e-01, 5.7628851673904691e-02, -2.0815144409407478e+00, 1.4033663037202498e-01, 5.7628851673904691e-02, 6.5242091014819859e-01, 1.5883062757360207e+01, -7.5907511804771470e+00, -9.7007798581707783e-01, 7.0138283553798109e-01, -7.5907511804771470e+00, 4.0217572548274134e+00, -2.6014708482761550e-02, -5.3204932822260864e-01, -9.7007798581707783e-01, -2.6014708482761550e-02, 1.0259721921051177e+00, -1.8715321271601759e-01, 7.0138283553798109e-01, -5.3204932822260864e-01, -1.8715321271601759e-01, 5.5149306670030152e-01, 1.5978116669665239e+01, -7.4006259214596790e+00, -6.7969646246634663e-01, 6.5546600275605982e-01, -7.4006259214596790e+00, 3.6071706440359010e+00, 5.3491110661316954e-01, -5.2666565248665098e-01, -6.7969646246634663e-01, 5.3491110661316954e-01, 4.7936057181766917e-01, -1.5025322700354665e-01, 6.5546600275605982e-01, -5.2666565248665098e-01, -1.5025322700354665e-01, 4.3112434274449579e-01, 1.6757855414725086e+01, -3.1391994419655687e+00, -3.6827363627511667e+00, -2.7493582565171533e+00, -3.1391994419655687e+00, 1.2878229875735527e+00, 6.2385886839847293e-01, 3.4411042773468631e-01, -3.6827363627511667e+00, 6.2385886839847293e-01, 1.2200979174145221e+00, 9.3261333604507307e-02, -2.7493582565171533e+00, 3.4411042773468631e-01, 9.3261333604507307e-02, 1.1795148354251150e+00, 
1.5763176470987085e+01, -7.2993099681066589e+00, 2.1748999120100793e-01, -4.7546386776476129e-01, -7.2993099681066589e+00, 3.4569862729331460e+00, -1.6096289888525245e-01, 3.5019733955885657e-01, 2.1748999120100793e-01, -1.6096289888525245e-01, 4.2354082998887993e-01, -3.1304152419179737e-01, -4.7546386776476129e-01, 3.5019733955885657e-01, -3.1304152419179737e-01, 3.4623783030108429e-01, 1.7657045077804337e+01, -5.4634153749758578e+00, -2.8502393669993009e+00, 1.2603451180663379e+00, -5.4634153749758578e+00, 2.3705777605989660e+00, 4.7298264603542695e-01, -1.8824806410410400e-01, -2.8502393669993009e+00, 4.7298264603542695e-01, 8.1312479693863837e-01, -4.9719372983025689e-01, 1.2603451180663379e+00, -1.8824806410410400e-01, -4.9719372983025689e-01, 4.2735579792528156e-01, 1.7234879466141379e+01, -6.9447619542898051e+00, -1.3909105342656662e+00, -1.4629483521415327e+00, -6.9447619542898051e+00, 3.2257037587765272e+00, 3.4613714263225803e-01, 7.1038882433094597e-01, -1.3909105342656662e+00, 3.4613714263225803e-01, 5.4019461216426035e-01, -2.4590599471464314e-01, -1.4629483521415327e+00, 7.1038882433094597e-01, -2.4590599471464314e-01, 4.4551884917879320e-01, 1.7438713147183115e+01, -8.4187054220745114e+00, 9.9555878171084999e-01, 1.3301908774116342e+00, -8.4187054220745114e+00, 4.1903876236321533e+00, -7.3825814341549312e-01, -6.7163472873814567e-01, 9.9555878171084999e-01, -7.3825814341549312e-01, 5.8548847257088532e-01, 1.6930014844653318e-01, 1.3301908774116342e+00, -6.7163472873814567e-01, 1.6930014844653318e-01, 5.3684670796748524e-01, 1.6505843421877792e+01, -8.9672859383958023e+00, -5.9586737243852639e-01, -4.9508712315185199e-01, -8.9672859383958023e+00, 4.8781866610061115e+00, 2.8538236205314221e-01, 3.0156093579837179e-01, -5.9586737243852639e-01, 2.8538236205314221e-01, 4.8868089712416274e-01, -4.4039031061756390e-01, -4.9508712315185199e-01, 3.0156093579837179e-01, -4.4039031061756390e-01, 4.7201467710741113e-01, 1.8009048716596261e+01, 
-9.5048416816852885e+00, -1.4889359349719808e+00, 5.5230572557832114e-01, -9.5048416816852885e+00, 5.0429843614824552e+00, 8.0889627597129587e-01, -2.6367499950333312e-01, -1.4889359349719808e+00, 8.0889627597129587e-01, 1.1121395372372715e+00, -9.0442258964978972e-01, 5.5230572557832114e-01, -2.6367499950333312e-01, -9.0442258964978972e-01, 8.5074190371660285e-01, 1.8969812503837847e+01, -7.7867026450257386e+00, 1.6621469974591756e+00, -1.3535726585282810e+00, -7.7867026450257386e+00, 3.2242358228553925e+00, -5.5729571150631485e-01, 5.2143834192344174e-01, 1.6621469974591756e+00, -5.5729571150631485e-01, 7.4287944271406869e-01, -4.0168187357492424e-01, -1.3535726585282810e+00, 5.2143834192344174e-01, -4.0168187357492424e-01, 5.7837665640757685e-01, 1.6268715288654960e+01, -8.9640581019331016e+00, 1.9472562726742195e+00, 1.1634394630256961e+00, -8.9640581019331016e+00, 5.2999985470828141e+00, -9.0781775797605180e-01, -9.9127146007630118e-01, 1.9472562726742195e+00, -9.0781775797605180e-01, 5.5880703055816960e-01, -2.0482720926808279e-02, 1.1634394630256961e+00, -9.9127146007630118e-01, -2.0482720926808279e-02, 4.2314243080374431e-01, 1.7309134332310190e+01, -6.7958317179822716e+00, 1.7430014752690762e-01, 7.4781078592582828e-01, -6.7958317179822716e+00, 2.7832702389289379e+00, -2.9174947002806606e-01, -2.5266256491062944e-01, 1.7430014752690762e-01, -2.9174947002806606e-01, 4.3576818045690519e-01, -8.8896184051415236e-02, 7.4781078592582828e-01, -2.5266256491062944e-01, -8.8896184051415236e-02, 3.9997224840640028e-01, 1.7153574941598098e+01, -6.8846934950832344e+00, 1.8254724366772068e+00, 4.8039342168786198e-01, -6.8846934950832344e+00, 2.8276344038991410e+00, -5.6790635321077287e-01, -1.9270510354144452e-01, 1.8254724366772068e+00, -5.6790635321077287e-01, 6.9227219483725666e-01, -1.2709334375461934e-01, 4.8039342168786198e-01, -1.9270510354144452e-01, -1.2709334375461934e-01, 4.2917978157262676e-01, 1.8132184964363688e+01, -3.1733007722489184e+00, 
1.7825953260422662e+00, -1.3140033285181132e+00, -3.1733007722489184e+00, 6.9684206950381822e-01, -5.4962857370590601e-01, -1.0351982721358500e-02, 1.7825953260422662e+00, -5.4962857370590601e-01, 6.5568195496490611e-01, 1.7607191243391238e-01, -1.3140033285181132e+00, -1.0351982721358500e-02, 1.7607191243391238e-01, 6.2263345089164690e-01, 1.6127141186360564e+01, -9.2175933167952362e+00, 4.5482959624410402e-01, 1.5607705465624031e+00, -9.2175933167952362e+00, 5.8646726994165235e+00, -7.6823914094072088e-01, -5.4262462349906693e-01, 4.5482959624410402e-01, -7.6823914094072088e-01, 5.9151161800332619e-01, -2.4286672705208728e-01, 1.5607705465624031e+00, -5.4262462349906693e-01, -2.4286672705208728e-01, 3.5666849406587792e-01, 1.6165753976243874e+01, -7.3811188161212806e+00, -2.9732014153906883e-01, 1.0894338527618317e+00, -7.3811188161212806e+00, 3.4986894854838320e+00, 1.2640223060154221e-02, -7.0520798584256972e-01, -2.9732014153906883e-01, 1.2640223060154221e-02, 5.6149770153482581e-01, 3.6870659678219064e-02, 1.0894338527618317e+00, -7.0520798584256972e-01, 3.6870659678219064e-02, 4.5536900447269846e-01, 1.5398988006027349e+01, -8.1208517349885732e+00, 8.6228252695234930e-01, 1.1537806103765487e+00, -8.1208517349885732e+00, 4.3222427499704104e+00, -5.2765755042823237e-01, -6.0836165343511328e-01, 8.6228252695234930e-01, -5.2765755042823237e-01, 4.7558839586188817e-01, -2.2039226845147369e-01, 1.1537806103765487e+00, -6.0836165343511328e-01, -2.2039226845147369e-01, 3.6326310248611182e-01, 1.6653862335772988e+01, -6.3212833166201801e+00, 1.7035548874689670e+00, 1.7818121037836856e+00, -6.3212833166201801e+00, 3.2372475346428593e+00, -1.6771360201147706e-01, -7.3260377579450964e-01, 1.7035548874689670e+00, -1.6771360201147706e-01, 4.5584861179226355e-01, 1.7938188763718371e-01, 1.7818121037836856e+00, -7.3260377579450964e-01, 1.7938188763718371e-01, 3.0343699262163576e-01, 1.6005636284380316e+01, -8.9463211901340483e+00, -1.4995353233230992e+00, 
1.6144976837089247e+00, -8.9463211901340483e+00, 5.4553198002689216e+00, 8.2430401820014310e-01, -7.7450210219583759e-01, -1.4995353233230992e+00, 8.2430401820014310e-01, 3.0203181275741886e-01, -5.0431498184811262e-02, 1.6144976837089247e+00, -7.7450210219583759e-01, -5.0431498184811262e-02, 2.6690426032037673e-01, 1.5957019862552055e+01, -8.4122236810732502e+00, 1.4776406260905088e+00, 1.1752014846148688e+00, -8.4122236810732502e+00, 5.1499519890947365e+00, -1.2438745362258066e+00, -2.4793216417570077e-01, 1.4776406260905088e+00, -1.2438745362258066e+00, 4.3907159105778493e-01, -1.3201484086761139e-01, 1.1752014846148688e+00, -2.4793216417570077e-01, -1.3201484086761139e-01, 2.8971532780081477e-01, 1.7264626784129828e+01, -7.1717293181514563e+00, 1.8173778481524891e-01, 1.6638776173614904e+00, -7.1717293181514563e+00, 3.3498514787396210e+00, 5.3070406002362888e-02, -8.3211897250151612e-01, 1.8173778481524891e-01, 5.3070406002362888e-02, 3.5750188990225340e-01, 1.7905618157975495e-01, 1.6638776173614904e+00, -8.3211897250151612e-01, 1.7905618157975495e-01, 3.5631089226304219e-01, 1.6951331209440209e+01, -6.9265044010797432e+00, 1.6649505088436367e+00, 1.2476223930507584e+00, -6.9265044010797432e+00, 3.0908353140153588e+00, -7.9349084304257178e-01, -5.5641718253825512e-01, 1.6649505088436367e+00, -7.9349084304257178e-01, 5.1957810047939335e-01, -1.8256034059952314e-01, 1.2476223930507584e+00, -5.5641718253825512e-01, -1.8256034059952314e-01, 4.4508123489757256e-01, 1.9012236387930873e+01, -8.2612906987401367e+00, 1.7822397061112394e+00, 1.3130655015793740e+00, -8.2612906987401367e+00, 3.9785776745364090e+00, -4.5911569699280996e-01, -7.8540804412996457e-01, 1.7822397061112394e+00, -4.5911569699280996e-01, 7.1651557376422037e-01, -2.1545675296134048e-01, 1.3130655015793740e+00, -7.8540804412996457e-01, -2.1545675296134048e-01, 3.0131845380712763e-01, 1.6597075714728486e+01, -5.7800006677690803e+00, 1.5381006811275582e+00, -1.5197413389828438e+00, 
-5.7800006677690803e+00, 3.0963818435550561e+00, -7.6010709310479962e-01, 4.4547615970032189e-01, 1.5381006811275582e+00, -7.6010709310479962e-01, 3.1280696744315561e-01, -1.7575813761777823e-01, -1.5197413389828438e+00, 4.4547615970032189e-01, -1.7575813761777823e-01, 1.6771576241498387e-01, 1.7019126909133337e+01, -6.6527113537535385e+00, 1.3704144651040868e+00, 1.5022343603610107e+00, -6.6527113537535385e+00, 2.9978544936485725e+00, -1.8662862581940526e-01, -6.5208288251651947e-01, 1.3704144651040868e+00, -1.8662862581940526e-01, 5.2409376673618568e-01, -7.5645601483690317e-02, 1.5022343603610107e+00, -6.5208288251651947e-01, -7.5645601483690317e-02, 3.2522439346024634e-01, 1.7662116032947637e+01, -7.3173562685033531e+00, -4.8473999550733515e-01, 1.2177126365029898e+00, -7.3173562685033531e+00, 3.0582743607486380e+00, 9.5372312198528986e-02, -4.4792222499260004e-01, -4.8473999550733515e-01, 9.5372312198528986e-02, 4.2961213757689048e-01, -2.5302983486156821e-01, 1.2177126365029898e+00, -4.4792222499260004e-01, -2.5302983486156821e-01, 3.5756224652772239e-01, 1.6523014322908107e+01, -8.0087006367676903e+00, 1.8183754790473388e+00, 1.3553052743949423e+00, -8.0087006367676903e+00, 4.0733577269970231e+00, -1.1117655839274267e+00, -7.2719651384385020e-01, 1.8183754790473388e+00, -1.1117655839274267e+00, 5.2643538086294572e-01, 1.0149565570079074e-01, 1.3553052743949423e+00, -7.2719651384385020e-01, 1.0149565570079074e-01, 4.9227647014805931e-01, 1.7397143785954697e+01, 9.1482994934314243e-01, -1.2055972918955953e+00, 1.5195127886243485e+00, 9.1482994934314243e-01, 6.0050109036606192e-01, 2.3235888502821123e-01, -2.5896893411041405e-01, -1.2055972918955953e+00, 2.3235888502821123e-01, 4.5114243240770924e-01, -3.9549546725682455e-01, 1.5195127886243485e+00, -2.5896893411041405e-01, -3.9549546725682455e-01, 3.9713449207042850e-01, 1.6754962568939149e+01, -6.9909519455758877e+00, 1.3238214949533700e-02, 1.7461462053388506e+00, -6.9909519455758877e+00, 
3.5271059676844763e+00, -4.8608852379905848e-01, -3.9767641403791087e-01, 1.3238214949533700e-02, -4.8608852379905848e-01, 4.6008364836193916e-01, -2.3283300864365591e-01, 1.7461462053388506e+00, -3.9767641403791087e-01, -2.3283300864365591e-01, 3.6997630813085547e-01, 1.6914438459335880e+01, -8.2018573101250176e+00, 1.8607992969625340e+00, 3.6214197147114746e-01, -8.2018573101250176e+00, 4.1440760753178036e+00, -7.4448077344820940e-01, 2.1802370922440709e-02, 1.8607992969625340e+00, -7.4448077344820940e-01, 4.2429585090834404e-01, 1.5770605248055045e-01, 3.6214197147114746e-01, 2.1802370922440709e-02, 1.5770605248055045e-01, 3.0822560548642208e-01, 1.7076011430674352e+01, -8.8780259960025418e+00, 4.1114807990525343e-01, 1.7160868438637338e+00, -8.8780259960025418e+00, 4.6868693538444646e+00, -3.9792997857664614e-01, -7.8413912548256937e-01, 4.1114807990525343e-01, -3.9792997857664614e-01, 4.9907853940129265e-01, -2.2844650397698621e-01, 1.7160868438637338e+00, -7.8413912548256937e-01, -2.2844650397698621e-01, 3.4562144955490959e-01, 1.6875577654372840e+01, -6.7321113543278024e+00, 1.5467259695484039e+00, 1.5835725354604495e+00, -6.7321113543278024e+00, 3.2243427952557981e+00, -3.9371520741376925e-01, -3.6997207365849999e-01, 1.5467259695484039e+00, -3.9371520741376925e-01, 7.3691512999550857e-01, -6.5365934650236268e-02, 1.5835725354604495e+00, -3.6997207365849999e-01, -6.5365934650236268e-02, 4.7827347437249718e-01, 1.7164841669190295e+01, -3.9184842337808603e+00, -1.9330224573943846e+00, -3.5384656982588716e+00, -3.9184842337808603e+00, 1.8459124004525904e+00, -2.7968361445259859e-01, 6.3051523823343891e-01, -1.9330224573943846e+00, -2.7968361445259859e-01, 9.1319007138394170e-01, 4.4823426503524189e-01, -3.5384656982588716e+00, 6.3051523823343891e-01, 4.4823426503524189e-01, 8.1043864783118424e-01, 1.7776298615595469e+01, -8.3125493630052887e+00, -7.7441931055878699e-01, 1.5226098031185762e+00, -8.3125493630052887e+00, 5.1190411322780580e+00, 
2.2952835214172629e-02, -1.4773103203139837e-01, -7.7441931055878699e-01, 2.2952835214172629e-02, 8.5889441357603480e-01, -6.9917219907711145e-02, 1.5226098031185762e+00, -1.4773103203139837e-01, -6.9917219907711145e-02, 4.2035440559421033e-01, 1.5862691008342372e+01, -6.1176683909011116e+00, 1.2625855228210956e+00, -1.3435168854108823e+00, -6.1176683909011116e+00, 2.3824446972003832e+00, -5.4326864192360957e-01, 4.7940703668476647e-01, 1.2625855228210956e+00, -5.4326864192360957e-01, 4.3406572657200515e-01, 9.5842602881569275e-02, -1.3435168854108823e+00, 4.7940703668476647e-01, 9.5842602881569275e-02, 2.3854989743717084e-01, 1.8256070397132763e+01, -6.9737979519085576e+00, -2.4828924208535295e+00, -6.8067548292463942e-01, -6.9737979519085576e+00, 2.7984958050555582e+00, 6.3576956667988715e-01, 4.5488849481004173e-01, -2.4828924208535295e+00, 6.3576956667988715e-01, 1.1009943270852411e+00, -2.5112157021262832e-01, -6.8067548292463942e-01, 4.5488849481004173e-01, -2.5112157021262832e-01, 6.3576733446761735e-01, 1.7056582256752865e+01, -7.1009743841647808e+00, 1.1160779926499460e+00, 1.6103269168653844e+00, -7.1009743841647808e+00, 3.1733314893589308e+00, -7.0718250574770325e-01, -4.3660325341089939e-01, 1.1160779926499460e+00, -7.0718250574770325e-01, 5.0309713543792534e-01, -6.0959434222991160e-02, 1.6103269168653844e+00, -4.3660325341089939e-01, -6.0959434222991160e-02, 4.6051187668197546e-01, 1.8357242514234677e+01, -8.7847041747386729e+00, 2.9797124907094874e-01, -2.1027146029189812e+00, -8.7847041747386729e+00, 4.3983224978124813e+00, 1.6771532571466041e-01, 8.6501485551462776e-01, 2.9797124907094874e-01, 1.6771532571466041e-01, 6.6655368883518706e-01, -1.0627500830106132e-01, -2.1027146029189812e+00, 8.6501485551462776e-01, -1.0627500830106132e-01, 4.8427082375374647e-01, 1.6772311022508610e+01, -5.5233743204468615e+00, -1.4827239072848402e+00, -1.6683315295413981e-01, -5.5233743204468615e+00, 2.8632372577610519e+00, -1.9908041485308506e-01, 
6.7147007749442378e-02, -1.4827239072848402e+00, -1.9908041485308506e-01, 5.8384443306666967e-01, 1.8696608263835957e-02, -1.6683315295413981e-01, 6.7147007749442378e-02, 1.8696608263835957e-02, 4.1948303453199803e-01, 1.6628501414098437e+01, -8.4601815957694324e+00, -1.2499311118743477e+00, 1.3390492539579859e+00, -8.4601815957694324e+00, 4.3761271922541898e+00, 6.0876619378843877e-01, -5.7252755667562849e-01, -1.2499311118743477e+00, 6.0876619378843877e-01, 5.4159859530740562e-01, 1.6462302179435517e-01, 1.3390492539579859e+00, -5.7252755667562849e-01, 1.6462302179435517e-01, 4.8726092250544450e-01, 1.7067254080129800e+01, -8.6191292214447621e+00, 8.4191297536122567e-01, 1.6991769398963010e+00, -8.6191292214447621e+00, 4.7912823899616992e+00, -9.5325899518160639e-01, -9.9723259667135367e-01, 8.4191297536122567e-01, -9.5325899518160639e-01, 8.3710075559284569e-01, 9.7283473262340187e-02, 1.6991769398963010e+00, -9.9723259667135367e-01, 9.7283473262340187e-02, 3.6200206548711727e-01, 1.7702046820994806e+01, 1.1560960095286070e+00, -3.3325166201283918e+00, -1.4775683676695714e+00, 1.1560960095286070e+00, 1.0791246214081565e+00, 1.8182679840027050e-01, -5.0410612828568047e-01, -3.3325166201283918e+00, 1.8182679840027050e-01, 9.5912816564144698e-01, 3.5209385289513767e-01, -1.4775683676695714e+00, -5.0410612828568047e-01, 3.5209385289513767e-01, 6.1173061322520472e-01, 1.7855425834543190e+01, -3.2697217693515706e+00, 1.4807091014457323e+00, -2.6764346453544805e+00, -3.2697217693515706e+00, 8.3027813273245776e-01, 4.0426879175947228e-02, 3.3848123209104880e-01, 1.4807091014457323e+00, 4.0426879175947228e-02, 6.9975094084688649e-01, -4.2689334899290365e-01, -2.6764346453544805e+00, 3.3848123209104880e-01, -4.2689334899290365e-01, 5.0049960147362493e-01, 1.6401315819584983e+01, -7.9740616572801812e+00, 1.0005058774036371e+00, -1.4203138481446598e+00, -7.9740616572801812e+00, 4.4023705076543553e+00, -9.1941184955615007e-01, 5.7637509877548454e-01, 1.0005058774036371e+00, 
-9.1941184955615007e-01, 4.5841164275306279e-01, 5.3383138750947870e-02, -1.4203138481446598e+00, 5.7637509877548454e-01, 5.3383138750947870e-02, 1.9979339037632937e-01, 1.7332501002967668e+01, -7.0781330453987845e+00, 9.2037176711584545e-01, -1.7498405404426292e+00, -7.0781330453987845e+00, 3.1470072102227165e+00, -6.9097983597892554e-01, 6.2185046840444402e-01, 9.2037176711584545e-01, -6.9097983597892554e-01, 5.5932116260968723e-01, 2.1858582089281359e-01, -1.7498405404426292e+00, 6.2185046840444402e-01, 2.1858582089281359e-01, 5.2680120312008438e-01, 1.7734484227892562e+01, -5.3654898514610219e+00, -1.6146396348259038e+00, 9.9033821011608625e-01, -5.3654898514610219e+00, 2.0384509120220251e+00, 2.5981041293593549e-01, 1.2547778095454426e-01, -1.6146396348259038e+00, 2.5981041293593549e-01, 7.0973144206899874e-01, -2.6410086403128091e-01, 9.9033821011608625e-01, 1.2547778095454426e-01, -2.6410086403128091e-01, 4.9890531981897601e-01, 1.7373803386373812e+01, -7.3655877697476715e+00, 7.8270125086657205e-02, -2.0999061171249407e+00, -7.3655877697476715e+00, 3.8164841996986887e+00, 3.9641030912017372e-02, 6.5278955794156168e-01, 7.8270125086657205e-02, 3.9641030912017372e-02, 4.1742585469994020e-01, 1.6686881152650240e-02, -2.0999061171249407e+00, 6.5278955794156168e-01, 1.6686881152650240e-02, 3.4144491013516665e-01, 1.7266719068868216e+01, -9.0704352506311370e+00, 1.0694216573455857e+00, -1.5795224196213768e+00, -9.0704352506311370e+00, 5.0037919681802911e+00, -9.3097451917909035e-01, 6.6974458417928873e-01, 1.0694216573455857e+00, -9.3097451917909035e-01, 7.5988767453192230e-01, 7.6311108649566889e-02, -1.5795224196213768e+00, 6.6974458417928873e-01, 7.6311108649566889e-02, 2.9489768008036216e-01, 1.6376515340100283e+01, -6.9793256398089296e+00, 1.5243144645041427e+00, 2.7079268382937571e-01, -6.9793256398089296e+00, 3.0541734354131531e+00, -7.7135074324503605e-01, -6.8397349674078733e-02, 1.5243144645041427e+00, -7.7135074324503605e-01, 3.6395566937932222e-01, 
6.2728909404942329e-02, 2.7079268382937571e-01, -6.8397349674078733e-02, 6.2728909404942329e-02, 3.6164470489278866e-01, 1.7516133518582485e+01, -8.3514443126068123e+00, -1.7180303760588957e+00, 1.5748319209869406e+00, -8.3514443126068123e+00, 5.3528291603548226e+00, -3.9980896941006638e-01, -7.0540361469493940e-02, -1.7180303760588957e+00, -3.9980896941006638e-01, 1.2547848615007291e+00, -7.5175461576019531e-01, 1.5748319209869406e+00, -7.0540361469493940e-02, -7.5175461576019531e-01, 5.0207052491314630e-01, 1.6471332655238577e+01, -1.9840240794333170e+00, -2.7675952686613430e+00, 1.7245254662592493e+00, -1.9840240794333170e+00, 1.3786012470006792e+00, -1.7634724450792111e-01, -6.8648055131182217e-01, -2.7675952686613430e+00, -1.7634724450792111e-01, 7.1321367156083215e-01, -1.2327176278311069e-01, 1.7245254662592493e+00, -6.8648055131182217e-01, -1.2327176278311069e-01, 4.9397169669978858e-01, 1.7091226474203204e+01, -4.5675405166074583e+00, 1.4400642541699227e+00, 1.2141833386470169e+00, -4.5675405166074583e+00, 1.3161027292266854e+00, -3.1345863399964818e-01, -5.0066767853585703e-01, 1.4400642541699227e+00, -3.1345863399964818e-01, 6.7234081428616688e-01, -1.3503981152357039e-01, 1.2141833386470169e+00, -5.0066767853585703e-01, -1.3503981152357039e-01, 4.3385283110391992e-01, 1.8246506732367031e+01, -7.6558698630583821e+00, 1.9213354454183826e+00, 1.6373332810021559e+00, -7.6558698630583821e+00, 3.9467944680536289e+00, -4.0297301651530965e-01, -5.2963503443511684e-01, 1.9213354454183826e+00, -4.0297301651530965e-01, 5.3176392085153512e-01, 8.9574705000869517e-02, 1.6373332810021559e+00, -5.2963503443511684e-01, 8.9574705000869517e-02, 4.4536642738675492e-01, 1.7911671783219571e+01, -5.9985252112864700e+00, -4.4745656306102388e-01, 1.1721399057841080e+00, -5.9985252112864700e+00, 2.4139535616836252e+00, -1.7219293329619995e-01, -5.0172832204407780e-01, -4.4745656306102388e-01, -1.7219293329619995e-01, 9.7476836720338544e-01, -3.2171662686669505e-01, 
1.1721399057841080e+00, -5.0172832204407780e-01, -3.2171662686669505e-01, 3.0939894061872220e-01, 1.6155032382388242e+01, -7.2064352418320210e+00, 2.5981969792469695e-01, 2.8727950159025917e-01, -7.2064352418320210e+00, 3.9980129943264391e+00, -5.1910321191687969e-01, 3.2409611261106097e-01, 2.5981969792469695e-01, -5.1910321191687969e-01, 6.1299094127694165e-01, -4.6966198549264621e-01, 2.8727950159025917e-01, 3.2409611261106097e-01, -4.6966198549264621e-01, 4.1154555711070495e-01, 1.5942674207027702e+01, -8.2948902984678892e+00, -1.9203061850624374e+00, 3.4964168574330856e-01, -8.2948902984678892e+00, 4.7444890690473800e+00, 6.1177999082136658e-01, -5.6828655972420206e-01, -1.9203061850624374e+00, 6.1177999082136658e-01, 6.4406756760102324e-01, 3.0795787565507104e-01, 3.4964168574330856e-01, -5.6828655972420206e-01, 3.0795787565507104e-01, 3.5590181457520187e-01, 1.6944946717618595e+01, -6.0254959800609988e+00, -2.8357767854573899e+00, 4.1473435870618813e-01, -6.0254959800609988e+00, 3.0665746424677778e+00, 2.3975145045391522e-01, 3.0627924467723289e-01, -2.8357767854573899e+00, 2.3975145045391522e-01, 1.2345787399737920e+00, -3.3814683111213717e-01, 4.1473435870618813e-01, 3.0627924467723289e-01, -3.3814683111213717e-01, 3.3103947150526031e-01, 1.7117314554066201e+01, -8.8236140201371818e+00, 1.0581536138514975e+00, 1.3980599420035920e+00, -8.8236140201371818e+00, 4.8129582535771425e+00, -8.8712259222580214e-01, -7.9225911531011384e-01, 1.0581536138514975e+00, -8.8712259222580214e-01, 5.7517102508127571e-01, 7.2363511911666983e-02, 1.3980599420035920e+00, -7.9225911531011384e-01, 7.2363511911666983e-02, 2.9909589630683520e-01, 1.6972810488291550e+01, -5.9892458751059818e+00, 1.2149630718729185e+00, -2.0798444379053964e+00, -5.9892458751059818e+00, 2.4380332297536320e+00, -2.0344300671588472e-01, 5.3145291090067048e-01, 1.2149630718729185e+00, -2.0344300671588472e-01, 6.6369223311103109e-01, -6.0094731681376024e-04, -2.0798444379053964e+00, 
5.3145291090067048e-01, -6.0094731681376024e-04, 5.7957663759360090e-01, 1.7343766938479227e+01, -6.6063740219858600e+00, -2.4017757423619175e+00, 1.6051740578902221e+00, -6.6063740219858600e+00, 3.4993875746971375e+00, -3.2234547803818028e-02, -3.2663881218999763e-01, -2.4017757423619175e+00, -3.2234547803818028e-02, 1.2504135194559383e+00, -5.1587400430594810e-01, 1.6051740578902221e+00, -3.2663881218999763e-01, -5.1587400430594810e-01, 3.0068541586224429e-01, 1.6848023901869816e+01, -7.7245284650863724e+00, -6.3375161950989023e-01, 1.7598733104365907e+00, -7.7245284650863724e+00, 3.6368411257897200e+00, 5.4898635756645486e-01, -9.3708181741907781e-01, -6.3375161950989023e-01, 5.4898635756645486e-01, 7.2509667201839190e-01, -4.1251648474508129e-01, 1.7598733104365907e+00, -9.3708181741907781e-01, -4.1251648474508129e-01, 4.9975565734465982e-01, 1.7511854581625478e+01, -8.5404613942496930e+00, 4.7313924091567117e-01, -5.1532816823540972e-01, -8.5404613942496930e+00, 4.4031820387689145e+00, -3.6267289012126913e-01, -1.3528742104501390e-02, 4.7313924091567117e-01, -3.6267289012126913e-01, 6.9456125684342040e-01, -1.6317464306172286e-01, -5.1532816823540972e-01, -1.3528742104501390e-02, -1.6317464306172286e-01, 4.5384531453753552e-01, 1.6749377427726831e+01, -5.7772682564884885e+00, -1.9377388468336154e+00, 1.2795905554738882e+00, -5.7772682564884885e+00, 2.3751734474029860e+00, 3.8187973000458070e-01, -1.1511718783472463e-01, -1.9377388468336154e+00, 3.8187973000458070e-01, 1.1071634761400777e+00, -9.4204177495531310e-02, 1.2795905554738882e+00, -1.1511718783472463e-01, -9.4204177495531310e-02, 5.0911537749846014e-01, 1.6804926100729805e+01, -6.7694804790437084e+00, 1.3290892723432168e+00, -2.0329912302039057e+00, -6.7694804790437084e+00, 3.0050053244019006e+00, -6.0641807348948229e-01, 6.5667795941713925e-01, 1.3290892723432168e+00, -6.0641807348948229e-01, 4.4527286361472279e-01, -2.5453902810523399e-01, -2.0329912302039057e+00, 6.5667795941713925e-01, 
-2.5453902810523399e-01, 3.9739245233707388e-01, 1.5868413007832768e+01, -9.6058749345061720e+00, -1.2704002060724218e+00, 8.6185583106525021e-01, -9.6058749345061720e+00, 5.8473081312071127e+00, 7.6752664926696923e-01, -4.0695460424360097e-01, -1.2704002060724218e+00, 7.6752664926696923e-01, 1.1283540336783828e+00, -1.6618593567686379e-01, 8.6185583106525021e-01, -4.0695460424360097e-01, -1.6618593567686379e-01, 4.6113112128941769e-01, 1.8481541594398891e+01, -6.6615632043770274e+00, -7.2359822450546107e-01, -1.4644271889003471e+00, -6.6615632043770274e+00, 2.4918957745186043e+00, 9.0679417982270888e-02, 5.3245874472517518e-01, -7.2359822450546107e-01, 9.0679417982270888e-02, 4.0656985508236138e-01, 1.5911683154833606e-01, -1.4644271889003471e+00, 5.3245874472517518e-01, 1.5911683154833606e-01, 3.2173334280500521e-01, 1.6228236401107939e+01, -8.1101411423722158e+00, 1.3634571880312281e+00, -2.6078480124847037e-03, -8.1101411423722158e+00, 5.1879397503268816e+00, -2.9695259856663125e-01, -6.7344064243872026e-01, 1.3634571880312281e+00, -2.9695259856663125e-01, 5.2461909416950570e-01, -1.3795586005253810e-01, -2.6078480124847037e-03, -6.7344064243872026e-01, -1.3795586005253810e-01, 4.3066586374098781e-01, 1.5589961900178967e+01, -4.5372333345909119e+00, -3.3934718591944404e+00, 1.3540033039710933e+00, -4.5372333345909119e+00, 1.8585499822791560e+00, 6.1587937597953046e-01, -5.8318661350036138e-01, -3.3934718591944404e+00, 6.1587937597953046e-01, 1.4074389868822910e+00, -4.7675943751143041e-01, 1.3540033039710933e+00, -5.8318661350036138e-01, -4.7675943751143041e-01, 4.2143576596484850e-01, 1.5611938835367109e+01, -7.8362555612747959e+00, 1.1634362937361629e+00, 8.1530526315465568e-01, -7.8362555612747959e+00, 4.0174329797324688e+00, -7.4513440796506791e-01, -3.6945515457627198e-01, 1.1634362937361629e+00, -7.4513440796506791e-01, 3.9648861064960028e-01, -3.2953317777742928e-02, 8.1530526315465568e-01, -3.6945515457627198e-01, -3.2953317777742928e-02, 
3.7578484154027375e-01, 1.8315149213395173e+01, -7.7012188993654940e+00, 1.1508124885520843e+00, 1.3123590383630339e+00, -7.7012188993654940e+00, 3.4780615549256009e+00, -1.9166820372788307e-01, -7.6063211015438847e-01, 1.1508124885520843e+00, -1.9166820372788307e-01, 5.4468638241889589e-01, -1.2072611619645017e-01, 1.3123590383630339e+00, -7.6063211015438847e-01, -1.2072611619645017e-01, 2.9841808326885588e-01, 1.6362440054767124e+01, -5.9499672523232112e+00, 1.0052721428804259e+00, -1.3668397556002811e+00, -5.9499672523232112e+00, 3.2387435072295325e+00, -7.2396011033843383e-02, -1.5809150279620302e-01, 1.0052721428804259e+00, -7.2396011033843383e-02, 7.2454982301604076e-01, -2.0381095229621829e-01, -1.3668397556002811e+00, -1.5809150279620302e-01, -2.0381095229621829e-01, 5.1930847189189466e-01, 1.5436369548255527e+01, -9.1710802008604020e+00, 1.4209357173119814e+00, -5.2333006294648410e-01, -9.1710802008604020e+00, 5.4880135746084751e+00, -9.2088987123056165e-01, 1.8726405116782840e-01, 1.4209357173119814e+00, -9.2088987123056165e-01, 8.3812049939319733e-01, 2.7683676776817306e-01, -5.2333006294648410e-01, 1.8726405116782840e-01, 2.7683676776817306e-01, 4.1959049781180191e-01, 1.5666964412535105e+01, -9.7207170538441261e+00, 1.4333836194418987e+00, 1.5960317913987163e+00, -9.7207170538441261e+00, 6.6896120732782265e+00, -5.5802284343341291e-01, -1.3529115567912897e+00, 1.4333836194418987e+00, -5.5802284343341291e-01, 4.9413228052236202e-01, -1.9425632733667825e-01, 1.5960317913987163e+00, -1.3529115567912897e+00, -1.9425632733667825e-01, 4.8918814427528240e-01, 1.5290527713026155e+01, -7.6833137332910173e+00, -1.0751883919467500e+00, 1.6910116895309504e+00, -7.6833137332910173e+00, 3.9082192239312277e+00, 4.6621331985589443e-01, -9.1883958537926080e-01, -1.0751883919467500e+00, 4.6621331985589443e-01, 5.9851497650838281e-01, -2.2322179187601654e-01, 1.6910116895309504e+00, -9.1883958537926080e-01, -2.2322179187601654e-01, 3.9830059929712897e-01, 
1.7658936182671727e+01, -7.9196848078408060e+00, 2.0235234121177790e+00, 1.4901521845277215e+00, -7.9196848078408060e+00, 3.6001776162691934e+00, -1.0140973115763243e+00, -6.7507570700253083e-01, 2.0235234121177790e+00, -1.0140973115763243e+00, 6.3032871812139324e-01, -7.6750309951210835e-02, 1.4901521845277215e+00, -6.7507570700253083e-01, -7.6750309951210835e-02, 5.4789767104015730e-01, 1.5457997297160160e+01, -6.1526949506673070e+00, 6.7273961399725346e-01, 9.5770758001695433e-01, -6.1526949506673070e+00, 2.7731742075607597e+00, -4.3012052691323910e-01, -1.2255714588398386e-01, 6.7273961399725346e-01, -4.3012052691323910e-01, 3.3958384751714510e-01, -1.2798871872114692e-01, 9.5770758001695433e-01, -1.2255714588398386e-01, -1.2798871872114692e-01, 2.7268656714191419e-01, 1.7503328855994745e+01, -9.9687455312455189e+00, -1.0297062203185405e+00, -1.7031047478256800e+00, -9.9687455312455189e+00, 5.9614589455051270e+00, 6.5247739444482156e-01, 7.6794594281655504e-01, -1.0297062203185405e+00, 6.5247739444482156e-01, 3.5943482704316476e-01, -1.7606821206230184e-02, -1.7031047478256800e+00, 7.6794594281655504e-01, -1.7606821206230184e-02, 3.2716633335958190e-01, 1.6373795057571847e+01, -7.7660482226949981e+00, 1.5882849012515776e+00, -1.8494279583300000e-01, -7.7660482226949981e+00, 3.9632086562391491e+00, -1.0503970491370456e+00, -2.0638321023081674e-01, 1.5882849012515776e+00, -1.0503970491370456e+00, 7.3133644598289149e-01, 1.7037283334568715e-01, -1.8494279583300000e-01, -2.0638321023081674e-01, 1.7037283334568715e-01, 3.6991380078193070e-01, 1.7380218869596824e+01, -4.6733393169385451e+00, -1.0112374919091498e+00, 6.9440806713566350e-01, -4.6733393169385451e+00, 1.7664189364611840e+00, 5.2267413518611217e-01, -4.7073978286014190e-01, -1.0112374919091498e+00, 5.2267413518611217e-01, 7.8792508851916998e-01, -6.0593541507832505e-01, 6.9440806713566350e-01, -4.7073978286014190e-01, -6.0593541507832505e-01, 4.8532777010071737e-01, 1.6688480100290171e+01, 
-2.4452257429467745e+00, -4.3502468301668573e+00, 2.8686321885832311e-01, -2.4452257429467745e+00, 1.9558655769328408e+00, -2.2650646337660044e-01, 2.7255380760267456e-01, -4.3502468301668573e+00, -2.2650646337660044e-01, 1.8601358238474783e+00, -7.0528037041038183e-01, 2.8686321885832311e-01, 2.7255380760267456e-01, -7.0528037041038183e-01, 8.8533224650523568e-01, 1.7558888970958407e+01, -3.5525153278861916e+00, 1.3269063954226350e+00, 1.2711647576232004e+00, -3.5525153278861916e+00, 1.9328537647668227e+00, -7.6166921543233657e-01, 2.0334544186972267e-01, 1.3269063954226350e+00, -7.6166921543233657e-01, 4.4584049689038086e-01, 2.2203925188891041e-02, 1.2711647576232004e+00, 2.0334544186972267e-01, 2.2203925188891041e-02, 3.5499333012698542e-01, 1.7503104597019327e+01, -8.2902573074777788e+00, 1.4809359053080735e+00, 5.4433271851216269e-01, -8.2902573074777788e+00, 4.7655682747958519e+00, 4.0148581275228912e-02, -8.3819205806563979e-01, 1.4809359053080735e+00, 4.0148581275228912e-02, 7.9084822528710697e-01, -4.0761145344905420e-01, 5.4433271851216269e-01, -8.3819205806563979e-01, -4.0761145344905420e-01, 7.7052589374216640e-01, 1.6708990617842698e+01, -4.8454163020535930e+00, -2.2673879150930514e+00, -7.3833835476206833e-01, -4.8454163020535930e+00, 1.8474149183735951e+00, 5.7960906898683118e-01, -2.9034175784539162e-01, -2.2673879150930514e+00, 5.7960906898683118e-01, 9.1094402341096992e-01, 2.7337020492260133e-01, -7.3833835476206833e-01, -2.9034175784539162e-01, 2.7337020492260133e-01, 6.2002369775813038e-01, 1.6722997168847904e+01, -8.6990946412050523e+00, 3.1803438787591409e-01, 1.7340206087992600e+00, -8.6990946412050523e+00, 4.6729144559920757e+00, -4.4803121361281151e-01, -7.6591403512264922e-01, 3.1803438787591409e-01, -4.4803121361281151e-01, 6.2109380665325764e-01, -2.1702562038719872e-01, 1.7340206087992600e+00, -7.6591403512264922e-01, -2.1702562038719872e-01, 3.0659507102349531e-01, 1.6670375714696366e+01, -6.3873997157981712e+00, 
1.1422940619218669e+00, 1.6114990153830422e+00, -6.3873997157981712e+00, 2.7467584657824697e+00, -1.4220435608326074e-01, -7.6707780933203606e-01, 1.1422940619218669e+00, -1.4220435608326074e-01, 4.0764553257721842e-01, -2.8201231903517046e-02, 1.6114990153830422e+00, -7.6707780933203606e-01, -2.8201231903517046e-02, 2.3272465802632000e-01, 1.7177850258051929e+01, -7.7485985175283538e+00, 1.7642463130395969e+00, 1.1947573624901038e+00, -7.7485985175283538e+00, 3.7541650257159613e+00, -9.4830419732951277e-01, -3.4701571693749028e-01, 1.7642463130395969e+00, -9.4830419732951277e-01, 2.7843609384632584e-01, 2.2138492879799063e-02, 1.1947573624901038e+00, -3.4701571693749028e-01, 2.2138492879799063e-02, 2.4621808163691816e-01, 1.7591518642987552e+01, -6.1334674582773179e+00, -1.0188517048841167e-01, -1.0141463265801072e+00, -6.1334674582773179e+00, 2.8881371999494077e+00, -6.5349503463848291e-01, 6.1786208815218846e-01, -1.0188517048841167e-01, -6.5349503463848291e-01, 6.8766586365008342e-01, -1.3177271773416527e-01, -1.0141463265801072e+00, 6.1786208815218846e-01, -1.3177271773416527e-01, 3.5763038289252136e-01, 1.6460555062664469e+01, -1.0801338119838852e-01, -2.1676479110304978e+00, 2.3797610762749716e-01, -1.0801338119838852e-01, 1.0801941303644456e+00, -5.3813243483207052e-01, -5.6237375422626656e-01, -2.1676479110304978e+00, -5.3813243483207052e-01, 5.7967459423528112e-01, 2.4903805958998279e-01, 2.3797610762749716e-01, -5.6237375422626656e-01, 2.4903805958998279e-01, 2.9853118936744938e-01, 1.6363067821237387e+01, -7.5801725217658182e+00, -2.3154589119114033e+00, 8.6491950425521069e-01, -7.5801725217658182e+00, 4.1093160719549147e+00, 7.5936478003922270e-01, -7.9305772741362146e-01, -2.3154589119114033e+00, 7.5936478003922270e-01, 5.5950251851751887e-01, 1.9960092492144615e-02, 8.6491950425521069e-01, -7.9305772741362146e-01, 1.9960092492144615e-02, 3.6240473105355619e-01, 1.7604721419101381e+01, -5.5982944484012851e+00, -5.1115292685000280e-01, 
1.1238691719448353e+00, -5.5982944484012851e+00, 1.9974013218860862e+00, 4.2330085337569273e-01, -3.1553160809344194e-01, -5.1115292685000280e-01, 4.2330085337569273e-01, 3.9483438550305838e-01, 1.4146215690809766e-01, 1.1238691719448353e+00, -3.1553160809344194e-01, 1.4146215690809766e-01, 3.0911274591187871e-01, 1.7481000191587800e+01, -8.4105488031411451e+00, 1.3039429588996374e+00, -1.8943355562813982e+00, -8.4105488031411451e+00, 4.2117242584921115e+00, -9.9620212290555921e-01, 7.2096851111157401e-01, 1.3039429588996374e+00, -9.9620212290555921e-01, 1.0262916082555362e+00, 2.6308977000995892e-01, -1.8943355562813982e+00, 7.2096851111157401e-01, 2.6308977000995892e-01, 4.2893693974357433e-01, 1.6142451134319948e+01, -8.5829976232168583e+00, -1.1914111242942487e+00, -1.7828308739068168e+00, -8.5829976232168583e+00, 4.5916185650689387e+00, 6.9925837704708993e-01, 8.5594103603863902e-01, -1.1914111242942487e+00, 6.9925837704708993e-01, 6.4694343394202003e-01, -2.2381142282272101e-01, -1.7828308739068168e+00, 8.5594103603863902e-01, -2.2381142282272101e-01, 5.4706240054847100e-01, 1.6219952760326322e+01, -5.2583246351358710e+00, -1.1318857704341121e+00, 1.5464549680915729e+00, -5.2583246351358710e+00, 1.9538218170664110e+00, 4.7907376803457552e-01, -5.5765002134851860e-01, -1.1318857704341121e+00, 4.7907376803457552e-01, 8.0266791476788724e-01, 4.1910587271559552e-01, 1.5464549680915729e+00, -5.5765002134851860e-01, 4.1910587271559552e-01, 6.1337978657368797e-01, 1.6349812064295705e+01, -9.1250338518494676e+00, 1.4372115668954004e+00, 1.3574151001729744e+00, -9.1250338518494676e+00, 5.6689651253021376e+00, -7.7650769531650343e-01, -1.0781752355166871e+00, 1.4372115668954004e+00, -7.7650769531650343e-01, 5.1241508875946074e-01, -1.1886028240939153e-01, 1.3574151001729744e+00, -1.0781752355166871e+00, -1.1886028240939153e-01, 4.2133699414022918e-01, 1.7730633966916006e+01, -4.2079378425024334e+00, -2.4082318029242074e+00, -2.9326567435152695e+00, 
-4.2079378425024334e+00, 1.8729697103084302e+00, -1.7025332772484633e-01, 6.4769249581465438e-01, -2.4082318029242074e+00, -1.7025332772484633e-01, 9.6422971517920864e-01, 4.9637876618550530e-01, -2.9326567435152695e+00, 6.4769249581465438e-01, 4.9637876618550530e-01, 9.0607906741843292e-01, 1.6520034768467930e+01, -9.3863666536330559e+00, 1.6290624727505552e+00, 1.0051389816863621e+00, -9.3863666536330559e+00, 5.5944801287984092e+00, -9.4935489921666538e-01, -4.5064099036376648e-01, 1.6290624727505552e+00, -9.4935489921666538e-01, 5.8585060608512141e-01, -2.9952535059011953e-01, 1.0051389816863621e+00, -4.5064099036376648e-01, -2.9952535059011953e-01, 4.7197837495700723e-01, 1.7111516934883419e+01, -6.5918535028673038e+00, 1.6710814028293859e+00, -9.1262323378824761e-01, -6.5918535028673038e+00, 3.0835319597298456e+00, -2.6912281791743353e-01, 4.2893119166797788e-01, 1.6710814028293859e+00, -2.6912281791743353e-01, 4.2342213175980664e-01, -6.1060011100531768e-02, -9.1262323378824761e-01, 4.2893119166797788e-01, -6.1060011100531768e-02, 3.3382324238618233e-01, 1.7310554752654891e+01, -6.3254698558225808e+00, -1.5889046370618964e+00, 4.7998865450534484e-01, -6.3254698558225808e+00, 3.5951424725613625e+00, -4.3752933180985376e-01, -2.0052810357878864e-01, -1.5889046370618964e+00, -4.3752933180985376e-01, 1.1169674552585609e+00, -4.3289156862570732e-01, 4.7998865450534484e-01, -2.0052810357878864e-01, -4.3289156862570732e-01, 1.0348453338540857e+00, 1.5725152411710674e+01, -8.1251695223259723e+00, -1.6221105906193571e+00, -6.5726645228657188e-01, -8.1251695223259723e+00, 4.2655830965537183e+00, 6.8154083535105903e-01, 4.4651245957010732e-01, -1.6221105906193571e+00, 6.8154083535105903e-01, 6.2732001380343250e-01, -2.8865920201796080e-01, -6.5726645228657188e-01, 4.4651245957010732e-01, -2.8865920201796080e-01, 3.1861318811044576e-01, 1.5902288815268022e+01, -8.2452928765255784e+00, -2.0278743230239088e-01, 6.3486533567920422e-01, -8.2452928765255784e+00, 
4.6020174712364978e+00, -3.3658080874833285e-01, -4.3977303097587184e-01, -2.0278743230239088e-01, -3.3658080874833285e-01, 6.0187702290916412e-01, 1.1014613257737753e-01, 6.3486533567920422e-01, -4.3977303097587184e-01, 1.1014613257737753e-01, 4.8184779224444352e-01, 1.6317485422902017e+01, -9.0944710620236826e+00, 1.3236077186681450e+00, 1.0543599207408361e+00, -9.0944710620236826e+00, 5.0770952150987654e+00, -6.8283997895415571e-01, -6.1415172571988774e-01, 1.3236077186681450e+00, -6.8283997895415571e-01, 4.6986429225388077e-01, -7.1779176794846591e-02, 1.0543599207408361e+00, -6.1415172571988774e-01, -7.1779176794846591e-02, 3.6646138105239617e-01, 1.6629466429731355e+01, -5.1277919122112783e+00, 1.4340055938268250e+00, 7.5497557373088631e-01, -5.1277919122112783e+00, 2.5533892790953709e+00, -6.1907458698641737e-01, 2.8548716783701866e-01, 1.4340055938268250e+00, -6.1907458698641737e-01, 5.5021114982679775e-01, 2.1988595200875122e-01, 7.5497557373088631e-01, 2.8548716783701866e-01, 2.1988595200875122e-01, 4.6790021907528190e-01, 1.8051015423857478e+01, -6.3374886699541655e+00, 6.7052196738207570e-01, 1.7901863112928220e+00, -6.3374886699541655e+00, 3.6107306567453308e+00, -1.1554776709999790e+00, -1.6138128679965386e-01, 6.7052196738207570e-01, -1.1554776709999790e+00, 6.6215475367233645e-01, -2.0835424063584101e-01, 1.7901863112928220e+00, -1.6138128679965386e-01, -2.0835424063584101e-01, 3.8230347739520337e-01, 1.4983783383912956e+01, -6.5303033327345901e+00, 1.5693503671455411e+00, 1.3115128213232672e+00, -6.5303033327345901e+00, 3.3609306656827300e+00, -3.8347465608612052e-01, -8.5246645597040638e-01, 1.5693503671455411e+00, -3.8347465608612052e-01, 4.6444575454540948e-01, 1.1133056795538820e-01, 1.3115128213232672e+00, -8.5246645597040638e-01, 1.1133056795538820e-01, 4.2050090792459205e-01, 1.6516272469929035e+01, -8.1086408858622381e+00, 1.2444910428820757e+00, 1.4111414188071807e+00, -8.1086408858622381e+00, 4.7397792897538693e+00, 
-3.4785795277638909e-01, -3.0333528301140100e-01, 1.2444910428820757e+00, -3.4785795277638909e-01, 6.7086370063026790e-01, 2.4773393103872440e-03, 1.4111414188071807e+00, -3.0333528301140100e-01, 2.4773393103872440e-03, 4.3791110589613524e-01, 1.6929227088899484e+01, -7.8477753916727986e+00, 5.9122351013238139e-01, 1.1995974186685161e+00, -7.8477753916727986e+00, 5.0130450761309371e+00, -4.8119378860454465e-01, -7.7299224219143381e-01, 5.9122351013238139e-01, -4.8119378860454465e-01, 6.6837844940101898e-01, -2.8293163674859706e-01, 1.1995974186685161e+00, -7.7299224219143381e-01, -2.8293163674859706e-01, 3.2651017287964618e-01, 1.5491954999314911e+01, -6.9941923612056076e+00, 1.6077749159874175e+00, 7.8834911790693296e-01, -6.9941923612056076e+00, 3.9990238979833492e+00, -3.1324180602386442e-01, -2.9940611862675603e-01, 1.6077749159874175e+00, -3.1324180602386442e-01, 3.8426170077342980e-01, 6.1325558045078110e-02, 7.8834911790693296e-01, -2.9940611862675603e-01, 6.1325558045078110e-02, 1.9844293738738106e-01, 1.8256490990353864e+01, -7.2596656308294474e+00, 7.2457573583544210e-02, 1.6393036849568940e+00, -7.2596656308294474e+00, 3.1514450092236164e+00, 1.1658304601420899e-01, -4.7408825564360935e-01, 7.2457573583544210e-02, 1.1658304601420899e-01, 4.8823333158455351e-01, -9.9774500139366201e-02, 1.6393036849568940e+00, -4.7408825564360935e-01, -9.9774500139366201e-02, 3.6855349855371966e-01, 1.6820120859691094e+01, -6.8557939495607343e+00, 9.3076613697930166e-01, 1.3614159506132830e+00, -6.8557939495607343e+00, 2.8579888611432107e+00, -4.9935124766213668e-01, -5.7041886268769781e-01, 9.3076613697930166e-01, -4.9935124766213668e-01, 4.6829838997314921e-01, -1.4197642929627971e-01, 1.3614159506132830e+00, -5.7041886268769781e-01, -1.4197642929627971e-01, 4.3316577284207314e-01, 1.6419872338608442e+01, -8.5832050734274041e+00, 1.7192566692161901e+00, -5.1218989222962830e-01, -8.5832050734274041e+00, 4.4976512426775175e+00, -9.1124533514664441e-01, 
2.8795414896579641e-01, 1.7192566692161901e+00, -9.1124533514664441e-01, 7.4971043306955509e-01, -5.5608503289291955e-01, -5.1218989222962830e-01, 2.8795414896579641e-01, -5.5608503289291955e-01, 4.6700869661319677e-01, 1.8135538967327214e+01, -3.2166336691478290e+00, -2.4622113217501940e+00, -3.6996532735385559e+00, -3.2166336691478290e+00, 1.2549932403658035e+00, -2.2292652269956342e-01, 5.6100325796761008e-01, -2.4622113217501940e+00, -2.2292652269956342e-01, 1.0930566691740100e+00, 4.2547956233729428e-01, -3.6996532735385559e+00, 5.6100325796761008e-01, 4.2547956233729428e-01, 9.9882325270528738e-01, 1.7439351281629058e+01, -5.8296895831009508e+00, 1.5263763648410595e+00, -1.0127116989967861e+00, -5.8296895831009508e+00, 3.0345134422808195e+00, -1.2453100751923698e+00, 2.5921890911740408e-01, 1.5263763648410595e+00, -1.2453100751923698e+00, 6.4872185802115923e-01, -1.0471192819150560e-01, -1.0127116989967861e+00, 2.5921890911740408e-01, -1.0471192819150560e-01, 3.4321528712268262e-01, 1.8225452775936304e+01, -8.3661120474364186e+00, 1.6298037497783302e-01, 1.3744275939125781e+00, -8.3661120474364186e+00, 4.1934382694433019e+00, -4.2888473389741155e-01, -3.3111383016288920e-01, 1.6298037497783302e-01, -4.2888473389741155e-01, 3.9223109407533963e-01, -2.8438634962977571e-01, 1.3744275939125781e+00, -3.3111383016288920e-01, -2.8438634962977571e-01, 3.5861917482977251e-01, 1.7848001583696501e+01, -9.0977146732770837e+00, 1.3037096678166800e+00, -1.3549605490328065e-01, -9.0977146732770837e+00, 4.6663261401614928e+00, -6.7036638792592496e-01, 1.9232787391811296e-01, 1.3037096678166800e+00, -6.7036638792592496e-01, 1.1260447105843117e+00, -9.1370205009458638e-03, -1.3549605490328065e-01, 1.9232787391811296e-01, -9.1370205009458638e-03, 5.2699958100439070e-01
-  };
-  std::vector em = em_x; 
+  std::vector info = {-2.1000000000000000e+01, 2.1000000000000000e+01,
+                              1.0500000000000000e+02,  1.0000000000000000e+00,
+                              1.0000000000000000e+01,  -1.0000000000000000e+00};
+  std::vector em_x = {
+      9.3816147034272368e-01,  -1.6703373029862567e-01, -4.4294526064601734e-02,
+      -2.8798505489184573e-01, -1.6703373029862567e-01, 9.2489218226366088e-01,
+      -2.8928196536572048e-01, -4.7833509099876154e-01, -4.4294526064601734e-02,
+      -2.8928196536572048e-01, 5.7034320185695120e-01,  1.8771147911830000e-01,
+      -2.8798505489184573e-01, -4.7833509099876154e-01, 1.8771147911830000e-01,
+      4.0174654365823070e-01,  8.4370316144902313e-01,  -3.7813146789689916e-02,
+      -3.6989397568296523e-01, -4.0554075086539937e-01, -3.7813146789689916e-02,
+      6.5766402633747112e-01,  -4.2312966361682885e-01, 1.2685067374257861e-01,
+      -3.6989397568296523e-01, -4.2312966361682885e-01, 6.0171576901660107e-01,
+      9.8283160997298613e-02,  -4.0554075086539937e-01, 1.2685067374257861e-01,
+      9.8283160997298613e-02,  2.1324148100625978e-01,  9.7843596341516559e-01,
+      -1.0492833888237871e-01, -1.0538688914576379e-01, -2.0453551592353389e-01,
+      -1.0492833888237871e-01, 7.7943976693565231e-01,  -1.5898500035781410e-01,
+      9.4834209331437741e-02,  -1.0538688914576379e-01, -1.5898500035781410e-01,
+      7.4778071691708869e-01,  -6.1895255142095873e-01, -2.0453551592353389e-01,
+      9.4834209331437741e-02,  -6.1895255142095873e-01, 6.0844713798743799e-01,
+      1.0079020879244640e+00,  -2.3855984150631487e-01, -3.4608276043004524e-02,
+      -4.7448768267289088e-01, -2.3855984150631487e-01, 4.9732018171028253e-01,
+      -3.1320787082485729e-01, -1.4528004145602180e-01, -3.4608276043004524e-02,
+      -3.1320787082485729e-01, 4.7696729363954582e-01,  1.1723268074231248e-01,
+      -4.7448768267289088e-01, -1.4528004145602180e-01, 1.1723268074231248e-01,
+      4.0511515406019899e-01,  6.9317482874286218e-01,  3.8721526993960850e-02,
+      -1.4829415254252801e-01, 1.9079858574793401e-01,  3.8721526993960850e-02,
+      4.0694636061668399e-01,  -1.6669745680958750e-01, -2.9455183336619600e-01,
+      -1.4829415254252801e-01, -1.6669745680958750e-01, 3.5115749833010762e-01,
+      2.7972274527006624e-02,  1.9079858574793401e-01,  -2.9455183336619600e-01,
+      2.7972274527006624e-02,  2.9261590797274251e-01,  1.0547679530528609e+00,
+      -7.5579498870314032e-01, -2.5907966401834215e-01, 2.4868586817732710e-01,
+      -7.5579498870314032e-01, 7.7943976693565231e-01,  -2.7840391808748116e-02,
+      -1.2197364072902048e-02, -2.5907966401834215e-01, -2.7840391808748116e-02,
+      2.9514791871283574e-01,  -1.4724344774699041e-01, 2.4868586817732710e-01,
+      -1.2197364072902048e-02, -1.4724344774699041e-01, 2.7333766016385419e-01,
+      7.1494810971572931e-01,  -3.8965690625377569e-01, -2.0579233200987346e-01,
+      1.5638053130676866e-01,  -3.8965690625377569e-01, 5.7034320185695120e-01,
+      -2.4759451701376567e-01, 1.7075608253389668e-01,  -2.0579233200987346e-01,
+      -2.4759451701376567e-01, 4.4489736273181785e-01,  -3.3903230422862907e-01,
+      1.5638053130676866e-01,  1.7075608253389668e-01,  -3.3903230422862907e-01,
+      2.7328888249045513e-01,  8.2256346358859145e-01,  -3.0201999278197073e-01,
+      -1.0847299712994765e-01, -3.3355086180245408e-01, -3.0201999278197073e-01,
+      6.2584346461620799e-01,  -2.3661062787307036e-02, 2.5559368174587482e-02,
+      -1.0847299712994765e-01, -2.3661062787307036e-02, 6.2471079378938721e-01,
+      -3.9923912886685187e-01, -3.3355086180245408e-01, 2.5559368174587482e-02,
+      -3.9923912886685187e-01, 4.9732018171028253e-01,  7.4300380743635475e-01,
+      -4.4058918654051710e-01, -2.5375568912718455e-01, 5.6029289688609013e-02,
+      -4.4058918654051710e-01, 4.3025970692640908e-01,  -1.0172468432781301e-02,
+      -1.2941908849275471e-01, -2.5375568912718455e-01, -1.0172468432781301e-02,
+      4.2845986148404269e-01,  -1.7460159217638957e-01, 5.6029289688609013e-02,
+      -1.2941908849275471e-01, -1.7460159217638957e-01, 3.8138264541081690e-01,
+      7.4620920788925238e-01,  -1.6424881399213448e-01, -2.2361648073503249e-01,
+      -4.0424642786821852e-01, -1.6424881399213448e-01, 6.6730350814323314e-01,
+      -2.1317459925340326e-01, 1.9376435628360764e-01,  -2.2361648073503249e-01,
+      -2.1317459925340326e-01, 5.8089947575573275e-01,  -1.0277026325170427e-01,
+      -4.0424642786821852e-01, 1.9376435628360764e-01,  -1.0277026325170427e-01,
+      3.1673915325970592e-01,  9.3159182283013242e-01,  2.7352164674733859e-01,
+      -3.0194015433346399e-01, -1.6581739923723970e-01, 2.7352164674733859e-01,
+      8.7811025837608414e-01,  -5.3657994020118693e-01, -1.4168666065928759e-02,
+      -3.0194015433346399e-01, -5.3657994020118693e-01, 5.7772908002379919e-01,
+      -2.6766718990342830e-01, -1.6581739923723970e-01, -1.4168666065928759e-02,
+      -2.6766718990342830e-01, 4.3044918224444273e-01,  7.5776646946384441e-01,
+      -2.8544634168978411e-01, -4.6917826735271817e-01, 9.0648108100258265e-02,
+      -2.8544634168978411e-01, 6.3333781479517670e-01,  -1.5635743535006455e-01,
+      -3.2225585549698127e-01, -4.6917826735271817e-01, -1.5635743535006455e-01,
+      5.8894646017949193e-01,  -4.7549586643753167e-02, 9.0648108100258265e-02,
+      -3.2225585549698127e-01, -4.7549586643753167e-02, 5.1470686148396338e-01,
+      8.9120731219577032e-01,  -4.8913932884415684e-01, -2.8535162253723745e-01,
+      -3.9481172797096048e-01, -4.8913932884415684e-01, 6.8161288899055572e-01,
+      5.3745624046182272e-02,  -9.6415050020146331e-03, -2.8535162253723745e-01,
+      5.3745624046182272e-02,  3.7216282686452884e-01,  1.8327616930599899e-01,
+      -3.9481172797096048e-01, -9.6415050020146331e-03, 1.8327616930599899e-01,
+      2.9889755032428134e-01,  6.6730350814323314e-01,  -1.9772856508212072e-01,
+      -1.3214864503616511e-01, 1.9656713013350754e-01,  -1.9772856508212072e-01,
+      6.4107564273521156e-01,  -3.6750949174506781e-01, -2.1514707877261979e-01,
+      -1.3214864503616511e-01, -3.6750949174506781e-01, 4.7696729363954582e-01,
+      -1.0605540579882382e-01, 1.9656713013350754e-01,  -2.1514707877261979e-01,
+      -1.0605540579882382e-01, 2.8720166306787565e-01,  9.3159182283013242e-01,
+      -3.9105219662031382e-01, -4.6012852922712744e-01, -3.7107529450742310e-01,
+      -3.9105219662031382e-01, 6.4624598213814299e-01,  -1.9218513692914521e-01,
+      8.7864237420793134e-02,  -4.6012852922712744e-01, -1.9218513692914521e-01,
+      5.5411629355023162e-01,  1.5181004010991156e-01,  -3.7107529450742310e-01,
+      8.7864237420793134e-02,  1.5181004010991156e-01,  5.4717905839342551e-01,
+      5.5411629355023162e-01,  -1.3154982153268135e-01, -2.2683561534265623e-02,
+      -8.5067568027022145e-02, -1.3154982153268135e-01, 4.6158852919583448e-01,
+      -7.4818007595887706e-02, -2.3446129520432515e-01, -2.2683561534265623e-02,
+      -7.4818007595887706e-02, 4.0511515406019899e-01,  -2.4964155806145566e-01,
+      -8.5067568027022145e-02, -2.3446129520432515e-01, -2.4964155806145566e-01,
+      3.9586735852137039e-01,  9.4162575876886123e-01,  1.6551527461893110e-02,
+      -1.7195190048425002e-01, -8.5321232935839153e-02, 1.6551527461893110e-02,
+      7.4304915761252677e-01,  -1.1678199974326212e-01, -4.3729941122496446e-01,
+      -1.7195190048425002e-01, -1.1678199974326212e-01, 6.9317482874286218e-01,
+      1.9191976680315659e-01,  -8.5321232935839153e-02, -4.3729941122496446e-01,
+      1.9191976680315659e-01,  2.8206822618179617e-01,  7.5776646946384441e-01,
+      1.3081288362678634e-02,  -3.2517003355282742e-01, -3.5562946132636442e-01,
+      1.3081288362678634e-02,  7.0400669442030783e-01,  -2.7774712576896132e-01,
+      -1.7472226352059492e-01, -3.2517003355282742e-01, -2.7774712576896132e-01,
+      5.1099339330908866e-01,  7.5918257186359903e-02,  -3.5562946132636442e-01,
+      -1.7472226352059492e-01, 7.5918257186359903e-02,  2.8288909669360418e-01,
+      7.4778071691708869e-01,  -3.9041097955700099e-01, -5.2895674526793196e-03,
+      -4.0620032081707269e-01, -3.9041097955700099e-01, 6.0650459602198470e-01,
+      -1.5236577918073632e-01, 1.3182011644234659e-03,  -5.2895674526793196e-03,
+      -1.5236577918073632e-01, 5.8972872609679527e-01,  -1.4295182509075030e-01,
+      -4.0620032081707269e-01, 1.3182011644234659e-03,  -1.4295182509075030e-01,
+      4.2821165206248513e-01,  6.2471079378938699e-01,  6.0802406816920937e-02,
+      -3.5127267686929931e-01, -3.3963258237386684e-01, 6.0802406816920937e-02,
+      6.0313886404423811e-01,  -2.6977044122265748e-01, -1.6348825415331092e-01,
+      -3.5127267686929931e-01, -2.6977044122265748e-01, 5.4552055268521205e-01,
+      -1.9316799462722023e-02, -3.3963258237386684e-01, -1.6348825415331092e-01,
+      -1.9316799462722023e-02, 4.8171669294486208e-01,  7.0578073898484561e-01,
+      -9.8967741792306413e-02, 1.2502659893584156e-01,  -3.7971201998874682e-01,
+      -9.8967741792306413e-02, 7.0400669442030783e-01,  -6.5060917634508969e-02,
+      -1.0206531635166906e-01, 1.2502659893584156e-01,  -6.5060917634508969e-02,
+      6.0506774685063136e-01,  -2.9019848494979322e-01, -3.7971201998874682e-01,
+      -1.0206531635166906e-01, -2.9019848494979322e-01, 3.3340397458978077e-01,
+      9.7231560474448697e-01,  -4.8839770953582246e-02, -5.2649040695832883e-01,
+      7.6239831325479152e-02,  -4.8839770953582246e-02, 8.5066067412859170e-01,
+      -2.8381943351894323e-01, -4.3719342578830916e-01, -5.2649040695832883e-01,
+      -2.8381943351894323e-01, 7.8694914200240895e-01,  -6.9042842925044076e-02,
+      7.6239831325479152e-02,  -4.3719342578830916e-01, -6.9042842925044076e-02,
+      3.1673915325970592e-01,  1.1146471781363385e+00,  -4.1950872828895774e-01,
+      -7.5099287814360732e-02, -7.0747093773604913e-02, -4.1950872828895774e-01,
+      8.7811025837608414e-01,  -7.1680109826753424e-01, 1.5469221891377744e-01,
+      -7.5099287814360732e-02, -7.1680109826753424e-01, 7.8157368152678353e-01,
+      -1.8674982324145417e-01, -7.0747093773604913e-02, 1.5469221891377744e-01,
+      -1.8674982324145417e-01, 6.3333781479517670e-01,  9.4162575876886123e-01,
+      -3.5756088285386567e-01, -4.9692587682641537e-02, -1.9259678748208950e-01,
+      -3.5756088285386567e-01, 5.7304538706875918e-01,  -1.7263416631604137e-01,
+      1.7497761971314219e-01,  -4.9692587682641537e-02, -1.7263416631604137e-01,
+      5.4717905839342551e-01,  1.8011611616770615e-01,  -1.9259678748208950e-01,
+      1.7497761971314219e-01,  1.8011611616770615e-01,  1.6303735520554410e-01,
+      1.1146471781363385e+00,  -2.9955742667885321e-01, -4.6620550078880341e-01,
+      -3.1378159981378489e-01, -2.9955742667885321e-01, 8.8424257114190075e-01,
+      -2.5441067597713185e-01, -2.1591071501682862e-01, -4.6620550078880341e-01,
+      -2.5441067597713185e-01, 7.7959145539751795e-01,  -1.2407187853083543e-01,
+      -3.1378159981378489e-01, -2.1591071501682862e-01, -1.2407187853083543e-01,
+      5.8972872609679527e-01,  6.0506774685063081e-01,  -7.0161191563646669e-02,
+      -3.3988044762842473e-01, -1.9107644999487627e-01, -7.0161191563646669e-02,
+      4.3045264342637135e-01,  2.0824077812721109e-02,  -2.5586718835495170e-01,
+      -3.3988044762842473e-01, 2.0824077812721109e-02,  4.0694636061668399e-01,
+      -3.6902580613623392e-02, -1.9107644999487627e-01, -2.5586718835495170e-01,
+      -3.6902580613623392e-02, 3.5713610126062301e-01,  1.0507172480981881e+00,
+      -5.7238843768987546e-01, -8.6532548307818979e-02, -2.4797217908729449e-01,
+      -5.7238843768987546e-01, 6.4845065425155790e-01,  -2.3798654086704824e-01,
+      -9.0951164465072995e-02, -8.6532548307818979e-02, -2.3798654086704824e-01,
+      4.3025970692640908e-01,  5.1916371214171878e-02,  -2.4797217908729449e-01,
+      -9.0951164465072995e-02, 5.1916371214171878e-02,  3.5115749833010762e-01,
+      9.7070728754011626e-01,  -1.9667254906106676e-01, 1.3881360342029997e-01,
+      1.8332147125431175e-01,  -1.9667254906106676e-01, 6.4107564273521156e-01,
+      6.9919471349729312e-02,  -2.0671318712854408e-01, 1.3881360342029997e-01,
+      6.9919471349729312e-02,  1.7842412973820965e-01,  -2.8800113566320312e-02,
+      1.8332147125431175e-01,  -2.0671318712854408e-01, -2.8800113566320312e-02,
+      8.7697980056291452e-02,  9.9699133925078010e-01,  -1.9566669897090611e-01,
+      1.1472638955603826e-02,  -3.8137297906451501e-01, -1.9566669897090611e-01,
+      7.8694914200240895e-01,  -4.8668226019268873e-01, -4.3398812393252877e-02,
+      1.1472638955603826e-02,  -4.8668226019268873e-01, 4.4489736273181785e-01,
+      -5.7511361787363047e-02, -3.8137297906451501e-01, -4.3398812393252877e-02,
+      -5.7511361787363047e-02, 2.9261590797274251e-01,  5.8721566479597598e-01,
+      2.1741339413236024e-01,  -4.6310740433823661e-01, 1.5212653882669683e-01,
+      2.1741339413236024e-01,  5.7772908002379919e-01,  -1.1309986042713593e-01,
+      -4.6297902780444065e-02, -4.6310740433823661e-01, -1.1309986042713593e-01,
+      5.3524434793006614e-01,  7.7916319803791656e-02,  1.5212653882669683e-01,
+      -4.6297902780444065e-02, 7.7916319803791656e-02,  3.3070548705408681e-01,
+      6.4845065425155790e-01,  -1.1573648783331206e-01, -1.1295787224086569e-02,
+      -3.2976819109974104e-01, -1.1573648783331206e-01, 5.7304538706875918e-01,
+      -4.4057056903958502e-01, 1.5118880665101278e-01,  -1.1295787224086569e-02,
+      -4.4057056903958502e-01, 5.0847535644168274e-01,  -2.7508898922066710e-01,
+      -3.2976819109974104e-01, 1.5118880665101278e-01,  -2.7508898922066710e-01,
+      4.6158852919583448e-01,  9.2489218226366110e-01,  1.9293109981997125e-01,
+      -4.6858170396716431e-01, 4.2486849070578564e-02,  1.9293109981997125e-01,
+      8.8759600097388758e-01,  -9.3159319915253995e-03, -2.1166189641553734e-01,
+      -4.6858170396716431e-01, -9.3159319915253995e-03, 5.8089947575573275e-01,
+      -3.8498930024537814e-01, 4.2486849070578564e-02,  -2.1166189641553734e-01,
+      -3.8498930024537814e-01, 4.0608640117736616e-01,  9.0810287860174088e-01,
+      -4.5026066211118843e-01, -2.4234797312679049e-01, -1.1635894709323868e-01,
+      -4.5026066211118843e-01, 8.3522648701948565e-01,  7.0167369514544164e-02,
+      -3.0380780947210834e-01, -2.4234797312679049e-01, 7.0167369514544164e-02,
+      2.8658902506762818e-01,  4.2786039330881952e-02,  -1.1635894709323868e-01,
+      -3.0380780947210834e-01, 4.2786039330881952e-02,  2.2990742509977113e-01,
+      8.7859873068865801e-01,  -4.0521290114598696e-01, -3.4537197184419777e-02,
+      -5.1541416274527041e-01, -4.0521290114598696e-01, 7.7073679568840003e-01,
+      -5.2766218816177624e-01, 7.2009349057603700e-02,  -3.4537197184419777e-02,
+      -5.2766218816177624e-01, 5.3524434793006614e-01,  1.2774094128859217e-01,
+      -5.1541416274527041e-01, 7.2009349057603700e-02,  1.2774094128859217e-01,
+      4.2821165206248513e-01,  8.0017606793538776e-01,  -3.6273937320056748e-01,
+      -4.8244644816054216e-01, 1.9000791267873704e-01,  -3.6273937320056748e-01,
+      5.6353888423218390e-01,  3.7095579553517985e-02,  1.0807739396133707e-01,
+      -4.8244644816054216e-01, 3.7095579553517985e-02,  4.2845986148404269e-01,
+      -1.2901615367558902e-01, 1.9000791267873704e-01,  1.0807739396133707e-01,
+      -1.2901615367558902e-01, 2.3908528931221024e-01,  8.7859873068865801e-01,
+      -5.7342075379041779e-01, -2.2560792912032243e-01, 1.3840168053297142e-01,
+      -5.7342075379041779e-01, 6.2584346461620799e-01,  -1.0147381298988209e-01,
+      -1.8792520225770581e-01, -2.2560792912032243e-01, -1.0147381298988209e-01,
+      4.9480400405457242e-01,  -1.3243403061658676e-01, 1.3840168053297142e-01,
+      -1.8792520225770581e-01, -1.3243403061658676e-01, 2.5543672306163845e-01,
+      9.7843596341516559e-01,  -3.0135276617413775e-02, -6.3204064126440895e-01,
+      7.7858338769721036e-02,  -3.0135276617413775e-02, 7.3888148744521942e-01,
+      -2.8105601448216994e-01, -1.4222291428374428e-01, -6.3204064126440895e-01,
+      -2.8105601448216994e-01, 5.3087899217465850e-01,  -1.9954226724405721e-03,
+      7.7858338769721036e-02,  -1.4222291428374428e-01, -1.9954226724405721e-03,
+      3.7962008200748842e-01,  7.3888148744521942e-01,  -3.9876854953397894e-01,
+      7.9344112776459683e-02,  3.0881146913585039e-02,  -3.9876854953397894e-01,
+      5.8894646017949193e-01,  -2.9566251514384462e-01, -2.1875193893336647e-01,
+      7.9344112776459683e-02,  -2.9566251514384462e-01, 2.1324148100625978e-01,
+      1.1337428718710466e-01,  3.0881146913585039e-02,  -2.1875193893336647e-01,
+      1.1337428718710466e-01,  1.3167057692769979e-01,  7.2588662899153644e-01,
+      -4.0491835492648354e-01, 3.8294782214220321e-02,  -1.6245502019487457e-01,
+      -4.0491835492648354e-01, 6.9978773302195119e-01,  -2.6130189173261692e-01,
+      -1.4963646270159869e-01, 3.8294782214220321e-02,  -2.6130189173261692e-01,
+      4.9396599356834686e-01,  -2.0453500637835592e-01, -1.6245502019487457e-01,
+      -1.4963646270159869e-01, -2.0453500637835592e-01, 4.3044918224444273e-01,
+      5.4829965643085543e-01,  -8.6258333909827428e-02, -9.0863809928147868e-02,
+      -2.5335540346466701e-01, -8.6258333909827428e-02, 4.8256071581125803e-01,
+      -1.5824234665042425e-01, -6.2251154159746636e-02, -9.0863809928147868e-02,
+      -1.5824234665042425e-01, 3.3070548705408681e-01,  -1.2171470598287978e-01,
+      -2.5335540346466701e-01, -6.2251154159746636e-02, -1.2171470598287978e-01,
+      2.9993528142734771e-01,  9.9699133925078010e-01,  -5.5720924928656934e-02,
+      -3.7563354786645231e-01, -1.7003176037775222e-01, -5.5720924928656934e-02,
+      7.1322781206304275e-01,  -1.4028476150114413e-01, -4.0725518739947821e-02,
+      -3.7563354786645231e-01, -1.4028476150114413e-01, 6.5766402633747112e-01,
+      -3.7470308102778649e-01, -1.7003176037775222e-01, -4.0725518739947821e-02,
+      -3.7470308102778649e-01, 4.5518325168409662e-01,  7.2306489407047936e-01,
+      -5.5652304518705331e-01, -1.7548443714837653e-01, -9.7244252488182109e-02,
+      -5.5652304518705331e-01, 5.8721566479597631e-01,  1.5466175158937293e-02,
+      2.3395338180958331e-01,  -1.7548443714837653e-01, 1.5466175158937293e-02,
+      5.1099339330908866e-01,  1.2309721775318454e-01,  -9.7244252488182109e-02,
+      2.3395338180958331e-01,  1.2309721775318454e-01,  2.9948736341201787e-01,
+      8.9120731219577032e-01,  1.3828785543997540e-01,  -6.5808954835297839e-01,
+      2.3928953196716229e-02,  1.3828785543997540e-01,  8.8759600097388758e-01,
+      -1.6580688159465298e-01, -1.0200920121572279e-01, -6.5808954835297839e-01,
+      -1.6580688159465298e-01, 6.0844713798743799e-01,  -1.8482739703288106e-01,
+      2.3928953196716229e-02,  -1.0200920121572279e-01, -1.8482739703288106e-01,
+      2.7328888249045513e-01,  9.7231560474448697e-01,  -1.5360824814848673e-01,
+      -3.0175556482836241e-01, 1.4427139165494557e-01,  -1.5360824814848673e-01,
+      7.7073679568840003e-01,  -4.4616437520485569e-01, -4.3386642987381790e-02,
+      -3.0175556482836241e-01, -4.4616437520485569e-01, 6.8161288899055572e-01,
+      -3.3586190220507495e-01, 1.4427139165494557e-01,  -4.3386642987381790e-02,
+      -3.3586190220507495e-01, 3.7736584257608707e-01,  8.4370316144902313e-01,
+      -3.4786444025064711e-01, -4.9627086468803983e-01, -3.0284479791966981e-02,
+      -3.4786444025064711e-01, 7.7959145539751795e-01,  5.4105959194424263e-02,
+      -5.6892292757652424e-02, -4.9627086468803983e-01, 5.4105959194424263e-02,
+      3.7736584257608707e-01,  1.4233877887889113e-01,  -3.0284479791966981e-02,
+      -5.6892292757652424e-02, 1.4233877887889113e-01,  2.4312804865275472e-01,
+      9.3816147034272368e-01,  -2.0860974495653162e-01, -3.7166959734666666e-01,
+      5.6250602465564153e-03,  -2.0860974495653162e-01, 9.0810287860174088e-01,
+      -9.1069253660888416e-02, -6.4962854816729276e-01, -3.7166959734666666e-01,
+      -9.1069253660888416e-02, 8.1523507511884086e-01,  -6.6856471628686520e-02,
+      5.6250602465564153e-03,  -6.4962854816729276e-01, -6.6856471628686520e-02,
+      5.4817010902737695e-01,  8.3348751379486785e-01,  -5.7386182749623121e-02,
+      -1.2725671252595133e-01, -2.6056306991901751e-01, -5.7386182749623121e-02,
+      5.2909436738615123e-01,  -4.2596706647156624e-01, -1.0919830944239709e-02,
+      -1.2725671252595133e-01, -4.2596706647156624e-01, 3.7962008200748842e-01,
+      7.0344098964105775e-02,  -2.6056306991901751e-01, -1.0919830944239709e-02,
+      7.0344098964105775e-02,  2.2679728457111192e-01,  1.0079020879244640e+00,
+      -6.4116619730398172e-01, 1.1959784878009472e-02,  -3.5036660259057062e-01,
+      -6.4116619730398172e-01, 7.2588662899153644e-01,  1.5457663848526185e-01,
+      -1.3123176159456695e-01, 1.1959784878009472e-02,  1.5457663848526185e-01,
+      7.2306489407047936e-01,  1.1120004060087613e-01,  -3.5036660259057062e-01,
+      -1.3123176159456695e-01, 1.1120004060087613e-01,  6.5291435452372704e-01,
+      6.5291435452372693e-01,  -2.0669331449151485e-01, -1.0009057137401539e-01,
+      -1.0458759498524857e-01, -2.0669331449151485e-01, 4.8256071581125803e-01,
+      -1.0975185704675450e-01, -7.1400789900385064e-02, -1.0009057137401539e-01,
+      -1.0975185704675450e-01, 3.1774836143047369e-01,  -1.3420181173883666e-01,
+      -1.0458759498524857e-01, -7.1400789900385064e-02, -1.3420181173883666e-01,
+      1.7842412973820965e-01,  7.4304915761252677e-01,  -3.7474512353359690e-02,
+      -2.0605417465948622e-01, 2.8786202774685965e-02,  -3.7474512353359690e-02,
+      6.4624598213814299e-01,  -1.8088533245882277e-01, -2.7294935111478841e-01,
+      -2.0605417465948622e-01, -1.8088533245882277e-01, 4.5625158241504560e-01,
+      -2.5573952710441156e-01, 2.8786202774685965e-02,  -2.7294935111478841e-01,
+      -2.5573952710441156e-01, 4.3045264342637135e-01,  8.1523507511884286e-01,
+      -1.1206588973386064e-01, -2.9038245483951325e-01, -9.4044512167176214e-02,
+      -1.1206588973386064e-01, 6.9069450025674883e-01,  -3.6666406927828260e-01,
+      -2.6295004305146680e-01, -2.9038245483951325e-01, -3.6666406927828260e-01,
+      6.0313886404423811e-01,  -1.1364153715298203e-01, -9.4044512167176214e-02,
+      -2.6295004305146680e-01, -1.1364153715298203e-01, 5.0847535644168274e-01,
+      7.8157368152678375e-01,  -3.6689153089953208e-01, -1.6343042599874896e-01,
+      2.0681260422479955e-01,  -3.6689153089953208e-01, 7.5302376148517924e-01,
+      -3.9141809592987054e-01, -4.9792782459804974e-01, -1.6343042599874896e-01,
+      -3.9141809592987054e-01, 4.8171669294486208e-01,  1.8620912874547166e-01,
+      2.0681260422479955e-01,  -4.9792782459804974e-01, 1.8620912874547166e-01,
+      4.5625158241504560e-01,  8.5066067412859170e-01,  -6.6383544595733118e-02,
+      -4.9833493744041324e-01, 3.5985625651793141e-02,  -6.6383544595733118e-02,
+      7.6738392766662211e-01,  -1.9489169085491792e-01, -6.4543919151000315e-02,
+      -4.9833493744041324e-01, -1.9489169085491792e-01, 5.4829965643085543e-01,
+      -2.7028089635457442e-01, 3.5985625651793141e-02,  -6.4543919151000315e-02,
+      -2.7028089635457442e-01, 3.9586735852137039e-01,  8.8424257114190075e-01,
+      -5.0968059146564315e-01, -9.5512640917474467e-03, -7.2900503108297598e-02,
+      -5.0968059146564315e-01, 7.0578073898484561e-01,  -7.9000528792048164e-02,
+      -3.4471532290325835e-01, -9.5512640917474467e-03, -7.9000528792048164e-02,
+      6.9978773302195119e-01,  -2.3508453941888172e-01, -7.2900503108297598e-02,
+      -3.4471532290325835e-01, -2.3508453941888172e-01, 5.1462854065795050e-01,
+      8.3522648701948565e-01,  -4.4716582952086292e-01, 5.4799614348689352e-02,
+      -2.2914542894364148e-02, -4.4716582952086292e-01, 8.3348751379486763e-01,
+      -6.7279352271601633e-01, -3.4916341927887895e-02, 5.4799614348689352e-02,
+      -6.7279352271601633e-01, 7.6738392766662211e-01,  -1.6809648395494664e-01,
+      -2.2914542894364148e-02, -3.4916341927887895e-02, -1.6809648395494664e-01,
+      7.1322781206304275e-01,  9.7070728754011626e-01,  -2.5224986479466716e-01,
+      5.5579119983494740e-02,  -3.6768489956717509e-01, -2.5224986479466716e-01,
+      5.3965948062451941e-01,  -4.4192312892226593e-01, 3.1513756645368173e-02,
+      5.5579119983494740e-02,  -4.4192312892226593e-01, 5.3087899217465850e-01,
+      7.9885677144886064e-02,  -3.6768489956717509e-01, 3.1513756645368173e-02,
+      7.9885677144886064e-02,  1.6104012331629208e-01,  8.6130335545327363e-01,
+      -9.6129435640763802e-02, -1.4801620948352356e-02, 7.8150102857998158e-02,
+      -9.6129435640763802e-02, 6.7625303314120611e-01,  -5.5027301017164576e-02,
+      -1.7520377876997034e-01, -1.4801620948352356e-02, -5.5027301017164576e-02,
+      3.1774836143047369e-01,  -1.2376278479081343e-01, 7.8150102857998158e-02,
+      -1.7520377876997034e-01, -1.2376278479081343e-01, 1.0841353470308304e-01,
+      7.4620920788925238e-01,  -1.6394859485203073e-01, -2.0029713033714130e-01,
+      -1.8645272306202978e-01, -1.6394859485203073e-01, 5.3965948062451941e-01,
+      -3.8230461309700781e-01, 1.3847290923401767e-01,  -2.0029713033714130e-01,
+      -3.8230461309700781e-01, 5.1462854065795050e-01,  -1.8561874788646704e-01,
+      -1.8645272306202978e-01, 1.3847290923401767e-01,  -1.8561874788646704e-01,
+      2.9993528142734771e-01,  6.7625303314120611e-01,  -1.6815361388865593e-01,
+      -3.3930776213055852e-01, -1.2114679754832709e-01, -1.6815361388865593e-01,
+      5.6353888423218390e-01,  -7.5784855072358825e-02, -3.3221823724983218e-01,
+      -3.3930776213055852e-01, -7.5784855072358825e-02, 5.1470686148396338e-01,
+      -4.1819974401314991e-02, -1.2114679754832709e-01, -3.3221823724983218e-01,
+      -4.1819974401314991e-02, 4.2819483757235705e-01,  1.0507172480981881e+00,
+      -1.4246809991945683e-01, 4.1456358442478705e-02,  -4.5365891641821365e-01,
+      -1.4246809991945683e-01, 7.1494810971572931e-01,  -2.4883057770682826e-01,
+      -2.4395987950943798e-01, 4.1456358442478705e-02,  -2.4883057770682826e-01,
+      5.4552055268521205e-01,  1.2834013149857285e-01,  -4.5365891641821365e-01,
+      -2.4395987950943798e-01, 1.2834013149857285e-01,  3.3340397458978077e-01,
+      6.9069450025674894e-01,  -3.8338543582916840e-01, -1.6642138961100816e-01,
+      -3.5535624148913159e-01, -3.8338543582916840e-01, 5.2909436738615123e-01,
+      9.6172848604107755e-02,  5.5205364851978050e-02,  -1.6642138961100816e-01,
+      9.6172848604107755e-02,  4.2819483757235710e-01,  5.9926260952505861e-03,
+      -3.5535624148913159e-01, 5.5205364851978050e-02,  5.9926260952505861e-03,
+      2.6226647193037689e-01,  8.6130335545327363e-01,  -6.2395423965096508e-02,
+      -3.8567287714901394e-01, -1.5408554583685347e-01, -6.2395423965096508e-02,
+      7.4300380743635475e-01,  -4.7285239739268992e-01, -3.2035899709108612e-02,
+      -3.8567287714901394e-01, -4.7285239739268992e-01, 6.0171576901660107e-01,
+      -1.0536966428907621e-01, -1.5408554583685347e-01, -3.2035899709108612e-02,
+      -1.0536966428907621e-01, 4.9396599356834686e-01,  7.5302376148517924e-01,
+      -6.0232285163413626e-02, -2.7132649171672180e-01, -1.4054053948995909e-01,
+      -6.0232285163413626e-02, 6.0650459602198470e-01,  -2.4794771537629287e-01,
+      -1.4485909587327089e-01, -2.7132649171672180e-01, -2.4794771537629287e-01,
+      4.5518325168409662e-01,  -1.6092902586109215e-01, -1.4054053948995909e-01,
+      -1.4485909587327089e-01, -1.6092902586109215e-01, 4.0174654365823070e-01,
+      1.0547679530528609e+00,  -6.4439808979974744e-01, 6.9192182950400305e-02,
+      -3.3250796613430167e-01, -6.4439808979974744e-01, 8.2256346358859145e-01,
+      -4.8266166410158140e-01, -7.9980773185718390e-02, 6.9192182950400305e-02,
+      -4.8266166410158140e-01, 8.0017606793538776e-01,  -2.7854894491724819e-02,
+      -3.3250796613430167e-01, -7.9980773185718390e-02, -2.7854894491724819e-02,
+      5.4817010902737695e-01,  1.5502492798962887e+01,  -9.4036183520005387e+00,
+      -9.0871843532376817e-01, 1.5458603036460241e+00,  -9.4036183520005387e+00,
+      5.8545618851096108e+00,  8.7584459712876173e-01,  -1.0260595565373307e+00,
+      -9.0871843532376817e-01, 8.7584459712876173e-01,  7.5773424105520826e-01,
+      -3.2391012220482440e-01, 1.5458603036460241e+00,  -1.0260595565373307e+00,
+      -3.2391012220482440e-01, 6.6152719782725655e-01,  1.5881484684546885e+01,
+      -8.5373015094081293e+00, 1.2910407629253464e+00,  1.2854398517437213e+00,
+      -8.5373015094081293e+00, 5.0892253563939507e+00,  -3.8569840009954620e-01,
+      -9.9850386318081996e-01, 1.2910407629253464e+00,  -3.8569840009954620e-01,
+      4.1577040236200724e-01,  -9.1388286019611339e-02, 1.2854398517437213e+00,
+      -9.9850386318081996e-01, -9.1388286019611339e-02, 2.9351817210899528e-01,
+      1.8355238388750472e+01,  -9.9693288297551721e+00, 1.4065955534477488e+00,
+      1.5236758321532125e+00,  -9.9693288297551721e+00, 5.4788018334752540e+00,
+      -7.5992948757336931e-01, -6.6807507000148814e-01, 1.4065955534477488e+00,
+      -7.5992948757336931e-01, 5.8340911874120316e-01,  9.9856752432820606e-03,
+      1.5236758321532125e+00,  -6.6807507000148814e-01, 9.9856752432820606e-03,
+      5.5177690536312463e-01,  1.7078778656839585e+01,  -6.5205205128653105e+00,
+      1.5817366283975822e+00,  -9.3640888727240412e-01, -6.5205205128653105e+00,
+      2.9619037115622602e+00,  -1.1542576186016471e+00, -8.8584406490304773e-02,
+      1.5817366283975822e+00,  -1.1542576186016471e+00, 8.3454304950111569e-01,
+      3.1272631908606119e-01,  -9.3640888727240412e-01, -8.8584406490304773e-02,
+      3.1272631908606119e-01,  7.8087115310507782e-01,  1.5669862194862620e+01,
+      -6.2090727600143900e+00, 1.3817794924064011e+00,  -1.1475004406408358e+00,
+      -6.2090727600143900e+00, 2.5567265715258070e+00,  -5.4440265806779431e-01,
+      3.8699899553155109e-01,  1.3817794924064011e+00,  -5.4440265806779431e-01,
+      3.6446802134584444e-01,  -2.8076757475429592e-01, -1.1475004406408358e+00,
+      3.8699899553155109e-01,  -2.8076757475429592e-01, 2.6130199022294265e-01,
+      1.7118875747980759e+01,  -5.9277226216953718e+00, 1.9346813580902582e+00,
+      -1.6872205590095239e+00, -5.9277226216953718e+00, 2.1086146215067632e+00,
+      -8.0747551757328861e-01, 4.7699095328329716e-01,  1.9346813580902582e+00,
+      -8.0747551757328861e-01, 5.5797080293767543e-01,  8.9832319658025406e-02,
+      -1.6872205590095239e+00, 4.7699095328329716e-01,  8.9832319658025406e-02,
+      5.5399995944851188e-01,  1.7058565959549352e+01,  -5.9263293882909478e+00,
+      1.5342405590746959e+00,  -1.1630302303586997e+00, -5.9263293882909478e+00,
+      2.6205293378999683e+00,  -4.7688305722955537e-01, 3.1804603922241220e-01,
+      1.5342405590746959e+00,  -4.7688305722955537e-01, 3.1296424858530869e-01,
+      -2.8669977614844999e-01, -1.1630302303586997e+00, 3.1804603922241220e-01,
+      -2.8669977614844999e-01, 2.7020324134820112e-01,  1.6874725411781530e+01,
+      -6.9150979008264484e+00, 1.0065221329504688e+00,  -1.8347878924481371e+00,
+      -6.9150979008264484e+00, 3.5369590566282492e+00,  -5.2228937791154206e-01,
+      2.3672173077240022e-01,  1.0065221329504688e+00,  -5.2228937791154206e-01,
+      6.0329263881086503e-01,  4.6309711033365369e-02,  -1.8347878924481371e+00,
+      2.3672173077240022e-01,  4.6309711033365369e-02,  5.8765944434164141e-01,
+      1.6856507449690348e+01,  -6.8342292657673545e+00, 8.9036581290816785e-01,
+      -4.5904138300777919e-01, -6.8342292657673545e+00, 3.9011240056387244e+00,
+      -4.1077122687276202e-02, 5.6895999928936403e-01,  8.9036581290816785e-01,
+      -4.1077122687276202e-02, 2.7435588945926814e-01,  1.9420750375548933e-02,
+      -4.5904138300777919e-01, 5.6895999928936403e-01,  1.9420750375548933e-02,
+      1.7277500144155888e-01,  1.7645529606875833e+01,  -3.2538877192050002e+00,
+      -5.5012517023538066e-01, 1.0677788759412139e+00,  -3.2538877192050002e+00,
+      1.0546932562360065e+00,  -2.3658187113371332e-01, 2.1841816790460902e-01,
+      -5.5012517023538066e-01, -2.3658187113371332e-01, 7.1656120537425561e-01,
+      -1.7040630728823944e-01, 1.0677788759412139e+00,  2.1841816790460902e-01,
+      -1.7040630728823944e-01, 5.0974972717888634e-01,  1.7839842450333954e+01,
+      -5.7379299944719877e+00, -1.9569351707197109e+00, 1.7713682302855058e+00,
+      -5.7379299944719877e+00, 2.4328041444884976e+00,  -1.7544854846175517e-02,
+      -1.6864721058949111e-01, -1.9569351707197109e+00, -1.7544854846175517e-02,
+      1.0677513531326404e+00,  -5.9215023869599626e-01, 1.7713682302855058e+00,
+      -1.6864721058949111e-01, -5.9215023869599626e-01, 4.6360784428232726e-01,
+      1.5732517152453081e+01,  -7.4014739000296572e+00, -1.6207000095637598e+00,
+      1.2383190370614494e+00,  -7.4014739000296572e+00, 3.9409851277204022e+00,
+      7.5740083786654211e-01,  -5.5800827056355640e-01, -1.6207000095637598e+00,
+      7.5740083786654211e-01,  7.7642511193896813e-01,  -4.9466228929792039e-01,
+      1.2383190370614494e+00,  -5.5800827056355640e-01, -4.9466228929792039e-01,
+      3.1958775176868037e-01,  1.6183696670937064e+01,  -8.4210107899133941e+00,
+      -1.4603023231162975e+00, -4.8487004987150362e-01, -8.4210107899133941e+00,
+      5.1398804721325435e+00,  2.1595570912014417e-01,  8.3208280045450567e-01,
+      -1.4603023231162975e+00, 2.1595570912014417e-01,  5.4394923212798296e-01,
+      -3.4805126389386676e-01, -4.8487004987150362e-01, 8.3208280045450567e-01,
+      -3.4805126389386676e-01, 4.8452486035887499e-01,  1.6451611837149500e+01,
+      -4.2969418428867261e+00, -7.3888024461400414e-01, -2.0815144409407478e+00,
+      -4.2969418428867261e+00, 1.5430243106399666e+00,  1.8729935762291541e-01,
+      1.4033663037202498e-01,  -7.3888024461400414e-01, 1.8729935762291541e-01,
+      7.4205169442909757e-01,  5.7628851673904691e-02,  -2.0815144409407478e+00,
+      1.4033663037202498e-01,  5.7628851673904691e-02,  6.5242091014819859e-01,
+      1.5883062757360207e+01,  -7.5907511804771470e+00, -9.7007798581707783e-01,
+      7.0138283553798109e-01,  -7.5907511804771470e+00, 4.0217572548274134e+00,
+      -2.6014708482761550e-02, -5.3204932822260864e-01, -9.7007798581707783e-01,
+      -2.6014708482761550e-02, 1.0259721921051177e+00,  -1.8715321271601759e-01,
+      7.0138283553798109e-01,  -5.3204932822260864e-01, -1.8715321271601759e-01,
+      5.5149306670030152e-01,  1.5978116669665239e+01,  -7.4006259214596790e+00,
+      -6.7969646246634663e-01, 6.5546600275605982e-01,  -7.4006259214596790e+00,
+      3.6071706440359010e+00,  5.3491110661316954e-01,  -5.2666565248665098e-01,
+      -6.7969646246634663e-01, 5.3491110661316954e-01,  4.7936057181766917e-01,
+      -1.5025322700354665e-01, 6.5546600275605982e-01,  -5.2666565248665098e-01,
+      -1.5025322700354665e-01, 4.3112434274449579e-01,  1.6757855414725086e+01,
+      -3.1391994419655687e+00, -3.6827363627511667e+00, -2.7493582565171533e+00,
+      -3.1391994419655687e+00, 1.2878229875735527e+00,  6.2385886839847293e-01,
+      3.4411042773468631e-01,  -3.6827363627511667e+00, 6.2385886839847293e-01,
+      1.2200979174145221e+00,  9.3261333604507307e-02,  -2.7493582565171533e+00,
+      3.4411042773468631e-01,  9.3261333604507307e-02,  1.1795148354251150e+00,
+      1.5763176470987085e+01,  -7.2993099681066589e+00, 2.1748999120100793e-01,
+      -4.7546386776476129e-01, -7.2993099681066589e+00, 3.4569862729331460e+00,
+      -1.6096289888525245e-01, 3.5019733955885657e-01,  2.1748999120100793e-01,
+      -1.6096289888525245e-01, 4.2354082998887993e-01,  -3.1304152419179737e-01,
+      -4.7546386776476129e-01, 3.5019733955885657e-01,  -3.1304152419179737e-01,
+      3.4623783030108429e-01,  1.7657045077804337e+01,  -5.4634153749758578e+00,
+      -2.8502393669993009e+00, 1.2603451180663379e+00,  -5.4634153749758578e+00,
+      2.3705777605989660e+00,  4.7298264603542695e-01,  -1.8824806410410400e-01,
+      -2.8502393669993009e+00, 4.7298264603542695e-01,  8.1312479693863837e-01,
+      -4.9719372983025689e-01, 1.2603451180663379e+00,  -1.8824806410410400e-01,
+      -4.9719372983025689e-01, 4.2735579792528156e-01,  1.7234879466141379e+01,
+      -6.9447619542898051e+00, -1.3909105342656662e+00, -1.4629483521415327e+00,
+      -6.9447619542898051e+00, 3.2257037587765272e+00,  3.4613714263225803e-01,
+      7.1038882433094597e-01,  -1.3909105342656662e+00, 3.4613714263225803e-01,
+      5.4019461216426035e-01,  -2.4590599471464314e-01, -1.4629483521415327e+00,
+      7.1038882433094597e-01,  -2.4590599471464314e-01, 4.4551884917879320e-01,
+      1.7438713147183115e+01,  -8.4187054220745114e+00, 9.9555878171084999e-01,
+      1.3301908774116342e+00,  -8.4187054220745114e+00, 4.1903876236321533e+00,
+      -7.3825814341549312e-01, -6.7163472873814567e-01, 9.9555878171084999e-01,
+      -7.3825814341549312e-01, 5.8548847257088532e-01,  1.6930014844653318e-01,
+      1.3301908774116342e+00,  -6.7163472873814567e-01, 1.6930014844653318e-01,
+      5.3684670796748524e-01,  1.6505843421877792e+01,  -8.9672859383958023e+00,
+      -5.9586737243852639e-01, -4.9508712315185199e-01, -8.9672859383958023e+00,
+      4.8781866610061115e+00,  2.8538236205314221e-01,  3.0156093579837179e-01,
+      -5.9586737243852639e-01, 2.8538236205314221e-01,  4.8868089712416274e-01,
+      -4.4039031061756390e-01, -4.9508712315185199e-01, 3.0156093579837179e-01,
+      -4.4039031061756390e-01, 4.7201467710741113e-01,  1.8009048716596261e+01,
+      -9.5048416816852885e+00, -1.4889359349719808e+00, 5.5230572557832114e-01,
+      -9.5048416816852885e+00, 5.0429843614824552e+00,  8.0889627597129587e-01,
+      -2.6367499950333312e-01, -1.4889359349719808e+00, 8.0889627597129587e-01,
+      1.1121395372372715e+00,  -9.0442258964978972e-01, 5.5230572557832114e-01,
+      -2.6367499950333312e-01, -9.0442258964978972e-01, 8.5074190371660285e-01,
+      1.8969812503837847e+01,  -7.7867026450257386e+00, 1.6621469974591756e+00,
+      -1.3535726585282810e+00, -7.7867026450257386e+00, 3.2242358228553925e+00,
+      -5.5729571150631485e-01, 5.2143834192344174e-01,  1.6621469974591756e+00,
+      -5.5729571150631485e-01, 7.4287944271406869e-01,  -4.0168187357492424e-01,
+      -1.3535726585282810e+00, 5.2143834192344174e-01,  -4.0168187357492424e-01,
+      5.7837665640757685e-01,  1.6268715288654960e+01,  -8.9640581019331016e+00,
+      1.9472562726742195e+00,  1.1634394630256961e+00,  -8.9640581019331016e+00,
+      5.2999985470828141e+00,  -9.0781775797605180e-01, -9.9127146007630118e-01,
+      1.9472562726742195e+00,  -9.0781775797605180e-01, 5.5880703055816960e-01,
+      -2.0482720926808279e-02, 1.1634394630256961e+00,  -9.9127146007630118e-01,
+      -2.0482720926808279e-02, 4.2314243080374431e-01,  1.7309134332310190e+01,
+      -6.7958317179822716e+00, 1.7430014752690762e-01,  7.4781078592582828e-01,
+      -6.7958317179822716e+00, 2.7832702389289379e+00,  -2.9174947002806606e-01,
+      -2.5266256491062944e-01, 1.7430014752690762e-01,  -2.9174947002806606e-01,
+      4.3576818045690519e-01,  -8.8896184051415236e-02, 7.4781078592582828e-01,
+      -2.5266256491062944e-01, -8.8896184051415236e-02, 3.9997224840640028e-01,
+      1.7153574941598098e+01,  -6.8846934950832344e+00, 1.8254724366772068e+00,
+      4.8039342168786198e-01,  -6.8846934950832344e+00, 2.8276344038991410e+00,
+      -5.6790635321077287e-01, -1.9270510354144452e-01, 1.8254724366772068e+00,
+      -5.6790635321077287e-01, 6.9227219483725666e-01,  -1.2709334375461934e-01,
+      4.8039342168786198e-01,  -1.9270510354144452e-01, -1.2709334375461934e-01,
+      4.2917978157262676e-01,  1.8132184964363688e+01,  -3.1733007722489184e+00,
+      1.7825953260422662e+00,  -1.3140033285181132e+00, -3.1733007722489184e+00,
+      6.9684206950381822e-01,  -5.4962857370590601e-01, -1.0351982721358500e-02,
+      1.7825953260422662e+00,  -5.4962857370590601e-01, 6.5568195496490611e-01,
+      1.7607191243391238e-01,  -1.3140033285181132e+00, -1.0351982721358500e-02,
+      1.7607191243391238e-01,  6.2263345089164690e-01,  1.6127141186360564e+01,
+      -9.2175933167952362e+00, 4.5482959624410402e-01,  1.5607705465624031e+00,
+      -9.2175933167952362e+00, 5.8646726994165235e+00,  -7.6823914094072088e-01,
+      -5.4262462349906693e-01, 4.5482959624410402e-01,  -7.6823914094072088e-01,
+      5.9151161800332619e-01,  -2.4286672705208728e-01, 1.5607705465624031e+00,
+      -5.4262462349906693e-01, -2.4286672705208728e-01, 3.5666849406587792e-01,
+      1.6165753976243874e+01,  -7.3811188161212806e+00, -2.9732014153906883e-01,
+      1.0894338527618317e+00,  -7.3811188161212806e+00, 3.4986894854838320e+00,
+      1.2640223060154221e-02,  -7.0520798584256972e-01, -2.9732014153906883e-01,
+      1.2640223060154221e-02,  5.6149770153482581e-01,  3.6870659678219064e-02,
+      1.0894338527618317e+00,  -7.0520798584256972e-01, 3.6870659678219064e-02,
+      4.5536900447269846e-01,  1.5398988006027349e+01,  -8.1208517349885732e+00,
+      8.6228252695234930e-01,  1.1537806103765487e+00,  -8.1208517349885732e+00,
+      4.3222427499704104e+00,  -5.2765755042823237e-01, -6.0836165343511328e-01,
+      8.6228252695234930e-01,  -5.2765755042823237e-01, 4.7558839586188817e-01,
+      -2.2039226845147369e-01, 1.1537806103765487e+00,  -6.0836165343511328e-01,
+      -2.2039226845147369e-01, 3.6326310248611182e-01,  1.6653862335772988e+01,
+      -6.3212833166201801e+00, 1.7035548874689670e+00,  1.7818121037836856e+00,
+      -6.3212833166201801e+00, 3.2372475346428593e+00,  -1.6771360201147706e-01,
+      -7.3260377579450964e-01, 1.7035548874689670e+00,  -1.6771360201147706e-01,
+      4.5584861179226355e-01,  1.7938188763718371e-01,  1.7818121037836856e+00,
+      -7.3260377579450964e-01, 1.7938188763718371e-01,  3.0343699262163576e-01,
+      1.6005636284380316e+01,  -8.9463211901340483e+00, -1.4995353233230992e+00,
+      1.6144976837089247e+00,  -8.9463211901340483e+00, 5.4553198002689216e+00,
+      8.2430401820014310e-01,  -7.7450210219583759e-01, -1.4995353233230992e+00,
+      8.2430401820014310e-01,  3.0203181275741886e-01,  -5.0431498184811262e-02,
+      1.6144976837089247e+00,  -7.7450210219583759e-01, -5.0431498184811262e-02,
+      2.6690426032037673e-01,  1.5957019862552055e+01,  -8.4122236810732502e+00,
+      1.4776406260905088e+00,  1.1752014846148688e+00,  -8.4122236810732502e+00,
+      5.1499519890947365e+00,  -1.2438745362258066e+00, -2.4793216417570077e-01,
+      1.4776406260905088e+00,  -1.2438745362258066e+00, 4.3907159105778493e-01,
+      -1.3201484086761139e-01, 1.1752014846148688e+00,  -2.4793216417570077e-01,
+      -1.3201484086761139e-01, 2.8971532780081477e-01,  1.7264626784129828e+01,
+      -7.1717293181514563e+00, 1.8173778481524891e-01,  1.6638776173614904e+00,
+      -7.1717293181514563e+00, 3.3498514787396210e+00,  5.3070406002362888e-02,
+      -8.3211897250151612e-01, 1.8173778481524891e-01,  5.3070406002362888e-02,
+      3.5750188990225340e-01,  1.7905618157975495e-01,  1.6638776173614904e+00,
+      -8.3211897250151612e-01, 1.7905618157975495e-01,  3.5631089226304219e-01,
+      1.6951331209440209e+01,  -6.9265044010797432e+00, 1.6649505088436367e+00,
+      1.2476223930507584e+00,  -6.9265044010797432e+00, 3.0908353140153588e+00,
+      -7.9349084304257178e-01, -5.5641718253825512e-01, 1.6649505088436367e+00,
+      -7.9349084304257178e-01, 5.1957810047939335e-01,  -1.8256034059952314e-01,
+      1.2476223930507584e+00,  -5.5641718253825512e-01, -1.8256034059952314e-01,
+      4.4508123489757256e-01,  1.9012236387930873e+01,  -8.2612906987401367e+00,
+      1.7822397061112394e+00,  1.3130655015793740e+00,  -8.2612906987401367e+00,
+      3.9785776745364090e+00,  -4.5911569699280996e-01, -7.8540804412996457e-01,
+      1.7822397061112394e+00,  -4.5911569699280996e-01, 7.1651557376422037e-01,
+      -2.1545675296134048e-01, 1.3130655015793740e+00,  -7.8540804412996457e-01,
+      -2.1545675296134048e-01, 3.0131845380712763e-01,  1.6597075714728486e+01,
+      -5.7800006677690803e+00, 1.5381006811275582e+00,  -1.5197413389828438e+00,
+      -5.7800006677690803e+00, 3.0963818435550561e+00,  -7.6010709310479962e-01,
+      4.4547615970032189e-01,  1.5381006811275582e+00,  -7.6010709310479962e-01,
+      3.1280696744315561e-01,  -1.7575813761777823e-01, -1.5197413389828438e+00,
+      4.4547615970032189e-01,  -1.7575813761777823e-01, 1.6771576241498387e-01,
+      1.7019126909133337e+01,  -6.6527113537535385e+00, 1.3704144651040868e+00,
+      1.5022343603610107e+00,  -6.6527113537535385e+00, 2.9978544936485725e+00,
+      -1.8662862581940526e-01, -6.5208288251651947e-01, 1.3704144651040868e+00,
+      -1.8662862581940526e-01, 5.2409376673618568e-01,  -7.5645601483690317e-02,
+      1.5022343603610107e+00,  -6.5208288251651947e-01, -7.5645601483690317e-02,
+      3.2522439346024634e-01,  1.7662116032947637e+01,  -7.3173562685033531e+00,
+      -4.8473999550733515e-01, 1.2177126365029898e+00,  -7.3173562685033531e+00,
+      3.0582743607486380e+00,  9.5372312198528986e-02,  -4.4792222499260004e-01,
+      -4.8473999550733515e-01, 9.5372312198528986e-02,  4.2961213757689048e-01,
+      -2.5302983486156821e-01, 1.2177126365029898e+00,  -4.4792222499260004e-01,
+      -2.5302983486156821e-01, 3.5756224652772239e-01,  1.6523014322908107e+01,
+      -8.0087006367676903e+00, 1.8183754790473388e+00,  1.3553052743949423e+00,
+      -8.0087006367676903e+00, 4.0733577269970231e+00,  -1.1117655839274267e+00,
+      -7.2719651384385020e-01, 1.8183754790473388e+00,  -1.1117655839274267e+00,
+      5.2643538086294572e-01,  1.0149565570079074e-01,  1.3553052743949423e+00,
+      -7.2719651384385020e-01, 1.0149565570079074e-01,  4.9227647014805931e-01,
+      1.7397143785954697e+01,  9.1482994934314243e-01,  -1.2055972918955953e+00,
+      1.5195127886243485e+00,  9.1482994934314243e-01,  6.0050109036606192e-01,
+      2.3235888502821123e-01,  -2.5896893411041405e-01, -1.2055972918955953e+00,
+      2.3235888502821123e-01,  4.5114243240770924e-01,  -3.9549546725682455e-01,
+      1.5195127886243485e+00,  -2.5896893411041405e-01, -3.9549546725682455e-01,
+      3.9713449207042850e-01,  1.6754962568939149e+01,  -6.9909519455758877e+00,
+      1.3238214949533700e-02,  1.7461462053388506e+00,  -6.9909519455758877e+00,
+      3.5271059676844763e+00,  -4.8608852379905848e-01, -3.9767641403791087e-01,
+      1.3238214949533700e-02,  -4.8608852379905848e-01, 4.6008364836193916e-01,
+      -2.3283300864365591e-01, 1.7461462053388506e+00,  -3.9767641403791087e-01,
+      -2.3283300864365591e-01, 3.6997630813085547e-01,  1.6914438459335880e+01,
+      -8.2018573101250176e+00, 1.8607992969625340e+00,  3.6214197147114746e-01,
+      -8.2018573101250176e+00, 4.1440760753178036e+00,  -7.4448077344820940e-01,
+      2.1802370922440709e-02,  1.8607992969625340e+00,  -7.4448077344820940e-01,
+      4.2429585090834404e-01,  1.5770605248055045e-01,  3.6214197147114746e-01,
+      2.1802370922440709e-02,  1.5770605248055045e-01,  3.0822560548642208e-01,
+      1.7076011430674352e+01,  -8.8780259960025418e+00, 4.1114807990525343e-01,
+      1.7160868438637338e+00,  -8.8780259960025418e+00, 4.6868693538444646e+00,
+      -3.9792997857664614e-01, -7.8413912548256937e-01, 4.1114807990525343e-01,
+      -3.9792997857664614e-01, 4.9907853940129265e-01,  -2.2844650397698621e-01,
+      1.7160868438637338e+00,  -7.8413912548256937e-01, -2.2844650397698621e-01,
+      3.4562144955490959e-01,  1.6875577654372840e+01,  -6.7321113543278024e+00,
+      1.5467259695484039e+00,  1.5835725354604495e+00,  -6.7321113543278024e+00,
+      3.2243427952557981e+00,  -3.9371520741376925e-01, -3.6997207365849999e-01,
+      1.5467259695484039e+00,  -3.9371520741376925e-01, 7.3691512999550857e-01,
+      -6.5365934650236268e-02, 1.5835725354604495e+00,  -3.6997207365849999e-01,
+      -6.5365934650236268e-02, 4.7827347437249718e-01,  1.7164841669190295e+01,
+      -3.9184842337808603e+00, -1.9330224573943846e+00, -3.5384656982588716e+00,
+      -3.9184842337808603e+00, 1.8459124004525904e+00,  -2.7968361445259859e-01,
+      6.3051523823343891e-01,  -1.9330224573943846e+00, -2.7968361445259859e-01,
+      9.1319007138394170e-01,  4.4823426503524189e-01,  -3.5384656982588716e+00,
+      6.3051523823343891e-01,  4.4823426503524189e-01,  8.1043864783118424e-01,
+      1.7776298615595469e+01,  -8.3125493630052887e+00, -7.7441931055878699e-01,
+      1.5226098031185762e+00,  -8.3125493630052887e+00, 5.1190411322780580e+00,
+      2.2952835214172629e-02,  -1.4773103203139837e-01, -7.7441931055878699e-01,
+      2.2952835214172629e-02,  8.5889441357603480e-01,  -6.9917219907711145e-02,
+      1.5226098031185762e+00,  -1.4773103203139837e-01, -6.9917219907711145e-02,
+      4.2035440559421033e-01,  1.5862691008342372e+01,  -6.1176683909011116e+00,
+      1.2625855228210956e+00,  -1.3435168854108823e+00, -6.1176683909011116e+00,
+      2.3824446972003832e+00,  -5.4326864192360957e-01, 4.7940703668476647e-01,
+      1.2625855228210956e+00,  -5.4326864192360957e-01, 4.3406572657200515e-01,
+      9.5842602881569275e-02,  -1.3435168854108823e+00, 4.7940703668476647e-01,
+      9.5842602881569275e-02,  2.3854989743717084e-01,  1.8256070397132763e+01,
+      -6.9737979519085576e+00, -2.4828924208535295e+00, -6.8067548292463942e-01,
+      -6.9737979519085576e+00, 2.7984958050555582e+00,  6.3576956667988715e-01,
+      4.5488849481004173e-01,  -2.4828924208535295e+00, 6.3576956667988715e-01,
+      1.1009943270852411e+00,  -2.5112157021262832e-01, -6.8067548292463942e-01,
+      4.5488849481004173e-01,  -2.5112157021262832e-01, 6.3576733446761735e-01,
+      1.7056582256752865e+01,  -7.1009743841647808e+00, 1.1160779926499460e+00,
+      1.6103269168653844e+00,  -7.1009743841647808e+00, 3.1733314893589308e+00,
+      -7.0718250574770325e-01, -4.3660325341089939e-01, 1.1160779926499460e+00,
+      -7.0718250574770325e-01, 5.0309713543792534e-01,  -6.0959434222991160e-02,
+      1.6103269168653844e+00,  -4.3660325341089939e-01, -6.0959434222991160e-02,
+      4.6051187668197546e-01,  1.8357242514234677e+01,  -8.7847041747386729e+00,
+      2.9797124907094874e-01,  -2.1027146029189812e+00, -8.7847041747386729e+00,
+      4.3983224978124813e+00,  1.6771532571466041e-01,  8.6501485551462776e-01,
+      2.9797124907094874e-01,  1.6771532571466041e-01,  6.6655368883518706e-01,
+      -1.0627500830106132e-01, -2.1027146029189812e+00, 8.6501485551462776e-01,
+      -1.0627500830106132e-01, 4.8427082375374647e-01,  1.6772311022508610e+01,
+      -5.5233743204468615e+00, -1.4827239072848402e+00, -1.6683315295413981e-01,
+      -5.5233743204468615e+00, 2.8632372577610519e+00,  -1.9908041485308506e-01,
+      6.7147007749442378e-02,  -1.4827239072848402e+00, -1.9908041485308506e-01,
+      5.8384443306666967e-01,  1.8696608263835957e-02,  -1.6683315295413981e-01,
+      6.7147007749442378e-02,  1.8696608263835957e-02,  4.1948303453199803e-01,
+      1.6628501414098437e+01,  -8.4601815957694324e+00, -1.2499311118743477e+00,
+      1.3390492539579859e+00,  -8.4601815957694324e+00, 4.3761271922541898e+00,
+      6.0876619378843877e-01,  -5.7252755667562849e-01, -1.2499311118743477e+00,
+      6.0876619378843877e-01,  5.4159859530740562e-01,  1.6462302179435517e-01,
+      1.3390492539579859e+00,  -5.7252755667562849e-01, 1.6462302179435517e-01,
+      4.8726092250544450e-01,  1.7067254080129800e+01,  -8.6191292214447621e+00,
+      8.4191297536122567e-01,  1.6991769398963010e+00,  -8.6191292214447621e+00,
+      4.7912823899616992e+00,  -9.5325899518160639e-01, -9.9723259667135367e-01,
+      8.4191297536122567e-01,  -9.5325899518160639e-01, 8.3710075559284569e-01,
+      9.7283473262340187e-02,  1.6991769398963010e+00,  -9.9723259667135367e-01,
+      9.7283473262340187e-02,  3.6200206548711727e-01,  1.7702046820994806e+01,
+      1.1560960095286070e+00,  -3.3325166201283918e+00, -1.4775683676695714e+00,
+      1.1560960095286070e+00,  1.0791246214081565e+00,  1.8182679840027050e-01,
+      -5.0410612828568047e-01, -3.3325166201283918e+00, 1.8182679840027050e-01,
+      9.5912816564144698e-01,  3.5209385289513767e-01,  -1.4775683676695714e+00,
+      -5.0410612828568047e-01, 3.5209385289513767e-01,  6.1173061322520472e-01,
+      1.7855425834543190e+01,  -3.2697217693515706e+00, 1.4807091014457323e+00,
+      -2.6764346453544805e+00, -3.2697217693515706e+00, 8.3027813273245776e-01,
+      4.0426879175947228e-02,  3.3848123209104880e-01,  1.4807091014457323e+00,
+      4.0426879175947228e-02,  6.9975094084688649e-01,  -4.2689334899290365e-01,
+      -2.6764346453544805e+00, 3.3848123209104880e-01,  -4.2689334899290365e-01,
+      5.0049960147362493e-01,  1.6401315819584983e+01,  -7.9740616572801812e+00,
+      1.0005058774036371e+00,  -1.4203138481446598e+00, -7.9740616572801812e+00,
+      4.4023705076543553e+00,  -9.1941184955615007e-01, 5.7637509877548454e-01,
+      1.0005058774036371e+00,  -9.1941184955615007e-01, 4.5841164275306279e-01,
+      5.3383138750947870e-02,  -1.4203138481446598e+00, 5.7637509877548454e-01,
+      5.3383138750947870e-02,  1.9979339037632937e-01,  1.7332501002967668e+01,
+      -7.0781330453987845e+00, 9.2037176711584545e-01,  -1.7498405404426292e+00,
+      -7.0781330453987845e+00, 3.1470072102227165e+00,  -6.9097983597892554e-01,
+      6.2185046840444402e-01,  9.2037176711584545e-01,  -6.9097983597892554e-01,
+      5.5932116260968723e-01,  2.1858582089281359e-01,  -1.7498405404426292e+00,
+      6.2185046840444402e-01,  2.1858582089281359e-01,  5.2680120312008438e-01,
+      1.7734484227892562e+01,  -5.3654898514610219e+00, -1.6146396348259038e+00,
+      9.9033821011608625e-01,  -5.3654898514610219e+00, 2.0384509120220251e+00,
+      2.5981041293593549e-01,  1.2547778095454426e-01,  -1.6146396348259038e+00,
+      2.5981041293593549e-01,  7.0973144206899874e-01,  -2.6410086403128091e-01,
+      9.9033821011608625e-01,  1.2547778095454426e-01,  -2.6410086403128091e-01,
+      4.9890531981897601e-01,  1.7373803386373812e+01,  -7.3655877697476715e+00,
+      7.8270125086657205e-02,  -2.0999061171249407e+00, -7.3655877697476715e+00,
+      3.8164841996986887e+00,  3.9641030912017372e-02,  6.5278955794156168e-01,
+      7.8270125086657205e-02,  3.9641030912017372e-02,  4.1742585469994020e-01,
+      1.6686881152650240e-02,  -2.0999061171249407e+00, 6.5278955794156168e-01,
+      1.6686881152650240e-02,  3.4144491013516665e-01,  1.7266719068868216e+01,
+      -9.0704352506311370e+00, 1.0694216573455857e+00,  -1.5795224196213768e+00,
+      -9.0704352506311370e+00, 5.0037919681802911e+00,  -9.3097451917909035e-01,
+      6.6974458417928873e-01,  1.0694216573455857e+00,  -9.3097451917909035e-01,
+      7.5988767453192230e-01,  7.6311108649566889e-02,  -1.5795224196213768e+00,
+      6.6974458417928873e-01,  7.6311108649566889e-02,  2.9489768008036216e-01,
+      1.6376515340100283e+01,  -6.9793256398089296e+00, 1.5243144645041427e+00,
+      2.7079268382937571e-01,  -6.9793256398089296e+00, 3.0541734354131531e+00,
+      -7.7135074324503605e-01, -6.8397349674078733e-02, 1.5243144645041427e+00,
+      -7.7135074324503605e-01, 3.6395566937932222e-01,  6.2728909404942329e-02,
+      2.7079268382937571e-01,  -6.8397349674078733e-02, 6.2728909404942329e-02,
+      3.6164470489278866e-01,  1.7516133518582485e+01,  -8.3514443126068123e+00,
+      -1.7180303760588957e+00, 1.5748319209869406e+00,  -8.3514443126068123e+00,
+      5.3528291603548226e+00,  -3.9980896941006638e-01, -7.0540361469493940e-02,
+      -1.7180303760588957e+00, -3.9980896941006638e-01, 1.2547848615007291e+00,
+      -7.5175461576019531e-01, 1.5748319209869406e+00,  -7.0540361469493940e-02,
+      -7.5175461576019531e-01, 5.0207052491314630e-01,  1.6471332655238577e+01,
+      -1.9840240794333170e+00, -2.7675952686613430e+00, 1.7245254662592493e+00,
+      -1.9840240794333170e+00, 1.3786012470006792e+00,  -1.7634724450792111e-01,
+      -6.8648055131182217e-01, -2.7675952686613430e+00, -1.7634724450792111e-01,
+      7.1321367156083215e-01,  -1.2327176278311069e-01, 1.7245254662592493e+00,
+      -6.8648055131182217e-01, -1.2327176278311069e-01, 4.9397169669978858e-01,
+      1.7091226474203204e+01,  -4.5675405166074583e+00, 1.4400642541699227e+00,
+      1.2141833386470169e+00,  -4.5675405166074583e+00, 1.3161027292266854e+00,
+      -3.1345863399964818e-01, -5.0066767853585703e-01, 1.4400642541699227e+00,
+      -3.1345863399964818e-01, 6.7234081428616688e-01,  -1.3503981152357039e-01,
+      1.2141833386470169e+00,  -5.0066767853585703e-01, -1.3503981152357039e-01,
+      4.3385283110391992e-01,  1.8246506732367031e+01,  -7.6558698630583821e+00,
+      1.9213354454183826e+00,  1.6373332810021559e+00,  -7.6558698630583821e+00,
+      3.9467944680536289e+00,  -4.0297301651530965e-01, -5.2963503443511684e-01,
+      1.9213354454183826e+00,  -4.0297301651530965e-01, 5.3176392085153512e-01,
+      8.9574705000869517e-02,  1.6373332810021559e+00,  -5.2963503443511684e-01,
+      8.9574705000869517e-02,  4.4536642738675492e-01,  1.7911671783219571e+01,
+      -5.9985252112864700e+00, -4.4745656306102388e-01, 1.1721399057841080e+00,
+      -5.9985252112864700e+00, 2.4139535616836252e+00,  -1.7219293329619995e-01,
+      -5.0172832204407780e-01, -4.4745656306102388e-01, -1.7219293329619995e-01,
+      9.7476836720338544e-01,  -3.2171662686669505e-01, 1.1721399057841080e+00,
+      -5.0172832204407780e-01, -3.2171662686669505e-01, 3.0939894061872220e-01,
+      1.6155032382388242e+01,  -7.2064352418320210e+00, 2.5981969792469695e-01,
+      2.8727950159025917e-01,  -7.2064352418320210e+00, 3.9980129943264391e+00,
+      -5.1910321191687969e-01, 3.2409611261106097e-01,  2.5981969792469695e-01,
+      -5.1910321191687969e-01, 6.1299094127694165e-01,  -4.6966198549264621e-01,
+      2.8727950159025917e-01,  3.2409611261106097e-01,  -4.6966198549264621e-01,
+      4.1154555711070495e-01,  1.5942674207027702e+01,  -8.2948902984678892e+00,
+      -1.9203061850624374e+00, 3.4964168574330856e-01,  -8.2948902984678892e+00,
+      4.7444890690473800e+00,  6.1177999082136658e-01,  -5.6828655972420206e-01,
+      -1.9203061850624374e+00, 6.1177999082136658e-01,  6.4406756760102324e-01,
+      3.0795787565507104e-01,  3.4964168574330856e-01,  -5.6828655972420206e-01,
+      3.0795787565507104e-01,  3.5590181457520187e-01,  1.6944946717618595e+01,
+      -6.0254959800609988e+00, -2.8357767854573899e+00, 4.1473435870618813e-01,
+      -6.0254959800609988e+00, 3.0665746424677778e+00,  2.3975145045391522e-01,
+      3.0627924467723289e-01,  -2.8357767854573899e+00, 2.3975145045391522e-01,
+      1.2345787399737920e+00,  -3.3814683111213717e-01, 4.1473435870618813e-01,
+      3.0627924467723289e-01,  -3.3814683111213717e-01, 3.3103947150526031e-01,
+      1.7117314554066201e+01,  -8.8236140201371818e+00, 1.0581536138514975e+00,
+      1.3980599420035920e+00,  -8.8236140201371818e+00, 4.8129582535771425e+00,
+      -8.8712259222580214e-01, -7.9225911531011384e-01, 1.0581536138514975e+00,
+      -8.8712259222580214e-01, 5.7517102508127571e-01,  7.2363511911666983e-02,
+      1.3980599420035920e+00,  -7.9225911531011384e-01, 7.2363511911666983e-02,
+      2.9909589630683520e-01,  1.6972810488291550e+01,  -5.9892458751059818e+00,
+      1.2149630718729185e+00,  -2.0798444379053964e+00, -5.9892458751059818e+00,
+      2.4380332297536320e+00,  -2.0344300671588472e-01, 5.3145291090067048e-01,
+      1.2149630718729185e+00,  -2.0344300671588472e-01, 6.6369223311103109e-01,
+      -6.0094731681376024e-04, -2.0798444379053964e+00, 5.3145291090067048e-01,
+      -6.0094731681376024e-04, 5.7957663759360090e-01,  1.7343766938479227e+01,
+      -6.6063740219858600e+00, -2.4017757423619175e+00, 1.6051740578902221e+00,
+      -6.6063740219858600e+00, 3.4993875746971375e+00,  -3.2234547803818028e-02,
+      -3.2663881218999763e-01, -2.4017757423619175e+00, -3.2234547803818028e-02,
+      1.2504135194559383e+00,  -5.1587400430594810e-01, 1.6051740578902221e+00,
+      -3.2663881218999763e-01, -5.1587400430594810e-01, 3.0068541586224429e-01,
+      1.6848023901869816e+01,  -7.7245284650863724e+00, -6.3375161950989023e-01,
+      1.7598733104365907e+00,  -7.7245284650863724e+00, 3.6368411257897200e+00,
+      5.4898635756645486e-01,  -9.3708181741907781e-01, -6.3375161950989023e-01,
+      5.4898635756645486e-01,  7.2509667201839190e-01,  -4.1251648474508129e-01,
+      1.7598733104365907e+00,  -9.3708181741907781e-01, -4.1251648474508129e-01,
+      4.9975565734465982e-01,  1.7511854581625478e+01,  -8.5404613942496930e+00,
+      4.7313924091567117e-01,  -5.1532816823540972e-01, -8.5404613942496930e+00,
+      4.4031820387689145e+00,  -3.6267289012126913e-01, -1.3528742104501390e-02,
+      4.7313924091567117e-01,  -3.6267289012126913e-01, 6.9456125684342040e-01,
+      -1.6317464306172286e-01, -5.1532816823540972e-01, -1.3528742104501390e-02,
+      -1.6317464306172286e-01, 4.5384531453753552e-01,  1.6749377427726831e+01,
+      -5.7772682564884885e+00, -1.9377388468336154e+00, 1.2795905554738882e+00,
+      -5.7772682564884885e+00, 2.3751734474029860e+00,  3.8187973000458070e-01,
+      -1.1511718783472463e-01, -1.9377388468336154e+00, 3.8187973000458070e-01,
+      1.1071634761400777e+00,  -9.4204177495531310e-02, 1.2795905554738882e+00,
+      -1.1511718783472463e-01, -9.4204177495531310e-02, 5.0911537749846014e-01,
+      1.6804926100729805e+01,  -6.7694804790437084e+00, 1.3290892723432168e+00,
+      -2.0329912302039057e+00, -6.7694804790437084e+00, 3.0050053244019006e+00,
+      -6.0641807348948229e-01, 6.5667795941713925e-01,  1.3290892723432168e+00,
+      -6.0641807348948229e-01, 4.4527286361472279e-01,  -2.5453902810523399e-01,
+      -2.0329912302039057e+00, 6.5667795941713925e-01,  -2.5453902810523399e-01,
+      3.9739245233707388e-01,  1.5868413007832768e+01,  -9.6058749345061720e+00,
+      -1.2704002060724218e+00, 8.6185583106525021e-01,  -9.6058749345061720e+00,
+      5.8473081312071127e+00,  7.6752664926696923e-01,  -4.0695460424360097e-01,
+      -1.2704002060724218e+00, 7.6752664926696923e-01,  1.1283540336783828e+00,
+      -1.6618593567686379e-01, 8.6185583106525021e-01,  -4.0695460424360097e-01,
+      -1.6618593567686379e-01, 4.6113112128941769e-01,  1.8481541594398891e+01,
+      -6.6615632043770274e+00, -7.2359822450546107e-01, -1.4644271889003471e+00,
+      -6.6615632043770274e+00, 2.4918957745186043e+00,  9.0679417982270888e-02,
+      5.3245874472517518e-01,  -7.2359822450546107e-01, 9.0679417982270888e-02,
+      4.0656985508236138e-01,  1.5911683154833606e-01,  -1.4644271889003471e+00,
+      5.3245874472517518e-01,  1.5911683154833606e-01,  3.2173334280500521e-01,
+      1.6228236401107939e+01,  -8.1101411423722158e+00, 1.3634571880312281e+00,
+      -2.6078480124847037e-03, -8.1101411423722158e+00, 5.1879397503268816e+00,
+      -2.9695259856663125e-01, -6.7344064243872026e-01, 1.3634571880312281e+00,
+      -2.9695259856663125e-01, 5.2461909416950570e-01,  -1.3795586005253810e-01,
+      -2.6078480124847037e-03, -6.7344064243872026e-01, -1.3795586005253810e-01,
+      4.3066586374098781e-01,  1.5589961900178967e+01,  -4.5372333345909119e+00,
+      -3.3934718591944404e+00, 1.3540033039710933e+00,  -4.5372333345909119e+00,
+      1.8585499822791560e+00,  6.1587937597953046e-01,  -5.8318661350036138e-01,
+      -3.3934718591944404e+00, 6.1587937597953046e-01,  1.4074389868822910e+00,
+      -4.7675943751143041e-01, 1.3540033039710933e+00,  -5.8318661350036138e-01,
+      -4.7675943751143041e-01, 4.2143576596484850e-01,  1.5611938835367109e+01,
+      -7.8362555612747959e+00, 1.1634362937361629e+00,  8.1530526315465568e-01,
+      -7.8362555612747959e+00, 4.0174329797324688e+00,  -7.4513440796506791e-01,
+      -3.6945515457627198e-01, 1.1634362937361629e+00,  -7.4513440796506791e-01,
+      3.9648861064960028e-01,  -3.2953317777742928e-02, 8.1530526315465568e-01,
+      -3.6945515457627198e-01, -3.2953317777742928e-02, 3.7578484154027375e-01,
+      1.8315149213395173e+01,  -7.7012188993654940e+00, 1.1508124885520843e+00,
+      1.3123590383630339e+00,  -7.7012188993654940e+00, 3.4780615549256009e+00,
+      -1.9166820372788307e-01, -7.6063211015438847e-01, 1.1508124885520843e+00,
+      -1.9166820372788307e-01, 5.4468638241889589e-01,  -1.2072611619645017e-01,
+      1.3123590383630339e+00,  -7.6063211015438847e-01, -1.2072611619645017e-01,
+      2.9841808326885588e-01,  1.6362440054767124e+01,  -5.9499672523232112e+00,
+      1.0052721428804259e+00,  -1.3668397556002811e+00, -5.9499672523232112e+00,
+      3.2387435072295325e+00,  -7.2396011033843383e-02, -1.5809150279620302e-01,
+      1.0052721428804259e+00,  -7.2396011033843383e-02, 7.2454982301604076e-01,
+      -2.0381095229621829e-01, -1.3668397556002811e+00, -1.5809150279620302e-01,
+      -2.0381095229621829e-01, 5.1930847189189466e-01,  1.5436369548255527e+01,
+      -9.1710802008604020e+00, 1.4209357173119814e+00,  -5.2333006294648410e-01,
+      -9.1710802008604020e+00, 5.4880135746084751e+00,  -9.2088987123056165e-01,
+      1.8726405116782840e-01,  1.4209357173119814e+00,  -9.2088987123056165e-01,
+      8.3812049939319733e-01,  2.7683676776817306e-01,  -5.2333006294648410e-01,
+      1.8726405116782840e-01,  2.7683676776817306e-01,  4.1959049781180191e-01,
+      1.5666964412535105e+01,  -9.7207170538441261e+00, 1.4333836194418987e+00,
+      1.5960317913987163e+00,  -9.7207170538441261e+00, 6.6896120732782265e+00,
+      -5.5802284343341291e-01, -1.3529115567912897e+00, 1.4333836194418987e+00,
+      -5.5802284343341291e-01, 4.9413228052236202e-01,  -1.9425632733667825e-01,
+      1.5960317913987163e+00,  -1.3529115567912897e+00, -1.9425632733667825e-01,
+      4.8918814427528240e-01,  1.5290527713026155e+01,  -7.6833137332910173e+00,
+      -1.0751883919467500e+00, 1.6910116895309504e+00,  -7.6833137332910173e+00,
+      3.9082192239312277e+00,  4.6621331985589443e-01,  -9.1883958537926080e-01,
+      -1.0751883919467500e+00, 4.6621331985589443e-01,  5.9851497650838281e-01,
+      -2.2322179187601654e-01, 1.6910116895309504e+00,  -9.1883958537926080e-01,
+      -2.2322179187601654e-01, 3.9830059929712897e-01,  1.7658936182671727e+01,
+      -7.9196848078408060e+00, 2.0235234121177790e+00,  1.4901521845277215e+00,
+      -7.9196848078408060e+00, 3.6001776162691934e+00,  -1.0140973115763243e+00,
+      -6.7507570700253083e-01, 2.0235234121177790e+00,  -1.0140973115763243e+00,
+      6.3032871812139324e-01,  -7.6750309951210835e-02, 1.4901521845277215e+00,
+      -6.7507570700253083e-01, -7.6750309951210835e-02, 5.4789767104015730e-01,
+      1.5457997297160160e+01,  -6.1526949506673070e+00, 6.7273961399725346e-01,
+      9.5770758001695433e-01,  -6.1526949506673070e+00, 2.7731742075607597e+00,
+      -4.3012052691323910e-01, -1.2255714588398386e-01, 6.7273961399725346e-01,
+      -4.3012052691323910e-01, 3.3958384751714510e-01,  -1.2798871872114692e-01,
+      9.5770758001695433e-01,  -1.2255714588398386e-01, -1.2798871872114692e-01,
+      2.7268656714191419e-01,  1.7503328855994745e+01,  -9.9687455312455189e+00,
+      -1.0297062203185405e+00, -1.7031047478256800e+00, -9.9687455312455189e+00,
+      5.9614589455051270e+00,  6.5247739444482156e-01,  7.6794594281655504e-01,
+      -1.0297062203185405e+00, 6.5247739444482156e-01,  3.5943482704316476e-01,
+      -1.7606821206230184e-02, -1.7031047478256800e+00, 7.6794594281655504e-01,
+      -1.7606821206230184e-02, 3.2716633335958190e-01,  1.6373795057571847e+01,
+      -7.7660482226949981e+00, 1.5882849012515776e+00,  -1.8494279583300000e-01,
+      -7.7660482226949981e+00, 3.9632086562391491e+00,  -1.0503970491370456e+00,
+      -2.0638321023081674e-01, 1.5882849012515776e+00,  -1.0503970491370456e+00,
+      7.3133644598289149e-01,  1.7037283334568715e-01,  -1.8494279583300000e-01,
+      -2.0638321023081674e-01, 1.7037283334568715e-01,  3.6991380078193070e-01,
+      1.7380218869596824e+01,  -4.6733393169385451e+00, -1.0112374919091498e+00,
+      6.9440806713566350e-01,  -4.6733393169385451e+00, 1.7664189364611840e+00,
+      5.2267413518611217e-01,  -4.7073978286014190e-01, -1.0112374919091498e+00,
+      5.2267413518611217e-01,  7.8792508851916998e-01,  -6.0593541507832505e-01,
+      6.9440806713566350e-01,  -4.7073978286014190e-01, -6.0593541507832505e-01,
+      4.8532777010071737e-01,  1.6688480100290171e+01,  -2.4452257429467745e+00,
+      -4.3502468301668573e+00, 2.8686321885832311e-01,  -2.4452257429467745e+00,
+      1.9558655769328408e+00,  -2.2650646337660044e-01, 2.7255380760267456e-01,
+      -4.3502468301668573e+00, -2.2650646337660044e-01, 1.8601358238474783e+00,
+      -7.0528037041038183e-01, 2.8686321885832311e-01,  2.7255380760267456e-01,
+      -7.0528037041038183e-01, 8.8533224650523568e-01,  1.7558888970958407e+01,
+      -3.5525153278861916e+00, 1.3269063954226350e+00,  1.2711647576232004e+00,
+      -3.5525153278861916e+00, 1.9328537647668227e+00,  -7.6166921543233657e-01,
+      2.0334544186972267e-01,  1.3269063954226350e+00,  -7.6166921543233657e-01,
+      4.4584049689038086e-01,  2.2203925188891041e-02,  1.2711647576232004e+00,
+      2.0334544186972267e-01,  2.2203925188891041e-02,  3.5499333012698542e-01,
+      1.7503104597019327e+01,  -8.2902573074777788e+00, 1.4809359053080735e+00,
+      5.4433271851216269e-01,  -8.2902573074777788e+00, 4.7655682747958519e+00,
+      4.0148581275228912e-02,  -8.3819205806563979e-01, 1.4809359053080735e+00,
+      4.0148581275228912e-02,  7.9084822528710697e-01,  -4.0761145344905420e-01,
+      5.4433271851216269e-01,  -8.3819205806563979e-01, -4.0761145344905420e-01,
+      7.7052589374216640e-01,  1.6708990617842698e+01,  -4.8454163020535930e+00,
+      -2.2673879150930514e+00, -7.3833835476206833e-01, -4.8454163020535930e+00,
+      1.8474149183735951e+00,  5.7960906898683118e-01,  -2.9034175784539162e-01,
+      -2.2673879150930514e+00, 5.7960906898683118e-01,  9.1094402341096992e-01,
+      2.7337020492260133e-01,  -7.3833835476206833e-01, -2.9034175784539162e-01,
+      2.7337020492260133e-01,  6.2002369775813038e-01,  1.6722997168847904e+01,
+      -8.6990946412050523e+00, 3.1803438787591409e-01,  1.7340206087992600e+00,
+      -8.6990946412050523e+00, 4.6729144559920757e+00,  -4.4803121361281151e-01,
+      -7.6591403512264922e-01, 3.1803438787591409e-01,  -4.4803121361281151e-01,
+      6.2109380665325764e-01,  -2.1702562038719872e-01, 1.7340206087992600e+00,
+      -7.6591403512264922e-01, -2.1702562038719872e-01, 3.0659507102349531e-01,
+      1.6670375714696366e+01,  -6.3873997157981712e+00, 1.1422940619218669e+00,
+      1.6114990153830422e+00,  -6.3873997157981712e+00, 2.7467584657824697e+00,
+      -1.4220435608326074e-01, -7.6707780933203606e-01, 1.1422940619218669e+00,
+      -1.4220435608326074e-01, 4.0764553257721842e-01,  -2.8201231903517046e-02,
+      1.6114990153830422e+00,  -7.6707780933203606e-01, -2.8201231903517046e-02,
+      2.3272465802632000e-01,  1.7177850258051929e+01,  -7.7485985175283538e+00,
+      1.7642463130395969e+00,  1.1947573624901038e+00,  -7.7485985175283538e+00,
+      3.7541650257159613e+00,  -9.4830419732951277e-01, -3.4701571693749028e-01,
+      1.7642463130395969e+00,  -9.4830419732951277e-01, 2.7843609384632584e-01,
+      2.2138492879799063e-02,  1.1947573624901038e+00,  -3.4701571693749028e-01,
+      2.2138492879799063e-02,  2.4621808163691816e-01,  1.7591518642987552e+01,
+      -6.1334674582773179e+00, -1.0188517048841167e-01, -1.0141463265801072e+00,
+      -6.1334674582773179e+00, 2.8881371999494077e+00,  -6.5349503463848291e-01,
+      6.1786208815218846e-01,  -1.0188517048841167e-01, -6.5349503463848291e-01,
+      6.8766586365008342e-01,  -1.3177271773416527e-01, -1.0141463265801072e+00,
+      6.1786208815218846e-01,  -1.3177271773416527e-01, 3.5763038289252136e-01,
+      1.6460555062664469e+01,  -1.0801338119838852e-01, -2.1676479110304978e+00,
+      2.3797610762749716e-01,  -1.0801338119838852e-01, 1.0801941303644456e+00,
+      -5.3813243483207052e-01, -5.6237375422626656e-01, -2.1676479110304978e+00,
+      -5.3813243483207052e-01, 5.7967459423528112e-01,  2.4903805958998279e-01,
+      2.3797610762749716e-01,  -5.6237375422626656e-01, 2.4903805958998279e-01,
+      2.9853118936744938e-01,  1.6363067821237387e+01,  -7.5801725217658182e+00,
+      -2.3154589119114033e+00, 8.6491950425521069e-01,  -7.5801725217658182e+00,
+      4.1093160719549147e+00,  7.5936478003922270e-01,  -7.9305772741362146e-01,
+      -2.3154589119114033e+00, 7.5936478003922270e-01,  5.5950251851751887e-01,
+      1.9960092492144615e-02,  8.6491950425521069e-01,  -7.9305772741362146e-01,
+      1.9960092492144615e-02,  3.6240473105355619e-01,  1.7604721419101381e+01,
+      -5.5982944484012851e+00, -5.1115292685000280e-01, 1.1238691719448353e+00,
+      -5.5982944484012851e+00, 1.9974013218860862e+00,  4.2330085337569273e-01,
+      -3.1553160809344194e-01, -5.1115292685000280e-01, 4.2330085337569273e-01,
+      3.9483438550305838e-01,  1.4146215690809766e-01,  1.1238691719448353e+00,
+      -3.1553160809344194e-01, 1.4146215690809766e-01,  3.0911274591187871e-01,
+      1.7481000191587800e+01,  -8.4105488031411451e+00, 1.3039429588996374e+00,
+      -1.8943355562813982e+00, -8.4105488031411451e+00, 4.2117242584921115e+00,
+      -9.9620212290555921e-01, 7.2096851111157401e-01,  1.3039429588996374e+00,
+      -9.9620212290555921e-01, 1.0262916082555362e+00,  2.6308977000995892e-01,
+      -1.8943355562813982e+00, 7.2096851111157401e-01,  2.6308977000995892e-01,
+      4.2893693974357433e-01,  1.6142451134319948e+01,  -8.5829976232168583e+00,
+      -1.1914111242942487e+00, -1.7828308739068168e+00, -8.5829976232168583e+00,
+      4.5916185650689387e+00,  6.9925837704708993e-01,  8.5594103603863902e-01,
+      -1.1914111242942487e+00, 6.9925837704708993e-01,  6.4694343394202003e-01,
+      -2.2381142282272101e-01, -1.7828308739068168e+00, 8.5594103603863902e-01,
+      -2.2381142282272101e-01, 5.4706240054847100e-01,  1.6219952760326322e+01,
+      -5.2583246351358710e+00, -1.1318857704341121e+00, 1.5464549680915729e+00,
+      -5.2583246351358710e+00, 1.9538218170664110e+00,  4.7907376803457552e-01,
+      -5.5765002134851860e-01, -1.1318857704341121e+00, 4.7907376803457552e-01,
+      8.0266791476788724e-01,  4.1910587271559552e-01,  1.5464549680915729e+00,
+      -5.5765002134851860e-01, 4.1910587271559552e-01,  6.1337978657368797e-01,
+      1.6349812064295705e+01,  -9.1250338518494676e+00, 1.4372115668954004e+00,
+      1.3574151001729744e+00,  -9.1250338518494676e+00, 5.6689651253021376e+00,
+      -7.7650769531650343e-01, -1.0781752355166871e+00, 1.4372115668954004e+00,
+      -7.7650769531650343e-01, 5.1241508875946074e-01,  -1.1886028240939153e-01,
+      1.3574151001729744e+00,  -1.0781752355166871e+00, -1.1886028240939153e-01,
+      4.2133699414022918e-01,  1.7730633966916006e+01,  -4.2079378425024334e+00,
+      -2.4082318029242074e+00, -2.9326567435152695e+00, -4.2079378425024334e+00,
+      1.8729697103084302e+00,  -1.7025332772484633e-01, 6.4769249581465438e-01,
+      -2.4082318029242074e+00, -1.7025332772484633e-01, 9.6422971517920864e-01,
+      4.9637876618550530e-01,  -2.9326567435152695e+00, 6.4769249581465438e-01,
+      4.9637876618550530e-01,  9.0607906741843292e-01,  1.6520034768467930e+01,
+      -9.3863666536330559e+00, 1.6290624727505552e+00,  1.0051389816863621e+00,
+      -9.3863666536330559e+00, 5.5944801287984092e+00,  -9.4935489921666538e-01,
+      -4.5064099036376648e-01, 1.6290624727505552e+00,  -9.4935489921666538e-01,
+      5.8585060608512141e-01,  -2.9952535059011953e-01, 1.0051389816863621e+00,
+      -4.5064099036376648e-01, -2.9952535059011953e-01, 4.7197837495700723e-01,
+      1.7111516934883419e+01,  -6.5918535028673038e+00, 1.6710814028293859e+00,
+      -9.1262323378824761e-01, -6.5918535028673038e+00, 3.0835319597298456e+00,
+      -2.6912281791743353e-01, 4.2893119166797788e-01,  1.6710814028293859e+00,
+      -2.6912281791743353e-01, 4.2342213175980664e-01,  -6.1060011100531768e-02,
+      -9.1262323378824761e-01, 4.2893119166797788e-01,  -6.1060011100531768e-02,
+      3.3382324238618233e-01,  1.7310554752654891e+01,  -6.3254698558225808e+00,
+      -1.5889046370618964e+00, 4.7998865450534484e-01,  -6.3254698558225808e+00,
+      3.5951424725613625e+00,  -4.3752933180985376e-01, -2.0052810357878864e-01,
+      -1.5889046370618964e+00, -4.3752933180985376e-01, 1.1169674552585609e+00,
+      -4.3289156862570732e-01, 4.7998865450534484e-01,  -2.0052810357878864e-01,
+      -4.3289156862570732e-01, 1.0348453338540857e+00,  1.5725152411710674e+01,
+      -8.1251695223259723e+00, -1.6221105906193571e+00, -6.5726645228657188e-01,
+      -8.1251695223259723e+00, 4.2655830965537183e+00,  6.8154083535105903e-01,
+      4.4651245957010732e-01,  -1.6221105906193571e+00, 6.8154083535105903e-01,
+      6.2732001380343250e-01,  -2.8865920201796080e-01, -6.5726645228657188e-01,
+      4.4651245957010732e-01,  -2.8865920201796080e-01, 3.1861318811044576e-01,
+      1.5902288815268022e+01,  -8.2452928765255784e+00, -2.0278743230239088e-01,
+      6.3486533567920422e-01,  -8.2452928765255784e+00, 4.6020174712364978e+00,
+      -3.3658080874833285e-01, -4.3977303097587184e-01, -2.0278743230239088e-01,
+      -3.3658080874833285e-01, 6.0187702290916412e-01,  1.1014613257737753e-01,
+      6.3486533567920422e-01,  -4.3977303097587184e-01, 1.1014613257737753e-01,
+      4.8184779224444352e-01,  1.6317485422902017e+01,  -9.0944710620236826e+00,
+      1.3236077186681450e+00,  1.0543599207408361e+00,  -9.0944710620236826e+00,
+      5.0770952150987654e+00,  -6.8283997895415571e-01, -6.1415172571988774e-01,
+      1.3236077186681450e+00,  -6.8283997895415571e-01, 4.6986429225388077e-01,
+      -7.1779176794846591e-02, 1.0543599207408361e+00,  -6.1415172571988774e-01,
+      -7.1779176794846591e-02, 3.6646138105239617e-01,  1.6629466429731355e+01,
+      -5.1277919122112783e+00, 1.4340055938268250e+00,  7.5497557373088631e-01,
+      -5.1277919122112783e+00, 2.5533892790953709e+00,  -6.1907458698641737e-01,
+      2.8548716783701866e-01,  1.4340055938268250e+00,  -6.1907458698641737e-01,
+      5.5021114982679775e-01,  2.1988595200875122e-01,  7.5497557373088631e-01,
+      2.8548716783701866e-01,  2.1988595200875122e-01,  4.6790021907528190e-01,
+      1.8051015423857478e+01,  -6.3374886699541655e+00, 6.7052196738207570e-01,
+      1.7901863112928220e+00,  -6.3374886699541655e+00, 3.6107306567453308e+00,
+      -1.1554776709999790e+00, -1.6138128679965386e-01, 6.7052196738207570e-01,
+      -1.1554776709999790e+00, 6.6215475367233645e-01,  -2.0835424063584101e-01,
+      1.7901863112928220e+00,  -1.6138128679965386e-01, -2.0835424063584101e-01,
+      3.8230347739520337e-01,  1.4983783383912956e+01,  -6.5303033327345901e+00,
+      1.5693503671455411e+00,  1.3115128213232672e+00,  -6.5303033327345901e+00,
+      3.3609306656827300e+00,  -3.8347465608612052e-01, -8.5246645597040638e-01,
+      1.5693503671455411e+00,  -3.8347465608612052e-01, 4.6444575454540948e-01,
+      1.1133056795538820e-01,  1.3115128213232672e+00,  -8.5246645597040638e-01,
+      1.1133056795538820e-01,  4.2050090792459205e-01,  1.6516272469929035e+01,
+      -8.1086408858622381e+00, 1.2444910428820757e+00,  1.4111414188071807e+00,
+      -8.1086408858622381e+00, 4.7397792897538693e+00,  -3.4785795277638909e-01,
+      -3.0333528301140100e-01, 1.2444910428820757e+00,  -3.4785795277638909e-01,
+      6.7086370063026790e-01,  2.4773393103872440e-03,  1.4111414188071807e+00,
+      -3.0333528301140100e-01, 2.4773393103872440e-03,  4.3791110589613524e-01,
+      1.6929227088899484e+01,  -7.8477753916727986e+00, 5.9122351013238139e-01,
+      1.1995974186685161e+00,  -7.8477753916727986e+00, 5.0130450761309371e+00,
+      -4.8119378860454465e-01, -7.7299224219143381e-01, 5.9122351013238139e-01,
+      -4.8119378860454465e-01, 6.6837844940101898e-01,  -2.8293163674859706e-01,
+      1.1995974186685161e+00,  -7.7299224219143381e-01, -2.8293163674859706e-01,
+      3.2651017287964618e-01,  1.5491954999314911e+01,  -6.9941923612056076e+00,
+      1.6077749159874175e+00,  7.8834911790693296e-01,  -6.9941923612056076e+00,
+      3.9990238979833492e+00,  -3.1324180602386442e-01, -2.9940611862675603e-01,
+      1.6077749159874175e+00,  -3.1324180602386442e-01, 3.8426170077342980e-01,
+      6.1325558045078110e-02,  7.8834911790693296e-01,  -2.9940611862675603e-01,
+      6.1325558045078110e-02,  1.9844293738738106e-01,  1.8256490990353864e+01,
+      -7.2596656308294474e+00, 7.2457573583544210e-02,  1.6393036849568940e+00,
+      -7.2596656308294474e+00, 3.1514450092236164e+00,  1.1658304601420899e-01,
+      -4.7408825564360935e-01, 7.2457573583544210e-02,  1.1658304601420899e-01,
+      4.8823333158455351e-01,  -9.9774500139366201e-02, 1.6393036849568940e+00,
+      -4.7408825564360935e-01, -9.9774500139366201e-02, 3.6855349855371966e-01,
+      1.6820120859691094e+01,  -6.8557939495607343e+00, 9.3076613697930166e-01,
+      1.3614159506132830e+00,  -6.8557939495607343e+00, 2.8579888611432107e+00,
+      -4.9935124766213668e-01, -5.7041886268769781e-01, 9.3076613697930166e-01,
+      -4.9935124766213668e-01, 4.6829838997314921e-01,  -1.4197642929627971e-01,
+      1.3614159506132830e+00,  -5.7041886268769781e-01, -1.4197642929627971e-01,
+      4.3316577284207314e-01,  1.6419872338608442e+01,  -8.5832050734274041e+00,
+      1.7192566692161901e+00,  -5.1218989222962830e-01, -8.5832050734274041e+00,
+      4.4976512426775175e+00,  -9.1124533514664441e-01, 2.8795414896579641e-01,
+      1.7192566692161901e+00,  -9.1124533514664441e-01, 7.4971043306955509e-01,
+      -5.5608503289291955e-01, -5.1218989222962830e-01, 2.8795414896579641e-01,
+      -5.5608503289291955e-01, 4.6700869661319677e-01,  1.8135538967327214e+01,
+      -3.2166336691478290e+00, -2.4622113217501940e+00, -3.6996532735385559e+00,
+      -3.2166336691478290e+00, 1.2549932403658035e+00,  -2.2292652269956342e-01,
+      5.6100325796761008e-01,  -2.4622113217501940e+00, -2.2292652269956342e-01,
+      1.0930566691740100e+00,  4.2547956233729428e-01,  -3.6996532735385559e+00,
+      5.6100325796761008e-01,  4.2547956233729428e-01,  9.9882325270528738e-01,
+      1.7439351281629058e+01,  -5.8296895831009508e+00, 1.5263763648410595e+00,
+      -1.0127116989967861e+00, -5.8296895831009508e+00, 3.0345134422808195e+00,
+      -1.2453100751923698e+00, 2.5921890911740408e-01,  1.5263763648410595e+00,
+      -1.2453100751923698e+00, 6.4872185802115923e-01,  -1.0471192819150560e-01,
+      -1.0127116989967861e+00, 2.5921890911740408e-01,  -1.0471192819150560e-01,
+      3.4321528712268262e-01,  1.8225452775936304e+01,  -8.3661120474364186e+00,
+      1.6298037497783302e-01,  1.3744275939125781e+00,  -8.3661120474364186e+00,
+      4.1934382694433019e+00,  -4.2888473389741155e-01, -3.3111383016288920e-01,
+      1.6298037497783302e-01,  -4.2888473389741155e-01, 3.9223109407533963e-01,
+      -2.8438634962977571e-01, 1.3744275939125781e+00,  -3.3111383016288920e-01,
+      -2.8438634962977571e-01, 3.5861917482977251e-01,  1.7848001583696501e+01,
+      -9.0977146732770837e+00, 1.3037096678166800e+00,  -1.3549605490328065e-01,
+      -9.0977146732770837e+00, 4.6663261401614928e+00,  -6.7036638792592496e-01,
+      1.9232787391811296e-01,  1.3037096678166800e+00,  -6.7036638792592496e-01,
+      1.1260447105843117e+00,  -9.1370205009458638e-03, -1.3549605490328065e-01,
+      1.9232787391811296e-01,  -9.1370205009458638e-03, 5.2699958100439070e-01};
+  std::vector em = em_x;
   std::vector table = {
-    -1.0600000163027882e+02, 7.7059358807135015e+02, -5.6954714749735385e+03, 1.2167808756610991e+03, -7.6199102434332218e+01, 1.0706136029373441e+00, -1.0600000164528124e+02, 7.7059358630452323e+02, -5.6954715659539552e+03, 1.2167808757436076e+03, -7.6199099707724926e+01, 1.0706134206080884e+00, -1.0600000163027882e+02, 7.7059358807135015e+02, -5.6954714749735385e+03, 1.2167808756610991e+03, -7.6199102434332218e+01, 1.0706136029373441e+00, -1.0600000164528124e+02, 7.7059358630452323e+02, -5.6954715659539552e+03, 1.2167808757436076e+03, -7.6199099707724926e+01, 1.0706134206080884e+00, -9.6000006759336443e+01, 6.2969719646863621e+02, -4.2053706363664551e+03, 9.0372155784831205e+02, -5.7600014239472898e+01, 8.6528676197113796e-01, -9.6000006828502180e+01, 6.2969718981238339e+02, -4.2053709121998018e+03, 9.0372156236848912e+02, -5.7600006817493266e+01, 8.6528625106787871e-01, -9.6000006759336443e+01, 6.2969719646863621e+02, -4.2053706363664551e+03, 9.0372155784831205e+02, -5.7600014239472898e+01, 8.6528676197113796e-01, -9.6000006828502180e+01, 6.2969718981238339e+02, -4.2053709121998018e+03, 9.0372156236848912e+02, -5.7600006817493266e+01, 8.6528625106787871e-01, -8.6000028021606425e+01, 5.0303296429845562e+02, -3.0008648248894533e+03, 6.4939597734382562e+02, -4.2250984019314707e+01, 6.8180015607155764e-01, -8.6000028340480625e+01, 5.0303293978396903e+02, -3.0008656209622986e+03, 6.4939600529391078e+02, -4.2250965541906716e+01, 6.8179882734268982e-01, -8.6000028021606425e+01, 5.0303296429845562e+02, -3.0008648248894533e+03, 6.4939597734382562e+02, -4.2250984019314707e+01, 6.8180015607155764e-01, -8.6000028340480625e+01, 5.0303293978396903e+02, -3.0008656209622986e+03, 6.4939600529353049e+02, -4.2250965541830588e+01, 6.8179882733888086e-01, -7.6000116148038558e+01, 3.9060139597613619e+02, -2.0515743554479322e+03, 4.4772754091167945e+02, -2.9848087537832814e+01, 5.2014755686537917e-01, -7.6000117618125429e+01, 3.9060130821883052e+02, -2.0515765138621105e+03, 
4.4772766653712006e+02, -2.9848047259266409e+01, 5.2014443989116910e-01, -7.6000116148038558e+01, 3.9060139597613619e+02, -2.0515743554479322e+03, 4.4772754091167945e+02, -2.9848087537832814e+01, 5.2014755686537917e-01, -7.6000117618125742e+01, 3.9060130821877993e+02, -2.0515765138659344e+03, 4.4772766652483722e+02, -2.9848047256692499e+01, 5.2014443976043645e-01, -6.6000481290731443e+01, 2.9240425245900917e+02, -1.3271250821434478e+03, 2.9263955624337893e+02, -2.0087224005740719e+01, 3.8031147992206349e-01, -6.6000488067863742e+01, 2.9240394960550276e+02, -1.3271304743966571e+03, 2.9264002765325057e+02, -2.0087154325946980e+01, 3.8030522013794582e-01, -6.6000481290731443e+01, 2.9240425245900917e+02, -1.3271250821434478e+03, 2.9263955624337893e+02, -2.0087224005740719e+01, 3.8031147992206349e-01, -6.6000488067883694e+01, 2.9240394960308691e+02, -1.3271304745319526e+03, 2.9264002727267626e+02, -2.0087154245656002e+01, 3.8030521605011575e-01, -5.6001992867343972e+01, 2.0844745574402617e+02, -7.9715799906587699e+02, 1.7805563184427194e+02, -1.2663929104029080e+01, 2.6224978307822894e-01, -5.6002024103130161e+01, 2.0844646075692629e+02, -7.9717003898786652e+02, 1.7805715054974732e+02, -1.2663864677938077e+01, 2.6224029170957303e-01, -5.6001992867343972e+01, 2.0844745574402617e+02, -7.9715799906587699e+02, 1.7805563184427194e+02, -1.2663929104029080e+01, 2.6224978307822894e-01, -5.6002024104383771e+01, 2.0844646064871867e+02, -7.9717004324410516e+02, 1.7805714044473001e+02, -1.2663862524337585e+01, 2.6224018166598279e-01, -4.6008230210744550e+01, 1.3874976550319553e+02, -4.3134867537287749e+02, 9.7902623595157010e+01, -7.2734403121911884e+00, 1.6589123996688057e-01, -4.6008373996710617e+01, 1.3874671965012058e+02, -4.3137141216256458e+02, 9.7906861443792735e+01, -7.2735856084076280e+00, 1.6588642735924275e-01, -4.6008230210744550e+01, 1.3874976550319553e+02, -4.3134867537287749e+02, 9.7902623595157010e+01, -7.2734403121911884e+00, 1.6589123996688057e-01, 
-4.6008374075307870e+01, 1.3874671513440606e+02, -4.3137152784492957e+02, 9.7906652364871050e+01, -7.2735401377994249e+00, 1.6588408717348646e-01, -3.6033642533368131e+01, 8.3364086172019398e+01, -1.9942175516407502e+02, 4.6124022747838069e+01, -3.6130563858549958e+00, 9.1249773312287188e-02, -3.6034298111245583e+01, 8.3355843868269616e+01, -1.9945266030093268e+02, 4.6135000705962462e+01, -3.6142786797647353e+00, 9.1293932043118198e-02, -3.6033642533368131e+01, 8.3364086172019398e+01, -1.9942175516407502e+02, 4.6124022747838069e+01, -3.6130563858549958e+00, 9.1249773312287188e-02, -3.6034302998781108e+01, 8.3355675173745269e+01, -1.9945516784358935e+02, 4.6132303200740992e+01, -3.6136582565667807e+00, 9.1261386291659793e-02, -2.6132076703837274e+01, 4.2398929436319683e+01, -7.1037171119057973e+01, 1.3425662262407457e+01, -7.5172495708992593e-01, 7.7522572203268742e-03, -2.6134776894873077e+01, 4.2384732735328775e+01, -7.1030526549717337e+01, 1.3431455085299461e+01, -7.5302028721199155e-01, 7.8186246126207160e-03, -2.6132076703837274e+01, 4.2398929436319683e+01, -7.1037171119057973e+01, 1.3425662262405055e+01, -7.5172495708944420e-01, 7.7522572203027138e-03, -2.6135071381093578e+01, 4.2379566840123424e+01, -7.1067162844830236e+01, 1.3434603316099608e+01, -7.5251233833488806e-01, 7.7734884077347950e-03, -2.2221480705551805e+01, 3.0067218434037404e+01, -4.1779705297521097e+01, -1.9077757705724110e+02, 3.6413466026808294e+02, -1.6067397401486718e+02, -2.2225430071703467e+01, 3.0060809113889512e+01, -4.1712800191721314e+01, -1.9084786311022177e+02, 3.6410062714257685e+02, -1.6063028238785057e+02, -2.2221480705551830e+01, 3.0067218434036263e+01, -4.1779705297545611e+01, -1.9077757705723738e+02, 3.6413466026815809e+02, -1.6067397401492047e+02, -2.2226913938674084e+01, 3.0042371820589185e+01, -4.1801582285426832e+01, -1.9048619249019526e+02, 3.6373874557858261e+02, -1.6052358406417352e+02, -2.1250858373060836e+01, 2.7343847665267702e+01, -3.6044215009418814e+01, 
-1.7618484800469861e+02, 3.3120085405644409e+02, -1.4534825256321494e+02, -2.1254939505030809e+01, 2.7342716030835884e+01, -3.5955450545431681e+01, -1.7635550119316844e+02, 3.3127447930769307e+02, -1.4533876561022046e+02, -2.1250858373060954e+01, 2.7343847665262818e+01, -3.6044215009514119e+01, -1.7618484800464822e+02, 3.3120085405666612e+02, -1.4534825256338749e+02, -2.1257155379297881e+01, 2.7317691772612619e+01, -3.6063526926252166e+01, -1.7588696592837897e+02, 3.3079005662384850e+02, -1.4519086534447842e+02, -2.0283472228681301e+01, 2.4763027042036295e+01, -3.0876160316998963e+01, -1.6184864900381874e+02, 2.9976970905591691e+02, -1.3084395423768876e+02, -2.0287461515322455e+01, 2.4769400540137131e+01, -3.0762734380983186e+01, -1.6214886052089241e+02, 2.9998995088792128e+02, -1.3088331758129965e+02, -2.0283472228681809e+01, 2.4763027042017129e+01, -3.0876160317336627e+01, -1.6184864900359682e+02, 2.9976970905662938e+02, -1.3084395423826805e+02, -2.0290765181946348e+01, 2.4735639907973120e+01, -3.0892738413082597e+01, -1.6154574482310053e+02, 2.9934595420013272e+02, -1.3068028494926122e+02, -1.9319499689234629e+01, 2.2323824431805683e+01, -2.6243395369841849e+01, -1.4782286378121026e+02, 2.6985759662396487e+02, -1.1715474197881395e+02, -1.9323022570439292e+01, 2.2340565860680357e+01, -2.6102786429129356e+01, -1.4828764857305418e+02, 2.7027298759214750e+02, -1.1726163007473576e+02, -1.9319499689236839e+01, 2.2323824431730525e+01, -2.6243395371031539e+01, -1.4782286378021576e+02, 2.6985759662609979e+02, -1.1715474198068593e+02, -1.9327939259284843e+01, 2.2295320666731183e+01, -2.6257097174199931e+01, -1.4751677383623073e+02, 2.6942341041084092e+02, -1.1698575776762208e+02, -1.8359079763330211e+01, 2.0025118950280675e+01, -2.2113826757823226e+01, -1.3415932552431914e+02, 2.4147795894487624e+02, -1.0427314537549884e+02, -1.8361534194530734e+01, 2.0055847278170305e+01, -2.1944107342764479e+01, -1.3482982214648752e+02, 2.4214772485703989e+02, -1.0447085300268679e+02, 
-1.8359079763339750e+01, 2.0025118949989704e+01, -2.2113826761939308e+01, -1.3415932552009582e+02, 2.4147795895089951e+02, -1.0427314538136979e+02, -1.8368836959765495e+01, 1.9995657614892380e+01, -2.2124533894067383e+01, -1.3385233293246981e+02, 2.4103659293914149e+02, -1.0410011400771683e+02, -1.7402299525814517e+01, 1.7865597763687486e+01, -1.8455503416511757e+01, -1.2090765118569301e+02, 2.1464125749038132e+02, -9.2190581022134992e+01, -1.7402744551259310e+01, 1.7914800567904472e+01, -1.8255754666855470e+01, -1.2183089355280822e+02, 2.1563582256173194e+02, -9.2507405324257306e+01, -1.7402299525855486e+01, 1.7865597762572605e+01, -1.8455503430527756e+01, -1.2090765116826699e+02, 2.1464125750558804e+02, -9.2190581039770791e+01, -1.7413567239985614e+01, 1.7835392747330133e+01, -1.8463115133795956e+01, -1.2060260469703572e+02, 2.1419685510959093e+02, -9.2015134441585104e+01, -1.6449179896085464e+01, 1.5843762224435309e+01, -1.5236722252652665e+01, -1.0811515163854509e+02, 1.8935506712501905e+02, -8.0897437157402223e+01, -1.6446174965543889e+01, 1.5916874201410112e+01, -1.5007553197461570e+01, -1.0934291295595986e+02, 1.9075532567542470e+02, -8.1366596347119696e+01, -1.6449179896260411e+01, 1.5843762220214204e+01, -1.5236722299508587e+01, -1.0811515156878269e+02, 1.8935506715588940e+02, -8.0897437207525684e+01, -1.6462173655481337e+01, 1.5813096619069219e+01, -1.5241142983208677e+01, -1.0781563484017332e+02, 1.8891289499393798e+02, -8.0721658713418606e+01, -1.5499661595231082e+01, 1.3957945516559789e+01, -1.2426145992195885e+01, -9.5826844741964834e+01, 1.6562434781973772e+02, -7.0383233416004117e+01, -1.5491037589250178e+01, 1.4061349904707843e+01, -1.2170301483989650e+01, -9.7412966929875139e+01, 1.6751874597575440e+02, -7.1041920384880939e+01, -1.5499661595973759e+01, 1.3957945500778198e+01, -1.2426146145776961e+01, -9.5826844470313858e+01, 1.6562434784656404e+02, -7.0383233547510557e+01, -1.5514618579274794e+01, 1.3927192540790591e+01, -1.2427264674287118e+01, 
-9.5537423121432880e+01, 1.6519113036542510e+02, -7.0209783384625098e+01, -1.4553592409098401e+01, 1.2206343505203831e+01, -9.9929274597052196e+00, -8.4085595900823435e+01, 1.4345191724964303e+02, -6.0636862050381758e+01, -1.4536130507533649e+01, 1.2347228125716077e+01, -9.7159302678980044e+00, -8.6081002959763751e+01, 1.4592996741513730e+02, -6.1523840242331410e+01, -1.4553592412232879e+01, 1.2206343446986155e+01, -9.9929279524397305e+00, -8.4085594870780753e+01, 1.4345191706222485e+02, -6.0636862352071532e+01, -1.4570766853404239e+01, 1.2175998366492486e+01, -9.9905856922863112e+00, -8.3812185051328299e+01, 1.4303633648493073e+02, -6.0469165577726159e+01, -1.3610717065161962e+01, 1.0587059629986399e+01, -7.9068321681349163e+00, -7.2932404423885004e+01, 1.2283913327111270e+02, -5.1646910322317169e+01, -1.3579708436673444e+01, 1.0773027159520954e+01, -7.6175370796795425e+00, -7.5376833196183071e+01, 1.2597958225245242e+02, -5.2797863799745748e+01, -1.3610717078313911e+01, 1.0587059418306087e+01, -7.9068337121483454e+00, -7.2932400620636059e+01, 1.2283913169238102e+02, -5.1646910832841897e+01, -1.3630368323321786e+01, 1.0557789879027116e+01, -7.9007777139483810e+00, -7.2682825476758552e+01, 1.2245259140017740e+02, -5.1489446559796768e+01, -1.2670671078399982e+01, 9.0981634949263963e+00, -6.1383490362855788e+00, -6.2406844162279825e+01, 1.0378677653422224e+02, -4.3402055519687693e+01, -1.2619333100308433e+01, 9.3364634226935799e+00, -5.8491811509717584e+00, -6.5316414528433455e+01, 1.0763857666200300e+02, -4.4841832720191050e+01, -1.2670671133253135e+01, 9.0981627374157021e+00, -6.1383537481895356e+00, -6.2406830503476570e+01, 1.0378676818216074e+02, -4.3402055529436716e+01, -1.2693036794620980e+01, 9.0708908225804148e+00, -6.1281713411274001e+00, -6.2191660620037396e+01, 1.0344456594081470e+02, -4.3260806640248063e+01, -1.1732979767504439e+01, 7.7377614739662697e+00, -4.6587775146685351e+00, -5.2547655563671029e+01, 8.6296103981829802e+01, -3.5891515805495345e+01, 
-1.1651721415208119e+01, 8.0340005825064456e+00, -4.3852919661646119e+00, -5.5898160750405737e+01, 9.0851291378134590e+01, -3.7622755083739385e+01, -1.1732979994779518e+01, 7.7377588120662892e+00, -4.6587914600219875e+00, -5.2547607987974565e+01, 8.6296066930227624e+01, -3.5891510429190419e+01, -1.1758218632638741e+01, 7.7137968422318544e+00, -4.6438239588320966e+00, -5.2381405657406454e+01, 8.6019170302439520e+01, -3.5774653697918737e+01, -1.0797063195543267e+01, 6.5040766534586290e+00, -3.4402783696562169e+00, -4.3393478931462226e+01, 7.0370032342568010e+01, -2.9105535302381853e+01, -1.0672637254876815e+01, 6.8603244928014488e+00, -3.1995767859681346e+00, -4.7101348454718874e+01, 7.5530774605740319e+01, -3.1094453979913311e+01, -1.0797064129672576e+01, 6.5040675030570139e+00, -3.4403181344841500e+00, -4.3393319126804485e+01, 7.0369884883020177e+01, -2.9105501594155889e+01, -1.0825134802124644e+01, 6.4853446725127366e+00, -3.4195560956016346e+00, -4.3296381389022351e+01, 7.0187483762520671e+01, -2.9024415860031247e+01, -9.8622468030169337e+00, 5.3955359781222549e+00, -2.4558741324534137e+00, -3.4983728078555984e+01, 5.6014425934291204e+01, -2.3035887876475471e+01, -9.6769173769353625e+00, 5.8079540801032961e+00, -2.2635143148159220e+00, -3.8890523502249145e+01, 6.1563046720547966e+01, -2.5198820521877391e+01, -9.8622505990399034e+00, 5.3955054149765509e+00, -2.4559821583353774e+00, -3.4983216045684472e+01, 5.6013889382190079e+01, -2.3035736114340502e+01, -9.8926597117464805e+00, 5.3849440641688187e+00, -2.4279562878572039e+00, -3.4983707025980287e+01, 5.5966629574570753e+01, -2.3006306589550750e+01, -8.9277749780883457e+00, 4.4108678323349286e+00, -1.6793815271288624e+00, -2.7359655656676122e+01, 4.3239544183593061e+01, -1.7676416286664047e+01, -8.6587749152265552e+00, 4.8674392165289442e+00, -1.5450097170494306e+00, -3.1230915545542118e+01, 4.8829474992442343e+01, -1.9874755288141955e+01, -8.9277901202336185e+00, 4.4107699183102085e+00, -1.6796551456533098e+00, 
-2.7358123514289456e+01, 4.3237769027728554e+01, -1.7675844947587926e+01, -8.9590559763951383e+00, 4.4128957610428623e+00, -1.6423658138809611e+00, -2.7493743583145054e+01, 4.3380518846300511e+01, -1.7719639183506050e+01, -7.9928164326293913e+00, 3.5492331091008302e+00, -1.0852462622393610e+00, -2.0565792757352423e+01, 3.2061909496398073e+01, -1.3023704651715642e+01, -7.6125412569887647e+00, 4.0287966748633526e+00, -1.0084592804412351e+00, -2.4116992333062022e+01, 3.7252797603904497e+01, -1.5077495076198684e+01, -7.9928747817255603e+00, 3.5489404571097585e+00, -1.0858609980296849e+00, -2.0561701094768868e+01, 3.2056747083970720e+01, -1.3021877019728107e+01, -8.0213899495838241e+00, 3.5708128515175943e+00, -1.0368753205735253e+00, -2.0877831538201836e+01, 3.2456559535389509e+01, -1.3165540198118645e+01, -7.0564174984379102e+00, 2.8104770395789380e+00, -6.4821407306458223e-01, -1.4652118176169953e+01, 2.2507145963021038e+01, -9.0780963613608154e+00, -6.5338936679228468e+00, 3.2846161494194233e+00, -6.1760141818709846e-01, -1.7606122820367215e+01, 2.6855555289500277e+01, -1.0803821410528570e+01, -7.0566263531717324e+00, 2.8097184139861691e+00, -6.4925197579297411e-01, -1.4643483271177150e+01, 2.2495243692983838e+01, -9.0734373052814821e+00, -7.0742646195707266e+00, 2.8621047467298468e+00, -5.8641470402843421e-01, -1.5178915176777426e+01, 2.3211717123277591e+01, -9.3414295847965061e+00, -6.1172231064332783e+00, 2.1957964102200167e+00, -3.4265643705632465e-01, -9.6769153352706798e+00, 1.4613873405033004e+01, -5.8450824172251430e+00, -5.4212678780860326e+00, 2.6341589573018260e+00, -3.4085224757280796e-01, -1.1835854891340576e+01, 1.7794701474942944e+01, -7.1075278532253687e+00, -6.1178367984533244e+00, 2.1945528943967396e+00, -3.4261268423617658e-01, -9.6695829134679272e+00, 1.4600877298870854e+01, -5.8381668136523013e+00, -6.1072022151656586e+00, 2.2922503774685161e+00, -2.6715334266026142e-01, -1.0408120531614587e+01, 1.5617405440391840e+01, -6.2270636615178061e+00, 
-5.1722074807324017e+00, 1.7098190643016411e+00, -1.4098618492175408e-01, -5.7061337346696464e+00, 8.4331806866534098e+00, -3.3349192888568142e+00, -4.2766424379800121e+00, 2.0860564217794284e+00, -1.5548660419053545e-01, -7.0034949575065015e+00, 1.0332245608764421e+01, -4.0873492185766374e+00, -5.1727690165421372e+00, 1.7132539127425084e+00, -1.2776576793785877e-01, -5.7565343018918274e+00, 8.4941254548170697e+00, -3.3479852132230872e+00, -5.0998839330979591e+00, 1.8678855512825561e+00, -5.7718910331047868e-02, -6.5095346397755423e+00, 9.5462002113817768e+00, -3.7632628689263172e+00, -4.2112469382255613e+00, 1.3675717927787789e+00, -9.4961575783498800e-03, -2.7877417589321136e+00, 3.9953503912711956e+00, -1.5499906707437840e+00, -3.1046711877098376e+00, 1.6568346830533449e+00, -4.5990009889900242e-02, -3.3140676307068091e+00, 4.7472200808709299e+00, -1.8492173878772247e+00, -4.1976749320353317e+00, 1.4246952243441517e+00, 8.7531923058200650e-02, -3.0996975434049761e+00, 4.4668738099197531e+00, -1.7103055321708385e+00, -4.0163145894665320e+00, 1.5923303121893606e+00, 5.8249749369824022e-02, -3.3748048713195491e+00, 4.7925769874900315e+00, -1.8598420111853879e+00, -3.1955533414298376e+00, 1.2168024121915868e+00, 9.9474205814620603e-02, -8.6811124876189694e-01, 1.1994338853723501e+00, -4.4837238870567747e-01, -1.9098914522594992e+00, 1.3654451552507061e+00, 2.9537044429980407e-03, -9.3701125207094127e-01, 1.2575365835116745e+00, -4.7248060681970733e-01, -3.0285770502890443e+00, 1.6166340190704305e+00, 4.8662683065338386e-01, -1.2308607057515726e+00, 1.6114560066217587e+00, -6.5896729332189652e-01, -2.8078044229222514e+00, 1.4555130910035559e+00, 9.0876948497501955e-02, -1.0566809618626720e+00, 1.3938154223720176e+00, -5.2279617091852160e-01, -1.9963264755188566e+00, 1.3672906754961440e+00, 2.0801988470625002e-01, 2.0083818728351077e-02, -1.5135587406137185e-02, -1.4175240342178652e-02, -6.9344786794476854e-01, 1.2280621078720415e+00, 1.2333381103148277e-02, 
-1.0895386066093759e-02, 2.1764282171790141e-02, -1.0106900291744604e-02, -1.2036881930169383e+00, 2.0482931230000392e+00, -1.2689218008973949e-01, -5.0580690719339239e-01, 3.4047786101030464e-01, -7.0959386937004015e-02, -1.4470760938303664e+00, 1.4285049373060201e+00, 5.5764887956399375e-02, -2.9461990750009881e-02, 2.3005167601875431e-02, -1.0760396189439407e-02, -4.3024292433642597e-01, 1.7121633497582587e+00, 3.5705413032693957e-02, -9.9216800479772127e-01, 1.5115432403429119e+00, -6.3985596276149748e-01, 5.4770961684437192e-01, 1.2565653391084903e+00, 9.1639130181564755e-03, -6.8547618650262643e-01, 1.2037212931265591e+00, -5.1526772142324506e-01, 4.8142431677326969e-01, 1.2842025505965851e+00, -3.1103960497811806e-01, -3.8667287940463613e-01, 9.2663039525338942e-01, -4.1330437951972537e-01, 1.9976512094478704e-02, 1.4898674304290889e+00, -2.1940405767858565e-03, -8.0791207141984167e-01, 1.3979310081478775e+00, -5.9845265079421794e-01, 1.1971451112382212e+00, 1.6539633089946477e+00, -2.7009878691796618e-01, -2.8868139196850624e+00, 4.7294193613612734e+00, -1.9578020397520424e+00, 1.8164162541717044e+00, 1.4570111710269262e+00, 2.2385898037164991e-02, -3.1195681762439769e+00, 4.9723722392038878e+00, -2.0423972644796100e+00, 1.5812403987207633e+00, 1.1421043858413655e+00, -4.4319666868952730e-02, -2.3144705949527720e+00, 3.7448930479898297e+00, -1.5426803544433196e+00, 1.4992161878806018e+00, 1.6612039136364238e+00, -2.2870713891204597e-02, -3.4442115437939465e+00, 5.5057190995408973e+00, -2.2657208348376137e+00, 2.4658130352390710e+00, 1.5819912227884063e+00, -1.3204477532594588e-01, -5.7752803465671017e+00, 9.0677018990478242e+00, -3.6843468204828174e+00, 3.1062201217160963e+00, 1.8205810727868250e+00, 7.3942159732456811e-02, -7.3418038323250947e+00, 1.1309154676354810e+01, -4.5733470083866452e+00, 2.5667672162869133e+00, 1.3762236869878626e+00, 5.4823291778512563e-02, -5.5558964069977943e+00, 8.5620133672289516e+00, -3.4575259608624478e+00, 
2.9333361085351610e+00, 1.9771000784477066e+00, 2.1600903596218385e-02, -7.7786452012965430e+00, 1.2026327126407146e+01, -4.8722408979121159e+00, 3.5238342146994350e+00, 1.8411341262124141e+00, 1.0485737443151430e-01, -1.0316470080846322e+01, 1.5628354265192609e+01, -6.2547428286449396e+00, 4.3947471898784478e+00, 2.3129375587624681e+00, 1.6998863701958250e-01, -1.3069120913924280e+01, 1.9764673064124775e+01, -7.9234176878170990e+00, 3.5464051944219954e+00, 1.7786047141550632e+00, 1.8395466553434961e-01, -1.0256713338978345e+01, 1.5450540198835597e+01, -6.1709943751208902e+00, 4.3074781177775723e+00, 2.4284702978185178e+00, 1.2121907902830774e-01, -1.3510697720561426e+01, 2.0490823414440431e+01, -8.2265504110307699e+00, 4.5269670710447079e+00, 2.3411415500822019e+00, 3.7814443659878427e-01, -1.6533454371385766e+01, 2.4532574055181296e+01, -9.7222898630871342e+00, 5.6498078480438974e+00, 2.8871559084424092e+00, 3.1648740182441881e-01, -1.9832336139347099e+01, 2.9630584562783888e+01, -1.1804975183138390e+01, 4.5317970588477650e+00, 2.3235629480266455e+00, 4.0711209040396701e-01, -1.6523611973754900e+01, 2.4482080409856291e+01, -9.6968326211377835e+00, 5.6107427774726322e+00, 2.9693568967987254e+00, 2.6856229367890733e-01, -2.0186235796983127e+01, 3.0228033555488111e+01, -1.2057362656117963e+01, 5.5230828784340904e+00, 3.0159142144119913e+00, 7.5032702265793638e-01, -2.4452361306480910e+01, 3.5745746299744695e+01, -1.4059387633540990e+01, 6.8467243986091164e+00, 3.5205846294935204e+00, 5.5323452910250115e-01, -2.7424447720726722e+01, 4.0542113968978946e+01, -1.6058340606199877e+01, 5.5241079122419858e+00, 3.0111097413061287e+00, 7.6043241689918206e-01, -2.4453330947201032e+01, 3.5733842835424838e+01, -1.4052622761934279e+01, 6.8330970703372866e+00, 3.5730950345697865e+00, 5.0442967447855436e-01, -2.7630302835415993e+01, 4.0921397061842079e+01, -1.6223699529825666e+01, 6.5233214752268127e+00, 3.8455313715589599e+00, 1.2738445662734672e+00, -3.4142511056048967e+01, 
4.9288751118195229e+01, -1.9258816488331760e+01, 7.9798691992574877e+00, 4.2304633704347614e+00, 9.4916911879724064e-01, -3.6082800915305256e+01, 5.2740474636382487e+01, -2.0757970588732530e+01, 6.5235391967368317e+00, 3.8442392655293900e+00, 1.2772689685023881e+00, -3.4144245582802192e+01, 4.9286600694030149e+01, -1.9257235266278844e+01, 7.9780164759860508e+00, 4.2581364755189171e+00, 9.0490824102641643e-01, -3.6146890048111374e+01, 5.2902251888236343e+01, -2.0834714063750525e+01, 7.5301209868737518e+00, 4.8266093670811516e+00, 1.9906532239804082e+00, -4.5696171225139402e+01, 6.5222794336738914e+01, -2.5330008845677121e+01, 9.0592048208341964e+00, 5.0524444639807982e+00, 1.5639083038511417e+00, -4.6227354827270197e+01, 6.6742768625790532e+01, -2.6090733281390481e+01, 7.5301672757177256e+00, 4.8262668988539703e+00, 1.9917837214882572e+00, -4.5697152262800707e+01, 6.5222641787790508e+01, -2.5329699752317662e+01, 9.0617089689058279e+00, 5.0627200474303731e+00, 1.5306087886050987e+00, -4.6201245261995687e+01, 6.6753711704174307e+01, -2.6103836713323240e+01, 8.5439978438576958e+00, 5.9605352581937785e+00, 2.9388171122244109e+00, -5.9213652478598007e+01, 8.3623964589400401e+01, -3.2288651007290504e+01, 1.0100238105795977e+01, 6.0156046860821641e+00, 2.4311227628788585e+00, -5.8189717323516248e+01, 8.2972590004142106e+01, -3.2212869674305303e+01, 8.5440076687321067e+00, 5.9604459430021439e+00, 2.9391801366526531e+00, -5.9214078468041464e+01, 8.3624068891376510e+01, -3.2288610777657510e+01, 1.0103667533796683e+01, 6.0158650887345448e+00, 2.4107760944314816e+00, -5.8125625048064265e+01, 8.2906979417176174e+01, -3.2191629006406409e+01, 9.5650113177877785e+00, 7.2498153679976820e+00, 4.1551371399277919e+00, -7.4795843598083408e+01, 1.0457037732454131e+02, -4.0151433068943419e+01, 1.1116968561077568e+01, 7.1347098863330896e+00, 3.5688140741297674e+00, -7.2151486218593305e+01, 1.0165680693075836e+02, -3.9206269356622016e+01, 9.5650133940644455e+00, 7.2497924894015711e+00, 
4.1552503042122613e+00, -7.4796005009548836e+01, 1.0457044971811401e+02, -4.0151435976986221e+01, 1.1120034079668221e+01, 7.1303147700774092e+00, 3.5594873892317103e+00, -7.2082067018068685e+01, 1.0156598726189708e+02, -3.9171834664292227e+01, 1.0593064483227742e+01, 8.6969028070512202e+00, 5.6755396034912966e+00, -9.2539537763180832e+01, 1.2813560149579646e+02, -4.8933613418447223e+01, 1.2119543877083460e+01, 8.4137603187360543e+00, 4.9925034366798311e+00, -8.8194505075704640e+01, 1.2287993196505218e+02, -4.7096724506223822e+01, 1.0593064919257221e+01, 8.6968970567044934e+00, 5.6755738143875760e+00, -9.2539593640863643e+01, 1.2813563331215474e+02, -4.8933618162805772e+01, 1.2121921818513506e+01, 8.4078642204619420e+00, 4.9908632634858190e+00, -8.8134432374832016e+01, 1.2279086550380391e+02, -4.7060844505587738e+01, 1.1627957207938659e+01, 1.0303707615441018e+01, 7.5344011042552923e+00, -1.1253294830348190e+02, 1.5438372244089408e+02, -5.8647453529357783e+01, 1.3114510015623049e+01, 9.8513572940713416e+00, 6.7213349376406626e+00, -1.0635738219113546e+02, 1.4665751311861146e+02, -5.5881528760137869e+01, 1.1627957298834614e+01, 1.0303706197478814e+01, 7.5344111366673712e+00, -1.1253296638384563e+02, 1.5438373415898508e+02, -5.8647455853629580e+01, 1.3116237925845430e+01, 9.8455331102145145e+00, 6.7243141059359051e+00, -1.0631074264006560e+02, 1.4658112805680690e+02, -5.5849452095162235e+01, 1.2669386535689361e+01, 1.2071287030293307e+01, 9.7633555455962835e+00, -1.3485075345900265e+02, 1.8336444946299886e+02, -6.9300787627414508e+01, 1.4105804414673191e+01, 1.1444289269702800e+01, 8.7789794745243590e+00, -1.2666835962860844e+02, 1.7298274034188972e+02, -6.5547771558832267e+01, 1.2669386554490638e+01, 1.2071286687068984e+01, 9.7633584027450482e+00, -1.3485075900242089e+02, 1.8336445335820781e+02, -6.9300788508071975e+01, 1.4107018463574896e+01, 1.1439185153305873e+01, 8.7843335749580440e+00, -1.2663444344319166e+02, 1.7292158897636148e+02, -6.5521162694327174e+01, 
1.3716937488160630e+01, 1.3999597459400730e+01, 1.2389915672436279e+01, -1.5954894249539399e+02, 2.1510813446746886e+02, -8.0895567204040049e+01, 1.5095682313349364e+01, 1.3189272906323732e+01, 1.1192627051714643e+01, -1.4915916817312757e+02, 2.0184825850919157e+02, -7.6081293415969839e+01, 1.3716937492019641e+01, 1.3999597377767842e+01, 1.2389916464009524e+01, -1.5954894412085929e+02, 2.1510813567394996e+02, -8.0895567498068928e+01, 1.5096520030681436e+01, 1.3185064407456906e+01, 1.1198910160279951e+01, -1.4913565617175487e+02, 2.0180124290250004e+02, -7.6060129778156622e+01, 1.4770075388032444e+01, 1.6087303167766446e+01, 1.5436222950666867e+01, -1.8666021493779203e+02, 2.4962122089688103e+02, -9.3426463524457304e+01, 1.6085379191481852e+01, 1.5083589447287226e+01, 1.3991739427782750e+01, -1.7386892459375579e+02, 2.3325385095807121e+02, -8.7470099643500802e+01, 1.4770075388818769e+01, 1.6087303148664304e+01, 1.5436223164442264e+01, -1.8666021539675981e+02, 2.4962122125116741e+02, -9.3426463615076329e+01, 1.6085951551006787e+01, 1.5080238931969067e+01, 1.3998101278449143e+01, -1.7385331837944693e+02, 2.3321864790104019e+02, -8.7453697552144448e+01, 1.5828143941097450e+01, 1.8331670220961666e+01, 1.8918268274003861e+01, -2.1619095210442941e+02, 2.8688297635978756e+02, -1.0687973526499771e+02, 1.7075534787366465e+01, 1.7125200136366264e+01, 1.7207074959934751e+01, -2.0084388544719391e+02, 2.6720765911058965e+02, -9.9705133726570395e+01, 1.5828143941256627e+01, 1.8331670216557445e+01, 1.8918268330404022e+01, -2.1619095222989833e+02, 2.8688297645950814e+02, -1.0687973529137253e+02, 1.7075923730873765e+01, 1.7122590193964911e+01, 1.7213058024904747e+01, -2.0083402645820061e+02, 2.6718180837697332e+02, -9.9692640534772679e+01, 1.6890371426423382e+01, 2.0728579569842751e+01, 2.2845917469463828e+01, -2.4812083435502871e+02, 3.2684448823688496e+02, -1.2123263616047282e+02, 1.8066449820492846e+01, 1.9312661524160735e+01, 2.0870036016187061e+01, -2.3013589616073858e+02, 
3.0372498377642154e+02, -1.1277999824352135e+02, 1.6890371426455424e+01, 2.0728579568840633e+01, 2.2845917484032956e+01, -2.4812083438838550e+02, 3.2684448826399682e+02, -1.2123263616782057e+02, 1.8066713333743454e+01, 1.9310657703202459e+01, 2.0875423564416035e+01, -2.3013008228413184e+02, 3.0370630494679148e+02, -1.1277060230387309e+02, 1.7955886187113396e+01, 2.3272683588860026e+01, 2.7223982220959247e+01, -2.8240595076334000e+02, 3.6943078590316281e+02, -1.3645364576977221e+02, 1.9058236733002300e+01, 2.1644988962398710e+01, 2.5012267757287322e+01, -2.6180071928343307e+02, 3.4282650121799617e+02, -1.2669036882336400e+02, 1.7955886187119816e+01, 2.3272683588634656e+01, 2.7223982224651898e+01, -2.8240595077199526e+02, 3.6943078591032139e+02, -1.3645364577174797e+02, 1.9058414960148450e+01, 2.1643466247439289e+01, 2.5016983354038196e+01, -2.6179767020610126e+02, 3.4281320617581565e+02, -1.2668337355331974e+02, 1.9023741366983238e+01, 2.5957710504548576e+01, 3.2054387652193789e+01, -3.1898571318422574e+02, 4.1454655650462962e+02, -1.5250373535684176e+02, 2.0050906563887416e+01, 2.4121527381838824e+01, 2.9665428981325245e+01, -2.9589665055055406e+02, 3.8453661583827250e+02, -1.4143340987287985e+02, 1.9023741366984520e+01, 2.5957710504498362e+01, 3.2054387653114766e+01, -3.1898571318642672e+02, 4.1454655650647550e+02, -1.5250373535735841e+02, 2.0051026978020587e+01, 2.4120379273875816e+01, 2.9669474257430963e+01, -2.9589543070583102e+02, 3.8452729731205977e+02, -1.4142824748467820e+02, 2.0092947487287756e+01, 2.8776895490568755e+01, 3.7339233558876920e+01, -9.8781982607414882e+00, 7.0916635282296292e-01, -1.2340880155534291e-02, 2.1044418341890132e+01, 2.6741847681518077e+01, 3.4861073630499796e+01, -9.1700568642165461e+00, 6.5220324713443967e-01, -1.1045071585279443e-02, 2.0092947487288011e+01, 2.8776895490557653e+01, 3.7339233559103448e+01, -9.8781982608033179e+00, 7.0916635282857932e-01, -1.2340880155703077e-02, 2.1044499630877905e+01, 2.6740987496092696e+01, 
3.4864491165514394e+01, -9.1707199731434574e+00, 6.5223741134844682e-01, -1.1045188698410773e-02, 2.1162510215379026e+01, 3.1723491960797684e+01, 4.3084295875067085e+01, -4.1033675985379521e+00, -6.6095139594000130e-01, 6.0977735530407223e-02, 2.2038706806958309e+01, 2.9505670300337073e+01, 4.0630600131872811e+01, -2.7905442844326718e+00, -8.3885972791335117e-01, 6.8309956404426039e-02, 2.1162510215379076e+01, 3.1723491960795304e+01, 4.3084295875120795e+01, -4.1033675985539224e+00, -6.6095139593840913e-01, 6.0977735530354210e-02, 2.2038761643178379e+01, 2.9505029336592230e+01, 4.0633451796171073e+01, -2.7913314472201640e+00, -8.3878528163749511e-01, 6.8307595298566767e-02, 3.1719012432820758e+01, 6.7480322661109355e+01, 1.3318978565899991e+02, -1.6791944323404795e+01, -1.0181217992701848e+00, 1.2989592638281225e-01, 3.2009499874031789e+01, 6.5013296175889408e+01, 1.3669799889514238e+02, -1.7009031615065428e+01, -1.0689880784706638e+00, 1.3388972346122466e-01, 3.1719012432820758e+01, 6.7480322661109355e+01, 1.3318978565899991e+02, -1.6791944323404795e+01, -1.0181217992701848e+00, 1.2989592638281225e-01, 3.2009500887769519e+01, 6.5013269472322307e+01, 1.3669829238273672e+02, -1.7009116366540379e+01, -1.0689798256828462e+00, 1.3388945486998777e-01, 4.1931127118492086e+01, 1.1600186087954401e+02, 3.1751764022286790e+02, -4.6438894455748802e+01, -8.7599401950869438e-01, 2.2297105562740663e-01, 4.2002297497564768e+01, 1.1479764873768737e+02, 3.2393143797302810e+02, -4.7847299173836262e+01, -7.8150712905299369e-01, 2.2131248436241077e-01, 4.1931127118492086e+01, 1.1600186087954401e+02, 3.1751764022286790e+02, -4.6438894455748802e+01, -8.7599401950869438e-01, 2.2297105562740663e-01, 4.2002297514594851e+01, 1.1479764793294436e+02, 3.2393145467669495e+02, -4.7847304068128608e+01, -7.8150664807362491e-01, 2.2131246858403722e-01, 5.1984670105634827e+01, 1.7926303194781252e+02, 6.2846495111925287e+02, -1.0034649475039414e+02, 2.4606292097951082e-01, 3.3256752105517051e-01, 
5.2000554052128159e+01, 1.7883235795593501e+02, 6.3273302895025176e+02, -1.0138733878813618e+02, 3.2804187851642969e-01, 3.3055293107858102e-01, 5.1984670105634827e+01, 1.7926303194781252e+02, 6.2846495111925287e+02, -1.0034649475039414e+02, 2.4606292097951082e-01, 3.3256752105517051e-01, 5.2000554052402805e+01, 1.7883235793562420e+02, 6.3273302962903426e+02, -1.0138733898825184e+02, 3.2804189825766372e-01, 3.3055293042886030e-01, 6.1996666427075382e+01, 2.5724136589119979e+02, 1.0913830717468406e+03, -1.8317243758181812e+02, 2.5193786568880601e+00, 4.6277932792022042e-01, 6.2000133522892554e+01, 2.5710536851489377e+02, 1.0934673032018356e+03, -1.8370056934287794e+02, 2.5630609198690104e+00, 4.6162176037505448e-01, 6.1996666427075382e+01, 2.5724136589119979e+02, 1.0913830717468406e+03, -1.8317243758181812e+02, 2.5193786568880601e+00, 4.6277932792022042e-01, 6.2000133522896938e+01, 2.5710536851442714e+02, 1.0934673032246803e+03, -1.8370056934963364e+02, 2.5630609205366826e+00, 4.6162176035304603e-01, 7.1999279107664492e+01, 3.4965254984584158e+02, 1.7356304176273381e+03, -3.0063395678020430e+02, 6.2079056750108883e+00, 6.1505333334154833e-01, 7.2000032172982571e+01, 3.4961232791697932e+02, 1.7365043785874466e+03, -3.0086002522613632e+02, 6.2270725229979789e+00, 6.1452738833821030e-01, 7.1999279107664492e+01, 3.4965254984584158e+02, 1.7356304176273381e+03, -3.0063395678020430e+02, 6.2079056750108883e+00, 6.1505333334154833e-01, 7.2000032172982642e+01, 3.4961232791696904e+02, 1.7365043785881401e+03, -3.0086002522634379e+02, 6.2270725230187063e+00, 6.1452738833751985e-01, 8.1999844359310714e+01, 4.5636323545227941e+02, 2.5918884526432239e+03, -4.5885344883307727e+02, 1.1616256691917803e+01, 7.8948404417119522e-01, 8.2000007751936337e+01, 4.5635184072744744e+02, 2.5922210189842476e+03, -4.5894061525528980e+02, 1.1623761628208563e+01, 7.8927378661620728e-01, 8.1999844359310714e+01, 4.5636323545227941e+02, 2.5918884526432239e+03, -4.5885344883307727e+02, 
1.1616256691917803e+01, 7.8948404417119522e-01, 8.2000007751936337e+01, 4.5635184072744744e+02, 2.5922210189842476e+03, -4.5894061525528980e+02, 1.1623761628208563e+01, 7.8927378661620728e-01
-  };
-  std::vector expected_xyz_scatter = {
-    1.4271973325754339e+00, 2.5214997685364109e+00, 3.1394341134078902e+00, 2.2727894815158436e+00, 1.9127738317829568e+00, 2.5288382955492263e+00, 3.1401587802428659e+00, 2.5252400661016079e+00, 9.4806287131835343e-01, 2.3778589851963829e+00, 2.8273548699126683e+00, 1.9358633427396228e+00, 2.1586806210305824e+00, 2.6256636737020518e+00, 3.3955783231847523e+00, 2.7091329174140033e+00, -1.9231004620365049e+00, -4.6499941633630704e-01, -1.1594526098009617e+00, -1.2686640472208488e+00, 2.0867847214069872e+00, 3.0003750888529219e+00, 3.6325449823191440e+00, 2.8788902557067368e+00, 1.2684738158575621e+00, 1.8537695728403008e+00, 2.1955525109720693e+00, 1.7836450721166277e+00, 1.8550735634159015e+00, 2.4434013845454778e+00, 3.0971074319021614e+00, 2.4481507963338514e+00, 1.6439641588553517e+00, 1.9173245315063490e+00, 2.4213050183154365e+00, 2.0154649449162125e+00, 8.6044027444396542e-01, 1.6761956340909820e+00, 1.9714372427825169e+00, 1.4694269993819085e+00, 1.1578881590922248e+00, 2.4304644465537262e+00, 2.8997419900334167e+00, 2.0775716876050363e+00, 2.3918652577373138e+00, 2.7767532459788180e+00, 3.5565699066582859e+00, 2.9253650111396308e+00, 1.6429790566102422e+00, 2.3353986933747315e+00, 2.8497701445565649e+00, 2.2665599345093730e+00, 6.2452940515269861e-01, 1.3483891434563131e+00, 1.5336055353368097e+00, 1.1476467351376733e+00, 2.3375903116778036e+00, 3.0294370345439616e+00, 3.8114115382246951e+00, 3.0486965696352639e+00, 6.1627879872497271e-01, 1.0222504107870520e+00, 1.1967221175625382e+00, 9.4398210879701261e-01, -1.7068032019607302e+00, 3.3613403560802918e-01, -1.0411939552994098e-01, -6.6667768860645871e-01, 1.8511171935709925e+00, 2.2616949107465572e+00, 2.8901786544735999e+00, 2.3344195582834213e+00, 1.8684390110773692e+00, 2.3089422940069237e+00, 2.9226805832398313e+00, 2.3717710946817374e+00, 1.9375672494736595e+00, 2.2557776291035463e+00, 2.8639227616937220e+00, 2.3752065452942275e+00, -3.2972712763415735e-02, 1.0799332278489837e+00, 
1.0885237991034180e+00, 6.4736232064759269e-01, 2.8245006381754121e+00, 3.5259487523490192e+00, 4.6113702776738981e+00, 3.5998420702676723e+00, 2.5905343058532044e+00, 3.9030377854459730e+00, 4.9001863961421570e+00, 3.6937521865974929e+00, -2.0466716707172710e+00, -1.0685624603518851e-01, -7.3049231485296484e-01, -1.1126777403630335e+00, 4.5968126827466538e+00, 4.8629895439961155e+00, 6.7099151660558576e+00, 5.3272372780027073e+00, 1.3052826537711313e+00, 1.5171629367774915e+00, 1.9011692135645539e+00, 1.5983578000913097e+00, 2.7500355791930211e+00, 3.1012540902842334e+00, 4.0886454556166472e+00, 3.2988732261870899e+00, -3.7661410117701113e+00, -1.3602653035667422e+00, -2.5196607983439852e+00, -2.7610055328203522e+00, 2.4189623440903629e+00, 2.8963366391936933e+00, 3.8107924830413253e+00, 3.0083029750449866e+00, -3.4747440084737047e+00, -1.1388326294486402e+00, -2.4182412000178957e+00, -2.4782338467864626e+00, 1.8127024518519697e+00, 2.2230503769241436e+00, 2.7468475012971849e+00, 2.2887727041772736e+00, -1.2285895780562228e-01, 1.5434771625279660e+00, 1.6000755001429154e+00, 8.7985271502585627e-01, 1.9011366955569318e+00, 2.4167616547852120e+00, 3.1462829168951041e+00, 2.4517928223455625e+00, 3.4327869085046898e+00, 3.8030109751616310e+00, 4.8712356823465610e+00, 4.0764499721493568e+00, -1.2035506504910221e-01, 1.0522704557335492e+00, 9.2716931026249949e-01, 5.7901898977964616e-01, 2.3714074680568968e+00, 2.7069461333245264e+00, 3.4788009563530058e+00, 2.8668785353548181e+00, 1.3173599955901605e+00, 2.3373622162330081e+00, 2.7950932510153166e+00, 2.0958887568436859e+00, 8.4352826372327494e-01, 1.3787710702843035e+00, 1.6194525500748886e+00, 1.2753294206512922e+00, 2.1844818576218366e+00, 2.4600046540695972e+00, 3.2061976014984541e+00, 2.6275850202185489e+00, 7.7180988879817070e-01, 1.0272946579967681e+00, 1.2621894302014174e+00, 1.0265959882640883e+00, 1.6049231964243249e+00, 2.4546141304955089e+00, 3.1543902252316531e+00, 2.3261815654970941e+00, 
-1.5851508845166586e+00, 1.7616609630246921e-01, -4.6927862795076358e-01, -7.0847602690642730e-01, 9.7206101520523258e-01, 2.1984829496765985e+00, 2.5547429254737746e+00, 1.8332359752494667e+00, 1.7754746253185822e+00, 2.6825317821817345e+00, 3.3991991376107316e+00, 2.5509087537769037e+00, 1.0721995919270044e-01, 1.2979498217369176e+00, 1.3387413397315138e+00, 8.4672254891200061e-01, 2.9293634097685093e+00, 3.7870623674013393e+00, 4.9049858395715571e+00, 3.8089529879570825e+00, 8.4019940154161687e-01, 1.4394975389766356e+00, 1.7212747966327231e+00, 1.3114842544997232e+00, -4.4681831340750566e-01, 1.6558375814978303e+00, 1.5645433370489232e+00, 7.7463977013538887e-01, 5.8820535859917089e-01, 9.1338322532915583e-01, 1.1202010966286036e+00, 8.6364930010308649e-01, 7.0926056868283660e-01, 1.4218405965219119e+00, 1.6894042270047498e+00, 1.2378761768042328e+00, 2.3099219963216546e+00, 2.6932789796161916e+00, 3.5453161672341622e+00, 2.8363942767964803e+00, 1.8678281855424870e+00, 2.6920312666879047e+00, 3.2316323462531087e+00, 2.6004324324975743e+00, 1.4005208650900944e+00, 2.1811523726875692e+00, 2.7048686478151001e+00, 2.0527720081111767e+00, 2.3136081060507738e+00, 2.9431216274000898e+00, 3.7927332883433289e+00, 2.9867484155017419e+00, 2.4183082750679299e+00, 3.4684542472901878e+00, 4.3044551492126208e+00, 3.3503390337668466e+00, 1.3369220495937211e+00, 2.0576414218953958e+00, 2.5357049204003479e+00, 1.9409755276270539e+00, -2.1107398251243468e+00, -4.8988978509617087e-01, -1.0925226074379997e+00, -1.3770224787571617e+00, 1.6149648338580387e+00, 1.9730143918958940e+00, 2.5005508027915648e+00, 2.0369912328773259e+00, 1.9358903207989977e+00, 2.1474863817546317e+00, 2.7816238043705535e+00, 2.3089858393655152e+00, 5.9610253563576776e-01, 1.8174801336559421e+00, 2.1475049000471036e+00, 1.4151097394224248e+00, 5.1090833065932995e-01, 1.2160531841070317e+00, 1.3152029243838474e+00, 1.0054053301687891e+00, 2.1775101573737672e+00, 2.7592291083038578e+00, 3.5514577227476543e+00, 
2.8051749838391071e+00, 1.4574449572780601e+00, 1.8257794999887023e+00, 2.3529474069241134e+00, 1.8687169225939499e+00, 3.9934672259732729e+00, 4.6069625003686925e+00, 6.0365399138833418e+00, 4.8498952298984239e+00, 5.0206849491088514e+02, 5.2990619575924950e+02, 5.0718360719485423e+02, 5.3078609113850609e+02, 4.2099706807708640e+02, 4.4995011865286330e+02, 4.2613481931665478e+02, 4.5054742567627943e+02, 6.4624126958401507e+02, 6.7232697462462943e+02, 6.5259938738906271e+02, 6.7148563064230416e+02, 4.2978137542372599e+02, 4.5001246708893814e+02, 4.3535643412910235e+02, 4.5267999547985386e+02, 3.3452377892226485e+02, 3.5665923744531250e+02, 3.3921143957791395e+02, 3.5904334578072132e+02, 4.2377024314145552e+02, 4.4269249140996482e+02, 4.3013979474766063e+02, 4.4602164788241845e+02, 4.0106095277790195e+02, 4.2025993436574691e+02, 4.0597499630524908e+02, 4.2366611812473144e+02, 3.8696471878412717e+02, 4.0913035507396773e+02, 3.8997037364405418e+02, 4.1288273173906788e+02, 3.6331657327614209e+02, 3.8874925104444759e+02, 3.6568536230961831e+02, 3.8997929930096944e+02, 3.0766058208873443e+02, 3.3007628916707438e+02, 3.1095679032603221e+02, 3.3214170847015549e+02, 3.7417389932974652e+02, 3.9610585314922997e+02, 3.8035396619631911e+02, 4.0089442860025434e+02, 3.8077716282948728e+02, 4.0439557659241609e+02, 3.8495076836644944e+02, 4.0710965835899611e+02, 5.0832346005741056e+02, 5.2918139375030751e+02, 5.0999148566291376e+02, 5.3238824760640534e+02, 3.5895613327835940e+02, 3.7388263235784137e+02, 3.6145315529806305e+02, 3.7979834444073651e+02, 3.7956459278145832e+02, 4.0172961800115581e+02, 3.8172408748658489e+02, 4.0500568949748191e+02, 3.9684913832843944e+02, 4.1936779798804349e+02, 3.9896001344387633e+02, 4.2078747802693186e+02, 3.3493113256513072e+02, 3.5265874034487007e+02, 3.3927703565539474e+02, 3.6107266137720109e+02, 3.6604604197249961e+02, 3.8893228913059670e+02, 3.6730129201251361e+02, 3.9043728041862283e+02, 3.8812581530185560e+02, 4.0924615019224609e+02, 
3.9322998085289782e+02, 4.1390337702757438e+02, 4.8856029801345204e+02, 5.0271192916654570e+02, 4.9091594198952845e+02, 5.0809377638444926e+02, 5.2321208035994221e+02, 5.4770952441490192e+02, 5.2817006845838080e+02, 5.4714484189009147e+02, 4.9081225091120268e+02, 5.1240397279603928e+02, 4.9158073027031935e+02, 5.1502480371472871e+02, 5.7223828602721358e+02, 5.9667394727556575e+02, 5.7537894734461975e+02, 5.9925470530439986e+02, 4.9646123158168882e+02, 5.1903534403318656e+02, 5.0181634086118572e+02, 5.2172511442944483e+02, 5.2233159969818155e+02, 5.4780412588510796e+02, 5.2846475284626229e+02, 5.4849754426441416e+02, 4.5617530091144931e+02, 4.7396892884953650e+02, 4.5877481670469268e+02, 4.7577802295466512e+02, 4.4587673834159580e+02, 4.6732395897795834e+02, 4.5162986675993960e+02, 4.6775280428052747e+02, 4.0283354373844770e+02, 4.1961075089899697e+02, 4.0745259059538853e+02, 4.2303359882010614e+02, 5.3834353723774768e+02, 5.6525863139407920e+02, 5.4218387057666916e+02, 5.6547607603652864e+02, 4.4261815891116561e+02, 4.6322139706306598e+02, 4.4557510448028398e+02, 4.6409152553377004e+02, 4.4247192104148161e+02, 4.6619614492253584e+02, 4.4614612324987121e+02, 4.6625206457720230e+02, 3.6128738532891242e+02, 3.8988552843469040e+02, 3.6940156471526564e+02, 3.8850389331677923e+02, 4.7389962473318047e+02, 5.0013101019299427e+02, 4.7841978642421418e+02, 5.0240916591976708e+02, 4.4507921732155654e+02, 4.7390246086556681e+02, 4.5040565227666025e+02, 4.7420235641495236e+02, 4.8993159334334075e+02, 5.1119545519430841e+02, 4.9450301753071705e+02, 5.1079476503475638e+02, 3.9149835455877076e+02, 4.1695134005502371e+02, 3.9850449284689967e+02, 4.1768853701434006e+02, 5.5536615451459932e+02, 5.8218713084686863e+02, 5.6219650600540592e+02, 5.8110371415189206e+02, 3.5857465223194458e+02, 3.7805893452052851e+02, 3.6313044059114281e+02, 3.8278842764472688e+02, 3.9613973998079075e+02, 4.2121250028684204e+02, 4.0322872883880973e+02, 4.2129275763397044e+02, 4.2936205593131211e+02, 
4.5353270360878503e+02, 4.3287345252040535e+02, 4.5448469042238418e+02, 4.4950161157908241e+02, 4.7532105082965637e+02, 4.5647550173342893e+02, 4.7547228050864646e+02, 3.2679348365595416e+02, 3.4926301408293159e+02, 3.3184472007084833e+02, 3.4766058741420920e+02, 3.7386411648728529e+02, 3.9861550748020761e+02, 3.7822194089162900e+02, 4.0013870143390176e+02, 4.4829502159394286e+02, 4.7694032786407286e+02, 4.5274923302353994e+02, 4.7569909856079317e+02, 5.0605732715124822e+02, 5.2982960997204441e+02, 5.1018585735851212e+02, 5.3097883224725592e+02, 3.5863665220726045e+02, 3.8682448095468220e+02, 3.6639443664339842e+02, 3.8645991462669110e+02, 4.2980724222044194e+02, 4.4200509624071060e+02, 4.3377896483976394e+02, 4.5049650817962009e+02, 4.7353077646683136e+02, 5.0370872675117209e+02, 4.7734477563720822e+02, 5.0383525959610876e+02, 3.3018963371387275e+02, 3.5335741347599691e+02, 3.3490687757435569e+02, 3.5549069288195227e+02, 5.4332101159480499e+02, 5.5773440615406525e+02, 5.4693692877181081e+02, 5.6229241672395904e+02, 4.3911838198513118e+02, 4.6389559606570378e+02, 4.4555972724814870e+02, 4.6330360136131640e+02, 5.9012746767991473e+02, 6.0881103345863914e+02, 5.9238046236563105e+02, 6.1153339968383796e+02, 3.3567654669942158e+02, 3.5471649519755044e+02, 3.3738613420476901e+02, 3.6005001180565057e+02, 4.5183734883466576e+02, 4.7792551648388240e+02, 4.5576605194928987e+02, 4.7882989629493864e+02, 4.9963595110319130e+02, 5.2447596823087997e+02, 5.0502830948039860e+02, 5.2575477228206853e+02, 3.0830398627916276e+02, 3.3105832470397951e+02, 3.1259710901928895e+02, 3.3410602914065612e+02, 3.2743062231073293e+02, 3.4891769728491801e+02, 3.3177463909525744e+02, 3.5362578896828563e+02, 4.7519868494408144e+02, 4.9515900441603736e+02, 4.7779717459656456e+02, 4.9826625741531501e+02, 4.9284871034497542e+02, 5.1106984057352037e+02, 4.9641742709973187e+02, 5.1336922593850761e+02, 3.5619924997896692e+02, 3.7973909500494273e+02, 3.6036976487086139e+02, 3.8244786836827933e+02, 
5.1023777988632258e+02, 5.2792203133332578e+02, 5.1181598214733094e+02, 5.3093252840119374e+02, 5.8515851970733684e+02, 6.0662095917829583e+02, 5.8836080558705828e+02, 6.0856740950778476e+02, 4.2320528132965376e+02, 4.4433409233260238e+02, 4.2774087770870437e+02, 4.4493227381084881e+02, 5.3649403282476419e+02, 5.6002649142365817e+02, 5.4100544048421602e+02, 5.6344537433495304e+02, 3.2108481178606974e+02, 3.3751402850679085e+02, 3.2639602686352305e+02, 3.4280497127600313e+02, 3.6088723193495770e+02, 3.8330727327246012e+02, 3.6766216892694246e+02, 3.8408740198273415e+02, 5.3789217565204069e+02, 5.6077082817082771e+02, 5.4561433429711803e+02, 5.6002190651264800e+02, 3.8542525382667549e+02, 4.0818706679585972e+02, 3.8989237977139476e+02, 4.1071672112601937e+02, 4.4017080999360439e+02, 4.6096607829162730e+02, 4.4148597908498385e+02, 4.6196829584408204e+02, 4.4910149091628733e+02, 4.7307394363534456e+02, 4.5076663700944351e+02, 4.7551430237289520e+02, 3.9227814682587132e+02, 4.1257798087214985e+02, 3.9521371950921736e+02, 4.1711897460767489e+02, 5.1452231508718626e+02, 5.3859526400720893e+02, 5.1952695545599056e+02, 5.3961923578685878e+02, 3.8109275356108719e+02, 4.0346504014461300e+02, 3.8610825630499738e+02, 4.0617030886712132e+02, 4.8452457505595487e+02, 5.0041974589465406e+02, 4.8964722068875346e+02, 5.0550298995865631e+02, 3.9804275290318742e+02, 4.2264782863487699e+02, 4.0321007872679951e+02, 4.2492264399123434e+02, 5.0509147743746217e+02, 5.2641842260951853e+02, 5.0615604907658002e+02, 5.2851782681184159e+02, 3.3459128881879548e+02, 3.5785989581765483e+02, 3.4014053555863410e+02, 3.6113224451566020e+02, 3.7243330242446217e+02, 3.9454364197091792e+02, 3.7709573464905105e+02, 3.9881470989101706e+02, 4.6373112347911342e+02, 4.9281947700430516e+02, 4.6700883320630629e+02, 4.9432223003118827e+02, 4.8356479533205874e+02, 4.9924019843893802e+02, 4.8602244108327392e+02, 5.0403944602754200e+02, 4.9405865887431497e+02, 5.1764099704293153e+02, 4.9706952654679344e+02, 
5.1857657553800686e+02, 3.2353192335630808e+02, 3.4294458004548534e+02, 3.3005085921688982e+02, 3.4832237869662674e+02, 3.7709990325592833e+02, 4.0170421910177203e+02, 3.8082050238146007e+02, 4.0277160778705706e+02, 5.3575655043259815e+02, 5.5597457504626493e+02, 5.4128699868812475e+02, 5.5631870299488321e+02, 4.0289822061838851e+02, 4.2011136295440394e+02, 4.0596438967835684e+02, 4.2376691548223067e+02, 4.9819597054194503e+02, 5.2512706626398779e+02, 5.0197315929161960e+02, 5.2498152747596998e+02, 4.6817667276634273e+02, 5.0014722377303497e+02, 4.7458649647723576e+02, 5.0150495171951650e+02, 4.1941666791857625e+02, 4.3883151509608655e+02, 4.2401675298355462e+02, 4.4187117909268017e+02, 4.4249214476881394e+02, 4.6979092814746605e+02, 4.5029339653448335e+02, 4.7008378840721508e+02, 3.5083981057495157e+02, 3.7313087632713695e+02, 3.5453905916645419e+02, 3.7328522070000724e+02, 5.9181740721854158e+02, 6.1605214530968453e+02, 5.9371833379543057e+02, 6.1944402449941288e+02, 4.5118997327463666e+02, 4.7157660592958536e+02, 4.5537464602637550e+02, 4.7369769489472503e+02, 4.0138177193179956e+02, 4.1688091486099427e+02, 4.0497942623326634e+02, 4.2129355581523492e+02, 3.2454476068904751e+02, 3.4375410350243283e+02, 3.2903937444908257e+02, 3.4952628701737717e+02, 3.2747645819233173e+02, 3.5223863743599901e+02, 3.3462584368670485e+02, 3.5238457621937755e+02, 5.1406293910821455e+02, 5.4054861864779025e+02, 5.1862961318339524e+02, 5.3983332800144319e+02, 3.2626786506568948e+02, 3.4484875304946956e+02, 3.2968597510617190e+02, 3.5115983614977830e+02, 4.3161832975765424e+02, 4.5816388988101232e+02, 4.3575161757387821e+02, 4.5976959274894392e+02, 3.5654924037338890e+02, 3.8348265216764844e+02, 3.6337728104134629e+02, 3.8304023786626510e+02, 4.8355605334720593e+02, 5.0545440221729081e+02, 4.8988058276802548e+02, 5.0618475071249503e+02, 4.0865412723946616e+02, 4.2800407219894146e+02, 4.1095472883899140e+02, 4.3167357399059108e+02, 2.9708316761901597e+02, 3.1383146748172811e+02, 
2.9852303627317781e+02, 3.1710940115047504e+02, 4.7142270881905046e+02, 4.8987477487142007e+02, 4.7426138183490190e+02, 4.9403105234490903e+02, 3.6559540566801468e+02, 3.8870805730365379e+02, 3.6991533799271076e+02, 3.9018512214896242e+02, 5.3051276561692873e+02, 5.5329760522784727e+02, 5.3526924112310326e+02, 5.5512839001953125e+02, 4.9480860427837490e+02, 5.1417711766271100e+02, 4.9754426266298464e+02, 5.1841294991322388e+02, 3.7755705785212410e+02, 3.9871403752122859e+02, 3.8399149816388353e+02, 3.9960423516196516e+02, 5.4606883830754020e+02, 5.7269177786419971e+02, 5.5192452991915388e+02, 5.7279122308323679e+02, 3.7257595764014530e+02, 3.9118140540161784e+02, 3.7687331126515454e+02, 3.9896571592453080e+02, 5.2752105969967772e+02, 5.5611293296971098e+02, 5.3281649703346557e+02, 5.5594920150426572e+02, 4.3231846898537026e+02, 4.5288085667282348e+02, 4.3724671179152733e+02, 4.5453580950036348e+02, 4.7848078271205947e+02, 4.9368761153563361e+02, 4.8122739366968841e+02, 4.9813960591790141e+02, 4.1534041583572440e+02, 4.3799832356033329e+02, 4.1706050141647705e+02, 4.4120345083811236e+02, 4.2450222526363888e+02, 4.4987978590265880e+02, 4.2579221415895904e+02, 4.5062389839125194e+02, 5.3367829890285134e+02, 5.5925299920437146e+02, 5.3827600387794030e+02, 5.5875107897809198e+02, 3.1886322192082901e+02, 3.4604588931737567e+02, 3.2494503946587514e+02, 3.4498806567101104e+02, 4.5828995348069998e+02, 4.8134664875820539e+02, 4.6362842114441025e+02, 4.8218008418611402e+02, 3.2815013315367082e+02, 3.5330311766072475e+02, 3.3495746008397487e+02, 3.5350425839038832e+02, 4.5574559510937661e+02, 4.8461323076426964e+02, 4.6148981459068813e+02, 4.8263615785873958e+02, 4.2501531271042046e+02, 4.5132404339430190e+02, 4.2823468757230540e+02, 4.5342670023963024e+02, 3.7302979260694212e+02, 3.9811608227111651e+02, 3.7780483413354148e+02, 3.9775933708215859e+02, 5.3852258883589616e+02, 5.5889183953662018e+02, 5.4315382086627653e+02, 5.5817153685794256e+02, 3.5722233146453829e+02, 
3.8308198559090272e+02, 3.6280918007156259e+02, 3.8401915651539724e+02, 4.8783906491191323e+02, 5.0943975381762459e+02, 4.9217676882174038e+02, 5.1161007146016738e+02, 4.6331654743950884e+02, 4.7499075201329407e+02, 4.6705071715769213e+02, 4.8374951558367218e+02, 4.2992453268172954e+02, 4.4705802004563787e+02, 4.3466409215168113e+02, 4.5143517678668394e+02, 5.8618728290886418e+02, 6.0641021558623345e+02, 5.8936993438739410e+02, 6.0675386578249834e+02, 4.9964299424902913e+02, 5.2969306085468259e+02, 5.0356864383992928e+02, 5.2869887670804815e+02
-  }; 
-  std::vector expected_dy_dem_x = {
-    -8.5282566239703315e-04, 9.7434896238470878e-05, 2.8198670103749745e-05, 1.4683718725367045e-04, 9.7434896238470878e-05, -8.2177981795691663e-04, 1.4730179246350320e-04, 2.2697700888374512e-04, 2.8198670103749745e-05, 1.4730179246350320e-04, -3.0546099757163860e-04, -1.0691955275624962e-04, 1.4683718725367045e-04, 2.2697700888374512e-04, -1.0691955275624962e-04, -2.0472393176281583e-04, -5.8986194101090419e-04, 2.0301485015849167e-05, 1.4477690521761920e-04, 1.5589944596906697e-04, 2.0301485015849167e-05, -3.4790826051777767e-04, 1.6188954118095066e-04, -6.4754798873391454e-05, 1.4477690521761920e-04, 1.6188954118095066e-04, -2.9910593935885945e-04, -5.0936876433791144e-05, 1.5589944596906697e-04, -6.4754798873391454e-05, -5.0936876433791144e-05, -1.0355517826698125e-04, -6.9459162938867685e-04, 2.8327565248668782e-05, 2.8430463755414195e-05, 4.6197796678653893e-05, 2.8327565248668782e-05, -4.3779001120184660e-04, 3.9125697200477542e-05, -3.1649070137933027e-05, 2.8430463755414195e-05, 3.9125697200477542e-05, -4.0417658999342403e-04, 7.6519185026183547e-05, 4.6197796678653893e-05, -3.1649070137933027e-05, 7.6519185026183547e-05, -2.8017988936616456e-04, -8.6012936276845748e-04, 8.9176446815027605e-05, 1.6318850169617433e-05, 1.4131109278314074e-04, 8.9176446815027605e-05, -2.3539344428552647e-04, 1.0554839943982053e-04, 6.1754240894486574e-05, 1.6318850169617433e-05, 1.0554839943982053e-04, -2.2356938850427748e-04, -5.5818570386833549e-05, 1.4131109278314074e-04, 6.1754240894486574e-05, -5.5818570386833549e-05, -1.8579307724742348e-04, -1.4579471606530545e-05, 1.8373628547591411e-06, 6.3404455687761808e-09, 1.6520017494432617e-05, 1.8373628547591411e-06, 4.0055076619841326e-05, 5.1870915506862245e-07, 6.5455154995692458e-06, 6.3404455687761808e-09, 5.1870915506862245e-07, 3.5777013395772088e-05, 1.2331072770409084e-06, 1.6520017494432617e-05, 6.5455154995692458e-06, 1.2331072770409084e-06, 2.9330374271367200e-05, -1.0591838639597137e-03, 
3.7324386487840001e-04, 1.1201603730535799e-04, -1.3094998715550417e-04, 3.7324386487840001e-04, -5.3825225173688483e-04, 1.5433463803274103e-05, 6.7929980996522018e-06, 1.1201603730535799e-04, 1.5433463803274103e-05, -1.5347431592130682e-04, 7.3816404184980853e-05, -1.3094998715550417e-04, 6.7929980996522018e-06, 7.3816404184980853e-05, -1.4291206449640258e-04, -3.3653765271671784e-04, 9.6409327441248667e-05, 6.4078783813068993e-05, -5.7869265471838981e-05, 9.6409327441248667e-05, -2.2563161687222687e-04, 7.2686803292055054e-05, -6.2940087651933607e-05, 6.4078783813068993e-05, 7.2686803292055054e-05, -1.6173581616879356e-04, 8.8234028851700132e-05, -5.7869265471838981e-05, -6.2940087651933607e-05, 8.8234028851700132e-05, -9.8164250335740287e-05, -4.8591645753528860e-04, 9.7740935502860828e-05, 4.4478158377719349e-05, 1.0419035578929442e-04, 9.7740935502860828e-05, -2.8008833575539116e-04, 1.0234342742699657e-05, -1.1066375197239399e-05, 4.4478158377719349e-05, 1.0234342742699657e-05, -2.7922668025205693e-04, 1.1806531746953321e-04, 1.0419035578929442e-04, -1.1066375197239399e-05, 1.1806531746953321e-04, -1.9974381044785472e-04, -4.5059018192044660e-04, 1.5855953261830535e-04, 1.0703891128048314e-04, -2.9320496830173838e-05, 1.5855953261830535e-04, -2.0101672795536590e-04, 5.3750835060043594e-06, 6.3549045572453374e-05, 1.0703891128048314e-04, 5.3750835060043594e-06, -2.0012584084945853e-04, 8.1458143700506442e-05, -2.9320496830173838e-05, 6.3549045572453374e-05, 8.1458143700506442e-05, -1.7784532544621600e-04, -4.7553664863859020e-04, 9.0606794366112880e-05, 1.1547757782783547e-04, 1.7942202135921617e-04, 9.0606794366112880e-05, -3.7929902055660708e-04, 1.1141005377050407e-04, -1.0369999356845449e-04, 1.1547757782783547e-04, 1.1141005377050407e-04, -3.0010715447777566e-04, 5.9933061644597410e-05, 1.7942202135921617e-04, -1.0369999356845449e-04, 5.9933061644597410e-05, -1.5711713725349039e-04, -8.4512694843494427e-04, -1.5998766504508188e-04, 
1.3459366005811189e-04, 8.9050023157920225e-05, -1.5998766504508188e-04, -7.4497721059562944e-04, 2.1490465386259777e-04, 8.6993211035125107e-06, 1.3459366005811189e-04, 2.1490465386259777e-04, -3.5600902729500730e-04, 1.2490893288676265e-04, 8.9050023157920225e-05, 8.6993211035125107e-06, 1.2490893288676265e-04, -2.4912698171829185e-04, -5.4677729864457437e-04, 1.4544760640130386e-04, 2.1553577462634675e-04, -5.7697338533785043e-05, 1.4544760640130386e-04, -3.9389932680879196e-04, 9.3103780652668911e-05, 1.5781465334615180e-04, 2.1553577462634675e-04, 9.3103780652668911e-05, -3.5209769647293266e-04, 3.0908080405118161e-05, -5.7697338533785043e-05, 1.5781465334615180e-04, 3.0908080405118161e-05, -2.9381991178282840e-04, -8.4897434901643008e-04, 2.4958296800820011e-04, 1.6042793880630297e-04, 2.0191539006263895e-04, 2.4958296800820011e-04, -4.9074044762242083e-04, -3.8390868153238754e-05, 6.9794232794373798e-06, 1.6042793880630297e-04, -3.8390868153238754e-05, -2.3180481912080813e-04, -1.2344065882196416e-04, 2.0191539006263895e-04, 6.9794232794373798e-06, -1.2344065882196416e-04, -1.9034910644016834e-04, -3.9234583079674319e-04, 1.0779962022557863e-04, 7.7172205961845708e-05, -1.0845300489032052e-04, 1.0779962022557863e-04, -3.6501653375163899e-04, 1.6932449587587277e-04, 1.1497268397185604e-04, 7.7172205961845708e-05, 1.6932449587587277e-04, -2.4040175903203856e-04, 6.3315499528010007e-05, -1.0845300489032052e-04, 1.1497268397185604e-04, 6.3315499528010007e-05, -1.4974518011387353e-04, -7.5415253127048964e-04, 1.4628668950921841e-04, 1.6870584651280783e-04, 1.4071636815485435e-04, 1.4628668950921841e-04, -3.5212613519392216e-04, 8.9576029143352727e-05, -4.6556408920235956e-05, 1.6870584651280783e-04, 8.9576029143352727e-05, -2.7822614419127146e-04, -7.8397846864094857e-05, 1.4071636815485435e-04, -4.6556408920235956e-05, -7.8397846864094857e-05, -2.7346041679651505e-04, -2.6033241393473680e-04, 6.1297134521054865e-05, 1.1370943254758424e-05, 
4.1345709213983918e-05, 6.1297134521054865e-05, -2.0636544320363793e-04, 3.6633072227909512e-05, 9.6487308065657876e-05, 1.1370943254758424e-05, 3.6633072227909512e-05, -1.7929253284262379e-04, 1.0075401050272592e-04, 4.1345709213983918e-05, 9.6487308065657876e-05, 1.0075401050272592e-04, -1.7514728993513846e-04, -9.1976596775803973e-04, -1.1250093174975719e-05, 1.0574848982308227e-04, 5.6892733088968351e-05, -1.1250093174975719e-05, -5.3677767474141075e-04, 7.5975050471457976e-05, 2.1341143425148652e-04, 1.0574848982308227e-04, 7.5975050471457976e-05, -4.6910404481979227e-04, -1.1961907952442812e-04, 5.6892733088968351e-05, 2.1341143425148652e-04, -1.1961907952442812e-04, -1.6713401053931241e-04, -5.0318411138792780e-04, -7.8642752393614670e-06, 1.4734980833474556e-04, 1.5694775605953879e-04, -7.8642752393614670e-06, -4.3428679183052686e-04, 1.3240571026484175e-04, 9.4284982663807307e-05, 1.4734980833474556e-04, 1.3240571026484175e-04, -2.6476905195442599e-04, -4.4663985184858534e-05, 1.5694775605953879e-04, 9.4284982663807307e-05, -4.4663985184858534e-05, -1.4901238055304647e-04, -5.2022971006193295e-04, 1.8673817993987294e-04, 3.4824093151832877e-06, 1.9316715603711834e-04, 1.8673817993987294e-04, -3.5661136332554733e-04, 9.2514978038235173e-05, -8.6662703285468833e-07, 3.4824093151832877e-06, 9.2514978038235173e-05, -3.4175347533838051e-04, 8.7641158627568827e-05, 1.9316715603711834e-04, -8.6662703285468833e-07, 8.7641158627568827e-05, -2.3228701071836513e-04, -2.3172744537639032e-04, -2.0145061787998218e-05, 8.0361470649018278e-05, 7.8686158095771946e-05, -2.0145061787998218e-05, -2.1788437548850563e-04, 6.8244915676574325e-05, 4.7753842109443553e-05, 8.0361470649018278e-05, 6.8244915676574325e-05, -1.8538465680992033e-04, 6.3885883987663494e-06, 7.8686158095771946e-05, 4.7753842109443553e-05, 6.3885883987663494e-06, -1.5576199151219941e-04, -2.6053492859149329e-04, 3.7985008257765683e-05, -4.1618594585238569e-05, 1.2029024299682188e-04, 
3.7985008257765683e-05, -2.5883101840537494e-04, 2.5194049224745282e-05, 3.9122106536011156e-05, -4.1618594585238569e-05, 2.5194049224745282e-05, -1.8215627475922963e-04, 9.5955293419038848e-05, 1.2029024299682188e-04, 3.9122106536011156e-05, 9.5955293419038848e-05, -8.8495058887861342e-05, -8.5077111031875798e-04, 2.7142179798126236e-05, 2.3506811184181804e-04, -4.0221741191713819e-05, 2.7142179798126236e-05, -5.8825160593922092e-04, 1.2928972567669391e-04, 1.8492434110243213e-04, 2.3506811184181804e-04, 1.2928972567669391e-04, -4.8258381050380911e-04, 3.8157653583435203e-05, -4.0221741191713819e-05, 1.8492434110243213e-04, 3.8157653583435203e-05, -1.3868330525066390e-04, -1.0046100956135153e-03, 1.0071981850677016e-04, 2.8872287092350934e-05, 2.7319207214565589e-05, 1.0071981850677016e-04, -5.8995054305638166e-04, 1.9420289297011076e-04, -6.4394518219290390e-05, 2.8872287092350934e-05, 1.9420289297011076e-04, -4.6301081539337568e-04, 6.2018876700662711e-05, 2.7319207214565589e-05, -6.4394518219290390e-05, 6.2018876700662711e-05, -3.1496980814001716e-04, -3.5505598207892898e-04, 4.2612480787834219e-05, 4.6241360306879771e-06, 2.1108926449578879e-05, 4.2612480787834219e-05, -7.2955350707598794e-06, 1.8755820202733821e-05, -2.6907330166430605e-06, 4.6241360306879771e-06, 1.8755820202733821e-05, -1.6364300965012292e-06, -2.4890773965917357e-06, 2.1108926449578879e-05, -2.6907330166430605e-06, -2.4890773965917357e-06, -3.1119034082904818e-06, -1.3157832950043162e-03, 1.5109719057770362e-04, 2.2136411827069661e-04, 1.5620989616059463e-04, 1.5109719057770362e-04, -6.9909797727330165e-04, 1.3446109058413912e-04, 1.1910768000198964e-04, 2.2136411827069661e-04, 1.3446109058413912e-04, -5.1538523518062426e-04, 7.5113122085345235e-05, 1.5620989616059463e-04, 1.1910768000198964e-04, 7.5113122085345235e-05, -3.0505421799441914e-04, -3.0462471772100332e-04, 3.9065280474790656e-05, 1.4614293236463156e-04, 9.5882906751313209e-05, 3.9065280474790656e-05, -1.9543435847266125e-04, 
-1.1587160757702439e-05, 1.1931573498340947e-04, 1.4614293236463156e-04, -1.1587160757702439e-05, -1.8470088623319614e-04, 2.0760996552120055e-05, 9.5882906751313209e-05, 1.1931573498340947e-04, 2.0760996552120055e-05, -1.6350571548014739e-04, -1.0266618133570869e-03, 2.2637567583948763e-04, 4.5648808840412684e-05, 1.0877858120403682e-04, 2.2637567583948763e-04, -3.5855168729171305e-04, 1.0575495913196480e-04, 4.7811314368940493e-05, 4.5648808840412684e-05, 1.0575495913196480e-04, -2.0795540905090660e-04, -2.8126314053445194e-05, 1.0877858120403682e-04, 4.7811314368940493e-05, -2.8126314053445194e-05, -1.7015380275751095e-04, -1.0008635815443789e-03, 1.0735782597685222e-04, -8.2342347671546411e-05, -1.0621035075752513e-04, 1.0735782597685222e-04, -3.9974471825416052e-04, -4.2847890972143996e-05, 1.1148431692879854e-04, -8.2342347671546411e-05, -4.2847890972143996e-05, -1.0364328030944446e-04, 1.8027890592285884e-05, -1.0621035075752513e-04, 1.1148431692879854e-04, 1.8027890592285884e-05, -5.3335283910055137e-05, -1.1022946107544056e-03, 1.2081709463727365e-04, -8.2220753605772352e-06, 1.9181878385071303e-04, 1.2081709463727365e-04, -6.5229668590924652e-04, 2.4088842641070782e-04, 3.0891558204400955e-05, -8.2220753605772352e-06, 2.4088842641070782e-04, -2.8026542830655177e-04, 4.0661846631706496e-05, 1.9181878385071303e-04, 3.0891558204400955e-05, 4.0661846631706496e-05, -1.8847765937598992e-04, -2.4152123416571131e-04, -8.6922232482105472e-05, 1.6955733405143119e-04, -6.4111931600053363e-05, -8.6922232482105472e-05, -2.3487488104965331e-04, 5.1809081045441156e-05, 2.1850790757452379e-05, 1.6955733405143119e-04, 5.1809081045441156e-05, -2.0805036913184400e-04, -3.4798889866819799e-05, -6.4111931600053363e-05, 2.1850790757452379e-05, -3.4798889866819799e-05, -1.2312192870086894e-04, -8.8445499497716014e-05, 2.4421506868587198e-05, 2.2044662993515158e-06, 6.5601836003054636e-05, 2.4421506868587198e-05, -5.6415856340379447e-05, 9.2118704714902022e-05, 
-1.9706075570445630e-05, 2.2044662993515158e-06, 9.2118704714902022e-05, -4.0161585013125762e-05, 5.5260071625401637e-05, 6.5601836003054636e-05, -1.9706075570445630e-05, 5.5260071625401637e-05, -3.3100174236203148e-05, -8.2209958500230578e-04, -1.0375911757141743e-04, 2.0710347102188484e-04, -2.4978971692820028e-05, -1.0375911757141743e-04, -7.3910742021759002e-04, 5.5770606372779485e-06, 1.0998087187508114e-04, 2.0710347102188484e-04, 5.5770606372779485e-06, -3.0781601772972918e-04, 1.7042830142744166e-04, -2.4978971692820028e-05, 1.0998087187508114e-04, 1.7042830142744166e-04, -1.9931789083998724e-04, -8.2864124175878646e-04, 2.1156993777400108e-04, 1.3222387946538444e-04, 7.3125614419211014e-05, 2.1156993777400108e-04, -6.7998120575166903e-04, -4.5284032495574803e-05, 1.5448833703626970e-04, 1.3222387946538444e-04, -4.5284032495574803e-05, -1.6486495415349870e-04, -2.7910903682007980e-05, 7.3125614419211014e-05, 1.5448833703626970e-04, -2.7910903682007980e-05, -1.3616141316573496e-04, -6.9168427037508595e-04, 1.6848337105401589e-04, 2.0347536376145238e-05, 2.1730204740541038e-04, 1.6848337105401589e-04, -5.1567181939462376e-04, 2.2442346917809767e-04, -4.1679702475652895e-05, 2.0347536376145238e-05, 2.2442346917809767e-04, -2.7886738080818829e-04, -7.2100020041017474e-05, 2.1730204740541038e-04, -4.1679702475652895e-05, -7.2100020041017474e-05, -2.1591694500250683e-04, -6.3118866694050128e-04, 1.7407698534034082e-04, 2.2506708310326124e-04, -1.1855521616198067e-04, 1.7407698534034082e-04, -3.4276284162076975e-04, -2.4678327316965964e-05, -7.0132865868384085e-05, 2.2506708310326124e-04, -2.4678327316965964e-05, -2.4829751991729555e-04, 8.0665853806108728e-05, -1.1855521616198067e-04, -7.0132865868384085e-05, 8.0665853806108728e-05, -1.4560588738992748e-04, -5.4516146248809595e-04, 1.5429566078092839e-04, 7.3002508020663260e-05, -5.4127939476372286e-05, 1.5429566078092839e-04, -2.7006749001243577e-04, 3.8245018464081647e-05, 6.4001466507229872e-05, 
7.3002508020663260e-05, 3.8245018464081647e-05, -1.9064634129416447e-04, 4.8315978083461554e-05, -5.4127939476372286e-05, 6.4001466507229872e-05, 4.8315978083461554e-05, -9.6287270269265838e-05, -1.0867871706203747e-03, 2.2665889399974761e-05, 3.8527790611792381e-04, -5.7606042046986583e-05, 2.2665889399974761e-05, -5.9377950867244152e-04, 1.6458617522517848e-04, 9.8647825618332862e-05, 3.8527790611792381e-04, 1.6458617522517848e-04, -3.5517610380986437e-04, 1.5047714871992010e-06, -5.7606042046986583e-05, 9.8647825618332862e-05, 1.5047714871992010e-06, -2.4847179034300285e-04, 1.7287370288645611e-05, 2.4751905195362399e-05, 5.7409278650844171e-06, 1.5778384555210318e-06, 2.4751905195362399e-05, 6.9233671176431198e-05, 1.2056763828265809e-05, 5.8193528185755228e-06, 5.7409278650844171e-06, 1.2056763828265809e-05, 2.5803013934721890e-05, 9.7800078032772889e-06, 1.5778384555210318e-06, 5.8193528185755228e-06, 9.7800078032772889e-06, 1.2290894822139189e-05, -4.7276129504627868e-04, 1.7996071145545987e-04, -2.3559613078385275e-05, 9.1817364759875372e-05, 1.7996071145545987e-04, -4.4031640931680089e-04, 1.3129665680176059e-04, 8.5753307504504326e-05, -2.3559613078385275e-05, 1.3129665680176059e-04, -2.6082369278761529e-04, 1.1008038444960020e-04, 9.1817364759875372e-05, 8.5753307504504326e-05, 1.1008038444960020e-04, -2.2448972892423485e-04, 7.3552911328923082e-05, -4.6386393976681180e-07, -3.6302661188033895e-07, 8.1292949427203692e-06, -4.6386393976681180e-07, 7.1855824442516002e-05, 2.0977058635296311e-06, -8.1660148046199487e-07, -3.6302661188033895e-07, 2.0977058635296311e-06, 4.9178179355036989e-05, 5.5111033912396161e-07, 8.1292949427203692e-06, -8.1660148046199487e-07, 5.5111033912396161e-07, 4.3126931159686622e-05, -1.0356645478699321e-03, 3.5655021996677344e-05, 1.7863455145561712e-04, 9.9345386863453332e-05, 3.5655021996677344e-05, -4.6868369051367967e-04, 8.4528986648654719e-05, 2.6178454134616406e-05, 1.7863455145561712e-04, 8.4528986648654719e-05, 
-4.0189105052683167e-04, 1.7827808102152204e-04, 9.9345386863453332e-05, 2.6178454134616406e-05, 1.7827808102152204e-04, -2.4096643692971789e-04, -5.2947177689736285e-04, 2.9155995696660334e-04, 1.0881309013420403e-04, 6.5216605248019719e-05, 2.9155995696660334e-04, -3.7174127902678917e-04, -1.0713195660439511e-05, -1.4652487345689542e-04, 1.0881309013420403e-04, -1.0713195660439511e-05, -3.0848787251277967e-04, -8.1788880974243585e-05, 6.5216605248019719e-05, -1.4652487345689542e-04, -8.1788880974243585e-05, -1.8187514475847530e-04, -8.1675147016875278e-04, -9.1132812142916361e-05, 3.7581090557227133e-04, -1.6391843424681004e-05, -9.1132812142916361e-05, -8.0928426677666680e-04, 1.0186682914852628e-04, 6.6974048500429245e-05, 3.7581090557227133e-04, 1.0186682914852628e-04, -3.9501800512992450e-04, 1.1097843258641806e-04, -1.6391843424681004e-05, 6.6974048500429245e-05, 1.1097843258641806e-04, -1.6922741791056576e-04, -9.5550488243767526e-04, 8.9834092429790310e-05, 1.4676450162607620e-04, -8.9486200173685058e-05, 8.9834092429790310e-05, -5.7124935343482407e-04, 1.9674652749940278e-04, 2.7820147231366192e-05, 1.4676450162607620e-04, 1.9674652749940278e-04, -4.5311825527210236e-04, 1.5751505797459724e-04, -8.9486200173685058e-05, 2.7820147231366192e-05, 1.5751505797459724e-04, -2.1504103761769665e-04, -7.4655646881417747e-04, 1.7137670375811731e-04, 2.3305985967898233e-04, 2.0974576016239595e-05, 1.7137670375811731e-04, -6.3490393076444808e-04, -3.7460934394642461e-05, 3.8915692525215832e-05, 2.3305985967898233e-04, -3.7460934394642461e-05, -2.3619871093608590e-04, -9.5966181361813001e-05, 2.0974576016239595e-05, 3.8915692525215832e-05, -9.5966181361813001e-05, -1.5753671328738253e-04, -8.4952522473083828e-04, 1.0905671249046153e-04, 1.6050124252773700e-04, -3.5054030436609191e-06, 1.0905671249046153e-04, -7.8916382592320218e-04, 5.4410597072967622e-05, 3.1697067660733621e-04, 1.6050124252773700e-04, 5.4410597072967622e-05, -6.2431502557835482e-04, 
4.0685779353217203e-05, -3.5054030436609191e-06, 3.1697067660733621e-04, 4.0685779353217203e-05, -3.1869838579109982e-04, -4.9197474900203851e-04, 2.3175272353917089e-05, 4.8349993627407357e-05, 8.3431439033521311e-05, 2.3175272353917089e-05, -2.1237686325728994e-04, 1.1579272422842471e-04, 4.4948063413445143e-06, 4.8349993627407357e-05, 1.1579272422842471e-04, -1.4387542985386714e-04, -2.8807020627775499e-05, 8.3431439033521311e-05, 4.4948063413445143e-06, -2.8807020627775499e-05, -8.8314386801545690e-05, -1.0377932964218571e-03, 3.1548342438866215e-04, -7.7539032460135759e-06, 1.5862268899707329e-04, 3.1548342438866215e-04, -5.2279750482530675e-04, -9.7156212604978696e-05, 7.8070746092368879e-05, -7.7539032460135759e-06, -9.7156212604978696e-05, -5.1909921397954076e-04, -7.0888358650239406e-05, 1.5862268899707329e-04, 7.8070746092368879e-05, -7.0888358650239406e-05, -4.3559478522129486e-04, -3.4047542887403288e-04, 9.9485414683848702e-05, 5.3467360125445010e-05, 5.5687160259303899e-05, 9.9485414683848702e-05, -2.1688590318227243e-04, 5.8207991561983353e-05, 3.8807798300090633e-05, 5.3467360125445010e-05, 5.8207991561983353e-05, -1.4444518993710908e-04, 6.9712029871340034e-05, 5.5687160259303899e-05, 3.8807798300090633e-05, 6.9712029871340034e-05, -8.8308431324746653e-05, -3.9748580631234745e-04, 1.6061520126347354e-05, 7.4535335825201364e-05, -1.2455905762848885e-05, 1.6061520126347354e-05, -3.0428877235287100e-04, 6.7638842446531838e-05, 9.0216218435291209e-05, 7.4535335825201364e-05, 6.7638842446531838e-05, -1.8476291197258023e-04, 8.6490256848873625e-05, -1.2455905762848885e-05, 9.0216218435291209e-05, 8.6490256848873625e-05, -1.7290120999552497e-04, -6.3017890323441988e-04, 6.8925477551151667e-05, 1.4591855761896425e-04, 5.8696848733904197e-05, 6.8925477551151667e-05, -4.4840596419734699e-04, 1.7185871633737483e-04, 1.3636588971003942e-04, 1.4591855761896425e-04, 1.7185871633737483e-04, -3.5633566053381324e-04, 6.9798068644545436e-05, 5.8696848733904197e-05, 
1.3636588971003942e-04, 6.9798068644545436e-05, -2.8194929183414266e-04, -5.2084239851836716e-04, 1.3919308449155372e-04, 7.9297302299835321e-05, -1.0666209674729959e-04, 1.3919308449155372e-04, -4.8272864193460750e-04, 1.4580575117746091e-04, 1.8231015042159636e-04, 7.9297302299835321e-05, 1.4580575117746091e-04, -2.4109836209905614e-04, -9.6833449169729229e-05, -1.0666209674729959e-04, 1.8231015042159636e-04, -9.6833449169729229e-05, -2.2652176409735576e-04, -5.9265052462615796e-04, 3.2921496507552024e-05, 1.6741839899836377e-04, -1.8287094414409389e-05, 3.2921496507552024e-05, -4.7506545424716396e-04, 8.4264843353050377e-05, 3.2049421513026473e-05, 1.6741839899836377e-04, 8.4264843353050377e-05, -2.6830906645465910e-04, 1.0562771031346049e-04, -1.8287094414409389e-05, 3.2049421513026473e-05, 1.0562771031346049e-04, -1.8350092261002626e-04, -7.5685265706840210e-04, 2.5873322291747580e-04, 6.4147355541750696e-06, 4.8379384836955772e-05, 2.5873322291747580e-04, -4.5814972293314378e-04, 5.2257125816072260e-05, 1.7555007655928345e-04, 6.4147355541750696e-06, 5.2257125816072260e-05, -4.5065665859371718e-04, 1.3365888276934993e-04, 4.8379384836955772e-05, 1.7555007655928345e-04, 1.3365888276934993e-04, -2.8055080587645673e-04, -6.8617367385189203e-04, 2.0111488204472137e-04, -3.5677735636164015e-05, 1.5038190317367591e-05, 2.0111488204472137e-04, -6.8307312914361548e-04, 3.7317688096927270e-04, 2.2841592720268073e-05, -3.5677735636164015e-05, 3.7317688096927270e-04, -5.7456997444991467e-04, 9.8364592445165106e-05, 1.5038190317367591e-05, 2.2841592720268073e-05, 9.8364592445165106e-05, -4.9874689148873266e-04, -9.7937117522682625e-04, 1.3365493445469090e-04, -3.5389680247569871e-05, 1.7409836485831839e-04, 1.3365493445469090e-04, -3.0622931287801688e-04, 2.0447820578798172e-04, -2.0241678808795576e-05, -3.5389680247569871e-05, 2.0447820578798172e-04, -2.9965533718679050e-04, -5.0337561347213358e-05, 1.7409836485831839e-04, -2.0241678808795576e-05, 
-5.0337561347213358e-05, -9.7218271603616025e-05, -7.2361999623641096e-04, 6.0238770967297993e-05, 9.5583099624811875e-06, -4.8817196772293789e-05, 6.0238770967297993e-05, -4.2873796707020983e-04, 3.5290062490134286e-05, 1.0186668437085330e-04, 9.5583099624811875e-06, 3.5290062490134286e-05, -1.7327171153624178e-04, 7.5826006592362129e-05, -4.8817196772293789e-05, 1.0186668437085330e-04, 7.5826006592362129e-05, -6.6625240506338742e-05, -5.1082007985057531e-04, 9.0145069011309307e-05, 1.0545714803937606e-04, 9.9838943414637320e-05, 9.0145069011309307e-05, -2.9666343813120901e-04, 1.6607259863244859e-04, -8.0518077824595215e-05, 1.0545714803937606e-04, 1.6607259863244859e-04, -2.7888486151681505e-04, 9.9492300205694249e-05, 9.9838943414637320e-05, -8.0518077824595215e-05, 9.9492300205694249e-05, -1.6153484908064224e-04, -3.8893832026680984e-04, 8.4480598688399479e-05, 1.3917939203462009e-04, 6.4028139288043303e-05, 8.4480598688399479e-05, -2.9031480540068355e-04, 4.1581354137412694e-05, 1.3719607068135917e-04, 1.3917939203462009e-04, 4.1581354137412694e-05, -2.5727897912316704e-04, 2.3355557894492145e-05, 6.4028139288043303e-05, 1.3719607068135917e-04, 2.3355557894492145e-05, -2.0869295477029555e-04, -1.2940743236606971e-03, 9.9242112186229852e-05, -3.0883079008754396e-05, 2.4110922845507835e-04, 9.9242112186229852e-05, -5.4844927137480013e-04, 1.5284676340009107e-04, 1.5074954460357989e-04, -3.0883079008754396e-05, 1.5284676340009107e-04, -3.5890880634564429e-04, -9.2111965277796469e-05, 2.4110922845507835e-04, 1.5074954460357989e-04, -9.2111965277796469e-05, -2.1606644503010969e-04, -3.6859095195743705e-04, 1.2811340827727349e-04, 7.1617940606111583e-05, 1.2147695942115610e-04, 1.2811340827727349e-04, -2.4093211232803426e-04, -4.5817408623953358e-05, -2.6602561355328134e-05, 7.1617940606111583e-05, -4.5817408623953358e-05, -1.8715896488104966e-04, -2.9073404906152265e-06, 1.2147695942115610e-04, -2.6602561355328134e-05, -2.9073404906152265e-06, 
-1.1706145694897137e-04, -6.6786161879298469e-04, 3.5686415150814899e-05, 1.5672913017009308e-04, 8.1161443650996505e-05, 3.5686415150814899e-05, -4.8659658995020956e-04, 1.8844759048223573e-04, 1.8572079841468395e-05, 1.5672913017009308e-04, 1.8844759048223573e-04, -3.3591563943804917e-04, 5.8356892899359846e-05, 8.1161443650996505e-05, 1.8572079841468395e-05, 5.8356892899359846e-05, -2.5729244353890580e-04, -4.9057710957934610e-04, 3.5189384292357178e-05, 1.2768527966492655e-04, 7.7111559864786111e-05, 3.5189384292357178e-05, -3.3022996564975609e-04, 1.1998806440725087e-04, 7.9127474986119613e-05, 1.2768527966492655e-04, 1.1998806440725087e-04, -2.2746335211673159e-04, 8.6400013414019474e-05, 7.7111559864786111e-05, 7.9127474986119613e-05, 8.6400013414019474e-05, -2.0017500168890246e-04, -1.1930564853647864e-03, 3.6059432907568098e-04, -4.5802509267350180e-05, 1.6549786604919792e-04, 3.6059432907568098e-04, -6.6459120881215568e-04, 2.2815747528720748e-04, 5.2558786107152938e-05, -4.5802509267350180e-05, 2.2815747528720748e-04, -6.2636194192342237e-04, 1.8747959874315315e-05, 1.6549786604919792e-04, 5.2558786107152938e-05, 1.8747959874315315e-05, -3.2850961953909732e-04, 7.8706872876127071e-02, -2.2653584740143385e-02, 9.7740231630116456e-04, -8.0082023113196138e-04, -2.2653584740143385e-02, -8.9447422099334685e-03, -8.8792954595688558e-04, 1.1368137224524006e-03, 9.7740231630116456e-04, -8.8792954595688558e-04, -7.7069586145163686e-04, 2.3926064253705276e-04, -8.0082023113196138e-04, 1.1368137224524006e-03, 2.3926064253705276e-04, -6.6984821695051074e-04, -1.0791500301598586e-01, -3.2421420547026750e-02, -1.6310176240427882e-03, -1.6324544291089519e-03, -3.2421420547026750e-02, -1.0068088253218213e-02, 3.7700151002736698e-04, 1.5579093564988948e-03, -1.6310176240427882e-03, 3.7700151002736698e-04, -6.4009621154418304e-04, 1.3364317447552753e-04, -1.6324544291089519e-03, 1.5579093564988948e-03, 1.3364317447552753e-04, -4.5908668219426775e-04, 
1.9931647893026413e-02, 1.8868322719129860e-02, -2.6059845702808022e-04, -1.7677688147390257e-04, 1.8868322719129860e-02, -2.0969852051011485e-04, 3.6441845096447370e-04, 2.7994468344729460e-04, -2.6059845702808022e-04, 3.6441845096447370e-04, -2.8402192286764761e-04, -5.7796869103119775e-06, -1.7677688147390257e-04, 2.7994468344729460e-04, -5.7796869103119775e-06, -2.7098438358809471e-04, -2.5171876544209310e-01, -1.6215224556807420e-02, -1.7135769001415840e-03, 1.5448964301369569e-03, -1.6215224556807420e-02, -2.3023489339995557e-03, 1.5454079463723322e-03, 1.4540171518868922e-04, -1.7135769001415840e-03, 1.5454079463723322e-03, -1.5783435634697221e-03, -5.6267811205058696e-04, 1.5448964301369569e-03, 1.4540171518868922e-04, -5.6267811205058696e-04, -1.4668478620555464e-03, 9.0064272324944780e-02, 1.0159854544206332e-02, -1.8417068624857591e-03, 1.5581049875507725e-03, 1.0159854544206332e-02, -1.2626008140885292e-03, 5.5672594951240815e-04, -6.6992565886242451e-04, -1.8417068624857591e-03, 5.5672594951240815e-04, -6.3183326791236682e-04, 3.4048390925801674e-04, 1.5581049875507725e-03, -6.6992565886242451e-04, 3.4048390925801674e-04, -4.5748292658650842e-04, -2.2443359343759173e-01, 2.3550848933061329e-02, -1.3605814923240537e-03, 1.6581748189729430e-03, 2.3550848933061329e-02, -1.2119208694257528e-03, 1.1877044380419087e-03, -8.7237437059008887e-04, -1.3605814923240537e-03, 1.1877044380419087e-03, -1.0281058010702063e-03, -1.6587408018026506e-04, 1.6581748189729430e-03, -8.7237437059008887e-04, -1.6587408018026506e-04, -1.0202663289786004e-03, -2.6189782764010544e-01, 2.2986538547258420e-02, -1.7632069264149162e-03, 1.5107406905049286e-03, 2.2986538547258420e-02, -1.2746144428941505e-03, 4.8343451999544908e-04, -5.7379894194384067e-04, -1.7632069264149162e-03, 4.8343451999544908e-04, -5.6480453719830008e-04, 3.4954233236744335e-04, 1.5107406905049286e-03, -5.7379894194384067e-04, 3.4954233236744335e-04, -4.8900362963805524e-04, -1.2749879550430313e-01, 
2.8523823603441460e-02, -1.9125154882043663e-03, 2.7084423469016369e-03, 2.8523823603441460e-02, -2.3862377659643530e-03, 5.1372875563453473e-04, -4.3154165895067698e-04, -1.9125154882043663e-03, 5.1372875563453473e-04, -1.1276693699735670e-03, -8.2543254355266842e-05, 2.7084423469016369e-03, -4.3154165895067698e-04, -8.2543254355266842e-05, -1.0951105630241573e-03, -1.0158866176089715e-01, 1.6602982315561777e-02, -1.7772582548894245e-03, 4.3920384137976376e-04, 1.6602982315561777e-02, -4.7547131566108777e-03, 6.8783893805187952e-05, -1.0748720663370313e-03, -1.7772582548894245e-03, 6.8783893805187952e-05, -5.0205120710799611e-04, -3.3919151133434616e-05, 4.3920384137976376e-04, -1.0748720663370313e-03, -3.3919151133434616e-05, -3.1422927937441901e-04, 1.5108981059488835e-01, 2.1974044174935103e-03, 4.9973793800626900e-04, -2.0324655241621987e-03, 2.1974044174935103e-03, -2.0249834345665027e-03, 2.8912368069502044e-04, -3.9641649235281848e-04, 4.9973793800626900e-04, 2.8912368069502044e-04, -1.4279057483715719e-03, 2.3314451382963299e-04, -2.0324655241621987e-03, -3.9641649235281848e-04, 2.3314451382963299e-04, -9.6409814362172263e-04, -9.7276844843051602e-02, 3.5504450866312058e-03, 3.8203830614491319e-03, -1.4507036855037911e-03, 3.5504450866312058e-03, -1.1050016265867703e-03, 3.0086962405142534e-05, 2.4446145351075344e-04, 3.8203830614491319e-03, 3.0086962405142534e-05, -1.8455518785688229e-03, 6.3062318909916208e-04, -1.4507036855037911e-03, 2.4446145351075344e-04, 6.3062318909916208e-04, -8.0531364204684895e-04, 2.9804711899684446e-02, -1.5978165071995631e-02, 1.0778430397325950e-03, -1.4446815201809952e-03, -1.5978165071995631e-02, -5.6345296642827322e-03, -1.0626058855508918e-03, 5.0267944657573355e-04, 1.0778430397325950e-03, -1.0626058855508918e-03, -1.0914650387547676e-03, 4.3259937074976290e-04, -1.4446815201809952e-03, 5.0267944657573355e-04, 4.3259937074976290e-04, -4.5142572576815595e-04, -6.9494494593566603e-02, -2.0051133800610695e-02, 
5.8710738033530515e-04, 3.5010545121283096e-04, -2.0051133800610695e-02, -7.1098727986929232e-03, -2.5423159406186779e-04, -9.3348474227718226e-04, 5.8710738033530515e-04, -2.5423159406186779e-04, -6.0375150441947952e-04, 2.7259811627839867e-04, 3.5010545121283096e-04, -9.3348474227718226e-04, 2.7259811627839867e-04, -5.4004855753860101e-04, 1.5401777363000305e-01, 8.1160613829283474e-04, 9.2628051082108370e-04, 3.6211771998279100e-03, 8.1160613829283474e-04, -1.7141151995087387e-03, -3.3990186206657631e-04, -2.5417245475395459e-04, 9.2628051082108370e-04, -3.3990186206657631e-04, -1.4122852387503289e-03, -1.0293746792916486e-04, 3.6211771998279100e-03, -2.5417245475395459e-04, -1.0293746792916486e-04, -1.2188212673550846e-03, -1.2066137822324684e-01, -2.1235012929347232e-02, 1.6254696003078779e-03, -1.2375969117924457e-03, -2.1235012929347232e-02, -6.7234109883531987e-03, 4.4368086424732868e-05, 5.4519256781239238e-04, 1.6254696003078779e-03, 4.4368086424732868e-05, -1.7851002974240047e-03, 2.6435810786128557e-04, -1.2375969117924457e-03, 5.4519256781239238e-04, 2.6435810786128557e-04, -9.5219784353280741e-04, -2.0428062162336544e-01, -1.8822402400988779e-02, 8.0995888860823685e-04, -1.1746114214952719e-03, -1.8822402400988779e-02, -3.1320256402858366e-03, -9.4173163158250614e-04, 5.3739665278471852e-04, 8.0995888860823685e-04, -9.4173163158250614e-04, -8.4068628230452394e-04, 2.2479483070440945e-04, -1.1746114214952719e-03, 5.3739665278471852e-04, 2.2479483070440945e-04, -7.5550494619625963e-04, 2.0478820977633771e-02, 5.6432816347106261e-03, 1.5567905281500807e-03, 3.1015131190170152e-03, 5.6432816347106261e-03, -1.8593392130277192e-03, -1.0887013744527499e-03, -5.9319521599679871e-04, 1.5567905281500807e-03, -1.0887013744527499e-03, -1.8709510047864347e-03, -1.6277602269798984e-04, 3.1015131190170152e-03, -5.9319521599679871e-04, -1.6277602269798984e-04, -1.8685922653237117e-03, 6.5107726886614783e-03, -3.6209927731580757e-03, -3.8090764358667738e-04, 
4.8090075186154230e-04, -3.6209927731580757e-03, -2.5033792471679753e-03, 2.3549698237273272e-04, -6.0379270164610918e-04, -3.8090764358667738e-04, 2.3549698237273272e-04, -7.2631802550718566e-04, 3.6336152250252679e-04, 4.8090075186154230e-04, -6.0379270164610918e-04, 3.6336152250252679e-04, -5.9720775259365871e-04, 1.5597924802072299e-01, -9.1287267636976852e-03, 5.1940698209609363e-03, -1.9270196737078854e-03, -9.1287267636976852e-03, -1.0171355227070189e-03, -8.3707115487426011e-04, 2.6648470329260969e-04, 5.1940698209609363e-03, -8.3707115487426011e-04, -1.5133538294868042e-03, 5.0193401691492020e-04, -1.9270196737078854e-03, 2.6648470329260969e-04, 5.0193401691492020e-04, -7.5563292076018306e-04, -5.1184574889172579e-02, 2.7987397416586360e-02, 7.5080665467791617e-04, 7.0012226943657642e-04, 2.7987397416586360e-02, -2.1716159037027593e-03, -4.8086372671787576e-04, -9.8018175618315873e-04, 7.5080665467791617e-04, -4.8086372671787576e-04, -7.3577207641989818e-04, 2.6562080093313106e-04, 7.0012226943657642e-04, -9.8018175618315873e-04, 2.6562080093313106e-04, -6.0973448697062660e-04, 1.3382668541587697e-01, -2.2722610733945561e-02, -1.2434543017359170e-03, -1.2728876503270323e-03, -2.2722610733945561e-02, -4.3688188071787232e-03, 7.7645284186351441e-04, 6.3845101211561532e-04, -1.2434543017359170e-03, 7.7645284186351441e-04, -7.3990375201463685e-04, -2.2683288029159060e-04, -1.2728876503270323e-03, 6.3845101211561532e-04, -2.2683288029159060e-04, -6.7815800933636403e-04, 1.3120488245988418e-01, 4.8547089951903696e-02, 5.2274693388694210e-04, 4.0755937244519420e-04, 4.8547089951903696e-02, -8.3907942102404050e-03, -3.8252224092104130e-04, -4.0281273644884673e-04, 5.2274693388694210e-04, -3.8252224092104130e-04, -6.3441175364586163e-04, 3.6516056969289123e-04, 4.0755937244519420e-04, -4.0281273644884673e-04, 3.6516056969289123e-04, -6.1351796194961811e-04, -1.5706881122068012e-01, -2.8064609799324989e-02, 5.2866261510743358e-04, -5.0158079779070628e-04, 
-2.8064609799324989e-02, -7.9255987347370254e-03, -7.2907309715153476e-04, 2.0023722312570202e-04, 5.2866261510743358e-04, -7.2907309715153476e-04, -8.7726820594463044e-04, 9.3347733493378919e-04, -5.0158079779070628e-04, 2.0023722312570202e-04, 9.3347733493378919e-04, -7.6444145077611959e-04, -2.4185449825431532e-01, 9.3190796925950465e-03, -1.0360517460863226e-03, 7.5166654771481383e-04, 9.3190796925950465e-03, -1.8089927328024354e-03, 4.5529238180270712e-04, -6.7548648300266129e-04, -1.0360517460863226e-03, 4.5529238180270712e-04, -9.8195456656893299e-04, 3.2953602624600028e-04, 7.5166654771481383e-04, -6.7548648300266129e-04, 3.2953602624600028e-04, -7.5088680434060126e-04, -6.2266646786067938e-03, 4.3482306684820587e-02, -6.7931544150714681e-04, -1.1318057792932172e-03, 4.3482306684820587e-02, -3.0163016880633831e-03, 1.1154407743767898e-03, 1.2680847876108238e-03, -6.7931544150714681e-04, 1.1154407743767898e-03, -6.2686682105641005e-04, 2.4799994581475311e-05, -1.1318057792932172e-03, 1.2680847876108238e-03, 2.4799994581475311e-05, -4.8261912829217966e-04, 4.4034487767024420e-02, 1.2027693141635914e-02, -3.1617252369308421e-04, -1.3810034906793043e-03, 1.2027693141635914e-02, -1.9189382672973364e-03, 3.5771288333242082e-04, 3.2903469104204127e-04, -3.1617252369308421e-04, 3.5771288333242082e-04, -7.7434480573853985e-04, 1.4662364658262446e-04, -1.3810034906793043e-03, 3.2903469104204127e-04, 1.4662364658262446e-04, -7.1146947717666163e-04, -1.7439979214498852e-01, 2.5559515911645361e-02, -1.3609574326154626e-03, -8.1347979141622453e-04, 2.5559515911645361e-02, -1.9101252004779083e-03, 5.8401786740861872e-04, 2.6463889212369153e-04, -1.3609574326154626e-03, 5.8401786740861872e-04, -1.2022614404840581e-03, 1.9183593028871340e-04, -8.1347979141622453e-04, 2.6463889212369153e-04, 1.9183593028871340e-04, -7.2688162163724512e-04, -2.6243218345287456e-01, 4.9398209870091560e-03, -1.5696233107405566e-03, 1.0567220238967725e-03, 4.9398209870091560e-03, 
-1.4022787059415794e-03, 5.8256688059597295e-04, 1.9025682403384436e-05, -1.5696233107405566e-03, 5.8256688059597295e-04, -1.3070453333682249e-03, -3.3851763928833433e-04, 1.0567220238967725e-03, 1.9025682403384436e-05, -3.3851763928833433e-04, -1.2320723061259361e-03, -8.2714390070501104e-02, 1.0975412304762084e-02, -4.2098382115161030e-04, -6.7702215349947506e-04, 1.0975412304762084e-02, -8.7719105355609575e-03, 6.4626568971351604e-04, 3.4082672855708366e-04, -4.2098382115161030e-04, 6.4626568971351604e-04, -5.3896694179129197e-04, 1.8641783252149489e-04, -6.7702215349947506e-04, 3.4082672855708366e-04, 1.8641783252149489e-04, -3.3770767382408195e-04, -1.2193197837136689e-01, -1.5380109380286541e-02, 3.3246097773883404e-04, -1.6628731429037037e-03, -1.5380109380286541e-02, -2.5373182102857170e-03, -2.0443003453976912e-05, 8.2993773737306147e-04, 3.3246097773883404e-04, -2.0443003453976912e-05, -8.8731220769894020e-04, -6.0009982488096482e-05, -1.6628731429037037e-03, 8.2993773737306147e-04, -6.0009982488096482e-05, -7.1835196502551967e-04, 7.8012293187519954e-02, 3.1132058063563710e-02, -1.1696899428820292e-03, -1.3753629772876416e-03, 3.1132058063563710e-02, -2.9320533071647394e-03, 4.4209006788966515e-04, 5.4507080398151985e-04, -1.1696899428820292e-03, 4.4209006788966515e-04, -6.3061860103715917e-04, 2.3835122850300404e-04, -1.3753629772876416e-03, 5.4507080398151985e-04, 2.3835122850300404e-04, -4.8807112103491492e-04, 1.2489186610021678e-01, -4.2121994803512846e-03, -1.4615798111060961e-03, -1.3284544933660910e-03, -4.2121994803512846e-03, -1.9469301448134307e-03, 2.4383944981524552e-04, 9.0191595070716760e-04, -1.4615798111060961e-03, 2.4383944981524552e-04, -8.4489411054921443e-04, -3.2983470816998103e-04, -1.3284544933660910e-03, 9.0191595070716760e-04, -3.2983470816998103e-04, -5.5892089821921545e-04, -1.3647684084529246e-01, 3.9776937406723417e-02, 6.1334383189688029e-04, -8.9816660782599911e-04, 3.9776937406723417e-02, -4.3020139627246353e-04, 
-9.3557080941627938e-04, 7.7958935546057592e-04, 6.1334383189688029e-04, -9.3557080941627938e-04, -3.5211495153066340e-04, 5.8831013523111768e-05, -8.9816660782599911e-04, 7.7958935546057592e-04, 5.8831013523111768e-05, -3.1382659001747163e-04, -1.6918244557222134e-01, -2.6159967833569130e-02, -1.5521334165285558e-03, -1.6552342040953793e-03, -2.6159967833569130e-02, -8.5733501045166700e-03, 1.1884549627826878e-03, 2.9341595161121387e-04, -1.5521334165285558e-03, 1.1884549627826878e-03, -6.8065594315602643e-04, 1.8544180606965729e-04, -1.6552342040953793e-03, 2.9341595161121387e-04, 1.8544180606965729e-04, -4.5742794945487230e-04, -2.0437904412209100e-02, 2.0278428462028566e-02, -3.1833318892537861e-04, -1.5672358504625003e-03, 2.0278428462028566e-02, -2.3308820806625680e-03, -9.2413164888836781e-05, 1.2261910114897223e-03, -3.1833318892537861e-04, -9.2413164888836781e-05, -6.1509973454375463e-04, -3.1369065713858962e-04, -1.5672358504625003e-03, 1.2261910114897223e-03, -3.1369065713858962e-04, -6.1311372710779475e-04, -2.1068663677001948e-01, 3.0586587729756580e-02, -1.4638083866968235e-03, -1.7663639913775599e-03, 3.0586587729756580e-02, -2.4896000109830450e-03, 1.0878021744662348e-03, 5.5784495882122158e-04, -1.4638083866968235e-03, 1.0878021744662348e-03, -8.5226768150814753e-04, 2.4896475900177203e-04, -1.7663639913775599e-03, 5.5784495882122158e-04, 2.4896475900177203e-04, -7.2954677906140835e-04, -2.0670514661104566e-01, 2.3101872560098982e-03, -6.3615804468624706e-04, -1.0191121784747393e-03, 2.3101872560098982e-03, -4.5012725771569565e-03, 3.0966024212130121e-04, 7.3480770548280236e-04, -6.3615804468624706e-04, 3.0966024212130121e-04, -7.6577049132799655e-04, 1.9069869693581136e-04, -1.0191121784747393e-03, 7.3480770548280236e-04, 1.9069869693581136e-04, -3.3060687835284803e-04, 1.4979203013904469e-01, 7.1888739604317664e-03, -1.6498112176118718e-03, 7.6090317854697837e-04, 7.1888739604317664e-03, -1.6847252134716941e-03, 9.1765413550792092e-04, 
-8.1660576268565398e-04, -1.6498112176118718e-03, 9.1765413550792092e-04, -5.6736119553899445e-04, 2.4293054607895927e-04, 7.6090317854697837e-04, -8.1660576268565398e-04, 2.4293054607895927e-04, -3.0141462646083878e-04, -2.5893180917593467e-01, -8.1773154879442519e-03, -1.8051666448928912e-03, -1.7078409149329419e-03, -8.1773154879442519e-03, -2.4391627814064182e-03, 2.5949401569512947e-04, 7.3814751053530889e-04, -1.8051666448928912e-03, 2.5949401569512947e-04, -8.9181287706463540e-04, 1.2172298924298193e-04, -1.7078409149329419e-03, 7.3814751053530889e-04, 1.2172298924298193e-04, -5.5666018893034495e-04, 1.3114296578438139e-01, -6.0075695408917090e-03, 4.3242635386152908e-04, -1.6174936298550607e-03, -6.0075695408917090e-03, -2.1679629901973250e-03, -1.4790030682634993e-04, 4.0251669474305628e-04, 4.3242635386152908e-04, -1.4790030682634993e-04, -6.4946095998946827e-04, 2.8330245317322158e-04, -1.6174936298550607e-03, 4.0251669474305628e-04, 2.8330245317322158e-04, -5.4324532966633539e-04, 1.5527234406654167e-01, 4.5641696399707443e-02, -1.1740442464421269e-03, -1.5447474252538395e-03, 4.5641696399707443e-02, -6.1793537674581797e-03, 1.5275860858276267e-03, 8.6030738024539066e-04, -1.1740442464421269e-03, 1.5275860858276267e-03, -7.8182161800352231e-04, -1.5885947204160602e-04, -1.5447474252538395e-03, 8.6030738024539066e-04, -1.5885947204160602e-04, -7.3143195173575981e-04, 1.1697380540622421e-01, -1.8452554290786850e-03, 1.0016961110488366e-03, -1.4829998143344009e-03, -1.8452554290786850e-03, -1.1489869626221500e-03, -4.0492587305436355e-04, 2.7582642932447637e-04, 1.0016961110488366e-03, -4.0492587305436355e-04, -8.2641017439131223e-04, 3.3828661526212394e-04, -1.4829998143344009e-03, 2.7582642932447637e-04, 3.3828661526212394e-04, -7.1775652801919342e-04, 2.4087451790493968e-02, 3.6657022667584582e-02, -2.3160169352650246e-05, -1.4754112553395189e-03, 3.6657022667584582e-02, -2.7192998188347670e-03, 4.9290573568910629e-04, 4.2019978689329402e-04, 
-2.3160169352650246e-05, 4.9290573568910629e-04, -8.0300459280031010e-04, 3.0740767103767887e-04, -1.4754112553395189e-03, 4.2019978689329402e-04, 3.0740767103767887e-04, -6.4767348263235984e-04, -1.6342252605621727e-01, 1.8127756599951576e-02, -1.1824339446502448e-03, -5.6869655256144131e-04, 1.8127756599951576e-02, -5.4663896424227282e-03, 9.1072733783564201e-04, -3.4788638053916024e-05, -1.1824339446502448e-03, 9.1072733783564201e-04, -6.6244397559521830e-04, -2.5432935828759249e-04, -5.6869655256144131e-04, -3.4788638053916024e-05, -2.5432935828759249e-04, -4.8751736339739109e-04, -1.5701412918240615e-01, 2.7992421613277401e-02, -4.6515704530449996e-04, -7.9964297888608410e-04, 2.7992421613277401e-02, -3.9221662977877072e-03, 2.9742173175084299e-04, 7.9338187798423123e-04, -4.6515704530449996e-04, 2.9742173175084299e-04, -5.5822864771270446e-04, 2.1181161515901515e-04, -7.9964297888608410e-04, 7.9338187798423123e-04, 2.1181161515901515e-04, -3.9618825805997281e-04, -1.2003184408108214e-01, 1.8018611826677796e-03, -1.4587721561363381e-03, -1.4251735569210393e-03, 1.8018611826677796e-03, -2.3078955332620707e-03, 3.8402353601999531e-04, 3.6911457383390317e-04, -1.4587721561363381e-03, 3.8402353601999531e-04, -1.1519702353104452e-03, 9.8372557539751129e-05, -1.4251735569210393e-03, 3.6911457383390317e-04, 9.8372557539751129e-05, -7.3110630617569865e-04, -1.6462935008848217e-01, 1.0600946531245254e-02, 3.6977083048745467e-03, -1.4181644891152485e-03, 1.0600946531245254e-02, -1.3992372112707173e-03, 3.4296475948198595e-04, -1.1160182310149349e-03, 3.6977083048745467e-03, 3.4296475948198595e-04, -1.6724888886693919e-03, -7.8051702685589782e-04, -1.4181644891152485e-03, -1.1160182310149349e-03, -7.8051702685589782e-04, -1.4795383843152874e-03, 2.5193698717292578e-03, -7.2228540313489780e-03, 8.7131868658691573e-04, -1.1871506186637458e-03, -7.2228540313489780e-03, -8.3981655790035283e-03, -3.1337460147976907e-05, 1.7507484534873217e-04, 8.7131868658691573e-04, 
-3.1337460147976907e-05, -1.1381726073134719e-03, 9.0691674803716218e-05, -1.1871506186637458e-03, 1.7507484534873217e-04, 9.0691674803716218e-05, -5.4923843003268163e-04, -9.8541123283475321e-02, 2.0328745160678043e-02, -2.0458389881609796e-03, 9.0630002244413007e-04, 2.0328745160678043e-02, -7.4221841135019870e-04, 5.3925274862334381e-04, -9.0169207887240497e-04, -2.0458389881609796e-03, 5.3925274862334381e-04, -8.1237048011580354e-04, -1.7572671277406646e-04, 9.0630002244413007e-04, -9.0169207887240497e-04, -1.7572671277406646e-04, -4.4334028399372748e-04, -2.7234646577534043e-02, 2.5369573930940532e-02, 1.5705630781754590e-04, 6.1198876277706281e-04, 2.5369573930940532e-02, -1.3071102928106637e-03, -7.2368957471257269e-04, -5.2265333441466977e-04, 1.5705630781754590e-04, -7.2368957471257269e-04, -1.1462134164282704e-03, 2.3337377507278166e-04, 6.1198876277706281e-04, -5.2265333441466977e-04, 2.3337377507278166e-04, -7.2368697376535771e-04, -2.4800412677279193e-01, 3.0558589210589435e-02, -1.7679744500246292e-03, -1.5482210113520617e-03, 3.0558589210589435e-02, -2.4786384007542978e-03, 8.5356892230528767e-04, 4.3403640403337902e-04, -1.7679744500246292e-03, 8.5356892230528767e-04, -8.3267405947969693e-04, 9.7644740296861087e-05, -1.5482210113520617e-03, 4.3403640403337902e-04, 9.7644740296861087e-05, -7.6184659130409797e-04, 6.4088879122978254e-02, 7.4033191067212354e-03, -3.0166036111540026e-04, 2.2866639862654022e-03, 7.4033191067212354e-03, -1.9075281126971072e-03, -1.7513934189862141e-04, -8.3685859198624453e-04, -3.0166036111540026e-04, -1.7513934189862141e-04, -6.4451282241398183e-04, 1.0242732795762495e-04, 2.2866639862654022e-03, -8.3685859198624453e-04, 1.0242732795762495e-04, -4.7151425790170642e-04, 2.6280963047272420e-03, -8.2523616999923399e-03, 6.9274155947225767e-04, 2.2806878867334188e-04, -8.2523616999923399e-03, -9.9628281026221477e-04, 2.5812365885309900e-04, -1.1568156376056965e-04, 6.9274155947225767e-04, 2.5812365885309900e-04, 
-1.0820703009744815e-03, -3.1530431722761752e-05, 2.2806878867334188e-04, -1.1568156376056965e-04, -3.1530431722761752e-05, -7.5527985458567228e-04, 9.1101115309797379e-02, -2.2884644065069411e-02, 9.1368322276223378e-04, -1.0850217992609608e-03, -2.2884644065069411e-02, -2.2375718156060894e-03, -6.8171614704544544e-04, 4.3298786621578863e-04, 9.1368322276223378e-04, -6.8171614704544544e-04, -6.0655582575581761e-04, -1.9700957313198807e-04, -1.0850217992609608e-03, 4.3298786621578863e-04, -1.9700957313198807e-04, -5.4760699412119591e-04, -2.2640416371139069e-01, -2.5191853524724098e-02, -1.3198312724450061e-03, -1.3359296338435942e-03, -2.5191853524724098e-02, -7.4114781706104268e-03, 1.4884771292347921e-03, 1.5714670158787022e-03, -1.3198312724450061e-03, 1.4884771292347921e-03, -1.3119866716855199e-03, -1.5536016251956771e-04, -1.3359296338435942e-03, 1.5714670158787022e-03, -1.5536016251956771e-04, -5.5696348361036394e-04, 9.2726790578004134e-02, -2.0242369791069288e-03, 3.8405423708959545e-04, 6.3967354157029978e-04, -2.0242369791069288e-03, -2.0157885093086683e-03, -3.2058306533741196e-04, 4.2971640692212861e-04, 3.8405423708959545e-04, -3.2058306533741196e-04, -1.9162959308769002e-03, -6.3827678873427419e-04, 6.3967354157029978e-04, 4.2971640692212861e-04, -6.3827678873427419e-04, -1.1765363907219604e-03, -1.1010994988318533e-01, 1.8364595516921723e-03, -1.7052805868601824e-03, 1.5722132456195729e-03, 1.8364595516921723e-03, -1.6428731888699357e-03, -6.9125587365518731e-05, -6.1003796613763421e-04, -1.7052805868601824e-03, -6.9125587365518731e-05, -1.3517785548352037e-03, 3.9700219843950759e-04, 1.5722132456195729e-03, -6.1003796613763421e-04, 3.9700219843950759e-04, -9.2229729865463410e-04, 1.0482002463291755e-01, 4.1269081557235562e-02, -1.4243305558994911e-03, 7.2560546133989428e-04, 4.1269081557235562e-02, -2.4533418282568198e-03, 1.3112208013762515e-03, -8.1980100268941986e-04, -1.4243305558994911e-03, 1.3112208013762515e-03, -6.5149545825962077e-04, 
-7.9051325639480671e-05, 7.2560546133989428e-04, -8.1980100268941986e-04, -7.9051325639480671e-05, -2.9459876874997148e-04, 5.2687763814020426e-02, 2.5753172346694865e-02, -1.1364673445506180e-03, 1.6847984517648468e-03, 2.5753172346694865e-02, -1.9744024088759471e-03, 6.5730101175142199e-04, -7.6106378367768782e-04, -1.1364673445506180e-03, 6.5730101175142199e-04, -6.8322027298045856e-04, -2.8125846979498703e-04, 1.6847984517648468e-03, -7.6106378367768782e-04, -2.8125846979498703e-04, -6.4384013872712724e-04, 5.9218429229155281e-02, -4.4827537798794370e-03, 1.0270811096682296e-03, -1.9168586516354947e-03, -4.4827537798794370e-03, -6.8593562640291818e-04, -4.6835704849159830e-04, -2.2268361484443280e-04, 1.0270811096682296e-03, -4.6835704849159830e-04, -1.3752987145318814e-03, 3.1079072153330253e-04, -1.9168586516354947e-03, -2.2268361484443280e-04, 3.1079072153330253e-04, -9.2178851145784926e-04, 6.4476070763014870e-02, -8.2336865840800244e-03, -8.2613444706260115e-05, 2.2870252609144106e-03, -8.2336865840800244e-03, -3.2744036284686373e-03, -4.1704987639591122e-05, -6.4166807270754048e-04, -8.2613444706260115e-05, -4.1704987639591122e-05, -4.1548386737107250e-04, -1.7479369462839750e-05, 2.2870252609144106e-03, -6.4166807270754048e-04, -1.7479369462839750e-05, -3.4493527017004036e-04, -6.1828450559953466e-03, 2.8428624253357817e-02, -7.5163309079767377e-04, 5.4940663791714793e-04, 2.8428624253357817e-02, -6.7829893110121844e-03, 8.1954861327358602e-04, -5.3114117045863587e-04, -7.5163309079767377e-04, 8.1954861327358602e-04, -6.0173813996780567e-04, -6.7160875496170326e-05, 5.4940663791714793e-04, -5.3114117045863587e-04, -6.7160875496170326e-05, -2.4845337051236009e-04, 1.0533880384113113e-01, 3.6210071469509400e-02, -1.7107999791441781e-03, -4.7083865881270926e-04, 3.6210071469509400e-02, -2.5591369610882472e-03, 1.0474090798181057e-03, 1.1172328488792965e-04, -1.7107999791441781e-03, 1.0474090798181057e-03, -6.2681311095400482e-04, -1.0956014522883990e-04, 
-4.7083865881270926e-04, 1.1172328488792965e-04, -1.0956014522883990e-04, -6.2294976074032634e-04, 1.1902524122755653e-01, -1.0909771422571959e-02, 1.2448713677229743e-03, -7.8880695949750681e-04, -1.0909771422571959e-02, -1.5951056840545162e-03, 2.7429495897176952e-04, 7.3506920307663841e-05, 1.2448713677229743e-03, 2.7429495897176952e-04, -9.7786177177501001e-04, 6.6672113049240779e-04, -7.8880695949750681e-04, 7.3506920307663841e-05, 6.6672113049240779e-04, -5.0679343969428484e-04, 1.6159868780566031e-01, 3.7481421868546007e-03, 3.3383110783462289e-03, -1.4389234118571464e-03, 3.7481421868546007e-03, -1.8935398127113731e-03, 2.5240285683527449e-04, 7.9691796069654658e-04, 3.3383110783462289e-03, 2.5240285683527449e-04, -1.3490096726389840e-03, 1.9074939429963151e-04, -1.4389234118571464e-03, 7.9691796069654658e-04, 1.9074939429963151e-04, -8.9877461888945853e-04, -2.2502909073369459e-01, -3.7321287736186322e-03, -1.7987858311244466e-03, -1.9708047702503991e-03, -3.7321287736186322e-03, -1.9214934405923476e-03, 3.5283302489755764e-04, 4.8110254301636892e-04, -1.7987858311244466e-03, 3.5283302489755764e-04, -1.2680475239585940e-03, 2.0205708932592430e-04, -1.9708047702503991e-03, 4.8110254301636892e-04, 2.0205708932592430e-04, -7.8681400841451241e-04, -2.3952675967064883e-02, -6.5409371210018051e-03, -2.6851469943078203e-04, -4.9805803828584922e-04, -6.5409371210018051e-03, -3.7561313498678302e-03, 2.3206665741903022e-04, 3.0048228666978370e-04, -2.6851469943078203e-04, 2.3206665741903022e-04, -4.4121780270757531e-04, -8.2215427654899162e-05, -4.9805803828584922e-04, 3.0048228666978370e-04, -8.2215427654899162e-05, -3.7500716387283193e-04, -2.1228367131798687e-01, 2.7113291916486364e-02, 4.6016317977870395e-04, -1.9594764054362895e-03, 2.7113291916486364e-02, -1.0853572773374739e-03, 2.5219233819137219e-04, 5.1172231054065541e-04, 4.6016317977870395e-04, 2.5219233819137219e-04, -1.8213699224644336e-03, 3.7393213810628812e-04, -1.9594764054362895e-03, 
5.1172231054065541e-04, 3.7393213810628812e-04, -5.5588635692437049e-04, -1.2594061059151607e-01, 1.2592656724822970e-02, -4.0285977545487228e-04, -4.4341698620668172e-04, 1.2592656724822970e-02, -6.1572534159074592e-03, 4.8152144273438187e-04, -4.9727853505236708e-04, -4.0285977545487228e-04, 4.8152144273438187e-04, -9.3221660800086681e-04, 4.3322214079744763e-04, -4.4341698620668172e-04, -4.9727853505236708e-04, 4.3322214079744763e-04, -6.2440230731627326e-04, -1.3585509657998510e-01, -3.8909226522543441e-03, 3.0193138572521729e-03, -4.6754349870445496e-04, -3.8909226522543441e-03, -5.6220236698656771e-03, -8.0283448531211330e-04, 4.9313364840345667e-04, 3.0193138572521729e-03, -8.0283448531211330e-04, -8.4724654438547091e-04, -4.1535841093396958e-04, -4.6754349870445496e-04, 4.9313364840345667e-04, -4.1535841093396958e-04, -4.7533675347965273e-04, -2.1524465492323114e-01, 2.6830885704129219e-02, 4.8987007530070020e-03, -7.4769890428344238e-04, 2.6830885704129219e-02, -2.4008792532451296e-03, -4.3520127006076755e-04, -5.5384351394842390e-04, 4.8987007530070020e-03, -4.3520127006076755e-04, -1.9799533182669562e-03, 3.8152653140380451e-04, -7.4769890428344238e-04, -5.5384351394842390e-04, 3.8152653140380451e-04, -5.9785566430098553e-04, -1.6784213165410733e-01, 2.0244058370466202e-02, -1.3420658225188609e-03, -1.3189834234737445e-03, 2.0244058370466202e-02, -7.2263618147349398e-03, 1.1939034574010880e-03, 9.4667802145852230e-04, -1.3420658225188609e-03, 1.1939034574010880e-03, -7.6018644585713153e-04, -1.0210604952061623e-04, -1.3189834234737445e-03, 9.4667802145852230e-04, -1.0210604952061623e-04, -4.0821100223754577e-04, -2.3751039223678280e-01, 2.6165234489045515e-02, -1.9846169871215727e-03, 3.7170136665050013e-03, 2.6165234489045515e-02, -9.2783860491590594e-04, 2.8032134504712048e-04, -9.6957018524106912e-04, -1.9846169871215727e-03, 2.8032134504712048e-04, -1.2399063877565513e-03, 1.0528263449886494e-06, 3.7170136665050013e-03, -9.6957018524106912e-04, 
1.0528263449886494e-06, -1.0651880667174132e-03, 6.8982016656450096e-02, -1.0194592948666651e-02, 4.0224436318745071e-04, -1.1696801694026104e-03, -1.0194592948666651e-02, -2.2058854995895081e-03, 4.3964931839049595e-05, 3.0469030637087147e-04, 4.0224436318745071e-04, 4.3964931839049595e-05, -1.3777303555632778e-03, 4.3404728696010988e-04, -1.1696801694026104e-03, 3.0469030637087147e-04, 4.3404728696010988e-04, -4.0950956530560686e-04, -9.2461109228483468e-02, -1.3650469255974099e-03, 6.9549155108044203e-04, -1.3791803108254750e-03, -1.3650469255974099e-03, -3.4426360382142198e-03, -8.9312514713293023e-04, 1.5185160693596093e-03, 6.9549155108044203e-04, -8.9312514713293023e-04, -1.2055712134559134e-03, 4.1803484747986825e-04, -1.3791803108254750e-03, 1.5185160693596093e-03, 4.1803484747986825e-04, -8.1179190435472248e-04, 1.7492329868952405e-01, -2.9646943331972139e-02, -6.6563429686947173e-04, 4.4682634999685810e-04, -2.9646943331972139e-02, -2.4173338183744193e-03, 3.3542502232973938e-04, 1.9443059924745616e-05, -6.6563429686947173e-04, 3.3542502232973938e-04, -9.9183926207136765e-04, 2.0084203476038989e-04, 4.4682634999685810e-04, 1.9443059924745616e-05, 2.0084203476038989e-04, -6.3908876172596529e-04, 3.1539924140147259e-02, 7.6934952984409844e-03, 3.6809732723858749e-03, -1.9589949144150562e-03, 7.6934952984409844e-03, -1.0048732991394538e-03, -6.8525949419794615e-04, 1.8196951541743413e-04, 3.6809732723858749e-03, -6.8525949419794615e-04, -1.9447331645722941e-03, 1.5283056340779168e-04, -1.9589949144150562e-03, 1.8196951541743413e-04, 1.5283056340779168e-04, -9.1738980564369424e-04, -3.9105676791886118e-02, 7.5467303744346081e-03, -1.8311155447274202e-03, 3.9722587176520933e-03, 7.5467303744346081e-03, -2.4625304120398338e-03, 6.5110354661161197e-04, -1.1424140297801545e-03, -1.8311155447274202e-03, 6.5110354661161197e-04, -7.6013689055211709e-04, 3.1982239567331837e-04, 3.9722587176520933e-03, -1.1424140297801545e-03, 3.1982239567331837e-04, 
-6.7960924512481386e-04, -6.1502540343009511e-02, -2.5918936122811376e-02, 8.2260585699679700e-04, -8.5737616123583824e-04, -2.5918936122811376e-02, -9.1045995335339511e-03, -7.6582517466873253e-04, 2.8347777788018374e-04, 8.2260585699679700e-04, -7.6582517466873253e-04, -9.8142522999483802e-04, 1.5709724166556660e-04, -8.5737616123583824e-04, 2.8347777788018374e-04, 1.5709724166556660e-04, -4.6604755267205529e-04, 2.1148032486205992e-01, -6.3731337543181353e-03, 8.0925966188404811e-04, 7.1463137701211561e-04, -6.3731337543181353e-03, -9.3133813463367691e-04, -1.3767800389944088e-04, -7.8134699474057532e-04, 8.0925966188404811e-04, -1.3767800389944088e-04, -5.9804522674363695e-04, -2.4139815546091799e-04, 7.1463137701211561e-04, -7.8134699474057532e-04, -2.4139815546091799e-04, -4.7788602351705350e-04, -3.7426472661389397e-02, 2.8157517432252185e-02, -1.0934144857086604e-03, 3.1458337980653973e-06, 2.8157517432252185e-02, -5.9309295616162740e-03, 2.5241788599436376e-04, 5.8278212231620947e-04, -1.0934144857086604e-03, 2.5241788599436376e-04, -5.9659440322057725e-04, 1.4773715153137406e-04, 3.1458337980653973e-06, 5.8278212231620947e-04, 1.4773715153137406e-04, -4.9439441008183914e-04, 1.2948178533847016e-01, -4.4858655966967973e-03, -7.3394293433360475e-04, -1.8318789975846868e-03, -4.4858655966967973e-03, -1.3111242346283915e-03, -1.0753742916959626e-03, 6.1098836231564713e-04, -7.3394293433360475e-04, -1.0753742916959626e-03, -1.7980863990064372e-03, 4.7774929182747947e-04, -1.8318789975846868e-03, 6.1098836231564713e-04, 4.7774929182747947e-04, -7.2636447350640115e-04, 1.1246840827726504e-01, 2.1631039403096977e-02, -1.6528967084977182e-03, -1.3069704047438378e-03, 2.1631039403096977e-02, -6.2735534467153794e-03, 9.1784915417337241e-04, 3.7040218222533875e-04, -1.6528967084977182e-03, 9.1784915417337241e-04, -6.1695925898898460e-04, 5.1243512009102464e-05, -1.3069704047438378e-03, 3.7040218222533875e-04, 5.1243512009102464e-05, -5.8607009651544847e-04, 
2.3191443887515172e-02, -2.6201262501037315e-03, -7.6581304428420961e-04, -7.0793868722308494e-04, -2.6201262501037315e-03, -1.2427377919089994e-03, 1.3774011176280213e-04, 5.3774177980385463e-04, -7.6581304428420961e-04, 1.3774011176280213e-04, -4.4342025234310304e-04, 9.5568301616175372e-05, -7.0793868722308494e-04, 5.3774177980385463e-04, 9.5568301616175372e-05, -2.5406888063130668e-04, 9.3549762929661148e-02, 2.4789338496675476e-02, -1.7999250937495298e-03, 9.1491381013646730e-04, 2.4789338496675476e-02, -2.4686873006394385e-03, 1.1959407625742012e-04, 2.3453642308929890e-04, -1.7999250937495298e-03, 1.1959407625742012e-04, -1.3058948205367103e-03, 2.8220390404668665e-04, 9.1491381013646730e-04, 2.3453642308929890e-04, 2.8220390404668665e-04, -9.0785502558073951e-04, 8.0716780382424247e-02, 2.5365518135899486e-02, -1.0966084834676559e-03, 3.9092402053228742e-04, 2.5365518135899486e-02, -3.5742604372390177e-04, 1.1161995819547624e-03, -2.3097494896500610e-04, -1.0966084834676559e-03, 1.1161995819547624e-03, -9.9846801442279387e-04, -3.3563094825744084e-04, 3.9092402053228742e-04, -2.3097494896500610e-04, -3.3563094825744084e-04, -4.9557683336977874e-04, 4.0111243527158481e-02, -5.1743679607531489e-03, -6.4668882267963457e-04, -5.3256658217573259e-04, -5.1743679607531489e-03, -2.1959890278307748e-03, 3.3406917357637071e-04, 5.5860960683749177e-04, -6.4668882267963457e-04, 3.3406917357637071e-04, -4.0449951144626246e-04, 1.4870745971877001e-04, -5.3256658217573259e-04, 5.5860960683749177e-04, 1.4870745971877001e-04, -4.0088619373289668e-04, 1.3936559957096936e-02, -9.2345720437320422e-03, 1.7112603251330989e-03, -1.5812360931300338e-03, -9.2345720437320422e-03, -6.0853404733679922e-03, -8.1587691393434074e-04, 1.5127202442355710e-03, 1.7112603251330989e-03, -8.1587691393434074e-04, -1.0582512267918579e-03, 3.0169179197568852e-04, -1.5812360931300338e-03, 1.5127202442355710e-03, 3.0169179197568852e-04, -6.9818940668481635e-04, 1.2271035315361421e-01, 
3.3834534832647936e-02, -8.4425345932574085e-04, -1.2845411056578771e-03, 3.3834534832647936e-02, -2.7021681799793487e-03, 1.4589329290161041e-03, 6.8134662217169816e-04, -8.4425345932574085e-04, 1.4589329290161041e-03, -8.5494381562856267e-04, 1.0288019018269524e-04, -1.2845411056578771e-03, 6.8134662217169816e-04, 1.0288019018269524e-04, -7.3984903838043902e-04, 1.3426877609097060e-01, 1.7349015756059596e-02, -1.3246192802241865e-03, -1.9305711238934450e-03, 1.7349015756059596e-02, -1.5270102850098125e-03, 4.4956077650381329e-04, 1.9714221201937447e-04, -1.3246192802241865e-03, 4.4956077650381329e-04, -6.4182469515364075e-04, 2.0433900996145839e-04, -1.9305711238934450e-03, 1.9714221201937447e-04, 2.0433900996145839e-04, -5.1586663656365934e-04, 8.6391392047467014e-02, 3.6980521889860229e-02, 9.3321794676103585e-04, 9.3948476321605902e-04, 3.6980521889860229e-02, -9.8928180871077319e-03, -5.3070991717869105e-04, -6.2377425133496771e-04, 9.3321794676103585e-04, -5.3070991717869105e-04, -3.0488330333303663e-04, 1.5659386456603057e-05, 9.3948476321605902e-04, -6.2377425133496771e-04, 1.5659386456603057e-05, -2.7999703918634182e-04, 1.0146744017771264e-01, 7.2816252131513558e-03, -1.6155238061515398e-03, 2.5644205137853007e-04, 7.2816252131513558e-03, -6.3112672678755539e-03, 1.6890811365672615e-03, 2.7693395172731348e-04, -1.6155238061515398e-03, 1.6890811365672615e-03, -1.2647032121969489e-03, -2.9370354320717849e-04, 2.5644205137853007e-04, 2.7693395172731348e-04, -2.9370354320717849e-04, -6.2342159510005504e-04, 1.2959056902281882e-01, -6.1301776131238232e-05, 1.6534216868320365e-03, -1.3058105872678105e-03, -6.1301776131238232e-05, -1.4249381206094562e-03, -9.5380678395206903e-04, 4.7419065106550553e-04, 1.6534216868320365e-03, -9.5380678395206903e-04, -1.5076699002388233e-03, 6.4805275159887318e-04, -1.3058105872678105e-03, 4.7419065106550553e-04, 6.4805275159887318e-04, -8.8238756291255597e-04, 8.9247536658271412e-02, 2.3842441423826456e-04, 
-1.4020150585281280e-03, -5.1152963328585229e-04, 2.3842441423826456e-04, -9.0191620517334706e-04, 2.8625020652046294e-04, -4.8584743596851983e-04, -1.4020150585281280e-03, 2.8625020652046294e-04, -1.0559260959327309e-03, 7.8831939884397011e-04, -5.1152963328585229e-04, -4.8584743596851983e-04, 7.8831939884397011e-04, -1.7109718079099097e-03, 1.8544185491158613e-01, -1.0718215088244560e-03, -1.8813810349262525e-03, -1.9339063688273953e-03, -1.0718215088244560e-03, -7.2274547605494753e-04, 8.6435447211605384e-04, -3.6026940405211872e-04, -1.8813810349262525e-03, 8.6435447211605384e-04, -8.1277716432238999e-04, -3.7277556893117323e-05, -1.9339063688273953e-03, -3.6026940405211872e-04, -3.7277556893117323e-05, -6.3915541802464406e-04, 1.4434415789530322e-01, -2.6872190143700809e-03, -1.0846599699061007e-03, -6.4477346277669280e-04, -2.6872190143700809e-03, -5.6018401280834278e-03, -5.0911052653693770e-05, 9.6450041626136844e-04, -1.0846599699061007e-03, -5.0911052653693770e-05, -9.4975456411969575e-04, 3.1940035065807030e-04, -6.4477346277669280e-04, 9.6450041626136844e-04, 3.1940035065807030e-04, -9.2420815563382878e-04, 7.8171789374052239e-02, 1.1131802654471883e-02, 1.7637127962972779e-03, 9.4754776914784435e-04, 1.1131802654471883e-02, -1.3560867793273460e-03, -1.0739127680493021e-03, 3.5149852956582349e-04, 1.7637127962972779e-03, -1.0739127680493021e-03, -1.7712750948741785e-03, -4.9962373537378436e-04, 9.4754776914784435e-04, 3.5149852956582349e-04, -4.9962373537378436e-04, -1.1574313866688994e-03, 5.2957801369084900e-02, -1.0005678772364536e-02, -4.7804628490494196e-04, -1.2361403815386134e-03, -1.0005678772364536e-02, -4.5116744307715714e-03, 4.1017691673670646e-04, 9.4250866230064672e-04, -4.7804628490494196e-04, 4.1017691673670646e-04, -9.2170564193822815e-04, 2.5987259125205417e-04, -1.2361403815386134e-03, 9.4250866230064672e-04, 2.5987259125205417e-04, -4.6176740062320114e-04, 1.0365332441174736e-01, -9.8221692578944128e-03, -1.9915804870139055e-03, 
-1.4829964959047716e-03, -9.8221692578944128e-03, -8.0048197952266309e-04, 2.0484157742938923e-04, 9.2083194131700275e-04, -1.9915804870139055e-03, 2.0484157742938923e-04, -7.4074324466875886e-04, 4.6683122189394826e-05, -1.4829964959047716e-03, 9.2083194131700275e-04, 4.6683122189394826e-05, -4.1774348179361514e-04, -1.3421959250355708e-01, 3.3713260029558795e-03, -1.3243455638564110e-03, -1.6977772976177158e-03, 3.3713260029558795e-03, -4.3427989568832846e-03, 1.5008659836834226e-03, 3.6385328660509515e-04, -1.3243455638564110e-03, 1.5008659836834226e-03, -4.4959639797028332e-04, -3.5993966797194079e-05, -1.6977772976177158e-03, 3.6385328660509515e-04, -3.5993966797194079e-05, -3.9959494583247839e-04, 1.5319795291380678e-01, 1.5606938575385582e-02, 1.2896044217671962e-04, 1.4042986843700814e-03, 1.5606938575385582e-02, -1.6968432451613968e-03, 6.1781606247467071e-04, -8.1022537873411268e-04, 1.2896044217671962e-04, 6.1781606247467071e-04, -9.0781561089793507e-04, 1.6085869694517344e-04, 1.4042986843700814e-03, -8.1022537873411268e-04, 1.6085869694517344e-04, -4.7576363868138846e-04, 1.2254882374915720e-01, 1.4693545363766116e-04, 2.1938591971393732e-03, -3.9982695744349866e-04, 1.4693545363766116e-04, -1.8792585045041851e-03, 4.2828698130133657e-04, 4.5172919991448188e-04, 2.1938591971393732e-03, 4.2828698130133657e-04, -1.0432885144689712e-03, -4.1920428823997481e-04, -3.9982695744349866e-04, 4.5172919991448188e-04, -4.1920428823997481e-04, -5.0654766396187863e-04, 6.2567741767689450e-02, -1.5942792665184773e-02, 9.4219357229582805e-04, -1.0047556566068546e-03, -1.5942792665184773e-02, -4.9222058747801188e-03, -8.8081902987161898e-04, 8.5096048171422992e-04, 9.4219357229582805e-04, -8.8081902987161898e-04, -6.4309961958789396e-04, -2.4752901697258346e-05, -1.0047556566068546e-03, 8.5096048171422992e-04, -2.4752901697258346e-05, -4.2749730100807973e-04, 2.0912880324801708e-01, -7.6064433064355411e-03, 5.3323058431332623e-04, -2.0532588685479958e-03, 
-7.6064433064355411e-03, -1.3135732341439657e-03, -7.8988659553189968e-04, 3.7928836724313318e-04, 5.3323058431332623e-04, -7.8988659553189968e-04, -7.3649612756357917e-04, -2.6573320308412903e-04, -2.0532588685479958e-03, 3.7928836724313318e-04, -2.6573320308412903e-04, -5.7797801692182097e-04, 1.5055274915795547e-01, -2.1924936777925716e-02, -1.2612611024713787e-03, 2.7821375886116877e-03, -2.1924936777925716e-02, -4.2068916459438062e-03, 1.3684687816957373e-03, -9.0195379182343130e-04, -1.2612611024713787e-03, 1.3684687816957373e-03, -1.2345358774530297e-03, -3.4235325803685078e-04, 2.7821375886116877e-03, -9.0195379182343130e-04, -3.4235325803685078e-04, -5.3816147544009192e-04, -9.4235991321761955e-02, -2.1367388228175833e-02, 1.0448569989099396e-03, 1.7485944032731482e-03, -2.1367388228175833e-02, -2.6716566352546589e-03, -7.3073829452656989e-04, -8.9610200660087183e-04, 1.0448569989099396e-03, -7.3073829452656989e-04, -6.7501385163585615e-04, 2.0355441125821877e-04, 1.7485944032731482e-03, -8.9610200660087183e-04, 2.0355441125821877e-04, -5.7236545019238698e-04, -6.9503808891479835e-02, 3.1383773305167471e-03, 1.5986828965354864e-03, -1.7402981803352200e-03, 3.1383773305167471e-03, -1.2489391870377849e-03, -8.5751835076954214e-04, 5.8268795422606202e-04, 1.5986828965354864e-03, -8.5751835076954214e-04, -1.5076108825684306e-03, -7.4944126836778806e-04, -1.7402981803352200e-03, 5.8268795422606202e-04, -7.4944126836778806e-04, -1.1142665347040458e-03, 4.7151474002859579e-02, 3.1740728795928382e-02, -9.4301630376494167e-04, -9.8957451562421933e-04, 3.1740728795928382e-02, -3.0449847294980667e-03, 7.3961197038087075e-04, 1.1737609646021582e-03, -9.4301630376494167e-04, 7.3961197038087075e-04, -5.3898208391981741e-04, 1.2215824265585573e-04, -9.8957451562421933e-04, 1.1737609646021582e-03, 1.2215824265585573e-04, -4.4878575781268185e-04, 6.9890878585001953e-02, 5.5058801359721072e-03, 4.3303599481317440e-04, 6.7321007698798990e-03, 5.5058801359721072e-03, 
-1.3315438647224626e-03, 2.4082287446273412e-04, -1.1070907494787486e-03, 4.3303599481317440e-04, 2.4082287446273412e-04, -1.6833930700655335e-03, -8.3440886703805834e-04, 6.7321007698798990e-03, -1.1070907494787486e-03, -8.3440886703805834e-04, -1.5967950989918100e-03, 8.5697346753994430e-02, -1.8323684437467489e-02, -5.9473967389843570e-04, -8.4314615829175212e-04, -1.8323684437467489e-02, -1.1875494620811390e-03, 9.7079341037182275e-04, 2.7501065040690445e-04, -5.9473967389843570e-04, 9.7079341037182275e-04, -5.2204544453808501e-04, 2.0919905664672838e-04, -8.4314615829175212e-04, 2.7501065040690445e-04, 2.0919905664672838e-04, -4.2639051308978156e-04, -2.2631222057764766e-01, -1.3601514363752679e-02, -1.6137759080852433e-03, 1.4950759744726269e-03, -1.3601514363752679e-02, -2.6453255589728329e-03, 3.3968249078500379e-04, -7.5978605583900977e-04, -1.6137759080852433e-03, 3.3968249078500379e-04, -7.5010511151759902e-04, 1.0313781788359500e-04, 1.4950759744726269e-03, -7.5978605583900977e-04, 1.0313781788359500e-04, -5.9448373489452483e-04, 4.1116271730053472e-02, -4.5650281290770516e-03, 1.0174302480600848e-03, -7.4144675491881571e-04, -4.5650281290770516e-03, -3.0111597874487158e-03, 4.1892149452060594e-04, 2.5662370553909404e-04, 1.0174302480600848e-03, 4.1892149452060594e-04, -1.6351264041670321e-03, 4.1540798986628444e-04, -7.4144675491881571e-04, 2.5662370553909404e-04, 4.1540798986628444e-04, -1.5888041899304089e-03, 3.8822005325296664e-02, 3.4682085173453489e-02, 1.1383554058731083e-03, 6.9763412662967174e-04, 3.4682085173453489e-02, -3.8778621426546131e-03, -1.0502459984301810e-03, -6.7815726314415445e-04, 1.1383554058731083e-03, -1.0502459984301810e-03, -9.5953811566263974e-04, 3.1547011250005413e-04, 6.9763412662967174e-04, -6.7815726314415445e-04, 3.1547011250005413e-04, -4.9094447850356326e-04, -1.3275117419010990e-01, 7.7532610202420025e-03, 2.6481743952388775e-04, -1.0233025705150341e-03, 7.7532610202420025e-03, -3.5307144422284118e-03, 
3.6081301612281790e-04, 4.2984812221947383e-04, 2.6481743952388775e-04, 3.6081301612281790e-04, -9.6620391750736523e-04, -1.8268752035532475e-04, -1.0233025705150341e-03, 4.2984812221947383e-04, -1.8268752035532475e-04, -7.6865748942518946e-04, 2.0902111595826358e-02, 2.8778407466407915e-02, -7.3589637063263200e-04, -7.9232400295570942e-04, 2.8778407466407915e-02, -6.8244148733939813e-03, 4.6837342411344191e-04, 3.8337154291441424e-04, -7.3589637063263200e-04, 4.6837342411344191e-04, -3.9870959441253668e-04, 6.3661217652106041e-05, -7.9232400295570942e-04, 3.8337154291441424e-04, 6.3661217652106041e-05, -3.1873633147878725e-04, 1.2306903655844117e-01, 1.1790694654017940e-02, -1.6930694484504614e-03, -1.4478486853861244e-03, 1.1790694654017940e-02, -2.6362432912232283e-04, 5.7801647588433605e-04, -5.0260654622532848e-04, -1.6930694484504614e-03, 5.7801647588433605e-04, -1.0057986783918533e-03, -3.8473412149296924e-04, -1.4478486853861244e-03, -5.0260654622532848e-04, -3.8473412149296924e-04, -8.4155363764574717e-04, -2.6072264441980286e-01, -5.3839312494948985e-03, -9.8286481836498294e-04, -1.1077071063072626e-03, -5.3839312494948985e-03, -2.8144397517049936e-03, 1.3570909178396836e-03, 2.0352636344653444e-04, -9.8286481836498294e-04, 1.3570909178396836e-03, -9.6948887126791080e-04, 2.4502863167189274e-04, -1.1077071063072626e-03, 2.0352636344653444e-04, 2.4502863167189274e-04, -5.5674073681467461e-04, -1.4461247437719649e-01, -1.3593073092315819e-02, -1.4591789215271309e-03, -1.8444309608458772e-03, -1.3593073092315819e-02, -1.3790376045237883e-03, 3.6507611650380594e-04, 1.0989155862134194e-03, -1.4591789215271309e-03, 3.6507611650380594e-04, -8.2908061421262179e-04, -1.9093220436421467e-04, -1.8444309608458772e-03, 1.0989155862134194e-03, -1.9093220436421467e-04, -7.4603886066833670e-04, 1.5718948391852267e-01, 3.7509266885863875e-02, -1.6535289369067915e-03, -1.5835485386314300e-03, 3.7509266885863875e-02, -5.9100449278194633e-03, 3.5324348250094338e-04, 
3.2687210216937634e-04, -1.6535289369067915e-03, 3.5324348250094338e-04, -1.0567635462974483e-03, -3.8959429122272150e-06, -1.5835485386314300e-03, 3.2687210216937634e-04, -3.8959429122272150e-06, -6.7923415962746719e-04, -2.0460083814262353e-01, 2.6461245236457143e-02, -1.0964643716453355e-03, -2.0155849422843325e-03, 2.6461245236457143e-02, -1.1607499563668883e-02, 4.9712684089718660e-04, 1.0744196203423314e-03, -1.0964643716453355e-03, 4.9712684089718660e-04, -1.2575560415559299e-03, 3.5355569910289946e-04, -2.0155849422843325e-03, 1.0744196203423314e-03, 3.5355569910289946e-04, -5.9829675272084996e-04, 1.4227457831415008e-01, 3.7409193782549421e-02, -1.6999608012668766e-03, -1.4775810720134223e-03, 3.7409193782549421e-02, -6.6980266639126880e-03, 3.7313860360236049e-04, 3.6390843416024218e-04, -1.6999608012668766e-03, 3.7313860360236049e-04, -6.8923641265649434e-04, -1.1118512619169452e-04, -1.4775810720134223e-03, 3.6390843416024218e-04, -1.1118512619169452e-04, -3.6167006383143143e-04, -3.0408057058263434e-02, 2.8102062565578297e-03, -9.9186210750588349e-05, -1.0848067328911566e-03, 2.8102062565578297e-03, -1.9168669695793325e-03, -1.5989044944160041e-04, 3.7928404911171667e-04, -9.9186210750588349e-05, -1.5989044944160041e-04, -6.4212418303488965e-04, 1.2414097957366861e-04, -1.0848067328911566e-03, 3.7928404911171667e-04, 1.2414097957366861e-04, -4.8937310642877495e-04, -5.8151762326818325e-02, 2.0612537512420814e-02, -1.8069350755089380e-03, -1.9159225235797013e-03, 2.0612537512420814e-02, -1.5273838578175230e-03, 4.8641178441277146e-04, 5.7168029371084457e-04, -1.8069350755089380e-03, 4.8641178441277146e-04, -8.5373804438111515e-04, 2.1202613463988789e-04, -1.9159225235797013e-03, 5.7168029371084457e-04, 2.1202613463988789e-04, -7.8732691228072034e-04, 1.1798334731925102e-01, -2.8140161709447171e-02, -1.2130478454804230e-03, 4.6360451013415833e-04, -2.8140161709447171e-02, -2.5008525901788615e-03, 1.3314100722865963e-03, -4.2462405872217889e-04, 
-1.2130478454804230e-03, 1.3314100722865963e-03, -1.0941534938684865e-03, 5.1529382431827911e-04, 4.6360451013415833e-04, -4.2462405872217889e-04, 5.1529382431827911e-04, -6.7010849513206446e-04, -1.9635177370699608e-01, 3.1453073526812375e-03, 1.9943467720728159e-04, 1.8490379843829265e-03, 3.1453073526812375e-03, -1.4755660222860181e-03, 2.5216877122513355e-04, -7.8528688355206447e-04, 1.9943467720728159e-04, 2.5216877122513355e-04, -1.4555949485632702e-03, -5.9788067272640536e-04, 1.8490379843829265e-03, -7.8528688355206447e-04, -5.9788067272640536e-04, -1.4009824713669468e-03, 1.8079617326597403e-01, 1.3311705456384233e-02, -1.6813583174003447e-03, 1.6754281220701891e-03, 1.3311705456384233e-02, -2.5836933131966949e-03, 1.2524623632335693e-03, -4.4318906832989434e-04, -1.6813583174003447e-03, 1.2524623632335693e-03, -1.1071301248058326e-03, 1.6237170251639789e-04, 1.6754281220701891e-03, -4.4318906832989434e-04, 1.6237170251639789e-04, -5.8066403722343185e-04, -4.7658243260452392e-02, -1.1547225654362255e-02, -1.6724724586724625e-04, -8.6129819877703328e-04, -1.1547225654362255e-02, -3.2589278968339326e-03, 2.6655390367207022e-04, 2.2553348630838165e-04, -1.6724724586724625e-04, 2.6655390367207022e-04, -3.8468694210936846e-04, 2.0656031333140019e-04, -8.6129819877703328e-04, 2.2553348630838165e-04, 2.0656031333140019e-04, -3.5394037175600837e-04, -6.6559656789261540e-02, 3.6780140422847912e-02, -1.0833735822547434e-03, 1.3914255331022606e-04, 3.6780140422847912e-02, -3.3904219310184804e-03, 5.4509538508334726e-04, -2.2439596920688072e-04, -1.0833735822547434e-03, 5.4509538508334726e-04, -1.1195941512654017e-03, 1.0549891235828525e-05, 1.3914255331022606e-04, -2.2439596920688072e-04, 1.0549891235828525e-05, -5.8598966626073352e-04
-  };
-  std::vector expected_dy_dem = {
-    4.8687245272451135e-03, 5.5397159651024933e-03, 5.4642599386694624e-03, 5.6057159373549721e-03, 5.5397159651024933e-03, 4.8806501836285432e-03, 5.6063767619383984e-03, 5.6970860664459413e-03, 5.4642599386694624e-03, 5.6063767619383984e-03, 5.1211835750794290e-03, 5.3223602254697877e-03, 5.6057159373549721e-03, 5.6970860664459413e-03, 5.3223602254697877e-03, 5.2081125072059725e-03, 4.5292484636524086e-03, 4.9816448819259198e-03, 5.1363544712933759e-03, 5.1501696199608019e-03, 4.9816448819259198e-03, 4.6419992655419269e-03, 5.1569135569294903e-03, 4.8945949693767234e-03, 5.1363544712933759e-03, 5.1569135569294903e-03, 4.6706613224722264e-03, 4.9092906666249794e-03, 5.1501696199608019e-03, 4.8945949693767234e-03, 4.9092906666249794e-03, 4.8515691645339065e-03, 3.0190652370495003e-03, 3.4898418636317798e-03, 3.4899656132580579e-03, 3.5145415393083900e-03, 3.4898418636317798e-03, 3.1452916478866737e-03, 3.5037937416394702e-03, 3.4286490652383700e-03, 3.4899656132580579e-03, 3.5037937416394702e-03, 3.1627359787623427e-03, 3.5804187550924349e-03, 3.5145415393083900e-03, 3.4286490652383700e-03, 3.5804187550924349e-03, 3.2322057910012985e-03, 3.6840833901333001e-03, 4.3387482039692832e-03, 4.2515867787302417e-03, 4.4155206099863904e-03, 4.3387482039692832e-03, 4.0028491849676334e-03, 4.3652298563497116e-03, 4.3014839026378881e-03, 4.2515867787302417e-03, 4.3652298563497116e-03, 4.0124345907042516e-03, 4.1790409970907686e-03, 4.4155206099863904e-03, 4.3014839026378881e-03, 4.1790409970907686e-03, 4.0456958994549621e-03, 4.7788898285209408e-03, 4.7316589641173863e-03, 4.7277506758881538e-03, 4.7420787786160946e-03, 4.7316589641173863e-03, 4.7632472674770847e-03, 4.7277799549445667e-03, 4.7294160442947122e-03, 4.7277506758881538e-03, 4.7277799549445667e-03, 4.7576353824550499e-03, 4.7311669494084236e-03, 4.7420787786160946e-03, 4.7294160442947122e-03, 4.7311669494084236e-03, 4.7516945819656171e-03, 3.9248662197341371e-03, 4.9180169646964866e-03, 4.7190463613870446e-03, 
4.4520204316217879e-03, 4.9180169646964866e-03, 4.1556621508365569e-03, 4.6033674111750095e-03, 4.5946737014459435e-03, 4.7190463613870446e-03, 4.6033674111750095e-03, 4.4277151435242147e-03, 4.6668537390335040e-03, 4.4520204316217879e-03, 4.5946737014459435e-03, 4.6668537390335040e-03, 4.4390865038161998e-03, 3.7758343215872881e-03, 4.1709220560399215e-03, 4.1202109662083035e-03, 3.9893563131548088e-03, 4.1709220560399215e-03, 3.8379626600639266e-03, 4.1328523107198936e-03, 3.9840470230024889e-03, 4.1202109662083035e-03, 4.1328523107198936e-03, 3.8852759103779593e-03, 4.1580920004019896e-03, 3.9893563131548088e-03, 3.9840470230024889e-03, 4.1580920004019896e-03, 3.9467666481718391e-03, 3.9761642677066033e-03, 4.4504099654305816e-03, 4.3794412854383678e-03, 4.4604317957851781e-03, 4.4504099654305816e-03, 4.0768567666647814e-03, 4.3435449806077473e-03, 4.3222112460340181e-03, 4.3794412854383678e-03, 4.3435449806077473e-03, 4.0773633573082564e-03, 4.4803490047300660e-03, 4.4604317957851781e-03, 4.3222112460340181e-03, 4.4803490047300660e-03, 4.1310317786866901e-03, 4.3424352854361066e-03, 4.9111142266166450e-03, 4.8395299678665781e-03, 4.6865337839814593e-03, 4.9111142266166450e-03, 4.5038688193735138e-03, 4.7214010478928668e-03, 4.7827433155358395e-03, 4.8395299678665781e-03, 4.7214010478928668e-03, 4.5047095985936833e-03, 4.8043858107812464e-03, 4.6865337839814593e-03, 4.7827433155358395e-03, 4.8043858107812464e-03, 4.5266636394073641e-03, 5.0023501655291321e-03, 5.4960776974766461e-03, 5.5277863655162403e-03, 5.6131610328358644e-03, 5.4960776974766461e-03, 5.0497823718039583e-03, 5.5223616852066977e-03, 5.2893854856376011e-03, 5.5277863655162403e-03, 5.5223616852066977e-03, 5.0964808619146599e-03, 5.4611470394996591e-03, 5.6131610328358644e-03, 5.2893854856376011e-03, 5.4611470394996591e-03, 5.2261512984334412e-03, 3.9613105413924812e-03, 4.4015218128590642e-03, 4.7307418574636991e-03, 4.6640477363723105e-03, 4.4015218128590642e-03, 4.0082410785944010e-03, 
4.8261944934070503e-03, 4.5757895595947961e-03, 4.7307418574636991e-03, 4.8261944934070503e-03, 4.2233067127250678e-03, 4.7151130208320496e-03, 4.6640477363723105e-03, 4.5757895595947961e-03, 4.7151130208320496e-03, 4.3106975949268253e-03, 4.7334182996649809e-03, 5.3634328062759831e-03, 5.4504527709104602e-03, 5.1343603133321243e-03, 5.3634328062759831e-03, 4.8165188916604821e-03, 5.2922209819789611e-03, 5.3818126119621716e-03, 5.4504527709104602e-03, 5.2922209819789611e-03, 4.8435704822708701e-03, 5.2240647899442503e-03, 5.1343603133321243e-03, 5.3818126119621716e-03, 5.2240647899442503e-03, 4.8868508221224377e-03, 4.7996614634904892e-03, 5.7179083120858371e-03, 5.6117201232891321e-03, 5.6700701716033263e-03, 5.7179083120858371e-03, 4.9727735792746818e-03, 5.3848863488571300e-03, 5.4305318435656936e-03, 5.6117201232891321e-03, 5.3848863488571300e-03, 5.1731090323620544e-03, 5.2948377822407506e-03, 5.6700701716033263e-03, 5.4305318435656936e-03, 5.2948377822407506e-03, 5.2191935738847518e-03, 5.0888074222028307e-03, 5.5682241956695952e-03, 5.5311688481500410e-03, 5.3354587343858242e-03, 5.5682241956695952e-03, 5.1039796573406886e-03, 5.6527375470408862e-03, 5.5776264780928323e-03, 5.5311688481500410e-03, 5.6527375470408862e-03, 5.1908856775166106e-03, 5.5157564883515529e-03, 5.3354587343858242e-03, 5.5776264780928323e-03, 5.5157564883515529e-03, 5.2868882706481306e-03, 4.1148293948602473e-03, 4.8099243464647095e-03, 4.8354127941843744e-03, 4.8024021654871083e-03, 4.8099243464647095e-03, 4.3036710353764468e-03, 4.7276148863981300e-03, 4.5819645174844182e-03, 4.8354127941843744e-03, 4.7276148863981300e-03, 4.3517326519376268e-03, 4.5484962243548931e-03, 4.8024021654871083e-03, 4.5819645174844182e-03, 4.5484962243548931e-03, 4.3552076928133352e-03, 4.4103831260207836e-03, 4.7323280089558032e-03, 4.6792976546604689e-03, 4.7101816275088885e-03, 4.7323280089558032e-03, 4.4526464210458746e-03, 4.7051812634566420e-03, 4.7775181030841720e-03, 4.6792976546604689e-03, 
4.7051812634566420e-03, 4.4777362867795979e-03, 4.7837047168366176e-03, 4.7101816275088885e-03, 4.7775181030841720e-03, 4.7837047168366176e-03, 4.4818283635942383e-03, 4.8171219099102398e-03, 5.4329072017648130e-03, 5.5577769098679687e-03, 5.5021033559999686e-03, 5.4329072017648130e-03, 4.9840681446863353e-03, 5.5228366571162838e-03, 5.6998588607070494e-03, 5.5577769098679687e-03, 5.5228366571162838e-03, 5.0189254656526020e-03, 5.3181989951699288e-03, 5.5021033559999686e-03, 5.6998588607070494e-03, 5.3181989951699288e-03, 5.2634498662377308e-03, 4.6530516200842852e-03, 5.0650394647562338e-03, 5.2488756975870273e-03, 5.2624908207797184e-03, 5.0650394647562338e-03, 4.6874447846501164e-03, 5.2268492983034561e-03, 5.1745585095054458e-03, 5.2488756975870273e-03, 5.2268492983034561e-03, 4.7952117024116173e-03, 5.0276292129730416e-03, 5.2624908207797184e-03, 5.1745585095054458e-03, 5.0276292129730416e-03, 4.9122789880896404e-03, 4.9251289238065234e-03, 5.5933241606014724e-03, 5.3718594158626860e-03, 5.6008531360396017e-03, 5.5933241606014724e-03, 5.0150407582346635e-03, 5.4662584346395404e-03, 5.3675120118810739e-03, 5.3718594158626860e-03, 5.4662584346395404e-03, 5.0248322936143662e-03, 5.4605145072560580e-03, 5.6008531360396017e-03, 5.3675120118810739e-03, 5.4605145072560580e-03, 5.1143948516248604e-03, 3.7657249048296967e-03, 3.9486926147620297e-03, 4.0691576682980483e-03, 4.0664780157542643e-03, 3.9486926147620297e-03, 3.7736205874903724e-03, 4.0495831622273807e-03, 4.0206411044168824e-03, 4.0691576682980483e-03, 4.0495831622273807e-03, 3.7937874360149263e-03, 3.9752869288898511e-03, 4.0664780157542643e-03, 4.0206411044168824e-03, 3.9752869288898511e-03, 3.8149050745084684e-03, 5.0114061217704613e-03, 5.2617991497744074e-03, 5.1790540139945674e-03, 5.3591951134087162e-03, 5.2617991497744074e-03, 5.0120596768805908e-03, 5.2487154791349359e-03, 5.2629872576859908e-03, 5.1790540139945674e-03, 5.2487154791349359e-03, 5.0448755388764347e-03, 5.3303535417822852e-03, 
5.3591951134087162e-03, 5.2629872576859908e-03, 5.3303535417822852e-03, 5.1174676770792023e-03, 4.8377902633377264e-03, 5.3788546108106241e-03, 5.6040768285432335e-03, 5.3105068959469794e-03, 5.3788546108106241e-03, 4.9326524445286032e-03, 5.4991607490083253e-03, 5.5655132712644469e-03, 5.6040768285432335e-03, 5.4991607490083253e-03, 4.9741473776001794e-03, 5.3900550171932238e-03, 5.3105068959469794e-03, 5.5655132712644469e-03, 5.3900550171932238e-03, 5.1950052918756442e-03, 3.2492587714875146e-03, 3.9827109719638674e-03, 3.8774234602011355e-03, 3.8757465280329336e-03, 3.9827109719638674e-03, 3.4344256880305358e-03, 4.0545162671736575e-03, 3.7835976611712702e-03, 3.8774234602011355e-03, 4.0545162671736575e-03, 3.4953556313526311e-03, 3.9175382918411519e-03, 3.8757465280329336e-03, 3.7835976611712702e-03, 3.9175382918411519e-03, 3.5756760339217433e-03, 4.7617721014200634e-03, 4.8614225555244890e-03, 4.8279075259455451e-03, 4.8427156866381622e-03, 4.8614225555244890e-03, 4.8195643885916269e-03, 4.8405369639889255e-03, 4.8156734615472999e-03, 4.8279075259455451e-03, 4.8405369639889255e-03, 4.8197639580753243e-03, 4.8155984576041654e-03, 4.8427156866381622e-03, 4.8156734615472999e-03, 4.8155984576041654e-03, 4.8158790994777771e-03, 4.5833139216121957e-03, 5.4837863258358531e-03, 5.5638146695959553e-03, 5.4909136024647049e-03, 5.4837863258358531e-03, 4.8077007992908590e-03, 5.4604862042020273e-03, 5.4396976154026198e-03, 5.5638146695959553e-03, 5.4604862042020273e-03, 4.8833715685529635e-03, 5.3864868915666220e-03, 5.4909136024647049e-03, 5.4396976154026198e-03, 5.3864868915666220e-03, 4.9933154810546838e-03, 5.1071558319641056e-03, 5.4433074957792454e-03, 5.5765474271101826e-03, 5.5076635863146820e-03, 5.4433074957792454e-03, 5.1894387573805066e-03, 5.3922841131452456e-03, 5.5390126993166908e-03, 5.5765474271101826e-03, 5.3922841131452456e-03, 5.2001066482717309e-03, 5.4246759715010128e-03, 5.5076635863146820e-03, 5.5390126993166908e-03, 5.4246759715010128e-03, 
5.2227895716483279e-03, 4.0497679429561092e-03, 4.9282556022888424e-03, 4.7237087909624649e-03, 4.8020213273712292e-03, 4.9282556022888424e-03, 4.3464895120150539e-03, 4.7976125238518097e-03, 4.7260356905951930e-03, 4.7237087909624649e-03, 4.7976125238518097e-03, 4.4574002582117803e-03, 4.6487121118467404e-03, 4.8020213273712292e-03, 4.7260356905951930e-03, 4.6487121118467404e-03, 4.4956041562837267e-03, 4.9714633531457494e-03, 5.7206656536672494e-03, 5.5180734069265810e-03, 5.4919793509825418e-03, 5.7206656536672494e-03, 5.2356045663686296e-03, 5.5596394964592278e-03, 5.7261136434031473e-03, 5.5180734069265810e-03, 5.5596394964592278e-03, 5.4948204143983092e-03, 5.6210477153196018e-03, 5.4919793509825418e-03, 5.7261136434031473e-03, 5.6210477153196018e-03, 5.5487852497226155e-03, 4.5489572462475468e-03, 5.4201128096244833e-03, 5.2786942174057458e-03, 5.5227997797472469e-03, 5.4201128096244833e-03, 4.7506506604289601e-03, 5.5748809286163452e-03, 5.3179890118108808e-03, 5.2786942174057458e-03, 5.5748809286163452e-03, 4.9901034258736697e-03, 5.3280021534388728e-03, 5.5227997797472469e-03, 5.3179890118108808e-03, 5.3280021534388728e-03, 5.0865046620443850e-03, 5.1827253238118367e-03, 5.3230838505120767e-03, 5.6092186758799996e-03, 5.3498800832996584e-03, 5.3230838505120767e-03, 5.1866043895766913e-03, 5.4707858854025323e-03, 5.4396236153839645e-03, 5.6092186758799996e-03, 5.4707858854025323e-03, 5.2034777387438419e-03, 5.3820989828010021e-03, 5.3498800832996584e-03, 5.4396236153839645e-03, 5.3820989828010021e-03, 5.2795140455242154e-03, 4.9373256245385227e-03, 5.0306921107680144e-03, 5.0091821137527148e-03, 5.0745697481229361e-03, 5.0306921107680144e-03, 4.9460721328636499e-03, 5.0969673520163961e-03, 4.9827250786815078e-03, 5.0091821137527148e-03, 5.0969673520163961e-03, 4.9517391298398959e-03, 5.0636541925258224e-03, 5.0745697481229361e-03, 4.9827250786815078e-03, 5.0636541925258224e-03, 4.9552504666375910e-03, 4.9253932922107505e-03, 5.3550337394986059e-03, 
5.7052084741338500e-03, 5.4398733606965252e-03, 5.3550337394986059e-03, 4.9574861537077779e-03, 5.4706377679931946e-03, 5.5861137412303828e-03, 5.7052084741338500e-03, 5.4706377679931946e-03, 5.1587536865196583e-03, 5.6684419038570740e-03, 5.4398733606965252e-03, 5.5861137412303828e-03, 5.6684419038570740e-03, 5.2466699874090649e-03, 4.8003051442717763e-03, 5.6347570986480860e-03, 5.5318100167355980e-03, 5.4577257727782651e-03, 5.6347570986480860e-03, 4.8631401810418006e-03, 5.3361212366442329e-03, 5.5641617598327581e-03, 5.5318100167355980e-03, 5.3361212366442329e-03, 5.2040986395562412e-03, 5.3538907108157440e-03, 5.4577257727782651e-03, 5.5641617598327581e-03, 5.3538907108157440e-03, 5.2371703144156933e-03, 4.4297692564271461e-03, 5.1376827019044427e-03, 4.9522665099534903e-03, 5.1833712956327353e-03, 5.1376827019044427e-03, 4.5080542081015985e-03, 5.1885572418010912e-03, 4.8897090121346205e-03, 4.9522665099534903e-03, 5.1885572418010912e-03, 4.6450870564566172e-03, 4.8578374835205329e-03, 5.1833712956327353e-03, 4.8897090121346205e-03, 4.8578374835205329e-03, 4.6996959359605593e-03, 4.7160296966838235e-03, 5.4354504254331372e-03, 5.4914673382449068e-03, 5.0994910096800114e-03, 5.4354504254331372e-03, 4.8780256297317678e-03, 5.1983414836785551e-03, 5.1516527659604631e-03, 5.4914673382449068e-03, 5.1983414836785551e-03, 4.9576745968284569e-03, 5.3076257372673850e-03, 5.0994910096800114e-03, 5.1516527659604631e-03, 5.3076257372673850e-03, 5.0692404281867864e-03, 3.7521616561831035e-03, 4.3041195036684684e-03, 4.2083507066731441e-03, 4.0701551841232234e-03, 4.3041195036684684e-03, 3.8824364771630269e-03, 4.1647702647614748e-03, 4.1958378911293734e-03, 4.2083507066731441e-03, 4.1647702647614748e-03, 3.9355724928669756e-03, 4.1762562220603594e-03, 4.0701551841232234e-03, 4.1958378911293734e-03, 4.1762562220603594e-03, 4.0252316104492768e-03, 4.6691024276046839e-03, 5.4358497609484843e-03, 5.7984562413035670e-03, 5.3548860709007389e-03, 5.4358497609484843e-03, 
4.8958005316626745e-03, 5.6061623938377849e-03, 5.5174561676182539e-03, 5.7984562413035670e-03, 5.6061623938377849e-03, 5.0465532622400548e-03, 5.4146406099752648e-03, 5.3548860709007389e-03, 5.5174561676182539e-03, 5.4146406099752648e-03, 5.1458567514192288e-03, 4.7873760079603245e-03, 4.7132530407841666e-03, 4.7097574777507523e-03, 4.7067597555051121e-03, 4.7132530407841666e-03, 4.7759539827926852e-03, 4.7079931840557261e-03, 4.7054012333601347e-03, 4.7097574777507523e-03, 4.7079931840557261e-03, 4.7228911829172273e-03, 4.7124589687313331e-03, 4.7067597555051121e-03, 4.7054012333601347e-03, 4.7124589687313331e-03, 4.7141024992431325e-03, 4.7653857276584981e-03, 5.3935337863564433e-03, 5.1518363112552411e-03, 5.2735623271459057e-03, 5.3935337863564433e-03, 4.7820910815419837e-03, 5.3263045464322246e-03, 5.2662666132495747e-03, 5.1518363112552411e-03, 5.3263045464322246e-03, 4.8989837691361037e-03, 5.2967809909708055e-03, 5.2735623271459057e-03, 5.2662666132495747e-03, 5.2967809909708055e-03, 4.9322596897032446e-03, 4.7680183858758744e-03, 4.6997587251910548e-03, 4.6997371537680086e-03, 4.7022616189482079e-03, 4.6997587251910548e-03, 4.7586592022394198e-03, 4.7000771623067050e-03, 4.6999788734553346e-03, 4.6997371537680086e-03, 4.7000771623067050e-03, 4.7355142545157962e-03, 4.6997484975064945e-03, 4.7022616189482079e-03, 4.6999788734553346e-03, 4.6997484975064945e-03, 4.7310105077216554e-03, 4.8458569989126826e-03, 5.5268069114665867e-03, 5.7047145500931061e-03, 5.5972004900509010e-03, 5.5268069114665867e-03, 5.0812217431459358e-03, 5.5795445843535817e-03, 5.5171878865958018e-03, 5.7047145500931061e-03, 5.5795445843535817e-03, 5.1164073324255592e-03, 5.7042719550623056e-03, 5.5972004900509010e-03, 5.5171878865958018e-03, 5.7042719550623056e-03, 5.2296532393988453e-03, 5.0331052677300868e-03, 5.8126537574760377e-03, 5.6116616631243848e-03, 5.5610714403935496e-03, 5.8126537574760377e-03, 5.1251590071031677e-03, 5.4834432263391871e-03, 5.3387073396366554e-03, 
5.6116616631243848e-03, 5.4834432263391871e-03, 5.1721761378333781e-03, 5.4102404378702880e-03, 5.5610714403935496e-03, 5.3387073396366554e-03, 5.4102404378702880e-03, 5.2983146020102812e-03, 4.5634809762413915e-03, 5.0708627795713048e-03, 5.5285155509920162e-03, 5.1479022947186481e-03, 5.0708627795713048e-03, 4.5667821151481602e-03, 5.2739955449695626e-03, 5.2334032096775420e-03, 5.5285155509920162e-03, 5.2739955449695626e-03, 4.7796850268603851e-03, 5.2855491938456917e-03, 5.1479022947186481e-03, 5.2334032096775420e-03, 5.2855491938456917e-03, 4.9846472734427676e-03, 4.3408943946436742e-03, 5.0828126111047292e-03, 5.1619429658240372e-03, 4.8950748576602138e-03, 5.0828126111047292e-03, 4.5132933116312137e-03, 5.2279735123596507e-03, 5.0148402712815654e-03, 5.1619429658240372e-03, 5.2279735123596507e-03, 4.5757840526439138e-03, 5.1782251466555721e-03, 4.8950748576602138e-03, 5.0148402712815654e-03, 5.1782251466555721e-03, 4.7573202851706603e-03, 4.5244066537111763e-03, 5.3139854411244740e-03, 5.3841932493787804e-03, 5.1243544377133250e-03, 5.3139854411244740e-03, 4.5788274266713343e-03, 5.0657051731653390e-03, 5.1426785687150312e-03, 5.3841932493787804e-03, 5.0657051731653390e-03, 4.8535063031089169e-03, 5.0053417498441321e-03, 5.1243544377133250e-03, 5.1426785687150312e-03, 5.0053417498441321e-03, 4.9387189843115254e-03, 4.1561105459001855e-03, 4.8760140670205537e-03, 4.9530604056372751e-03, 4.7505323982066205e-03, 4.8760140670205537e-03, 4.1827775726392021e-03, 4.8099710346094313e-03, 5.0738733877216434e-03, 4.9530604056372751e-03, 4.8099710346094313e-03, 4.2585652889670211e-03, 4.7953656364286730e-03, 4.7505323982066205e-03, 5.0738733877216434e-03, 4.7953656364286730e-03, 4.4343641159264166e-03, 3.8850145135094689e-03, 4.2621118153951384e-03, 4.2895589113090549e-03, 4.3362464279518660e-03, 4.2621118153951384e-03, 4.0308729657790154e-03, 4.3844806003525494e-03, 4.2431337390287519e-03, 4.2895589113090549e-03, 4.3844806003525494e-03, 4.0886915647727800e-03, 
4.2096924634853674e-03, 4.3362464279518660e-03, 4.2431337390287519e-03, 4.2096924634853674e-03, 4.1471058237790934e-03, 4.0748278012573509e-03, 5.0975121874835549e-03, 4.7643646721661492e-03, 4.9684148451479976e-03, 5.0975121874835549e-03, 4.3184840883046935e-03, 4.6729488469095803e-03, 4.8545826387352059e-03, 4.7643646721661492e-03, 4.6729488469095803e-03, 4.3205131005967286e-03, 4.7004116957686162e-03, 4.9684148451479976e-03, 4.8545826387352059e-03, 4.7004116957686162e-03, 4.3690205971449432e-03, 4.9909978133227360e-03, 5.4143910989253663e-03, 5.3601062423874191e-03, 5.3625046092275102e-03, 5.4143910989253663e-03, 5.0724427507293137e-03, 5.3652489479395936e-03, 5.3446386733428074e-03, 5.3601062423874191e-03, 5.3652489479395936e-03, 5.1460907707869393e-03, 5.3780861391384124e-03, 5.3625046092275102e-03, 5.3446386733428074e-03, 5.3780861391384124e-03, 5.2120089804020249e-03, 3.9588108570032090e-03, 4.2938117793838125e-03, 4.3611063350752835e-03, 4.2651906389425988e-03, 4.2938117793838125e-03, 4.0073255645855172e-03, 4.3518483851969093e-03, 4.3842419740188166e-03, 4.3611063350752835e-03, 4.3518483851969093e-03, 4.0893058354619380e-03, 4.3784881764625605e-03, 4.2651906389425988e-03, 4.3842419740188166e-03, 4.3784881764625605e-03, 4.0997083424903474e-03, 4.7902809088108339e-03, 5.3533071692810326e-03, 5.4529697742326999e-03, 5.3421394953302170e-03, 5.3533071692810326e-03, 4.8783628762572584e-03, 5.4898943571368179e-03, 5.4389677679356945e-03, 5.4529697742326999e-03, 5.4898943571368179e-03, 4.9324715462178625e-03, 5.3542755941681317e-03, 5.3421394953302170e-03, 5.4389677679356945e-03, 5.3542755941681317e-03, 4.9864655774714602e-03, 4.2147742524955437e-03, 4.8044404091749406e-03, 4.7174654013479485e-03, 4.5212986560996676e-03, 4.8044404091749406e-03, 4.2334336465773776e-03, 4.8136572475988510e-03, 4.8526412903096890e-03, 4.7174654013479485e-03, 4.8136572475988510e-03, 4.3841364814263201e-03, 4.5319687487354911e-03, 4.5212986560996676e-03, 4.8526412903096890e-03, 
4.5319687487354911e-03, 4.3968275756536071e-03, 4.0044555899961193e-03, 4.4761433683108247e-03, 4.6472307349424728e-03, 4.4243677432400045e-03, 4.4761433683108247e-03, 4.0591447141326868e-03, 4.5360985593542293e-03, 4.4752304699918616e-03, 4.6472307349424728e-03, 4.5360985593542293e-03, 4.1784992937999085e-03, 4.5671014827176528e-03, 4.4243677432400045e-03, 4.4752304699918616e-03, 4.5671014827176528e-03, 4.2505134990343130e-03, 4.9235489798322716e-03, 5.7605067104734096e-03, 5.4765479512553930e-03, 5.5189947100062961e-03, 5.7605067104734096e-03, 5.0563592914372041e-03, 5.5230364268243756e-03, 5.6783945214777731e-03, 5.4765479512553930e-03, 5.5230364268243756e-03, 5.0602340964368484e-03, 5.6195955397581694e-03, 5.5189947100062961e-03, 5.6783945214777731e-03, 5.6195955397581694e-03, 5.1685388328162494e-03, 4.5391542914077748e-03, 5.3162333178131448e-03, 5.0344382645984304e-03, 5.0853779913647874e-03, 5.3162333178131448e-03, 4.5405811858949636e-03, 5.4256287620576297e-03, 5.0932428800818767e-03, 5.0344382645984304e-03, 5.4256287620576297e-03, 4.5923554538614669e-03, 5.1764781936595632e-03, 5.0853779913647874e-03, 5.0932428800818767e-03, 5.1764781936595632e-03, 4.6315286436058034e-03, 4.8241517294518987e-03, 5.6107202492192646e-03, 5.4225910095940417e-03, 5.6682617800773672e-03, 5.6107202492192646e-03, 5.1440593741325126e-03, 5.7028655987132184e-03, 5.4379837043436550e-03, 5.4225910095940417e-03, 5.7028655987132184e-03, 5.1490285191096163e-03, 5.4071926623830719e-03, 5.6682617800773672e-03, 5.4379837043436550e-03, 5.4071926623830719e-03, 5.3570980160283933e-03, 5.0497164090830202e-03, 5.6354102305264170e-03, 5.5833976153072101e-03, 5.5241887515719923e-03, 5.6354102305264170e-03, 5.1844311703979455e-03, 5.6093229622724144e-03, 5.6832732406642099e-03, 5.5833976153072101e-03, 5.6093229622724144e-03, 5.3845110635961542e-03, 5.6525395728626391e-03, 5.5241887515719923e-03, 5.6832732406642099e-03, 5.6525395728626391e-03, 5.5054350484104982e-03, 4.7330318788156075e-03, 
5.2558542628192498e-03, 5.2754180637923608e-03, 5.2680668623478954e-03, 5.2558542628192498e-03, 4.8583176679629775e-03, 5.3617411087909300e-03, 5.0763527226022447e-03, 5.2754180637923608e-03, 5.3617411087909300e-03, 4.8719763416853719e-03, 5.2676200731791344e-03, 5.2680668623478954e-03, 5.0763527226022447e-03, 5.2676200731791344e-03, 4.9861268237371891e-03, 4.4829218835174276e-03, 4.9248762537746826e-03, 5.0024520635936409e-03, 4.9006320187329356e-03, 4.9248762537746826e-03, 4.5440399582473258e-03, 4.8761752126013965e-03, 4.9995342540547251e-03, 5.0024520635936409e-03, 4.8761752126013965e-03, 4.5687972074181541e-03, 4.8573575677983922e-03, 4.9006320187329356e-03, 4.9995342540547251e-03, 4.8573575677983922e-03, 4.6113723269492028e-03, 4.7439766500629171e-03, 5.6665405738237223e-03, 5.5308219569568514e-03, 5.8507003829033413e-03, 5.6665405738237223e-03, 5.0720272017036588e-03, 5.7362709767074119e-03, 5.7332701844372063e-03, 5.5308219569568514e-03, 5.7362709767074119e-03, 5.1913803385325689e-03, 5.4671990661370592e-03, 5.8507003829033413e-03, 5.7332701844372063e-03, 5.4671990661370592e-03, 5.3276953731318695e-03, 4.1781480426967242e-03, 4.6569448684048234e-03, 4.5751840200780334e-03, 4.6474757680489750e-03, 4.6569448684048234e-03, 4.2572109329085161e-03, 4.4514347800524946e-03, 4.4710709510486640e-03, 4.5751840200780334e-03, 4.4514347800524946e-03, 4.3020526773420746e-03, 4.4948839390591307e-03, 4.6474757680489750e-03, 4.4710709510486640e-03, 4.4948839390591307e-03, 4.3748034929899484e-03, 4.3313384846384567e-03, 4.8609426130105955e-03, 5.0178932944894578e-03, 4.9114981624153410e-03, 4.8609426130105955e-03, 4.4156139708581314e-03, 5.0527692323661466e-03, 4.8434482548828555e-03, 5.0178932944894578e-03, 5.0527692323661466e-03, 4.5006787363920647e-03, 4.8851573490110390e-03, 4.9114981624153410e-03, 4.8434482548828555e-03, 4.8851573490110390e-03, 4.5585418241746826e-03, 4.6655021290269603e-03, 5.1099289511134065e-03, 5.2222030661634514e-03, 5.1555694826168827e-03, 
5.1099289511134065e-03, 4.7523395276319845e-03, 5.2110469413705869e-03, 5.1579337232921206e-03, 5.2222030661634514e-03, 5.2110469413705869e-03, 4.8305384374289825e-03, 5.1666370817119011e-03, 5.1555694826168827e-03, 5.1579337232921206e-03, 5.1666370817119011e-03, 4.8571729590057358e-03, 4.4124832580118107e-03, 5.5124750974480611e-03, 5.1133380051136147e-03, 5.3588860042126296e-03, 5.5124750974480611e-03, 4.6356559976615270e-03, 5.4305254470557798e-03, 5.2131271012044759e-03, 5.1133380051136147e-03, 5.4305254470557798e-03, 4.6534597410828934e-03, 5.1783873042292455e-03, 5.3588860042126296e-03, 5.2131271012044759e-03, 5.1783873042292455e-03, 4.8236077833933117e-03, -9.6615927935495179e-03, 6.8675588255994405e-03, 1.0945473142527546e-03, -1.1194179153164361e-03, 6.8675588255994405e-03, -3.6346241719092134e-03, -5.8138983110610643e-04, 1.2243832363445957e-03, 1.0945473142527546e-03, -5.8138983110610643e-04, -4.6126147746582667e-04, 6.3180189590662426e-04, -1.1194179153164361e-03, 1.2243832363445957e-03, 6.3180189590662426e-04, -3.6362492012642004e-04, -1.2936789941773327e-02, 9.1335782726763882e-03, -9.2413647644373851e-04, -9.1704209021694640e-04, 9.1335782726763882e-03, -4.3195816573411861e-03, 1.5488956851291041e-03, 2.2741681746236771e-03, -9.2413647644373851e-04, 1.5488956851291041e-03, 4.0145236950450106e-04, 1.1955645031987922e-03, -9.1704209021694640e-04, 2.2741681746236771e-03, 1.1955645031987922e-03, 5.9101407236907759e-04, -5.3962989027005557e-03, 2.6072667157267145e-03, -8.2947112894151754e-04, -8.4707999007982405e-04, 2.6072667157267145e-03, -1.9454893539958128e-03, 1.3259854799795544e-04, 9.1413360753332847e-05, -8.2947112894151754e-04, 1.3259854799795544e-04, -5.1496376171848712e-04, -2.0587710943157896e-04, -8.4707999007982405e-04, 9.1413360753332847e-05, -2.0587710943157896e-04, -4.9949758378796459e-04, -1.7429393257872950e-02, 8.3520701836941741e-03, -1.3077437245125536e-03, 2.6219650960785422e-03, 8.3520701836941741e-03, -2.1730131032824999e-03, 
2.9672791558086049e-03, 1.5427551148408693e-03, -1.3077437245125536e-03, 2.9672791558086049e-03, -1.2507874901661125e-04, 8.2649701074271687e-04, 2.6219650960785422e-03, 1.5427551148408693e-03, 8.2649701074271687e-04, -2.3883234105498484e-05, -1.3830306151965368e-02, 8.6140679080944672e-03, -1.0519351576461367e-03, 2.8626024695093547e-03, 8.6140679080944672e-03, -1.9075258568341821e-03, 2.0138742914406317e-03, 6.3743572321268256e-04, -1.0519351576461367e-03, 2.0138742914406317e-03, 6.7646549781383313e-04, 1.7337725787928556e-03, 2.8626024695093547e-03, 6.3743572321268256e-04, 1.7337725787928556e-03, 8.5614628175256790e-04, -1.8403458798923876e-02, 7.8573984911785699e-03, -1.5961302550980016e-03, 3.5149754327317441e-03, 7.8573984911785699e-03, -1.7067248914369689e-03, 2.5330322771104944e-03, 6.0551635352428294e-04, -1.5961302550980016e-03, 2.5330322771104944e-03, 4.5693878376751882e-04, 1.3178252989731669e-03, 3.5149754327317441e-03, 6.0551635352428294e-04, 1.3178252989731669e-03, 4.6425349761707751e-04, -1.7006699057036175e-02, 7.6077168403978004e-03, -1.2881631030799989e-03, 2.9480535843260710e-03, 7.6077168403978004e-03, -1.9702066590096216e-03, 2.0108629639478673e-03, 7.9516080819711594e-04, -1.2881631030799989e-03, 2.0108629639478673e-03, 8.0433046450099942e-04, 1.8038266298348227e-03, 2.9480535843260710e-03, 7.9516080819711594e-04, 1.8038266298348227e-03, 8.8160771996069693e-04, -1.3854826825508055e-02, 8.0479162307344476e-03, -5.4775445908707208e-04, 3.4108070940140215e-03, 8.0479162307344476e-03, -2.4855624480393168e-03, 2.0010342207153007e-03, 9.0050864299703079e-04, -5.4775445908707208e-04, 2.0010342207153007e-03, 2.2879961439639510e-04, 1.2455851470582276e-03, 3.4108070940140215e-03, 9.0050864299703079e-04, 1.2455851470582276e-03, 2.5797630491429058e-04, -1.3110891255545737e-02, 7.4933352500272960e-03, -3.9694223029317251e-04, 1.8696987208199419e-03, 7.4933352500272960e-03, -2.7269680038459173e-03, 1.3403236271558815e-03, 2.3125877913683179e-04, 
-3.9694223029317251e-04, 1.3403236271558815e-03, 7.7648354810912158e-04, 1.2366194463463297e-03, 1.8696987208199419e-03, 2.3125877913683179e-04, 1.2366194463463297e-03, 9.6189796048403537e-04, -1.3232672426819813e-02, 4.6823041218628670e-03, 1.8611702707055700e-03, -8.3085468064121430e-04, 4.6823041218628670e-03, -8.0583799511241597e-04, 1.5487811484838697e-03, 8.1349281204434981e-04, 1.8611702707055700e-03, 1.5487811484838697e-03, -1.2621786723316788e-04, 1.4631208905251433e-03, -8.3085468064121430e-04, 8.1349281204434981e-04, 1.4631208905251433e-03, 2.7511955514886215e-04, -1.5084537572866579e-02, 7.1293848562809948e-03, 3.7045468755946658e-03, -1.4711529451730022e-03, 7.1293848562809948e-03, -1.8494973697146082e-03, 1.3524358852716983e-03, 1.5936537318735912e-03, 3.7045468755946658e-03, 1.3524358852716983e-03, -5.7047546733440834e-04, 2.0738159150412733e-03, -1.4711529451730022e-03, 1.5936537318735912e-03, 2.0738159150412733e-03, 5.0829312908897773e-04, -1.1837724802376451e-02, 8.0065879433286139e-03, 2.5212064219669013e-03, -8.6580744742516692e-04, 8.0065879433286139e-03, -2.7205322761259480e-03, -2.2105454381026531e-04, 1.4586534642126341e-03, 2.5212064219669013e-03, -2.2105454381026531e-04, -2.4777193779670914e-04, 1.4026084563219037e-03, -8.6580744742516692e-04, 1.4586534642126341e-03, 1.4026084563219037e-03, 3.8679517983707091e-04, -1.2017287033769334e-02, 6.9442711205541319e-03, 1.8032964267264526e-03, 9.4303015390613293e-04, 6.9442711205541319e-03, -3.5149126553830593e-03, 2.3699469146257939e-04, -4.5705934851597888e-04, 1.8032964267264526e-03, 2.3699469146257939e-04, -1.3586411786986857e-04, 8.4154434886370209e-04, 9.4303015390613293e-04, -4.5705934851597888e-04, 8.4154434886370209e-04, -6.9789884734637771e-05, -1.6094817869361303e-02, 6.3002762847164506e-03, 2.2530843424977648e-03, 3.8816772736172305e-03, 6.3002762847164506e-03, -1.3384245408796698e-03, 1.0043242756394048e-03, 1.0894870315372434e-03, 2.2530843424977648e-03, 1.0043242756394048e-03, 
-1.2100553317588711e-05, 1.2384575776262850e-03, 3.8816772736172305e-03, 1.0894870315372434e-03, 1.2384575776262850e-03, 1.5692877333876084e-04, -1.4045801776472706e-02, 8.9584932766103818e-03, 2.6014796744964234e-03, 1.0166144512118521e-04, 8.9584932766103818e-03, -3.2080958107171351e-03, 1.3686944034468785e-03, 2.0148513302307739e-03, 2.6014796744964234e-03, 1.3686944034468785e-03, -4.7783480807389827e-04, 1.6221316661178631e-03, 1.0166144512118521e-04, 2.0148513302307739e-03, 1.6221316661178631e-03, 3.6300160393023845e-04, -1.5035351590565366e-02, 9.6530182335056031e-03, 2.1949862437937429e-03, 1.8246414683111131e-04, 9.6530182335056031e-03, -2.6652597726363591e-03, 3.9640886823489533e-04, 2.0283144683530636e-03, 2.1949862437937429e-03, 3.9640886823489533e-04, 4.9399384235801198e-04, 1.5872284744377334e-03, 1.8246414683111131e-04, 2.0283144683530636e-03, 1.5872284744377334e-03, 5.7853947086566285e-04, -1.3983142687551818e-02, 5.0756892732599047e-03, 5.1905234137764912e-03, 4.2875838849719752e-03, 5.0756892732599047e-03, -9.4954694217068230e-04, 1.8883020854449403e-04, 6.7109554143711891e-04, 5.1905234137764912e-03, 1.8883020854449403e-04, -8.4870028551829930e-04, 1.1077830644977793e-03, 4.2875838849719752e-03, 6.7109554143711891e-04, 1.1077830644977793e-03, -7.8542966877660964e-04, -1.3600736364920495e-02, 9.7195178003623480e-03, 9.3917635136515779e-04, 1.9513372114623364e-03, 9.7195178003623480e-03, -2.5366246539418099e-03, 1.5797585617940194e-03, 7.0858722530550288e-04, 9.3917635136515779e-04, 1.5797585617940194e-03, 5.8251868528788402e-04, 1.7783091417839559e-03, 1.9513372114623364e-03, 7.0858722530550288e-04, 1.7783091417839559e-03, 7.1541539011648952e-04, -1.5411936953103653e-02, 7.3903417706619335e-03, 4.5483022121753222e-03, -8.9677881534670116e-04, 7.3903417706619335e-03, -1.8311445673321121e-03, 5.0618507124800330e-04, 1.6497657392883489e-03, 4.5483022121753222e-03, 5.0618507124800330e-04, -1.0944142931277624e-04, 2.0053974090094044e-03, 
-8.9677881534670116e-04, 1.6497657392883489e-03, 2.0053974090094044e-03, 5.8688361598166873e-04, -1.5877997586428029e-02, 7.0102564979234233e-03, 2.3822117395892084e-03, 2.4183758414625888e-03, 7.0102564979234233e-03, -2.0572286784362988e-03, 3.5983393342809214e-04, -1.3917845780270373e-04, 2.3822117395892084e-03, 3.5983393342809214e-04, 9.3742516843216294e-05, 1.1681434312541931e-03, 2.4183758414625888e-03, -1.3917845780270373e-04, 1.1681434312541931e-03, 2.2289605656281757e-04, -1.4083572828047439e-02, 7.9925244381302291e-03, -6.0284983439355796e-04, -9.7568039067081366e-04, 7.9925244381302291e-03, -2.8403745612453242e-03, 1.4149913418989291e-03, 1.3483783335542488e-03, -6.0284983439355796e-04, 1.4149913418989291e-03, -7.9567702505795620e-05, 4.5761961508970946e-04, -9.7568039067081366e-04, 1.3483783335542488e-03, 4.5761961508970946e-04, -1.8122180927645343e-05, -1.3011750140965523e-02, 8.0448027672424538e-03, 1.3711087550206127e-03, 1.2861069552821227e-03, 8.0448027672424538e-03, -3.4752261499698942e-03, 3.7416175157491864e-04, 3.5251373821010203e-04, 1.3711087550206127e-03, 3.7416175157491864e-04, 1.0668685970538342e-04, 1.2410321451126983e-03, 1.2861069552821227e-03, 3.5251373821010203e-04, 1.2410321451126983e-03, 1.2833570311562067e-04, -1.0240113275587109e-02, 6.1802880789633766e-03, 1.3904451755038282e-03, -3.0192736293871718e-04, 6.1802880789633766e-03, -3.0059440450167932e-03, -5.3368175414203413e-04, 4.7195090306965839e-04, 1.3904451755038282e-03, -5.3368175414203413e-04, -7.9556056354311339e-04, 9.5483655393905145e-04, -3.0192736293871718e-04, 4.7195090306965839e-04, 9.5483655393905145e-04, -5.7134829653405345e-04, -1.3385066274392758e-02, 6.6124934381380064e-03, -1.2821932840326072e-03, 2.0730056600946553e-03, 6.6124934381380064e-03, -1.9317403170367104e-03, 1.2403275688648368e-03, -8.1880449480140711e-06, -1.2821932840326072e-03, 1.2403275688648368e-03, -2.9734471249323222e-04, 1.1154847964356678e-03, 2.0730056600946553e-03, -8.1880449480140711e-06, 
1.1154847964356678e-03, -8.2008990837446980e-05, -1.2832033261169851e-02, 7.1731149113646135e-03, -1.3178043396281812e-03, -8.1927059876190744e-04, 7.1731149113646135e-03, -3.8547355138079868e-03, 1.3689397012382246e-03, 1.4741597574298045e-03, -1.3178043396281812e-03, 1.3689397012382246e-03, -1.5523748509968895e-04, 5.2913940932647796e-04, -8.1927059876190744e-04, 1.4741597574298045e-03, 5.2913940932647796e-04, -2.0477161752664730e-06, -1.9353179288859976e-02, 8.2472993213544895e-03, 1.1328241708247928e-03, 1.0173474843959841e-04, 8.2472993213544895e-03, -2.0348560585739840e-03, 1.8916142915004761e-03, 1.8422284519757142e-03, 1.1328241708247928e-03, 1.8916142915004761e-03, 6.6390870098819582e-04, 1.6001417768552631e-03, 1.0173474843959841e-04, 1.8422284519757142e-03, 1.6001417768552631e-03, 7.2754320201389323e-04, -1.8034057030268318e-02, 8.1439718393683594e-03, -1.4927369306321973e-03, 4.4653038520066791e-04, 8.1439718393683594e-03, -2.0375629958202432e-03, 1.9844371297367868e-03, 1.5706327924026185e-03, -1.4927369306321973e-03, 1.9844371297367868e-03, 8.4221508811039951e-05, 1.4760224149450302e-03, 4.4653038520066791e-04, 1.5706327924026185e-03, 1.4760224149450302e-03, 5.3324147164490450e-04, -1.9858435274308524e-02, 5.5927944148301684e-03, -1.5438870160433414e-03, 3.3657100726607308e-03, 5.5927944148301684e-03, 2.3561660401123010e-04, 2.3242973956863057e-03, 1.5999686201749661e-03, -1.5438870160433414e-03, 2.3242973956863057e-03, 3.1805397259977025e-04, 1.2466021708669767e-03, 3.3657100726607308e-03, 1.5999686201749661e-03, 1.2466021708669767e-03, 3.8368966980689126e-04, -9.7281699245688677e-03, 6.3301451421522454e-03, -2.1014055387616102e-04, -1.0747609780239144e-03, 6.3301451421522454e-03, -3.4334745573678408e-03, 8.1169421191581016e-04, 6.4971659824279585e-04, -2.1014055387616102e-04, 8.1169421191581016e-04, -3.3545824360928884e-04, 4.5259596268414320e-04, -1.0747609780239144e-03, 6.4971659824279585e-04, 4.5259596268414320e-04, -1.1830597768979520e-04, 
-1.6332140614730220e-02, 9.0103128113567417e-03, 1.5571710079426081e-03, -6.0630677294044237e-04, 9.0103128113567417e-03, -2.4498309848700814e-03, 1.1243148388670240e-03, 1.9746725100914823e-03, 1.5571710079426081e-03, 1.1243148388670240e-03, 2.4215985584758782e-04, 1.0849950462954012e-03, -6.0630677294044237e-04, 1.9746725100914823e-03, 1.0849950462954012e-03, 4.0955394785959507e-04, -1.3036079872743208e-02, 8.1986656104820928e-03, -4.1339688016818390e-04, -7.9169837147628590e-04, 8.1986656104820928e-03, -3.0128993657090463e-03, 1.3087483787852701e-03, 1.3783682086432115e-03, -4.1339688016818390e-04, 1.3087483787852701e-03, 1.0389730541704824e-04, 1.0307092574933089e-03, -7.9169837147628590e-04, 1.3783682086432115e-03, 1.0307092574933089e-03, 2.5367532767235548e-04, -1.3984645154594681e-02, 8.3445337042039359e-03, -1.5668162012331865e-03, -1.6294897225424144e-03, 8.3445337042039359e-03, -2.3222775494725553e-03, 1.5981596851179622e-03, 2.2297148976875006e-03, -1.5668162012331865e-03, 1.5981596851179622e-03, 4.9099274793647457e-04, 1.0006803325697039e-03, -1.6294897225424144e-03, 2.2297148976875006e-03, 1.0006803325697039e-03, 7.7225711991308439e-04, -1.0878208304283613e-02, 6.8495335213857884e-03, 1.8367434011084490e-03, -1.1684248224645396e-03, 6.8495335213857884e-03, -3.7485460530336514e-03, -4.5636048439838095e-04, 1.1909478868148576e-03, 1.8367434011084490e-03, -4.5636048439838095e-04, 1.3521556398979253e-04, 5.5654021720440735e-04, -1.1684248224645396e-03, 1.1909478868148576e-03, 5.5654021720440735e-04, 1.7634298653690025e-04, -1.3695683195390350e-02, 9.6440337102124008e-03, -1.1111079511340462e-03, -7.3806515965964433e-04, 9.6440337102124008e-03, -4.4655385326203868e-03, 2.6568699762222737e-03, 1.4535689858293593e-03, -1.1111079511340462e-03, 2.6568699762222737e-03, 4.0767188718331062e-04, 1.3035911326184557e-03, -7.3806515965964433e-04, 1.4535689858293593e-03, 1.3035911326184557e-03, 6.4104771006401936e-04, -1.8783865558349294e-02, 9.5046988369441830e-03, 
1.0054895662265832e-03, -1.3398595612765494e-03, 9.5046988369441830e-03, -2.4387572360332795e-03, 1.2308518058462559e-03, 2.3676189965730725e-03, 1.0054895662265832e-03, 1.2308518058462559e-03, 7.0028547773139820e-04, 1.0101870932540413e-03, -1.3398595612765494e-03, 2.3676189965730725e-03, 1.0101870932540413e-03, 7.0233475365099978e-04, -1.4976622456425184e-02, 8.1162153169009460e-03, -1.3827823800513477e-03, -9.0475962868004594e-04, 8.1162153169009460e-03, -2.2249946889018480e-03, 2.1152052285084320e-03, 1.8399989448963172e-03, -1.3827823800513477e-03, 2.1152052285084320e-03, 2.8805366919883429e-04, 1.4321728932099045e-03, -9.0475962868004594e-04, 1.8399989448963172e-03, 1.4321728932099045e-03, 4.1014738409446026e-04, -1.1408226145512439e-02, 6.4954304965538492e-03, -1.2001987796160765e-03, -9.3863075040407956e-04, 6.4954304965538492e-03, -2.1295410823541430e-03, 8.3445635040981805e-04, 1.0828225854915745e-03, -1.2001987796160765e-03, 8.3445635040981805e-04, -3.5131917652677573e-04, 6.5050284812873853e-04, -9.3863075040407956e-04, 1.0828225854915745e-03, 6.5050284812873853e-04, 9.2967121716285277e-05, -1.3540012040553353e-02, 6.5044103809259023e-03, -1.4509842112690869e-03, 2.9456259587854984e-03, 6.5044103809259023e-03, -2.1362340178470861e-03, 2.1384291485579830e-03, 4.4150731301810292e-04, -1.4509842112690869e-03, 2.1384291485579830e-03, 6.8321237604588041e-04, 1.5155198702477583e-03, 2.9456259587854984e-03, 4.4150731301810292e-04, 1.5155198702477583e-03, 9.4537651413804154e-04, -1.6246993041419768e-02, 7.8928849361846465e-03, -1.0371149787786690e-03, -1.1988565506819290e-03, 7.8928849361846465e-03, -2.1711861031565201e-03, 1.5625386477991353e-03, 2.0766924041950999e-03, -1.0371149787786690e-03, 1.5625386477991353e-03, 3.6916286503139387e-04, 1.3956257087938428e-03, -1.1988565506819290e-03, 2.0766924041950999e-03, 1.3956257087938428e-03, 7.0753221752847730e-04, -1.3605718835333182e-02, 8.4677963790206132e-03, 1.5674417326158496e-03, -8.5156763496229099e-04, 
8.4677963790206132e-03, -2.0207185798241187e-03, 8.5611012401802392e-04, 1.5345153178721719e-03, 1.5674417326158496e-03, 8.5611012401802392e-04, 3.4310085919516103e-04, 1.3423485494321341e-03, -8.5156763496229099e-04, 1.5345153178721719e-03, 1.3423485494321341e-03, 4.5225478537197169e-04, -1.4669000264706085e-02, 8.7488167172959999e-03, -1.4091880634556062e-03, -1.0017125355292859e-03, 8.7488167172959999e-03, -3.0719648092492838e-03, 2.3683327512114935e-03, 1.8164163784652701e-03, -1.4091880634556062e-03, 2.3683327512114935e-03, 1.8550057165905962e-04, 8.3199462205668607e-04, -1.0017125355292859e-03, 1.8164163784652701e-03, 8.3199462205668607e-04, 2.3623702228027553e-04, -1.3384842502986008e-02, -7.4284413610738024e-04, 2.2164427066970675e-03, -1.6984495314060527e-03, -7.4284413610738024e-04, -1.1789069761800031e-04, 5.5226050924804017e-04, 1.2865230734008021e-03, 2.2164427066970675e-03, 5.5226050924804017e-04, 1.6158157787987345e-04, 1.4162104619029536e-03, -1.6984495314060527e-03, 1.2865230734008021e-03, 1.4162104619029536e-03, 2.5984053718450023e-04, -1.4270238868634912e-02, 8.7435299844129590e-03, 1.3121371658921229e-03, -1.4459562540351310e-03, 8.7435299844129590e-03, -2.5869716021176296e-03, 1.9816371858388513e-03, 1.8906964142812917e-03, 1.3121371658921229e-03, 1.9816371858388513e-03, 5.2299567205402252e-04, 1.6974470674424424e-03, -1.4459562540351310e-03, 1.8906964142812917e-03, 1.6974470674424424e-03, 6.8040298431208874e-04, -1.3927808495450104e-02, 9.8114319729643538e-03, -1.4585605825814291e-03, 5.1081452718003395e-04, 9.8114319729643538e-03, -3.1780317017032489e-03, 1.9510988088846683e-03, 1.0552731530989561e-03, -1.4585605825814291e-03, 1.9510988088846683e-03, 4.1351930697924790e-04, 8.3639732480242888e-04, 5.1081452718003395e-04, 1.0552731530989561e-03, 8.3639732480242888e-04, 5.9577771535064973e-04, -1.2005957611549042e-02, 6.5269829696843855e-03, 5.4227086053001321e-06, -1.2166655959930230e-03, 6.5269829696843855e-03, -2.8587968071345869e-03, 
8.7526046059167601e-04, 1.1870931547370881e-03, 5.4227086053001321e-06, 8.7526046059167601e-04, -9.3426039127427613e-05, 7.3532331884749720e-04, -1.2166655959930230e-03, 1.1870931547370881e-03, 7.3532331884749720e-04, 8.0025894282650164e-05, -1.3643474898358156e-02, 7.2377155928434068e-03, -1.2401442140379342e-03, -1.2740975088610677e-03, 7.2377155928434068e-03, -2.2422690205721159e-03, 1.5138822054081335e-03, 1.4904684300819143e-03, -1.2401442140379342e-03, 1.5138822054081335e-03, -1.3634361034485141e-04, 1.1127686221486200e-03, -1.2740975088610677e-03, 1.4904684300819143e-03, 1.1127686221486200e-03, 2.6190429702892281e-04, -1.8536129654681068e-02, 5.7362215425607876e-03, 3.7077483503927737e-03, 5.3703505121290920e-03, 5.7362215425607876e-03, -1.5089926703986004e-03, 1.7790988909616276e-03, 2.4855814646503223e-04, 3.7077483503927737e-03, 1.7790988909616276e-03, -2.6330472344847811e-04, 5.6773516831324072e-04, 5.3703505121290920e-03, 2.4855814646503223e-04, 5.6773516831324072e-04, -7.5082904071627644e-05, -1.2009357144285096e-02, 8.3436157388768785e-03, 1.4898887848690406e-03, -1.1644905255429930e-03, 8.3436157388768785e-03, -3.9039776700446403e-03, 6.7845541120046616e-04, 8.9957003910951026e-04, 1.4898887848690406e-03, 6.7845541120046616e-04, -4.3136022070644790e-04, 8.0279376655449392e-04, -1.1644905255429930e-03, 8.9957003910951026e-04, 8.0279376655449392e-04, 1.4214812713657745e-04, -1.3087027777011376e-02, 8.1091117611986480e-03, -9.9940829784200174e-04, 3.0841860053121063e-03, 8.1091117611986480e-03, -1.8957247580462895e-03, 2.0851167099702368e-03, 4.9914625071405998e-04, -9.9940829784200174e-04, 2.0851167099702368e-03, 5.8420276661913983e-04, 1.2124350953385600e-03, 3.0841860053121063e-03, 4.9914625071405998e-04, 1.2124350953385600e-03, 9.4839886112247610e-04, -1.4389875409702553e-02, 5.9650198667490139e-03, 2.7789793386952099e-03, 1.1951993133499255e-03, 5.9650198667490139e-03, -1.5525484868771592e-03, -1.8249951551237587e-04, 2.3756213628496397e-05, 
2.7789793386952099e-03, -1.8249951551237587e-04, -7.0686530665496237e-04, 8.4787536338310149e-04, 1.1951993133499255e-03, 2.3756213628496397e-05, 8.4787536338310149e-04, -1.8249697460956133e-04, -1.6493749736099662e-02, 8.9845954108499354e-03, -6.5855511936865679e-04, -1.2916630860458668e-03, 8.9845954108499354e-03, -2.2720543779065185e-03, 2.0777202119131770e-03, 1.7945117300401194e-03, -6.5855511936865679e-04, 2.0777202119131770e-03, 3.7547177825330777e-04, 1.3207948522699173e-03, -1.2916630860458668e-03, 1.7945117300401194e-03, 1.3207948522699173e-03, 4.4592713214989318e-04, -1.1661413908588582e-02, 5.3356769263466705e-03, 2.3910148956967888e-05, 2.0144431709342532e-03, 5.3356769263466705e-03, -2.3763451303913793e-03, 1.5795406491315219e-04, -5.2947905482373585e-04, 2.3910148956967888e-05, 1.5795406491315219e-04, -3.3709897439123792e-04, 4.4156371402711333e-04, 2.0144431709342532e-03, -5.2947905482373585e-04, 4.4156371402711333e-04, -1.6067267371488200e-04, -1.2127195499525786e-02, 6.3575178498569175e-03, 2.7885477665962593e-03, 1.4103487851294598e-03, 6.3575178498569175e-03, -1.9866201831196012e-03, 1.4532946357492936e-03, 1.0405145770188389e-03, 2.7885477665962593e-03, 1.4532946357492936e-03, 1.1650211077524758e-04, 1.1231612548733749e-03, 1.4103487851294598e-03, 1.0405145770188389e-03, 1.1231612548733749e-03, 4.1629479593797221e-04, -1.0520573085213737e-02, 6.8711276675652456e-03, 1.6939148906612810e-03, -9.8113687728168184e-04, 6.8711276675652456e-03, -2.6887735100749720e-03, -2.1586496673177543e-04, 1.0064124321346908e-03, 1.6939148906612810e-03, -2.1586496673177543e-04, -1.4067199641936031e-04, 2.9361950909233638e-04, -9.8113687728168184e-04, 1.0064124321346908e-03, 2.9361950909233638e-04, -7.9727460669166473e-05, -1.5912209340202287e-02, 8.8708507277150267e-03, -2.2876273236139503e-04, -1.2962376027821204e-03, 8.8708507277150267e-03, -3.7886872287483438e-03, 2.2364194787288061e-03, 2.3055217337219964e-03, -2.2876273236139503e-04, 2.2364194787288061e-03, 
-2.2121966075386214e-04, 9.2457524181229541e-04, -1.2962376027821204e-03, 2.3055217337219964e-03, 9.2457524181229541e-04, 5.0819851534159976e-04, -1.1988487259813843e-02, -1.0817915696993614e-03, 4.4055451227390865e-03, 2.6194469136628352e-03, -1.0817915696993614e-03, -9.4238486903406541e-04, 7.7092876812179679e-04, 1.6765166378430332e-03, 4.4055451227390865e-03, 7.7092876812179679e-04, -7.0957594439696422e-04, 4.6632501327589830e-04, 2.6194469136628352e-03, 1.6765166378430332e-03, 4.6632501327589830e-04, -1.7141234209432641e-05, -1.2965555240827500e-02, 4.6872799977542403e-03, -1.4510821237139582e-03, 3.7946233749072901e-03, 4.6872799977542403e-03, -3.5962713002839149e-04, 1.0943003685336375e-03, 5.6646742816718408e-04, -1.4510821237139582e-03, 1.0943003685336375e-03, -1.0401090694716338e-04, 1.7088920364535401e-03, 3.7946233749072901e-03, 5.6646742816718408e-04, 1.7088920364535401e-03, 2.7162106952229655e-04, -1.4785199674470367e-02, 8.0633700283524075e-03, -5.6495833882158305e-04, 2.4514928345558051e-03, 8.0633700283524075e-03, -3.2143330398933306e-03, 1.9126378978111445e-03, 4.8878097615942593e-05, -5.6495833882158305e-04, 1.9126378978111445e-03, 2.1638725769767891e-04, 8.0709190039421428e-04, 2.4514928345558051e-03, 4.8878097615942593e-05, 8.0709190039421428e-04, 5.8999550418438625e-04, -1.4240209598961997e-02, 6.7516548024444747e-03, -5.1918416449977969e-04, 2.2353477983456612e-03, 6.7516548024444747e-03, -1.8474456163168917e-03, 1.2971818549383650e-03, -1.5014058192087238e-04, -5.1918416449977969e-04, 1.2971818549383650e-03, -7.3709152533265676e-05, 3.5066038413611596e-04, 2.2353477983456612e-03, -1.5014058192087238e-04, 3.5066038413611596e-04, -3.3978637630894203e-05, -1.3007652156842001e-02, 6.6856636493797493e-03, 2.9160109924347812e-03, -6.5543528412060508e-04, 6.6856636493797493e-03, -1.8216554256699276e-03, 7.3027879176171099e-04, 9.7092376946287130e-04, 2.9160109924347812e-03, 7.3027879176171099e-04, -1.0356785011530854e-04, 1.5729902578397168e-03, 
-6.5543528412060508e-04, 9.7092376946287130e-04, 1.5729902578397168e-03, 2.9497919465760276e-04, -1.0866405763036151e-02, 5.3513859835064457e-03, 2.4060930773512137e-04, 1.9862948106334644e-03, 5.3513859835064457e-03, -1.8512769013365881e-03, 2.8133169930445625e-04, -3.4053514307311997e-04, 2.4060930773512137e-04, 2.8133169930445625e-04, -1.0872990161973775e-04, 3.0543245383033921e-04, 1.9862948106334644e-03, -3.4053514307311997e-04, 3.0543245383033921e-04, -3.2564062326252566e-05, -9.0348481159891574e-03, 4.9368779887051665e-03, -7.7858023749495433e-04, 1.0920385408053164e-03, 4.9368779887051665e-03, -2.4651064596398460e-03, 7.2405402568656097e-04, -4.7031756933928247e-04, -7.7858023749495433e-04, 7.2405402568656097e-04, -5.4175927276914012e-04, 2.2338122120519862e-05, 1.0920385408053164e-03, -4.7031756933928247e-04, 2.2338122120519862e-05, -1.6681805314778045e-04, -1.7304180355604392e-02, 8.6652343814632091e-03, -1.2246474341539783e-03, 8.2415022799262484e-04, 8.6652343814632091e-03, -2.2303662865870645e-03, 2.2578893430555296e-03, 1.4121004619557782e-03, -1.2246474341539783e-03, 2.2578893430555296e-03, 6.6297648327747090e-04, 1.1885000811279875e-03, 8.2415022799262484e-04, 1.4121004619557782e-03, 1.1885000811279875e-03, 6.6695685275927760e-04, -1.0778073152356201e-02, 6.3694652581600224e-03, 1.6961032371909813e-03, -1.0948355265557148e-03, 6.3694652581600224e-03, -3.4260749027166322e-03, 7.2077630136983231e-04, 4.4084306708303315e-04, 1.6961032371909813e-03, 7.2077630136983231e-04, -8.9011520033262931e-04, 9.7699882816715137e-04, -1.0948355265557148e-03, 4.4084306708303315e-04, 9.7699882816715137e-04, -1.6721134424032506e-04, -1.5880217492160682e-02, 3.6937157026263458e-03, 4.3136575901598258e-03, -1.5179193912709036e-03, 3.6937157026263458e-03, -1.1384551327959617e-03, 1.6183111798056420e-03, 2.1843622767156786e-03, 4.3136575901598258e-03, 1.6183111798056420e-03, 3.6830220626843141e-05, 1.5392367332523269e-03, -1.5179193912709036e-03, 2.1843622767156786e-03, 
1.5392367332523269e-03, 4.4276775065660106e-04, -1.6165406785950832e-02, 5.8699367964453637e-03, -1.2859790511595471e-03, -9.6075957427906315e-04, 5.8699367964453637e-03, -1.1179968213466582e-03, 1.7183301947139779e-03, 1.9085251825466231e-03, -1.2859790511595471e-03, 1.7183301947139779e-03, 4.8200250785465073e-05, 1.4857105304615032e-03, -9.6075957427906315e-04, 1.9085251825466231e-03, 1.4857105304615032e-03, 4.8795341166811253e-04, -1.0013561677409232e-02, 4.0961622741432311e-03, -1.1185134603428390e-03, -1.0574249764454893e-03, 4.0961622741432311e-03, -1.7667047214563205e-03, 4.4148465732737679e-04, 5.1292958257300033e-04, -1.1185134603428390e-03, 4.4148465732737679e-04, -3.2715402184443172e-04, 5.9087815524321886e-05, -1.0574249764454893e-03, 5.1292958257300033e-04, 5.9087815524321886e-05, -2.5498418232061302e-04, -1.5836685529134305e-02, 7.9890379919545819e-03, 2.0022017376080106e-03, -7.4049310281811551e-04, 7.9890379919545819e-03, -1.8638004635911647e-03, 1.6707614648490442e-03, 2.0576430038936316e-03, 2.0022017376080106e-03, 1.6707614648490442e-03, -3.8921469816428380e-04, 1.8660395072033363e-03, -7.4049310281811551e-04, 2.0576430038936316e-03, 1.8660395072033363e-03, 8.3166983679526074e-04, -1.5585823655705712e-02, 8.6547358140740775e-03, 6.4685805130901839e-04, 6.0437717832279090e-04, 8.6547358140740775e-03, -2.9179966185179798e-03, 1.6650090091244428e-03, 5.4772087545596447e-04, 6.4685805130901839e-04, 1.6650090091244428e-03, 1.0944173789827910e-04, 1.6193714005056402e-03, 6.0437717832279090e-04, 5.4772087545596447e-04, 1.6193714005056402e-03, 4.1435400171744153e-04, -1.2002918703686777e-02, 8.5494004897721486e-03, 2.6869376009802685e-03, 2.8041002192664066e-04, 8.5494004897721486e-03, -3.3375720513194084e-03, -6.4781613669761683e-05, 1.3524488057429884e-03, 2.6869376009802685e-03, -6.4781613669761683e-05, -1.0720086261486713e-04, 3.3638662164407361e-04, 2.8041002192664066e-04, 1.3524488057429884e-03, 3.3638662164407361e-04, 2.7204403023007185e-04, 
-1.5230508969721116e-02, 8.0033674082133788e-03, 4.5505572033502154e-03, 6.3142263681769913e-04, 8.0033674082133788e-03, -2.2473990191216016e-03, 9.4766049239732128e-04, 8.2712654749629762e-04, 4.5505572033502154e-03, 9.4766049239732128e-04, -8.7138227660763510e-04, 1.8741234298296900e-03, 6.3142263681769913e-04, 8.2712654749629762e-04, 1.8741234298296900e-03, 7.8238202994568230e-04, -1.4728673062431606e-02, 7.7272261248470383e-03, -6.5330828262795480e-04, -1.0320780914468404e-03, 7.7272261248470383e-03, -3.4770903165693631e-03, 1.7224962220012408e-03, 1.6016916176218841e-03, -6.5330828262795480e-04, 1.7224962220012408e-03, -1.0624168683721165e-05, 6.7411356195351047e-04, -1.0320780914468404e-03, 1.6016916176218841e-03, 6.7411356195351047e-04, 3.5793923973344256e-04, -1.5365215679984470e-02, 7.7326906220242997e-03, -8.7919884112348649e-04, 3.9232895421004452e-03, 7.7326906220242997e-03, -1.8951873402245514e-03, 1.6702212822668559e-03, 3.8698617614536183e-04, -8.7919884112348649e-04, 1.6702212822668559e-03, 1.4305624533357813e-04, 1.3483365102715016e-03, 3.9232895421004452e-03, 3.8698617614536183e-04, 1.3483365102715016e-03, 2.9887961813433576e-04, -1.5308665984215052e-02, 6.4035427214453088e-03, 3.2264588182182893e-03, -1.2172293673001523e-03, 6.4035427214453088e-03, -2.1803365653642855e-03, 8.1169763975199303e-04, 1.1516553650030236e-03, 3.2264588182182893e-03, 8.1169763975199303e-04, -8.9273426545469822e-04, 1.3149492890516085e-03, -1.2172293673001523e-03, 1.1516553650030236e-03, 1.3149492890516085e-03, 3.4916307447500070e-04, -1.4177990755768224e-02, 8.4436539885229638e-03, 1.9644514003780713e-03, -1.4051939351763857e-03, 8.4436539885229638e-03, -2.6365958058229824e-03, 2.7988734419438126e-04, 2.3791557288914468e-03, 1.9644514003780713e-03, 2.7988734419438126e-04, -9.3295814287705590e-06, 1.7397564635589064e-03, -1.4051939351763857e-03, 2.3791557288914468e-03, 1.7397564635589064e-03, 3.5990020010247216e-04, -1.4509970137912609e-02, 8.3321965664366754e-03, 
1.9636760666178182e-04, 1.4472834003371236e-03, 8.3321965664366754e-03, -3.1644286858689462e-03, 1.3130696372032696e-03, 8.9901256003085657e-04, 1.9636760666178182e-04, 1.3130696372032696e-03, -1.1641434273373312e-04, 1.1005872558455100e-03, 1.4472834003371236e-03, 8.9901256003085657e-04, 1.1005872558455100e-03, 2.2352300907388465e-04, -1.4191547859266384e-02, 7.2008193102188825e-03, 3.7147795995076251e-03, -9.3769917155450059e-04, 7.2008193102188825e-03, -1.8544479396288801e-03, 6.8480624138753587e-04, 1.5662746452927681e-03, 3.7147795995076251e-03, 6.8480624138753587e-04, -6.5351803821489724e-04, 1.5327770952292775e-03, -9.3769917155450059e-04, 1.5662746452927681e-03, 1.5327770952292775e-03, 4.5630359182475774e-04, -1.4160940337820836e-02, 7.8821478615683557e-03, -9.8558148594463752e-04, 3.7918168562442430e-03, 7.8821478615683557e-03, -2.1790556607648682e-03, 2.0369416094564120e-03, 1.4304949644710783e-04, -9.8558148594463752e-04, 2.0369416094564120e-03, 5.0614978147559312e-04, 1.6610917581866963e-03, 3.7918168562442430e-03, 1.4304949644710783e-04, 1.6610917581866963e-03, 5.8794575109223548e-04, -9.2845894059316349e-03, 6.4939565529886596e-03, 1.4724840417364997e-03, -5.6610856876894574e-04, 6.4939565529886596e-03, -3.7833463506718019e-03, -4.7204290292012798e-04, 6.9241613661032045e-04, 1.4724840417364997e-03, -4.7204290292012798e-04, -8.1865701212015295e-04, 4.9894256490768405e-04, -5.6610856876894574e-04, 6.9241613661032045e-04, 4.9894256490768405e-04, -1.6611368267292618e-04, -1.6184598560687900e-02, 6.8439613963293747e-03, 1.7365578719469512e-03, 2.5532525388744873e-03, 6.8439613963293747e-03, -1.6997340703512266e-03, 8.1255506906787128e-04, 1.5422686239700562e-04, 1.7365578719469512e-03, 8.1255506906787128e-04, 3.3892711957371837e-04, 7.0860872528022347e-04, 2.5532525388744873e-03, 1.5422686239700562e-04, 7.0860872528022347e-04, 4.6427613812592781e-04, -1.2357824977244209e-02, 6.9688664588778747e-03, -1.0006387693880701e-03, 5.1736619278341814e-04, 
6.9688664588778747e-03, -3.6183785683711568e-03, 8.2525839785551063e-04, 1.1173236493360916e-03, -1.0006387693880701e-03, 8.2525839785551063e-04, -1.0729556618775974e-04, 6.7298930398621047e-04, 5.1736619278341814e-04, 1.1173236493360916e-03, 6.7298930398621047e-04, -3.1875208496333698e-08, -1.4252206077143996e-02, 6.2144795121130665e-03, 5.2879460633076993e-03, -1.0233446873137595e-03, 6.2144795121130665e-03, -1.5331372596218611e-03, 2.2209966824697589e-04, 2.0306655712737896e-03, 5.2879460633076993e-03, 2.2209966824697589e-04, -1.0936295576075275e-03, 1.9226034908899328e-03, -1.0233446873137595e-03, 2.0306655712737896e-03, 1.9226034908899328e-03, 5.5839135772282368e-04, -1.3330238816035098e-02, 8.1905708096769846e-03, -7.6394721968447781e-04, -2.2394664524934586e-04, 8.1905708096769846e-03, -3.0167954985111760e-03, 1.9263001035794555e-03, 1.5435010884285658e-03, -7.6394721968447781e-04, 1.9263001035794555e-03, 4.3194129081048293e-04, 1.1144179655621770e-03, -2.2394664524934586e-04, 1.5435010884285658e-03, 1.1144179655621770e-03, 4.6419314960482224e-04, -9.3078896127232972e-03, 3.7808378683754900e-03, -8.2500659938291763e-04, -9.2253721497522736e-04, 3.7808378683754900e-03, -1.3735593478715945e-03, 2.6895103878370749e-04, 6.0702527043364398e-04, -8.2500659938291763e-04, 2.6895103878370749e-04, -3.5102799517677157e-04, 2.1534278727519608e-04, -9.2253721497522736e-04, 6.0702527043364398e-04, 2.1534278727519608e-04, -1.4676179836516589e-04, -1.7610358444604211e-02, 7.7354116237610682e-03, -4.3061286359086953e-04, 3.1429218095944938e-03, 7.7354116237610682e-03, -2.3939505233226636e-03, 1.4834692755721455e-03, 1.6181041921510780e-03, -4.3061286359086953e-04, 1.4834692755721455e-03, 8.0959866125124443e-05, 1.6836715825834035e-03, 3.1429218095944938e-03, 1.6181041921510780e-03, 1.6836715825834035e-03, 4.4455549963187051e-04, -1.1327541090185321e-02, 8.0271674256418221e-03, -1.0748118107689127e-03, 1.0220978517679886e-03, 8.0271674256418221e-03, -3.7625982726974937e-03, 
1.4015004959102217e-03, 2.9941859123995497e-04, -1.0748118107689127e-03, 1.4015004959102217e-03, -4.7414240813170117e-04, 1.8984462650148025e-04, 1.0220978517679886e-03, 2.9941859123995497e-04, 1.8984462650148025e-04, 1.9168610525451880e-05, -7.7599972015153637e-03, 5.0487328889732026e-03, -1.0150335177867968e-03, -1.0787256637662084e-03, 5.0487328889732026e-03, -3.8283334708152520e-03, 4.8095191605469594e-04, 1.0961988375689778e-03, -1.0150335177867968e-03, 4.8095191605469594e-04, -3.5006941784132919e-04, 2.5142046401834721e-04, -1.0787256637662084e-03, 1.0961988375689778e-03, 2.5142046401834721e-04, -3.4601993607214204e-04, -1.6722037962919063e-02, 8.9023027147699402e-03, 2.8475764924646074e-03, -1.3668464681863240e-03, 8.9023027147699402e-03, -3.0464816895161466e-03, 5.5483638104875219e-04, 2.5866682413927889e-03, 2.8475764924646074e-03, 5.5483638104875219e-04, 3.2244817585743671e-04, 1.7335129486130759e-03, -1.3668464681863240e-03, 2.5866682413927889e-03, 1.7335129486130759e-03, 6.7373810044801786e-04, -1.3043290920668447e-02, 7.5390776005711682e-03, -1.4967658915720180e-03, -1.1709919402049843e-03, 7.5390776005711682e-03, -2.3042283980880748e-03, 1.8948249232033040e-03, 1.4633606749955709e-03, -1.4967658915720180e-03, 1.8948249232033040e-03, -1.1525903461801575e-04, 8.6522916210846480e-04, -1.1709919402049843e-03, 1.4633606749955709e-03, 8.6522916210846480e-04, -3.7584188113171969e-06, -1.5585713708593366e-02, 8.6119952233053958e-03, 2.0853199702149908e-04, -3.6538641086258830e-04, 8.6119952233053958e-03, -2.0708945444859184e-03, 2.0916162538495752e-03, 1.6963122585672433e-03, 2.0853199702149908e-04, 2.0916162538495752e-03, 8.4673232959246606e-04, 1.7050167169310555e-03, -3.6538641086258830e-04, 1.6963122585672433e-03, 1.7050167169310555e-03, 9.7321838069529471e-04, -8.5883608444249915e-03, 5.1230145745255022e-03, 8.9159902137280552e-04, 1.2313522195822745e-03, 5.1230145745255022e-03, -3.2070396434171729e-03, -4.0842243388821992e-04, -5.0228366774250242e-04, 
8.9159902137280552e-04, -4.0842243388821992e-04, -1.6659900154853617e-04, 1.6703129248579686e-04, 1.2313522195822745e-03, -5.0228366774250242e-04, 1.6703129248579686e-04, -1.3910653088204271e-04, -1.7177487424496159e-02, 8.6439434323196465e-03, -1.2676542069329643e-03, 1.5530920065632684e-03, 8.6439434323196465e-03, -3.0711886933060752e-03, 2.6503690364040600e-03, 1.5823408628991603e-03, -1.2676542069329643e-03, 2.6503690364040600e-03, 2.0205318440554335e-05, 9.7190007034572039e-04, 1.5530920065632684e-03, 1.5823408628991603e-03, 9.7190007034572039e-04, 6.3167332201993298e-04, -1.8198829655079997e-02, 6.1783746997468043e-03, 2.7097557207645976e-03, 1.0674413677482549e-04, 6.1783746997468043e-03, -1.5255143488708853e-03, 4.2446808059708652e-04, 2.0056066309057513e-03, 2.7097557207645976e-03, 4.2446808059708652e-04, -7.0722767103247085e-05, 2.1439352866911227e-03, 1.0674413677482549e-04, 2.0056066309057513e-03, 2.1439352866911227e-03, 4.9248673587442768e-04, -1.2899230683789387e-02, 3.8809561952671224e-03, 5.8590703877280138e-03, 7.0773016587898281e-04, 3.8809561952671224e-03, -1.7333578324938207e-03, 1.5513687237511255e-03, 7.3324212478700988e-04, 5.8590703877280138e-03, 1.5513687237511255e-03, -1.6842888582058607e-03, 2.0427657447758315e-03, 7.0773016587898281e-04, 7.3324212478700988e-04, 2.0427657447758315e-03, -3.9900791227274096e-04, -1.3724818137332397e-02, 4.4851048250000233e-03, -1.3002218055153201e-03, -1.2182844898688022e-03, 4.4851048250000233e-03, -1.8202270259962889e-03, 1.9639130830478775e-03, 7.5413518528937972e-04, -1.3002218055153201e-03, 1.9639130830478775e-03, 3.1870016559458009e-04, 1.0685796268949520e-03, -1.2182844898688022e-03, 7.5413518528937972e-04, 1.0685796268949520e-03, 4.8322321243228277e-04, -1.2727886752614260e-02, 7.6627520867563600e-03, -1.0977617387408160e-03, -9.2741573884433924e-05, 7.6627520867563600e-03, -3.0923177506530018e-03, 5.2725763824365925e-04, 1.3750576763160460e-03, -1.0977617387408160e-03, 5.2725763824365925e-04, 
-3.8624246299677348e-04, 9.9356026784475603e-04, -9.2741573884433924e-05, 1.3750576763160460e-03, 9.9356026784475603e-04, -3.6185112057586651e-04, -1.4230368421296529e-02, 6.4613018087884382e-03, 4.2545431560881697e-03, 2.3312567462464729e-03, 6.4613018087884382e-03, -1.5885108206906821e-03, 3.4554820247452228e-04, 1.8419118470444126e-03, 4.2545431560881697e-03, 3.4554820247452228e-04, -2.8748861071839916e-04, 9.0566105830775748e-04, 2.3312567462464729e-03, 1.8419118470444126e-03, 9.0566105830775748e-04, 2.7039215950598432e-04, -1.2948564663328069e-02, 8.4157282283072385e-03, 5.0620127290202754e-04, -1.3314748626910642e-03, 8.4157282283072385e-03, -3.5176641019498255e-03, 1.5307289251449395e-03, 1.8525663157440806e-03, 5.0620127290202754e-04, 1.5307289251449395e-03, 5.6978788798275784e-05, 1.2935940913392739e-03, -1.3314748626910642e-03, 1.8525663157440806e-03, 1.2935940913392739e-03, 5.2341304653500667e-04, -1.2728855074017827e-02, 7.6246707479808553e-03, -9.3224478600061457e-04, -1.5620738423386569e-03, 7.6246707479808553e-03, -1.9782856856559788e-03, 1.4103550247563932e-03, 2.0801314899663139e-03, -9.3224478600061457e-04, 1.4103550247563932e-03, 4.6054846597137436e-04, 1.2330604509034212e-03, -1.5620738423386569e-03, 2.0801314899663139e-03, 1.2330604509034212e-03, 7.7627861087214880e-04, -1.7487269493133485e-02, 8.1945615648457912e-03, -1.3867371780327564e-03, -7.7203633752733088e-04, 8.1945615648457912e-03, -2.6839597442567947e-03, 2.3107751960829994e-03, 1.6061326831885306e-03, -1.3867371780327564e-03, 2.3107751960829994e-03, 6.8401777560147333e-04, 1.1030118862660431e-03, -7.7203633752733088e-04, 1.6061326831885306e-03, 1.1030118862660431e-03, 7.3617364729802910e-04, -1.3077682176402587e-02, 6.5573215542587115e-03, 8.9992300772369772e-04, 1.8631593971488946e-03, 6.5573215542587115e-03, -1.7553672105479552e-03, 1.4263355120099006e-03, -6.6131788423017066e-05, 8.9992300772369772e-04, 1.4263355120099006e-03, -1.5795558529122932e-04, 9.3709040689480281e-04, 
1.8631593971488946e-03, -6.6131788423017066e-05, 9.3709040689480281e-04, 2.7558380426612552e-04, -1.2016216250619665e-02, 1.0198974276762779e-03, 2.9684095074686417e-03, 4.7574388367168719e-04, 1.0198974276762779e-03, -1.0457484751015638e-03, 1.4462132483450711e-03, 1.4655862021442318e-03, 2.9684095074686417e-03, 1.4462132483450711e-03, -1.1640147094591868e-04, 4.5714086841669362e-04, 4.7574388367168719e-04, 1.4655862021442318e-03, 4.5714086841669362e-04, 3.7349208876016085e-04, -1.2729011740008157e-02, 6.2751467801390199e-03, 2.7374611452219005e-03, -4.8769041908477083e-04, 6.2751467801390199e-03, -2.5912066920437101e-03, -3.6502488298821795e-04, 1.2747675934628886e-03, 2.7374611452219005e-03, -3.6502488298821795e-04, -1.3461493454670274e-04, 5.1297874213225157e-04, -4.8769041908477083e-04, 1.2747675934628886e-03, 5.1297874213225157e-04, 9.4116610547478451e-05, -1.6566854215408953e-02, 7.5656107280793628e-03, 2.2151664856272313e-03, -6.2355508020778901e-04, 7.5656107280793628e-03, -1.6767633757818533e-03, 7.2857964830252552e-04, 2.0018839786305805e-03, 2.2151664856272313e-03, 7.2857964830252552e-04, 7.8168565633001156e-04, 1.2563330405130704e-03, -6.2355508020778901e-04, 2.0018839786305805e-03, 1.2563330405130704e-03, 9.4171843217346913e-04, -1.3827220555148755e-02, 8.0412338777692237e-03, -9.2914695167431839e-04, 2.5139109709975873e-03, 8.0412338777692237e-03, -2.8902523593109979e-03, 1.7337506184933089e-03, -2.4479541495431615e-04, -9.2914695167431839e-04, 1.7337506184933089e-03, -6.2590822563885222e-04, 3.3033744058861008e-04, 2.5139109709975873e-03, -2.4479541495431615e-04, 3.3033744058861008e-04, 1.1867575245440103e-04, -1.1673022270160038e-02, 6.3818705599438547e-03, 1.5529035996520919e-03, 1.9023499244602438e-03, 6.3818705599438547e-03, -2.7838066962823493e-03, -3.4731692421037671e-04, -5.1143360268925109e-04, 1.5529035996520919e-03, -3.4731692421037671e-04, -2.9269524595909200e-04, 6.5189571167041226e-04, 1.9023499244602438e-03, -5.1143360268925109e-04, 
6.5189571167041226e-04, -1.8842002149771150e-04, -1.7624872761522331e-02, 7.6102659079345675e-03, 2.9313602118366056e-03, -1.2670120134682693e-03, 7.6102659079345675e-03, -1.6176859045345274e-03, 5.2235785563708548e-04, 2.1125237303853172e-03, 2.9313602118366056e-03, 5.2235785563708548e-04, -6.9336521692627494e-05, 6.2961316498085299e-04, -1.2670120134682693e-03, 2.1125237303853172e-03, 6.2961316498085299e-04, 2.8049822298363992e-04, -1.1443728662686095e-02, 7.1090692233263322e-03, -1.0267468148229923e-03, -9.7148107774755140e-04, 7.1090692233263322e-03, -3.5986082376354236e-03, 1.0764295384459879e-03, 1.4077630540134337e-03, -1.0267468148229923e-03, 1.0764295384459879e-03, -1.4573111804904847e-04, 5.4872481291025343e-04, -9.7148107774755140e-04, 1.4077630540134337e-03, 5.4872481291025343e-04, -4.9397324463356642e-05, -1.4863131313394926e-02, 6.3276765757360525e-03, 4.1762520161465356e-03, 4.6274316270408979e-03, 6.3276765757360525e-03, -1.5123375588949818e-03, 1.5393235939542785e-03, 1.7038958618260523e-04, 4.1762520161465356e-03, 1.5393235939542785e-03, -3.8236797488742761e-04, 4.2645241632206950e-04, 4.6274316270408979e-03, 1.7038958618260523e-04, 4.2645241632206950e-04, -2.8027135821990324e-04, -9.2269149280535777e-03, 6.1800015820290153e-03, -1.1177505002325030e-03, -7.3655389049955076e-04, 6.1800015820290153e-03, -3.1949632173689504e-03, 9.2780949436812210e-04, 5.4061711672642406e-04, -1.1177505002325030e-03, 9.2780949436812210e-04, -3.6714991602973209e-04, 4.4326007298994439e-04, -7.3655389049955076e-04, 5.4061711672642406e-04, 4.4326007298994439e-04, -2.6509650841867525e-04, -1.8043017394515380e-02, 8.2372721119834628e-03, -1.3666634730605748e-03, 2.5937673526110440e-03, 8.2372721119834628e-03, -2.2765382593275749e-03, 1.8098836907605163e-03, 6.2774788742545431e-04, -1.3666634730605748e-03, 1.8098836907605163e-03, 6.3750682573144961e-04, 1.5016614599137833e-03, 2.5937673526110440e-03, 6.2774788742545431e-04, 1.5016614599137833e-03, 7.9656988901641562e-04, 
-1.7677863320201812e-02, 7.9918211430818238e-03, 2.8809808774680002e-03, 3.3219012282079618e-04, 7.9918211430818238e-03, -2.5093243675945506e-03, 1.6427685072652385e-03, 1.3849057522712679e-03, 2.8809808774680002e-03, 1.6427685072652385e-03, -6.6341536325757559e-04, 1.6383230989587684e-03, 3.3219012282079618e-04, 1.3849057522712679e-03, 1.6383230989587684e-03, -5.4016870687916257e-04, -1.2521451784673619e-02, 9.4223977497539906e-03, 2.8055309806167853e-03, 1.7872667456776705e-03, 9.4223977497539906e-03, -3.2829775106665285e-03, -1.4133846391320361e-05, 3.4392328922804288e-04, 2.8055309806167853e-03, -1.4133846391320361e-05, 6.9100703062136178e-05, 1.4271286897663935e-03, 1.7872667456776705e-03, 3.4392328922804288e-04, 1.4271286897663935e-03, 5.3937434576994730e-04, -1.3632536222978362e-02, 1.0257748639522706e-02, 1.4462286898582958e-03, 1.1312437976068149e-04, 1.0257748639522706e-02, -3.6576998284675250e-03, 1.6043422861050061e-03, 1.7092618248746980e-03, 1.4462286898582958e-03, 1.6043422861050061e-03, 1.6618519504767670e-04, 9.6235513287789881e-04, 1.1312437976068149e-04, 1.7092618248746980e-03, 9.6235513287789881e-04, 3.5802719238072031e-04, -9.2473425953727914e-03, 5.4646516574637042e-03, -9.5962421166676426e-04, -7.8247003179074371e-04, 5.4646516574637042e-03, -2.7506657954682067e-03, 5.9386441787538592e-04, 5.4897582411844530e-04, -9.5962421166676426e-04, 5.9386441787538592e-04, -3.0196113116772263e-04, 1.8573011529360198e-04, -7.8247003179074371e-04, 5.4897582411844530e-04, 1.8573011529360198e-04, -2.1319293163917321e-04, -1.2206117329009541e-02, 6.1092810650537708e-03, -1.4481345745921150e-03, -2.9722362809001516e-04, 6.1092810650537708e-03, -1.9126144266505905e-03, 1.7554634731526244e-03, 5.5972781767352870e-04, -1.4481345745921150e-03, 1.7554634731526244e-03, 8.6367120618161951e-05, 6.7487884435146381e-04, -2.9722362809001516e-04, 5.5972781767352870e-04, 6.7487884435146381e-04, 2.3556476013967504e-04, -1.5414965310465042e-02, 7.3079303890448630e-03, 
-6.8902344013111860e-05, -1.3856544124642479e-03, 7.3079303890448630e-03, -2.3520366982052667e-03, 2.2813099826687437e-03, 1.1422089660014121e-03, -6.8902344013111860e-05, 2.2813099826687437e-03, -5.6644557392149263e-05, 1.1994499087383925e-03, -1.3856544124642479e-03, 1.1422089660014121e-03, 1.1994499087383925e-03, 3.4944002873506328e-04, -1.1752221672219454e-02, 6.9456319878067867e-03, -1.5705985744796634e-03, -1.2694046046713740e-03, 6.9456319878067867e-03, -2.1753723772528176e-03, 1.5503236118177893e-03, 2.0219529729626610e-03, -1.5705985744796634e-03, 1.5503236118177893e-03, 2.4923829829079700e-04, 8.6809285519335883e-04, -1.2694046046713740e-03, 2.0219529729626610e-03, 8.6809285519335883e-04, 3.2743478195986473e-04, -1.4690777928137583e-02, 9.3382417975744429e-03, -8.7839174936228269e-04, -1.0828156619940759e-03, 9.3382417975744429e-03, -3.6232340991021171e-03, 1.5142500530220151e-03, 1.4677066708721873e-03, -8.7839174936228269e-04, 1.5142500530220151e-03, 4.0376228484767084e-06, 1.0556394847561596e-03, -1.0828156619940759e-03, 1.4677066708721873e-03, 1.0556394847561596e-03, 3.6668747259171032e-04, -1.5476441143750821e-02, 9.2269293204535301e-03, 3.6995872897747060e-04, -7.7034041709245649e-04, 9.2269293204535301e-03, -4.4645128477425761e-03, 2.1128383054205796e-03, 2.4504815100855485e-03, 3.6995872897747060e-04, 2.1128383054205796e-03, 2.2586802223079191e-04, 1.8926509467373552e-03, -7.7034041709245649e-04, 2.4504815100855485e-03, 1.8926509467373552e-03, 8.5536596431988856e-04, -1.5609938715099692e-02, 8.9557198628165934e-03, -1.3057139142808542e-03, -8.7481229959170993e-06, 8.9557198628165934e-03, -3.2036744864203165e-03, 1.8940821063869206e-03, 1.8774348378665215e-03, -1.3057139142808542e-03, 1.8940821063869206e-03, 7.2606869807052658e-04, 1.3119329296710162e-03, -8.7481229959170993e-06, 1.8774348378665215e-03, 1.3119329296710162e-03, 1.0619415404491685e-03, -1.5481617424794581e-02, 7.3302600110515892e-03, 6.5644710521001073e-04, -1.2283650081392702e-03, 
7.3302600110515892e-03, -1.8696748976957526e-03, 5.9596303915765750e-04, 1.2499531031902422e-03, 6.5644710521001073e-04, 5.9596303915765750e-04, 9.7138240642967119e-05, 8.8503568194063412e-04, -1.2283650081392702e-03, 1.2499531031902422e-03, 8.8503568194063412e-04, 2.5511830692021193e-04, -1.3520103229756882e-02, 7.8484109792015359e-03, -4.1632387603416781e-04, -1.1558280730735919e-03, 7.8484109792015359e-03, -2.0728512914414475e-03, 1.9517286698148926e-03, 2.0216583965520000e-03, -4.1632387603416781e-04, 1.9517286698148926e-03, 4.6063232535377560e-04, 1.5362873491442208e-03, -1.1558280730735919e-03, 2.0216583965520000e-03, 1.5362873491442208e-03, 5.2457975844262875e-04, -1.5104116485397874e-02, 8.5473834321937604e-03, -1.3043437235430149e-03, 1.5276622030354890e-03, 8.5473834321937604e-03, -3.3780289659805359e-03, 1.9847858497536674e-03, 5.0757574599316559e-04, -1.3043437235430149e-03, 1.9847858497536674e-03, -1.5945405766001441e-04, 1.5678019969852818e-03, 1.5276622030354890e-03, 5.0757574599316559e-04, 1.5678019969852818e-03, 2.4763257841851471e-04, -1.6361908776597107e-02, 4.3568452185025310e-03, 3.4563556520869625e-03, 4.3658362914368865e-03, 4.3568452185025310e-03, -8.6002608849411477e-04, 1.1935067562380381e-03, 9.5763305550593627e-05, 3.4563556520869625e-03, 1.1935067562380381e-03, -6.5655114602498324e-04, 2.8551486975809026e-04, 4.3658362914368865e-03, 9.5763305550593627e-05, 2.8551486975809026e-04, -5.2757450862172910e-04, -1.7807729170147439e-02, 7.1610610051428998e-03, -1.1911025226577043e-03, 2.6056388311463353e-03, 7.1610610051428998e-03, -2.1885651750746023e-03, 2.9274154172681229e-03, 8.3377402546824613e-04, -1.1911025226577043e-03, 2.9274154172681229e-03, 1.7570402880203167e-04, 1.4504468369839752e-03, 2.6056388311463353e-03, 8.3377402546824613e-04, 1.4504468369839752e-03, 6.9093609359350367e-04, -1.1247931682705574e-02, 5.7455202196775934e-03, 1.1174148432837694e-04, -1.0001185298297672e-03, 5.7455202196775934e-03, -2.1567917357754388e-03, 
6.2799688175952328e-04, 5.6474713215460648e-04, 1.1174148432837694e-04, 6.2799688175952328e-04, -1.1840763525279758e-04, 5.3189815096175985e-04, -1.0001185298297672e-03, 5.6474713215460648e-04, 5.3189815096175985e-04, -8.5340380560854686e-05, -1.0115751598013022e-02, 7.1450423354329218e-03, -9.6769024323713497e-04, 6.0983843256720333e-04, 7.1450423354329218e-03, -2.6969102530045707e-03, 1.0319270924417643e-03, 2.3496098746511552e-04, -9.6769024323713497e-04, 1.0319270924417643e-03, -8.0513400430597755e-04, 4.7088258363160609e-04, 6.0983843256720333e-04, 2.3496098746511552e-04, 4.7088258363160609e-04, -1.4457178691538466e-04
-  };
-
-  std::vector dy = {
-    -3.7309172874861328e-03, 1.3333653131861634e-03, 8.8577244948839816e-04, 4.4771776498148510e-04, -3.2036744215949474e-03, 1.5721737770764861e-03, 7.5774810984830811e-04, -7.6103439612831651e-06, -1.5642091060224157e-03, 2.0953017056858877e-03, 1.8124937780487031e-04, -1.0725465141096370e-03, -2.2923883203997790e-03, 1.8300760226060355e-03, 4.8940452841059862e-04, -6.7210654905128198e-04, -4.3341964593999359e-03, 1.4253156363736956e-03, 9.8923131293439642e-04, 1.2115998085801848e-03, -2.6188268847725871e-03, 1.7119021466805964e-03, 5.6117501695963988e-04, -4.2324237308625010e-04, -2.2900131399922951e-03, 1.8386032061780638e-03, 5.2171200216442610e-04, -6.4883325259721075e-04, -2.5812803651831646e-03, 1.7293876231322676e-03, 6.0622938832230022e-04, -4.3579477573257375e-04, -2.8530249847228155e-03, 1.6808033517384546e-03, 6.6042307748444143e-04, -2.8824157968878450e-04, -3.7393733570642322e-03, 1.3716770835582519e-03, 8.7227862962462044e-04, 4.5872483076204578e-04, -2.4437272319513359e-03, 1.7400758488808985e-03, 5.1182653748711650e-04, -5.8105073770823191e-04, -3.2810369412823396e-03, 1.4918691558186718e-03, 7.4724281784686224e-04, 6.3875403362581882e-05, -3.4247726827394570e-03, 1.4560852013918019e-03, 7.6169488667705810e-04, 1.5409667480043344e-04, -3.7529573893496722e-03, 1.3819905220282573e-03, 8.6645778079868173e-04, 4.5529019815005340e-04, -2.7761885434245126e-03, 1.6221283633832189e-03, 6.4100239162366926e-04, -2.8385959725213443e-04, -2.8619525975001991e-03, 1.6742414664644566e-03, 6.5987011922647798e-04, -2.5588583656454151e-04, -3.5782213588599009e-03, 1.4067925694494811e-03, 8.1801204214687574e-04, 2.9417370593075526e-04, -3.2179311027623852e-03, 1.5351083071313909e-03, 7.6347536572857384e-04, -1.2101519294540594e-05, -3.5358134475080953e-03, 1.4165640362083939e-03, 8.2941835466963714e-04, 2.5395670515060458e-04, -2.3142024782931113e-03, 1.8181293172609410e-03, 5.4151803735599758e-04, -6.0053505167333827e-04, -4.0744831658946472e-03, 
1.3071202590006017e-03, 9.9633651691117707e-04, 8.1295718049464744e-04, -3.8475776506513007e-03, 1.2962018740413476e-03, 9.3330073985875681e-04, 5.8452572124980187e-04, -1.9683320699194757e-03, 1.8795410802464392e-03, 3.5931418446205485e-04, -7.9622117146730320e-04, -4.2836432069418432e-03, 1.3057358949544792e-03, 1.0478903069398337e-03, 1.1388072821866995e-03, -3.6481289572086509e-03, 1.2680834323314254e-03, 9.1419213899072288e-04, 4.1250969611241915e-04, -3.7868589170986242e-03, 1.4248346144113545e-03, 8.6769642282752434e-04, 4.8009987220807757e-04, -2.8109243740096651e-03, 1.6305799665922716e-03, 6.4582051914641788e-04, -2.7233613703581204e-04, -3.8714234700849403e-03, 1.4365872229829926e-03, 7.9039059028440748e-04, 5.8082810834859922e-04, -3.2390351248452994e-03, 1.5083636177925740e-03, 7.0100191142906398e-04, 2.1276162385565475e-05, -4.0758464441795483e-03, 1.3467789743563711e-03, 9.0375839572505727e-04, 8.1824793954867955e-04, -4.2307140951998419e-03, 1.3156828039467543e-03, 1.0778378449497364e-03, 9.7419386839855905e-04, -3.8482460366827680e-03, 1.3425192918780234e-03, 8.4666567817293223e-04, 6.0686241880002418e-04, -3.5314219963828199e-03, 1.4381299141681125e-03, 7.9553487485571899e-04, 2.6599193472912579e-04, -3.0625963328189260e-03, 1.5598565683800933e-03, 7.2517085286940468e-04, -1.1417249492739269e-04, -3.2401979571803855e-03, 1.5423006052675326e-03, 7.2952676962876912e-04, -8.5276110538955337e-06, -2.3676304952203239e-03, 1.7913062056963405e-03, 5.4550664708489952e-04, -5.8142337266284694e-04, -3.3562413373964786e-03, 1.4439022806622475e-03, 7.3437787621236539e-04, 1.1756361872177236e-04, -4.3718193202676135e-03, 1.3310441122746876e-03, 1.1562039243607975e-03, 1.1472230349748561e-03, -3.3313992651614545e-03, 1.4915589370723611e-03, 7.8084944429722110e-04, 9.1372270882632316e-05, -4.3740557131606459e-03, 1.3268717399460552e-03, 1.1579324109626387e-03, 1.1521185202595687e-03, -3.7927167356510412e-03, 1.3231482293682613e-03, 8.4755885080500565e-04, 
5.4665126459383327e-04, -3.5935568152285231e-03, 1.4290305096829330e-03, 7.8606733253349130e-04, 3.1315221994287850e-04, -3.1614896010873366e-03, 1.5105982619965496e-03, 7.0021276781268450e-04, -2.3983710609199078e-05, -3.0147657599133232e-03, 1.5495419519657451e-03, 6.6899185166963951e-04, -1.2057880405017928e-04, -2.9963762079088952e-03, 1.6034868108259063e-03, 6.3607675253841956e-04, -1.8417206074768401e-04, -2.7426643259774895e-03, 1.6157336038172625e-03, 6.1401235615143987e-04, -3.3306000572104970e-04, -2.4766813065329693e-03, 1.7832827610314490e-03, 5.7063483799111604e-04, -5.1939389006707012e-04, -2.7058508682563827e-03, 1.6218793335994073e-03, 5.7753543630265211e-04, -3.4623185385953561e-04, -3.6714826224726309e-03, 1.4597702029387118e-03, 8.5442239573998893e-04, 3.7440232615804407e-04, -2.5070783356414134e-03, 1.7472384470121067e-03, 5.6462790247820919e-04, -4.6769781044539899e-04, -3.4387661142797959e-03, 1.4535061065990102e-03, 7.7895608021457275e-04, 2.0354737904839946e-04, -2.7050922014808390e-03, 1.6959138589712618e-03, 6.1077224833082030e-04, -3.8335512759285180e-04, -2.5394784289842513e-03, 1.7326482568320832e-03, 5.8030789834628870e-04, -4.9658497358456876e-04, -3.7139080629854208e-03, 1.3083146125837427e-03, 8.8709895889324357e-04, 4.3181952475237497e-04, -3.0902912715277077e-03, 1.5458493373768866e-03, 6.8330337138244770e-04, -7.7274160748078970e-05, -3.6486457748529568e-03, 1.4380340692779169e-03, 8.0537862363315113e-04, 3.6166524605696793e-04, -3.8259014218855834e-03, 1.3935398904790382e-03, 8.5063806525072705e-04, 5.0369999227149601e-04, -3.3219004182506043e-03, 1.5169900953600937e-03, 7.2943565247969642e-04, 1.2454041603774481e-04, -2.9710886292812757e-03, 1.6122308537815642e-03, 6.9407008210209398e-04, -1.8550200210251709e-04, -3.5823989245151611e-03, 1.3803897342254585e-03, 7.9659655171532566e-04, 2.9806889205486018e-04, -2.6709594617153878e-03, 1.7132749589768494e-03, 6.1556197573809268e-04, -3.9142476838036435e-04, 
-2.9481681479607618e-03, 1.5766328367677427e-03, 6.6232144637444707e-04, -1.4989734301076280e-04, -3.2491817681866895e-03, 1.5341764317035110e-03, 7.6268657981340220e-04, 2.4406488167046862e-05, -3.2248794286049952e-03, 1.4615786669513607e-03, 7.3069596608385794e-04, 5.0300823976094416e-05, 1.2628166438797038e-03, -8.9274753600505690e-04, -4.9076860067750170e-04, -4.1869309125592256e-04, 1.3715102644465138e-03, -1.0141291898274978e-03, -7.8766533967201816e-04, -3.9772035849276288e-04, 1.1612302578037791e-03, -4.3053803181958750e-04, -2.0146672160117711e-04, -7.4016307381392259e-04, 1.4537946412679723e-03, -9.5065863784825301e-04, -1.0343570332467453e-03, -3.8768498653108665e-04, 1.3854917882825165e-03, -1.0295014786804450e-03, -9.6373001786993968e-04, -3.0535774540021835e-04, 1.4511572929488627e-03, -9.2318578761469057e-04, -1.0422547691855640e-03, -4.2723072416751189e-04, 1.4452773695581647e-03, -9.7728273043188804e-04, -1.0741338772551449e-03, -3.0937363161101300e-04, 1.5259064198728280e-03, -8.8493370962583592e-04, -1.2007486502659927e-03, -3.1227283339035905e-04, 1.5436700653155932e-03, -8.7589313303263426e-04, -1.3280253258668860e-03, -1.8105115666848938e-04, 1.5404932421320258e-03, -8.5758359271832579e-04, -1.4832458365412898e-03, 2.6407770476321191e-06, 1.4155612513337844e-03, -1.0046531590963029e-03, -9.6842134578424706e-04, -3.5401027400996205e-04, 1.3400184836343154e-03, -1.0256045874409653e-03, -6.5169805282558834e-04, -4.2701606068519234e-04, 1.2857877730087135e-03, -9.0508762673802764e-04, -5.1943388839087771e-04, -4.6352002092798518e-04, 1.5051360776270669e-03, -8.9594973727753411e-04, -1.1527368214360100e-03, -3.3755565066477403e-04, 1.4546640992893533e-03, -9.5988516608341162e-04, -9.1623755155505670e-04, -4.8588890598821486e-04, 1.4016196272903870e-03, -1.0178182975371106e-03, -9.9041538684325634e-04, -3.1234386991993196e-04, 1.4055482177189956e-03, -1.0213236463436626e-03, -9.7796586357687832e-04, -3.0205558859203868e-04, 1.4242664925470684e-03, 
-9.8289044930037375e-04, -9.1476699353148475e-04, -4.3593144707404561e-04, 1.4563487652851604e-03, -9.5674318018110129e-04, -1.0204295900713581e-03, -3.8614169684175954e-04, 1.3340538380055412e-03, -9.8166445704493547e-04, -6.4235456808529520e-04, -4.6185514700680652e-04, 1.3251633800902419e-03, -9.7699364526281410e-04, -6.0020814973974911e-04, -4.4027417584270871e-04, 1.3352824913690901e-03, -9.3227658428344747e-04, -6.1244190731189806e-04, -4.9720054122240971e-04, 1.2307343717364581e-03, -8.8718012579170583e-04, -3.7907952728977293e-04, -4.7149395834188992e-04, 1.2872312473441560e-03, -1.0074761274442257e-03, -6.9178075288798071e-04, -2.7705625448618486e-04, 1.2927150624045740e-03, -9.7803534445332430e-04, -4.8720068931597341e-04, -4.6004378064328215e-04, 1.4257607447382041e-03, -9.2923784202873067e-04, -9.6755887729674294e-04, -4.6242569285732927e-04, 1.3914989006502964e-03, -1.0200176224497951e-03, -9.2458206518511248e-04, -3.4506509486466449e-04, 1.4333760204447944e-03, -9.1097262948909249e-04, -1.2188972198513615e-03, -2.5996549081574896e-04, 1.2461083659099338e-03, -8.2861754923992321e-04, -4.1605933810697306e-04, -4.8905777062213857e-04, 1.3768615064283817e-03, -9.9713742062896974e-04, -7.9858980387651933e-04, -4.3504985424060790e-04, 1.3291889767625101e-03, -9.9040477132564600e-04, -6.5776027239921122e-04, -3.9846418355279120e-04, 1.5206075965447940e-03, -9.2006057004575127e-04, -1.2474234877704595e-03, -2.3140418373219520e-04, 1.2887820380652511e-03, -9.3080226682111587e-04, -5.2786640162325342e-04, -4.4118156984124777e-04, 1.3907544080002891e-03, -9.6351013629205012e-04, -7.8303340377082974e-04, -4.7622449399401109e-04, 1.4091048902409534e-03, -9.7169766598279418e-04, -9.3316898961782319e-04, -4.0724649531488546e-04, 1.3879165258355152e-03, -1.0811464187553267e-03, -8.8168502399533827e-04, -3.0373866499100119e-04, 1.2870954343195211e-03, -8.0890289393696527e-04, -5.3267861480527198e-04, -4.9356122412130170e-04, 1.5429774711287242e-03, 
-8.5866927657185196e-04, -1.3306313542314914e-03, -1.7873354099492435e-04, 1.4021085501997211e-03, -1.0221324371805936e-03, -9.2699019936940438e-04, -3.5288337262228114e-04, 1.3239521433104891e-03, -1.0047418588945290e-03, -8.3997109532851777e-04, -2.7449751294088233e-04, 1.3868837677795806e-03, -1.0276648847350152e-03, -7.1775681753454717e-04, -4.5782917392179828e-04, 1.5808700010297582e-03, -8.3038113202047045e-04, -1.6908149849389628e-03, 2.5410913367872757e-04, 1.4647947565545553e-03, -9.5678426470656116e-04, -9.6423665069701801e-04, -4.5175848785730601e-04, 1.3804322148250373e-03, -9.9700774526987799e-04, -8.1337331990322353e-04, -4.0374879312031649e-04, 1.2800075192210976e-03, -9.2649222387085459e-04, -5.2976884029422029e-04, -4.2936605007220911e-04, 1.3810570455247023e-03, -1.0722378487912155e-03, -7.7520984586550872e-04, -3.6667718738961482e-04, 1.4037862914692491e-03, -9.9147719341820565e-04, -9.5712521087853390e-04, -3.7439255803680066e-04, 1.3299618332732993e-03, -9.9744087373244928e-04, -6.3658280665772381e-04, -4.0128206925905106e-04, 1.5066576047840761e-03, -8.7658152159021132e-04, -1.2789382944763108e-03, -2.3242884748562141e-04, 1.3071504645501193e-03, -8.8996140075365670e-04, -4.9614457597987795e-04, -5.5075072441456200e-04, 1.4159595821463559e-03, -9.9120400447633844e-04, -8.8041573992293201e-04, -4.2244235314895873e-04, 1.2554376918387320e-03, -7.9168866122310321e-04, -4.5152569311177868e-04, -5.2227602995275849e-04, 1.5629805055743966e-03, -8.5058550567704435e-04, -1.3544920149855563e-03, -1.5043929062635852e-04, 1.2912095035131571e-03, -9.1748375464204240e-04, -5.3427771054738980e-04, -4.4418198090508067e-04, 1.4057467167300063e-03, -9.6192143297912003e-04, -7.2872273247311125e-04, -5.4389115598480571e-04, 1.5409804354399271e-03, -8.6418016393906435e-04, -1.5638051161781471e-03, 1.3537325148278473e-04, 1.5124150369572446e-03, -9.0296330713242421e-04, -1.4053433983987566e-03, -5.3260164448894459e-06, 1.3265199858472445e-03, 
-1.0277688029524160e-03, -7.2309275058722795e-04, -3.4610937063137806e-04, 1.3022445273983721e-03, -9.6922788833204770e-04, -5.7901862780536037e-04, -4.2446243249374797e-04, 1.5616377636392905e-03, -8.6248219043335368e-04, -1.3732629178650024e-03, -1.3294236495870761e-04, 1.2462257930009543e-03, -8.1096438665143804e-04, -4.9300621444041675e-04, -4.4992238895687783e-04, 1.2048138996092884e-03, -7.4155402937961089e-04, -3.8562341095634543e-04, -4.8265185178004110e-04, 1.3997918624215518e-03, -1.0216209066369918e-03, -9.4397461640859136e-04, -3.4199098846130318e-04, 1.2620735286065163e-03, -8.5604575570593841e-04, -4.6109387630229590e-04, -4.8688792774158394e-04, 1.5208163918870386e-03, -8.8722580384502858e-04, -1.1514411523661054e-03, -3.6068716415424309e-04, 1.5616431264257710e-03, -8.6373104473164315e-04, -1.2257596188148397e-03, -3.1867733110531793e-04, 1.2389429452407711e-03, -7.3425077652598814e-04, -3.7818319125320504e-04, -5.5296180344098229e-04, 1.4236745199894749e-03, -9.8144413894297945e-04, -1.0339917147206214e-03, -3.3224350123284209e-04, 1.3639301846054431e-03, -9.7698616283006996e-04, -7.6271561854903563e-04, -4.4038843862668760e-04, 1.3319077186367961e-03, -9.8418504392616744e-04, -6.1766568127290646e-04, -4.5146401018658297e-04, 1.4441953622589801e-03, -9.5685273246004965e-04, -1.0902814510323519e-03, -3.0850762291715034e-04, 1.3433425680632694e-03, -9.9356279190803516e-04, -6.1534605867470541e-04, -4.6558966036282527e-04, 1.4546708164129235e-03, -9.6300590913229187e-04, -1.1337359994421921e-03, -2.5746740945545017e-04, 1.3087856139503663e-03, -1.0168973495678301e-03, -6.4497486510166216e-04, -3.7542838371827940e-04, 1.4352742897946794e-03, -1.0009281183037142e-03, -8.1467745598339078e-04, -4.9780948738641084e-04, 1.3267280323664397e-03, -9.9210831897400889e-04, -7.2110752592905965e-04, -3.7053587233734035e-04, 1.4367724596286544e-03, -9.7287646205622543e-04, -1.0689733437145245e-03, -3.0929713667895097e-04, 1.4284826558297228e-03, 
-1.0032946508087481e-03, -9.3215819081625623e-04, -3.9266944529891222e-04, 1.2638165713808633e-03, -9.4203549844022077e-04, -4.3172960405687657e-04, -4.4825396345361454e-04, 1.3144188019784330e-03, -1.0229345575007232e-03, -7.8213089400696050e-04, -2.9459056286120584e-04, 1.2977967295550502e-03, -9.1823850004044442e-04, -5.4312108628579527e-04, -4.4973852658337552e-04, 1.4589399346793047e-03, -9.6337020175007505e-04, -9.5571443993837163e-04, -4.3500741544292166e-04, 1.3679082255077248e-03, -1.0456586778204123e-03, -7.9689290020671321e-04, -3.6447108180251604e-04, 1.2192301505625884e-03, -6.8914758742765522e-04, -4.1801535483624729e-04, -5.0855084936264909e-04, 1.4209422098361410e-03, -9.7982501545086425e-04, -9.5383290882026508e-04, -4.0628628933239729e-04, 1.2840876758166296e-03, -9.5726205004418880e-04, -5.9263894588719901e-04, -3.6034174047262833e-04, 1.2193989165566138e-03, -8.6362711921495882e-04, -3.3089474187258407e-04, -4.7367739380385751e-04, 1.4454340065456679e-03, -9.4903106726323190e-04, -9.4294645768342182e-04, -4.7443129921609459e-04, 1.3223313806910361e-03, -1.0793092901145601e-03, -6.6527927680775072e-04, -3.2359110026424040e-04, 1.4973920733738924e-03, -8.8491990550246182e-04, -1.2171591571912377e-03, -3.1820729740532089e-04, 1.2211117894376268e-03, -6.9737457441970436e-04, -3.8056940530299850e-04, -5.5878299644200971e-04, 1.3871354491036012e-03, -1.0196636806439811e-03, -8.9397096134337427e-04, -3.7123677529844246e-04, 1.5073496738918853e-03, -8.9914274586309299e-04, -1.1100923521957767e-03, -3.9534394591372900e-04, 1.5389834372201414e-03, -8.6865302431473463e-04, -1.2678597357811232e-03, -2.2479040064971316e-04, 1.5857715616750692e-03, -8.4802792400804413e-04, -1.4431505536957709e-03, -6.5066771441232851e-05, 1.3040976914584432e-03, -9.5378487842587192e-04, -5.6107062491615094e-04, -4.3607781750943416e-04, 1.4361202688561447e-03, -9.5224831111481809e-04, -1.1297794380246147e-03, -2.6778384205976237e-04, 1.3635676357551174e-03, 
-9.9412255599946506e-04, -7.3051951765478731e-04, -4.4154800562627911e-04, 1.5508477502083700e-03, -8.7939607075253085e-04, -1.3561935393377267e-03, -1.2635932200992647e-04, 1.3977902932929805e-03, -9.9082462373056976e-04, -8.0283463213426192e-04, -4.5650787679754265e-04, 1.3207741731525238e-03, -9.6358005340555517e-04, -6.3085951867438014e-04, -4.3968075348902217e-04, 1.6252559503457818e-03, -8.1207361624099851e-04, -1.5190942898645995e-03, 3.4005528914352132e-05, 1.2909097015961842e-03, -9.6864624807368213e-04, -5.2347902381152912e-04, -4.3715405458455165e-04, 1.4001851162801044e-03, -9.6767807815415017e-04, -1.1232365779831662e-03, -2.6481040763848013e-04, 1.3217217954799262e-03, -9.7991362264913690e-04, -5.5341730738921137e-04, -4.8295238369989513e-04, 1.2640698090863750e-03, -9.4752318346179938e-04, -4.4218067665729841e-04, -4.6578091847270366e-04, 1.4951267809199774e-03, -9.1642762891185319e-04, -1.0214860239958733e-03, -4.6970740845900466e-04, 1.2841169680887078e-03, -8.6417312011187616e-04, -4.8407383177561302e-04, -5.0088996321260085e-04, 1.3512353593900953e-03, -1.0439736418993391e-03, -9.0759977568137897e-04, -3.0173432033581658e-04, 1.2345305066023997e-03, -8.6838807328022921e-04, -4.0413743732280768e-04, -4.4479537319962951e-04, 1.4466525287875924e-03, -9.5170674859887969e-04, -9.7973885160627201e-04, -4.3948849284228546e-04, 1.3725941331708027e-03, -1.0166781230845619e-03, -7.6755671498962628e-04, -4.3191896703312273e-04, 1.3538563227843109e-03, -1.0199928751009691e-03, -7.7442160079980896e-04, -3.8161567314677245e-04, 1.3859472864117407e-03, -1.0345636883744270e-03, -8.1387538504034307e-04, -4.0318758271787206e-04, 1.2260084446081257e-03, -8.1329688437282478e-04, -3.8608177755934897e-04, -4.6857032807702829e-04, 1.6004862228002157e-03, -8.5869813229410576e-04, -1.4050654852032346e-03, -9.6057664582574606e-05, 1.3459385533108785e-03, -1.0014225817083209e-03, -7.5445110209810162e-04, -3.6394681092073613e-04, 1.5802112204707694e-03, 
-8.8418143184290860e-04, -1.3656321230276084e-03, -9.9859855886459247e-05, 1.3938777586773245e-03, -9.8734047951481258e-04, -8.2208943704233193e-04, -4.0539504453435344e-04, 1.4360158838886581e-03, -9.5466630949037588e-04, -1.0695021763836086e-03, -3.4966385012963883e-04, 1.4782335719419745e-03, -9.2096462932788139e-04, -9.9386049752195902e-04, -4.9044154671578190e-04, 1.3242075112925250e-03, -9.0801458127461564e-04, -6.9494238580068419e-04, -4.0874412726708961e-04, 1.4746718485486884e-03, -9.4454358380030991e-04, -1.2097928041591858e-03, -1.9567585923707621e-04, 1.3680805611761193e-03, -9.9714444979884850e-04, -6.7787533895967075e-04, -4.8246661902258490e-04, 1.3149700282666921e-03, -9.8943531187651343e-04, -6.8955875031408869e-04, -4.0063500875313666e-04, 1.3856527284201953e-03, -1.0075706013894159e-03, -8.9064557198323456e-04, -3.8669351114642469e-04, 1.2372161408350122e-03, -8.1355539160972760e-04, -5.0496057042071407e-04, -4.0950187262171203e-04, 1.2788105421023745e-03, -8.8057490788653024e-04, -5.8240597405691328e-04, -3.9217722061436343e-04
-  };
+      -1.0600000163027882e+02, 7.7059358807135015e+02,  -5.6954714749735385e+03,
+      1.2167808756610991e+03,  -7.6199102434332218e+01, 1.0706136029373441e+00,
+      -1.0600000164528124e+02, 7.7059358630452323e+02,  -5.6954715659539552e+03,
+      1.2167808757436076e+03,  -7.6199099707724926e+01, 1.0706134206080884e+00,
+      -1.0600000163027882e+02, 7.7059358807135015e+02,  -5.6954714749735385e+03,
+      1.2167808756610991e+03,  -7.6199102434332218e+01, 1.0706136029373441e+00,
+      -1.0600000164528124e+02, 7.7059358630452323e+02,  -5.6954715659539552e+03,
+      1.2167808757436076e+03,  -7.6199099707724926e+01, 1.0706134206080884e+00,
+      -9.6000006759336443e+01, 6.2969719646863621e+02,  -4.2053706363664551e+03,
+      9.0372155784831205e+02,  -5.7600014239472898e+01, 8.6528676197113796e-01,
+      -9.6000006828502180e+01, 6.2969718981238339e+02,  -4.2053709121998018e+03,
+      9.0372156236848912e+02,  -5.7600006817493266e+01, 8.6528625106787871e-01,
+      -9.6000006759336443e+01, 6.2969719646863621e+02,  -4.2053706363664551e+03,
+      9.0372155784831205e+02,  -5.7600014239472898e+01, 8.6528676197113796e-01,
+      -9.6000006828502180e+01, 6.2969718981238339e+02,  -4.2053709121998018e+03,
+      9.0372156236848912e+02,  -5.7600006817493266e+01, 8.6528625106787871e-01,
+      -8.6000028021606425e+01, 5.0303296429845562e+02,  -3.0008648248894533e+03,
+      6.4939597734382562e+02,  -4.2250984019314707e+01, 6.8180015607155764e-01,
+      -8.6000028340480625e+01, 5.0303293978396903e+02,  -3.0008656209622986e+03,
+      6.4939600529391078e+02,  -4.2250965541906716e+01, 6.8179882734268982e-01,
+      -8.6000028021606425e+01, 5.0303296429845562e+02,  -3.0008648248894533e+03,
+      6.4939597734382562e+02,  -4.2250984019314707e+01, 6.8180015607155764e-01,
+      -8.6000028340480625e+01, 5.0303293978396903e+02,  -3.0008656209622986e+03,
+      6.4939600529353049e+02,  -4.2250965541830588e+01, 6.8179882733888086e-01,
+      -7.6000116148038558e+01, 3.9060139597613619e+02,  -2.0515743554479322e+03,
+      4.4772754091167945e+02,  -2.9848087537832814e+01, 5.2014755686537917e-01,
+      -7.6000117618125429e+01, 3.9060130821883052e+02,  -2.0515765138621105e+03,
+      4.4772766653712006e+02,  -2.9848047259266409e+01, 5.2014443989116910e-01,
+      -7.6000116148038558e+01, 3.9060139597613619e+02,  -2.0515743554479322e+03,
+      4.4772754091167945e+02,  -2.9848087537832814e+01, 5.2014755686537917e-01,
+      -7.6000117618125742e+01, 3.9060130821877993e+02,  -2.0515765138659344e+03,
+      4.4772766652483722e+02,  -2.9848047256692499e+01, 5.2014443976043645e-01,
+      -6.6000481290731443e+01, 2.9240425245900917e+02,  -1.3271250821434478e+03,
+      2.9263955624337893e+02,  -2.0087224005740719e+01, 3.8031147992206349e-01,
+      -6.6000488067863742e+01, 2.9240394960550276e+02,  -1.3271304743966571e+03,
+      2.9264002765325057e+02,  -2.0087154325946980e+01, 3.8030522013794582e-01,
+      -6.6000481290731443e+01, 2.9240425245900917e+02,  -1.3271250821434478e+03,
+      2.9263955624337893e+02,  -2.0087224005740719e+01, 3.8031147992206349e-01,
+      -6.6000488067883694e+01, 2.9240394960308691e+02,  -1.3271304745319526e+03,
+      2.9264002727267626e+02,  -2.0087154245656002e+01, 3.8030521605011575e-01,
+      -5.6001992867343972e+01, 2.0844745574402617e+02,  -7.9715799906587699e+02,
+      1.7805563184427194e+02,  -1.2663929104029080e+01, 2.6224978307822894e-01,
+      -5.6002024103130161e+01, 2.0844646075692629e+02,  -7.9717003898786652e+02,
+      1.7805715054974732e+02,  -1.2663864677938077e+01, 2.6224029170957303e-01,
+      -5.6001992867343972e+01, 2.0844745574402617e+02,  -7.9715799906587699e+02,
+      1.7805563184427194e+02,  -1.2663929104029080e+01, 2.6224978307822894e-01,
+      -5.6002024104383771e+01, 2.0844646064871867e+02,  -7.9717004324410516e+02,
+      1.7805714044473001e+02,  -1.2663862524337585e+01, 2.6224018166598279e-01,
+      -4.6008230210744550e+01, 1.3874976550319553e+02,  -4.3134867537287749e+02,
+      9.7902623595157010e+01,  -7.2734403121911884e+00, 1.6589123996688057e-01,
+      -4.6008373996710617e+01, 1.3874671965012058e+02,  -4.3137141216256458e+02,
+      9.7906861443792735e+01,  -7.2735856084076280e+00, 1.6588642735924275e-01,
+      -4.6008230210744550e+01, 1.3874976550319553e+02,  -4.3134867537287749e+02,
+      9.7902623595157010e+01,  -7.2734403121911884e+00, 1.6589123996688057e-01,
+      -4.6008374075307870e+01, 1.3874671513440606e+02,  -4.3137152784492957e+02,
+      9.7906652364871050e+01,  -7.2735401377994249e+00, 1.6588408717348646e-01,
+      -3.6033642533368131e+01, 8.3364086172019398e+01,  -1.9942175516407502e+02,
+      4.6124022747838069e+01,  -3.6130563858549958e+00, 9.1249773312287188e-02,
+      -3.6034298111245583e+01, 8.3355843868269616e+01,  -1.9945266030093268e+02,
+      4.6135000705962462e+01,  -3.6142786797647353e+00, 9.1293932043118198e-02,
+      -3.6033642533368131e+01, 8.3364086172019398e+01,  -1.9942175516407502e+02,
+      4.6124022747838069e+01,  -3.6130563858549958e+00, 9.1249773312287188e-02,
+      -3.6034302998781108e+01, 8.3355675173745269e+01,  -1.9945516784358935e+02,
+      4.6132303200740992e+01,  -3.6136582565667807e+00, 9.1261386291659793e-02,
+      -2.6132076703837274e+01, 4.2398929436319683e+01,  -7.1037171119057973e+01,
+      1.3425662262407457e+01,  -7.5172495708992593e-01, 7.7522572203268742e-03,
+      -2.6134776894873077e+01, 4.2384732735328775e+01,  -7.1030526549717337e+01,
+      1.3431455085299461e+01,  -7.5302028721199155e-01, 7.8186246126207160e-03,
+      -2.6132076703837274e+01, 4.2398929436319683e+01,  -7.1037171119057973e+01,
+      1.3425662262405055e+01,  -7.5172495708944420e-01, 7.7522572203027138e-03,
+      -2.6135071381093578e+01, 4.2379566840123424e+01,  -7.1067162844830236e+01,
+      1.3434603316099608e+01,  -7.5251233833488806e-01, 7.7734884077347950e-03,
+      -2.2221480705551805e+01, 3.0067218434037404e+01,  -4.1779705297521097e+01,
+      -1.9077757705724110e+02, 3.6413466026808294e+02,  -1.6067397401486718e+02,
+      -2.2225430071703467e+01, 3.0060809113889512e+01,  -4.1712800191721314e+01,
+      -1.9084786311022177e+02, 3.6410062714257685e+02,  -1.6063028238785057e+02,
+      -2.2221480705551830e+01, 3.0067218434036263e+01,  -4.1779705297545611e+01,
+      -1.9077757705723738e+02, 3.6413466026815809e+02,  -1.6067397401492047e+02,
+      -2.2226913938674084e+01, 3.0042371820589185e+01,  -4.1801582285426832e+01,
+      -1.9048619249019526e+02, 3.6373874557858261e+02,  -1.6052358406417352e+02,
+      -2.1250858373060836e+01, 2.7343847665267702e+01,  -3.6044215009418814e+01,
+      -1.7618484800469861e+02, 3.3120085405644409e+02,  -1.4534825256321494e+02,
+      -2.1254939505030809e+01, 2.7342716030835884e+01,  -3.5955450545431681e+01,
+      -1.7635550119316844e+02, 3.3127447930769307e+02,  -1.4533876561022046e+02,
+      -2.1250858373060954e+01, 2.7343847665262818e+01,  -3.6044215009514119e+01,
+      -1.7618484800464822e+02, 3.3120085405666612e+02,  -1.4534825256338749e+02,
+      -2.1257155379297881e+01, 2.7317691772612619e+01,  -3.6063526926252166e+01,
+      -1.7588696592837897e+02, 3.3079005662384850e+02,  -1.4519086534447842e+02,
+      -2.0283472228681301e+01, 2.4763027042036295e+01,  -3.0876160316998963e+01,
+      -1.6184864900381874e+02, 2.9976970905591691e+02,  -1.3084395423768876e+02,
+      -2.0287461515322455e+01, 2.4769400540137131e+01,  -3.0762734380983186e+01,
+      -1.6214886052089241e+02, 2.9998995088792128e+02,  -1.3088331758129965e+02,
+      -2.0283472228681809e+01, 2.4763027042017129e+01,  -3.0876160317336627e+01,
+      -1.6184864900359682e+02, 2.9976970905662938e+02,  -1.3084395423826805e+02,
+      -2.0290765181946348e+01, 2.4735639907973120e+01,  -3.0892738413082597e+01,
+      -1.6154574482310053e+02, 2.9934595420013272e+02,  -1.3068028494926122e+02,
+      -1.9319499689234629e+01, 2.2323824431805683e+01,  -2.6243395369841849e+01,
+      -1.4782286378121026e+02, 2.6985759662396487e+02,  -1.1715474197881395e+02,
+      -1.9323022570439292e+01, 2.2340565860680357e+01,  -2.6102786429129356e+01,
+      -1.4828764857305418e+02, 2.7027298759214750e+02,  -1.1726163007473576e+02,
+      -1.9319499689236839e+01, 2.2323824431730525e+01,  -2.6243395371031539e+01,
+      -1.4782286378021576e+02, 2.6985759662609979e+02,  -1.1715474198068593e+02,
+      -1.9327939259284843e+01, 2.2295320666731183e+01,  -2.6257097174199931e+01,
+      -1.4751677383623073e+02, 2.6942341041084092e+02,  -1.1698575776762208e+02,
+      -1.8359079763330211e+01, 2.0025118950280675e+01,  -2.2113826757823226e+01,
+      -1.3415932552431914e+02, 2.4147795894487624e+02,  -1.0427314537549884e+02,
+      -1.8361534194530734e+01, 2.0055847278170305e+01,  -2.1944107342764479e+01,
+      -1.3482982214648752e+02, 2.4214772485703989e+02,  -1.0447085300268679e+02,
+      -1.8359079763339750e+01, 2.0025118949989704e+01,  -2.2113826761939308e+01,
+      -1.3415932552009582e+02, 2.4147795895089951e+02,  -1.0427314538136979e+02,
+      -1.8368836959765495e+01, 1.9995657614892380e+01,  -2.2124533894067383e+01,
+      -1.3385233293246981e+02, 2.4103659293914149e+02,  -1.0410011400771683e+02,
+      -1.7402299525814517e+01, 1.7865597763687486e+01,  -1.8455503416511757e+01,
+      -1.2090765118569301e+02, 2.1464125749038132e+02,  -9.2190581022134992e+01,
+      -1.7402744551259310e+01, 1.7914800567904472e+01,  -1.8255754666855470e+01,
+      -1.2183089355280822e+02, 2.1563582256173194e+02,  -9.2507405324257306e+01,
+      -1.7402299525855486e+01, 1.7865597762572605e+01,  -1.8455503430527756e+01,
+      -1.2090765116826699e+02, 2.1464125750558804e+02,  -9.2190581039770791e+01,
+      -1.7413567239985614e+01, 1.7835392747330133e+01,  -1.8463115133795956e+01,
+      -1.2060260469703572e+02, 2.1419685510959093e+02,  -9.2015134441585104e+01,
+      -1.6449179896085464e+01, 1.5843762224435309e+01,  -1.5236722252652665e+01,
+      -1.0811515163854509e+02, 1.8935506712501905e+02,  -8.0897437157402223e+01,
+      -1.6446174965543889e+01, 1.5916874201410112e+01,  -1.5007553197461570e+01,
+      -1.0934291295595986e+02, 1.9075532567542470e+02,  -8.1366596347119696e+01,
+      -1.6449179896260411e+01, 1.5843762220214204e+01,  -1.5236722299508587e+01,
+      -1.0811515156878269e+02, 1.8935506715588940e+02,  -8.0897437207525684e+01,
+      -1.6462173655481337e+01, 1.5813096619069219e+01,  -1.5241142983208677e+01,
+      -1.0781563484017332e+02, 1.8891289499393798e+02,  -8.0721658713418606e+01,
+      -1.5499661595231082e+01, 1.3957945516559789e+01,  -1.2426145992195885e+01,
+      -9.5826844741964834e+01, 1.6562434781973772e+02,  -7.0383233416004117e+01,
+      -1.5491037589250178e+01, 1.4061349904707843e+01,  -1.2170301483989650e+01,
+      -9.7412966929875139e+01, 1.6751874597575440e+02,  -7.1041920384880939e+01,
+      -1.5499661595973759e+01, 1.3957945500778198e+01,  -1.2426146145776961e+01,
+      -9.5826844470313858e+01, 1.6562434784656404e+02,  -7.0383233547510557e+01,
+      -1.5514618579274794e+01, 1.3927192540790591e+01,  -1.2427264674287118e+01,
+      -9.5537423121432880e+01, 1.6519113036542510e+02,  -7.0209783384625098e+01,
+      -1.4553592409098401e+01, 1.2206343505203831e+01,  -9.9929274597052196e+00,
+      -8.4085595900823435e+01, 1.4345191724964303e+02,  -6.0636862050381758e+01,
+      -1.4536130507533649e+01, 1.2347228125716077e+01,  -9.7159302678980044e+00,
+      -8.6081002959763751e+01, 1.4592996741513730e+02,  -6.1523840242331410e+01,
+      -1.4553592412232879e+01, 1.2206343446986155e+01,  -9.9929279524397305e+00,
+      -8.4085594870780753e+01, 1.4345191706222485e+02,  -6.0636862352071532e+01,
+      -1.4570766853404239e+01, 1.2175998366492486e+01,  -9.9905856922863112e+00,
+      -8.3812185051328299e+01, 1.4303633648493073e+02,  -6.0469165577726159e+01,
+      -1.3610717065161962e+01, 1.0587059629986399e+01,  -7.9068321681349163e+00,
+      -7.2932404423885004e+01, 1.2283913327111270e+02,  -5.1646910322317169e+01,
+      -1.3579708436673444e+01, 1.0773027159520954e+01,  -7.6175370796795425e+00,
+      -7.5376833196183071e+01, 1.2597958225245242e+02,  -5.2797863799745748e+01,
+      -1.3610717078313911e+01, 1.0587059418306087e+01,  -7.9068337121483454e+00,
+      -7.2932400620636059e+01, 1.2283913169238102e+02,  -5.1646910832841897e+01,
+      -1.3630368323321786e+01, 1.0557789879027116e+01,  -7.9007777139483810e+00,
+      -7.2682825476758552e+01, 1.2245259140017740e+02,  -5.1489446559796768e+01,
+      -1.2670671078399982e+01, 9.0981634949263963e+00,  -6.1383490362855788e+00,
+      -6.2406844162279825e+01, 1.0378677653422224e+02,  -4.3402055519687693e+01,
+      -1.2619333100308433e+01, 9.3364634226935799e+00,  -5.8491811509717584e+00,
+      -6.5316414528433455e+01, 1.0763857666200300e+02,  -4.4841832720191050e+01,
+      -1.2670671133253135e+01, 9.0981627374157021e+00,  -6.1383537481895356e+00,
+      -6.2406830503476570e+01, 1.0378676818216074e+02,  -4.3402055529436716e+01,
+      -1.2693036794620980e+01, 9.0708908225804148e+00,  -6.1281713411274001e+00,
+      -6.2191660620037396e+01, 1.0344456594081470e+02,  -4.3260806640248063e+01,
+      -1.1732979767504439e+01, 7.7377614739662697e+00,  -4.6587775146685351e+00,
+      -5.2547655563671029e+01, 8.6296103981829802e+01,  -3.5891515805495345e+01,
+      -1.1651721415208119e+01, 8.0340005825064456e+00,  -4.3852919661646119e+00,
+      -5.5898160750405737e+01, 9.0851291378134590e+01,  -3.7622755083739385e+01,
+      -1.1732979994779518e+01, 7.7377588120662892e+00,  -4.6587914600219875e+00,
+      -5.2547607987974565e+01, 8.6296066930227624e+01,  -3.5891510429190419e+01,
+      -1.1758218632638741e+01, 7.7137968422318544e+00,  -4.6438239588320966e+00,
+      -5.2381405657406454e+01, 8.6019170302439520e+01,  -3.5774653697918737e+01,
+      -1.0797063195543267e+01, 6.5040766534586290e+00,  -3.4402783696562169e+00,
+      -4.3393478931462226e+01, 7.0370032342568010e+01,  -2.9105535302381853e+01,
+      -1.0672637254876815e+01, 6.8603244928014488e+00,  -3.1995767859681346e+00,
+      -4.7101348454718874e+01, 7.5530774605740319e+01,  -3.1094453979913311e+01,
+      -1.0797064129672576e+01, 6.5040675030570139e+00,  -3.4403181344841500e+00,
+      -4.3393319126804485e+01, 7.0369884883020177e+01,  -2.9105501594155889e+01,
+      -1.0825134802124644e+01, 6.4853446725127366e+00,  -3.4195560956016346e+00,
+      -4.3296381389022351e+01, 7.0187483762520671e+01,  -2.9024415860031247e+01,
+      -9.8622468030169337e+00, 5.3955359781222549e+00,  -2.4558741324534137e+00,
+      -3.4983728078555984e+01, 5.6014425934291204e+01,  -2.3035887876475471e+01,
+      -9.6769173769353625e+00, 5.8079540801032961e+00,  -2.2635143148159220e+00,
+      -3.8890523502249145e+01, 6.1563046720547966e+01,  -2.5198820521877391e+01,
+      -9.8622505990399034e+00, 5.3955054149765509e+00,  -2.4559821583353774e+00,
+      -3.4983216045684472e+01, 5.6013889382190079e+01,  -2.3035736114340502e+01,
+      -9.8926597117464805e+00, 5.3849440641688187e+00,  -2.4279562878572039e+00,
+      -3.4983707025980287e+01, 5.5966629574570753e+01,  -2.3006306589550750e+01,
+      -8.9277749780883457e+00, 4.4108678323349286e+00,  -1.6793815271288624e+00,
+      -2.7359655656676122e+01, 4.3239544183593061e+01,  -1.7676416286664047e+01,
+      -8.6587749152265552e+00, 4.8674392165289442e+00,  -1.5450097170494306e+00,
+      -3.1230915545542118e+01, 4.8829474992442343e+01,  -1.9874755288141955e+01,
+      -8.9277901202336185e+00, 4.4107699183102085e+00,  -1.6796551456533098e+00,
+      -2.7358123514289456e+01, 4.3237769027728554e+01,  -1.7675844947587926e+01,
+      -8.9590559763951383e+00, 4.4128957610428623e+00,  -1.6423658138809611e+00,
+      -2.7493743583145054e+01, 4.3380518846300511e+01,  -1.7719639183506050e+01,
+      -7.9928164326293913e+00, 3.5492331091008302e+00,  -1.0852462622393610e+00,
+      -2.0565792757352423e+01, 3.2061909496398073e+01,  -1.3023704651715642e+01,
+      -7.6125412569887647e+00, 4.0287966748633526e+00,  -1.0084592804412351e+00,
+      -2.4116992333062022e+01, 3.7252797603904497e+01,  -1.5077495076198684e+01,
+      -7.9928747817255603e+00, 3.5489404571097585e+00,  -1.0858609980296849e+00,
+      -2.0561701094768868e+01, 3.2056747083970720e+01,  -1.3021877019728107e+01,
+      -8.0213899495838241e+00, 3.5708128515175943e+00,  -1.0368753205735253e+00,
+      -2.0877831538201836e+01, 3.2456559535389509e+01,  -1.3165540198118645e+01,
+      -7.0564174984379102e+00, 2.8104770395789380e+00,  -6.4821407306458223e-01,
+      -1.4652118176169953e+01, 2.2507145963021038e+01,  -9.0780963613608154e+00,
+      -6.5338936679228468e+00, 3.2846161494194233e+00,  -6.1760141818709846e-01,
+      -1.7606122820367215e+01, 2.6855555289500277e+01,  -1.0803821410528570e+01,
+      -7.0566263531717324e+00, 2.8097184139861691e+00,  -6.4925197579297411e-01,
+      -1.4643483271177150e+01, 2.2495243692983838e+01,  -9.0734373052814821e+00,
+      -7.0742646195707266e+00, 2.8621047467298468e+00,  -5.8641470402843421e-01,
+      -1.5178915176777426e+01, 2.3211717123277591e+01,  -9.3414295847965061e+00,
+      -6.1172231064332783e+00, 2.1957964102200167e+00,  -3.4265643705632465e-01,
+      -9.6769153352706798e+00, 1.4613873405033004e+01,  -5.8450824172251430e+00,
+      -5.4212678780860326e+00, 2.6341589573018260e+00,  -3.4085224757280796e-01,
+      -1.1835854891340576e+01, 1.7794701474942944e+01,  -7.1075278532253687e+00,
+      -6.1178367984533244e+00, 2.1945528943967396e+00,  -3.4261268423617658e-01,
+      -9.6695829134679272e+00, 1.4600877298870854e+01,  -5.8381668136523013e+00,
+      -6.1072022151656586e+00, 2.2922503774685161e+00,  -2.6715334266026142e-01,
+      -1.0408120531614587e+01, 1.5617405440391840e+01,  -6.2270636615178061e+00,
+      -5.1722074807324017e+00, 1.7098190643016411e+00,  -1.4098618492175408e-01,
+      -5.7061337346696464e+00, 8.4331806866534098e+00,  -3.3349192888568142e+00,
+      -4.2766424379800121e+00, 2.0860564217794284e+00,  -1.5548660419053545e-01,
+      -7.0034949575065015e+00, 1.0332245608764421e+01,  -4.0873492185766374e+00,
+      -5.1727690165421372e+00, 1.7132539127425084e+00,  -1.2776576793785877e-01,
+      -5.7565343018918274e+00, 8.4941254548170697e+00,  -3.3479852132230872e+00,
+      -5.0998839330979591e+00, 1.8678855512825561e+00,  -5.7718910331047868e-02,
+      -6.5095346397755423e+00, 9.5462002113817768e+00,  -3.7632628689263172e+00,
+      -4.2112469382255613e+00, 1.3675717927787789e+00,  -9.4961575783498800e-03,
+      -2.7877417589321136e+00, 3.9953503912711956e+00,  -1.5499906707437840e+00,
+      -3.1046711877098376e+00, 1.6568346830533449e+00,  -4.5990009889900242e-02,
+      -3.3140676307068091e+00, 4.7472200808709299e+00,  -1.8492173878772247e+00,
+      -4.1976749320353317e+00, 1.4246952243441517e+00,  8.7531923058200650e-02,
+      -3.0996975434049761e+00, 4.4668738099197531e+00,  -1.7103055321708385e+00,
+      -4.0163145894665320e+00, 1.5923303121893606e+00,  5.8249749369824022e-02,
+      -3.3748048713195491e+00, 4.7925769874900315e+00,  -1.8598420111853879e+00,
+      -3.1955533414298376e+00, 1.2168024121915868e+00,  9.9474205814620603e-02,
+      -8.6811124876189694e-01, 1.1994338853723501e+00,  -4.4837238870567747e-01,
+      -1.9098914522594992e+00, 1.3654451552507061e+00,  2.9537044429980407e-03,
+      -9.3701125207094127e-01, 1.2575365835116745e+00,  -4.7248060681970733e-01,
+      -3.0285770502890443e+00, 1.6166340190704305e+00,  4.8662683065338386e-01,
+      -1.2308607057515726e+00, 1.6114560066217587e+00,  -6.5896729332189652e-01,
+      -2.8078044229222514e+00, 1.4555130910035559e+00,  9.0876948497501955e-02,
+      -1.0566809618626720e+00, 1.3938154223720176e+00,  -5.2279617091852160e-01,
+      -1.9963264755188566e+00, 1.3672906754961440e+00,  2.0801988470625002e-01,
+      2.0083818728351077e-02,  -1.5135587406137185e-02, -1.4175240342178652e-02,
+      -6.9344786794476854e-01, 1.2280621078720415e+00,  1.2333381103148277e-02,
+      -1.0895386066093759e-02, 2.1764282171790141e-02,  -1.0106900291744604e-02,
+      -1.2036881930169383e+00, 2.0482931230000392e+00,  -1.2689218008973949e-01,
+      -5.0580690719339239e-01, 3.4047786101030464e-01,  -7.0959386937004015e-02,
+      -1.4470760938303664e+00, 1.4285049373060201e+00,  5.5764887956399375e-02,
+      -2.9461990750009881e-02, 2.3005167601875431e-02,  -1.0760396189439407e-02,
+      -4.3024292433642597e-01, 1.7121633497582587e+00,  3.5705413032693957e-02,
+      -9.9216800479772127e-01, 1.5115432403429119e+00,  -6.3985596276149748e-01,
+      5.4770961684437192e-01,  1.2565653391084903e+00,  9.1639130181564755e-03,
+      -6.8547618650262643e-01, 1.2037212931265591e+00,  -5.1526772142324506e-01,
+      4.8142431677326969e-01,  1.2842025505965851e+00,  -3.1103960497811806e-01,
+      -3.8667287940463613e-01, 9.2663039525338942e-01,  -4.1330437951972537e-01,
+      1.9976512094478704e-02,  1.4898674304290889e+00,  -2.1940405767858565e-03,
+      -8.0791207141984167e-01, 1.3979310081478775e+00,  -5.9845265079421794e-01,
+      1.1971451112382212e+00,  1.6539633089946477e+00,  -2.7009878691796618e-01,
+      -2.8868139196850624e+00, 4.7294193613612734e+00,  -1.9578020397520424e+00,
+      1.8164162541717044e+00,  1.4570111710269262e+00,  2.2385898037164991e-02,
+      -3.1195681762439769e+00, 4.9723722392038878e+00,  -2.0423972644796100e+00,
+      1.5812403987207633e+00,  1.1421043858413655e+00,  -4.4319666868952730e-02,
+      -2.3144705949527720e+00, 3.7448930479898297e+00,  -1.5426803544433196e+00,
+      1.4992161878806018e+00,  1.6612039136364238e+00,  -2.2870713891204597e-02,
+      -3.4442115437939465e+00, 5.5057190995408973e+00,  -2.2657208348376137e+00,
+      2.4658130352390710e+00,  1.5819912227884063e+00,  -1.3204477532594588e-01,
+      -5.7752803465671017e+00, 9.0677018990478242e+00,  -3.6843468204828174e+00,
+      3.1062201217160963e+00,  1.8205810727868250e+00,  7.3942159732456811e-02,
+      -7.3418038323250947e+00, 1.1309154676354810e+01,  -4.5733470083866452e+00,
+      2.5667672162869133e+00,  1.3762236869878626e+00,  5.4823291778512563e-02,
+      -5.5558964069977943e+00, 8.5620133672289516e+00,  -3.4575259608624478e+00,
+      2.9333361085351610e+00,  1.9771000784477066e+00,  2.1600903596218385e-02,
+      -7.7786452012965430e+00, 1.2026327126407146e+01,  -4.8722408979121159e+00,
+      3.5238342146994350e+00,  1.8411341262124141e+00,  1.0485737443151430e-01,
+      -1.0316470080846322e+01, 1.5628354265192609e+01,  -6.2547428286449396e+00,
+      4.3947471898784478e+00,  2.3129375587624681e+00,  1.6998863701958250e-01,
+      -1.3069120913924280e+01, 1.9764673064124775e+01,  -7.9234176878170990e+00,
+      3.5464051944219954e+00,  1.7786047141550632e+00,  1.8395466553434961e-01,
+      -1.0256713338978345e+01, 1.5450540198835597e+01,  -6.1709943751208902e+00,
+      4.3074781177775723e+00,  2.4284702978185178e+00,  1.2121907902830774e-01,
+      -1.3510697720561426e+01, 2.0490823414440431e+01,  -8.2265504110307699e+00,
+      4.5269670710447079e+00,  2.3411415500822019e+00,  3.7814443659878427e-01,
+      -1.6533454371385766e+01, 2.4532574055181296e+01,  -9.7222898630871342e+00,
+      5.6498078480438974e+00,  2.8871559084424092e+00,  3.1648740182441881e-01,
+      -1.9832336139347099e+01, 2.9630584562783888e+01,  -1.1804975183138390e+01,
+      4.5317970588477650e+00,  2.3235629480266455e+00,  4.0711209040396701e-01,
+      -1.6523611973754900e+01, 2.4482080409856291e+01,  -9.6968326211377835e+00,
+      5.6107427774726322e+00,  2.9693568967987254e+00,  2.6856229367890733e-01,
+      -2.0186235796983127e+01, 3.0228033555488111e+01,  -1.2057362656117963e+01,
+      5.5230828784340904e+00,  3.0159142144119913e+00,  7.5032702265793638e-01,
+      -2.4452361306480910e+01, 3.5745746299744695e+01,  -1.4059387633540990e+01,
+      6.8467243986091164e+00,  3.5205846294935204e+00,  5.5323452910250115e-01,
+      -2.7424447720726722e+01, 4.0542113968978946e+01,  -1.6058340606199877e+01,
+      5.5241079122419858e+00,  3.0111097413061287e+00,  7.6043241689918206e-01,
+      -2.4453330947201032e+01, 3.5733842835424838e+01,  -1.4052622761934279e+01,
+      6.8330970703372866e+00,  3.5730950345697865e+00,  5.0442967447855436e-01,
+      -2.7630302835415993e+01, 4.0921397061842079e+01,  -1.6223699529825666e+01,
+      6.5233214752268127e+00,  3.8455313715589599e+00,  1.2738445662734672e+00,
+      -3.4142511056048967e+01, 4.9288751118195229e+01,  -1.9258816488331760e+01,
+      7.9798691992574877e+00,  4.2304633704347614e+00,  9.4916911879724064e-01,
+      -3.6082800915305256e+01, 5.2740474636382487e+01,  -2.0757970588732530e+01,
+      6.5235391967368317e+00,  3.8442392655293900e+00,  1.2772689685023881e+00,
+      -3.4144245582802192e+01, 4.9286600694030149e+01,  -1.9257235266278844e+01,
+      7.9780164759860508e+00,  4.2581364755189171e+00,  9.0490824102641643e-01,
+      -3.6146890048111374e+01, 5.2902251888236343e+01,  -2.0834714063750525e+01,
+      7.5301209868737518e+00,  4.8266093670811516e+00,  1.9906532239804082e+00,
+      -4.5696171225139402e+01, 6.5222794336738914e+01,  -2.5330008845677121e+01,
+      9.0592048208341964e+00,  5.0524444639807982e+00,  1.5639083038511417e+00,
+      -4.6227354827270197e+01, 6.6742768625790532e+01,  -2.6090733281390481e+01,
+      7.5301672757177256e+00,  4.8262668988539703e+00,  1.9917837214882572e+00,
+      -4.5697152262800707e+01, 6.5222641787790508e+01,  -2.5329699752317662e+01,
+      9.0617089689058279e+00,  5.0627200474303731e+00,  1.5306087886050987e+00,
+      -4.6201245261995687e+01, 6.6753711704174307e+01,  -2.6103836713323240e+01,
+      8.5439978438576958e+00,  5.9605352581937785e+00,  2.9388171122244109e+00,
+      -5.9213652478598007e+01, 8.3623964589400401e+01,  -3.2288651007290504e+01,
+      1.0100238105795977e+01,  6.0156046860821641e+00,  2.4311227628788585e+00,
+      -5.8189717323516248e+01, 8.2972590004142106e+01,  -3.2212869674305303e+01,
+      8.5440076687321067e+00,  5.9604459430021439e+00,  2.9391801366526531e+00,
+      -5.9214078468041464e+01, 8.3624068891376510e+01,  -3.2288610777657510e+01,
+      1.0103667533796683e+01,  6.0158650887345448e+00,  2.4107760944314816e+00,
+      -5.8125625048064265e+01, 8.2906979417176174e+01,  -3.2191629006406409e+01,
+      9.5650113177877785e+00,  7.2498153679976820e+00,  4.1551371399277919e+00,
+      -7.4795843598083408e+01, 1.0457037732454131e+02,  -4.0151433068943419e+01,
+      1.1116968561077568e+01,  7.1347098863330896e+00,  3.5688140741297674e+00,
+      -7.2151486218593305e+01, 1.0165680693075836e+02,  -3.9206269356622016e+01,
+      9.5650133940644455e+00,  7.2497924894015711e+00,  4.1552503042122613e+00,
+      -7.4796005009548836e+01, 1.0457044971811401e+02,  -4.0151435976986221e+01,
+      1.1120034079668221e+01,  7.1303147700774092e+00,  3.5594873892317103e+00,
+      -7.2082067018068685e+01, 1.0156598726189708e+02,  -3.9171834664292227e+01,
+      1.0593064483227742e+01,  8.6969028070512202e+00,  5.6755396034912966e+00,
+      -9.2539537763180832e+01, 1.2813560149579646e+02,  -4.8933613418447223e+01,
+      1.2119543877083460e+01,  8.4137603187360543e+00,  4.9925034366798311e+00,
+      -8.8194505075704640e+01, 1.2287993196505218e+02,  -4.7096724506223822e+01,
+      1.0593064919257221e+01,  8.6968970567044934e+00,  5.6755738143875760e+00,
+      -9.2539593640863643e+01, 1.2813563331215474e+02,  -4.8933618162805772e+01,
+      1.2121921818513506e+01,  8.4078642204619420e+00,  4.9908632634858190e+00,
+      -8.8134432374832016e+01, 1.2279086550380391e+02,  -4.7060844505587738e+01,
+      1.1627957207938659e+01,  1.0303707615441018e+01,  7.5344011042552923e+00,
+      -1.1253294830348190e+02, 1.5438372244089408e+02,  -5.8647453529357783e+01,
+      1.3114510015623049e+01,  9.8513572940713416e+00,  6.7213349376406626e+00,
+      -1.0635738219113546e+02, 1.4665751311861146e+02,  -5.5881528760137869e+01,
+      1.1627957298834614e+01,  1.0303706197478814e+01,  7.5344111366673712e+00,
+      -1.1253296638384563e+02, 1.5438373415898508e+02,  -5.8647455853629580e+01,
+      1.3116237925845430e+01,  9.8455331102145145e+00,  6.7243141059359051e+00,
+      -1.0631074264006560e+02, 1.4658112805680690e+02,  -5.5849452095162235e+01,
+      1.2669386535689361e+01,  1.2071287030293307e+01,  9.7633555455962835e+00,
+      -1.3485075345900265e+02, 1.8336444946299886e+02,  -6.9300787627414508e+01,
+      1.4105804414673191e+01,  1.1444289269702800e+01,  8.7789794745243590e+00,
+      -1.2666835962860844e+02, 1.7298274034188972e+02,  -6.5547771558832267e+01,
+      1.2669386554490638e+01,  1.2071286687068984e+01,  9.7633584027450482e+00,
+      -1.3485075900242089e+02, 1.8336445335820781e+02,  -6.9300788508071975e+01,
+      1.4107018463574896e+01,  1.1439185153305873e+01,  8.7843335749580440e+00,
+      -1.2663444344319166e+02, 1.7292158897636148e+02,  -6.5521162694327174e+01,
+      1.3716937488160630e+01,  1.3999597459400730e+01,  1.2389915672436279e+01,
+      -1.5954894249539399e+02, 2.1510813446746886e+02,  -8.0895567204040049e+01,
+      1.5095682313349364e+01,  1.3189272906323732e+01,  1.1192627051714643e+01,
+      -1.4915916817312757e+02, 2.0184825850919157e+02,  -7.6081293415969839e+01,
+      1.3716937492019641e+01,  1.3999597377767842e+01,  1.2389916464009524e+01,
+      -1.5954894412085929e+02, 2.1510813567394996e+02,  -8.0895567498068928e+01,
+      1.5096520030681436e+01,  1.3185064407456906e+01,  1.1198910160279951e+01,
+      -1.4913565617175487e+02, 2.0180124290250004e+02,  -7.6060129778156622e+01,
+      1.4770075388032444e+01,  1.6087303167766446e+01,  1.5436222950666867e+01,
+      -1.8666021493779203e+02, 2.4962122089688103e+02,  -9.3426463524457304e+01,
+      1.6085379191481852e+01,  1.5083589447287226e+01,  1.3991739427782750e+01,
+      -1.7386892459375579e+02, 2.3325385095807121e+02,  -8.7470099643500802e+01,
+      1.4770075388818769e+01,  1.6087303148664304e+01,  1.5436223164442264e+01,
+      -1.8666021539675981e+02, 2.4962122125116741e+02,  -9.3426463615076329e+01,
+      1.6085951551006787e+01,  1.5080238931969067e+01,  1.3998101278449143e+01,
+      -1.7385331837944693e+02, 2.3321864790104019e+02,  -8.7453697552144448e+01,
+      1.5828143941097450e+01,  1.8331670220961666e+01,  1.8918268274003861e+01,
+      -2.1619095210442941e+02, 2.8688297635978756e+02,  -1.0687973526499771e+02,
+      1.7075534787366465e+01,  1.7125200136366264e+01,  1.7207074959934751e+01,
+      -2.0084388544719391e+02, 2.6720765911058965e+02,  -9.9705133726570395e+01,
+      1.5828143941256627e+01,  1.8331670216557445e+01,  1.8918268330404022e+01,
+      -2.1619095222989833e+02, 2.8688297645950814e+02,  -1.0687973529137253e+02,
+      1.7075923730873765e+01,  1.7122590193964911e+01,  1.7213058024904747e+01,
+      -2.0083402645820061e+02, 2.6718180837697332e+02,  -9.9692640534772679e+01,
+      1.6890371426423382e+01,  2.0728579569842751e+01,  2.2845917469463828e+01,
+      -2.4812083435502871e+02, 3.2684448823688496e+02,  -1.2123263616047282e+02,
+      1.8066449820492846e+01,  1.9312661524160735e+01,  2.0870036016187061e+01,
+      -2.3013589616073858e+02, 3.0372498377642154e+02,  -1.1277999824352135e+02,
+      1.6890371426455424e+01,  2.0728579568840633e+01,  2.2845917484032956e+01,
+      -2.4812083438838550e+02, 3.2684448826399682e+02,  -1.2123263616782057e+02,
+      1.8066713333743454e+01,  1.9310657703202459e+01,  2.0875423564416035e+01,
+      -2.3013008228413184e+02, 3.0370630494679148e+02,  -1.1277060230387309e+02,
+      1.7955886187113396e+01,  2.3272683588860026e+01,  2.7223982220959247e+01,
+      -2.8240595076334000e+02, 3.6943078590316281e+02,  -1.3645364576977221e+02,
+      1.9058236733002300e+01,  2.1644988962398710e+01,  2.5012267757287322e+01,
+      -2.6180071928343307e+02, 3.4282650121799617e+02,  -1.2669036882336400e+02,
+      1.7955886187119816e+01,  2.3272683588634656e+01,  2.7223982224651898e+01,
+      -2.8240595077199526e+02, 3.6943078591032139e+02,  -1.3645364577174797e+02,
+      1.9058414960148450e+01,  2.1643466247439289e+01,  2.5016983354038196e+01,
+      -2.6179767020610126e+02, 3.4281320617581565e+02,  -1.2668337355331974e+02,
+      1.9023741366983238e+01,  2.5957710504548576e+01,  3.2054387652193789e+01,
+      -3.1898571318422574e+02, 4.1454655650462962e+02,  -1.5250373535684176e+02,
+      2.0050906563887416e+01,  2.4121527381838824e+01,  2.9665428981325245e+01,
+      -2.9589665055055406e+02, 3.8453661583827250e+02,  -1.4143340987287985e+02,
+      1.9023741366984520e+01,  2.5957710504498362e+01,  3.2054387653114766e+01,
+      -3.1898571318642672e+02, 4.1454655650647550e+02,  -1.5250373535735841e+02,
+      2.0051026978020587e+01,  2.4120379273875816e+01,  2.9669474257430963e+01,
+      -2.9589543070583102e+02, 3.8452729731205977e+02,  -1.4142824748467820e+02,
+      2.0092947487287756e+01,  2.8776895490568755e+01,  3.7339233558876920e+01,
+      -9.8781982607414882e+00, 7.0916635282296292e-01,  -1.2340880155534291e-02,
+      2.1044418341890132e+01,  2.6741847681518077e+01,  3.4861073630499796e+01,
+      -9.1700568642165461e+00, 6.5220324713443967e-01,  -1.1045071585279443e-02,
+      2.0092947487288011e+01,  2.8776895490557653e+01,  3.7339233559103448e+01,
+      -9.8781982608033179e+00, 7.0916635282857932e-01,  -1.2340880155703077e-02,
+      2.1044499630877905e+01,  2.6740987496092696e+01,  3.4864491165514394e+01,
+      -9.1707199731434574e+00, 6.5223741134844682e-01,  -1.1045188698410773e-02,
+      2.1162510215379026e+01,  3.1723491960797684e+01,  4.3084295875067085e+01,
+      -4.1033675985379521e+00, -6.6095139594000130e-01, 6.0977735530407223e-02,
+      2.2038706806958309e+01,  2.9505670300337073e+01,  4.0630600131872811e+01,
+      -2.7905442844326718e+00, -8.3885972791335117e-01, 6.8309956404426039e-02,
+      2.1162510215379076e+01,  3.1723491960795304e+01,  4.3084295875120795e+01,
+      -4.1033675985539224e+00, -6.6095139593840913e-01, 6.0977735530354210e-02,
+      2.2038761643178379e+01,  2.9505029336592230e+01,  4.0633451796171073e+01,
+      -2.7913314472201640e+00, -8.3878528163749511e-01, 6.8307595298566767e-02,
+      3.1719012432820758e+01,  6.7480322661109355e+01,  1.3318978565899991e+02,
+      -1.6791944323404795e+01, -1.0181217992701848e+00, 1.2989592638281225e-01,
+      3.2009499874031789e+01,  6.5013296175889408e+01,  1.3669799889514238e+02,
+      -1.7009031615065428e+01, -1.0689880784706638e+00, 1.3388972346122466e-01,
+      3.1719012432820758e+01,  6.7480322661109355e+01,  1.3318978565899991e+02,
+      -1.6791944323404795e+01, -1.0181217992701848e+00, 1.2989592638281225e-01,
+      3.2009500887769519e+01,  6.5013269472322307e+01,  1.3669829238273672e+02,
+      -1.7009116366540379e+01, -1.0689798256828462e+00, 1.3388945486998777e-01,
+      4.1931127118492086e+01,  1.1600186087954401e+02,  3.1751764022286790e+02,
+      -4.6438894455748802e+01, -8.7599401950869438e-01, 2.2297105562740663e-01,
+      4.2002297497564768e+01,  1.1479764873768737e+02,  3.2393143797302810e+02,
+      -4.7847299173836262e+01, -7.8150712905299369e-01, 2.2131248436241077e-01,
+      4.1931127118492086e+01,  1.1600186087954401e+02,  3.1751764022286790e+02,
+      -4.6438894455748802e+01, -8.7599401950869438e-01, 2.2297105562740663e-01,
+      4.2002297514594851e+01,  1.1479764793294436e+02,  3.2393145467669495e+02,
+      -4.7847304068128608e+01, -7.8150664807362491e-01, 2.2131246858403722e-01,
+      5.1984670105634827e+01,  1.7926303194781252e+02,  6.2846495111925287e+02,
+      -1.0034649475039414e+02, 2.4606292097951082e-01,  3.3256752105517051e-01,
+      5.2000554052128159e+01,  1.7883235795593501e+02,  6.3273302895025176e+02,
+      -1.0138733878813618e+02, 3.2804187851642969e-01,  3.3055293107858102e-01,
+      5.1984670105634827e+01,  1.7926303194781252e+02,  6.2846495111925287e+02,
+      -1.0034649475039414e+02, 2.4606292097951082e-01,  3.3256752105517051e-01,
+      5.2000554052402805e+01,  1.7883235793562420e+02,  6.3273302962903426e+02,
+      -1.0138733898825184e+02, 3.2804189825766372e-01,  3.3055293042886030e-01,
+      6.1996666427075382e+01,  2.5724136589119979e+02,  1.0913830717468406e+03,
+      -1.8317243758181812e+02, 2.5193786568880601e+00,  4.6277932792022042e-01,
+      6.2000133522892554e+01,  2.5710536851489377e+02,  1.0934673032018356e+03,
+      -1.8370056934287794e+02, 2.5630609198690104e+00,  4.6162176037505448e-01,
+      6.1996666427075382e+01,  2.5724136589119979e+02,  1.0913830717468406e+03,
+      -1.8317243758181812e+02, 2.5193786568880601e+00,  4.6277932792022042e-01,
+      6.2000133522896938e+01,  2.5710536851442714e+02,  1.0934673032246803e+03,
+      -1.8370056934963364e+02, 2.5630609205366826e+00,  4.6162176035304603e-01,
+      7.1999279107664492e+01,  3.4965254984584158e+02,  1.7356304176273381e+03,
+      -3.0063395678020430e+02, 6.2079056750108883e+00,  6.1505333334154833e-01,
+      7.2000032172982571e+01,  3.4961232791697932e+02,  1.7365043785874466e+03,
+      -3.0086002522613632e+02, 6.2270725229979789e+00,  6.1452738833821030e-01,
+      7.1999279107664492e+01,  3.4965254984584158e+02,  1.7356304176273381e+03,
+      -3.0063395678020430e+02, 6.2079056750108883e+00,  6.1505333334154833e-01,
+      7.2000032172982642e+01,  3.4961232791696904e+02,  1.7365043785881401e+03,
+      -3.0086002522634379e+02, 6.2270725230187063e+00,  6.1452738833751985e-01,
+      8.1999844359310714e+01,  4.5636323545227941e+02,  2.5918884526432239e+03,
+      -4.5885344883307727e+02, 1.1616256691917803e+01,  7.8948404417119522e-01,
+      8.2000007751936337e+01,  4.5635184072744744e+02,  2.5922210189842476e+03,
+      -4.5894061525528980e+02, 1.1623761628208563e+01,  7.8927378661620728e-01,
+      8.1999844359310714e+01,  4.5636323545227941e+02,  2.5918884526432239e+03,
+      -4.5885344883307727e+02, 1.1616256691917803e+01,  7.8948404417119522e-01,
+      8.2000007751936337e+01,  4.5635184072744744e+02,  2.5922210189842476e+03,
+      -4.5894061525528980e+02, 1.1623761628208563e+01,  7.8927378661620728e-01};
+  std::vector expected_xyz_scatter = {
+      1.4271973325754339e+00,  2.5214997685364109e+00,  3.1394341134078902e+00,
+      2.2727894815158436e+00,  1.9127738317829568e+00,  2.5288382955492263e+00,
+      3.1401587802428659e+00,  2.5252400661016079e+00,  9.4806287131835343e-01,
+      2.3778589851963829e+00,  2.8273548699126683e+00,  1.9358633427396228e+00,
+      2.1586806210305824e+00,  2.6256636737020518e+00,  3.3955783231847523e+00,
+      2.7091329174140033e+00,  -1.9231004620365049e+00, -4.6499941633630704e-01,
+      -1.1594526098009617e+00, -1.2686640472208488e+00, 2.0867847214069872e+00,
+      3.0003750888529219e+00,  3.6325449823191440e+00,  2.8788902557067368e+00,
+      1.2684738158575621e+00,  1.8537695728403008e+00,  2.1955525109720693e+00,
+      1.7836450721166277e+00,  1.8550735634159015e+00,  2.4434013845454778e+00,
+      3.0971074319021614e+00,  2.4481507963338514e+00,  1.6439641588553517e+00,
+      1.9173245315063490e+00,  2.4213050183154365e+00,  2.0154649449162125e+00,
+      8.6044027444396542e-01,  1.6761956340909820e+00,  1.9714372427825169e+00,
+      1.4694269993819085e+00,  1.1578881590922248e+00,  2.4304644465537262e+00,
+      2.8997419900334167e+00,  2.0775716876050363e+00,  2.3918652577373138e+00,
+      2.7767532459788180e+00,  3.5565699066582859e+00,  2.9253650111396308e+00,
+      1.6429790566102422e+00,  2.3353986933747315e+00,  2.8497701445565649e+00,
+      2.2665599345093730e+00,  6.2452940515269861e-01,  1.3483891434563131e+00,
+      1.5336055353368097e+00,  1.1476467351376733e+00,  2.3375903116778036e+00,
+      3.0294370345439616e+00,  3.8114115382246951e+00,  3.0486965696352639e+00,
+      6.1627879872497271e-01,  1.0222504107870520e+00,  1.1967221175625382e+00,
+      9.4398210879701261e-01,  -1.7068032019607302e+00, 3.3613403560802918e-01,
+      -1.0411939552994098e-01, -6.6667768860645871e-01, 1.8511171935709925e+00,
+      2.2616949107465572e+00,  2.8901786544735999e+00,  2.3344195582834213e+00,
+      1.8684390110773692e+00,  2.3089422940069237e+00,  2.9226805832398313e+00,
+      2.3717710946817374e+00,  1.9375672494736595e+00,  2.2557776291035463e+00,
+      2.8639227616937220e+00,  2.3752065452942275e+00,  -3.2972712763415735e-02,
+      1.0799332278489837e+00,  1.0885237991034180e+00,  6.4736232064759269e-01,
+      2.8245006381754121e+00,  3.5259487523490192e+00,  4.6113702776738981e+00,
+      3.5998420702676723e+00,  2.5905343058532044e+00,  3.9030377854459730e+00,
+      4.9001863961421570e+00,  3.6937521865974929e+00,  -2.0466716707172710e+00,
+      -1.0685624603518851e-01, -7.3049231485296484e-01, -1.1126777403630335e+00,
+      4.5968126827466538e+00,  4.8629895439961155e+00,  6.7099151660558576e+00,
+      5.3272372780027073e+00,  1.3052826537711313e+00,  1.5171629367774915e+00,
+      1.9011692135645539e+00,  1.5983578000913097e+00,  2.7500355791930211e+00,
+      3.1012540902842334e+00,  4.0886454556166472e+00,  3.2988732261870899e+00,
+      -3.7661410117701113e+00, -1.3602653035667422e+00, -2.5196607983439852e+00,
+      -2.7610055328203522e+00, 2.4189623440903629e+00,  2.8963366391936933e+00,
+      3.8107924830413253e+00,  3.0083029750449866e+00,  -3.4747440084737047e+00,
+      -1.1388326294486402e+00, -2.4182412000178957e+00, -2.4782338467864626e+00,
+      1.8127024518519697e+00,  2.2230503769241436e+00,  2.7468475012971849e+00,
+      2.2887727041772736e+00,  -1.2285895780562228e-01, 1.5434771625279660e+00,
+      1.6000755001429154e+00,  8.7985271502585627e-01,  1.9011366955569318e+00,
+      2.4167616547852120e+00,  3.1462829168951041e+00,  2.4517928223455625e+00,
+      3.4327869085046898e+00,  3.8030109751616310e+00,  4.8712356823465610e+00,
+      4.0764499721493568e+00,  -1.2035506504910221e-01, 1.0522704557335492e+00,
+      9.2716931026249949e-01,  5.7901898977964616e-01,  2.3714074680568968e+00,
+      2.7069461333245264e+00,  3.4788009563530058e+00,  2.8668785353548181e+00,
+      1.3173599955901605e+00,  2.3373622162330081e+00,  2.7950932510153166e+00,
+      2.0958887568436859e+00,  8.4352826372327494e-01,  1.3787710702843035e+00,
+      1.6194525500748886e+00,  1.2753294206512922e+00,  2.1844818576218366e+00,
+      2.4600046540695972e+00,  3.2061976014984541e+00,  2.6275850202185489e+00,
+      7.7180988879817070e-01,  1.0272946579967681e+00,  1.2621894302014174e+00,
+      1.0265959882640883e+00,  1.6049231964243249e+00,  2.4546141304955089e+00,
+      3.1543902252316531e+00,  2.3261815654970941e+00,  -1.5851508845166586e+00,
+      1.7616609630246921e-01,  -4.6927862795076358e-01, -7.0847602690642730e-01,
+      9.7206101520523258e-01,  2.1984829496765985e+00,  2.5547429254737746e+00,
+      1.8332359752494667e+00,  1.7754746253185822e+00,  2.6825317821817345e+00,
+      3.3991991376107316e+00,  2.5509087537769037e+00,  1.0721995919270044e-01,
+      1.2979498217369176e+00,  1.3387413397315138e+00,  8.4672254891200061e-01,
+      2.9293634097685093e+00,  3.7870623674013393e+00,  4.9049858395715571e+00,
+      3.8089529879570825e+00,  8.4019940154161687e-01,  1.4394975389766356e+00,
+      1.7212747966327231e+00,  1.3114842544997232e+00,  -4.4681831340750566e-01,
+      1.6558375814978303e+00,  1.5645433370489232e+00,  7.7463977013538887e-01,
+      5.8820535859917089e-01,  9.1338322532915583e-01,  1.1202010966286036e+00,
+      8.6364930010308649e-01,  7.0926056868283660e-01,  1.4218405965219119e+00,
+      1.6894042270047498e+00,  1.2378761768042328e+00,  2.3099219963216546e+00,
+      2.6932789796161916e+00,  3.5453161672341622e+00,  2.8363942767964803e+00,
+      1.8678281855424870e+00,  2.6920312666879047e+00,  3.2316323462531087e+00,
+      2.6004324324975743e+00,  1.4005208650900944e+00,  2.1811523726875692e+00,
+      2.7048686478151001e+00,  2.0527720081111767e+00,  2.3136081060507738e+00,
+      2.9431216274000898e+00,  3.7927332883433289e+00,  2.9867484155017419e+00,
+      2.4183082750679299e+00,  3.4684542472901878e+00,  4.3044551492126208e+00,
+      3.3503390337668466e+00,  1.3369220495937211e+00,  2.0576414218953958e+00,
+      2.5357049204003479e+00,  1.9409755276270539e+00,  -2.1107398251243468e+00,
+      -4.8988978509617087e-01, -1.0925226074379997e+00, -1.3770224787571617e+00,
+      1.6149648338580387e+00,  1.9730143918958940e+00,  2.5005508027915648e+00,
+      2.0369912328773259e+00,  1.9358903207989977e+00,  2.1474863817546317e+00,
+      2.7816238043705535e+00,  2.3089858393655152e+00,  5.9610253563576776e-01,
+      1.8174801336559421e+00,  2.1475049000471036e+00,  1.4151097394224248e+00,
+      5.1090833065932995e-01,  1.2160531841070317e+00,  1.3152029243838474e+00,
+      1.0054053301687891e+00,  2.1775101573737672e+00,  2.7592291083038578e+00,
+      3.5514577227476543e+00,  2.8051749838391071e+00,  1.4574449572780601e+00,
+      1.8257794999887023e+00,  2.3529474069241134e+00,  1.8687169225939499e+00,
+      3.9934672259732729e+00,  4.6069625003686925e+00,  6.0365399138833418e+00,
+      4.8498952298984239e+00,  5.0206849491088514e+02,  5.2990619575924950e+02,
+      5.0718360719485423e+02,  5.3078609113850609e+02,  4.2099706807708640e+02,
+      4.4995011865286330e+02,  4.2613481931665478e+02,  4.5054742567627943e+02,
+      6.4624126958401507e+02,  6.7232697462462943e+02,  6.5259938738906271e+02,
+      6.7148563064230416e+02,  4.2978137542372599e+02,  4.5001246708893814e+02,
+      4.3535643412910235e+02,  4.5267999547985386e+02,  3.3452377892226485e+02,
+      3.5665923744531250e+02,  3.3921143957791395e+02,  3.5904334578072132e+02,
+      4.2377024314145552e+02,  4.4269249140996482e+02,  4.3013979474766063e+02,
+      4.4602164788241845e+02,  4.0106095277790195e+02,  4.2025993436574691e+02,
+      4.0597499630524908e+02,  4.2366611812473144e+02,  3.8696471878412717e+02,
+      4.0913035507396773e+02,  3.8997037364405418e+02,  4.1288273173906788e+02,
+      3.6331657327614209e+02,  3.8874925104444759e+02,  3.6568536230961831e+02,
+      3.8997929930096944e+02,  3.0766058208873443e+02,  3.3007628916707438e+02,
+      3.1095679032603221e+02,  3.3214170847015549e+02,  3.7417389932974652e+02,
+      3.9610585314922997e+02,  3.8035396619631911e+02,  4.0089442860025434e+02,
+      3.8077716282948728e+02,  4.0439557659241609e+02,  3.8495076836644944e+02,
+      4.0710965835899611e+02,  5.0832346005741056e+02,  5.2918139375030751e+02,
+      5.0999148566291376e+02,  5.3238824760640534e+02,  3.5895613327835940e+02,
+      3.7388263235784137e+02,  3.6145315529806305e+02,  3.7979834444073651e+02,
+      3.7956459278145832e+02,  4.0172961800115581e+02,  3.8172408748658489e+02,
+      4.0500568949748191e+02,  3.9684913832843944e+02,  4.1936779798804349e+02,
+      3.9896001344387633e+02,  4.2078747802693186e+02,  3.3493113256513072e+02,
+      3.5265874034487007e+02,  3.3927703565539474e+02,  3.6107266137720109e+02,
+      3.6604604197249961e+02,  3.8893228913059670e+02,  3.6730129201251361e+02,
+      3.9043728041862283e+02,  3.8812581530185560e+02,  4.0924615019224609e+02,
+      3.9322998085289782e+02,  4.1390337702757438e+02,  4.8856029801345204e+02,
+      5.0271192916654570e+02,  4.9091594198952845e+02,  5.0809377638444926e+02,
+      5.2321208035994221e+02,  5.4770952441490192e+02,  5.2817006845838080e+02,
+      5.4714484189009147e+02,  4.9081225091120268e+02,  5.1240397279603928e+02,
+      4.9158073027031935e+02,  5.1502480371472871e+02,  5.7223828602721358e+02,
+      5.9667394727556575e+02,  5.7537894734461975e+02,  5.9925470530439986e+02,
+      4.9646123158168882e+02,  5.1903534403318656e+02,  5.0181634086118572e+02,
+      5.2172511442944483e+02,  5.2233159969818155e+02,  5.4780412588510796e+02,
+      5.2846475284626229e+02,  5.4849754426441416e+02,  4.5617530091144931e+02,
+      4.7396892884953650e+02,  4.5877481670469268e+02,  4.7577802295466512e+02,
+      4.4587673834159580e+02,  4.6732395897795834e+02,  4.5162986675993960e+02,
+      4.6775280428052747e+02,  4.0283354373844770e+02,  4.1961075089899697e+02,
+      4.0745259059538853e+02,  4.2303359882010614e+02,  5.3834353723774768e+02,
+      5.6525863139407920e+02,  5.4218387057666916e+02,  5.6547607603652864e+02,
+      4.4261815891116561e+02,  4.6322139706306598e+02,  4.4557510448028398e+02,
+      4.6409152553377004e+02,  4.4247192104148161e+02,  4.6619614492253584e+02,
+      4.4614612324987121e+02,  4.6625206457720230e+02,  3.6128738532891242e+02,
+      3.8988552843469040e+02,  3.6940156471526564e+02,  3.8850389331677923e+02,
+      4.7389962473318047e+02,  5.0013101019299427e+02,  4.7841978642421418e+02,
+      5.0240916591976708e+02,  4.4507921732155654e+02,  4.7390246086556681e+02,
+      4.5040565227666025e+02,  4.7420235641495236e+02,  4.8993159334334075e+02,
+      5.1119545519430841e+02,  4.9450301753071705e+02,  5.1079476503475638e+02,
+      3.9149835455877076e+02,  4.1695134005502371e+02,  3.9850449284689967e+02,
+      4.1768853701434006e+02,  5.5536615451459932e+02,  5.8218713084686863e+02,
+      5.6219650600540592e+02,  5.8110371415189206e+02,  3.5857465223194458e+02,
+      3.7805893452052851e+02,  3.6313044059114281e+02,  3.8278842764472688e+02,
+      3.9613973998079075e+02,  4.2121250028684204e+02,  4.0322872883880973e+02,
+      4.2129275763397044e+02,  4.2936205593131211e+02,  4.5353270360878503e+02,
+      4.3287345252040535e+02,  4.5448469042238418e+02,  4.4950161157908241e+02,
+      4.7532105082965637e+02,  4.5647550173342893e+02,  4.7547228050864646e+02,
+      3.2679348365595416e+02,  3.4926301408293159e+02,  3.3184472007084833e+02,
+      3.4766058741420920e+02,  3.7386411648728529e+02,  3.9861550748020761e+02,
+      3.7822194089162900e+02,  4.0013870143390176e+02,  4.4829502159394286e+02,
+      4.7694032786407286e+02,  4.5274923302353994e+02,  4.7569909856079317e+02,
+      5.0605732715124822e+02,  5.2982960997204441e+02,  5.1018585735851212e+02,
+      5.3097883224725592e+02,  3.5863665220726045e+02,  3.8682448095468220e+02,
+      3.6639443664339842e+02,  3.8645991462669110e+02,  4.2980724222044194e+02,
+      4.4200509624071060e+02,  4.3377896483976394e+02,  4.5049650817962009e+02,
+      4.7353077646683136e+02,  5.0370872675117209e+02,  4.7734477563720822e+02,
+      5.0383525959610876e+02,  3.3018963371387275e+02,  3.5335741347599691e+02,
+      3.3490687757435569e+02,  3.5549069288195227e+02,  5.4332101159480499e+02,
+      5.5773440615406525e+02,  5.4693692877181081e+02,  5.6229241672395904e+02,
+      4.3911838198513118e+02,  4.6389559606570378e+02,  4.4555972724814870e+02,
+      4.6330360136131640e+02,  5.9012746767991473e+02,  6.0881103345863914e+02,
+      5.9238046236563105e+02,  6.1153339968383796e+02,  3.3567654669942158e+02,
+      3.5471649519755044e+02,  3.3738613420476901e+02,  3.6005001180565057e+02,
+      4.5183734883466576e+02,  4.7792551648388240e+02,  4.5576605194928987e+02,
+      4.7882989629493864e+02,  4.9963595110319130e+02,  5.2447596823087997e+02,
+      5.0502830948039860e+02,  5.2575477228206853e+02,  3.0830398627916276e+02,
+      3.3105832470397951e+02,  3.1259710901928895e+02,  3.3410602914065612e+02,
+      3.2743062231073293e+02,  3.4891769728491801e+02,  3.3177463909525744e+02,
+      3.5362578896828563e+02,  4.7519868494408144e+02,  4.9515900441603736e+02,
+      4.7779717459656456e+02,  4.9826625741531501e+02,  4.9284871034497542e+02,
+      5.1106984057352037e+02,  4.9641742709973187e+02,  5.1336922593850761e+02,
+      3.5619924997896692e+02,  3.7973909500494273e+02,  3.6036976487086139e+02,
+      3.8244786836827933e+02,  5.1023777988632258e+02,  5.2792203133332578e+02,
+      5.1181598214733094e+02,  5.3093252840119374e+02,  5.8515851970733684e+02,
+      6.0662095917829583e+02,  5.8836080558705828e+02,  6.0856740950778476e+02,
+      4.2320528132965376e+02,  4.4433409233260238e+02,  4.2774087770870437e+02,
+      4.4493227381084881e+02,  5.3649403282476419e+02,  5.6002649142365817e+02,
+      5.4100544048421602e+02,  5.6344537433495304e+02,  3.2108481178606974e+02,
+      3.3751402850679085e+02,  3.2639602686352305e+02,  3.4280497127600313e+02,
+      3.6088723193495770e+02,  3.8330727327246012e+02,  3.6766216892694246e+02,
+      3.8408740198273415e+02,  5.3789217565204069e+02,  5.6077082817082771e+02,
+      5.4561433429711803e+02,  5.6002190651264800e+02,  3.8542525382667549e+02,
+      4.0818706679585972e+02,  3.8989237977139476e+02,  4.1071672112601937e+02,
+      4.4017080999360439e+02,  4.6096607829162730e+02,  4.4148597908498385e+02,
+      4.6196829584408204e+02,  4.4910149091628733e+02,  4.7307394363534456e+02,
+      4.5076663700944351e+02,  4.7551430237289520e+02,  3.9227814682587132e+02,
+      4.1257798087214985e+02,  3.9521371950921736e+02,  4.1711897460767489e+02,
+      5.1452231508718626e+02,  5.3859526400720893e+02,  5.1952695545599056e+02,
+      5.3961923578685878e+02,  3.8109275356108719e+02,  4.0346504014461300e+02,
+      3.8610825630499738e+02,  4.0617030886712132e+02,  4.8452457505595487e+02,
+      5.0041974589465406e+02,  4.8964722068875346e+02,  5.0550298995865631e+02,
+      3.9804275290318742e+02,  4.2264782863487699e+02,  4.0321007872679951e+02,
+      4.2492264399123434e+02,  5.0509147743746217e+02,  5.2641842260951853e+02,
+      5.0615604907658002e+02,  5.2851782681184159e+02,  3.3459128881879548e+02,
+      3.5785989581765483e+02,  3.4014053555863410e+02,  3.6113224451566020e+02,
+      3.7243330242446217e+02,  3.9454364197091792e+02,  3.7709573464905105e+02,
+      3.9881470989101706e+02,  4.6373112347911342e+02,  4.9281947700430516e+02,
+      4.6700883320630629e+02,  4.9432223003118827e+02,  4.8356479533205874e+02,
+      4.9924019843893802e+02,  4.8602244108327392e+02,  5.0403944602754200e+02,
+      4.9405865887431497e+02,  5.1764099704293153e+02,  4.9706952654679344e+02,
+      5.1857657553800686e+02,  3.2353192335630808e+02,  3.4294458004548534e+02,
+      3.3005085921688982e+02,  3.4832237869662674e+02,  3.7709990325592833e+02,
+      4.0170421910177203e+02,  3.8082050238146007e+02,  4.0277160778705706e+02,
+      5.3575655043259815e+02,  5.5597457504626493e+02,  5.4128699868812475e+02,
+      5.5631870299488321e+02,  4.0289822061838851e+02,  4.2011136295440394e+02,
+      4.0596438967835684e+02,  4.2376691548223067e+02,  4.9819597054194503e+02,
+      5.2512706626398779e+02,  5.0197315929161960e+02,  5.2498152747596998e+02,
+      4.6817667276634273e+02,  5.0014722377303497e+02,  4.7458649647723576e+02,
+      5.0150495171951650e+02,  4.1941666791857625e+02,  4.3883151509608655e+02,
+      4.2401675298355462e+02,  4.4187117909268017e+02,  4.4249214476881394e+02,
+      4.6979092814746605e+02,  4.5029339653448335e+02,  4.7008378840721508e+02,
+      3.5083981057495157e+02,  3.7313087632713695e+02,  3.5453905916645419e+02,
+      3.7328522070000724e+02,  5.9181740721854158e+02,  6.1605214530968453e+02,
+      5.9371833379543057e+02,  6.1944402449941288e+02,  4.5118997327463666e+02,
+      4.7157660592958536e+02,  4.5537464602637550e+02,  4.7369769489472503e+02,
+      4.0138177193179956e+02,  4.1688091486099427e+02,  4.0497942623326634e+02,
+      4.2129355581523492e+02,  3.2454476068904751e+02,  3.4375410350243283e+02,
+      3.2903937444908257e+02,  3.4952628701737717e+02,  3.2747645819233173e+02,
+      3.5223863743599901e+02,  3.3462584368670485e+02,  3.5238457621937755e+02,
+      5.1406293910821455e+02,  5.4054861864779025e+02,  5.1862961318339524e+02,
+      5.3983332800144319e+02,  3.2626786506568948e+02,  3.4484875304946956e+02,
+      3.2968597510617190e+02,  3.5115983614977830e+02,  4.3161832975765424e+02,
+      4.5816388988101232e+02,  4.3575161757387821e+02,  4.5976959274894392e+02,
+      3.5654924037338890e+02,  3.8348265216764844e+02,  3.6337728104134629e+02,
+      3.8304023786626510e+02,  4.8355605334720593e+02,  5.0545440221729081e+02,
+      4.8988058276802548e+02,  5.0618475071249503e+02,  4.0865412723946616e+02,
+      4.2800407219894146e+02,  4.1095472883899140e+02,  4.3167357399059108e+02,
+      2.9708316761901597e+02,  3.1383146748172811e+02,  2.9852303627317781e+02,
+      3.1710940115047504e+02,  4.7142270881905046e+02,  4.8987477487142007e+02,
+      4.7426138183490190e+02,  4.9403105234490903e+02,  3.6559540566801468e+02,
+      3.8870805730365379e+02,  3.6991533799271076e+02,  3.9018512214896242e+02,
+      5.3051276561692873e+02,  5.5329760522784727e+02,  5.3526924112310326e+02,
+      5.5512839001953125e+02,  4.9480860427837490e+02,  5.1417711766271100e+02,
+      4.9754426266298464e+02,  5.1841294991322388e+02,  3.7755705785212410e+02,
+      3.9871403752122859e+02,  3.8399149816388353e+02,  3.9960423516196516e+02,
+      5.4606883830754020e+02,  5.7269177786419971e+02,  5.5192452991915388e+02,
+      5.7279122308323679e+02,  3.7257595764014530e+02,  3.9118140540161784e+02,
+      3.7687331126515454e+02,  3.9896571592453080e+02,  5.2752105969967772e+02,
+      5.5611293296971098e+02,  5.3281649703346557e+02,  5.5594920150426572e+02,
+      4.3231846898537026e+02,  4.5288085667282348e+02,  4.3724671179152733e+02,
+      4.5453580950036348e+02,  4.7848078271205947e+02,  4.9368761153563361e+02,
+      4.8122739366968841e+02,  4.9813960591790141e+02,  4.1534041583572440e+02,
+      4.3799832356033329e+02,  4.1706050141647705e+02,  4.4120345083811236e+02,
+      4.2450222526363888e+02,  4.4987978590265880e+02,  4.2579221415895904e+02,
+      4.5062389839125194e+02,  5.3367829890285134e+02,  5.5925299920437146e+02,
+      5.3827600387794030e+02,  5.5875107897809198e+02,  3.1886322192082901e+02,
+      3.4604588931737567e+02,  3.2494503946587514e+02,  3.4498806567101104e+02,
+      4.5828995348069998e+02,  4.8134664875820539e+02,  4.6362842114441025e+02,
+      4.8218008418611402e+02,  3.2815013315367082e+02,  3.5330311766072475e+02,
+      3.3495746008397487e+02,  3.5350425839038832e+02,  4.5574559510937661e+02,
+      4.8461323076426964e+02,  4.6148981459068813e+02,  4.8263615785873958e+02,
+      4.2501531271042046e+02,  4.5132404339430190e+02,  4.2823468757230540e+02,
+      4.5342670023963024e+02,  3.7302979260694212e+02,  3.9811608227111651e+02,
+      3.7780483413354148e+02,  3.9775933708215859e+02,  5.3852258883589616e+02,
+      5.5889183953662018e+02,  5.4315382086627653e+02,  5.5817153685794256e+02,
+      3.5722233146453829e+02,  3.8308198559090272e+02,  3.6280918007156259e+02,
+      3.8401915651539724e+02,  4.8783906491191323e+02,  5.0943975381762459e+02,
+      4.9217676882174038e+02,  5.1161007146016738e+02,  4.6331654743950884e+02,
+      4.7499075201329407e+02,  4.6705071715769213e+02,  4.8374951558367218e+02,
+      4.2992453268172954e+02,  4.4705802004563787e+02,  4.3466409215168113e+02,
+      4.5143517678668394e+02,  5.8618728290886418e+02,  6.0641021558623345e+02,
+      5.8936993438739410e+02,  6.0675386578249834e+02,  4.9964299424902913e+02,
+      5.2969306085468259e+02,  5.0356864383992928e+02,  5.2869887670804815e+02};
+  std::vector expected_dy_dem_x = {
+      -8.5282566239703315e-04, 9.7434896238470878e-05,
+      2.8198670103749745e-05,  1.4683718725367045e-04,
+      9.7434896238470878e-05,  -8.2177981795691663e-04,
+      1.4730179246350320e-04,  2.2697700888374512e-04,
+      2.8198670103749745e-05,  1.4730179246350320e-04,
+      -3.0546099757163860e-04, -1.0691955275624962e-04,
+      1.4683718725367045e-04,  2.2697700888374512e-04,
+      -1.0691955275624962e-04, -2.0472393176281583e-04,
+      -5.8986194101090419e-04, 2.0301485015849167e-05,
+      1.4477690521761920e-04,  1.5589944596906697e-04,
+      2.0301485015849167e-05,  -3.4790826051777767e-04,
+      1.6188954118095066e-04,  -6.4754798873391454e-05,
+      1.4477690521761920e-04,  1.6188954118095066e-04,
+      -2.9910593935885945e-04, -5.0936876433791144e-05,
+      1.5589944596906697e-04,  -6.4754798873391454e-05,
+      -5.0936876433791144e-05, -1.0355517826698125e-04,
+      -6.9459162938867685e-04, 2.8327565248668782e-05,
+      2.8430463755414195e-05,  4.6197796678653893e-05,
+      2.8327565248668782e-05,  -4.3779001120184660e-04,
+      3.9125697200477542e-05,  -3.1649070137933027e-05,
+      2.8430463755414195e-05,  3.9125697200477542e-05,
+      -4.0417658999342403e-04, 7.6519185026183547e-05,
+      4.6197796678653893e-05,  -3.1649070137933027e-05,
+      7.6519185026183547e-05,  -2.8017988936616456e-04,
+      -8.6012936276845748e-04, 8.9176446815027605e-05,
+      1.6318850169617433e-05,  1.4131109278314074e-04,
+      8.9176446815027605e-05,  -2.3539344428552647e-04,
+      1.0554839943982053e-04,  6.1754240894486574e-05,
+      1.6318850169617433e-05,  1.0554839943982053e-04,
+      -2.2356938850427748e-04, -5.5818570386833549e-05,
+      1.4131109278314074e-04,  6.1754240894486574e-05,
+      -5.5818570386833549e-05, -1.8579307724742348e-04,
+      -1.4579471606530545e-05, 1.8373628547591411e-06,
+      6.3404455687761808e-09,  1.6520017494432617e-05,
+      1.8373628547591411e-06,  4.0055076619841326e-05,
+      5.1870915506862245e-07,  6.5455154995692458e-06,
+      6.3404455687761808e-09,  5.1870915506862245e-07,
+      3.5777013395772088e-05,  1.2331072770409084e-06,
+      1.6520017494432617e-05,  6.5455154995692458e-06,
+      1.2331072770409084e-06,  2.9330374271367200e-05,
+      -1.0591838639597137e-03, 3.7324386487840001e-04,
+      1.1201603730535799e-04,  -1.3094998715550417e-04,
+      3.7324386487840001e-04,  -5.3825225173688483e-04,
+      1.5433463803274103e-05,  6.7929980996522018e-06,
+      1.1201603730535799e-04,  1.5433463803274103e-05,
+      -1.5347431592130682e-04, 7.3816404184980853e-05,
+      -1.3094998715550417e-04, 6.7929980996522018e-06,
+      7.3816404184980853e-05,  -1.4291206449640258e-04,
+      -3.3653765271671784e-04, 9.6409327441248667e-05,
+      6.4078783813068993e-05,  -5.7869265471838981e-05,
+      9.6409327441248667e-05,  -2.2563161687222687e-04,
+      7.2686803292055054e-05,  -6.2940087651933607e-05,
+      6.4078783813068993e-05,  7.2686803292055054e-05,
+      -1.6173581616879356e-04, 8.8234028851700132e-05,
+      -5.7869265471838981e-05, -6.2940087651933607e-05,
+      8.8234028851700132e-05,  -9.8164250335740287e-05,
+      -4.8591645753528860e-04, 9.7740935502860828e-05,
+      4.4478158377719349e-05,  1.0419035578929442e-04,
+      9.7740935502860828e-05,  -2.8008833575539116e-04,
+      1.0234342742699657e-05,  -1.1066375197239399e-05,
+      4.4478158377719349e-05,  1.0234342742699657e-05,
+      -2.7922668025205693e-04, 1.1806531746953321e-04,
+      1.0419035578929442e-04,  -1.1066375197239399e-05,
+      1.1806531746953321e-04,  -1.9974381044785472e-04,
+      -4.5059018192044660e-04, 1.5855953261830535e-04,
+      1.0703891128048314e-04,  -2.9320496830173838e-05,
+      1.5855953261830535e-04,  -2.0101672795536590e-04,
+      5.3750835060043594e-06,  6.3549045572453374e-05,
+      1.0703891128048314e-04,  5.3750835060043594e-06,
+      -2.0012584084945853e-04, 8.1458143700506442e-05,
+      -2.9320496830173838e-05, 6.3549045572453374e-05,
+      8.1458143700506442e-05,  -1.7784532544621600e-04,
+      -4.7553664863859020e-04, 9.0606794366112880e-05,
+      1.1547757782783547e-04,  1.7942202135921617e-04,
+      9.0606794366112880e-05,  -3.7929902055660708e-04,
+      1.1141005377050407e-04,  -1.0369999356845449e-04,
+      1.1547757782783547e-04,  1.1141005377050407e-04,
+      -3.0010715447777566e-04, 5.9933061644597410e-05,
+      1.7942202135921617e-04,  -1.0369999356845449e-04,
+      5.9933061644597410e-05,  -1.5711713725349039e-04,
+      -8.4512694843494427e-04, -1.5998766504508188e-04,
+      1.3459366005811189e-04,  8.9050023157920225e-05,
+      -1.5998766504508188e-04, -7.4497721059562944e-04,
+      2.1490465386259777e-04,  8.6993211035125107e-06,
+      1.3459366005811189e-04,  2.1490465386259777e-04,
+      -3.5600902729500730e-04, 1.2490893288676265e-04,
+      8.9050023157920225e-05,  8.6993211035125107e-06,
+      1.2490893288676265e-04,  -2.4912698171829185e-04,
+      -5.4677729864457437e-04, 1.4544760640130386e-04,
+      2.1553577462634675e-04,  -5.7697338533785043e-05,
+      1.4544760640130386e-04,  -3.9389932680879196e-04,
+      9.3103780652668911e-05,  1.5781465334615180e-04,
+      2.1553577462634675e-04,  9.3103780652668911e-05,
+      -3.5209769647293266e-04, 3.0908080405118161e-05,
+      -5.7697338533785043e-05, 1.5781465334615180e-04,
+      3.0908080405118161e-05,  -2.9381991178282840e-04,
+      -8.4897434901643008e-04, 2.4958296800820011e-04,
+      1.6042793880630297e-04,  2.0191539006263895e-04,
+      2.4958296800820011e-04,  -4.9074044762242083e-04,
+      -3.8390868153238754e-05, 6.9794232794373798e-06,
+      1.6042793880630297e-04,  -3.8390868153238754e-05,
+      -2.3180481912080813e-04, -1.2344065882196416e-04,
+      2.0191539006263895e-04,  6.9794232794373798e-06,
+      -1.2344065882196416e-04, -1.9034910644016834e-04,
+      -3.9234583079674319e-04, 1.0779962022557863e-04,
+      7.7172205961845708e-05,  -1.0845300489032052e-04,
+      1.0779962022557863e-04,  -3.6501653375163899e-04,
+      1.6932449587587277e-04,  1.1497268397185604e-04,
+      7.7172205961845708e-05,  1.6932449587587277e-04,
+      -2.4040175903203856e-04, 6.3315499528010007e-05,
+      -1.0845300489032052e-04, 1.1497268397185604e-04,
+      6.3315499528010007e-05,  -1.4974518011387353e-04,
+      -7.5415253127048964e-04, 1.4628668950921841e-04,
+      1.6870584651280783e-04,  1.4071636815485435e-04,
+      1.4628668950921841e-04,  -3.5212613519392216e-04,
+      8.9576029143352727e-05,  -4.6556408920235956e-05,
+      1.6870584651280783e-04,  8.9576029143352727e-05,
+      -2.7822614419127146e-04, -7.8397846864094857e-05,
+      1.4071636815485435e-04,  -4.6556408920235956e-05,
+      -7.8397846864094857e-05, -2.7346041679651505e-04,
+      -2.6033241393473680e-04, 6.1297134521054865e-05,
+      1.1370943254758424e-05,  4.1345709213983918e-05,
+      6.1297134521054865e-05,  -2.0636544320363793e-04,
+      3.6633072227909512e-05,  9.6487308065657876e-05,
+      1.1370943254758424e-05,  3.6633072227909512e-05,
+      -1.7929253284262379e-04, 1.0075401050272592e-04,
+      4.1345709213983918e-05,  9.6487308065657876e-05,
+      1.0075401050272592e-04,  -1.7514728993513846e-04,
+      -9.1976596775803973e-04, -1.1250093174975719e-05,
+      1.0574848982308227e-04,  5.6892733088968351e-05,
+      -1.1250093174975719e-05, -5.3677767474141075e-04,
+      7.5975050471457976e-05,  2.1341143425148652e-04,
+      1.0574848982308227e-04,  7.5975050471457976e-05,
+      -4.6910404481979227e-04, -1.1961907952442812e-04,
+      5.6892733088968351e-05,  2.1341143425148652e-04,
+      -1.1961907952442812e-04, -1.6713401053931241e-04,
+      -5.0318411138792780e-04, -7.8642752393614670e-06,
+      1.4734980833474556e-04,  1.5694775605953879e-04,
+      -7.8642752393614670e-06, -4.3428679183052686e-04,
+      1.3240571026484175e-04,  9.4284982663807307e-05,
+      1.4734980833474556e-04,  1.3240571026484175e-04,
+      -2.6476905195442599e-04, -4.4663985184858534e-05,
+      1.5694775605953879e-04,  9.4284982663807307e-05,
+      -4.4663985184858534e-05, -1.4901238055304647e-04,
+      -5.2022971006193295e-04, 1.8673817993987294e-04,
+      3.4824093151832877e-06,  1.9316715603711834e-04,
+      1.8673817993987294e-04,  -3.5661136332554733e-04,
+      9.2514978038235173e-05,  -8.6662703285468833e-07,
+      3.4824093151832877e-06,  9.2514978038235173e-05,
+      -3.4175347533838051e-04, 8.7641158627568827e-05,
+      1.9316715603711834e-04,  -8.6662703285468833e-07,
+      8.7641158627568827e-05,  -2.3228701071836513e-04,
+      -2.3172744537639032e-04, -2.0145061787998218e-05,
+      8.0361470649018278e-05,  7.8686158095771946e-05,
+      -2.0145061787998218e-05, -2.1788437548850563e-04,
+      6.8244915676574325e-05,  4.7753842109443553e-05,
+      8.0361470649018278e-05,  6.8244915676574325e-05,
+      -1.8538465680992033e-04, 6.3885883987663494e-06,
+      7.8686158095771946e-05,  4.7753842109443553e-05,
+      6.3885883987663494e-06,  -1.5576199151219941e-04,
+      -2.6053492859149329e-04, 3.7985008257765683e-05,
+      -4.1618594585238569e-05, 1.2029024299682188e-04,
+      3.7985008257765683e-05,  -2.5883101840537494e-04,
+      2.5194049224745282e-05,  3.9122106536011156e-05,
+      -4.1618594585238569e-05, 2.5194049224745282e-05,
+      -1.8215627475922963e-04, 9.5955293419038848e-05,
+      1.2029024299682188e-04,  3.9122106536011156e-05,
+      9.5955293419038848e-05,  -8.8495058887861342e-05,
+      -8.5077111031875798e-04, 2.7142179798126236e-05,
+      2.3506811184181804e-04,  -4.0221741191713819e-05,
+      2.7142179798126236e-05,  -5.8825160593922092e-04,
+      1.2928972567669391e-04,  1.8492434110243213e-04,
+      2.3506811184181804e-04,  1.2928972567669391e-04,
+      -4.8258381050380911e-04, 3.8157653583435203e-05,
+      -4.0221741191713819e-05, 1.8492434110243213e-04,
+      3.8157653583435203e-05,  -1.3868330525066390e-04,
+      -1.0046100956135153e-03, 1.0071981850677016e-04,
+      2.8872287092350934e-05,  2.7319207214565589e-05,
+      1.0071981850677016e-04,  -5.8995054305638166e-04,
+      1.9420289297011076e-04,  -6.4394518219290390e-05,
+      2.8872287092350934e-05,  1.9420289297011076e-04,
+      -4.6301081539337568e-04, 6.2018876700662711e-05,
+      2.7319207214565589e-05,  -6.4394518219290390e-05,
+      6.2018876700662711e-05,  -3.1496980814001716e-04,
+      -3.5505598207892898e-04, 4.2612480787834219e-05,
+      4.6241360306879771e-06,  2.1108926449578879e-05,
+      4.2612480787834219e-05,  -7.2955350707598794e-06,
+      1.8755820202733821e-05,  -2.6907330166430605e-06,
+      4.6241360306879771e-06,  1.8755820202733821e-05,
+      -1.6364300965012292e-06, -2.4890773965917357e-06,
+      2.1108926449578879e-05,  -2.6907330166430605e-06,
+      -2.4890773965917357e-06, -3.1119034082904818e-06,
+      -1.3157832950043162e-03, 1.5109719057770362e-04,
+      2.2136411827069661e-04,  1.5620989616059463e-04,
+      1.5109719057770362e-04,  -6.9909797727330165e-04,
+      1.3446109058413912e-04,  1.1910768000198964e-04,
+      2.2136411827069661e-04,  1.3446109058413912e-04,
+      -5.1538523518062426e-04, 7.5113122085345235e-05,
+      1.5620989616059463e-04,  1.1910768000198964e-04,
+      7.5113122085345235e-05,  -3.0505421799441914e-04,
+      -3.0462471772100332e-04, 3.9065280474790656e-05,
+      1.4614293236463156e-04,  9.5882906751313209e-05,
+      3.9065280474790656e-05,  -1.9543435847266125e-04,
+      -1.1587160757702439e-05, 1.1931573498340947e-04,
+      1.4614293236463156e-04,  -1.1587160757702439e-05,
+      -1.8470088623319614e-04, 2.0760996552120055e-05,
+      9.5882906751313209e-05,  1.1931573498340947e-04,
+      2.0760996552120055e-05,  -1.6350571548014739e-04,
+      -1.0266618133570869e-03, 2.2637567583948763e-04,
+      4.5648808840412684e-05,  1.0877858120403682e-04,
+      2.2637567583948763e-04,  -3.5855168729171305e-04,
+      1.0575495913196480e-04,  4.7811314368940493e-05,
+      4.5648808840412684e-05,  1.0575495913196480e-04,
+      -2.0795540905090660e-04, -2.8126314053445194e-05,
+      1.0877858120403682e-04,  4.7811314368940493e-05,
+      -2.8126314053445194e-05, -1.7015380275751095e-04,
+      -1.0008635815443789e-03, 1.0735782597685222e-04,
+      -8.2342347671546411e-05, -1.0621035075752513e-04,
+      1.0735782597685222e-04,  -3.9974471825416052e-04,
+      -4.2847890972143996e-05, 1.1148431692879854e-04,
+      -8.2342347671546411e-05, -4.2847890972143996e-05,
+      -1.0364328030944446e-04, 1.8027890592285884e-05,
+      -1.0621035075752513e-04, 1.1148431692879854e-04,
+      1.8027890592285884e-05,  -5.3335283910055137e-05,
+      -1.1022946107544056e-03, 1.2081709463727365e-04,
+      -8.2220753605772352e-06, 1.9181878385071303e-04,
+      1.2081709463727365e-04,  -6.5229668590924652e-04,
+      2.4088842641070782e-04,  3.0891558204400955e-05,
+      -8.2220753605772352e-06, 2.4088842641070782e-04,
+      -2.8026542830655177e-04, 4.0661846631706496e-05,
+      1.9181878385071303e-04,  3.0891558204400955e-05,
+      4.0661846631706496e-05,  -1.8847765937598992e-04,
+      -2.4152123416571131e-04, -8.6922232482105472e-05,
+      1.6955733405143119e-04,  -6.4111931600053363e-05,
+      -8.6922232482105472e-05, -2.3487488104965331e-04,
+      5.1809081045441156e-05,  2.1850790757452379e-05,
+      1.6955733405143119e-04,  5.1809081045441156e-05,
+      -2.0805036913184400e-04, -3.4798889866819799e-05,
+      -6.4111931600053363e-05, 2.1850790757452379e-05,
+      -3.4798889866819799e-05, -1.2312192870086894e-04,
+      -8.8445499497716014e-05, 2.4421506868587198e-05,
+      2.2044662993515158e-06,  6.5601836003054636e-05,
+      2.4421506868587198e-05,  -5.6415856340379447e-05,
+      9.2118704714902022e-05,  -1.9706075570445630e-05,
+      2.2044662993515158e-06,  9.2118704714902022e-05,
+      -4.0161585013125762e-05, 5.5260071625401637e-05,
+      6.5601836003054636e-05,  -1.9706075570445630e-05,
+      5.5260071625401637e-05,  -3.3100174236203148e-05,
+      -8.2209958500230578e-04, -1.0375911757141743e-04,
+      2.0710347102188484e-04,  -2.4978971692820028e-05,
+      -1.0375911757141743e-04, -7.3910742021759002e-04,
+      5.5770606372779485e-06,  1.0998087187508114e-04,
+      2.0710347102188484e-04,  5.5770606372779485e-06,
+      -3.0781601772972918e-04, 1.7042830142744166e-04,
+      -2.4978971692820028e-05, 1.0998087187508114e-04,
+      1.7042830142744166e-04,  -1.9931789083998724e-04,
+      -8.2864124175878646e-04, 2.1156993777400108e-04,
+      1.3222387946538444e-04,  7.3125614419211014e-05,
+      2.1156993777400108e-04,  -6.7998120575166903e-04,
+      -4.5284032495574803e-05, 1.5448833703626970e-04,
+      1.3222387946538444e-04,  -4.5284032495574803e-05,
+      -1.6486495415349870e-04, -2.7910903682007980e-05,
+      7.3125614419211014e-05,  1.5448833703626970e-04,
+      -2.7910903682007980e-05, -1.3616141316573496e-04,
+      -6.9168427037508595e-04, 1.6848337105401589e-04,
+      2.0347536376145238e-05,  2.1730204740541038e-04,
+      1.6848337105401589e-04,  -5.1567181939462376e-04,
+      2.2442346917809767e-04,  -4.1679702475652895e-05,
+      2.0347536376145238e-05,  2.2442346917809767e-04,
+      -2.7886738080818829e-04, -7.2100020041017474e-05,
+      2.1730204740541038e-04,  -4.1679702475652895e-05,
+      -7.2100020041017474e-05, -2.1591694500250683e-04,
+      -6.3118866694050128e-04, 1.7407698534034082e-04,
+      2.2506708310326124e-04,  -1.1855521616198067e-04,
+      1.7407698534034082e-04,  -3.4276284162076975e-04,
+      -2.4678327316965964e-05, -7.0132865868384085e-05,
+      2.2506708310326124e-04,  -2.4678327316965964e-05,
+      -2.4829751991729555e-04, 8.0665853806108728e-05,
+      -1.1855521616198067e-04, -7.0132865868384085e-05,
+      8.0665853806108728e-05,  -1.4560588738992748e-04,
+      -5.4516146248809595e-04, 1.5429566078092839e-04,
+      7.3002508020663260e-05,  -5.4127939476372286e-05,
+      1.5429566078092839e-04,  -2.7006749001243577e-04,
+      3.8245018464081647e-05,  6.4001466507229872e-05,
+      7.3002508020663260e-05,  3.8245018464081647e-05,
+      -1.9064634129416447e-04, 4.8315978083461554e-05,
+      -5.4127939476372286e-05, 6.4001466507229872e-05,
+      4.8315978083461554e-05,  -9.6287270269265838e-05,
+      -1.0867871706203747e-03, 2.2665889399974761e-05,
+      3.8527790611792381e-04,  -5.7606042046986583e-05,
+      2.2665889399974761e-05,  -5.9377950867244152e-04,
+      1.6458617522517848e-04,  9.8647825618332862e-05,
+      3.8527790611792381e-04,  1.6458617522517848e-04,
+      -3.5517610380986437e-04, 1.5047714871992010e-06,
+      -5.7606042046986583e-05, 9.8647825618332862e-05,
+      1.5047714871992010e-06,  -2.4847179034300285e-04,
+      1.7287370288645611e-05,  2.4751905195362399e-05,
+      5.7409278650844171e-06,  1.5778384555210318e-06,
+      2.4751905195362399e-05,  6.9233671176431198e-05,
+      1.2056763828265809e-05,  5.8193528185755228e-06,
+      5.7409278650844171e-06,  1.2056763828265809e-05,
+      2.5803013934721890e-05,  9.7800078032772889e-06,
+      1.5778384555210318e-06,  5.8193528185755228e-06,
+      9.7800078032772889e-06,  1.2290894822139189e-05,
+      -4.7276129504627868e-04, 1.7996071145545987e-04,
+      -2.3559613078385275e-05, 9.1817364759875372e-05,
+      1.7996071145545987e-04,  -4.4031640931680089e-04,
+      1.3129665680176059e-04,  8.5753307504504326e-05,
+      -2.3559613078385275e-05, 1.3129665680176059e-04,
+      -2.6082369278761529e-04, 1.1008038444960020e-04,
+      9.1817364759875372e-05,  8.5753307504504326e-05,
+      1.1008038444960020e-04,  -2.2448972892423485e-04,
+      7.3552911328923082e-05,  -4.6386393976681180e-07,
+      -3.6302661188033895e-07, 8.1292949427203692e-06,
+      -4.6386393976681180e-07, 7.1855824442516002e-05,
+      2.0977058635296311e-06,  -8.1660148046199487e-07,
+      -3.6302661188033895e-07, 2.0977058635296311e-06,
+      4.9178179355036989e-05,  5.5111033912396161e-07,
+      8.1292949427203692e-06,  -8.1660148046199487e-07,
+      5.5111033912396161e-07,  4.3126931159686622e-05,
+      -1.0356645478699321e-03, 3.5655021996677344e-05,
+      1.7863455145561712e-04,  9.9345386863453332e-05,
+      3.5655021996677344e-05,  -4.6868369051367967e-04,
+      8.4528986648654719e-05,  2.6178454134616406e-05,
+      1.7863455145561712e-04,  8.4528986648654719e-05,
+      -4.0189105052683167e-04, 1.7827808102152204e-04,
+      9.9345386863453332e-05,  2.6178454134616406e-05,
+      1.7827808102152204e-04,  -2.4096643692971789e-04,
+      -5.2947177689736285e-04, 2.9155995696660334e-04,
+      1.0881309013420403e-04,  6.5216605248019719e-05,
+      2.9155995696660334e-04,  -3.7174127902678917e-04,
+      -1.0713195660439511e-05, -1.4652487345689542e-04,
+      1.0881309013420403e-04,  -1.0713195660439511e-05,
+      -3.0848787251277967e-04, -8.1788880974243585e-05,
+      6.5216605248019719e-05,  -1.4652487345689542e-04,
+      -8.1788880974243585e-05, -1.8187514475847530e-04,
+      -8.1675147016875278e-04, -9.1132812142916361e-05,
+      3.7581090557227133e-04,  -1.6391843424681004e-05,
+      -9.1132812142916361e-05, -8.0928426677666680e-04,
+      1.0186682914852628e-04,  6.6974048500429245e-05,
+      3.7581090557227133e-04,  1.0186682914852628e-04,
+      -3.9501800512992450e-04, 1.1097843258641806e-04,
+      -1.6391843424681004e-05, 6.6974048500429245e-05,
+      1.1097843258641806e-04,  -1.6922741791056576e-04,
+      -9.5550488243767526e-04, 8.9834092429790310e-05,
+      1.4676450162607620e-04,  -8.9486200173685058e-05,
+      8.9834092429790310e-05,  -5.7124935343482407e-04,
+      1.9674652749940278e-04,  2.7820147231366192e-05,
+      1.4676450162607620e-04,  1.9674652749940278e-04,
+      -4.5311825527210236e-04, 1.5751505797459724e-04,
+      -8.9486200173685058e-05, 2.7820147231366192e-05,
+      1.5751505797459724e-04,  -2.1504103761769665e-04,
+      -7.4655646881417747e-04, 1.7137670375811731e-04,
+      2.3305985967898233e-04,  2.0974576016239595e-05,
+      1.7137670375811731e-04,  -6.3490393076444808e-04,
+      -3.7460934394642461e-05, 3.8915692525215832e-05,
+      2.3305985967898233e-04,  -3.7460934394642461e-05,
+      -2.3619871093608590e-04, -9.5966181361813001e-05,
+      2.0974576016239595e-05,  3.8915692525215832e-05,
+      -9.5966181361813001e-05, -1.5753671328738253e-04,
+      -8.4952522473083828e-04, 1.0905671249046153e-04,
+      1.6050124252773700e-04,  -3.5054030436609191e-06,
+      1.0905671249046153e-04,  -7.8916382592320218e-04,
+      5.4410597072967622e-05,  3.1697067660733621e-04,
+      1.6050124252773700e-04,  5.4410597072967622e-05,
+      -6.2431502557835482e-04, 4.0685779353217203e-05,
+      -3.5054030436609191e-06, 3.1697067660733621e-04,
+      4.0685779353217203e-05,  -3.1869838579109982e-04,
+      -4.9197474900203851e-04, 2.3175272353917089e-05,
+      4.8349993627407357e-05,  8.3431439033521311e-05,
+      2.3175272353917089e-05,  -2.1237686325728994e-04,
+      1.1579272422842471e-04,  4.4948063413445143e-06,
+      4.8349993627407357e-05,  1.1579272422842471e-04,
+      -1.4387542985386714e-04, -2.8807020627775499e-05,
+      8.3431439033521311e-05,  4.4948063413445143e-06,
+      -2.8807020627775499e-05, -8.8314386801545690e-05,
+      -1.0377932964218571e-03, 3.1548342438866215e-04,
+      -7.7539032460135759e-06, 1.5862268899707329e-04,
+      3.1548342438866215e-04,  -5.2279750482530675e-04,
+      -9.7156212604978696e-05, 7.8070746092368879e-05,
+      -7.7539032460135759e-06, -9.7156212604978696e-05,
+      -5.1909921397954076e-04, -7.0888358650239406e-05,
+      1.5862268899707329e-04,  7.8070746092368879e-05,
+      -7.0888358650239406e-05, -4.3559478522129486e-04,
+      -3.4047542887403288e-04, 9.9485414683848702e-05,
+      5.3467360125445010e-05,  5.5687160259303899e-05,
+      9.9485414683848702e-05,  -2.1688590318227243e-04,
+      5.8207991561983353e-05,  3.8807798300090633e-05,
+      5.3467360125445010e-05,  5.8207991561983353e-05,
+      -1.4444518993710908e-04, 6.9712029871340034e-05,
+      5.5687160259303899e-05,  3.8807798300090633e-05,
+      6.9712029871340034e-05,  -8.8308431324746653e-05,
+      -3.9748580631234745e-04, 1.6061520126347354e-05,
+      7.4535335825201364e-05,  -1.2455905762848885e-05,
+      1.6061520126347354e-05,  -3.0428877235287100e-04,
+      6.7638842446531838e-05,  9.0216218435291209e-05,
+      7.4535335825201364e-05,  6.7638842446531838e-05,
+      -1.8476291197258023e-04, 8.6490256848873625e-05,
+      -1.2455905762848885e-05, 9.0216218435291209e-05,
+      8.6490256848873625e-05,  -1.7290120999552497e-04,
+      -6.3017890323441988e-04, 6.8925477551151667e-05,
+      1.4591855761896425e-04,  5.8696848733904197e-05,
+      6.8925477551151667e-05,  -4.4840596419734699e-04,
+      1.7185871633737483e-04,  1.3636588971003942e-04,
+      1.4591855761896425e-04,  1.7185871633737483e-04,
+      -3.5633566053381324e-04, 6.9798068644545436e-05,
+      5.8696848733904197e-05,  1.3636588971003942e-04,
+      6.9798068644545436e-05,  -2.8194929183414266e-04,
+      -5.2084239851836716e-04, 1.3919308449155372e-04,
+      7.9297302299835321e-05,  -1.0666209674729959e-04,
+      1.3919308449155372e-04,  -4.8272864193460750e-04,
+      1.4580575117746091e-04,  1.8231015042159636e-04,
+      7.9297302299835321e-05,  1.4580575117746091e-04,
+      -2.4109836209905614e-04, -9.6833449169729229e-05,
+      -1.0666209674729959e-04, 1.8231015042159636e-04,
+      -9.6833449169729229e-05, -2.2652176409735576e-04,
+      -5.9265052462615796e-04, 3.2921496507552024e-05,
+      1.6741839899836377e-04,  -1.8287094414409389e-05,
+      3.2921496507552024e-05,  -4.7506545424716396e-04,
+      8.4264843353050377e-05,  3.2049421513026473e-05,
+      1.6741839899836377e-04,  8.4264843353050377e-05,
+      -2.6830906645465910e-04, 1.0562771031346049e-04,
+      -1.8287094414409389e-05, 3.2049421513026473e-05,
+      1.0562771031346049e-04,  -1.8350092261002626e-04,
+      -7.5685265706840210e-04, 2.5873322291747580e-04,
+      6.4147355541750696e-06,  4.8379384836955772e-05,
+      2.5873322291747580e-04,  -4.5814972293314378e-04,
+      5.2257125816072260e-05,  1.7555007655928345e-04,
+      6.4147355541750696e-06,  5.2257125816072260e-05,
+      -4.5065665859371718e-04, 1.3365888276934993e-04,
+      4.8379384836955772e-05,  1.7555007655928345e-04,
+      1.3365888276934993e-04,  -2.8055080587645673e-04,
+      -6.8617367385189203e-04, 2.0111488204472137e-04,
+      -3.5677735636164015e-05, 1.5038190317367591e-05,
+      2.0111488204472137e-04,  -6.8307312914361548e-04,
+      3.7317688096927270e-04,  2.2841592720268073e-05,
+      -3.5677735636164015e-05, 3.7317688096927270e-04,
+      -5.7456997444991467e-04, 9.8364592445165106e-05,
+      1.5038190317367591e-05,  2.2841592720268073e-05,
+      9.8364592445165106e-05,  -4.9874689148873266e-04,
+      -9.7937117522682625e-04, 1.3365493445469090e-04,
+      -3.5389680247569871e-05, 1.7409836485831839e-04,
+      1.3365493445469090e-04,  -3.0622931287801688e-04,
+      2.0447820578798172e-04,  -2.0241678808795576e-05,
+      -3.5389680247569871e-05, 2.0447820578798172e-04,
+      -2.9965533718679050e-04, -5.0337561347213358e-05,
+      1.7409836485831839e-04,  -2.0241678808795576e-05,
+      -5.0337561347213358e-05, -9.7218271603616025e-05,
+      -7.2361999623641096e-04, 6.0238770967297993e-05,
+      9.5583099624811875e-06,  -4.8817196772293789e-05,
+      6.0238770967297993e-05,  -4.2873796707020983e-04,
+      3.5290062490134286e-05,  1.0186668437085330e-04,
+      9.5583099624811875e-06,  3.5290062490134286e-05,
+      -1.7327171153624178e-04, 7.5826006592362129e-05,
+      -4.8817196772293789e-05, 1.0186668437085330e-04,
+      7.5826006592362129e-05,  -6.6625240506338742e-05,
+      -5.1082007985057531e-04, 9.0145069011309307e-05,
+      1.0545714803937606e-04,  9.9838943414637320e-05,
+      9.0145069011309307e-05,  -2.9666343813120901e-04,
+      1.6607259863244859e-04,  -8.0518077824595215e-05,
+      1.0545714803937606e-04,  1.6607259863244859e-04,
+      -2.7888486151681505e-04, 9.9492300205694249e-05,
+      9.9838943414637320e-05,  -8.0518077824595215e-05,
+      9.9492300205694249e-05,  -1.6153484908064224e-04,
+      -3.8893832026680984e-04, 8.4480598688399479e-05,
+      1.3917939203462009e-04,  6.4028139288043303e-05,
+      8.4480598688399479e-05,  -2.9031480540068355e-04,
+      4.1581354137412694e-05,  1.3719607068135917e-04,
+      1.3917939203462009e-04,  4.1581354137412694e-05,
+      -2.5727897912316704e-04, 2.3355557894492145e-05,
+      6.4028139288043303e-05,  1.3719607068135917e-04,
+      2.3355557894492145e-05,  -2.0869295477029555e-04,
+      -1.2940743236606971e-03, 9.9242112186229852e-05,
+      -3.0883079008754396e-05, 2.4110922845507835e-04,
+      9.9242112186229852e-05,  -5.4844927137480013e-04,
+      1.5284676340009107e-04,  1.5074954460357989e-04,
+      -3.0883079008754396e-05, 1.5284676340009107e-04,
+      -3.5890880634564429e-04, -9.2111965277796469e-05,
+      2.4110922845507835e-04,  1.5074954460357989e-04,
+      -9.2111965277796469e-05, -2.1606644503010969e-04,
+      -3.6859095195743705e-04, 1.2811340827727349e-04,
+      7.1617940606111583e-05,  1.2147695942115610e-04,
+      1.2811340827727349e-04,  -2.4093211232803426e-04,
+      -4.5817408623953358e-05, -2.6602561355328134e-05,
+      7.1617940606111583e-05,  -4.5817408623953358e-05,
+      -1.8715896488104966e-04, -2.9073404906152265e-06,
+      1.2147695942115610e-04,  -2.6602561355328134e-05,
+      -2.9073404906152265e-06, -1.1706145694897137e-04,
+      -6.6786161879298469e-04, 3.5686415150814899e-05,
+      1.5672913017009308e-04,  8.1161443650996505e-05,
+      3.5686415150814899e-05,  -4.8659658995020956e-04,
+      1.8844759048223573e-04,  1.8572079841468395e-05,
+      1.5672913017009308e-04,  1.8844759048223573e-04,
+      -3.3591563943804917e-04, 5.8356892899359846e-05,
+      8.1161443650996505e-05,  1.8572079841468395e-05,
+      5.8356892899359846e-05,  -2.5729244353890580e-04,
+      -4.9057710957934610e-04, 3.5189384292357178e-05,
+      1.2768527966492655e-04,  7.7111559864786111e-05,
+      3.5189384292357178e-05,  -3.3022996564975609e-04,
+      1.1998806440725087e-04,  7.9127474986119613e-05,
+      1.2768527966492655e-04,  1.1998806440725087e-04,
+      -2.2746335211673159e-04, 8.6400013414019474e-05,
+      7.7111559864786111e-05,  7.9127474986119613e-05,
+      8.6400013414019474e-05,  -2.0017500168890246e-04,
+      -1.1930564853647864e-03, 3.6059432907568098e-04,
+      -4.5802509267350180e-05, 1.6549786604919792e-04,
+      3.6059432907568098e-04,  -6.6459120881215568e-04,
+      2.2815747528720748e-04,  5.2558786107152938e-05,
+      -4.5802509267350180e-05, 2.2815747528720748e-04,
+      -6.2636194192342237e-04, 1.8747959874315315e-05,
+      1.6549786604919792e-04,  5.2558786107152938e-05,
+      1.8747959874315315e-05,  -3.2850961953909732e-04,
+      7.8706872876127071e-02,  -2.2653584740143385e-02,
+      9.7740231630116456e-04,  -8.0082023113196138e-04,
+      -2.2653584740143385e-02, -8.9447422099334685e-03,
+      -8.8792954595688558e-04, 1.1368137224524006e-03,
+      9.7740231630116456e-04,  -8.8792954595688558e-04,
+      -7.7069586145163686e-04, 2.3926064253705276e-04,
+      -8.0082023113196138e-04, 1.1368137224524006e-03,
+      2.3926064253705276e-04,  -6.6984821695051074e-04,
+      -1.0791500301598586e-01, -3.2421420547026750e-02,
+      -1.6310176240427882e-03, -1.6324544291089519e-03,
+      -3.2421420547026750e-02, -1.0068088253218213e-02,
+      3.7700151002736698e-04,  1.5579093564988948e-03,
+      -1.6310176240427882e-03, 3.7700151002736698e-04,
+      -6.4009621154418304e-04, 1.3364317447552753e-04,
+      -1.6324544291089519e-03, 1.5579093564988948e-03,
+      1.3364317447552753e-04,  -4.5908668219426775e-04,
+      1.9931647893026413e-02,  1.8868322719129860e-02,
+      -2.6059845702808022e-04, -1.7677688147390257e-04,
+      1.8868322719129860e-02,  -2.0969852051011485e-04,
+      3.6441845096447370e-04,  2.7994468344729460e-04,
+      -2.6059845702808022e-04, 3.6441845096447370e-04,
+      -2.8402192286764761e-04, -5.7796869103119775e-06,
+      -1.7677688147390257e-04, 2.7994468344729460e-04,
+      -5.7796869103119775e-06, -2.7098438358809471e-04,
+      -2.5171876544209310e-01, -1.6215224556807420e-02,
+      -1.7135769001415840e-03, 1.5448964301369569e-03,
+      -1.6215224556807420e-02, -2.3023489339995557e-03,
+      1.5454079463723322e-03,  1.4540171518868922e-04,
+      -1.7135769001415840e-03, 1.5454079463723322e-03,
+      -1.5783435634697221e-03, -5.6267811205058696e-04,
+      1.5448964301369569e-03,  1.4540171518868922e-04,
+      -5.6267811205058696e-04, -1.4668478620555464e-03,
+      9.0064272324944780e-02,  1.0159854544206332e-02,
+      -1.8417068624857591e-03, 1.5581049875507725e-03,
+      1.0159854544206332e-02,  -1.2626008140885292e-03,
+      5.5672594951240815e-04,  -6.6992565886242451e-04,
+      -1.8417068624857591e-03, 5.5672594951240815e-04,
+      -6.3183326791236682e-04, 3.4048390925801674e-04,
+      1.5581049875507725e-03,  -6.6992565886242451e-04,
+      3.4048390925801674e-04,  -4.5748292658650842e-04,
+      -2.2443359343759173e-01, 2.3550848933061329e-02,
+      -1.3605814923240537e-03, 1.6581748189729430e-03,
+      2.3550848933061329e-02,  -1.2119208694257528e-03,
+      1.1877044380419087e-03,  -8.7237437059008887e-04,
+      -1.3605814923240537e-03, 1.1877044380419087e-03,
+      -1.0281058010702063e-03, -1.6587408018026506e-04,
+      1.6581748189729430e-03,  -8.7237437059008887e-04,
+      -1.6587408018026506e-04, -1.0202663289786004e-03,
+      -2.6189782764010544e-01, 2.2986538547258420e-02,
+      -1.7632069264149162e-03, 1.5107406905049286e-03,
+      2.2986538547258420e-02,  -1.2746144428941505e-03,
+      4.8343451999544908e-04,  -5.7379894194384067e-04,
+      -1.7632069264149162e-03, 4.8343451999544908e-04,
+      -5.6480453719830008e-04, 3.4954233236744335e-04,
+      1.5107406905049286e-03,  -5.7379894194384067e-04,
+      3.4954233236744335e-04,  -4.8900362963805524e-04,
+      -1.2749879550430313e-01, 2.8523823603441460e-02,
+      -1.9125154882043663e-03, 2.7084423469016369e-03,
+      2.8523823603441460e-02,  -2.3862377659643530e-03,
+      5.1372875563453473e-04,  -4.3154165895067698e-04,
+      -1.9125154882043663e-03, 5.1372875563453473e-04,
+      -1.1276693699735670e-03, -8.2543254355266842e-05,
+      2.7084423469016369e-03,  -4.3154165895067698e-04,
+      -8.2543254355266842e-05, -1.0951105630241573e-03,
+      -1.0158866176089715e-01, 1.6602982315561777e-02,
+      -1.7772582548894245e-03, 4.3920384137976376e-04,
+      1.6602982315561777e-02,  -4.7547131566108777e-03,
+      6.8783893805187952e-05,  -1.0748720663370313e-03,
+      -1.7772582548894245e-03, 6.8783893805187952e-05,
+      -5.0205120710799611e-04, -3.3919151133434616e-05,
+      4.3920384137976376e-04,  -1.0748720663370313e-03,
+      -3.3919151133434616e-05, -3.1422927937441901e-04,
+      1.5108981059488835e-01,  2.1974044174935103e-03,
+      4.9973793800626900e-04,  -2.0324655241621987e-03,
+      2.1974044174935103e-03,  -2.0249834345665027e-03,
+      2.8912368069502044e-04,  -3.9641649235281848e-04,
+      4.9973793800626900e-04,  2.8912368069502044e-04,
+      -1.4279057483715719e-03, 2.3314451382963299e-04,
+      -2.0324655241621987e-03, -3.9641649235281848e-04,
+      2.3314451382963299e-04,  -9.6409814362172263e-04,
+      -9.7276844843051602e-02, 3.5504450866312058e-03,
+      3.8203830614491319e-03,  -1.4507036855037911e-03,
+      3.5504450866312058e-03,  -1.1050016265867703e-03,
+      3.0086962405142534e-05,  2.4446145351075344e-04,
+      3.8203830614491319e-03,  3.0086962405142534e-05,
+      -1.8455518785688229e-03, 6.3062318909916208e-04,
+      -1.4507036855037911e-03, 2.4446145351075344e-04,
+      6.3062318909916208e-04,  -8.0531364204684895e-04,
+      2.9804711899684446e-02,  -1.5978165071995631e-02,
+      1.0778430397325950e-03,  -1.4446815201809952e-03,
+      -1.5978165071995631e-02, -5.6345296642827322e-03,
+      -1.0626058855508918e-03, 5.0267944657573355e-04,
+      1.0778430397325950e-03,  -1.0626058855508918e-03,
+      -1.0914650387547676e-03, 4.3259937074976290e-04,
+      -1.4446815201809952e-03, 5.0267944657573355e-04,
+      4.3259937074976290e-04,  -4.5142572576815595e-04,
+      -6.9494494593566603e-02, -2.0051133800610695e-02,
+      5.8710738033530515e-04,  3.5010545121283096e-04,
+      -2.0051133800610695e-02, -7.1098727986929232e-03,
+      -2.5423159406186779e-04, -9.3348474227718226e-04,
+      5.8710738033530515e-04,  -2.5423159406186779e-04,
+      -6.0375150441947952e-04, 2.7259811627839867e-04,
+      3.5010545121283096e-04,  -9.3348474227718226e-04,
+      2.7259811627839867e-04,  -5.4004855753860101e-04,
+      1.5401777363000305e-01,  8.1160613829283474e-04,
+      9.2628051082108370e-04,  3.6211771998279100e-03,
+      8.1160613829283474e-04,  -1.7141151995087387e-03,
+      -3.3990186206657631e-04, -2.5417245475395459e-04,
+      9.2628051082108370e-04,  -3.3990186206657631e-04,
+      -1.4122852387503289e-03, -1.0293746792916486e-04,
+      3.6211771998279100e-03,  -2.5417245475395459e-04,
+      -1.0293746792916486e-04, -1.2188212673550846e-03,
+      -1.2066137822324684e-01, -2.1235012929347232e-02,
+      1.6254696003078779e-03,  -1.2375969117924457e-03,
+      -2.1235012929347232e-02, -6.7234109883531987e-03,
+      4.4368086424732868e-05,  5.4519256781239238e-04,
+      1.6254696003078779e-03,  4.4368086424732868e-05,
+      -1.7851002974240047e-03, 2.6435810786128557e-04,
+      -1.2375969117924457e-03, 5.4519256781239238e-04,
+      2.6435810786128557e-04,  -9.5219784353280741e-04,
+      -2.0428062162336544e-01, -1.8822402400988779e-02,
+      8.0995888860823685e-04,  -1.1746114214952719e-03,
+      -1.8822402400988779e-02, -3.1320256402858366e-03,
+      -9.4173163158250614e-04, 5.3739665278471852e-04,
+      8.0995888860823685e-04,  -9.4173163158250614e-04,
+      -8.4068628230452394e-04, 2.2479483070440945e-04,
+      -1.1746114214952719e-03, 5.3739665278471852e-04,
+      2.2479483070440945e-04,  -7.5550494619625963e-04,
+      2.0478820977633771e-02,  5.6432816347106261e-03,
+      1.5567905281500807e-03,  3.1015131190170152e-03,
+      5.6432816347106261e-03,  -1.8593392130277192e-03,
+      -1.0887013744527499e-03, -5.9319521599679871e-04,
+      1.5567905281500807e-03,  -1.0887013744527499e-03,
+      -1.8709510047864347e-03, -1.6277602269798984e-04,
+      3.1015131190170152e-03,  -5.9319521599679871e-04,
+      -1.6277602269798984e-04, -1.8685922653237117e-03,
+      6.5107726886614783e-03,  -3.6209927731580757e-03,
+      -3.8090764358667738e-04, 4.8090075186154230e-04,
+      -3.6209927731580757e-03, -2.5033792471679753e-03,
+      2.3549698237273272e-04,  -6.0379270164610918e-04,
+      -3.8090764358667738e-04, 2.3549698237273272e-04,
+      -7.2631802550718566e-04, 3.6336152250252679e-04,
+      4.8090075186154230e-04,  -6.0379270164610918e-04,
+      3.6336152250252679e-04,  -5.9720775259365871e-04,
+      1.5597924802072299e-01,  -9.1287267636976852e-03,
+      5.1940698209609363e-03,  -1.9270196737078854e-03,
+      -9.1287267636976852e-03, -1.0171355227070189e-03,
+      -8.3707115487426011e-04, 2.6648470329260969e-04,
+      5.1940698209609363e-03,  -8.3707115487426011e-04,
+      -1.5133538294868042e-03, 5.0193401691492020e-04,
+      -1.9270196737078854e-03, 2.6648470329260969e-04,
+      5.0193401691492020e-04,  -7.5563292076018306e-04,
+      -5.1184574889172579e-02, 2.7987397416586360e-02,
+      7.5080665467791617e-04,  7.0012226943657642e-04,
+      2.7987397416586360e-02,  -2.1716159037027593e-03,
+      -4.8086372671787576e-04, -9.8018175618315873e-04,
+      7.5080665467791617e-04,  -4.8086372671787576e-04,
+      -7.3577207641989818e-04, 2.6562080093313106e-04,
+      7.0012226943657642e-04,  -9.8018175618315873e-04,
+      2.6562080093313106e-04,  -6.0973448697062660e-04,
+      1.3382668541587697e-01,  -2.2722610733945561e-02,
+      -1.2434543017359170e-03, -1.2728876503270323e-03,
+      -2.2722610733945561e-02, -4.3688188071787232e-03,
+      7.7645284186351441e-04,  6.3845101211561532e-04,
+      -1.2434543017359170e-03, 7.7645284186351441e-04,
+      -7.3990375201463685e-04, -2.2683288029159060e-04,
+      -1.2728876503270323e-03, 6.3845101211561532e-04,
+      -2.2683288029159060e-04, -6.7815800933636403e-04,
+      1.3120488245988418e-01,  4.8547089951903696e-02,
+      5.2274693388694210e-04,  4.0755937244519420e-04,
+      4.8547089951903696e-02,  -8.3907942102404050e-03,
+      -3.8252224092104130e-04, -4.0281273644884673e-04,
+      5.2274693388694210e-04,  -3.8252224092104130e-04,
+      -6.3441175364586163e-04, 3.6516056969289123e-04,
+      4.0755937244519420e-04,  -4.0281273644884673e-04,
+      3.6516056969289123e-04,  -6.1351796194961811e-04,
+      -1.5706881122068012e-01, -2.8064609799324989e-02,
+      5.2866261510743358e-04,  -5.0158079779070628e-04,
+      -2.8064609799324989e-02, -7.9255987347370254e-03,
+      -7.2907309715153476e-04, 2.0023722312570202e-04,
+      5.2866261510743358e-04,  -7.2907309715153476e-04,
+      -8.7726820594463044e-04, 9.3347733493378919e-04,
+      -5.0158079779070628e-04, 2.0023722312570202e-04,
+      9.3347733493378919e-04,  -7.6444145077611959e-04,
+      -2.4185449825431532e-01, 9.3190796925950465e-03,
+      -1.0360517460863226e-03, 7.5166654771481383e-04,
+      9.3190796925950465e-03,  -1.8089927328024354e-03,
+      4.5529238180270712e-04,  -6.7548648300266129e-04,
+      -1.0360517460863226e-03, 4.5529238180270712e-04,
+      -9.8195456656893299e-04, 3.2953602624600028e-04,
+      7.5166654771481383e-04,  -6.7548648300266129e-04,
+      3.2953602624600028e-04,  -7.5088680434060126e-04,
+      -6.2266646786067938e-03, 4.3482306684820587e-02,
+      -6.7931544150714681e-04, -1.1318057792932172e-03,
+      4.3482306684820587e-02,  -3.0163016880633831e-03,
+      1.1154407743767898e-03,  1.2680847876108238e-03,
+      -6.7931544150714681e-04, 1.1154407743767898e-03,
+      -6.2686682105641005e-04, 2.4799994581475311e-05,
+      -1.1318057792932172e-03, 1.2680847876108238e-03,
+      2.4799994581475311e-05,  -4.8261912829217966e-04,
+      4.4034487767024420e-02,  1.2027693141635914e-02,
+      -3.1617252369308421e-04, -1.3810034906793043e-03,
+      1.2027693141635914e-02,  -1.9189382672973364e-03,
+      3.5771288333242082e-04,  3.2903469104204127e-04,
+      -3.1617252369308421e-04, 3.5771288333242082e-04,
+      -7.7434480573853985e-04, 1.4662364658262446e-04,
+      -1.3810034906793043e-03, 3.2903469104204127e-04,
+      1.4662364658262446e-04,  -7.1146947717666163e-04,
+      -1.7439979214498852e-01, 2.5559515911645361e-02,
+      -1.3609574326154626e-03, -8.1347979141622453e-04,
+      2.5559515911645361e-02,  -1.9101252004779083e-03,
+      5.8401786740861872e-04,  2.6463889212369153e-04,
+      -1.3609574326154626e-03, 5.8401786740861872e-04,
+      -1.2022614404840581e-03, 1.9183593028871340e-04,
+      -8.1347979141622453e-04, 2.6463889212369153e-04,
+      1.9183593028871340e-04,  -7.2688162163724512e-04,
+      -2.6243218345287456e-01, 4.9398209870091560e-03,
+      -1.5696233107405566e-03, 1.0567220238967725e-03,
+      4.9398209870091560e-03,  -1.4022787059415794e-03,
+      5.8256688059597295e-04,  1.9025682403384436e-05,
+      -1.5696233107405566e-03, 5.8256688059597295e-04,
+      -1.3070453333682249e-03, -3.3851763928833433e-04,
+      1.0567220238967725e-03,  1.9025682403384436e-05,
+      -3.3851763928833433e-04, -1.2320723061259361e-03,
+      -8.2714390070501104e-02, 1.0975412304762084e-02,
+      -4.2098382115161030e-04, -6.7702215349947506e-04,
+      1.0975412304762084e-02,  -8.7719105355609575e-03,
+      6.4626568971351604e-04,  3.4082672855708366e-04,
+      -4.2098382115161030e-04, 6.4626568971351604e-04,
+      -5.3896694179129197e-04, 1.8641783252149489e-04,
+      -6.7702215349947506e-04, 3.4082672855708366e-04,
+      1.8641783252149489e-04,  -3.3770767382408195e-04,
+      -1.2193197837136689e-01, -1.5380109380286541e-02,
+      3.3246097773883404e-04,  -1.6628731429037037e-03,
+      -1.5380109380286541e-02, -2.5373182102857170e-03,
+      -2.0443003453976912e-05, 8.2993773737306147e-04,
+      3.3246097773883404e-04,  -2.0443003453976912e-05,
+      -8.8731220769894020e-04, -6.0009982488096482e-05,
+      -1.6628731429037037e-03, 8.2993773737306147e-04,
+      -6.0009982488096482e-05, -7.1835196502551967e-04,
+      7.8012293187519954e-02,  3.1132058063563710e-02,
+      -1.1696899428820292e-03, -1.3753629772876416e-03,
+      3.1132058063563710e-02,  -2.9320533071647394e-03,
+      4.4209006788966515e-04,  5.4507080398151985e-04,
+      -1.1696899428820292e-03, 4.4209006788966515e-04,
+      -6.3061860103715917e-04, 2.3835122850300404e-04,
+      -1.3753629772876416e-03, 5.4507080398151985e-04,
+      2.3835122850300404e-04,  -4.8807112103491492e-04,
+      1.2489186610021678e-01,  -4.2121994803512846e-03,
+      -1.4615798111060961e-03, -1.3284544933660910e-03,
+      -4.2121994803512846e-03, -1.9469301448134307e-03,
+      2.4383944981524552e-04,  9.0191595070716760e-04,
+      -1.4615798111060961e-03, 2.4383944981524552e-04,
+      -8.4489411054921443e-04, -3.2983470816998103e-04,
+      -1.3284544933660910e-03, 9.0191595070716760e-04,
+      -3.2983470816998103e-04, -5.5892089821921545e-04,
+      -1.3647684084529246e-01, 3.9776937406723417e-02,
+      6.1334383189688029e-04,  -8.9816660782599911e-04,
+      3.9776937406723417e-02,  -4.3020139627246353e-04,
+      -9.3557080941627938e-04, 7.7958935546057592e-04,
+      6.1334383189688029e-04,  -9.3557080941627938e-04,
+      -3.5211495153066340e-04, 5.8831013523111768e-05,
+      -8.9816660782599911e-04, 7.7958935546057592e-04,
+      5.8831013523111768e-05,  -3.1382659001747163e-04,
+      -1.6918244557222134e-01, -2.6159967833569130e-02,
+      -1.5521334165285558e-03, -1.6552342040953793e-03,
+      -2.6159967833569130e-02, -8.5733501045166700e-03,
+      1.1884549627826878e-03,  2.9341595161121387e-04,
+      -1.5521334165285558e-03, 1.1884549627826878e-03,
+      -6.8065594315602643e-04, 1.8544180606965729e-04,
+      -1.6552342040953793e-03, 2.9341595161121387e-04,
+      1.8544180606965729e-04,  -4.5742794945487230e-04,
+      -2.0437904412209100e-02, 2.0278428462028566e-02,
+      -3.1833318892537861e-04, -1.5672358504625003e-03,
+      2.0278428462028566e-02,  -2.3308820806625680e-03,
+      -9.2413164888836781e-05, 1.2261910114897223e-03,
+      -3.1833318892537861e-04, -9.2413164888836781e-05,
+      -6.1509973454375463e-04, -3.1369065713858962e-04,
+      -1.5672358504625003e-03, 1.2261910114897223e-03,
+      -3.1369065713858962e-04, -6.1311372710779475e-04,
+      -2.1068663677001948e-01, 3.0586587729756580e-02,
+      -1.4638083866968235e-03, -1.7663639913775599e-03,
+      3.0586587729756580e-02,  -2.4896000109830450e-03,
+      1.0878021744662348e-03,  5.5784495882122158e-04,
+      -1.4638083866968235e-03, 1.0878021744662348e-03,
+      -8.5226768150814753e-04, 2.4896475900177203e-04,
+      -1.7663639913775599e-03, 5.5784495882122158e-04,
+      2.4896475900177203e-04,  -7.2954677906140835e-04,
+      -2.0670514661104566e-01, 2.3101872560098982e-03,
+      -6.3615804468624706e-04, -1.0191121784747393e-03,
+      2.3101872560098982e-03,  -4.5012725771569565e-03,
+      3.0966024212130121e-04,  7.3480770548280236e-04,
+      -6.3615804468624706e-04, 3.0966024212130121e-04,
+      -7.6577049132799655e-04, 1.9069869693581136e-04,
+      -1.0191121784747393e-03, 7.3480770548280236e-04,
+      1.9069869693581136e-04,  -3.3060687835284803e-04,
+      1.4979203013904469e-01,  7.1888739604317664e-03,
+      -1.6498112176118718e-03, 7.6090317854697837e-04,
+      7.1888739604317664e-03,  -1.6847252134716941e-03,
+      9.1765413550792092e-04,  -8.1660576268565398e-04,
+      -1.6498112176118718e-03, 9.1765413550792092e-04,
+      -5.6736119553899445e-04, 2.4293054607895927e-04,
+      7.6090317854697837e-04,  -8.1660576268565398e-04,
+      2.4293054607895927e-04,  -3.0141462646083878e-04,
+      -2.5893180917593467e-01, -8.1773154879442519e-03,
+      -1.8051666448928912e-03, -1.7078409149329419e-03,
+      -8.1773154879442519e-03, -2.4391627814064182e-03,
+      2.5949401569512947e-04,  7.3814751053530889e-04,
+      -1.8051666448928912e-03, 2.5949401569512947e-04,
+      -8.9181287706463540e-04, 1.2172298924298193e-04,
+      -1.7078409149329419e-03, 7.3814751053530889e-04,
+      1.2172298924298193e-04,  -5.5666018893034495e-04,
+      1.3114296578438139e-01,  -6.0075695408917090e-03,
+      4.3242635386152908e-04,  -1.6174936298550607e-03,
+      -6.0075695408917090e-03, -2.1679629901973250e-03,
+      -1.4790030682634993e-04, 4.0251669474305628e-04,
+      4.3242635386152908e-04,  -1.4790030682634993e-04,
+      -6.4946095998946827e-04, 2.8330245317322158e-04,
+      -1.6174936298550607e-03, 4.0251669474305628e-04,
+      2.8330245317322158e-04,  -5.4324532966633539e-04,
+      1.5527234406654167e-01,  4.5641696399707443e-02,
+      -1.1740442464421269e-03, -1.5447474252538395e-03,
+      4.5641696399707443e-02,  -6.1793537674581797e-03,
+      1.5275860858276267e-03,  8.6030738024539066e-04,
+      -1.1740442464421269e-03, 1.5275860858276267e-03,
+      -7.8182161800352231e-04, -1.5885947204160602e-04,
+      -1.5447474252538395e-03, 8.6030738024539066e-04,
+      -1.5885947204160602e-04, -7.3143195173575981e-04,
+      1.1697380540622421e-01,  -1.8452554290786850e-03,
+      1.0016961110488366e-03,  -1.4829998143344009e-03,
+      -1.8452554290786850e-03, -1.1489869626221500e-03,
+      -4.0492587305436355e-04, 2.7582642932447637e-04,
+      1.0016961110488366e-03,  -4.0492587305436355e-04,
+      -8.2641017439131223e-04, 3.3828661526212394e-04,
+      -1.4829998143344009e-03, 2.7582642932447637e-04,
+      3.3828661526212394e-04,  -7.1775652801919342e-04,
+      2.4087451790493968e-02,  3.6657022667584582e-02,
+      -2.3160169352650246e-05, -1.4754112553395189e-03,
+      3.6657022667584582e-02,  -2.7192998188347670e-03,
+      4.9290573568910629e-04,  4.2019978689329402e-04,
+      -2.3160169352650246e-05, 4.9290573568910629e-04,
+      -8.0300459280031010e-04, 3.0740767103767887e-04,
+      -1.4754112553395189e-03, 4.2019978689329402e-04,
+      3.0740767103767887e-04,  -6.4767348263235984e-04,
+      -1.6342252605621727e-01, 1.8127756599951576e-02,
+      -1.1824339446502448e-03, -5.6869655256144131e-04,
+      1.8127756599951576e-02,  -5.4663896424227282e-03,
+      9.1072733783564201e-04,  -3.4788638053916024e-05,
+      -1.1824339446502448e-03, 9.1072733783564201e-04,
+      -6.6244397559521830e-04, -2.5432935828759249e-04,
+      -5.6869655256144131e-04, -3.4788638053916024e-05,
+      -2.5432935828759249e-04, -4.8751736339739109e-04,
+      -1.5701412918240615e-01, 2.7992421613277401e-02,
+      -4.6515704530449996e-04, -7.9964297888608410e-04,
+      2.7992421613277401e-02,  -3.9221662977877072e-03,
+      2.9742173175084299e-04,  7.9338187798423123e-04,
+      -4.6515704530449996e-04, 2.9742173175084299e-04,
+      -5.5822864771270446e-04, 2.1181161515901515e-04,
+      -7.9964297888608410e-04, 7.9338187798423123e-04,
+      2.1181161515901515e-04,  -3.9618825805997281e-04,
+      -1.2003184408108214e-01, 1.8018611826677796e-03,
+      -1.4587721561363381e-03, -1.4251735569210393e-03,
+      1.8018611826677796e-03,  -2.3078955332620707e-03,
+      3.8402353601999531e-04,  3.6911457383390317e-04,
+      -1.4587721561363381e-03, 3.8402353601999531e-04,
+      -1.1519702353104452e-03, 9.8372557539751129e-05,
+      -1.4251735569210393e-03, 3.6911457383390317e-04,
+      9.8372557539751129e-05,  -7.3110630617569865e-04,
+      -1.6462935008848217e-01, 1.0600946531245254e-02,
+      3.6977083048745467e-03,  -1.4181644891152485e-03,
+      1.0600946531245254e-02,  -1.3992372112707173e-03,
+      3.4296475948198595e-04,  -1.1160182310149349e-03,
+      3.6977083048745467e-03,  3.4296475948198595e-04,
+      -1.6724888886693919e-03, -7.8051702685589782e-04,
+      -1.4181644891152485e-03, -1.1160182310149349e-03,
+      -7.8051702685589782e-04, -1.4795383843152874e-03,
+      2.5193698717292578e-03,  -7.2228540313489780e-03,
+      8.7131868658691573e-04,  -1.1871506186637458e-03,
+      -7.2228540313489780e-03, -8.3981655790035283e-03,
+      -3.1337460147976907e-05, 1.7507484534873217e-04,
+      8.7131868658691573e-04,  -3.1337460147976907e-05,
+      -1.1381726073134719e-03, 9.0691674803716218e-05,
+      -1.1871506186637458e-03, 1.7507484534873217e-04,
+      9.0691674803716218e-05,  -5.4923843003268163e-04,
+      -9.8541123283475321e-02, 2.0328745160678043e-02,
+      -2.0458389881609796e-03, 9.0630002244413007e-04,
+      2.0328745160678043e-02,  -7.4221841135019870e-04,
+      5.3925274862334381e-04,  -9.0169207887240497e-04,
+      -2.0458389881609796e-03, 5.3925274862334381e-04,
+      -8.1237048011580354e-04, -1.7572671277406646e-04,
+      9.0630002244413007e-04,  -9.0169207887240497e-04,
+      -1.7572671277406646e-04, -4.4334028399372748e-04,
+      -2.7234646577534043e-02, 2.5369573930940532e-02,
+      1.5705630781754590e-04,  6.1198876277706281e-04,
+      2.5369573930940532e-02,  -1.3071102928106637e-03,
+      -7.2368957471257269e-04, -5.2265333441466977e-04,
+      1.5705630781754590e-04,  -7.2368957471257269e-04,
+      -1.1462134164282704e-03, 2.3337377507278166e-04,
+      6.1198876277706281e-04,  -5.2265333441466977e-04,
+      2.3337377507278166e-04,  -7.2368697376535771e-04,
+      -2.4800412677279193e-01, 3.0558589210589435e-02,
+      -1.7679744500246292e-03, -1.5482210113520617e-03,
+      3.0558589210589435e-02,  -2.4786384007542978e-03,
+      8.5356892230528767e-04,  4.3403640403337902e-04,
+      -1.7679744500246292e-03, 8.5356892230528767e-04,
+      -8.3267405947969693e-04, 9.7644740296861087e-05,
+      -1.5482210113520617e-03, 4.3403640403337902e-04,
+      9.7644740296861087e-05,  -7.6184659130409797e-04,
+      6.4088879122978254e-02,  7.4033191067212354e-03,
+      -3.0166036111540026e-04, 2.2866639862654022e-03,
+      7.4033191067212354e-03,  -1.9075281126971072e-03,
+      -1.7513934189862141e-04, -8.3685859198624453e-04,
+      -3.0166036111540026e-04, -1.7513934189862141e-04,
+      -6.4451282241398183e-04, 1.0242732795762495e-04,
+      2.2866639862654022e-03,  -8.3685859198624453e-04,
+      1.0242732795762495e-04,  -4.7151425790170642e-04,
+      2.6280963047272420e-03,  -8.2523616999923399e-03,
+      6.9274155947225767e-04,  2.2806878867334188e-04,
+      -8.2523616999923399e-03, -9.9628281026221477e-04,
+      2.5812365885309900e-04,  -1.1568156376056965e-04,
+      6.9274155947225767e-04,  2.5812365885309900e-04,
+      -1.0820703009744815e-03, -3.1530431722761752e-05,
+      2.2806878867334188e-04,  -1.1568156376056965e-04,
+      -3.1530431722761752e-05, -7.5527985458567228e-04,
+      9.1101115309797379e-02,  -2.2884644065069411e-02,
+      9.1368322276223378e-04,  -1.0850217992609608e-03,
+      -2.2884644065069411e-02, -2.2375718156060894e-03,
+      -6.8171614704544544e-04, 4.3298786621578863e-04,
+      9.1368322276223378e-04,  -6.8171614704544544e-04,
+      -6.0655582575581761e-04, -1.9700957313198807e-04,
+      -1.0850217992609608e-03, 4.3298786621578863e-04,
+      -1.9700957313198807e-04, -5.4760699412119591e-04,
+      -2.2640416371139069e-01, -2.5191853524724098e-02,
+      -1.3198312724450061e-03, -1.3359296338435942e-03,
+      -2.5191853524724098e-02, -7.4114781706104268e-03,
+      1.4884771292347921e-03,  1.5714670158787022e-03,
+      -1.3198312724450061e-03, 1.4884771292347921e-03,
+      -1.3119866716855199e-03, -1.5536016251956771e-04,
+      -1.3359296338435942e-03, 1.5714670158787022e-03,
+      -1.5536016251956771e-04, -5.5696348361036394e-04,
+      9.2726790578004134e-02,  -2.0242369791069288e-03,
+      3.8405423708959545e-04,  6.3967354157029978e-04,
+      -2.0242369791069288e-03, -2.0157885093086683e-03,
+      -3.2058306533741196e-04, 4.2971640692212861e-04,
+      3.8405423708959545e-04,  -3.2058306533741196e-04,
+      -1.9162959308769002e-03, -6.3827678873427419e-04,
+      6.3967354157029978e-04,  4.2971640692212861e-04,
+      -6.3827678873427419e-04, -1.1765363907219604e-03,
+      -1.1010994988318533e-01, 1.8364595516921723e-03,
+      -1.7052805868601824e-03, 1.5722132456195729e-03,
+      1.8364595516921723e-03,  -1.6428731888699357e-03,
+      -6.9125587365518731e-05, -6.1003796613763421e-04,
+      -1.7052805868601824e-03, -6.9125587365518731e-05,
+      -1.3517785548352037e-03, 3.9700219843950759e-04,
+      1.5722132456195729e-03,  -6.1003796613763421e-04,
+      3.9700219843950759e-04,  -9.2229729865463410e-04,
+      1.0482002463291755e-01,  4.1269081557235562e-02,
+      -1.4243305558994911e-03, 7.2560546133989428e-04,
+      4.1269081557235562e-02,  -2.4533418282568198e-03,
+      1.3112208013762515e-03,  -8.1980100268941986e-04,
+      -1.4243305558994911e-03, 1.3112208013762515e-03,
+      -6.5149545825962077e-04, -7.9051325639480671e-05,
+      7.2560546133989428e-04,  -8.1980100268941986e-04,
+      -7.9051325639480671e-05, -2.9459876874997148e-04,
+      5.2687763814020426e-02,  2.5753172346694865e-02,
+      -1.1364673445506180e-03, 1.6847984517648468e-03,
+      2.5753172346694865e-02,  -1.9744024088759471e-03,
+      6.5730101175142199e-04,  -7.6106378367768782e-04,
+      -1.1364673445506180e-03, 6.5730101175142199e-04,
+      -6.8322027298045856e-04, -2.8125846979498703e-04,
+      1.6847984517648468e-03,  -7.6106378367768782e-04,
+      -2.8125846979498703e-04, -6.4384013872712724e-04,
+      5.9218429229155281e-02,  -4.4827537798794370e-03,
+      1.0270811096682296e-03,  -1.9168586516354947e-03,
+      -4.4827537798794370e-03, -6.8593562640291818e-04,
+      -4.6835704849159830e-04, -2.2268361484443280e-04,
+      1.0270811096682296e-03,  -4.6835704849159830e-04,
+      -1.3752987145318814e-03, 3.1079072153330253e-04,
+      -1.9168586516354947e-03, -2.2268361484443280e-04,
+      3.1079072153330253e-04,  -9.2178851145784926e-04,
+      6.4476070763014870e-02,  -8.2336865840800244e-03,
+      -8.2613444706260115e-05, 2.2870252609144106e-03,
+      -8.2336865840800244e-03, -3.2744036284686373e-03,
+      -4.1704987639591122e-05, -6.4166807270754048e-04,
+      -8.2613444706260115e-05, -4.1704987639591122e-05,
+      -4.1548386737107250e-04, -1.7479369462839750e-05,
+      2.2870252609144106e-03,  -6.4166807270754048e-04,
+      -1.7479369462839750e-05, -3.4493527017004036e-04,
+      -6.1828450559953466e-03, 2.8428624253357817e-02,
+      -7.5163309079767377e-04, 5.4940663791714793e-04,
+      2.8428624253357817e-02,  -6.7829893110121844e-03,
+      8.1954861327358602e-04,  -5.3114117045863587e-04,
+      -7.5163309079767377e-04, 8.1954861327358602e-04,
+      -6.0173813996780567e-04, -6.7160875496170326e-05,
+      5.4940663791714793e-04,  -5.3114117045863587e-04,
+      -6.7160875496170326e-05, -2.4845337051236009e-04,
+      1.0533880384113113e-01,  3.6210071469509400e-02,
+      -1.7107999791441781e-03, -4.7083865881270926e-04,
+      3.6210071469509400e-02,  -2.5591369610882472e-03,
+      1.0474090798181057e-03,  1.1172328488792965e-04,
+      -1.7107999791441781e-03, 1.0474090798181057e-03,
+      -6.2681311095400482e-04, -1.0956014522883990e-04,
+      -4.7083865881270926e-04, 1.1172328488792965e-04,
+      -1.0956014522883990e-04, -6.2294976074032634e-04,
+      1.1902524122755653e-01,  -1.0909771422571959e-02,
+      1.2448713677229743e-03,  -7.8880695949750681e-04,
+      -1.0909771422571959e-02, -1.5951056840545162e-03,
+      2.7429495897176952e-04,  7.3506920307663841e-05,
+      1.2448713677229743e-03,  2.7429495897176952e-04,
+      -9.7786177177501001e-04, 6.6672113049240779e-04,
+      -7.8880695949750681e-04, 7.3506920307663841e-05,
+      6.6672113049240779e-04,  -5.0679343969428484e-04,
+      1.6159868780566031e-01,  3.7481421868546007e-03,
+      3.3383110783462289e-03,  -1.4389234118571464e-03,
+      3.7481421868546007e-03,  -1.8935398127113731e-03,
+      2.5240285683527449e-04,  7.9691796069654658e-04,
+      3.3383110783462289e-03,  2.5240285683527449e-04,
+      -1.3490096726389840e-03, 1.9074939429963151e-04,
+      -1.4389234118571464e-03, 7.9691796069654658e-04,
+      1.9074939429963151e-04,  -8.9877461888945853e-04,
+      -2.2502909073369459e-01, -3.7321287736186322e-03,
+      -1.7987858311244466e-03, -1.9708047702503991e-03,
+      -3.7321287736186322e-03, -1.9214934405923476e-03,
+      3.5283302489755764e-04,  4.8110254301636892e-04,
+      -1.7987858311244466e-03, 3.5283302489755764e-04,
+      -1.2680475239585940e-03, 2.0205708932592430e-04,
+      -1.9708047702503991e-03, 4.8110254301636892e-04,
+      2.0205708932592430e-04,  -7.8681400841451241e-04,
+      -2.3952675967064883e-02, -6.5409371210018051e-03,
+      -2.6851469943078203e-04, -4.9805803828584922e-04,
+      -6.5409371210018051e-03, -3.7561313498678302e-03,
+      2.3206665741903022e-04,  3.0048228666978370e-04,
+      -2.6851469943078203e-04, 2.3206665741903022e-04,
+      -4.4121780270757531e-04, -8.2215427654899162e-05,
+      -4.9805803828584922e-04, 3.0048228666978370e-04,
+      -8.2215427654899162e-05, -3.7500716387283193e-04,
+      -2.1228367131798687e-01, 2.7113291916486364e-02,
+      4.6016317977870395e-04,  -1.9594764054362895e-03,
+      2.7113291916486364e-02,  -1.0853572773374739e-03,
+      2.5219233819137219e-04,  5.1172231054065541e-04,
+      4.6016317977870395e-04,  2.5219233819137219e-04,
+      -1.8213699224644336e-03, 3.7393213810628812e-04,
+      -1.9594764054362895e-03, 5.1172231054065541e-04,
+      3.7393213810628812e-04,  -5.5588635692437049e-04,
+      -1.2594061059151607e-01, 1.2592656724822970e-02,
+      -4.0285977545487228e-04, -4.4341698620668172e-04,
+      1.2592656724822970e-02,  -6.1572534159074592e-03,
+      4.8152144273438187e-04,  -4.9727853505236708e-04,
+      -4.0285977545487228e-04, 4.8152144273438187e-04,
+      -9.3221660800086681e-04, 4.3322214079744763e-04,
+      -4.4341698620668172e-04, -4.9727853505236708e-04,
+      4.3322214079744763e-04,  -6.2440230731627326e-04,
+      -1.3585509657998510e-01, -3.8909226522543441e-03,
+      3.0193138572521729e-03,  -4.6754349870445496e-04,
+      -3.8909226522543441e-03, -5.6220236698656771e-03,
+      -8.0283448531211330e-04, 4.9313364840345667e-04,
+      3.0193138572521729e-03,  -8.0283448531211330e-04,
+      -8.4724654438547091e-04, -4.1535841093396958e-04,
+      -4.6754349870445496e-04, 4.9313364840345667e-04,
+      -4.1535841093396958e-04, -4.7533675347965273e-04,
+      -2.1524465492323114e-01, 2.6830885704129219e-02,
+      4.8987007530070020e-03,  -7.4769890428344238e-04,
+      2.6830885704129219e-02,  -2.4008792532451296e-03,
+      -4.3520127006076755e-04, -5.5384351394842390e-04,
+      4.8987007530070020e-03,  -4.3520127006076755e-04,
+      -1.9799533182669562e-03, 3.8152653140380451e-04,
+      -7.4769890428344238e-04, -5.5384351394842390e-04,
+      3.8152653140380451e-04,  -5.9785566430098553e-04,
+      -1.6784213165410733e-01, 2.0244058370466202e-02,
+      -1.3420658225188609e-03, -1.3189834234737445e-03,
+      2.0244058370466202e-02,  -7.2263618147349398e-03,
+      1.1939034574010880e-03,  9.4667802145852230e-04,
+      -1.3420658225188609e-03, 1.1939034574010880e-03,
+      -7.6018644585713153e-04, -1.0210604952061623e-04,
+      -1.3189834234737445e-03, 9.4667802145852230e-04,
+      -1.0210604952061623e-04, -4.0821100223754577e-04,
+      -2.3751039223678280e-01, 2.6165234489045515e-02,
+      -1.9846169871215727e-03, 3.7170136665050013e-03,
+      2.6165234489045515e-02,  -9.2783860491590594e-04,
+      2.8032134504712048e-04,  -9.6957018524106912e-04,
+      -1.9846169871215727e-03, 2.8032134504712048e-04,
+      -1.2399063877565513e-03, 1.0528263449886494e-06,
+      3.7170136665050013e-03,  -9.6957018524106912e-04,
+      1.0528263449886494e-06,  -1.0651880667174132e-03,
+      6.8982016656450096e-02,  -1.0194592948666651e-02,
+      4.0224436318745071e-04,  -1.1696801694026104e-03,
+      -1.0194592948666651e-02, -2.2058854995895081e-03,
+      4.3964931839049595e-05,  3.0469030637087147e-04,
+      4.0224436318745071e-04,  4.3964931839049595e-05,
+      -1.3777303555632778e-03, 4.3404728696010988e-04,
+      -1.1696801694026104e-03, 3.0469030637087147e-04,
+      4.3404728696010988e-04,  -4.0950956530560686e-04,
+      -9.2461109228483468e-02, -1.3650469255974099e-03,
+      6.9549155108044203e-04,  -1.3791803108254750e-03,
+      -1.3650469255974099e-03, -3.4426360382142198e-03,
+      -8.9312514713293023e-04, 1.5185160693596093e-03,
+      6.9549155108044203e-04,  -8.9312514713293023e-04,
+      -1.2055712134559134e-03, 4.1803484747986825e-04,
+      -1.3791803108254750e-03, 1.5185160693596093e-03,
+      4.1803484747986825e-04,  -8.1179190435472248e-04,
+      1.7492329868952405e-01,  -2.9646943331972139e-02,
+      -6.6563429686947173e-04, 4.4682634999685810e-04,
+      -2.9646943331972139e-02, -2.4173338183744193e-03,
+      3.3542502232973938e-04,  1.9443059924745616e-05,
+      -6.6563429686947173e-04, 3.3542502232973938e-04,
+      -9.9183926207136765e-04, 2.0084203476038989e-04,
+      4.4682634999685810e-04,  1.9443059924745616e-05,
+      2.0084203476038989e-04,  -6.3908876172596529e-04,
+      3.1539924140147259e-02,  7.6934952984409844e-03,
+      3.6809732723858749e-03,  -1.9589949144150562e-03,
+      7.6934952984409844e-03,  -1.0048732991394538e-03,
+      -6.8525949419794615e-04, 1.8196951541743413e-04,
+      3.6809732723858749e-03,  -6.8525949419794615e-04,
+      -1.9447331645722941e-03, 1.5283056340779168e-04,
+      -1.9589949144150562e-03, 1.8196951541743413e-04,
+      1.5283056340779168e-04,  -9.1738980564369424e-04,
+      -3.9105676791886118e-02, 7.5467303744346081e-03,
+      -1.8311155447274202e-03, 3.9722587176520933e-03,
+      7.5467303744346081e-03,  -2.4625304120398338e-03,
+      6.5110354661161197e-04,  -1.1424140297801545e-03,
+      -1.8311155447274202e-03, 6.5110354661161197e-04,
+      -7.6013689055211709e-04, 3.1982239567331837e-04,
+      3.9722587176520933e-03,  -1.1424140297801545e-03,
+      3.1982239567331837e-04,  -6.7960924512481386e-04,
+      -6.1502540343009511e-02, -2.5918936122811376e-02,
+      8.2260585699679700e-04,  -8.5737616123583824e-04,
+      -2.5918936122811376e-02, -9.1045995335339511e-03,
+      -7.6582517466873253e-04, 2.8347777788018374e-04,
+      8.2260585699679700e-04,  -7.6582517466873253e-04,
+      -9.8142522999483802e-04, 1.5709724166556660e-04,
+      -8.5737616123583824e-04, 2.8347777788018374e-04,
+      1.5709724166556660e-04,  -4.6604755267205529e-04,
+      2.1148032486205992e-01,  -6.3731337543181353e-03,
+      8.0925966188404811e-04,  7.1463137701211561e-04,
+      -6.3731337543181353e-03, -9.3133813463367691e-04,
+      -1.3767800389944088e-04, -7.8134699474057532e-04,
+      8.0925966188404811e-04,  -1.3767800389944088e-04,
+      -5.9804522674363695e-04, -2.4139815546091799e-04,
+      7.1463137701211561e-04,  -7.8134699474057532e-04,
+      -2.4139815546091799e-04, -4.7788602351705350e-04,
+      -3.7426472661389397e-02, 2.8157517432252185e-02,
+      -1.0934144857086604e-03, 3.1458337980653973e-06,
+      2.8157517432252185e-02,  -5.9309295616162740e-03,
+      2.5241788599436376e-04,  5.8278212231620947e-04,
+      -1.0934144857086604e-03, 2.5241788599436376e-04,
+      -5.9659440322057725e-04, 1.4773715153137406e-04,
+      3.1458337980653973e-06,  5.8278212231620947e-04,
+      1.4773715153137406e-04,  -4.9439441008183914e-04,
+      1.2948178533847016e-01,  -4.4858655966967973e-03,
+      -7.3394293433360475e-04, -1.8318789975846868e-03,
+      -4.4858655966967973e-03, -1.3111242346283915e-03,
+      -1.0753742916959626e-03, 6.1098836231564713e-04,
+      -7.3394293433360475e-04, -1.0753742916959626e-03,
+      -1.7980863990064372e-03, 4.7774929182747947e-04,
+      -1.8318789975846868e-03, 6.1098836231564713e-04,
+      4.7774929182747947e-04,  -7.2636447350640115e-04,
+      1.1246840827726504e-01,  2.1631039403096977e-02,
+      -1.6528967084977182e-03, -1.3069704047438378e-03,
+      2.1631039403096977e-02,  -6.2735534467153794e-03,
+      9.1784915417337241e-04,  3.7040218222533875e-04,
+      -1.6528967084977182e-03, 9.1784915417337241e-04,
+      -6.1695925898898460e-04, 5.1243512009102464e-05,
+      -1.3069704047438378e-03, 3.7040218222533875e-04,
+      5.1243512009102464e-05,  -5.8607009651544847e-04,
+      2.3191443887515172e-02,  -2.6201262501037315e-03,
+      -7.6581304428420961e-04, -7.0793868722308494e-04,
+      -2.6201262501037315e-03, -1.2427377919089994e-03,
+      1.3774011176280213e-04,  5.3774177980385463e-04,
+      -7.6581304428420961e-04, 1.3774011176280213e-04,
+      -4.4342025234310304e-04, 9.5568301616175372e-05,
+      -7.0793868722308494e-04, 5.3774177980385463e-04,
+      9.5568301616175372e-05,  -2.5406888063130668e-04,
+      9.3549762929661148e-02,  2.4789338496675476e-02,
+      -1.7999250937495298e-03, 9.1491381013646730e-04,
+      2.4789338496675476e-02,  -2.4686873006394385e-03,
+      1.1959407625742012e-04,  2.3453642308929890e-04,
+      -1.7999250937495298e-03, 1.1959407625742012e-04,
+      -1.3058948205367103e-03, 2.8220390404668665e-04,
+      9.1491381013646730e-04,  2.3453642308929890e-04,
+      2.8220390404668665e-04,  -9.0785502558073951e-04,
+      8.0716780382424247e-02,  2.5365518135899486e-02,
+      -1.0966084834676559e-03, 3.9092402053228742e-04,
+      2.5365518135899486e-02,  -3.5742604372390177e-04,
+      1.1161995819547624e-03,  -2.3097494896500610e-04,
+      -1.0966084834676559e-03, 1.1161995819547624e-03,
+      -9.9846801442279387e-04, -3.3563094825744084e-04,
+      3.9092402053228742e-04,  -2.3097494896500610e-04,
+      -3.3563094825744084e-04, -4.9557683336977874e-04,
+      4.0111243527158481e-02,  -5.1743679607531489e-03,
+      -6.4668882267963457e-04, -5.3256658217573259e-04,
+      -5.1743679607531489e-03, -2.1959890278307748e-03,
+      3.3406917357637071e-04,  5.5860960683749177e-04,
+      -6.4668882267963457e-04, 3.3406917357637071e-04,
+      -4.0449951144626246e-04, 1.4870745971877001e-04,
+      -5.3256658217573259e-04, 5.5860960683749177e-04,
+      1.4870745971877001e-04,  -4.0088619373289668e-04,
+      1.3936559957096936e-02,  -9.2345720437320422e-03,
+      1.7112603251330989e-03,  -1.5812360931300338e-03,
+      -9.2345720437320422e-03, -6.0853404733679922e-03,
+      -8.1587691393434074e-04, 1.5127202442355710e-03,
+      1.7112603251330989e-03,  -8.1587691393434074e-04,
+      -1.0582512267918579e-03, 3.0169179197568852e-04,
+      -1.5812360931300338e-03, 1.5127202442355710e-03,
+      3.0169179197568852e-04,  -6.9818940668481635e-04,
+      1.2271035315361421e-01,  3.3834534832647936e-02,
+      -8.4425345932574085e-04, -1.2845411056578771e-03,
+      3.3834534832647936e-02,  -2.7021681799793487e-03,
+      1.4589329290161041e-03,  6.8134662217169816e-04,
+      -8.4425345932574085e-04, 1.4589329290161041e-03,
+      -8.5494381562856267e-04, 1.0288019018269524e-04,
+      -1.2845411056578771e-03, 6.8134662217169816e-04,
+      1.0288019018269524e-04,  -7.3984903838043902e-04,
+      1.3426877609097060e-01,  1.7349015756059596e-02,
+      -1.3246192802241865e-03, -1.9305711238934450e-03,
+      1.7349015756059596e-02,  -1.5270102850098125e-03,
+      4.4956077650381329e-04,  1.9714221201937447e-04,
+      -1.3246192802241865e-03, 4.4956077650381329e-04,
+      -6.4182469515364075e-04, 2.0433900996145839e-04,
+      -1.9305711238934450e-03, 1.9714221201937447e-04,
+      2.0433900996145839e-04,  -5.1586663656365934e-04,
+      8.6391392047467014e-02,  3.6980521889860229e-02,
+      9.3321794676103585e-04,  9.3948476321605902e-04,
+      3.6980521889860229e-02,  -9.8928180871077319e-03,
+      -5.3070991717869105e-04, -6.2377425133496771e-04,
+      9.3321794676103585e-04,  -5.3070991717869105e-04,
+      -3.0488330333303663e-04, 1.5659386456603057e-05,
+      9.3948476321605902e-04,  -6.2377425133496771e-04,
+      1.5659386456603057e-05,  -2.7999703918634182e-04,
+      1.0146744017771264e-01,  7.2816252131513558e-03,
+      -1.6155238061515398e-03, 2.5644205137853007e-04,
+      7.2816252131513558e-03,  -6.3112672678755539e-03,
+      1.6890811365672615e-03,  2.7693395172731348e-04,
+      -1.6155238061515398e-03, 1.6890811365672615e-03,
+      -1.2647032121969489e-03, -2.9370354320717849e-04,
+      2.5644205137853007e-04,  2.7693395172731348e-04,
+      -2.9370354320717849e-04, -6.2342159510005504e-04,
+      1.2959056902281882e-01,  -6.1301776131238232e-05,
+      1.6534216868320365e-03,  -1.3058105872678105e-03,
+      -6.1301776131238232e-05, -1.4249381206094562e-03,
+      -9.5380678395206903e-04, 4.7419065106550553e-04,
+      1.6534216868320365e-03,  -9.5380678395206903e-04,
+      -1.5076699002388233e-03, 6.4805275159887318e-04,
+      -1.3058105872678105e-03, 4.7419065106550553e-04,
+      6.4805275159887318e-04,  -8.8238756291255597e-04,
+      8.9247536658271412e-02,  2.3842441423826456e-04,
+      -1.4020150585281280e-03, -5.1152963328585229e-04,
+      2.3842441423826456e-04,  -9.0191620517334706e-04,
+      2.8625020652046294e-04,  -4.8584743596851983e-04,
+      -1.4020150585281280e-03, 2.8625020652046294e-04,
+      -1.0559260959327309e-03, 7.8831939884397011e-04,
+      -5.1152963328585229e-04, -4.8584743596851983e-04,
+      7.8831939884397011e-04,  -1.7109718079099097e-03,
+      1.8544185491158613e-01,  -1.0718215088244560e-03,
+      -1.8813810349262525e-03, -1.9339063688273953e-03,
+      -1.0718215088244560e-03, -7.2274547605494753e-04,
+      8.6435447211605384e-04,  -3.6026940405211872e-04,
+      -1.8813810349262525e-03, 8.6435447211605384e-04,
+      -8.1277716432238999e-04, -3.7277556893117323e-05,
+      -1.9339063688273953e-03, -3.6026940405211872e-04,
+      -3.7277556893117323e-05, -6.3915541802464406e-04,
+      1.4434415789530322e-01,  -2.6872190143700809e-03,
+      -1.0846599699061007e-03, -6.4477346277669280e-04,
+      -2.6872190143700809e-03, -5.6018401280834278e-03,
+      -5.0911052653693770e-05, 9.6450041626136844e-04,
+      -1.0846599699061007e-03, -5.0911052653693770e-05,
+      -9.4975456411969575e-04, 3.1940035065807030e-04,
+      -6.4477346277669280e-04, 9.6450041626136844e-04,
+      3.1940035065807030e-04,  -9.2420815563382878e-04,
+      7.8171789374052239e-02,  1.1131802654471883e-02,
+      1.7637127962972779e-03,  9.4754776914784435e-04,
+      1.1131802654471883e-02,  -1.3560867793273460e-03,
+      -1.0739127680493021e-03, 3.5149852956582349e-04,
+      1.7637127962972779e-03,  -1.0739127680493021e-03,
+      -1.7712750948741785e-03, -4.9962373537378436e-04,
+      9.4754776914784435e-04,  3.5149852956582349e-04,
+      -4.9962373537378436e-04, -1.1574313866688994e-03,
+      5.2957801369084900e-02,  -1.0005678772364536e-02,
+      -4.7804628490494196e-04, -1.2361403815386134e-03,
+      -1.0005678772364536e-02, -4.5116744307715714e-03,
+      4.1017691673670646e-04,  9.4250866230064672e-04,
+      -4.7804628490494196e-04, 4.1017691673670646e-04,
+      -9.2170564193822815e-04, 2.5987259125205417e-04,
+      -1.2361403815386134e-03, 9.4250866230064672e-04,
+      2.5987259125205417e-04,  -4.6176740062320114e-04,
+      1.0365332441174736e-01,  -9.8221692578944128e-03,
+      -1.9915804870139055e-03, -1.4829964959047716e-03,
+      -9.8221692578944128e-03, -8.0048197952266309e-04,
+      2.0484157742938923e-04,  9.2083194131700275e-04,
+      -1.9915804870139055e-03, 2.0484157742938923e-04,
+      -7.4074324466875886e-04, 4.6683122189394826e-05,
+      -1.4829964959047716e-03, 9.2083194131700275e-04,
+      4.6683122189394826e-05,  -4.1774348179361514e-04,
+      -1.3421959250355708e-01, 3.3713260029558795e-03,
+      -1.3243455638564110e-03, -1.6977772976177158e-03,
+      3.3713260029558795e-03,  -4.3427989568832846e-03,
+      1.5008659836834226e-03,  3.6385328660509515e-04,
+      -1.3243455638564110e-03, 1.5008659836834226e-03,
+      -4.4959639797028332e-04, -3.5993966797194079e-05,
+      -1.6977772976177158e-03, 3.6385328660509515e-04,
+      -3.5993966797194079e-05, -3.9959494583247839e-04,
+      1.5319795291380678e-01,  1.5606938575385582e-02,
+      1.2896044217671962e-04,  1.4042986843700814e-03,
+      1.5606938575385582e-02,  -1.6968432451613968e-03,
+      6.1781606247467071e-04,  -8.1022537873411268e-04,
+      1.2896044217671962e-04,  6.1781606247467071e-04,
+      -9.0781561089793507e-04, 1.6085869694517344e-04,
+      1.4042986843700814e-03,  -8.1022537873411268e-04,
+      1.6085869694517344e-04,  -4.7576363868138846e-04,
+      1.2254882374915720e-01,  1.4693545363766116e-04,
+      2.1938591971393732e-03,  -3.9982695744349866e-04,
+      1.4693545363766116e-04,  -1.8792585045041851e-03,
+      4.2828698130133657e-04,  4.5172919991448188e-04,
+      2.1938591971393732e-03,  4.2828698130133657e-04,
+      -1.0432885144689712e-03, -4.1920428823997481e-04,
+      -3.9982695744349866e-04, 4.5172919991448188e-04,
+      -4.1920428823997481e-04, -5.0654766396187863e-04,
+      6.2567741767689450e-02,  -1.5942792665184773e-02,
+      9.4219357229582805e-04,  -1.0047556566068546e-03,
+      -1.5942792665184773e-02, -4.9222058747801188e-03,
+      -8.8081902987161898e-04, 8.5096048171422992e-04,
+      9.4219357229582805e-04,  -8.8081902987161898e-04,
+      -6.4309961958789396e-04, -2.4752901697258346e-05,
+      -1.0047556566068546e-03, 8.5096048171422992e-04,
+      -2.4752901697258346e-05, -4.2749730100807973e-04,
+      2.0912880324801708e-01,  -7.6064433064355411e-03,
+      5.3323058431332623e-04,  -2.0532588685479958e-03,
+      -7.6064433064355411e-03, -1.3135732341439657e-03,
+      -7.8988659553189968e-04, 3.7928836724313318e-04,
+      5.3323058431332623e-04,  -7.8988659553189968e-04,
+      -7.3649612756357917e-04, -2.6573320308412903e-04,
+      -2.0532588685479958e-03, 3.7928836724313318e-04,
+      -2.6573320308412903e-04, -5.7797801692182097e-04,
+      1.5055274915795547e-01,  -2.1924936777925716e-02,
+      -1.2612611024713787e-03, 2.7821375886116877e-03,
+      -2.1924936777925716e-02, -4.2068916459438062e-03,
+      1.3684687816957373e-03,  -9.0195379182343130e-04,
+      -1.2612611024713787e-03, 1.3684687816957373e-03,
+      -1.2345358774530297e-03, -3.4235325803685078e-04,
+      2.7821375886116877e-03,  -9.0195379182343130e-04,
+      -3.4235325803685078e-04, -5.3816147544009192e-04,
+      -9.4235991321761955e-02, -2.1367388228175833e-02,
+      1.0448569989099396e-03,  1.7485944032731482e-03,
+      -2.1367388228175833e-02, -2.6716566352546589e-03,
+      -7.3073829452656989e-04, -8.9610200660087183e-04,
+      1.0448569989099396e-03,  -7.3073829452656989e-04,
+      -6.7501385163585615e-04, 2.0355441125821877e-04,
+      1.7485944032731482e-03,  -8.9610200660087183e-04,
+      2.0355441125821877e-04,  -5.7236545019238698e-04,
+      -6.9503808891479835e-02, 3.1383773305167471e-03,
+      1.5986828965354864e-03,  -1.7402981803352200e-03,
+      3.1383773305167471e-03,  -1.2489391870377849e-03,
+      -8.5751835076954214e-04, 5.8268795422606202e-04,
+      1.5986828965354864e-03,  -8.5751835076954214e-04,
+      -1.5076108825684306e-03, -7.4944126836778806e-04,
+      -1.7402981803352200e-03, 5.8268795422606202e-04,
+      -7.4944126836778806e-04, -1.1142665347040458e-03,
+      4.7151474002859579e-02,  3.1740728795928382e-02,
+      -9.4301630376494167e-04, -9.8957451562421933e-04,
+      3.1740728795928382e-02,  -3.0449847294980667e-03,
+      7.3961197038087075e-04,  1.1737609646021582e-03,
+      -9.4301630376494167e-04, 7.3961197038087075e-04,
+      -5.3898208391981741e-04, 1.2215824265585573e-04,
+      -9.8957451562421933e-04, 1.1737609646021582e-03,
+      1.2215824265585573e-04,  -4.4878575781268185e-04,
+      6.9890878585001953e-02,  5.5058801359721072e-03,
+      4.3303599481317440e-04,  6.7321007698798990e-03,
+      5.5058801359721072e-03,  -1.3315438647224626e-03,
+      2.4082287446273412e-04,  -1.1070907494787486e-03,
+      4.3303599481317440e-04,  2.4082287446273412e-04,
+      -1.6833930700655335e-03, -8.3440886703805834e-04,
+      6.7321007698798990e-03,  -1.1070907494787486e-03,
+      -8.3440886703805834e-04, -1.5967950989918100e-03,
+      8.5697346753994430e-02,  -1.8323684437467489e-02,
+      -5.9473967389843570e-04, -8.4314615829175212e-04,
+      -1.8323684437467489e-02, -1.1875494620811390e-03,
+      9.7079341037182275e-04,  2.7501065040690445e-04,
+      -5.9473967389843570e-04, 9.7079341037182275e-04,
+      -5.2204544453808501e-04, 2.0919905664672838e-04,
+      -8.4314615829175212e-04, 2.7501065040690445e-04,
+      2.0919905664672838e-04,  -4.2639051308978156e-04,
+      -2.2631222057764766e-01, -1.3601514363752679e-02,
+      -1.6137759080852433e-03, 1.4950759744726269e-03,
+      -1.3601514363752679e-02, -2.6453255589728329e-03,
+      3.3968249078500379e-04,  -7.5978605583900977e-04,
+      -1.6137759080852433e-03, 3.3968249078500379e-04,
+      -7.5010511151759902e-04, 1.0313781788359500e-04,
+      1.4950759744726269e-03,  -7.5978605583900977e-04,
+      1.0313781788359500e-04,  -5.9448373489452483e-04,
+      4.1116271730053472e-02,  -4.5650281290770516e-03,
+      1.0174302480600848e-03,  -7.4144675491881571e-04,
+      -4.5650281290770516e-03, -3.0111597874487158e-03,
+      4.1892149452060594e-04,  2.5662370553909404e-04,
+      1.0174302480600848e-03,  4.1892149452060594e-04,
+      -1.6351264041670321e-03, 4.1540798986628444e-04,
+      -7.4144675491881571e-04, 2.5662370553909404e-04,
+      4.1540798986628444e-04,  -1.5888041899304089e-03,
+      3.8822005325296664e-02,  3.4682085173453489e-02,
+      1.1383554058731083e-03,  6.9763412662967174e-04,
+      3.4682085173453489e-02,  -3.8778621426546131e-03,
+      -1.0502459984301810e-03, -6.7815726314415445e-04,
+      1.1383554058731083e-03,  -1.0502459984301810e-03,
+      -9.5953811566263974e-04, 3.1547011250005413e-04,
+      6.9763412662967174e-04,  -6.7815726314415445e-04,
+      3.1547011250005413e-04,  -4.9094447850356326e-04,
+      -1.3275117419010990e-01, 7.7532610202420025e-03,
+      2.6481743952388775e-04,  -1.0233025705150341e-03,
+      7.7532610202420025e-03,  -3.5307144422284118e-03,
+      3.6081301612281790e-04,  4.2984812221947383e-04,
+      2.6481743952388775e-04,  3.6081301612281790e-04,
+      -9.6620391750736523e-04, -1.8268752035532475e-04,
+      -1.0233025705150341e-03, 4.2984812221947383e-04,
+      -1.8268752035532475e-04, -7.6865748942518946e-04,
+      2.0902111595826358e-02,  2.8778407466407915e-02,
+      -7.3589637063263200e-04, -7.9232400295570942e-04,
+      2.8778407466407915e-02,  -6.8244148733939813e-03,
+      4.6837342411344191e-04,  3.8337154291441424e-04,
+      -7.3589637063263200e-04, 4.6837342411344191e-04,
+      -3.9870959441253668e-04, 6.3661217652106041e-05,
+      -7.9232400295570942e-04, 3.8337154291441424e-04,
+      6.3661217652106041e-05,  -3.1873633147878725e-04,
+      1.2306903655844117e-01,  1.1790694654017940e-02,
+      -1.6930694484504614e-03, -1.4478486853861244e-03,
+      1.1790694654017940e-02,  -2.6362432912232283e-04,
+      5.7801647588433605e-04,  -5.0260654622532848e-04,
+      -1.6930694484504614e-03, 5.7801647588433605e-04,
+      -1.0057986783918533e-03, -3.8473412149296924e-04,
+      -1.4478486853861244e-03, -5.0260654622532848e-04,
+      -3.8473412149296924e-04, -8.4155363764574717e-04,
+      -2.6072264441980286e-01, -5.3839312494948985e-03,
+      -9.8286481836498294e-04, -1.1077071063072626e-03,
+      -5.3839312494948985e-03, -2.8144397517049936e-03,
+      1.3570909178396836e-03,  2.0352636344653444e-04,
+      -9.8286481836498294e-04, 1.3570909178396836e-03,
+      -9.6948887126791080e-04, 2.4502863167189274e-04,
+      -1.1077071063072626e-03, 2.0352636344653444e-04,
+      2.4502863167189274e-04,  -5.5674073681467461e-04,
+      -1.4461247437719649e-01, -1.3593073092315819e-02,
+      -1.4591789215271309e-03, -1.8444309608458772e-03,
+      -1.3593073092315819e-02, -1.3790376045237883e-03,
+      3.6507611650380594e-04,  1.0989155862134194e-03,
+      -1.4591789215271309e-03, 3.6507611650380594e-04,
+      -8.2908061421262179e-04, -1.9093220436421467e-04,
+      -1.8444309608458772e-03, 1.0989155862134194e-03,
+      -1.9093220436421467e-04, -7.4603886066833670e-04,
+      1.5718948391852267e-01,  3.7509266885863875e-02,
+      -1.6535289369067915e-03, -1.5835485386314300e-03,
+      3.7509266885863875e-02,  -5.9100449278194633e-03,
+      3.5324348250094338e-04,  3.2687210216937634e-04,
+      -1.6535289369067915e-03, 3.5324348250094338e-04,
+      -1.0567635462974483e-03, -3.8959429122272150e-06,
+      -1.5835485386314300e-03, 3.2687210216937634e-04,
+      -3.8959429122272150e-06, -6.7923415962746719e-04,
+      -2.0460083814262353e-01, 2.6461245236457143e-02,
+      -1.0964643716453355e-03, -2.0155849422843325e-03,
+      2.6461245236457143e-02,  -1.1607499563668883e-02,
+      4.9712684089718660e-04,  1.0744196203423314e-03,
+      -1.0964643716453355e-03, 4.9712684089718660e-04,
+      -1.2575560415559299e-03, 3.5355569910289946e-04,
+      -2.0155849422843325e-03, 1.0744196203423314e-03,
+      3.5355569910289946e-04,  -5.9829675272084996e-04,
+      1.4227457831415008e-01,  3.7409193782549421e-02,
+      -1.6999608012668766e-03, -1.4775810720134223e-03,
+      3.7409193782549421e-02,  -6.6980266639126880e-03,
+      3.7313860360236049e-04,  3.6390843416024218e-04,
+      -1.6999608012668766e-03, 3.7313860360236049e-04,
+      -6.8923641265649434e-04, -1.1118512619169452e-04,
+      -1.4775810720134223e-03, 3.6390843416024218e-04,
+      -1.1118512619169452e-04, -3.6167006383143143e-04,
+      -3.0408057058263434e-02, 2.8102062565578297e-03,
+      -9.9186210750588349e-05, -1.0848067328911566e-03,
+      2.8102062565578297e-03,  -1.9168669695793325e-03,
+      -1.5989044944160041e-04, 3.7928404911171667e-04,
+      -9.9186210750588349e-05, -1.5989044944160041e-04,
+      -6.4212418303488965e-04, 1.2414097957366861e-04,
+      -1.0848067328911566e-03, 3.7928404911171667e-04,
+      1.2414097957366861e-04,  -4.8937310642877495e-04,
+      -5.8151762326818325e-02, 2.0612537512420814e-02,
+      -1.8069350755089380e-03, -1.9159225235797013e-03,
+      2.0612537512420814e-02,  -1.5273838578175230e-03,
+      4.8641178441277146e-04,  5.7168029371084457e-04,
+      -1.8069350755089380e-03, 4.8641178441277146e-04,
+      -8.5373804438111515e-04, 2.1202613463988789e-04,
+      -1.9159225235797013e-03, 5.7168029371084457e-04,
+      2.1202613463988789e-04,  -7.8732691228072034e-04,
+      1.1798334731925102e-01,  -2.8140161709447171e-02,
+      -1.2130478454804230e-03, 4.6360451013415833e-04,
+      -2.8140161709447171e-02, -2.5008525901788615e-03,
+      1.3314100722865963e-03,  -4.2462405872217889e-04,
+      -1.2130478454804230e-03, 1.3314100722865963e-03,
+      -1.0941534938684865e-03, 5.1529382431827911e-04,
+      4.6360451013415833e-04,  -4.2462405872217889e-04,
+      5.1529382431827911e-04,  -6.7010849513206446e-04,
+      -1.9635177370699608e-01, 3.1453073526812375e-03,
+      1.9943467720728159e-04,  1.8490379843829265e-03,
+      3.1453073526812375e-03,  -1.4755660222860181e-03,
+      2.5216877122513355e-04,  -7.8528688355206447e-04,
+      1.9943467720728159e-04,  2.5216877122513355e-04,
+      -1.4555949485632702e-03, -5.9788067272640536e-04,
+      1.8490379843829265e-03,  -7.8528688355206447e-04,
+      -5.9788067272640536e-04, -1.4009824713669468e-03,
+      1.8079617326597403e-01,  1.3311705456384233e-02,
+      -1.6813583174003447e-03, 1.6754281220701891e-03,
+      1.3311705456384233e-02,  -2.5836933131966949e-03,
+      1.2524623632335693e-03,  -4.4318906832989434e-04,
+      -1.6813583174003447e-03, 1.2524623632335693e-03,
+      -1.1071301248058326e-03, 1.6237170251639789e-04,
+      1.6754281220701891e-03,  -4.4318906832989434e-04,
+      1.6237170251639789e-04,  -5.8066403722343185e-04,
+      -4.7658243260452392e-02, -1.1547225654362255e-02,
+      -1.6724724586724625e-04, -8.6129819877703328e-04,
+      -1.1547225654362255e-02, -3.2589278968339326e-03,
+      2.6655390367207022e-04,  2.2553348630838165e-04,
+      -1.6724724586724625e-04, 2.6655390367207022e-04,
+      -3.8468694210936846e-04, 2.0656031333140019e-04,
+      -8.6129819877703328e-04, 2.2553348630838165e-04,
+      2.0656031333140019e-04,  -3.5394037175600837e-04,
+      -6.6559656789261540e-02, 3.6780140422847912e-02,
+      -1.0833735822547434e-03, 1.3914255331022606e-04,
+      3.6780140422847912e-02,  -3.3904219310184804e-03,
+      5.4509538508334726e-04,  -2.2439596920688072e-04,
+      -1.0833735822547434e-03, 5.4509538508334726e-04,
+      -1.1195941512654017e-03, 1.0549891235828525e-05,
+      1.3914255331022606e-04,  -2.2439596920688072e-04,
+      1.0549891235828525e-05,  -5.8598966626073352e-04};
+  std::vector expected_dy_dem = {
+      4.8687245272451135e-03,  5.5397159651024933e-03,
+      5.4642599386694624e-03,  5.6057159373549721e-03,
+      5.5397159651024933e-03,  4.8806501836285432e-03,
+      5.6063767619383984e-03,  5.6970860664459413e-03,
+      5.4642599386694624e-03,  5.6063767619383984e-03,
+      5.1211835750794290e-03,  5.3223602254697877e-03,
+      5.6057159373549721e-03,  5.6970860664459413e-03,
+      5.3223602254697877e-03,  5.2081125072059725e-03,
+      4.5292484636524086e-03,  4.9816448819259198e-03,
+      5.1363544712933759e-03,  5.1501696199608019e-03,
+      4.9816448819259198e-03,  4.6419992655419269e-03,
+      5.1569135569294903e-03,  4.8945949693767234e-03,
+      5.1363544712933759e-03,  5.1569135569294903e-03,
+      4.6706613224722264e-03,  4.9092906666249794e-03,
+      5.1501696199608019e-03,  4.8945949693767234e-03,
+      4.9092906666249794e-03,  4.8515691645339065e-03,
+      3.0190652370495003e-03,  3.4898418636317798e-03,
+      3.4899656132580579e-03,  3.5145415393083900e-03,
+      3.4898418636317798e-03,  3.1452916478866737e-03,
+      3.5037937416394702e-03,  3.4286490652383700e-03,
+      3.4899656132580579e-03,  3.5037937416394702e-03,
+      3.1627359787623427e-03,  3.5804187550924349e-03,
+      3.5145415393083900e-03,  3.4286490652383700e-03,
+      3.5804187550924349e-03,  3.2322057910012985e-03,
+      3.6840833901333001e-03,  4.3387482039692832e-03,
+      4.2515867787302417e-03,  4.4155206099863904e-03,
+      4.3387482039692832e-03,  4.0028491849676334e-03,
+      4.3652298563497116e-03,  4.3014839026378881e-03,
+      4.2515867787302417e-03,  4.3652298563497116e-03,
+      4.0124345907042516e-03,  4.1790409970907686e-03,
+      4.4155206099863904e-03,  4.3014839026378881e-03,
+      4.1790409970907686e-03,  4.0456958994549621e-03,
+      4.7788898285209408e-03,  4.7316589641173863e-03,
+      4.7277506758881538e-03,  4.7420787786160946e-03,
+      4.7316589641173863e-03,  4.7632472674770847e-03,
+      4.7277799549445667e-03,  4.7294160442947122e-03,
+      4.7277506758881538e-03,  4.7277799549445667e-03,
+      4.7576353824550499e-03,  4.7311669494084236e-03,
+      4.7420787786160946e-03,  4.7294160442947122e-03,
+      4.7311669494084236e-03,  4.7516945819656171e-03,
+      3.9248662197341371e-03,  4.9180169646964866e-03,
+      4.7190463613870446e-03,  4.4520204316217879e-03,
+      4.9180169646964866e-03,  4.1556621508365569e-03,
+      4.6033674111750095e-03,  4.5946737014459435e-03,
+      4.7190463613870446e-03,  4.6033674111750095e-03,
+      4.4277151435242147e-03,  4.6668537390335040e-03,
+      4.4520204316217879e-03,  4.5946737014459435e-03,
+      4.6668537390335040e-03,  4.4390865038161998e-03,
+      3.7758343215872881e-03,  4.1709220560399215e-03,
+      4.1202109662083035e-03,  3.9893563131548088e-03,
+      4.1709220560399215e-03,  3.8379626600639266e-03,
+      4.1328523107198936e-03,  3.9840470230024889e-03,
+      4.1202109662083035e-03,  4.1328523107198936e-03,
+      3.8852759103779593e-03,  4.1580920004019896e-03,
+      3.9893563131548088e-03,  3.9840470230024889e-03,
+      4.1580920004019896e-03,  3.9467666481718391e-03,
+      3.9761642677066033e-03,  4.4504099654305816e-03,
+      4.3794412854383678e-03,  4.4604317957851781e-03,
+      4.4504099654305816e-03,  4.0768567666647814e-03,
+      4.3435449806077473e-03,  4.3222112460340181e-03,
+      4.3794412854383678e-03,  4.3435449806077473e-03,
+      4.0773633573082564e-03,  4.4803490047300660e-03,
+      4.4604317957851781e-03,  4.3222112460340181e-03,
+      4.4803490047300660e-03,  4.1310317786866901e-03,
+      4.3424352854361066e-03,  4.9111142266166450e-03,
+      4.8395299678665781e-03,  4.6865337839814593e-03,
+      4.9111142266166450e-03,  4.5038688193735138e-03,
+      4.7214010478928668e-03,  4.7827433155358395e-03,
+      4.8395299678665781e-03,  4.7214010478928668e-03,
+      4.5047095985936833e-03,  4.8043858107812464e-03,
+      4.6865337839814593e-03,  4.7827433155358395e-03,
+      4.8043858107812464e-03,  4.5266636394073641e-03,
+      5.0023501655291321e-03,  5.4960776974766461e-03,
+      5.5277863655162403e-03,  5.6131610328358644e-03,
+      5.4960776974766461e-03,  5.0497823718039583e-03,
+      5.5223616852066977e-03,  5.2893854856376011e-03,
+      5.5277863655162403e-03,  5.5223616852066977e-03,
+      5.0964808619146599e-03,  5.4611470394996591e-03,
+      5.6131610328358644e-03,  5.2893854856376011e-03,
+      5.4611470394996591e-03,  5.2261512984334412e-03,
+      3.9613105413924812e-03,  4.4015218128590642e-03,
+      4.7307418574636991e-03,  4.6640477363723105e-03,
+      4.4015218128590642e-03,  4.0082410785944010e-03,
+      4.8261944934070503e-03,  4.5757895595947961e-03,
+      4.7307418574636991e-03,  4.8261944934070503e-03,
+      4.2233067127250678e-03,  4.7151130208320496e-03,
+      4.6640477363723105e-03,  4.5757895595947961e-03,
+      4.7151130208320496e-03,  4.3106975949268253e-03,
+      4.7334182996649809e-03,  5.3634328062759831e-03,
+      5.4504527709104602e-03,  5.1343603133321243e-03,
+      5.3634328062759831e-03,  4.8165188916604821e-03,
+      5.2922209819789611e-03,  5.3818126119621716e-03,
+      5.4504527709104602e-03,  5.2922209819789611e-03,
+      4.8435704822708701e-03,  5.2240647899442503e-03,
+      5.1343603133321243e-03,  5.3818126119621716e-03,
+      5.2240647899442503e-03,  4.8868508221224377e-03,
+      4.7996614634904892e-03,  5.7179083120858371e-03,
+      5.6117201232891321e-03,  5.6700701716033263e-03,
+      5.7179083120858371e-03,  4.9727735792746818e-03,
+      5.3848863488571300e-03,  5.4305318435656936e-03,
+      5.6117201232891321e-03,  5.3848863488571300e-03,
+      5.1731090323620544e-03,  5.2948377822407506e-03,
+      5.6700701716033263e-03,  5.4305318435656936e-03,
+      5.2948377822407506e-03,  5.2191935738847518e-03,
+      5.0888074222028307e-03,  5.5682241956695952e-03,
+      5.5311688481500410e-03,  5.3354587343858242e-03,
+      5.5682241956695952e-03,  5.1039796573406886e-03,
+      5.6527375470408862e-03,  5.5776264780928323e-03,
+      5.5311688481500410e-03,  5.6527375470408862e-03,
+      5.1908856775166106e-03,  5.5157564883515529e-03,
+      5.3354587343858242e-03,  5.5776264780928323e-03,
+      5.5157564883515529e-03,  5.2868882706481306e-03,
+      4.1148293948602473e-03,  4.8099243464647095e-03,
+      4.8354127941843744e-03,  4.8024021654871083e-03,
+      4.8099243464647095e-03,  4.3036710353764468e-03,
+      4.7276148863981300e-03,  4.5819645174844182e-03,
+      4.8354127941843744e-03,  4.7276148863981300e-03,
+      4.3517326519376268e-03,  4.5484962243548931e-03,
+      4.8024021654871083e-03,  4.5819645174844182e-03,
+      4.5484962243548931e-03,  4.3552076928133352e-03,
+      4.4103831260207836e-03,  4.7323280089558032e-03,
+      4.6792976546604689e-03,  4.7101816275088885e-03,
+      4.7323280089558032e-03,  4.4526464210458746e-03,
+      4.7051812634566420e-03,  4.7775181030841720e-03,
+      4.6792976546604689e-03,  4.7051812634566420e-03,
+      4.4777362867795979e-03,  4.7837047168366176e-03,
+      4.7101816275088885e-03,  4.7775181030841720e-03,
+      4.7837047168366176e-03,  4.4818283635942383e-03,
+      4.8171219099102398e-03,  5.4329072017648130e-03,
+      5.5577769098679687e-03,  5.5021033559999686e-03,
+      5.4329072017648130e-03,  4.9840681446863353e-03,
+      5.5228366571162838e-03,  5.6998588607070494e-03,
+      5.5577769098679687e-03,  5.5228366571162838e-03,
+      5.0189254656526020e-03,  5.3181989951699288e-03,
+      5.5021033559999686e-03,  5.6998588607070494e-03,
+      5.3181989951699288e-03,  5.2634498662377308e-03,
+      4.6530516200842852e-03,  5.0650394647562338e-03,
+      5.2488756975870273e-03,  5.2624908207797184e-03,
+      5.0650394647562338e-03,  4.6874447846501164e-03,
+      5.2268492983034561e-03,  5.1745585095054458e-03,
+      5.2488756975870273e-03,  5.2268492983034561e-03,
+      4.7952117024116173e-03,  5.0276292129730416e-03,
+      5.2624908207797184e-03,  5.1745585095054458e-03,
+      5.0276292129730416e-03,  4.9122789880896404e-03,
+      4.9251289238065234e-03,  5.5933241606014724e-03,
+      5.3718594158626860e-03,  5.6008531360396017e-03,
+      5.5933241606014724e-03,  5.0150407582346635e-03,
+      5.4662584346395404e-03,  5.3675120118810739e-03,
+      5.3718594158626860e-03,  5.4662584346395404e-03,
+      5.0248322936143662e-03,  5.4605145072560580e-03,
+      5.6008531360396017e-03,  5.3675120118810739e-03,
+      5.4605145072560580e-03,  5.1143948516248604e-03,
+      3.7657249048296967e-03,  3.9486926147620297e-03,
+      4.0691576682980483e-03,  4.0664780157542643e-03,
+      3.9486926147620297e-03,  3.7736205874903724e-03,
+      4.0495831622273807e-03,  4.0206411044168824e-03,
+      4.0691576682980483e-03,  4.0495831622273807e-03,
+      3.7937874360149263e-03,  3.9752869288898511e-03,
+      4.0664780157542643e-03,  4.0206411044168824e-03,
+      3.9752869288898511e-03,  3.8149050745084684e-03,
+      5.0114061217704613e-03,  5.2617991497744074e-03,
+      5.1790540139945674e-03,  5.3591951134087162e-03,
+      5.2617991497744074e-03,  5.0120596768805908e-03,
+      5.2487154791349359e-03,  5.2629872576859908e-03,
+      5.1790540139945674e-03,  5.2487154791349359e-03,
+      5.0448755388764347e-03,  5.3303535417822852e-03,
+      5.3591951134087162e-03,  5.2629872576859908e-03,
+      5.3303535417822852e-03,  5.1174676770792023e-03,
+      4.8377902633377264e-03,  5.3788546108106241e-03,
+      5.6040768285432335e-03,  5.3105068959469794e-03,
+      5.3788546108106241e-03,  4.9326524445286032e-03,
+      5.4991607490083253e-03,  5.5655132712644469e-03,
+      5.6040768285432335e-03,  5.4991607490083253e-03,
+      4.9741473776001794e-03,  5.3900550171932238e-03,
+      5.3105068959469794e-03,  5.5655132712644469e-03,
+      5.3900550171932238e-03,  5.1950052918756442e-03,
+      3.2492587714875146e-03,  3.9827109719638674e-03,
+      3.8774234602011355e-03,  3.8757465280329336e-03,
+      3.9827109719638674e-03,  3.4344256880305358e-03,
+      4.0545162671736575e-03,  3.7835976611712702e-03,
+      3.8774234602011355e-03,  4.0545162671736575e-03,
+      3.4953556313526311e-03,  3.9175382918411519e-03,
+      3.8757465280329336e-03,  3.7835976611712702e-03,
+      3.9175382918411519e-03,  3.5756760339217433e-03,
+      4.7617721014200634e-03,  4.8614225555244890e-03,
+      4.8279075259455451e-03,  4.8427156866381622e-03,
+      4.8614225555244890e-03,  4.8195643885916269e-03,
+      4.8405369639889255e-03,  4.8156734615472999e-03,
+      4.8279075259455451e-03,  4.8405369639889255e-03,
+      4.8197639580753243e-03,  4.8155984576041654e-03,
+      4.8427156866381622e-03,  4.8156734615472999e-03,
+      4.8155984576041654e-03,  4.8158790994777771e-03,
+      4.5833139216121957e-03,  5.4837863258358531e-03,
+      5.5638146695959553e-03,  5.4909136024647049e-03,
+      5.4837863258358531e-03,  4.8077007992908590e-03,
+      5.4604862042020273e-03,  5.4396976154026198e-03,
+      5.5638146695959553e-03,  5.4604862042020273e-03,
+      4.8833715685529635e-03,  5.3864868915666220e-03,
+      5.4909136024647049e-03,  5.4396976154026198e-03,
+      5.3864868915666220e-03,  4.9933154810546838e-03,
+      5.1071558319641056e-03,  5.4433074957792454e-03,
+      5.5765474271101826e-03,  5.5076635863146820e-03,
+      5.4433074957792454e-03,  5.1894387573805066e-03,
+      5.3922841131452456e-03,  5.5390126993166908e-03,
+      5.5765474271101826e-03,  5.3922841131452456e-03,
+      5.2001066482717309e-03,  5.4246759715010128e-03,
+      5.5076635863146820e-03,  5.5390126993166908e-03,
+      5.4246759715010128e-03,  5.2227895716483279e-03,
+      4.0497679429561092e-03,  4.9282556022888424e-03,
+      4.7237087909624649e-03,  4.8020213273712292e-03,
+      4.9282556022888424e-03,  4.3464895120150539e-03,
+      4.7976125238518097e-03,  4.7260356905951930e-03,
+      4.7237087909624649e-03,  4.7976125238518097e-03,
+      4.4574002582117803e-03,  4.6487121118467404e-03,
+      4.8020213273712292e-03,  4.7260356905951930e-03,
+      4.6487121118467404e-03,  4.4956041562837267e-03,
+      4.9714633531457494e-03,  5.7206656536672494e-03,
+      5.5180734069265810e-03,  5.4919793509825418e-03,
+      5.7206656536672494e-03,  5.2356045663686296e-03,
+      5.5596394964592278e-03,  5.7261136434031473e-03,
+      5.5180734069265810e-03,  5.5596394964592278e-03,
+      5.4948204143983092e-03,  5.6210477153196018e-03,
+      5.4919793509825418e-03,  5.7261136434031473e-03,
+      5.6210477153196018e-03,  5.5487852497226155e-03,
+      4.5489572462475468e-03,  5.4201128096244833e-03,
+      5.2786942174057458e-03,  5.5227997797472469e-03,
+      5.4201128096244833e-03,  4.7506506604289601e-03,
+      5.5748809286163452e-03,  5.3179890118108808e-03,
+      5.2786942174057458e-03,  5.5748809286163452e-03,
+      4.9901034258736697e-03,  5.3280021534388728e-03,
+      5.5227997797472469e-03,  5.3179890118108808e-03,
+      5.3280021534388728e-03,  5.0865046620443850e-03,
+      5.1827253238118367e-03,  5.3230838505120767e-03,
+      5.6092186758799996e-03,  5.3498800832996584e-03,
+      5.3230838505120767e-03,  5.1866043895766913e-03,
+      5.4707858854025323e-03,  5.4396236153839645e-03,
+      5.6092186758799996e-03,  5.4707858854025323e-03,
+      5.2034777387438419e-03,  5.3820989828010021e-03,
+      5.3498800832996584e-03,  5.4396236153839645e-03,
+      5.3820989828010021e-03,  5.2795140455242154e-03,
+      4.9373256245385227e-03,  5.0306921107680144e-03,
+      5.0091821137527148e-03,  5.0745697481229361e-03,
+      5.0306921107680144e-03,  4.9460721328636499e-03,
+      5.0969673520163961e-03,  4.9827250786815078e-03,
+      5.0091821137527148e-03,  5.0969673520163961e-03,
+      4.9517391298398959e-03,  5.0636541925258224e-03,
+      5.0745697481229361e-03,  4.9827250786815078e-03,
+      5.0636541925258224e-03,  4.9552504666375910e-03,
+      4.9253932922107505e-03,  5.3550337394986059e-03,
+      5.7052084741338500e-03,  5.4398733606965252e-03,
+      5.3550337394986059e-03,  4.9574861537077779e-03,
+      5.4706377679931946e-03,  5.5861137412303828e-03,
+      5.7052084741338500e-03,  5.4706377679931946e-03,
+      5.1587536865196583e-03,  5.6684419038570740e-03,
+      5.4398733606965252e-03,  5.5861137412303828e-03,
+      5.6684419038570740e-03,  5.2466699874090649e-03,
+      4.8003051442717763e-03,  5.6347570986480860e-03,
+      5.5318100167355980e-03,  5.4577257727782651e-03,
+      5.6347570986480860e-03,  4.8631401810418006e-03,
+      5.3361212366442329e-03,  5.5641617598327581e-03,
+      5.5318100167355980e-03,  5.3361212366442329e-03,
+      5.2040986395562412e-03,  5.3538907108157440e-03,
+      5.4577257727782651e-03,  5.5641617598327581e-03,
+      5.3538907108157440e-03,  5.2371703144156933e-03,
+      4.4297692564271461e-03,  5.1376827019044427e-03,
+      4.9522665099534903e-03,  5.1833712956327353e-03,
+      5.1376827019044427e-03,  4.5080542081015985e-03,
+      5.1885572418010912e-03,  4.8897090121346205e-03,
+      4.9522665099534903e-03,  5.1885572418010912e-03,
+      4.6450870564566172e-03,  4.8578374835205329e-03,
+      5.1833712956327353e-03,  4.8897090121346205e-03,
+      4.8578374835205329e-03,  4.6996959359605593e-03,
+      4.7160296966838235e-03,  5.4354504254331372e-03,
+      5.4914673382449068e-03,  5.0994910096800114e-03,
+      5.4354504254331372e-03,  4.8780256297317678e-03,
+      5.1983414836785551e-03,  5.1516527659604631e-03,
+      5.4914673382449068e-03,  5.1983414836785551e-03,
+      4.9576745968284569e-03,  5.3076257372673850e-03,
+      5.0994910096800114e-03,  5.1516527659604631e-03,
+      5.3076257372673850e-03,  5.0692404281867864e-03,
+      3.7521616561831035e-03,  4.3041195036684684e-03,
+      4.2083507066731441e-03,  4.0701551841232234e-03,
+      4.3041195036684684e-03,  3.8824364771630269e-03,
+      4.1647702647614748e-03,  4.1958378911293734e-03,
+      4.2083507066731441e-03,  4.1647702647614748e-03,
+      3.9355724928669756e-03,  4.1762562220603594e-03,
+      4.0701551841232234e-03,  4.1958378911293734e-03,
+      4.1762562220603594e-03,  4.0252316104492768e-03,
+      4.6691024276046839e-03,  5.4358497609484843e-03,
+      5.7984562413035670e-03,  5.3548860709007389e-03,
+      5.4358497609484843e-03,  4.8958005316626745e-03,
+      5.6061623938377849e-03,  5.5174561676182539e-03,
+      5.7984562413035670e-03,  5.6061623938377849e-03,
+      5.0465532622400548e-03,  5.4146406099752648e-03,
+      5.3548860709007389e-03,  5.5174561676182539e-03,
+      5.4146406099752648e-03,  5.1458567514192288e-03,
+      4.7873760079603245e-03,  4.7132530407841666e-03,
+      4.7097574777507523e-03,  4.7067597555051121e-03,
+      4.7132530407841666e-03,  4.7759539827926852e-03,
+      4.7079931840557261e-03,  4.7054012333601347e-03,
+      4.7097574777507523e-03,  4.7079931840557261e-03,
+      4.7228911829172273e-03,  4.7124589687313331e-03,
+      4.7067597555051121e-03,  4.7054012333601347e-03,
+      4.7124589687313331e-03,  4.7141024992431325e-03,
+      4.7653857276584981e-03,  5.3935337863564433e-03,
+      5.1518363112552411e-03,  5.2735623271459057e-03,
+      5.3935337863564433e-03,  4.7820910815419837e-03,
+      5.3263045464322246e-03,  5.2662666132495747e-03,
+      5.1518363112552411e-03,  5.3263045464322246e-03,
+      4.8989837691361037e-03,  5.2967809909708055e-03,
+      5.2735623271459057e-03,  5.2662666132495747e-03,
+      5.2967809909708055e-03,  4.9322596897032446e-03,
+      4.7680183858758744e-03,  4.6997587251910548e-03,
+      4.6997371537680086e-03,  4.7022616189482079e-03,
+      4.6997587251910548e-03,  4.7586592022394198e-03,
+      4.7000771623067050e-03,  4.6999788734553346e-03,
+      4.6997371537680086e-03,  4.7000771623067050e-03,
+      4.7355142545157962e-03,  4.6997484975064945e-03,
+      4.7022616189482079e-03,  4.6999788734553346e-03,
+      4.6997484975064945e-03,  4.7310105077216554e-03,
+      4.8458569989126826e-03,  5.5268069114665867e-03,
+      5.7047145500931061e-03,  5.5972004900509010e-03,
+      5.5268069114665867e-03,  5.0812217431459358e-03,
+      5.5795445843535817e-03,  5.5171878865958018e-03,
+      5.7047145500931061e-03,  5.5795445843535817e-03,
+      5.1164073324255592e-03,  5.7042719550623056e-03,
+      5.5972004900509010e-03,  5.5171878865958018e-03,
+      5.7042719550623056e-03,  5.2296532393988453e-03,
+      5.0331052677300868e-03,  5.8126537574760377e-03,
+      5.6116616631243848e-03,  5.5610714403935496e-03,
+      5.8126537574760377e-03,  5.1251590071031677e-03,
+      5.4834432263391871e-03,  5.3387073396366554e-03,
+      5.6116616631243848e-03,  5.4834432263391871e-03,
+      5.1721761378333781e-03,  5.4102404378702880e-03,
+      5.5610714403935496e-03,  5.3387073396366554e-03,
+      5.4102404378702880e-03,  5.2983146020102812e-03,
+      4.5634809762413915e-03,  5.0708627795713048e-03,
+      5.5285155509920162e-03,  5.1479022947186481e-03,
+      5.0708627795713048e-03,  4.5667821151481602e-03,
+      5.2739955449695626e-03,  5.2334032096775420e-03,
+      5.5285155509920162e-03,  5.2739955449695626e-03,
+      4.7796850268603851e-03,  5.2855491938456917e-03,
+      5.1479022947186481e-03,  5.2334032096775420e-03,
+      5.2855491938456917e-03,  4.9846472734427676e-03,
+      4.3408943946436742e-03,  5.0828126111047292e-03,
+      5.1619429658240372e-03,  4.8950748576602138e-03,
+      5.0828126111047292e-03,  4.5132933116312137e-03,
+      5.2279735123596507e-03,  5.0148402712815654e-03,
+      5.1619429658240372e-03,  5.2279735123596507e-03,
+      4.5757840526439138e-03,  5.1782251466555721e-03,
+      4.8950748576602138e-03,  5.0148402712815654e-03,
+      5.1782251466555721e-03,  4.7573202851706603e-03,
+      4.5244066537111763e-03,  5.3139854411244740e-03,
+      5.3841932493787804e-03,  5.1243544377133250e-03,
+      5.3139854411244740e-03,  4.5788274266713343e-03,
+      5.0657051731653390e-03,  5.1426785687150312e-03,
+      5.3841932493787804e-03,  5.0657051731653390e-03,
+      4.8535063031089169e-03,  5.0053417498441321e-03,
+      5.1243544377133250e-03,  5.1426785687150312e-03,
+      5.0053417498441321e-03,  4.9387189843115254e-03,
+      4.1561105459001855e-03,  4.8760140670205537e-03,
+      4.9530604056372751e-03,  4.7505323982066205e-03,
+      4.8760140670205537e-03,  4.1827775726392021e-03,
+      4.8099710346094313e-03,  5.0738733877216434e-03,
+      4.9530604056372751e-03,  4.8099710346094313e-03,
+      4.2585652889670211e-03,  4.7953656364286730e-03,
+      4.7505323982066205e-03,  5.0738733877216434e-03,
+      4.7953656364286730e-03,  4.4343641159264166e-03,
+      3.8850145135094689e-03,  4.2621118153951384e-03,
+      4.2895589113090549e-03,  4.3362464279518660e-03,
+      4.2621118153951384e-03,  4.0308729657790154e-03,
+      4.3844806003525494e-03,  4.2431337390287519e-03,
+      4.2895589113090549e-03,  4.3844806003525494e-03,
+      4.0886915647727800e-03,  4.2096924634853674e-03,
+      4.3362464279518660e-03,  4.2431337390287519e-03,
+      4.2096924634853674e-03,  4.1471058237790934e-03,
+      4.0748278012573509e-03,  5.0975121874835549e-03,
+      4.7643646721661492e-03,  4.9684148451479976e-03,
+      5.0975121874835549e-03,  4.3184840883046935e-03,
+      4.6729488469095803e-03,  4.8545826387352059e-03,
+      4.7643646721661492e-03,  4.6729488469095803e-03,
+      4.3205131005967286e-03,  4.7004116957686162e-03,
+      4.9684148451479976e-03,  4.8545826387352059e-03,
+      4.7004116957686162e-03,  4.3690205971449432e-03,
+      4.9909978133227360e-03,  5.4143910989253663e-03,
+      5.3601062423874191e-03,  5.3625046092275102e-03,
+      5.4143910989253663e-03,  5.0724427507293137e-03,
+      5.3652489479395936e-03,  5.3446386733428074e-03,
+      5.3601062423874191e-03,  5.3652489479395936e-03,
+      5.1460907707869393e-03,  5.3780861391384124e-03,
+      5.3625046092275102e-03,  5.3446386733428074e-03,
+      5.3780861391384124e-03,  5.2120089804020249e-03,
+      3.9588108570032090e-03,  4.2938117793838125e-03,
+      4.3611063350752835e-03,  4.2651906389425988e-03,
+      4.2938117793838125e-03,  4.0073255645855172e-03,
+      4.3518483851969093e-03,  4.3842419740188166e-03,
+      4.3611063350752835e-03,  4.3518483851969093e-03,
+      4.0893058354619380e-03,  4.3784881764625605e-03,
+      4.2651906389425988e-03,  4.3842419740188166e-03,
+      4.3784881764625605e-03,  4.0997083424903474e-03,
+      4.7902809088108339e-03,  5.3533071692810326e-03,
+      5.4529697742326999e-03,  5.3421394953302170e-03,
+      5.3533071692810326e-03,  4.8783628762572584e-03,
+      5.4898943571368179e-03,  5.4389677679356945e-03,
+      5.4529697742326999e-03,  5.4898943571368179e-03,
+      4.9324715462178625e-03,  5.3542755941681317e-03,
+      5.3421394953302170e-03,  5.4389677679356945e-03,
+      5.3542755941681317e-03,  4.9864655774714602e-03,
+      4.2147742524955437e-03,  4.8044404091749406e-03,
+      4.7174654013479485e-03,  4.5212986560996676e-03,
+      4.8044404091749406e-03,  4.2334336465773776e-03,
+      4.8136572475988510e-03,  4.8526412903096890e-03,
+      4.7174654013479485e-03,  4.8136572475988510e-03,
+      4.3841364814263201e-03,  4.5319687487354911e-03,
+      4.5212986560996676e-03,  4.8526412903096890e-03,
+      4.5319687487354911e-03,  4.3968275756536071e-03,
+      4.0044555899961193e-03,  4.4761433683108247e-03,
+      4.6472307349424728e-03,  4.4243677432400045e-03,
+      4.4761433683108247e-03,  4.0591447141326868e-03,
+      4.5360985593542293e-03,  4.4752304699918616e-03,
+      4.6472307349424728e-03,  4.5360985593542293e-03,
+      4.1784992937999085e-03,  4.5671014827176528e-03,
+      4.4243677432400045e-03,  4.4752304699918616e-03,
+      4.5671014827176528e-03,  4.2505134990343130e-03,
+      4.9235489798322716e-03,  5.7605067104734096e-03,
+      5.4765479512553930e-03,  5.5189947100062961e-03,
+      5.7605067104734096e-03,  5.0563592914372041e-03,
+      5.5230364268243756e-03,  5.6783945214777731e-03,
+      5.4765479512553930e-03,  5.5230364268243756e-03,
+      5.0602340964368484e-03,  5.6195955397581694e-03,
+      5.5189947100062961e-03,  5.6783945214777731e-03,
+      5.6195955397581694e-03,  5.1685388328162494e-03,
+      4.5391542914077748e-03,  5.3162333178131448e-03,
+      5.0344382645984304e-03,  5.0853779913647874e-03,
+      5.3162333178131448e-03,  4.5405811858949636e-03,
+      5.4256287620576297e-03,  5.0932428800818767e-03,
+      5.0344382645984304e-03,  5.4256287620576297e-03,
+      4.5923554538614669e-03,  5.1764781936595632e-03,
+      5.0853779913647874e-03,  5.0932428800818767e-03,
+      5.1764781936595632e-03,  4.6315286436058034e-03,
+      4.8241517294518987e-03,  5.6107202492192646e-03,
+      5.4225910095940417e-03,  5.6682617800773672e-03,
+      5.6107202492192646e-03,  5.1440593741325126e-03,
+      5.7028655987132184e-03,  5.4379837043436550e-03,
+      5.4225910095940417e-03,  5.7028655987132184e-03,
+      5.1490285191096163e-03,  5.4071926623830719e-03,
+      5.6682617800773672e-03,  5.4379837043436550e-03,
+      5.4071926623830719e-03,  5.3570980160283933e-03,
+      5.0497164090830202e-03,  5.6354102305264170e-03,
+      5.5833976153072101e-03,  5.5241887515719923e-03,
+      5.6354102305264170e-03,  5.1844311703979455e-03,
+      5.6093229622724144e-03,  5.6832732406642099e-03,
+      5.5833976153072101e-03,  5.6093229622724144e-03,
+      5.3845110635961542e-03,  5.6525395728626391e-03,
+      5.5241887515719923e-03,  5.6832732406642099e-03,
+      5.6525395728626391e-03,  5.5054350484104982e-03,
+      4.7330318788156075e-03,  5.2558542628192498e-03,
+      5.2754180637923608e-03,  5.2680668623478954e-03,
+      5.2558542628192498e-03,  4.8583176679629775e-03,
+      5.3617411087909300e-03,  5.0763527226022447e-03,
+      5.2754180637923608e-03,  5.3617411087909300e-03,
+      4.8719763416853719e-03,  5.2676200731791344e-03,
+      5.2680668623478954e-03,  5.0763527226022447e-03,
+      5.2676200731791344e-03,  4.9861268237371891e-03,
+      4.4829218835174276e-03,  4.9248762537746826e-03,
+      5.0024520635936409e-03,  4.9006320187329356e-03,
+      4.9248762537746826e-03,  4.5440399582473258e-03,
+      4.8761752126013965e-03,  4.9995342540547251e-03,
+      5.0024520635936409e-03,  4.8761752126013965e-03,
+      4.5687972074181541e-03,  4.8573575677983922e-03,
+      4.9006320187329356e-03,  4.9995342540547251e-03,
+      4.8573575677983922e-03,  4.6113723269492028e-03,
+      4.7439766500629171e-03,  5.6665405738237223e-03,
+      5.5308219569568514e-03,  5.8507003829033413e-03,
+      5.6665405738237223e-03,  5.0720272017036588e-03,
+      5.7362709767074119e-03,  5.7332701844372063e-03,
+      5.5308219569568514e-03,  5.7362709767074119e-03,
+      5.1913803385325689e-03,  5.4671990661370592e-03,
+      5.8507003829033413e-03,  5.7332701844372063e-03,
+      5.4671990661370592e-03,  5.3276953731318695e-03,
+      4.1781480426967242e-03,  4.6569448684048234e-03,
+      4.5751840200780334e-03,  4.6474757680489750e-03,
+      4.6569448684048234e-03,  4.2572109329085161e-03,
+      4.4514347800524946e-03,  4.4710709510486640e-03,
+      4.5751840200780334e-03,  4.4514347800524946e-03,
+      4.3020526773420746e-03,  4.4948839390591307e-03,
+      4.6474757680489750e-03,  4.4710709510486640e-03,
+      4.4948839390591307e-03,  4.3748034929899484e-03,
+      4.3313384846384567e-03,  4.8609426130105955e-03,
+      5.0178932944894578e-03,  4.9114981624153410e-03,
+      4.8609426130105955e-03,  4.4156139708581314e-03,
+      5.0527692323661466e-03,  4.8434482548828555e-03,
+      5.0178932944894578e-03,  5.0527692323661466e-03,
+      4.5006787363920647e-03,  4.8851573490110390e-03,
+      4.9114981624153410e-03,  4.8434482548828555e-03,
+      4.8851573490110390e-03,  4.5585418241746826e-03,
+      4.6655021290269603e-03,  5.1099289511134065e-03,
+      5.2222030661634514e-03,  5.1555694826168827e-03,
+      5.1099289511134065e-03,  4.7523395276319845e-03,
+      5.2110469413705869e-03,  5.1579337232921206e-03,
+      5.2222030661634514e-03,  5.2110469413705869e-03,
+      4.8305384374289825e-03,  5.1666370817119011e-03,
+      5.1555694826168827e-03,  5.1579337232921206e-03,
+      5.1666370817119011e-03,  4.8571729590057358e-03,
+      4.4124832580118107e-03,  5.5124750974480611e-03,
+      5.1133380051136147e-03,  5.3588860042126296e-03,
+      5.5124750974480611e-03,  4.6356559976615270e-03,
+      5.4305254470557798e-03,  5.2131271012044759e-03,
+      5.1133380051136147e-03,  5.4305254470557798e-03,
+      4.6534597410828934e-03,  5.1783873042292455e-03,
+      5.3588860042126296e-03,  5.2131271012044759e-03,
+      5.1783873042292455e-03,  4.8236077833933117e-03,
+      -9.6615927935495179e-03, 6.8675588255994405e-03,
+      1.0945473142527546e-03,  -1.1194179153164361e-03,
+      6.8675588255994405e-03,  -3.6346241719092134e-03,
+      -5.8138983110610643e-04, 1.2243832363445957e-03,
+      1.0945473142527546e-03,  -5.8138983110610643e-04,
+      -4.6126147746582667e-04, 6.3180189590662426e-04,
+      -1.1194179153164361e-03, 1.2243832363445957e-03,
+      6.3180189590662426e-04,  -3.6362492012642004e-04,
+      -1.2936789941773327e-02, 9.1335782726763882e-03,
+      -9.2413647644373851e-04, -9.1704209021694640e-04,
+      9.1335782726763882e-03,  -4.3195816573411861e-03,
+      1.5488956851291041e-03,  2.2741681746236771e-03,
+      -9.2413647644373851e-04, 1.5488956851291041e-03,
+      4.0145236950450106e-04,  1.1955645031987922e-03,
+      -9.1704209021694640e-04, 2.2741681746236771e-03,
+      1.1955645031987922e-03,  5.9101407236907759e-04,
+      -5.3962989027005557e-03, 2.6072667157267145e-03,
+      -8.2947112894151754e-04, -8.4707999007982405e-04,
+      2.6072667157267145e-03,  -1.9454893539958128e-03,
+      1.3259854799795544e-04,  9.1413360753332847e-05,
+      -8.2947112894151754e-04, 1.3259854799795544e-04,
+      -5.1496376171848712e-04, -2.0587710943157896e-04,
+      -8.4707999007982405e-04, 9.1413360753332847e-05,
+      -2.0587710943157896e-04, -4.9949758378796459e-04,
+      -1.7429393257872950e-02, 8.3520701836941741e-03,
+      -1.3077437245125536e-03, 2.6219650960785422e-03,
+      8.3520701836941741e-03,  -2.1730131032824999e-03,
+      2.9672791558086049e-03,  1.5427551148408693e-03,
+      -1.3077437245125536e-03, 2.9672791558086049e-03,
+      -1.2507874901661125e-04, 8.2649701074271687e-04,
+      2.6219650960785422e-03,  1.5427551148408693e-03,
+      8.2649701074271687e-04,  -2.3883234105498484e-05,
+      -1.3830306151965368e-02, 8.6140679080944672e-03,
+      -1.0519351576461367e-03, 2.8626024695093547e-03,
+      8.6140679080944672e-03,  -1.9075258568341821e-03,
+      2.0138742914406317e-03,  6.3743572321268256e-04,
+      -1.0519351576461367e-03, 2.0138742914406317e-03,
+      6.7646549781383313e-04,  1.7337725787928556e-03,
+      2.8626024695093547e-03,  6.3743572321268256e-04,
+      1.7337725787928556e-03,  8.5614628175256790e-04,
+      -1.8403458798923876e-02, 7.8573984911785699e-03,
+      -1.5961302550980016e-03, 3.5149754327317441e-03,
+      7.8573984911785699e-03,  -1.7067248914369689e-03,
+      2.5330322771104944e-03,  6.0551635352428294e-04,
+      -1.5961302550980016e-03, 2.5330322771104944e-03,
+      4.5693878376751882e-04,  1.3178252989731669e-03,
+      3.5149754327317441e-03,  6.0551635352428294e-04,
+      1.3178252989731669e-03,  4.6425349761707751e-04,
+      -1.7006699057036175e-02, 7.6077168403978004e-03,
+      -1.2881631030799989e-03, 2.9480535843260710e-03,
+      7.6077168403978004e-03,  -1.9702066590096216e-03,
+      2.0108629639478673e-03,  7.9516080819711594e-04,
+      -1.2881631030799989e-03, 2.0108629639478673e-03,
+      8.0433046450099942e-04,  1.8038266298348227e-03,
+      2.9480535843260710e-03,  7.9516080819711594e-04,
+      1.8038266298348227e-03,  8.8160771996069693e-04,
+      -1.3854826825508055e-02, 8.0479162307344476e-03,
+      -5.4775445908707208e-04, 3.4108070940140215e-03,
+      8.0479162307344476e-03,  -2.4855624480393168e-03,
+      2.0010342207153007e-03,  9.0050864299703079e-04,
+      -5.4775445908707208e-04, 2.0010342207153007e-03,
+      2.2879961439639510e-04,  1.2455851470582276e-03,
+      3.4108070940140215e-03,  9.0050864299703079e-04,
+      1.2455851470582276e-03,  2.5797630491429058e-04,
+      -1.3110891255545737e-02, 7.4933352500272960e-03,
+      -3.9694223029317251e-04, 1.8696987208199419e-03,
+      7.4933352500272960e-03,  -2.7269680038459173e-03,
+      1.3403236271558815e-03,  2.3125877913683179e-04,
+      -3.9694223029317251e-04, 1.3403236271558815e-03,
+      7.7648354810912158e-04,  1.2366194463463297e-03,
+      1.8696987208199419e-03,  2.3125877913683179e-04,
+      1.2366194463463297e-03,  9.6189796048403537e-04,
+      -1.3232672426819813e-02, 4.6823041218628670e-03,
+      1.8611702707055700e-03,  -8.3085468064121430e-04,
+      4.6823041218628670e-03,  -8.0583799511241597e-04,
+      1.5487811484838697e-03,  8.1349281204434981e-04,
+      1.8611702707055700e-03,  1.5487811484838697e-03,
+      -1.2621786723316788e-04, 1.4631208905251433e-03,
+      -8.3085468064121430e-04, 8.1349281204434981e-04,
+      1.4631208905251433e-03,  2.7511955514886215e-04,
+      -1.5084537572866579e-02, 7.1293848562809948e-03,
+      3.7045468755946658e-03,  -1.4711529451730022e-03,
+      7.1293848562809948e-03,  -1.8494973697146082e-03,
+      1.3524358852716983e-03,  1.5936537318735912e-03,
+      3.7045468755946658e-03,  1.3524358852716983e-03,
+      -5.7047546733440834e-04, 2.0738159150412733e-03,
+      -1.4711529451730022e-03, 1.5936537318735912e-03,
+      2.0738159150412733e-03,  5.0829312908897773e-04,
+      -1.1837724802376451e-02, 8.0065879433286139e-03,
+      2.5212064219669013e-03,  -8.6580744742516692e-04,
+      8.0065879433286139e-03,  -2.7205322761259480e-03,
+      -2.2105454381026531e-04, 1.4586534642126341e-03,
+      2.5212064219669013e-03,  -2.2105454381026531e-04,
+      -2.4777193779670914e-04, 1.4026084563219037e-03,
+      -8.6580744742516692e-04, 1.4586534642126341e-03,
+      1.4026084563219037e-03,  3.8679517983707091e-04,
+      -1.2017287033769334e-02, 6.9442711205541319e-03,
+      1.8032964267264526e-03,  9.4303015390613293e-04,
+      6.9442711205541319e-03,  -3.5149126553830593e-03,
+      2.3699469146257939e-04,  -4.5705934851597888e-04,
+      1.8032964267264526e-03,  2.3699469146257939e-04,
+      -1.3586411786986857e-04, 8.4154434886370209e-04,
+      9.4303015390613293e-04,  -4.5705934851597888e-04,
+      8.4154434886370209e-04,  -6.9789884734637771e-05,
+      -1.6094817869361303e-02, 6.3002762847164506e-03,
+      2.2530843424977648e-03,  3.8816772736172305e-03,
+      6.3002762847164506e-03,  -1.3384245408796698e-03,
+      1.0043242756394048e-03,  1.0894870315372434e-03,
+      2.2530843424977648e-03,  1.0043242756394048e-03,
+      -1.2100553317588711e-05, 1.2384575776262850e-03,
+      3.8816772736172305e-03,  1.0894870315372434e-03,
+      1.2384575776262850e-03,  1.5692877333876084e-04,
+      -1.4045801776472706e-02, 8.9584932766103818e-03,
+      2.6014796744964234e-03,  1.0166144512118521e-04,
+      8.9584932766103818e-03,  -3.2080958107171351e-03,
+      1.3686944034468785e-03,  2.0148513302307739e-03,
+      2.6014796744964234e-03,  1.3686944034468785e-03,
+      -4.7783480807389827e-04, 1.6221316661178631e-03,
+      1.0166144512118521e-04,  2.0148513302307739e-03,
+      1.6221316661178631e-03,  3.6300160393023845e-04,
+      -1.5035351590565366e-02, 9.6530182335056031e-03,
+      2.1949862437937429e-03,  1.8246414683111131e-04,
+      9.6530182335056031e-03,  -2.6652597726363591e-03,
+      3.9640886823489533e-04,  2.0283144683530636e-03,
+      2.1949862437937429e-03,  3.9640886823489533e-04,
+      4.9399384235801198e-04,  1.5872284744377334e-03,
+      1.8246414683111131e-04,  2.0283144683530636e-03,
+      1.5872284744377334e-03,  5.7853947086566285e-04,
+      -1.3983142687551818e-02, 5.0756892732599047e-03,
+      5.1905234137764912e-03,  4.2875838849719752e-03,
+      5.0756892732599047e-03,  -9.4954694217068230e-04,
+      1.8883020854449403e-04,  6.7109554143711891e-04,
+      5.1905234137764912e-03,  1.8883020854449403e-04,
+      -8.4870028551829930e-04, 1.1077830644977793e-03,
+      4.2875838849719752e-03,  6.7109554143711891e-04,
+      1.1077830644977793e-03,  -7.8542966877660964e-04,
+      -1.3600736364920495e-02, 9.7195178003623480e-03,
+      9.3917635136515779e-04,  1.9513372114623364e-03,
+      9.7195178003623480e-03,  -2.5366246539418099e-03,
+      1.5797585617940194e-03,  7.0858722530550288e-04,
+      9.3917635136515779e-04,  1.5797585617940194e-03,
+      5.8251868528788402e-04,  1.7783091417839559e-03,
+      1.9513372114623364e-03,  7.0858722530550288e-04,
+      1.7783091417839559e-03,  7.1541539011648952e-04,
+      -1.5411936953103653e-02, 7.3903417706619335e-03,
+      4.5483022121753222e-03,  -8.9677881534670116e-04,
+      7.3903417706619335e-03,  -1.8311445673321121e-03,
+      5.0618507124800330e-04,  1.6497657392883489e-03,
+      4.5483022121753222e-03,  5.0618507124800330e-04,
+      -1.0944142931277624e-04, 2.0053974090094044e-03,
+      -8.9677881534670116e-04, 1.6497657392883489e-03,
+      2.0053974090094044e-03,  5.8688361598166873e-04,
+      -1.5877997586428029e-02, 7.0102564979234233e-03,
+      2.3822117395892084e-03,  2.4183758414625888e-03,
+      7.0102564979234233e-03,  -2.0572286784362988e-03,
+      3.5983393342809214e-04,  -1.3917845780270373e-04,
+      2.3822117395892084e-03,  3.5983393342809214e-04,
+      9.3742516843216294e-05,  1.1681434312541931e-03,
+      2.4183758414625888e-03,  -1.3917845780270373e-04,
+      1.1681434312541931e-03,  2.2289605656281757e-04,
+      -1.4083572828047439e-02, 7.9925244381302291e-03,
+      -6.0284983439355796e-04, -9.7568039067081366e-04,
+      7.9925244381302291e-03,  -2.8403745612453242e-03,
+      1.4149913418989291e-03,  1.3483783335542488e-03,
+      -6.0284983439355796e-04, 1.4149913418989291e-03,
+      -7.9567702505795620e-05, 4.5761961508970946e-04,
+      -9.7568039067081366e-04, 1.3483783335542488e-03,
+      4.5761961508970946e-04,  -1.8122180927645343e-05,
+      -1.3011750140965523e-02, 8.0448027672424538e-03,
+      1.3711087550206127e-03,  1.2861069552821227e-03,
+      8.0448027672424538e-03,  -3.4752261499698942e-03,
+      3.7416175157491864e-04,  3.5251373821010203e-04,
+      1.3711087550206127e-03,  3.7416175157491864e-04,
+      1.0668685970538342e-04,  1.2410321451126983e-03,
+      1.2861069552821227e-03,  3.5251373821010203e-04,
+      1.2410321451126983e-03,  1.2833570311562067e-04,
+      -1.0240113275587109e-02, 6.1802880789633766e-03,
+      1.3904451755038282e-03,  -3.0192736293871718e-04,
+      6.1802880789633766e-03,  -3.0059440450167932e-03,
+      -5.3368175414203413e-04, 4.7195090306965839e-04,
+      1.3904451755038282e-03,  -5.3368175414203413e-04,
+      -7.9556056354311339e-04, 9.5483655393905145e-04,
+      -3.0192736293871718e-04, 4.7195090306965839e-04,
+      9.5483655393905145e-04,  -5.7134829653405345e-04,
+      -1.3385066274392758e-02, 6.6124934381380064e-03,
+      -1.2821932840326072e-03, 2.0730056600946553e-03,
+      6.6124934381380064e-03,  -1.9317403170367104e-03,
+      1.2403275688648368e-03,  -8.1880449480140711e-06,
+      -1.2821932840326072e-03, 1.2403275688648368e-03,
+      -2.9734471249323222e-04, 1.1154847964356678e-03,
+      2.0730056600946553e-03,  -8.1880449480140711e-06,
+      1.1154847964356678e-03,  -8.2008990837446980e-05,
+      -1.2832033261169851e-02, 7.1731149113646135e-03,
+      -1.3178043396281812e-03, -8.1927059876190744e-04,
+      7.1731149113646135e-03,  -3.8547355138079868e-03,
+      1.3689397012382246e-03,  1.4741597574298045e-03,
+      -1.3178043396281812e-03, 1.3689397012382246e-03,
+      -1.5523748509968895e-04, 5.2913940932647796e-04,
+      -8.1927059876190744e-04, 1.4741597574298045e-03,
+      5.2913940932647796e-04,  -2.0477161752664730e-06,
+      -1.9353179288859976e-02, 8.2472993213544895e-03,
+      1.1328241708247928e-03,  1.0173474843959841e-04,
+      8.2472993213544895e-03,  -2.0348560585739840e-03,
+      1.8916142915004761e-03,  1.8422284519757142e-03,
+      1.1328241708247928e-03,  1.8916142915004761e-03,
+      6.6390870098819582e-04,  1.6001417768552631e-03,
+      1.0173474843959841e-04,  1.8422284519757142e-03,
+      1.6001417768552631e-03,  7.2754320201389323e-04,
+      -1.8034057030268318e-02, 8.1439718393683594e-03,
+      -1.4927369306321973e-03, 4.4653038520066791e-04,
+      8.1439718393683594e-03,  -2.0375629958202432e-03,
+      1.9844371297367868e-03,  1.5706327924026185e-03,
+      -1.4927369306321973e-03, 1.9844371297367868e-03,
+      8.4221508811039951e-05,  1.4760224149450302e-03,
+      4.4653038520066791e-04,  1.5706327924026185e-03,
+      1.4760224149450302e-03,  5.3324147164490450e-04,
+      -1.9858435274308524e-02, 5.5927944148301684e-03,
+      -1.5438870160433414e-03, 3.3657100726607308e-03,
+      5.5927944148301684e-03,  2.3561660401123010e-04,
+      2.3242973956863057e-03,  1.5999686201749661e-03,
+      -1.5438870160433414e-03, 2.3242973956863057e-03,
+      3.1805397259977025e-04,  1.2466021708669767e-03,
+      3.3657100726607308e-03,  1.5999686201749661e-03,
+      1.2466021708669767e-03,  3.8368966980689126e-04,
+      -9.7281699245688677e-03, 6.3301451421522454e-03,
+      -2.1014055387616102e-04, -1.0747609780239144e-03,
+      6.3301451421522454e-03,  -3.4334745573678408e-03,
+      8.1169421191581016e-04,  6.4971659824279585e-04,
+      -2.1014055387616102e-04, 8.1169421191581016e-04,
+      -3.3545824360928884e-04, 4.5259596268414320e-04,
+      -1.0747609780239144e-03, 6.4971659824279585e-04,
+      4.5259596268414320e-04,  -1.1830597768979520e-04,
+      -1.6332140614730220e-02, 9.0103128113567417e-03,
+      1.5571710079426081e-03,  -6.0630677294044237e-04,
+      9.0103128113567417e-03,  -2.4498309848700814e-03,
+      1.1243148388670240e-03,  1.9746725100914823e-03,
+      1.5571710079426081e-03,  1.1243148388670240e-03,
+      2.4215985584758782e-04,  1.0849950462954012e-03,
+      -6.0630677294044237e-04, 1.9746725100914823e-03,
+      1.0849950462954012e-03,  4.0955394785959507e-04,
+      -1.3036079872743208e-02, 8.1986656104820928e-03,
+      -4.1339688016818390e-04, -7.9169837147628590e-04,
+      8.1986656104820928e-03,  -3.0128993657090463e-03,
+      1.3087483787852701e-03,  1.3783682086432115e-03,
+      -4.1339688016818390e-04, 1.3087483787852701e-03,
+      1.0389730541704824e-04,  1.0307092574933089e-03,
+      -7.9169837147628590e-04, 1.3783682086432115e-03,
+      1.0307092574933089e-03,  2.5367532767235548e-04,
+      -1.3984645154594681e-02, 8.3445337042039359e-03,
+      -1.5668162012331865e-03, -1.6294897225424144e-03,
+      8.3445337042039359e-03,  -2.3222775494725553e-03,
+      1.5981596851179622e-03,  2.2297148976875006e-03,
+      -1.5668162012331865e-03, 1.5981596851179622e-03,
+      4.9099274793647457e-04,  1.0006803325697039e-03,
+      -1.6294897225424144e-03, 2.2297148976875006e-03,
+      1.0006803325697039e-03,  7.7225711991308439e-04,
+      -1.0878208304283613e-02, 6.8495335213857884e-03,
+      1.8367434011084490e-03,  -1.1684248224645396e-03,
+      6.8495335213857884e-03,  -3.7485460530336514e-03,
+      -4.5636048439838095e-04, 1.1909478868148576e-03,
+      1.8367434011084490e-03,  -4.5636048439838095e-04,
+      1.3521556398979253e-04,  5.5654021720440735e-04,
+      -1.1684248224645396e-03, 1.1909478868148576e-03,
+      5.5654021720440735e-04,  1.7634298653690025e-04,
+      -1.3695683195390350e-02, 9.6440337102124008e-03,
+      -1.1111079511340462e-03, -7.3806515965964433e-04,
+      9.6440337102124008e-03,  -4.4655385326203868e-03,
+      2.6568699762222737e-03,  1.4535689858293593e-03,
+      -1.1111079511340462e-03, 2.6568699762222737e-03,
+      4.0767188718331062e-04,  1.3035911326184557e-03,
+      -7.3806515965964433e-04, 1.4535689858293593e-03,
+      1.3035911326184557e-03,  6.4104771006401936e-04,
+      -1.8783865558349294e-02, 9.5046988369441830e-03,
+      1.0054895662265832e-03,  -1.3398595612765494e-03,
+      9.5046988369441830e-03,  -2.4387572360332795e-03,
+      1.2308518058462559e-03,  2.3676189965730725e-03,
+      1.0054895662265832e-03,  1.2308518058462559e-03,
+      7.0028547773139820e-04,  1.0101870932540413e-03,
+      -1.3398595612765494e-03, 2.3676189965730725e-03,
+      1.0101870932540413e-03,  7.0233475365099978e-04,
+      -1.4976622456425184e-02, 8.1162153169009460e-03,
+      -1.3827823800513477e-03, -9.0475962868004594e-04,
+      8.1162153169009460e-03,  -2.2249946889018480e-03,
+      2.1152052285084320e-03,  1.8399989448963172e-03,
+      -1.3827823800513477e-03, 2.1152052285084320e-03,
+      2.8805366919883429e-04,  1.4321728932099045e-03,
+      -9.0475962868004594e-04, 1.8399989448963172e-03,
+      1.4321728932099045e-03,  4.1014738409446026e-04,
+      -1.1408226145512439e-02, 6.4954304965538492e-03,
+      -1.2001987796160765e-03, -9.3863075040407956e-04,
+      6.4954304965538492e-03,  -2.1295410823541430e-03,
+      8.3445635040981805e-04,  1.0828225854915745e-03,
+      -1.2001987796160765e-03, 8.3445635040981805e-04,
+      -3.5131917652677573e-04, 6.5050284812873853e-04,
+      -9.3863075040407956e-04, 1.0828225854915745e-03,
+      6.5050284812873853e-04,  9.2967121716285277e-05,
+      -1.3540012040553353e-02, 6.5044103809259023e-03,
+      -1.4509842112690869e-03, 2.9456259587854984e-03,
+      6.5044103809259023e-03,  -2.1362340178470861e-03,
+      2.1384291485579830e-03,  4.4150731301810292e-04,
+      -1.4509842112690869e-03, 2.1384291485579830e-03,
+      6.8321237604588041e-04,  1.5155198702477583e-03,
+      2.9456259587854984e-03,  4.4150731301810292e-04,
+      1.5155198702477583e-03,  9.4537651413804154e-04,
+      -1.6246993041419768e-02, 7.8928849361846465e-03,
+      -1.0371149787786690e-03, -1.1988565506819290e-03,
+      7.8928849361846465e-03,  -2.1711861031565201e-03,
+      1.5625386477991353e-03,  2.0766924041950999e-03,
+      -1.0371149787786690e-03, 1.5625386477991353e-03,
+      3.6916286503139387e-04,  1.3956257087938428e-03,
+      -1.1988565506819290e-03, 2.0766924041950999e-03,
+      1.3956257087938428e-03,  7.0753221752847730e-04,
+      -1.3605718835333182e-02, 8.4677963790206132e-03,
+      1.5674417326158496e-03,  -8.5156763496229099e-04,
+      8.4677963790206132e-03,  -2.0207185798241187e-03,
+      8.5611012401802392e-04,  1.5345153178721719e-03,
+      1.5674417326158496e-03,  8.5611012401802392e-04,
+      3.4310085919516103e-04,  1.3423485494321341e-03,
+      -8.5156763496229099e-04, 1.5345153178721719e-03,
+      1.3423485494321341e-03,  4.5225478537197169e-04,
+      -1.4669000264706085e-02, 8.7488167172959999e-03,
+      -1.4091880634556062e-03, -1.0017125355292859e-03,
+      8.7488167172959999e-03,  -3.0719648092492838e-03,
+      2.3683327512114935e-03,  1.8164163784652701e-03,
+      -1.4091880634556062e-03, 2.3683327512114935e-03,
+      1.8550057165905962e-04,  8.3199462205668607e-04,
+      -1.0017125355292859e-03, 1.8164163784652701e-03,
+      8.3199462205668607e-04,  2.3623702228027553e-04,
+      -1.3384842502986008e-02, -7.4284413610738024e-04,
+      2.2164427066970675e-03,  -1.6984495314060527e-03,
+      -7.4284413610738024e-04, -1.1789069761800031e-04,
+      5.5226050924804017e-04,  1.2865230734008021e-03,
+      2.2164427066970675e-03,  5.5226050924804017e-04,
+      1.6158157787987345e-04,  1.4162104619029536e-03,
+      -1.6984495314060527e-03, 1.2865230734008021e-03,
+      1.4162104619029536e-03,  2.5984053718450023e-04,
+      -1.4270238868634912e-02, 8.7435299844129590e-03,
+      1.3121371658921229e-03,  -1.4459562540351310e-03,
+      8.7435299844129590e-03,  -2.5869716021176296e-03,
+      1.9816371858388513e-03,  1.8906964142812917e-03,
+      1.3121371658921229e-03,  1.9816371858388513e-03,
+      5.2299567205402252e-04,  1.6974470674424424e-03,
+      -1.4459562540351310e-03, 1.8906964142812917e-03,
+      1.6974470674424424e-03,  6.8040298431208874e-04,
+      -1.3927808495450104e-02, 9.8114319729643538e-03,
+      -1.4585605825814291e-03, 5.1081452718003395e-04,
+      9.8114319729643538e-03,  -3.1780317017032489e-03,
+      1.9510988088846683e-03,  1.0552731530989561e-03,
+      -1.4585605825814291e-03, 1.9510988088846683e-03,
+      4.1351930697924790e-04,  8.3639732480242888e-04,
+      5.1081452718003395e-04,  1.0552731530989561e-03,
+      8.3639732480242888e-04,  5.9577771535064973e-04,
+      -1.2005957611549042e-02, 6.5269829696843855e-03,
+      5.4227086053001321e-06,  -1.2166655959930230e-03,
+      6.5269829696843855e-03,  -2.8587968071345869e-03,
+      8.7526046059167601e-04,  1.1870931547370881e-03,
+      5.4227086053001321e-06,  8.7526046059167601e-04,
+      -9.3426039127427613e-05, 7.3532331884749720e-04,
+      -1.2166655959930230e-03, 1.1870931547370881e-03,
+      7.3532331884749720e-04,  8.0025894282650164e-05,
+      -1.3643474898358156e-02, 7.2377155928434068e-03,
+      -1.2401442140379342e-03, -1.2740975088610677e-03,
+      7.2377155928434068e-03,  -2.2422690205721159e-03,
+      1.5138822054081335e-03,  1.4904684300819143e-03,
+      -1.2401442140379342e-03, 1.5138822054081335e-03,
+      -1.3634361034485141e-04, 1.1127686221486200e-03,
+      -1.2740975088610677e-03, 1.4904684300819143e-03,
+      1.1127686221486200e-03,  2.6190429702892281e-04,
+      -1.8536129654681068e-02, 5.7362215425607876e-03,
+      3.7077483503927737e-03,  5.3703505121290920e-03,
+      5.7362215425607876e-03,  -1.5089926703986004e-03,
+      1.7790988909616276e-03,  2.4855814646503223e-04,
+      3.7077483503927737e-03,  1.7790988909616276e-03,
+      -2.6330472344847811e-04, 5.6773516831324072e-04,
+      5.3703505121290920e-03,  2.4855814646503223e-04,
+      5.6773516831324072e-04,  -7.5082904071627644e-05,
+      -1.2009357144285096e-02, 8.3436157388768785e-03,
+      1.4898887848690406e-03,  -1.1644905255429930e-03,
+      8.3436157388768785e-03,  -3.9039776700446403e-03,
+      6.7845541120046616e-04,  8.9957003910951026e-04,
+      1.4898887848690406e-03,  6.7845541120046616e-04,
+      -4.3136022070644790e-04, 8.0279376655449392e-04,
+      -1.1644905255429930e-03, 8.9957003910951026e-04,
+      8.0279376655449392e-04,  1.4214812713657745e-04,
+      -1.3087027777011376e-02, 8.1091117611986480e-03,
+      -9.9940829784200174e-04, 3.0841860053121063e-03,
+      8.1091117611986480e-03,  -1.8957247580462895e-03,
+      2.0851167099702368e-03,  4.9914625071405998e-04,
+      -9.9940829784200174e-04, 2.0851167099702368e-03,
+      5.8420276661913983e-04,  1.2124350953385600e-03,
+      3.0841860053121063e-03,  4.9914625071405998e-04,
+      1.2124350953385600e-03,  9.4839886112247610e-04,
+      -1.4389875409702553e-02, 5.9650198667490139e-03,
+      2.7789793386952099e-03,  1.1951993133499255e-03,
+      5.9650198667490139e-03,  -1.5525484868771592e-03,
+      -1.8249951551237587e-04, 2.3756213628496397e-05,
+      2.7789793386952099e-03,  -1.8249951551237587e-04,
+      -7.0686530665496237e-04, 8.4787536338310149e-04,
+      1.1951993133499255e-03,  2.3756213628496397e-05,
+      8.4787536338310149e-04,  -1.8249697460956133e-04,
+      -1.6493749736099662e-02, 8.9845954108499354e-03,
+      -6.5855511936865679e-04, -1.2916630860458668e-03,
+      8.9845954108499354e-03,  -2.2720543779065185e-03,
+      2.0777202119131770e-03,  1.7945117300401194e-03,
+      -6.5855511936865679e-04, 2.0777202119131770e-03,
+      3.7547177825330777e-04,  1.3207948522699173e-03,
+      -1.2916630860458668e-03, 1.7945117300401194e-03,
+      1.3207948522699173e-03,  4.4592713214989318e-04,
+      -1.1661413908588582e-02, 5.3356769263466705e-03,
+      2.3910148956967888e-05,  2.0144431709342532e-03,
+      5.3356769263466705e-03,  -2.3763451303913793e-03,
+      1.5795406491315219e-04,  -5.2947905482373585e-04,
+      2.3910148956967888e-05,  1.5795406491315219e-04,
+      -3.3709897439123792e-04, 4.4156371402711333e-04,
+      2.0144431709342532e-03,  -5.2947905482373585e-04,
+      4.4156371402711333e-04,  -1.6067267371488200e-04,
+      -1.2127195499525786e-02, 6.3575178498569175e-03,
+      2.7885477665962593e-03,  1.4103487851294598e-03,
+      6.3575178498569175e-03,  -1.9866201831196012e-03,
+      1.4532946357492936e-03,  1.0405145770188389e-03,
+      2.7885477665962593e-03,  1.4532946357492936e-03,
+      1.1650211077524758e-04,  1.1231612548733749e-03,
+      1.4103487851294598e-03,  1.0405145770188389e-03,
+      1.1231612548733749e-03,  4.1629479593797221e-04,
+      -1.0520573085213737e-02, 6.8711276675652456e-03,
+      1.6939148906612810e-03,  -9.8113687728168184e-04,
+      6.8711276675652456e-03,  -2.6887735100749720e-03,
+      -2.1586496673177543e-04, 1.0064124321346908e-03,
+      1.6939148906612810e-03,  -2.1586496673177543e-04,
+      -1.4067199641936031e-04, 2.9361950909233638e-04,
+      -9.8113687728168184e-04, 1.0064124321346908e-03,
+      2.9361950909233638e-04,  -7.9727460669166473e-05,
+      -1.5912209340202287e-02, 8.8708507277150267e-03,
+      -2.2876273236139503e-04, -1.2962376027821204e-03,
+      8.8708507277150267e-03,  -3.7886872287483438e-03,
+      2.2364194787288061e-03,  2.3055217337219964e-03,
+      -2.2876273236139503e-04, 2.2364194787288061e-03,
+      -2.2121966075386214e-04, 9.2457524181229541e-04,
+      -1.2962376027821204e-03, 2.3055217337219964e-03,
+      9.2457524181229541e-04,  5.0819851534159976e-04,
+      -1.1988487259813843e-02, -1.0817915696993614e-03,
+      4.4055451227390865e-03,  2.6194469136628352e-03,
+      -1.0817915696993614e-03, -9.4238486903406541e-04,
+      7.7092876812179679e-04,  1.6765166378430332e-03,
+      4.4055451227390865e-03,  7.7092876812179679e-04,
+      -7.0957594439696422e-04, 4.6632501327589830e-04,
+      2.6194469136628352e-03,  1.6765166378430332e-03,
+      4.6632501327589830e-04,  -1.7141234209432641e-05,
+      -1.2965555240827500e-02, 4.6872799977542403e-03,
+      -1.4510821237139582e-03, 3.7946233749072901e-03,
+      4.6872799977542403e-03,  -3.5962713002839149e-04,
+      1.0943003685336375e-03,  5.6646742816718408e-04,
+      -1.4510821237139582e-03, 1.0943003685336375e-03,
+      -1.0401090694716338e-04, 1.7088920364535401e-03,
+      3.7946233749072901e-03,  5.6646742816718408e-04,
+      1.7088920364535401e-03,  2.7162106952229655e-04,
+      -1.4785199674470367e-02, 8.0633700283524075e-03,
+      -5.6495833882158305e-04, 2.4514928345558051e-03,
+      8.0633700283524075e-03,  -3.2143330398933306e-03,
+      1.9126378978111445e-03,  4.8878097615942593e-05,
+      -5.6495833882158305e-04, 1.9126378978111445e-03,
+      2.1638725769767891e-04,  8.0709190039421428e-04,
+      2.4514928345558051e-03,  4.8878097615942593e-05,
+      8.0709190039421428e-04,  5.8999550418438625e-04,
+      -1.4240209598961997e-02, 6.7516548024444747e-03,
+      -5.1918416449977969e-04, 2.2353477983456612e-03,
+      6.7516548024444747e-03,  -1.8474456163168917e-03,
+      1.2971818549383650e-03,  -1.5014058192087238e-04,
+      -5.1918416449977969e-04, 1.2971818549383650e-03,
+      -7.3709152533265676e-05, 3.5066038413611596e-04,
+      2.2353477983456612e-03,  -1.5014058192087238e-04,
+      3.5066038413611596e-04,  -3.3978637630894203e-05,
+      -1.3007652156842001e-02, 6.6856636493797493e-03,
+      2.9160109924347812e-03,  -6.5543528412060508e-04,
+      6.6856636493797493e-03,  -1.8216554256699276e-03,
+      7.3027879176171099e-04,  9.7092376946287130e-04,
+      2.9160109924347812e-03,  7.3027879176171099e-04,
+      -1.0356785011530854e-04, 1.5729902578397168e-03,
+      -6.5543528412060508e-04, 9.7092376946287130e-04,
+      1.5729902578397168e-03,  2.9497919465760276e-04,
+      -1.0866405763036151e-02, 5.3513859835064457e-03,
+      2.4060930773512137e-04,  1.9862948106334644e-03,
+      5.3513859835064457e-03,  -1.8512769013365881e-03,
+      2.8133169930445625e-04,  -3.4053514307311997e-04,
+      2.4060930773512137e-04,  2.8133169930445625e-04,
+      -1.0872990161973775e-04, 3.0543245383033921e-04,
+      1.9862948106334644e-03,  -3.4053514307311997e-04,
+      3.0543245383033921e-04,  -3.2564062326252566e-05,
+      -9.0348481159891574e-03, 4.9368779887051665e-03,
+      -7.7858023749495433e-04, 1.0920385408053164e-03,
+      4.9368779887051665e-03,  -2.4651064596398460e-03,
+      7.2405402568656097e-04,  -4.7031756933928247e-04,
+      -7.7858023749495433e-04, 7.2405402568656097e-04,
+      -5.4175927276914012e-04, 2.2338122120519862e-05,
+      1.0920385408053164e-03,  -4.7031756933928247e-04,
+      2.2338122120519862e-05,  -1.6681805314778045e-04,
+      -1.7304180355604392e-02, 8.6652343814632091e-03,
+      -1.2246474341539783e-03, 8.2415022799262484e-04,
+      8.6652343814632091e-03,  -2.2303662865870645e-03,
+      2.2578893430555296e-03,  1.4121004619557782e-03,
+      -1.2246474341539783e-03, 2.2578893430555296e-03,
+      6.6297648327747090e-04,  1.1885000811279875e-03,
+      8.2415022799262484e-04,  1.4121004619557782e-03,
+      1.1885000811279875e-03,  6.6695685275927760e-04,
+      -1.0778073152356201e-02, 6.3694652581600224e-03,
+      1.6961032371909813e-03,  -1.0948355265557148e-03,
+      6.3694652581600224e-03,  -3.4260749027166322e-03,
+      7.2077630136983231e-04,  4.4084306708303315e-04,
+      1.6961032371909813e-03,  7.2077630136983231e-04,
+      -8.9011520033262931e-04, 9.7699882816715137e-04,
+      -1.0948355265557148e-03, 4.4084306708303315e-04,
+      9.7699882816715137e-04,  -1.6721134424032506e-04,
+      -1.5880217492160682e-02, 3.6937157026263458e-03,
+      4.3136575901598258e-03,  -1.5179193912709036e-03,
+      3.6937157026263458e-03,  -1.1384551327959617e-03,
+      1.6183111798056420e-03,  2.1843622767156786e-03,
+      4.3136575901598258e-03,  1.6183111798056420e-03,
+      3.6830220626843141e-05,  1.5392367332523269e-03,
+      -1.5179193912709036e-03, 2.1843622767156786e-03,
+      1.5392367332523269e-03,  4.4276775065660106e-04,
+      -1.6165406785950832e-02, 5.8699367964453637e-03,
+      -1.2859790511595471e-03, -9.6075957427906315e-04,
+      5.8699367964453637e-03,  -1.1179968213466582e-03,
+      1.7183301947139779e-03,  1.9085251825466231e-03,
+      -1.2859790511595471e-03, 1.7183301947139779e-03,
+      4.8200250785465073e-05,  1.4857105304615032e-03,
+      -9.6075957427906315e-04, 1.9085251825466231e-03,
+      1.4857105304615032e-03,  4.8795341166811253e-04,
+      -1.0013561677409232e-02, 4.0961622741432311e-03,
+      -1.1185134603428390e-03, -1.0574249764454893e-03,
+      4.0961622741432311e-03,  -1.7667047214563205e-03,
+      4.4148465732737679e-04,  5.1292958257300033e-04,
+      -1.1185134603428390e-03, 4.4148465732737679e-04,
+      -3.2715402184443172e-04, 5.9087815524321886e-05,
+      -1.0574249764454893e-03, 5.1292958257300033e-04,
+      5.9087815524321886e-05,  -2.5498418232061302e-04,
+      -1.5836685529134305e-02, 7.9890379919545819e-03,
+      2.0022017376080106e-03,  -7.4049310281811551e-04,
+      7.9890379919545819e-03,  -1.8638004635911647e-03,
+      1.6707614648490442e-03,  2.0576430038936316e-03,
+      2.0022017376080106e-03,  1.6707614648490442e-03,
+      -3.8921469816428380e-04, 1.8660395072033363e-03,
+      -7.4049310281811551e-04, 2.0576430038936316e-03,
+      1.8660395072033363e-03,  8.3166983679526074e-04,
+      -1.5585823655705712e-02, 8.6547358140740775e-03,
+      6.4685805130901839e-04,  6.0437717832279090e-04,
+      8.6547358140740775e-03,  -2.9179966185179798e-03,
+      1.6650090091244428e-03,  5.4772087545596447e-04,
+      6.4685805130901839e-04,  1.6650090091244428e-03,
+      1.0944173789827910e-04,  1.6193714005056402e-03,
+      6.0437717832279090e-04,  5.4772087545596447e-04,
+      1.6193714005056402e-03,  4.1435400171744153e-04,
+      -1.2002918703686777e-02, 8.5494004897721486e-03,
+      2.6869376009802685e-03,  2.8041002192664066e-04,
+      8.5494004897721486e-03,  -3.3375720513194084e-03,
+      -6.4781613669761683e-05, 1.3524488057429884e-03,
+      2.6869376009802685e-03,  -6.4781613669761683e-05,
+      -1.0720086261486713e-04, 3.3638662164407361e-04,
+      2.8041002192664066e-04,  1.3524488057429884e-03,
+      3.3638662164407361e-04,  2.7204403023007185e-04,
+      -1.5230508969721116e-02, 8.0033674082133788e-03,
+      4.5505572033502154e-03,  6.3142263681769913e-04,
+      8.0033674082133788e-03,  -2.2473990191216016e-03,
+      9.4766049239732128e-04,  8.2712654749629762e-04,
+      4.5505572033502154e-03,  9.4766049239732128e-04,
+      -8.7138227660763510e-04, 1.8741234298296900e-03,
+      6.3142263681769913e-04,  8.2712654749629762e-04,
+      1.8741234298296900e-03,  7.8238202994568230e-04,
+      -1.4728673062431606e-02, 7.7272261248470383e-03,
+      -6.5330828262795480e-04, -1.0320780914468404e-03,
+      7.7272261248470383e-03,  -3.4770903165693631e-03,
+      1.7224962220012408e-03,  1.6016916176218841e-03,
+      -6.5330828262795480e-04, 1.7224962220012408e-03,
+      -1.0624168683721165e-05, 6.7411356195351047e-04,
+      -1.0320780914468404e-03, 1.6016916176218841e-03,
+      6.7411356195351047e-04,  3.5793923973344256e-04,
+      -1.5365215679984470e-02, 7.7326906220242997e-03,
+      -8.7919884112348649e-04, 3.9232895421004452e-03,
+      7.7326906220242997e-03,  -1.8951873402245514e-03,
+      1.6702212822668559e-03,  3.8698617614536183e-04,
+      -8.7919884112348649e-04, 1.6702212822668559e-03,
+      1.4305624533357813e-04,  1.3483365102715016e-03,
+      3.9232895421004452e-03,  3.8698617614536183e-04,
+      1.3483365102715016e-03,  2.9887961813433576e-04,
+      -1.5308665984215052e-02, 6.4035427214453088e-03,
+      3.2264588182182893e-03,  -1.2172293673001523e-03,
+      6.4035427214453088e-03,  -2.1803365653642855e-03,
+      8.1169763975199303e-04,  1.1516553650030236e-03,
+      3.2264588182182893e-03,  8.1169763975199303e-04,
+      -8.9273426545469822e-04, 1.3149492890516085e-03,
+      -1.2172293673001523e-03, 1.1516553650030236e-03,
+      1.3149492890516085e-03,  3.4916307447500070e-04,
+      -1.4177990755768224e-02, 8.4436539885229638e-03,
+      1.9644514003780713e-03,  -1.4051939351763857e-03,
+      8.4436539885229638e-03,  -2.6365958058229824e-03,
+      2.7988734419438126e-04,  2.3791557288914468e-03,
+      1.9644514003780713e-03,  2.7988734419438126e-04,
+      -9.3295814287705590e-06, 1.7397564635589064e-03,
+      -1.4051939351763857e-03, 2.3791557288914468e-03,
+      1.7397564635589064e-03,  3.5990020010247216e-04,
+      -1.4509970137912609e-02, 8.3321965664366754e-03,
+      1.9636760666178182e-04,  1.4472834003371236e-03,
+      8.3321965664366754e-03,  -3.1644286858689462e-03,
+      1.3130696372032696e-03,  8.9901256003085657e-04,
+      1.9636760666178182e-04,  1.3130696372032696e-03,
+      -1.1641434273373312e-04, 1.1005872558455100e-03,
+      1.4472834003371236e-03,  8.9901256003085657e-04,
+      1.1005872558455100e-03,  2.2352300907388465e-04,
+      -1.4191547859266384e-02, 7.2008193102188825e-03,
+      3.7147795995076251e-03,  -9.3769917155450059e-04,
+      7.2008193102188825e-03,  -1.8544479396288801e-03,
+      6.8480624138753587e-04,  1.5662746452927681e-03,
+      3.7147795995076251e-03,  6.8480624138753587e-04,
+      -6.5351803821489724e-04, 1.5327770952292775e-03,
+      -9.3769917155450059e-04, 1.5662746452927681e-03,
+      1.5327770952292775e-03,  4.5630359182475774e-04,
+      -1.4160940337820836e-02, 7.8821478615683557e-03,
+      -9.8558148594463752e-04, 3.7918168562442430e-03,
+      7.8821478615683557e-03,  -2.1790556607648682e-03,
+      2.0369416094564120e-03,  1.4304949644710783e-04,
+      -9.8558148594463752e-04, 2.0369416094564120e-03,
+      5.0614978147559312e-04,  1.6610917581866963e-03,
+      3.7918168562442430e-03,  1.4304949644710783e-04,
+      1.6610917581866963e-03,  5.8794575109223548e-04,
+      -9.2845894059316349e-03, 6.4939565529886596e-03,
+      1.4724840417364997e-03,  -5.6610856876894574e-04,
+      6.4939565529886596e-03,  -3.7833463506718019e-03,
+      -4.7204290292012798e-04, 6.9241613661032045e-04,
+      1.4724840417364997e-03,  -4.7204290292012798e-04,
+      -8.1865701212015295e-04, 4.9894256490768405e-04,
+      -5.6610856876894574e-04, 6.9241613661032045e-04,
+      4.9894256490768405e-04,  -1.6611368267292618e-04,
+      -1.6184598560687900e-02, 6.8439613963293747e-03,
+      1.7365578719469512e-03,  2.5532525388744873e-03,
+      6.8439613963293747e-03,  -1.6997340703512266e-03,
+      8.1255506906787128e-04,  1.5422686239700562e-04,
+      1.7365578719469512e-03,  8.1255506906787128e-04,
+      3.3892711957371837e-04,  7.0860872528022347e-04,
+      2.5532525388744873e-03,  1.5422686239700562e-04,
+      7.0860872528022347e-04,  4.6427613812592781e-04,
+      -1.2357824977244209e-02, 6.9688664588778747e-03,
+      -1.0006387693880701e-03, 5.1736619278341814e-04,
+      6.9688664588778747e-03,  -3.6183785683711568e-03,
+      8.2525839785551063e-04,  1.1173236493360916e-03,
+      -1.0006387693880701e-03, 8.2525839785551063e-04,
+      -1.0729556618775974e-04, 6.7298930398621047e-04,
+      5.1736619278341814e-04,  1.1173236493360916e-03,
+      6.7298930398621047e-04,  -3.1875208496333698e-08,
+      -1.4252206077143996e-02, 6.2144795121130665e-03,
+      5.2879460633076993e-03,  -1.0233446873137595e-03,
+      6.2144795121130665e-03,  -1.5331372596218611e-03,
+      2.2209966824697589e-04,  2.0306655712737896e-03,
+      5.2879460633076993e-03,  2.2209966824697589e-04,
+      -1.0936295576075275e-03, 1.9226034908899328e-03,
+      -1.0233446873137595e-03, 2.0306655712737896e-03,
+      1.9226034908899328e-03,  5.5839135772282368e-04,
+      -1.3330238816035098e-02, 8.1905708096769846e-03,
+      -7.6394721968447781e-04, -2.2394664524934586e-04,
+      8.1905708096769846e-03,  -3.0167954985111760e-03,
+      1.9263001035794555e-03,  1.5435010884285658e-03,
+      -7.6394721968447781e-04, 1.9263001035794555e-03,
+      4.3194129081048293e-04,  1.1144179655621770e-03,
+      -2.2394664524934586e-04, 1.5435010884285658e-03,
+      1.1144179655621770e-03,  4.6419314960482224e-04,
+      -9.3078896127232972e-03, 3.7808378683754900e-03,
+      -8.2500659938291763e-04, -9.2253721497522736e-04,
+      3.7808378683754900e-03,  -1.3735593478715945e-03,
+      2.6895103878370749e-04,  6.0702527043364398e-04,
+      -8.2500659938291763e-04, 2.6895103878370749e-04,
+      -3.5102799517677157e-04, 2.1534278727519608e-04,
+      -9.2253721497522736e-04, 6.0702527043364398e-04,
+      2.1534278727519608e-04,  -1.4676179836516589e-04,
+      -1.7610358444604211e-02, 7.7354116237610682e-03,
+      -4.3061286359086953e-04, 3.1429218095944938e-03,
+      7.7354116237610682e-03,  -2.3939505233226636e-03,
+      1.4834692755721455e-03,  1.6181041921510780e-03,
+      -4.3061286359086953e-04, 1.4834692755721455e-03,
+      8.0959866125124443e-05,  1.6836715825834035e-03,
+      3.1429218095944938e-03,  1.6181041921510780e-03,
+      1.6836715825834035e-03,  4.4455549963187051e-04,
+      -1.1327541090185321e-02, 8.0271674256418221e-03,
+      -1.0748118107689127e-03, 1.0220978517679886e-03,
+      8.0271674256418221e-03,  -3.7625982726974937e-03,
+      1.4015004959102217e-03,  2.9941859123995497e-04,
+      -1.0748118107689127e-03, 1.4015004959102217e-03,
+      -4.7414240813170117e-04, 1.8984462650148025e-04,
+      1.0220978517679886e-03,  2.9941859123995497e-04,
+      1.8984462650148025e-04,  1.9168610525451880e-05,
+      -7.7599972015153637e-03, 5.0487328889732026e-03,
+      -1.0150335177867968e-03, -1.0787256637662084e-03,
+      5.0487328889732026e-03,  -3.8283334708152520e-03,
+      4.8095191605469594e-04,  1.0961988375689778e-03,
+      -1.0150335177867968e-03, 4.8095191605469594e-04,
+      -3.5006941784132919e-04, 2.5142046401834721e-04,
+      -1.0787256637662084e-03, 1.0961988375689778e-03,
+      2.5142046401834721e-04,  -3.4601993607214204e-04,
+      -1.6722037962919063e-02, 8.9023027147699402e-03,
+      2.8475764924646074e-03,  -1.3668464681863240e-03,
+      8.9023027147699402e-03,  -3.0464816895161466e-03,
+      5.5483638104875219e-04,  2.5866682413927889e-03,
+      2.8475764924646074e-03,  5.5483638104875219e-04,
+      3.2244817585743671e-04,  1.7335129486130759e-03,
+      -1.3668464681863240e-03, 2.5866682413927889e-03,
+      1.7335129486130759e-03,  6.7373810044801786e-04,
+      -1.3043290920668447e-02, 7.5390776005711682e-03,
+      -1.4967658915720180e-03, -1.1709919402049843e-03,
+      7.5390776005711682e-03,  -2.3042283980880748e-03,
+      1.8948249232033040e-03,  1.4633606749955709e-03,
+      -1.4967658915720180e-03, 1.8948249232033040e-03,
+      -1.1525903461801575e-04, 8.6522916210846480e-04,
+      -1.1709919402049843e-03, 1.4633606749955709e-03,
+      8.6522916210846480e-04,  -3.7584188113171969e-06,
+      -1.5585713708593366e-02, 8.6119952233053958e-03,
+      2.0853199702149908e-04,  -3.6538641086258830e-04,
+      8.6119952233053958e-03,  -2.0708945444859184e-03,
+      2.0916162538495752e-03,  1.6963122585672433e-03,
+      2.0853199702149908e-04,  2.0916162538495752e-03,
+      8.4673232959246606e-04,  1.7050167169310555e-03,
+      -3.6538641086258830e-04, 1.6963122585672433e-03,
+      1.7050167169310555e-03,  9.7321838069529471e-04,
+      -8.5883608444249915e-03, 5.1230145745255022e-03,
+      8.9159902137280552e-04,  1.2313522195822745e-03,
+      5.1230145745255022e-03,  -3.2070396434171729e-03,
+      -4.0842243388821992e-04, -5.0228366774250242e-04,
+      8.9159902137280552e-04,  -4.0842243388821992e-04,
+      -1.6659900154853617e-04, 1.6703129248579686e-04,
+      1.2313522195822745e-03,  -5.0228366774250242e-04,
+      1.6703129248579686e-04,  -1.3910653088204271e-04,
+      -1.7177487424496159e-02, 8.6439434323196465e-03,
+      -1.2676542069329643e-03, 1.5530920065632684e-03,
+      8.6439434323196465e-03,  -3.0711886933060752e-03,
+      2.6503690364040600e-03,  1.5823408628991603e-03,
+      -1.2676542069329643e-03, 2.6503690364040600e-03,
+      2.0205318440554335e-05,  9.7190007034572039e-04,
+      1.5530920065632684e-03,  1.5823408628991603e-03,
+      9.7190007034572039e-04,  6.3167332201993298e-04,
+      -1.8198829655079997e-02, 6.1783746997468043e-03,
+      2.7097557207645976e-03,  1.0674413677482549e-04,
+      6.1783746997468043e-03,  -1.5255143488708853e-03,
+      4.2446808059708652e-04,  2.0056066309057513e-03,
+      2.7097557207645976e-03,  4.2446808059708652e-04,
+      -7.0722767103247085e-05, 2.1439352866911227e-03,
+      1.0674413677482549e-04,  2.0056066309057513e-03,
+      2.1439352866911227e-03,  4.9248673587442768e-04,
+      -1.2899230683789387e-02, 3.8809561952671224e-03,
+      5.8590703877280138e-03,  7.0773016587898281e-04,
+      3.8809561952671224e-03,  -1.7333578324938207e-03,
+      1.5513687237511255e-03,  7.3324212478700988e-04,
+      5.8590703877280138e-03,  1.5513687237511255e-03,
+      -1.6842888582058607e-03, 2.0427657447758315e-03,
+      7.0773016587898281e-04,  7.3324212478700988e-04,
+      2.0427657447758315e-03,  -3.9900791227274096e-04,
+      -1.3724818137332397e-02, 4.4851048250000233e-03,
+      -1.3002218055153201e-03, -1.2182844898688022e-03,
+      4.4851048250000233e-03,  -1.8202270259962889e-03,
+      1.9639130830478775e-03,  7.5413518528937972e-04,
+      -1.3002218055153201e-03, 1.9639130830478775e-03,
+      3.1870016559458009e-04,  1.0685796268949520e-03,
+      -1.2182844898688022e-03, 7.5413518528937972e-04,
+      1.0685796268949520e-03,  4.8322321243228277e-04,
+      -1.2727886752614260e-02, 7.6627520867563600e-03,
+      -1.0977617387408160e-03, -9.2741573884433924e-05,
+      7.6627520867563600e-03,  -3.0923177506530018e-03,
+      5.2725763824365925e-04,  1.3750576763160460e-03,
+      -1.0977617387408160e-03, 5.2725763824365925e-04,
+      -3.8624246299677348e-04, 9.9356026784475603e-04,
+      -9.2741573884433924e-05, 1.3750576763160460e-03,
+      9.9356026784475603e-04,  -3.6185112057586651e-04,
+      -1.4230368421296529e-02, 6.4613018087884382e-03,
+      4.2545431560881697e-03,  2.3312567462464729e-03,
+      6.4613018087884382e-03,  -1.5885108206906821e-03,
+      3.4554820247452228e-04,  1.8419118470444126e-03,
+      4.2545431560881697e-03,  3.4554820247452228e-04,
+      -2.8748861071839916e-04, 9.0566105830775748e-04,
+      2.3312567462464729e-03,  1.8419118470444126e-03,
+      9.0566105830775748e-04,  2.7039215950598432e-04,
+      -1.2948564663328069e-02, 8.4157282283072385e-03,
+      5.0620127290202754e-04,  -1.3314748626910642e-03,
+      8.4157282283072385e-03,  -3.5176641019498255e-03,
+      1.5307289251449395e-03,  1.8525663157440806e-03,
+      5.0620127290202754e-04,  1.5307289251449395e-03,
+      5.6978788798275784e-05,  1.2935940913392739e-03,
+      -1.3314748626910642e-03, 1.8525663157440806e-03,
+      1.2935940913392739e-03,  5.2341304653500667e-04,
+      -1.2728855074017827e-02, 7.6246707479808553e-03,
+      -9.3224478600061457e-04, -1.5620738423386569e-03,
+      7.6246707479808553e-03,  -1.9782856856559788e-03,
+      1.4103550247563932e-03,  2.0801314899663139e-03,
+      -9.3224478600061457e-04, 1.4103550247563932e-03,
+      4.6054846597137436e-04,  1.2330604509034212e-03,
+      -1.5620738423386569e-03, 2.0801314899663139e-03,
+      1.2330604509034212e-03,  7.7627861087214880e-04,
+      -1.7487269493133485e-02, 8.1945615648457912e-03,
+      -1.3867371780327564e-03, -7.7203633752733088e-04,
+      8.1945615648457912e-03,  -2.6839597442567947e-03,
+      2.3107751960829994e-03,  1.6061326831885306e-03,
+      -1.3867371780327564e-03, 2.3107751960829994e-03,
+      6.8401777560147333e-04,  1.1030118862660431e-03,
+      -7.7203633752733088e-04, 1.6061326831885306e-03,
+      1.1030118862660431e-03,  7.3617364729802910e-04,
+      -1.3077682176402587e-02, 6.5573215542587115e-03,
+      8.9992300772369772e-04,  1.8631593971488946e-03,
+      6.5573215542587115e-03,  -1.7553672105479552e-03,
+      1.4263355120099006e-03,  -6.6131788423017066e-05,
+      8.9992300772369772e-04,  1.4263355120099006e-03,
+      -1.5795558529122932e-04, 9.3709040689480281e-04,
+      1.8631593971488946e-03,  -6.6131788423017066e-05,
+      9.3709040689480281e-04,  2.7558380426612552e-04,
+      -1.2016216250619665e-02, 1.0198974276762779e-03,
+      2.9684095074686417e-03,  4.7574388367168719e-04,
+      1.0198974276762779e-03,  -1.0457484751015638e-03,
+      1.4462132483450711e-03,  1.4655862021442318e-03,
+      2.9684095074686417e-03,  1.4462132483450711e-03,
+      -1.1640147094591868e-04, 4.5714086841669362e-04,
+      4.7574388367168719e-04,  1.4655862021442318e-03,
+      4.5714086841669362e-04,  3.7349208876016085e-04,
+      -1.2729011740008157e-02, 6.2751467801390199e-03,
+      2.7374611452219005e-03,  -4.8769041908477083e-04,
+      6.2751467801390199e-03,  -2.5912066920437101e-03,
+      -3.6502488298821795e-04, 1.2747675934628886e-03,
+      2.7374611452219005e-03,  -3.6502488298821795e-04,
+      -1.3461493454670274e-04, 5.1297874213225157e-04,
+      -4.8769041908477083e-04, 1.2747675934628886e-03,
+      5.1297874213225157e-04,  9.4116610547478451e-05,
+      -1.6566854215408953e-02, 7.5656107280793628e-03,
+      2.2151664856272313e-03,  -6.2355508020778901e-04,
+      7.5656107280793628e-03,  -1.6767633757818533e-03,
+      7.2857964830252552e-04,  2.0018839786305805e-03,
+      2.2151664856272313e-03,  7.2857964830252552e-04,
+      7.8168565633001156e-04,  1.2563330405130704e-03,
+      -6.2355508020778901e-04, 2.0018839786305805e-03,
+      1.2563330405130704e-03,  9.4171843217346913e-04,
+      -1.3827220555148755e-02, 8.0412338777692237e-03,
+      -9.2914695167431839e-04, 2.5139109709975873e-03,
+      8.0412338777692237e-03,  -2.8902523593109979e-03,
+      1.7337506184933089e-03,  -2.4479541495431615e-04,
+      -9.2914695167431839e-04, 1.7337506184933089e-03,
+      -6.2590822563885222e-04, 3.3033744058861008e-04,
+      2.5139109709975873e-03,  -2.4479541495431615e-04,
+      3.3033744058861008e-04,  1.1867575245440103e-04,
+      -1.1673022270160038e-02, 6.3818705599438547e-03,
+      1.5529035996520919e-03,  1.9023499244602438e-03,
+      6.3818705599438547e-03,  -2.7838066962823493e-03,
+      -3.4731692421037671e-04, -5.1143360268925109e-04,
+      1.5529035996520919e-03,  -3.4731692421037671e-04,
+      -2.9269524595909200e-04, 6.5189571167041226e-04,
+      1.9023499244602438e-03,  -5.1143360268925109e-04,
+      6.5189571167041226e-04,  -1.8842002149771150e-04,
+      -1.7624872761522331e-02, 7.6102659079345675e-03,
+      2.9313602118366056e-03,  -1.2670120134682693e-03,
+      7.6102659079345675e-03,  -1.6176859045345274e-03,
+      5.2235785563708548e-04,  2.1125237303853172e-03,
+      2.9313602118366056e-03,  5.2235785563708548e-04,
+      -6.9336521692627494e-05, 6.2961316498085299e-04,
+      -1.2670120134682693e-03, 2.1125237303853172e-03,
+      6.2961316498085299e-04,  2.8049822298363992e-04,
+      -1.1443728662686095e-02, 7.1090692233263322e-03,
+      -1.0267468148229923e-03, -9.7148107774755140e-04,
+      7.1090692233263322e-03,  -3.5986082376354236e-03,
+      1.0764295384459879e-03,  1.4077630540134337e-03,
+      -1.0267468148229923e-03, 1.0764295384459879e-03,
+      -1.4573111804904847e-04, 5.4872481291025343e-04,
+      -9.7148107774755140e-04, 1.4077630540134337e-03,
+      5.4872481291025343e-04,  -4.9397324463356642e-05,
+      -1.4863131313394926e-02, 6.3276765757360525e-03,
+      4.1762520161465356e-03,  4.6274316270408979e-03,
+      6.3276765757360525e-03,  -1.5123375588949818e-03,
+      1.5393235939542785e-03,  1.7038958618260523e-04,
+      4.1762520161465356e-03,  1.5393235939542785e-03,
+      -3.8236797488742761e-04, 4.2645241632206950e-04,
+      4.6274316270408979e-03,  1.7038958618260523e-04,
+      4.2645241632206950e-04,  -2.8027135821990324e-04,
+      -9.2269149280535777e-03, 6.1800015820290153e-03,
+      -1.1177505002325030e-03, -7.3655389049955076e-04,
+      6.1800015820290153e-03,  -3.1949632173689504e-03,
+      9.2780949436812210e-04,  5.4061711672642406e-04,
+      -1.1177505002325030e-03, 9.2780949436812210e-04,
+      -3.6714991602973209e-04, 4.4326007298994439e-04,
+      -7.3655389049955076e-04, 5.4061711672642406e-04,
+      4.4326007298994439e-04,  -2.6509650841867525e-04,
+      -1.8043017394515380e-02, 8.2372721119834628e-03,
+      -1.3666634730605748e-03, 2.5937673526110440e-03,
+      8.2372721119834628e-03,  -2.2765382593275749e-03,
+      1.8098836907605163e-03,  6.2774788742545431e-04,
+      -1.3666634730605748e-03, 1.8098836907605163e-03,
+      6.3750682573144961e-04,  1.5016614599137833e-03,
+      2.5937673526110440e-03,  6.2774788742545431e-04,
+      1.5016614599137833e-03,  7.9656988901641562e-04,
+      -1.7677863320201812e-02, 7.9918211430818238e-03,
+      2.8809808774680002e-03,  3.3219012282079618e-04,
+      7.9918211430818238e-03,  -2.5093243675945506e-03,
+      1.6427685072652385e-03,  1.3849057522712679e-03,
+      2.8809808774680002e-03,  1.6427685072652385e-03,
+      -6.6341536325757559e-04, 1.6383230989587684e-03,
+      3.3219012282079618e-04,  1.3849057522712679e-03,
+      1.6383230989587684e-03,  -5.4016870687916257e-04,
+      -1.2521451784673619e-02, 9.4223977497539906e-03,
+      2.8055309806167853e-03,  1.7872667456776705e-03,
+      9.4223977497539906e-03,  -3.2829775106665285e-03,
+      -1.4133846391320361e-05, 3.4392328922804288e-04,
+      2.8055309806167853e-03,  -1.4133846391320361e-05,
+      6.9100703062136178e-05,  1.4271286897663935e-03,
+      1.7872667456776705e-03,  3.4392328922804288e-04,
+      1.4271286897663935e-03,  5.3937434576994730e-04,
+      -1.3632536222978362e-02, 1.0257748639522706e-02,
+      1.4462286898582958e-03,  1.1312437976068149e-04,
+      1.0257748639522706e-02,  -3.6576998284675250e-03,
+      1.6043422861050061e-03,  1.7092618248746980e-03,
+      1.4462286898582958e-03,  1.6043422861050061e-03,
+      1.6618519504767670e-04,  9.6235513287789881e-04,
+      1.1312437976068149e-04,  1.7092618248746980e-03,
+      9.6235513287789881e-04,  3.5802719238072031e-04,
+      -9.2473425953727914e-03, 5.4646516574637042e-03,
+      -9.5962421166676426e-04, -7.8247003179074371e-04,
+      5.4646516574637042e-03,  -2.7506657954682067e-03,
+      5.9386441787538592e-04,  5.4897582411844530e-04,
+      -9.5962421166676426e-04, 5.9386441787538592e-04,
+      -3.0196113116772263e-04, 1.8573011529360198e-04,
+      -7.8247003179074371e-04, 5.4897582411844530e-04,
+      1.8573011529360198e-04,  -2.1319293163917321e-04,
+      -1.2206117329009541e-02, 6.1092810650537708e-03,
+      -1.4481345745921150e-03, -2.9722362809001516e-04,
+      6.1092810650537708e-03,  -1.9126144266505905e-03,
+      1.7554634731526244e-03,  5.5972781767352870e-04,
+      -1.4481345745921150e-03, 1.7554634731526244e-03,
+      8.6367120618161951e-05,  6.7487884435146381e-04,
+      -2.9722362809001516e-04, 5.5972781767352870e-04,
+      6.7487884435146381e-04,  2.3556476013967504e-04,
+      -1.5414965310465042e-02, 7.3079303890448630e-03,
+      -6.8902344013111860e-05, -1.3856544124642479e-03,
+      7.3079303890448630e-03,  -2.3520366982052667e-03,
+      2.2813099826687437e-03,  1.1422089660014121e-03,
+      -6.8902344013111860e-05, 2.2813099826687437e-03,
+      -5.6644557392149263e-05, 1.1994499087383925e-03,
+      -1.3856544124642479e-03, 1.1422089660014121e-03,
+      1.1994499087383925e-03,  3.4944002873506328e-04,
+      -1.1752221672219454e-02, 6.9456319878067867e-03,
+      -1.5705985744796634e-03, -1.2694046046713740e-03,
+      6.9456319878067867e-03,  -2.1753723772528176e-03,
+      1.5503236118177893e-03,  2.0219529729626610e-03,
+      -1.5705985744796634e-03, 1.5503236118177893e-03,
+      2.4923829829079700e-04,  8.6809285519335883e-04,
+      -1.2694046046713740e-03, 2.0219529729626610e-03,
+      8.6809285519335883e-04,  3.2743478195986473e-04,
+      -1.4690777928137583e-02, 9.3382417975744429e-03,
+      -8.7839174936228269e-04, -1.0828156619940759e-03,
+      9.3382417975744429e-03,  -3.6232340991021171e-03,
+      1.5142500530220151e-03,  1.4677066708721873e-03,
+      -8.7839174936228269e-04, 1.5142500530220151e-03,
+      4.0376228484767084e-06,  1.0556394847561596e-03,
+      -1.0828156619940759e-03, 1.4677066708721873e-03,
+      1.0556394847561596e-03,  3.6668747259171032e-04,
+      -1.5476441143750821e-02, 9.2269293204535301e-03,
+      3.6995872897747060e-04,  -7.7034041709245649e-04,
+      9.2269293204535301e-03,  -4.4645128477425761e-03,
+      2.1128383054205796e-03,  2.4504815100855485e-03,
+      3.6995872897747060e-04,  2.1128383054205796e-03,
+      2.2586802223079191e-04,  1.8926509467373552e-03,
+      -7.7034041709245649e-04, 2.4504815100855485e-03,
+      1.8926509467373552e-03,  8.5536596431988856e-04,
+      -1.5609938715099692e-02, 8.9557198628165934e-03,
+      -1.3057139142808542e-03, -8.7481229959170993e-06,
+      8.9557198628165934e-03,  -3.2036744864203165e-03,
+      1.8940821063869206e-03,  1.8774348378665215e-03,
+      -1.3057139142808542e-03, 1.8940821063869206e-03,
+      7.2606869807052658e-04,  1.3119329296710162e-03,
+      -8.7481229959170993e-06, 1.8774348378665215e-03,
+      1.3119329296710162e-03,  1.0619415404491685e-03,
+      -1.5481617424794581e-02, 7.3302600110515892e-03,
+      6.5644710521001073e-04,  -1.2283650081392702e-03,
+      7.3302600110515892e-03,  -1.8696748976957526e-03,
+      5.9596303915765750e-04,  1.2499531031902422e-03,
+      6.5644710521001073e-04,  5.9596303915765750e-04,
+      9.7138240642967119e-05,  8.8503568194063412e-04,
+      -1.2283650081392702e-03, 1.2499531031902422e-03,
+      8.8503568194063412e-04,  2.5511830692021193e-04,
+      -1.3520103229756882e-02, 7.8484109792015359e-03,
+      -4.1632387603416781e-04, -1.1558280730735919e-03,
+      7.8484109792015359e-03,  -2.0728512914414475e-03,
+      1.9517286698148926e-03,  2.0216583965520000e-03,
+      -4.1632387603416781e-04, 1.9517286698148926e-03,
+      4.6063232535377560e-04,  1.5362873491442208e-03,
+      -1.1558280730735919e-03, 2.0216583965520000e-03,
+      1.5362873491442208e-03,  5.2457975844262875e-04,
+      -1.5104116485397874e-02, 8.5473834321937604e-03,
+      -1.3043437235430149e-03, 1.5276622030354890e-03,
+      8.5473834321937604e-03,  -3.3780289659805359e-03,
+      1.9847858497536674e-03,  5.0757574599316559e-04,
+      -1.3043437235430149e-03, 1.9847858497536674e-03,
+      -1.5945405766001441e-04, 1.5678019969852818e-03,
+      1.5276622030354890e-03,  5.0757574599316559e-04,
+      1.5678019969852818e-03,  2.4763257841851471e-04,
+      -1.6361908776597107e-02, 4.3568452185025310e-03,
+      3.4563556520869625e-03,  4.3658362914368865e-03,
+      4.3568452185025310e-03,  -8.6002608849411477e-04,
+      1.1935067562380381e-03,  9.5763305550593627e-05,
+      3.4563556520869625e-03,  1.1935067562380381e-03,
+      -6.5655114602498324e-04, 2.8551486975809026e-04,
+      4.3658362914368865e-03,  9.5763305550593627e-05,
+      2.8551486975809026e-04,  -5.2757450862172910e-04,
+      -1.7807729170147439e-02, 7.1610610051428998e-03,
+      -1.1911025226577043e-03, 2.6056388311463353e-03,
+      7.1610610051428998e-03,  -2.1885651750746023e-03,
+      2.9274154172681229e-03,  8.3377402546824613e-04,
+      -1.1911025226577043e-03, 2.9274154172681229e-03,
+      1.7570402880203167e-04,  1.4504468369839752e-03,
+      2.6056388311463353e-03,  8.3377402546824613e-04,
+      1.4504468369839752e-03,  6.9093609359350367e-04,
+      -1.1247931682705574e-02, 5.7455202196775934e-03,
+      1.1174148432837694e-04,  -1.0001185298297672e-03,
+      5.7455202196775934e-03,  -2.1567917357754388e-03,
+      6.2799688175952328e-04,  5.6474713215460648e-04,
+      1.1174148432837694e-04,  6.2799688175952328e-04,
+      -1.1840763525279758e-04, 5.3189815096175985e-04,
+      -1.0001185298297672e-03, 5.6474713215460648e-04,
+      5.3189815096175985e-04,  -8.5340380560854686e-05,
+      -1.0115751598013022e-02, 7.1450423354329218e-03,
+      -9.6769024323713497e-04, 6.0983843256720333e-04,
+      7.1450423354329218e-03,  -2.6969102530045707e-03,
+      1.0319270924417643e-03,  2.3496098746511552e-04,
+      -9.6769024323713497e-04, 1.0319270924417643e-03,
+      -8.0513400430597755e-04, 4.7088258363160609e-04,
+      6.0983843256720333e-04,  2.3496098746511552e-04,
+      4.7088258363160609e-04,  -1.4457178691538466e-04};
+
+  std::vector dy = {-3.7309172874861328e-03, 1.3333653131861634e-03,
+                            8.8577244948839816e-04,  4.4771776498148510e-04,
+                            -3.2036744215949474e-03, 1.5721737770764861e-03,
+                            7.5774810984830811e-04,  -7.6103439612831651e-06,
+                            -1.5642091060224157e-03, 2.0953017056858877e-03,
+                            1.8124937780487031e-04,  -1.0725465141096370e-03,
+                            -2.2923883203997790e-03, 1.8300760226060355e-03,
+                            4.8940452841059862e-04,  -6.7210654905128198e-04,
+                            -4.3341964593999359e-03, 1.4253156363736956e-03,
+                            9.8923131293439642e-04,  1.2115998085801848e-03,
+                            -2.6188268847725871e-03, 1.7119021466805964e-03,
+                            5.6117501695963988e-04,  -4.2324237308625010e-04,
+                            -2.2900131399922951e-03, 1.8386032061780638e-03,
+                            5.2171200216442610e-04,  -6.4883325259721075e-04,
+                            -2.5812803651831646e-03, 1.7293876231322676e-03,
+                            6.0622938832230022e-04,  -4.3579477573257375e-04,
+                            -2.8530249847228155e-03, 1.6808033517384546e-03,
+                            6.6042307748444143e-04,  -2.8824157968878450e-04,
+                            -3.7393733570642322e-03, 1.3716770835582519e-03,
+                            8.7227862962462044e-04,  4.5872483076204578e-04,
+                            -2.4437272319513359e-03, 1.7400758488808985e-03,
+                            5.1182653748711650e-04,  -5.8105073770823191e-04,
+                            -3.2810369412823396e-03, 1.4918691558186718e-03,
+                            7.4724281784686224e-04,  6.3875403362581882e-05,
+                            -3.4247726827394570e-03, 1.4560852013918019e-03,
+                            7.6169488667705810e-04,  1.5409667480043344e-04,
+                            -3.7529573893496722e-03, 1.3819905220282573e-03,
+                            8.6645778079868173e-04,  4.5529019815005340e-04,
+                            -2.7761885434245126e-03, 1.6221283633832189e-03,
+                            6.4100239162366926e-04,  -2.8385959725213443e-04,
+                            -2.8619525975001991e-03, 1.6742414664644566e-03,
+                            6.5987011922647798e-04,  -2.5588583656454151e-04,
+                            -3.5782213588599009e-03, 1.4067925694494811e-03,
+                            8.1801204214687574e-04,  2.9417370593075526e-04,
+                            -3.2179311027623852e-03, 1.5351083071313909e-03,
+                            7.6347536572857384e-04,  -1.2101519294540594e-05,
+                            -3.5358134475080953e-03, 1.4165640362083939e-03,
+                            8.2941835466963714e-04,  2.5395670515060458e-04,
+                            -2.3142024782931113e-03, 1.8181293172609410e-03,
+                            5.4151803735599758e-04,  -6.0053505167333827e-04,
+                            -4.0744831658946472e-03, 1.3071202590006017e-03,
+                            9.9633651691117707e-04,  8.1295718049464744e-04,
+                            -3.8475776506513007e-03, 1.2962018740413476e-03,
+                            9.3330073985875681e-04,  5.8452572124980187e-04,
+                            -1.9683320699194757e-03, 1.8795410802464392e-03,
+                            3.5931418446205485e-04,  -7.9622117146730320e-04,
+                            -4.2836432069418432e-03, 1.3057358949544792e-03,
+                            1.0478903069398337e-03,  1.1388072821866995e-03,
+                            -3.6481289572086509e-03, 1.2680834323314254e-03,
+                            9.1419213899072288e-04,  4.1250969611241915e-04,
+                            -3.7868589170986242e-03, 1.4248346144113545e-03,
+                            8.6769642282752434e-04,  4.8009987220807757e-04,
+                            -2.8109243740096651e-03, 1.6305799665922716e-03,
+                            6.4582051914641788e-04,  -2.7233613703581204e-04,
+                            -3.8714234700849403e-03, 1.4365872229829926e-03,
+                            7.9039059028440748e-04,  5.8082810834859922e-04,
+                            -3.2390351248452994e-03, 1.5083636177925740e-03,
+                            7.0100191142906398e-04,  2.1276162385565475e-05,
+                            -4.0758464441795483e-03, 1.3467789743563711e-03,
+                            9.0375839572505727e-04,  8.1824793954867955e-04,
+                            -4.2307140951998419e-03, 1.3156828039467543e-03,
+                            1.0778378449497364e-03,  9.7419386839855905e-04,
+                            -3.8482460366827680e-03, 1.3425192918780234e-03,
+                            8.4666567817293223e-04,  6.0686241880002418e-04,
+                            -3.5314219963828199e-03, 1.4381299141681125e-03,
+                            7.9553487485571899e-04,  2.6599193472912579e-04,
+                            -3.0625963328189260e-03, 1.5598565683800933e-03,
+                            7.2517085286940468e-04,  -1.1417249492739269e-04,
+                            -3.2401979571803855e-03, 1.5423006052675326e-03,
+                            7.2952676962876912e-04,  -8.5276110538955337e-06,
+                            -2.3676304952203239e-03, 1.7913062056963405e-03,
+                            5.4550664708489952e-04,  -5.8142337266284694e-04,
+                            -3.3562413373964786e-03, 1.4439022806622475e-03,
+                            7.3437787621236539e-04,  1.1756361872177236e-04,
+                            -4.3718193202676135e-03, 1.3310441122746876e-03,
+                            1.1562039243607975e-03,  1.1472230349748561e-03,
+                            -3.3313992651614545e-03, 1.4915589370723611e-03,
+                            7.8084944429722110e-04,  9.1372270882632316e-05,
+                            -4.3740557131606459e-03, 1.3268717399460552e-03,
+                            1.1579324109626387e-03,  1.1521185202595687e-03,
+                            -3.7927167356510412e-03, 1.3231482293682613e-03,
+                            8.4755885080500565e-04,  5.4665126459383327e-04,
+                            -3.5935568152285231e-03, 1.4290305096829330e-03,
+                            7.8606733253349130e-04,  3.1315221994287850e-04,
+                            -3.1614896010873366e-03, 1.5105982619965496e-03,
+                            7.0021276781268450e-04,  -2.3983710609199078e-05,
+                            -3.0147657599133232e-03, 1.5495419519657451e-03,
+                            6.6899185166963951e-04,  -1.2057880405017928e-04,
+                            -2.9963762079088952e-03, 1.6034868108259063e-03,
+                            6.3607675253841956e-04,  -1.8417206074768401e-04,
+                            -2.7426643259774895e-03, 1.6157336038172625e-03,
+                            6.1401235615143987e-04,  -3.3306000572104970e-04,
+                            -2.4766813065329693e-03, 1.7832827610314490e-03,
+                            5.7063483799111604e-04,  -5.1939389006707012e-04,
+                            -2.7058508682563827e-03, 1.6218793335994073e-03,
+                            5.7753543630265211e-04,  -3.4623185385953561e-04,
+                            -3.6714826224726309e-03, 1.4597702029387118e-03,
+                            8.5442239573998893e-04,  3.7440232615804407e-04,
+                            -2.5070783356414134e-03, 1.7472384470121067e-03,
+                            5.6462790247820919e-04,  -4.6769781044539899e-04,
+                            -3.4387661142797959e-03, 1.4535061065990102e-03,
+                            7.7895608021457275e-04,  2.0354737904839946e-04,
+                            -2.7050922014808390e-03, 1.6959138589712618e-03,
+                            6.1077224833082030e-04,  -3.8335512759285180e-04,
+                            -2.5394784289842513e-03, 1.7326482568320832e-03,
+                            5.8030789834628870e-04,  -4.9658497358456876e-04,
+                            -3.7139080629854208e-03, 1.3083146125837427e-03,
+                            8.8709895889324357e-04,  4.3181952475237497e-04,
+                            -3.0902912715277077e-03, 1.5458493373768866e-03,
+                            6.8330337138244770e-04,  -7.7274160748078970e-05,
+                            -3.6486457748529568e-03, 1.4380340692779169e-03,
+                            8.0537862363315113e-04,  3.6166524605696793e-04,
+                            -3.8259014218855834e-03, 1.3935398904790382e-03,
+                            8.5063806525072705e-04,  5.0369999227149601e-04,
+                            -3.3219004182506043e-03, 1.5169900953600937e-03,
+                            7.2943565247969642e-04,  1.2454041603774481e-04,
+                            -2.9710886292812757e-03, 1.6122308537815642e-03,
+                            6.9407008210209398e-04,  -1.8550200210251709e-04,
+                            -3.5823989245151611e-03, 1.3803897342254585e-03,
+                            7.9659655171532566e-04,  2.9806889205486018e-04,
+                            -2.6709594617153878e-03, 1.7132749589768494e-03,
+                            6.1556197573809268e-04,  -3.9142476838036435e-04,
+                            -2.9481681479607618e-03, 1.5766328367677427e-03,
+                            6.6232144637444707e-04,  -1.4989734301076280e-04,
+                            -3.2491817681866895e-03, 1.5341764317035110e-03,
+                            7.6268657981340220e-04,  2.4406488167046862e-05,
+                            -3.2248794286049952e-03, 1.4615786669513607e-03,
+                            7.3069596608385794e-04,  5.0300823976094416e-05,
+                            1.2628166438797038e-03,  -8.9274753600505690e-04,
+                            -4.9076860067750170e-04, -4.1869309125592256e-04,
+                            1.3715102644465138e-03,  -1.0141291898274978e-03,
+                            -7.8766533967201816e-04, -3.9772035849276288e-04,
+                            1.1612302578037791e-03,  -4.3053803181958750e-04,
+                            -2.0146672160117711e-04, -7.4016307381392259e-04,
+                            1.4537946412679723e-03,  -9.5065863784825301e-04,
+                            -1.0343570332467453e-03, -3.8768498653108665e-04,
+                            1.3854917882825165e-03,  -1.0295014786804450e-03,
+                            -9.6373001786993968e-04, -3.0535774540021835e-04,
+                            1.4511572929488627e-03,  -9.2318578761469057e-04,
+                            -1.0422547691855640e-03, -4.2723072416751189e-04,
+                            1.4452773695581647e-03,  -9.7728273043188804e-04,
+                            -1.0741338772551449e-03, -3.0937363161101300e-04,
+                            1.5259064198728280e-03,  -8.8493370962583592e-04,
+                            -1.2007486502659927e-03, -3.1227283339035905e-04,
+                            1.5436700653155932e-03,  -8.7589313303263426e-04,
+                            -1.3280253258668860e-03, -1.8105115666848938e-04,
+                            1.5404932421320258e-03,  -8.5758359271832579e-04,
+                            -1.4832458365412898e-03, 2.6407770476321191e-06,
+                            1.4155612513337844e-03,  -1.0046531590963029e-03,
+                            -9.6842134578424706e-04, -3.5401027400996205e-04,
+                            1.3400184836343154e-03,  -1.0256045874409653e-03,
+                            -6.5169805282558834e-04, -4.2701606068519234e-04,
+                            1.2857877730087135e-03,  -9.0508762673802764e-04,
+                            -5.1943388839087771e-04, -4.6352002092798518e-04,
+                            1.5051360776270669e-03,  -8.9594973727753411e-04,
+                            -1.1527368214360100e-03, -3.3755565066477403e-04,
+                            1.4546640992893533e-03,  -9.5988516608341162e-04,
+                            -9.1623755155505670e-04, -4.8588890598821486e-04,
+                            1.4016196272903870e-03,  -1.0178182975371106e-03,
+                            -9.9041538684325634e-04, -3.1234386991993196e-04,
+                            1.4055482177189956e-03,  -1.0213236463436626e-03,
+                            -9.7796586357687832e-04, -3.0205558859203868e-04,
+                            1.4242664925470684e-03,  -9.8289044930037375e-04,
+                            -9.1476699353148475e-04, -4.3593144707404561e-04,
+                            1.4563487652851604e-03,  -9.5674318018110129e-04,
+                            -1.0204295900713581e-03, -3.8614169684175954e-04,
+                            1.3340538380055412e-03,  -9.8166445704493547e-04,
+                            -6.4235456808529520e-04, -4.6185514700680652e-04,
+                            1.3251633800902419e-03,  -9.7699364526281410e-04,
+                            -6.0020814973974911e-04, -4.4027417584270871e-04,
+                            1.3352824913690901e-03,  -9.3227658428344747e-04,
+                            -6.1244190731189806e-04, -4.9720054122240971e-04,
+                            1.2307343717364581e-03,  -8.8718012579170583e-04,
+                            -3.7907952728977293e-04, -4.7149395834188992e-04,
+                            1.2872312473441560e-03,  -1.0074761274442257e-03,
+                            -6.9178075288798071e-04, -2.7705625448618486e-04,
+                            1.2927150624045740e-03,  -9.7803534445332430e-04,
+                            -4.8720068931597341e-04, -4.6004378064328215e-04,
+                            1.4257607447382041e-03,  -9.2923784202873067e-04,
+                            -9.6755887729674294e-04, -4.6242569285732927e-04,
+                            1.3914989006502964e-03,  -1.0200176224497951e-03,
+                            -9.2458206518511248e-04, -3.4506509486466449e-04,
+                            1.4333760204447944e-03,  -9.1097262948909249e-04,
+                            -1.2188972198513615e-03, -2.5996549081574896e-04,
+                            1.2461083659099338e-03,  -8.2861754923992321e-04,
+                            -4.1605933810697306e-04, -4.8905777062213857e-04,
+                            1.3768615064283817e-03,  -9.9713742062896974e-04,
+                            -7.9858980387651933e-04, -4.3504985424060790e-04,
+                            1.3291889767625101e-03,  -9.9040477132564600e-04,
+                            -6.5776027239921122e-04, -3.9846418355279120e-04,
+                            1.5206075965447940e-03,  -9.2006057004575127e-04,
+                            -1.2474234877704595e-03, -2.3140418373219520e-04,
+                            1.2887820380652511e-03,  -9.3080226682111587e-04,
+                            -5.2786640162325342e-04, -4.4118156984124777e-04,
+                            1.3907544080002891e-03,  -9.6351013629205012e-04,
+                            -7.8303340377082974e-04, -4.7622449399401109e-04,
+                            1.4091048902409534e-03,  -9.7169766598279418e-04,
+                            -9.3316898961782319e-04, -4.0724649531488546e-04,
+                            1.3879165258355152e-03,  -1.0811464187553267e-03,
+                            -8.8168502399533827e-04, -3.0373866499100119e-04,
+                            1.2870954343195211e-03,  -8.0890289393696527e-04,
+                            -5.3267861480527198e-04, -4.9356122412130170e-04,
+                            1.5429774711287242e-03,  -8.5866927657185196e-04,
+                            -1.3306313542314914e-03, -1.7873354099492435e-04,
+                            1.4021085501997211e-03,  -1.0221324371805936e-03,
+                            -9.2699019936940438e-04, -3.5288337262228114e-04,
+                            1.3239521433104891e-03,  -1.0047418588945290e-03,
+                            -8.3997109532851777e-04, -2.7449751294088233e-04,
+                            1.3868837677795806e-03,  -1.0276648847350152e-03,
+                            -7.1775681753454717e-04, -4.5782917392179828e-04,
+                            1.5808700010297582e-03,  -8.3038113202047045e-04,
+                            -1.6908149849389628e-03, 2.5410913367872757e-04,
+                            1.4647947565545553e-03,  -9.5678426470656116e-04,
+                            -9.6423665069701801e-04, -4.5175848785730601e-04,
+                            1.3804322148250373e-03,  -9.9700774526987799e-04,
+                            -8.1337331990322353e-04, -4.0374879312031649e-04,
+                            1.2800075192210976e-03,  -9.2649222387085459e-04,
+                            -5.2976884029422029e-04, -4.2936605007220911e-04,
+                            1.3810570455247023e-03,  -1.0722378487912155e-03,
+                            -7.7520984586550872e-04, -3.6667718738961482e-04,
+                            1.4037862914692491e-03,  -9.9147719341820565e-04,
+                            -9.5712521087853390e-04, -3.7439255803680066e-04,
+                            1.3299618332732993e-03,  -9.9744087373244928e-04,
+                            -6.3658280665772381e-04, -4.0128206925905106e-04,
+                            1.5066576047840761e-03,  -8.7658152159021132e-04,
+                            -1.2789382944763108e-03, -2.3242884748562141e-04,
+                            1.3071504645501193e-03,  -8.8996140075365670e-04,
+                            -4.9614457597987795e-04, -5.5075072441456200e-04,
+                            1.4159595821463559e-03,  -9.9120400447633844e-04,
+                            -8.8041573992293201e-04, -4.2244235314895873e-04,
+                            1.2554376918387320e-03,  -7.9168866122310321e-04,
+                            -4.5152569311177868e-04, -5.2227602995275849e-04,
+                            1.5629805055743966e-03,  -8.5058550567704435e-04,
+                            -1.3544920149855563e-03, -1.5043929062635852e-04,
+                            1.2912095035131571e-03,  -9.1748375464204240e-04,
+                            -5.3427771054738980e-04, -4.4418198090508067e-04,
+                            1.4057467167300063e-03,  -9.6192143297912003e-04,
+                            -7.2872273247311125e-04, -5.4389115598480571e-04,
+                            1.5409804354399271e-03,  -8.6418016393906435e-04,
+                            -1.5638051161781471e-03, 1.3537325148278473e-04,
+                            1.5124150369572446e-03,  -9.0296330713242421e-04,
+                            -1.4053433983987566e-03, -5.3260164448894459e-06,
+                            1.3265199858472445e-03,  -1.0277688029524160e-03,
+                            -7.2309275058722795e-04, -3.4610937063137806e-04,
+                            1.3022445273983721e-03,  -9.6922788833204770e-04,
+                            -5.7901862780536037e-04, -4.2446243249374797e-04,
+                            1.5616377636392905e-03,  -8.6248219043335368e-04,
+                            -1.3732629178650024e-03, -1.3294236495870761e-04,
+                            1.2462257930009543e-03,  -8.1096438665143804e-04,
+                            -4.9300621444041675e-04, -4.4992238895687783e-04,
+                            1.2048138996092884e-03,  -7.4155402937961089e-04,
+                            -3.8562341095634543e-04, -4.8265185178004110e-04,
+                            1.3997918624215518e-03,  -1.0216209066369918e-03,
+                            -9.4397461640859136e-04, -3.4199098846130318e-04,
+                            1.2620735286065163e-03,  -8.5604575570593841e-04,
+                            -4.6109387630229590e-04, -4.8688792774158394e-04,
+                            1.5208163918870386e-03,  -8.8722580384502858e-04,
+                            -1.1514411523661054e-03, -3.6068716415424309e-04,
+                            1.5616431264257710e-03,  -8.6373104473164315e-04,
+                            -1.2257596188148397e-03, -3.1867733110531793e-04,
+                            1.2389429452407711e-03,  -7.3425077652598814e-04,
+                            -3.7818319125320504e-04, -5.5296180344098229e-04,
+                            1.4236745199894749e-03,  -9.8144413894297945e-04,
+                            -1.0339917147206214e-03, -3.3224350123284209e-04,
+                            1.3639301846054431e-03,  -9.7698616283006996e-04,
+                            -7.6271561854903563e-04, -4.4038843862668760e-04,
+                            1.3319077186367961e-03,  -9.8418504392616744e-04,
+                            -6.1766568127290646e-04, -4.5146401018658297e-04,
+                            1.4441953622589801e-03,  -9.5685273246004965e-04,
+                            -1.0902814510323519e-03, -3.0850762291715034e-04,
+                            1.3433425680632694e-03,  -9.9356279190803516e-04,
+                            -6.1534605867470541e-04, -4.6558966036282527e-04,
+                            1.4546708164129235e-03,  -9.6300590913229187e-04,
+                            -1.1337359994421921e-03, -2.5746740945545017e-04,
+                            1.3087856139503663e-03,  -1.0168973495678301e-03,
+                            -6.4497486510166216e-04, -3.7542838371827940e-04,
+                            1.4352742897946794e-03,  -1.0009281183037142e-03,
+                            -8.1467745598339078e-04, -4.9780948738641084e-04,
+                            1.3267280323664397e-03,  -9.9210831897400889e-04,
+                            -7.2110752592905965e-04, -3.7053587233734035e-04,
+                            1.4367724596286544e-03,  -9.7287646205622543e-04,
+                            -1.0689733437145245e-03, -3.0929713667895097e-04,
+                            1.4284826558297228e-03,  -1.0032946508087481e-03,
+                            -9.3215819081625623e-04, -3.9266944529891222e-04,
+                            1.2638165713808633e-03,  -9.4203549844022077e-04,
+                            -4.3172960405687657e-04, -4.4825396345361454e-04,
+                            1.3144188019784330e-03,  -1.0229345575007232e-03,
+                            -7.8213089400696050e-04, -2.9459056286120584e-04,
+                            1.2977967295550502e-03,  -9.1823850004044442e-04,
+                            -5.4312108628579527e-04, -4.4973852658337552e-04,
+                            1.4589399346793047e-03,  -9.6337020175007505e-04,
+                            -9.5571443993837163e-04, -4.3500741544292166e-04,
+                            1.3679082255077248e-03,  -1.0456586778204123e-03,
+                            -7.9689290020671321e-04, -3.6447108180251604e-04,
+                            1.2192301505625884e-03,  -6.8914758742765522e-04,
+                            -4.1801535483624729e-04, -5.0855084936264909e-04,
+                            1.4209422098361410e-03,  -9.7982501545086425e-04,
+                            -9.5383290882026508e-04, -4.0628628933239729e-04,
+                            1.2840876758166296e-03,  -9.5726205004418880e-04,
+                            -5.9263894588719901e-04, -3.6034174047262833e-04,
+                            1.2193989165566138e-03,  -8.6362711921495882e-04,
+                            -3.3089474187258407e-04, -4.7367739380385751e-04,
+                            1.4454340065456679e-03,  -9.4903106726323190e-04,
+                            -9.4294645768342182e-04, -4.7443129921609459e-04,
+                            1.3223313806910361e-03,  -1.0793092901145601e-03,
+                            -6.6527927680775072e-04, -3.2359110026424040e-04,
+                            1.4973920733738924e-03,  -8.8491990550246182e-04,
+                            -1.2171591571912377e-03, -3.1820729740532089e-04,
+                            1.2211117894376268e-03,  -6.9737457441970436e-04,
+                            -3.8056940530299850e-04, -5.5878299644200971e-04,
+                            1.3871354491036012e-03,  -1.0196636806439811e-03,
+                            -8.9397096134337427e-04, -3.7123677529844246e-04,
+                            1.5073496738918853e-03,  -8.9914274586309299e-04,
+                            -1.1100923521957767e-03, -3.9534394591372900e-04,
+                            1.5389834372201414e-03,  -8.6865302431473463e-04,
+                            -1.2678597357811232e-03, -2.2479040064971316e-04,
+                            1.5857715616750692e-03,  -8.4802792400804413e-04,
+                            -1.4431505536957709e-03, -6.5066771441232851e-05,
+                            1.3040976914584432e-03,  -9.5378487842587192e-04,
+                            -5.6107062491615094e-04, -4.3607781750943416e-04,
+                            1.4361202688561447e-03,  -9.5224831111481809e-04,
+                            -1.1297794380246147e-03, -2.6778384205976237e-04,
+                            1.3635676357551174e-03,  -9.9412255599946506e-04,
+                            -7.3051951765478731e-04, -4.4154800562627911e-04,
+                            1.5508477502083700e-03,  -8.7939607075253085e-04,
+                            -1.3561935393377267e-03, -1.2635932200992647e-04,
+                            1.3977902932929805e-03,  -9.9082462373056976e-04,
+                            -8.0283463213426192e-04, -4.5650787679754265e-04,
+                            1.3207741731525238e-03,  -9.6358005340555517e-04,
+                            -6.3085951867438014e-04, -4.3968075348902217e-04,
+                            1.6252559503457818e-03,  -8.1207361624099851e-04,
+                            -1.5190942898645995e-03, 3.4005528914352132e-05,
+                            1.2909097015961842e-03,  -9.6864624807368213e-04,
+                            -5.2347902381152912e-04, -4.3715405458455165e-04,
+                            1.4001851162801044e-03,  -9.6767807815415017e-04,
+                            -1.1232365779831662e-03, -2.6481040763848013e-04,
+                            1.3217217954799262e-03,  -9.7991362264913690e-04,
+                            -5.5341730738921137e-04, -4.8295238369989513e-04,
+                            1.2640698090863750e-03,  -9.4752318346179938e-04,
+                            -4.4218067665729841e-04, -4.6578091847270366e-04,
+                            1.4951267809199774e-03,  -9.1642762891185319e-04,
+                            -1.0214860239958733e-03, -4.6970740845900466e-04,
+                            1.2841169680887078e-03,  -8.6417312011187616e-04,
+                            -4.8407383177561302e-04, -5.0088996321260085e-04,
+                            1.3512353593900953e-03,  -1.0439736418993391e-03,
+                            -9.0759977568137897e-04, -3.0173432033581658e-04,
+                            1.2345305066023997e-03,  -8.6838807328022921e-04,
+                            -4.0413743732280768e-04, -4.4479537319962951e-04,
+                            1.4466525287875924e-03,  -9.5170674859887969e-04,
+                            -9.7973885160627201e-04, -4.3948849284228546e-04,
+                            1.3725941331708027e-03,  -1.0166781230845619e-03,
+                            -7.6755671498962628e-04, -4.3191896703312273e-04,
+                            1.3538563227843109e-03,  -1.0199928751009691e-03,
+                            -7.7442160079980896e-04, -3.8161567314677245e-04,
+                            1.3859472864117407e-03,  -1.0345636883744270e-03,
+                            -8.1387538504034307e-04, -4.0318758271787206e-04,
+                            1.2260084446081257e-03,  -8.1329688437282478e-04,
+                            -3.8608177755934897e-04, -4.6857032807702829e-04,
+                            1.6004862228002157e-03,  -8.5869813229410576e-04,
+                            -1.4050654852032346e-03, -9.6057664582574606e-05,
+                            1.3459385533108785e-03,  -1.0014225817083209e-03,
+                            -7.5445110209810162e-04, -3.6394681092073613e-04,
+                            1.5802112204707694e-03,  -8.8418143184290860e-04,
+                            -1.3656321230276084e-03, -9.9859855886459247e-05,
+                            1.3938777586773245e-03,  -9.8734047951481258e-04,
+                            -8.2208943704233193e-04, -4.0539504453435344e-04,
+                            1.4360158838886581e-03,  -9.5466630949037588e-04,
+                            -1.0695021763836086e-03, -3.4966385012963883e-04,
+                            1.4782335719419745e-03,  -9.2096462932788139e-04,
+                            -9.9386049752195902e-04, -4.9044154671578190e-04,
+                            1.3242075112925250e-03,  -9.0801458127461564e-04,
+                            -6.9494238580068419e-04, -4.0874412726708961e-04,
+                            1.4746718485486884e-03,  -9.4454358380030991e-04,
+                            -1.2097928041591858e-03, -1.9567585923707621e-04,
+                            1.3680805611761193e-03,  -9.9714444979884850e-04,
+                            -6.7787533895967075e-04, -4.8246661902258490e-04,
+                            1.3149700282666921e-03,  -9.8943531187651343e-04,
+                            -6.8955875031408869e-04, -4.0063500875313666e-04,
+                            1.3856527284201953e-03,  -1.0075706013894159e-03,
+                            -8.9064557198323456e-04, -3.8669351114642469e-04,
+                            1.2372161408350122e-03,  -8.1355539160972760e-04,
+                            -5.0496057042071407e-04, -4.0950187262171203e-04,
+                            1.2788105421023745e-03,  -8.8057490788653024e-04,
+                            -5.8240597405691328e-04, -3.9217722061436343e-04};
   const int nloc = 192;
   const int nnei_i = 4;
   const int nnei_j = 4;
   const int last_layer_size = 4;
 
-  void SetUp() override {
-  }
-  void TearDown() override {
-  }
+  void SetUp() override {}
+  void TearDown() override {}
 };
 
-TEST_F(TestTabulateSeT, tabulate_fusion_se_t_cpu)
-{
+TEST_F(TestTabulateSeT, tabulate_fusion_se_t_cpu) {
   std::vector xyz_scatter(nloc * last_layer_size, 0);
-  deepmd::tabulate_fusion_se_t_cpu(&xyz_scatter[0], &table[0], &info[0], &em_x[0], &em[0], nloc, nnei_i, nnei_j, last_layer_size);
+  deepmd::tabulate_fusion_se_t_cpu(&xyz_scatter[0], &table[0], &info[0],
+                                           &em_x[0], &em[0], nloc, nnei_i,
+                                           nnei_j, last_layer_size);
   EXPECT_EQ(xyz_scatter.size(), nloc * last_layer_size);
   EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size());
   for (int jj = 0; jj < xyz_scatter.size(); ++jj) {
-    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]) , 1e-5);
+    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]), 1e-5);
   }
 }
 
-TEST_F(TestTabulateSeT, tabulate_fusion_se_t_grad_cpu)
-{
+TEST_F(TestTabulateSeT, tabulate_fusion_se_t_grad_cpu) {
   std::vector dy_dem_x(em_x.size());
   std::vector dy_dem(em.size());
-  deepmd::tabulate_fusion_se_t_grad_cpu(&dy_dem_x[0], &dy_dem[0], &table[0], &info[0], &em_x[0], &em[0], &dy[0], nloc, nnei_i, nnei_j, last_layer_size);
+  deepmd::tabulate_fusion_se_t_grad_cpu(
+      &dy_dem_x[0], &dy_dem[0], &table[0], &info[0], &em_x[0], &em[0], &dy[0],
+      nloc, nnei_i, nnei_j, last_layer_size);
   EXPECT_EQ(dy_dem_x.size(), nloc * nnei_i * nnei_j);
   EXPECT_EQ(dy_dem.size(), nloc * nnei_i * nnei_j);
   EXPECT_EQ(dy_dem_x.size(), expected_dy_dem_x.size());
   EXPECT_EQ(dy_dem.size(), expected_dy_dem.size());
   for (int jj = 0; jj < dy_dem_x.size(); ++jj) {
-    EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]) , 1e-5);
+    EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]), 1e-5);
   }
   for (int jj = 0; jj < dy_dem.size(); ++jj) {
-    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]) , 1e-5);
+    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]), 1e-5);
   }
 }
 
 #if GOOGLE_CUDA
-TEST_F(TestTabulateSeT, tabulate_fusion_se_t_gpu_cuda)
-{
+TEST_F(TestTabulateSeT, tabulate_fusion_se_t_gpu_cuda) {
   std::vector xyz_scatter(nloc * last_layer_size, 0.0);
-  double * xyz_scatter_dev = NULL, * table_dev = NULL, * em_x_dev = NULL, * em_dev = NULL;
+  double *xyz_scatter_dev = NULL, *table_dev = NULL, *em_x_dev = NULL,
+         *em_dev = NULL;
   deepmd::malloc_device_memory_sync(xyz_scatter_dev, xyz_scatter);
   deepmd::malloc_device_memory_sync(table_dev, table);
   deepmd::malloc_device_memory_sync(em_x_dev, em_x);
   deepmd::malloc_device_memory_sync(em_dev, em);
-  deepmd::tabulate_fusion_se_t_gpu_cuda(xyz_scatter_dev, table_dev, &info[0], em_x_dev, em_dev, nloc, nnei_i, nnei_j, last_layer_size);
-  // deepmd::tabulate_fusion_se_t_cpu(&xyz_scatter[0], &table[0], &info[0], &em_x[0], &em[0], nloc, nnei_i, nnei_j, last_layer_size);
+  deepmd::tabulate_fusion_se_t_gpu_cuda(
+      xyz_scatter_dev, table_dev, &info[0], em_x_dev, em_dev, nloc, nnei_i,
+      nnei_j, last_layer_size);
+  // deepmd::tabulate_fusion_se_t_cpu(&xyz_scatter[0], &table[0],
+  // &info[0], &em_x[0], &em[0], nloc, nnei_i, nnei_j, last_layer_size);
   deepmd::memcpy_device_to_host(xyz_scatter_dev, xyz_scatter);
   deepmd::delete_device_memory(xyz_scatter_dev);
   deepmd::delete_device_memory(table_dev);
@@ -90,24 +5281,26 @@ TEST_F(TestTabulateSeT, tabulate_fusion_se_t_gpu_cuda)
 
   EXPECT_EQ(xyz_scatter.size(), nloc * last_layer_size);
   EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size());
-  for (int jj = 0; jj < xyz_scatter.size() / 100; ++jj){
-    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]) , 1e-5);
+  for (int jj = 0; jj < xyz_scatter.size() / 100; ++jj) {
+    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]), 1e-5);
   }
 }
 
-TEST_F(TestTabulateSeT, tabulate_fusion_se_a_grad_gpu_cuda)
-{
+TEST_F(TestTabulateSeT, tabulate_fusion_se_a_grad_gpu_cuda) {
   std::vector dy_dem_x(em_x.size(), 0.0);
   std::vector dy_dem(em.size(), 0.0);
 
-  double * dy_dem_x_dev = NULL, * dy_dem_dev = NULL, * table_dev = NULL, * em_x_dev = NULL, * em_dev = NULL, * dy_dev = NULL;
+  double *dy_dem_x_dev = NULL, *dy_dem_dev = NULL, *table_dev = NULL,
+         *em_x_dev = NULL, *em_dev = NULL, *dy_dev = NULL;
   deepmd::malloc_device_memory_sync(dy_dem_x_dev, dy_dem_x);
   deepmd::malloc_device_memory_sync(dy_dem_dev, dy_dem);
   deepmd::malloc_device_memory_sync(table_dev, table);
   deepmd::malloc_device_memory_sync(em_x_dev, em_x);
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(dy_dev, dy);
-  deepmd::tabulate_fusion_se_t_grad_gpu_cuda(dy_dem_x_dev, dy_dem_dev, table_dev, &info[0], em_x_dev, em_dev, dy_dev, nloc, nnei_i, nnei_j, last_layer_size);
+  deepmd::tabulate_fusion_se_t_grad_gpu_cuda(
+      dy_dem_x_dev, dy_dem_dev, table_dev, &info[0], em_x_dev, em_dev, dy_dev,
+      nloc, nnei_i, nnei_j, last_layer_size);
   deepmd::memcpy_device_to_host(dy_dem_x_dev, dy_dem_x);
   deepmd::memcpy_device_to_host(dy_dem_dev, dy_dem);
   deepmd::delete_device_memory(dy_dem_x_dev);
@@ -121,25 +5314,27 @@ TEST_F(TestTabulateSeT, tabulate_fusion_se_a_grad_gpu_cuda)
   EXPECT_EQ(dy_dem.size(), nloc * nnei_i * nnei_j);
   EXPECT_EQ(dy_dem_x.size(), expected_dy_dem_x.size());
   EXPECT_EQ(dy_dem.size(), expected_dy_dem.size());
-  for (int jj = 0; jj < dy_dem_x.size(); ++jj){
-    EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]) , 1e-5);
+  for (int jj = 0; jj < dy_dem_x.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]), 1e-5);
   }
-  for (int jj = 0; jj < dy_dem.size(); ++jj){
-    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]) , 1e-5);
+  for (int jj = 0; jj < dy_dem.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]), 1e-5);
   }
 }
-#endif // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-TEST_F(TestTabulateSeT, tabulate_fusion_se_t_gpu_rocm)
-{
+TEST_F(TestTabulateSeT, tabulate_fusion_se_t_gpu_rocm) {
   std::vector xyz_scatter(nloc * last_layer_size, 0.0);
-  double * xyz_scatter_dev = NULL, * table_dev = NULL, * em_x_dev = NULL, * em_dev = NULL;
+  double *xyz_scatter_dev = NULL, *table_dev = NULL, *em_x_dev = NULL,
+         *em_dev = NULL;
   deepmd::malloc_device_memory_sync(xyz_scatter_dev, xyz_scatter);
   deepmd::malloc_device_memory_sync(table_dev, table);
   deepmd::malloc_device_memory_sync(em_x_dev, em_x);
   deepmd::malloc_device_memory_sync(em_dev, em);
-  deepmd::tabulate_fusion_se_t_gpu_rocm(xyz_scatter_dev, table_dev, &info[0], em_x_dev, em_dev, nloc, nnei_i, nnei_j, last_layer_size);
+  deepmd::tabulate_fusion_se_t_gpu_rocm(
+      xyz_scatter_dev, table_dev, &info[0], em_x_dev, em_dev, nloc, nnei_i,
+      nnei_j, last_layer_size);
   deepmd::memcpy_device_to_host(xyz_scatter_dev, xyz_scatter);
   deepmd::delete_device_memory(xyz_scatter_dev);
   deepmd::delete_device_memory(table_dev);
@@ -148,24 +5343,26 @@ TEST_F(TestTabulateSeT, tabulate_fusion_se_t_gpu_rocm)
 
   EXPECT_EQ(xyz_scatter.size(), nloc * last_layer_size);
   EXPECT_EQ(xyz_scatter.size(), expected_xyz_scatter.size());
-  for (int jj = 0; jj < xyz_scatter.size() / 100; ++jj){
-    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]) , 1e-5);
+  for (int jj = 0; jj < xyz_scatter.size() / 100; ++jj) {
+    EXPECT_LT(fabs(xyz_scatter[jj] - expected_xyz_scatter[jj]), 1e-5);
   }
 }
 
-TEST_F(TestTabulateSeT, tabulate_fusion_se_t_grad_gpu_rocm)
-{
+TEST_F(TestTabulateSeT, tabulate_fusion_se_t_grad_gpu_rocm) {
   std::vector dy_dem_x(em_x.size(), 0.0);
   std::vector dy_dem(em.size(), 0.0);
 
-  double * dy_dem_x_dev = NULL, * dy_dem_dev = NULL, * table_dev = NULL, * em_x_dev = NULL, * em_dev = NULL, * dy_dev = NULL;
+  double *dy_dem_x_dev = NULL, *dy_dem_dev = NULL, *table_dev = NULL,
+         *em_x_dev = NULL, *em_dev = NULL, *dy_dev = NULL;
   deepmd::malloc_device_memory_sync(dy_dem_x_dev, dy_dem_x);
   deepmd::malloc_device_memory_sync(dy_dem_dev, dy_dem);
   deepmd::malloc_device_memory_sync(table_dev, table);
   deepmd::malloc_device_memory_sync(em_x_dev, em_x);
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(dy_dev, dy);
-  deepmd::tabulate_fusion_se_t_grad_gpu_rocm(dy_dem_x_dev, dy_dem_dev, table_dev, &info[0], em_x_dev, em_dev, dy_dev, nloc, nnei_i, nnei_j, last_layer_size);
+  deepmd::tabulate_fusion_se_t_grad_gpu_rocm(
+      dy_dem_x_dev, dy_dem_dev, table_dev, &info[0], em_x_dev, em_dev, dy_dev,
+      nloc, nnei_i, nnei_j, last_layer_size);
   deepmd::memcpy_device_to_host(dy_dem_x_dev, dy_dem_x);
   deepmd::memcpy_device_to_host(dy_dem_dev, dy_dem);
   deepmd::delete_device_memory(dy_dem_x_dev);
@@ -179,11 +5376,11 @@ TEST_F(TestTabulateSeT, tabulate_fusion_se_t_grad_gpu_rocm)
   EXPECT_EQ(dy_dem.size(), nloc * nnei_i * nnei_j);
   EXPECT_EQ(dy_dem_x.size(), expected_dy_dem_x.size());
   EXPECT_EQ(dy_dem.size(), expected_dy_dem.size());
-  for (int jj = 0; jj < dy_dem_x.size(); ++jj){
-    EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]) , 1e-5);
+  for (int jj = 0; jj < dy_dem_x.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem_x[jj] - expected_dy_dem_x[jj]), 1e-5);
   }
-  for (int jj = 0; jj < dy_dem.size(); ++jj){
-    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]) , 1e-5);
+  for (int jj = 0; jj < dy_dem.size(); ++jj) {
+    EXPECT_LT(fabs(dy_dem[jj] - expected_dy_dem[jj]), 1e-5);
   }
 }
-#endif // TENSORFLOW_USE_ROCM
+#endif  // TENSORFLOW_USE_ROCM
diff --git a/source/lmp/CMakeLists.txt b/source/lmp/CMakeLists.txt
index c0d2136847..cab0f66f4e 100644
--- a/source/lmp/CMakeLists.txt
+++ b/source/lmp/CMakeLists.txt
@@ -4,17 +4,17 @@ file(GLOB LMP_HEADER *.h)
 file(GLOB LMP_SRC *.cpp)
 file(GLOB LMP_SHSCRIPT *.sh)
 
-unset (LMP_INSTALL_FILES)
-list (APPEND LMP_INSTALL_FILES ${LMP_HEADER})
-list (APPEND LMP_INSTALL_FILES ${LMP_SRC})
-list (APPEND LMP_INSTALL_FILES ${LMP_SHSCRIPT})
+unset(LMP_INSTALL_FILES)
+list(APPEND LMP_INSTALL_FILES ${LMP_HEADER})
+list(APPEND LMP_INSTALL_FILES ${LMP_SRC})
+list(APPEND LMP_INSTALL_FILES ${LMP_SHSCRIPT})
 
 function(_add_lmp_variant variant_name prec_def)
-if (USE_TF_PYTHON_LIBS)
+  if(USE_TF_PYTHON_LIBS)
     configure_file("env_py.sh.in" "env${variant_name}.sh" @ONLY)
-else()
+  else()
     configure_file("env.sh.in" "env${variant_name}.sh" @ONLY)
-endif()
+  endif()
 endfunction()
 _add_lmp_variant("${HIGH_PREC_VARIANT}" "${HIGH_PREC_DEF}")
 _add_lmp_variant("${LOW_PREC_VARIANT}" "${LOW_PREC_DEF}")
diff --git a/source/lmp/Install.sh b/source/lmp/Install.sh
index b50700b9c3..87c51a9766 100644
--- a/source/lmp/Install.sh
+++ b/source/lmp/Install.sh
@@ -13,45 +13,45 @@ export LC_ALL
 
 # arg1 = file, arg2 = file it depends on
 
-action () {
-  if (test $mode = 0) then
-    rm -f ../$1
-  elif (! cmp -s $1 ../$1) then
-    if (test -z "$2" || test -e ../$2) then
-      cp $1 ..
-      if (test $mode = 2) then
-        echo "  updating src/$1"
-      fi
-    fi
-  elif (test -n "$2") then
-    if (test ! -e ../$2) then
-      rm -f ../$1
-    fi
-  fi
+action() {
+	if (test $mode = 0); then
+		rm -f ../$1
+	elif (! cmp -s $1 ../$1); then
+		if (test -z "$2" || test -e ../$2); then
+			cp $1 ..
+			if (test $mode = 2); then
+				echo "  updating src/$1"
+			fi
+		fi
+	elif (test -n "$2"); then
+		if (test ! -e ../$2); then
+			rm -f ../$1
+		fi
+	fi
 }
 
 # all package files with no dependencies
 
 for file in *.cpp *.h; do
-    test -f ${file} && action $file
+	test -f ${file} && action $file
 done
 
 # edit 2 Makefile.package files to include/exclude package info
 
-if (test $1 = 1) then
+if (test $1 = 1); then
 
-  if (test -e ../Makefile.package) then
-    sed -i -e "s|^PKG_INC =[ \t].*|& $NNP_INC|" ../Makefile.package
-    sed -i -e "s|^PKG_PATH =[ \t].*|& $NNP_PATH|" ../Makefile.package
-    sed -i -e "s|^PKG_LIB =[ \t].*|& $NNP_LIB|" ../Makefile.package
-  fi
+	if (test -e ../Makefile.package); then
+		sed -i -e "s|^PKG_INC =[ \t].*|& $NNP_INC|" ../Makefile.package
+		sed -i -e "s|^PKG_PATH =[ \t].*|& $NNP_PATH|" ../Makefile.package
+		sed -i -e "s|^PKG_LIB =[ \t].*|& $NNP_LIB|" ../Makefile.package
+	fi
 
-elif (test $mode = 0) then
+elif (test $mode = 0); then
 
-  if (test -e ../Makefile.package) then
-    sed -i -e "s|$NNP_INC||g" ../Makefile.package
-    sed -i -e "s|$NNP_PATH||g" ../Makefile.package
-    sed -i -e "s|$NNP_LIB||g" ../Makefile.package
-  fi
+	if (test -e ../Makefile.package); then
+		sed -i -e "s|$NNP_INC||g" ../Makefile.package
+		sed -i -e "s|$NNP_PATH||g" ../Makefile.package
+		sed -i -e "s|$NNP_LIB||g" ../Makefile.package
+	fi
 
 fi
diff --git a/source/lmp/compute_deeptensor_atom.cpp b/source/lmp/compute_deeptensor_atom.cpp
index 6030dee3fb..4a7c65ffa9 100644
--- a/source/lmp/compute_deeptensor_atom.cpp
+++ b/source/lmp/compute_deeptensor_atom.cpp
@@ -1,22 +1,21 @@
 #include "compute_deeptensor_atom.h"
-#include 
+
 #include 
+#include 
+
 #include "atom.h"
-#include "update.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-#include "neigh_request.h"
 #include "comm.h"
-#include "force.h"
-#include "pair.h"
+#include "domain.h"
+#include "error.h"
 #include "fix.h"
+#include "force.h"
 #include "memory.h"
-#include "error.h"
-
-#include "domain.h"
-#include "update.h"
 #include "modify.h"
-#include "fix.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "neighbor.h"
+#include "pair.h"
+#include "update.h"
 
 using namespace LAMMPS_NS;
 
@@ -28,12 +27,9 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-ComputeDeeptensorAtom::ComputeDeeptensorAtom(LAMMPS *lmp, int narg, char **arg) :
-  Compute(lmp, narg, arg),
-  dp(lmp),
-  tensor(nullptr)
-{
-  if (narg < 4) error->all(FLERR,"Illegal compute deeptensor/atom command");
+ComputeDeeptensorAtom::ComputeDeeptensorAtom(LAMMPS *lmp, int narg, char **arg)
+    : Compute(lmp, narg, arg), dp(lmp), tensor(nullptr) {
+  if (narg < 4) error->all(FLERR, "Illegal compute deeptensor/atom command");
 
   // parse args
   std::string model_file = std::string(arg[3]);
@@ -55,21 +51,18 @@ ComputeDeeptensorAtom::ComputeDeeptensorAtom(LAMMPS *lmp, int narg, char **arg)
 
 /* ---------------------------------------------------------------------- */
 
-ComputeDeeptensorAtom::~ComputeDeeptensorAtom()
-{
-  memory->destroy(tensor);
-}
+ComputeDeeptensorAtom::~ComputeDeeptensorAtom() { memory->destroy(tensor); }
 
 /* ---------------------------------------------------------------------- */
 
-void ComputeDeeptensorAtom::init()
-{
+void ComputeDeeptensorAtom::init() {
   // need an occasional full neighbor list
 
-#if LAMMPS_VERSION_NUMBER>=20220324
-  neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL);
+#if LAMMPS_VERSION_NUMBER >= 20220324
+  neighbor->add_request(this,
+                        NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL);
 #else
-  int irequest = neighbor->request(this,instance_me);
+  int irequest = neighbor->request(this, instance_me);
   neighbor->requests[irequest]->half = 0;
   neighbor->requests[irequest]->pair = 0;
   neighbor->requests[irequest]->compute = 1;
@@ -78,15 +71,13 @@ void ComputeDeeptensorAtom::init()
 #endif
 }
 
-void ComputeDeeptensorAtom::init_list(int /*id*/, NeighList *ptr)
-{
+void ComputeDeeptensorAtom::init_list(int /*id*/, NeighList *ptr) {
   list = ptr;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void ComputeDeeptensorAtom::compute_peratom()
-{
+void ComputeDeeptensorAtom::compute_peratom() {
   invoked_peratom = update->ntimestep;
 
   // grow local tensor array if necessary
@@ -107,70 +98,69 @@ void ComputeDeeptensorAtom::compute_peratom()
   int nall = nlocal + nghost;
   int newton_pair = force->newton_pair;
 
-  std::vector dcoord (nall * 3, 0.);
-  std::vector dbox (9, 0) ;
-  std::vector dtype (nall);
+  std::vector dcoord(nall * 3, 0.);
+  std::vector dbox(9, 0);
+  std::vector dtype(nall);
   // get type
-  for (int ii = 0; ii < nall; ++ii){
+  for (int ii = 0; ii < nall; ++ii) {
     dtype[ii] = type[ii] - 1;
   }
   // get box
-  dbox[0] = domain->h[0];	// xx
-  dbox[4] = domain->h[1];	// yy
-  dbox[8] = domain->h[2];	// zz
-  dbox[7] = domain->h[3];	// zy
-  dbox[6] = domain->h[4];	// zx
-  dbox[3] = domain->h[5];	// yx
+  dbox[0] = domain->h[0];  // xx
+  dbox[4] = domain->h[1];  // yy
+  dbox[8] = domain->h[2];  // zz
+  dbox[7] = domain->h[3];  // zy
+  dbox[6] = domain->h[4];  // zx
+  dbox[3] = domain->h[5];  // yx
   // get coord
-  for (int ii = 0; ii < nall; ++ii){
-    for (int dd = 0; dd < 3; ++dd){
-      dcoord[ii*3+dd] = x[ii][dd] - domain->boxlo[dd];
+  for (int ii = 0; ii < nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      dcoord[ii * 3 + dd] = x[ii][dd] - domain->boxlo[dd];
     }
   }
 
   // invoke full neighbor list (will copy or build if necessary)
   neighbor->build_one(list);
-  deepmd::InputNlist lmp_list (list->inum, list->ilist, list->numneigh, list->firstneigh);
-  
+  deepmd::InputNlist lmp_list(list->inum, list->ilist, list->numneigh,
+                              list->firstneigh);
+
   // declare outputs
-  std::vector gtensor, force, virial, atensor, avirial;
+  std::vector gtensor, force, virial, atensor, avirial;
 
   // compute tensors
-  dt.compute (gtensor, force, virial, atensor, avirial,
-	      dcoord, dtype, dbox, nghost, lmp_list);
-  
+  dt.compute(gtensor, force, virial, atensor, avirial, dcoord, dtype, dbox,
+             nghost, lmp_list);
+
   // store the result in tensor
   int iter_tensor = 0;
-  for(int ii = 0; ii < nlocal; ++ii){
+  for (int ii = 0; ii < nlocal; ++ii) {
     std::vector::iterator _it =
-	std::find(sel_types.begin(), sel_types.end(), dtype[ii]);
-    bool selected = (_it != sel_types.end());    
-    bool ingroup = (mask[ii] & groupbit);    
+        std::find(sel_types.begin(), sel_types.end(), dtype[ii]);
+    bool selected = (_it != sel_types.end());
+    bool ingroup = (mask[ii] & groupbit);
     // record when selected and in group
-    if (selected && ingroup){
-      for(int jj = 0; jj < size_peratom_cols; ++jj){
-	tensor[ii][jj] = atensor[iter_tensor+jj];
+    if (selected && ingroup) {
+      for (int jj = 0; jj < size_peratom_cols; ++jj) {
+        tensor[ii][jj] = atensor[iter_tensor + jj];
       }
     }
     // if not selected or not in group set to 0.
-    else{
-      for(int jj = 0; jj < size_peratom_cols; ++jj){
-	tensor[ii][jj] = 0.0;
+    else {
+      for (int jj = 0; jj < size_peratom_cols; ++jj) {
+        tensor[ii][jj] = 0.0;
       }
-    }    
+    }
     if (selected) {
       iter_tensor += size_peratom_cols;
     }
   }
 }
 
-
 /* ----------------------------------------------------------------------
    memory usage of local atom-based array
 ------------------------------------------------------------------------- */
 
-double ComputeDeeptensorAtom::memory_usage()
-{
-  double bytes = nmax*size_peratom_cols * sizeof(double);
+double ComputeDeeptensorAtom::memory_usage() {
+  double bytes = nmax * size_peratom_cols * sizeof(double);
   return bytes;
 }
diff --git a/source/lmp/compute_deeptensor_atom.h b/source/lmp/compute_deeptensor_atom.h
index 40d68f99f9..0209f5f0dc 100644
--- a/source/lmp/compute_deeptensor_atom.h
+++ b/source/lmp/compute_deeptensor_atom.h
@@ -1,6 +1,6 @@
 #ifdef COMPUTE_CLASS
 
-ComputeStyle(deeptensor/atom,ComputeDeeptensorAtom)
+ComputeStyle(deeptensor / atom, ComputeDeeptensorAtom)
 
 #else
 
@@ -32,11 +32,10 @@ class ComputeDeeptensorAtom : public Compute {
   PairDeepMD dp;
   class NeighList *list;
   deepmd::DeepTensor dt;
-  std::vector sel_types;
+  std::vector sel_types;
 };
 
-}
+}  // namespace LAMMPS_NS
 
 #endif
 #endif
-
diff --git a/source/lmp/fix_dplr.cpp b/source/lmp/fix_dplr.cpp
index 21f0179625..a809970f97 100644
--- a/source/lmp/fix_dplr.cpp
+++ b/source/lmp/fix_dplr.cpp
@@ -1,31 +1,31 @@
-#include 
+#include "fix_dplr.h"
+
 #include 
+#include 
 #include 
+
 #include "atom.h"
-#include "domain.h"
 #include "comm.h"
-#include "force.h"
-#include "update.h"
+#include "domain.h"
 #include "error.h"
-#include "neighbor.h"
-#include "neigh_list.h"
 #include "fix.h"
-#include "fix_dplr.h"
+#include "force.h"
+#include "neigh_list.h"
+#include "neighbor.h"
 #include "pppm_dplr.h"
+#include "update.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace std;
 
-static bool 
-is_key (const string& input) 
-{
-  vector keys ;
+static bool is_key(const string &input) {
+  vector keys;
   keys.push_back("model");
   keys.push_back("type_associate");
   keys.push_back("bond_type");
   keys.push_back("efield");
-  for (int ii = 0; ii < keys.size(); ++ii){
+  for (int ii = 0; ii < keys.size(); ++ii) {
     if (input == keys[ii]) {
       return true;
     }
@@ -33,15 +33,13 @@ is_key (const string& input)
   return false;
 }
 
-
-FixDPLR::FixDPLR(LAMMPS *lmp, int narg, char **arg) 
-    :Fix(lmp, narg, arg), 
-     efield(3, 0.0), 
-     efield_fsum(4, 0.0), 
-     efield_fsum_all(4, 0.0), 
-     efield_force_flag(0)
-{
-#if LAMMPS_VERSION_NUMBER>=20210210
+FixDPLR::FixDPLR(LAMMPS *lmp, int narg, char **arg)
+    : Fix(lmp, narg, arg),
+      efield(3, 0.0),
+      efield_fsum(4, 0.0),
+      efield_fsum_all(4, 0.0),
+      efield_force_flag(0) {
+#if LAMMPS_VERSION_NUMBER >= 20210210
   // lammps/lammps#2560
   energy_global_flag = 1;
   virial_global_flag = 1;
@@ -49,54 +47,57 @@ FixDPLR::FixDPLR(LAMMPS *lmp, int narg, char **arg)
   virial_flag = 1;
 #endif
 
-  if (strcmp(update->unit_style,"metal") != 0) {
-    error->all(FLERR,"Pair deepmd requires metal unit, please set it by \"units metal\"");
+  if (strcmp(update->unit_style, "metal") != 0) {
+    error->all(
+        FLERR,
+        "Pair deepmd requires metal unit, please set it by \"units metal\"");
   }
-  
+
   int iarg = 3;
   vector map_vec;
   bond_type.clear();
   while (iarg < narg) {
-    if (! is_key(arg[iarg])) {
-      error->all(FLERR,"Illegal pair_style command\nwrong number of parameters\n");
+    if (!is_key(arg[iarg])) {
+      error->all(FLERR,
+                 "Illegal pair_style command\nwrong number of parameters\n");
     }
     if (string(arg[iarg]) == string("model")) {
-      if (iarg+1 > narg) error->all(FLERR,"Illegal fix adapt command");
-      model = string(arg[iarg+1]);
+      if (iarg + 1 > narg) error->all(FLERR, "Illegal fix adapt command");
+      model = string(arg[iarg + 1]);
       iarg += 2;
-    }
-    else if (string(arg[iarg]) == string("efield")) {
-      if (iarg+3 > narg) error->all(FLERR,"Illegal fix adapt command, efield should be provided 3 float numbers");
-      efield[0] = atof(arg[iarg+1]);
-      efield[1] = atof(arg[iarg+2]);
-      efield[2] = atof(arg[iarg+3]);
+    } else if (string(arg[iarg]) == string("efield")) {
+      if (iarg + 3 > narg)
+        error->all(FLERR,
+                   "Illegal fix adapt command, efield should be provided 3 "
+                   "float numbers");
+      efield[0] = atof(arg[iarg + 1]);
+      efield[1] = atof(arg[iarg + 2]);
+      efield[2] = atof(arg[iarg + 3]);
       iarg += 4;
-    }
-    else if (string(arg[iarg]) == string("type_associate")) {
-      int iend = iarg+1;
-      while (iend < narg && (! is_key(arg[iend]) )) {
-	map_vec.push_back(atoi(arg[iend])-1);
-	iend ++;
+    } else if (string(arg[iarg]) == string("type_associate")) {
+      int iend = iarg + 1;
+      while (iend < narg && (!is_key(arg[iend]))) {
+        map_vec.push_back(atoi(arg[iend]) - 1);
+        iend++;
       }
       iarg = iend;
-    }
-    else if (string(arg[iarg]) == string("bond_type")) {
-      int iend = iarg+1;
-      while (iend < narg && (! is_key(arg[iend]) )) {
-	bond_type.push_back(atoi(arg[iend])-1);
-	iend ++;
+    } else if (string(arg[iarg]) == string("bond_type")) {
+      int iend = iarg + 1;
+      while (iend < narg && (!is_key(arg[iend]))) {
+        bond_type.push_back(atoi(arg[iend]) - 1);
+        iend++;
       }
       sort(bond_type.begin(), bond_type.end());
       iarg = iend;
-    }
-    else {
+    } else {
       break;
     }
   }
-  assert(map_vec.size() % 2 == 0), "number of ints provided by type_associate should be even";
-  for (int ii = 0; ii < map_vec.size()/2; ++ii){
-    type_asso[map_vec[ii*2+0]] = map_vec[ii*2+1];
-    bk_type_asso[map_vec[ii*2+1]] = map_vec[ii*2+0];
+  assert(map_vec.size() % 2 == 0),
+      "number of ints provided by type_associate should be even";
+  for (int ii = 0; ii < map_vec.size() / 2; ++ii) {
+    type_asso[map_vec[ii * 2 + 0]] = map_vec[ii * 2 + 1];
+    bk_type_asso[map_vec[ii * 2 + 1]] = map_vec[ii * 2 + 0];
   }
 
   // dpt.init(model);
@@ -107,23 +108,22 @@ FixDPLR::FixDPLR(LAMMPS *lmp, int narg, char **arg)
   sel_type = dpt.sel_types();
   sort(sel_type.begin(), sel_type.end());
   dpl_type.clear();
-  for (int ii = 0; ii < sel_type.size(); ++ii){
+  for (int ii = 0; ii < sel_type.size(); ++ii) {
     dpl_type.push_back(type_asso[sel_type[ii]]);
   }
 
-  pair_deepmd = (PairDeepMD *) force->pair_match("deepmd",1);
+  pair_deepmd = (PairDeepMD *)force->pair_match("deepmd", 1);
   if (!pair_deepmd) {
-    error->all(FLERR,"pair_style deepmd should be set before this fix\n");
+    error->all(FLERR, "pair_style deepmd should be set before this fix\n");
   }
 
   // set comm size needed by this fix
   comm_reverse = 3;
 }
 
-int FixDPLR::setmask()
-{
+int FixDPLR::setmask() {
   int mask = 0;
-#if LAMMPS_VERSION_NUMBER<20210210
+#if LAMMPS_VERSION_NUMBER < 20210210
   // THERMO_ENERGY removed in lammps/lammps#2560
   mask |= THERMO_ENERGY;
 #endif
@@ -133,24 +133,22 @@ int FixDPLR::setmask()
   return mask;
 }
 
-void FixDPLR::init()
-{
+void FixDPLR::init() {
   // double **xx = atom->x;
   // double **vv = atom->v;
   // int nlocal = atom->nlocal;
   // for (int ii = 0; ii < nlocal; ++ii){
-  //   cout << xx[ii][0] << " " 
-  // 	 << xx[ii][1] << " " 
-  // 	 << xx[ii][2] << "   " 
-  // 	 << vv[ii][0] << " " 
-  // 	 << vv[ii][1] << " " 
-  // 	 << vv[ii][2] << " " 
+  //   cout << xx[ii][0] << " "
+  // 	 << xx[ii][1] << " "
+  // 	 << xx[ii][2] << "   "
+  // 	 << vv[ii][0] << " "
+  // 	 << vv[ii][1] << " "
+  // 	 << vv[ii][2] << " "
   // 	 << endl;
   // }
 }
 
-void FixDPLR::setup(int vflag)
-{
+void FixDPLR::setup(int vflag) {
   // if (strstr(update->integrate_style,"verlet"))
   //   post_force(vflag);
   // else {
@@ -158,64 +156,60 @@ void FixDPLR::setup(int vflag)
   // }
   if (vflag) {
     v_setup(vflag);
-  }
-  else {
+  } else {
     evflag = 0;
   }
 }
 
-
-void
-FixDPLR::get_valid_pairs(vector >& pairs)
-{
+void FixDPLR::get_valid_pairs(vector > &pairs) {
   pairs.clear();
-  
+
   int nlocal = atom->nlocal;
   int nghost = atom->nghost;
   int nall = nlocal + nghost;
-  vector dtype (nall);
+  vector dtype(nall);
   // get type
   {
     int *type = atom->type;
-    for (int ii = 0; ii < nall; ++ii){
+    for (int ii = 0; ii < nall; ++ii) {
       dtype[ii] = type[ii] - 1;
     }
   }
 
   int **bondlist = neighbor->bondlist;
   int nbondlist = neighbor->nbondlist;
-  for (int ii = 0; ii < nbondlist; ++ii){
-    int idx0=-1, idx1=-1;
+  for (int ii = 0; ii < nbondlist; ++ii) {
+    int idx0 = -1, idx1 = -1;
     int bd_type = bondlist[ii][2] - 1;
-    if ( ! binary_search(bond_type.begin(), bond_type.end(), bd_type) ){
+    if (!binary_search(bond_type.begin(), bond_type.end(), bd_type)) {
       continue;
     }
-    if (binary_search(sel_type.begin(), sel_type.end(), dtype[bondlist[ii][0]]) 
-	&& 
-	binary_search(dpl_type.begin(), dpl_type.end(), dtype[bondlist[ii][1]])
-	){
+    if (binary_search(sel_type.begin(), sel_type.end(),
+                      dtype[bondlist[ii][0]]) &&
+        binary_search(dpl_type.begin(), dpl_type.end(),
+                      dtype[bondlist[ii][1]])) {
       idx0 = bondlist[ii][0];
       idx1 = bondlist[ii][1];
-    }
-    else if (binary_search(sel_type.begin(), sel_type.end(), dtype[bondlist[ii][1]])
-	     &&
-	     binary_search(dpl_type.begin(), dpl_type.end(), dtype[bondlist[ii][0]])
-	){
+    } else if (binary_search(sel_type.begin(), sel_type.end(),
+                             dtype[bondlist[ii][1]]) &&
+               binary_search(dpl_type.begin(), dpl_type.end(),
+                             dtype[bondlist[ii][0]])) {
       idx0 = bondlist[ii][1];
       idx1 = bondlist[ii][0];
+    } else {
+      error->all(FLERR,
+                 "find a bonded pair the types of which are not associated");
     }
-    else {
-      error->all(FLERR, "find a bonded pair the types of which are not associated");
+    if (!(idx0 < nlocal && idx1 < nlocal)) {
+      error->all(FLERR,
+                 "find a bonded pair that is not on the same processor, "
+                 "something should not happen");
     }
-    if ( ! (idx0 < nlocal && idx1 < nlocal) ){
-      error->all(FLERR, "find a bonded pair that is not on the same processor, something should not happen");
-    }
-    pairs.push_back(pair(idx0, idx1));
+    pairs.push_back(pair(idx0, idx1));
   }
 }
 
-void FixDPLR::post_integrate()
-{
+void FixDPLR::post_integrate() {
   double **x = atom->x;
   double **v = atom->v;
   int *type = atom->type;
@@ -223,22 +217,21 @@ void FixDPLR::post_integrate()
   int nghost = atom->nghost;
   int nall = nlocal + nghost;
 
-  vector > valid_pairs;
-  get_valid_pairs(valid_pairs);  
-  
-  for (int ii = 0; ii < valid_pairs.size(); ++ii){
+  vector > valid_pairs;
+  get_valid_pairs(valid_pairs);
+
+  for (int ii = 0; ii < valid_pairs.size(); ++ii) {
     int idx0 = valid_pairs[ii].first;
     int idx1 = valid_pairs[ii].second;
-    for (int dd = 0; dd < 3; ++dd){
-      x[idx1][dd] = x[idx0][dd] ;
-      v[idx1][dd] = v[idx0][dd] ;
+    for (int dd = 0; dd < 3; ++dd) {
+      x[idx1][dd] = x[idx0][dd];
+      v[idx1][dd] = v[idx0][dd];
       // v[idx1][dd] = 0.0;
     }
   }
 }
 
-void FixDPLR::pre_force(int vflag)
-{
+void FixDPLR::pre_force(int vflag) {
   double **x = atom->x;
   int *type = atom->type;
   int nlocal = atom->nlocal;
@@ -246,33 +239,35 @@ void FixDPLR::pre_force(int vflag)
   int nall = nlocal + nghost;
 
   // if (eflag_atom) {
-  //   error->all(FLERR,"atomic energy calculation is not supported by this fix\n");
+  //   error->all(FLERR,"atomic energy calculation is not supported by this
+  //   fix\n");
   // }
-  
+
   // declear inputs
-  vector dtype (nall);
-  vector dbox (9, 0) ;
-  vector dcoord (nall * 3, 0.);
+  vector dtype(nall);
+  vector dbox(9, 0);
+  vector dcoord(nall * 3, 0.);
   // get type
-  for (int ii = 0; ii < nall; ++ii){
+  for (int ii = 0; ii < nall; ++ii) {
     dtype[ii] = type[ii] - 1;
-  }  
+  }
   // get box
-  dbox[0] = domain->h[0];	// xx
-  dbox[4] = domain->h[1];	// yy
-  dbox[8] = domain->h[2];	// zz
-  dbox[7] = domain->h[3];	// zy
-  dbox[6] = domain->h[4];	// zx
-  dbox[3] = domain->h[5];	// yx
+  dbox[0] = domain->h[0];  // xx
+  dbox[4] = domain->h[1];  // yy
+  dbox[8] = domain->h[2];  // zz
+  dbox[7] = domain->h[3];  // zy
+  dbox[6] = domain->h[4];  // zx
+  dbox[3] = domain->h[5];  // yx
   // get coord
-  for (int ii = 0; ii < nall; ++ii){
-    for (int dd = 0; dd < 3; ++dd){
-      dcoord[ii*3+dd] = x[ii][dd] - domain->boxlo[dd];
+  for (int ii = 0; ii < nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      dcoord[ii * 3 + dd] = x[ii][dd] - domain->boxlo[dd];
     }
   }
   // get lammps nlist
-  NeighList * list = pair_deepmd->list;
-  deepmd::InputNlist lmp_list (list->inum, list->ilist, list->numneigh, list->firstneigh);
+  NeighList *list = pair_deepmd->list;
+  deepmd::InputNlist lmp_list(list->inum, list->ilist, list->numneigh,
+                              list->firstneigh);
   // declear output
   vector tensor;
   // compute
@@ -303,40 +298,42 @@ void FixDPLR::pre_force(int vflag)
 
   // selected type
   vector dpl_type;
-  for (int ii = 0; ii < sel_type.size(); ++ii){
+  for (int ii = 0; ii < sel_type.size(); ++ii) {
     dpl_type.push_back(type_asso[sel_type[ii]]);
   }
   vector sel_fwd, sel_bwd;
   int sel_nghost;
-  deepmd::select_by_type(sel_fwd, sel_bwd, sel_nghost, dcoord, dtype, nghost, sel_type);
+  deepmd::select_by_type(sel_fwd, sel_bwd, sel_nghost, dcoord, dtype, nghost,
+                         sel_type);
   int sel_nall = sel_bwd.size();
   int sel_nloc = sel_nall - sel_nghost;
   vector sel_type(sel_bwd.size());
   deepmd::select_map(sel_type, dtype, sel_fwd, 1);
-  
-  // Yixiao: because the deeptensor already return the correct order, the following map is no longer needed
-  // deepmd::AtomMap atom_map(sel_type.begin(), sel_type.begin() + sel_nloc);
-  // const vector & sort_fwd_map(atom_map.get_fwd_map());
-
-  vector > valid_pairs;
-  get_valid_pairs(valid_pairs);  
-  
+
+  // Yixiao: because the deeptensor already return the correct order, the
+  // following map is no longer needed deepmd::AtomMap
+  // atom_map(sel_type.begin(), sel_type.begin() + sel_nloc); const vector
+  // & sort_fwd_map(atom_map.get_fwd_map());
+
+  vector > valid_pairs;
+  get_valid_pairs(valid_pairs);
+
   int odim = dpt.output_dim();
   assert(odim == 3);
   dipole_recd.resize(nall * 3);
   fill(dipole_recd.begin(), dipole_recd.end(), 0.0);
-  for (int ii = 0; ii < valid_pairs.size(); ++ii){
+  for (int ii = 0; ii < valid_pairs.size(); ++ii) {
     int idx0 = valid_pairs[ii].first;
     int idx1 = valid_pairs[ii].second;
-    assert(idx0 < sel_fwd.size()); // && sel_fwd[idx0] < sort_fwd_map.size());
+    assert(idx0 < sel_fwd.size());  // && sel_fwd[idx0] < sort_fwd_map.size());
     // Yixiao: the sort map is no longer needed
     // int res_idx = sort_fwd_map[sel_fwd[idx0]];
     int res_idx = sel_fwd[idx0];
     // int ret_idx = dpl_bwd[res_idx];
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       x[idx1][dd] = x[idx0][dd] + tensor[res_idx * 3 + dd];
       // res_buff[idx1 * odim + dd] = tensor[res_idx * odim + dd];
-      dipole_recd[idx0*3+dd] = tensor[res_idx * 3 + dd];
+      dipole_recd[idx0 * 3 + dd] = tensor[res_idx * 3 + dd];
     }
   }
   // cout << "-------------------- fix/dplr: pre force " << endl;
@@ -349,103 +346,105 @@ void FixDPLR::pre_force(int vflag)
   // }
 }
 
-
-void FixDPLR::post_force(int vflag)
-{
+void FixDPLR::post_force(int vflag) {
   if (vflag) {
     v_setup(vflag);
-  }
-  else {
+  } else {
     evflag = 0;
   }
   if (vflag_atom) {
-    error->all(FLERR,"atomic virial calculation is not supported by this fix\n");
+    error->all(FLERR,
+               "atomic virial calculation is not supported by this fix\n");
   }
 
-  PPPMDPLR * pppm_dplr = (PPPMDPLR*) force->kspace_match("pppm/dplr", 1);
+  PPPMDPLR *pppm_dplr = (PPPMDPLR *)force->kspace_match("pppm/dplr", 1);
   if (!pppm_dplr) {
-    error->all(FLERR,"kspace_style pppm/dplr should be set before this fix\n");
+    error->all(FLERR, "kspace_style pppm/dplr should be set before this fix\n");
   }
-  const vector & dfele_(pppm_dplr->get_fele());
+  const vector &dfele_(pppm_dplr->get_fele());
   int nlocal = atom->nlocal;
   int nghost = atom->nghost;
   int nall = nlocal + nghost;
-  vector dcoord(nall*3, 0.0), dbox(9, 0.0), dfele(nlocal*3, 0.0);
+  vector dcoord(nall * 3, 0.0), dbox(9, 0.0),
+      dfele(nlocal * 3, 0.0);
   vector dtype(nall, 0);
   // set values for dcoord, dbox, dfele
   {
     int *type = atom->type;
-    for (int ii = 0; ii < nall; ++ii){
+    for (int ii = 0; ii < nall; ++ii) {
       dtype[ii] = type[ii] - 1;
     }
-    dbox[0] = domain->h[0];	// xx
-    dbox[4] = domain->h[1];	// yy
-    dbox[8] = domain->h[2];	// zz
-    dbox[7] = domain->h[3];	// zy
-    dbox[6] = domain->h[4];	// zx
-    dbox[3] = domain->h[5];	// yx
+    dbox[0] = domain->h[0];  // xx
+    dbox[4] = domain->h[1];  // yy
+    dbox[8] = domain->h[2];  // zz
+    dbox[7] = domain->h[3];  // zy
+    dbox[6] = domain->h[4];  // zx
+    dbox[3] = domain->h[5];  // yx
     // get coord
-    double ** x = atom->x;
-    for (int ii = 0; ii < nall; ++ii){
-      for (int dd = 0; dd < 3; ++dd){
-	dcoord[ii*3+dd] = x[ii][dd] - domain->boxlo[dd];
+    double **x = atom->x;
+    for (int ii = 0; ii < nall; ++ii) {
+      for (int dd = 0; dd < 3; ++dd) {
+        dcoord[ii * 3 + dd] = x[ii][dd] - domain->boxlo[dd];
       }
     }
     assert(dfele_.size() == nlocal * 3);
     // revise force according to efield
-    for (int ii = 0; ii < nlocal*3; ++ii){
+    for (int ii = 0; ii < nlocal * 3; ++ii) {
       dfele[ii] = dfele_[ii];
     }
     // revise force and virial according to efield
-    double * q = atom->q;
+    double *q = atom->q;
     imageint *image = atom->image;
     double unwrap[3];
     double v[6];
     efield_fsum[0] = efield_fsum[1] = efield_fsum[2] = efield_fsum[3] = 0.0;
     efield_force_flag = 0;
-    for (int ii = 0; ii < nlocal; ++ii){
+    for (int ii = 0; ii < nlocal; ++ii) {
       double tmpf[3];
-      for (int dd = 0; dd < 3; ++dd){
-	tmpf[dd] = q[ii] * efield[dd];
+      for (int dd = 0; dd < 3; ++dd) {
+        tmpf[dd] = q[ii] * efield[dd];
       }
-      for (int dd = 0; dd < 3; ++dd){
-	dfele[ii*3+dd] += tmpf[dd];
+      for (int dd = 0; dd < 3; ++dd) {
+        dfele[ii * 3 + dd] += tmpf[dd];
       }
-      domain->unmap(x[ii],image[ii],unwrap);
-      efield_fsum[0] -= tmpf[0]*unwrap[0]+tmpf[1]*unwrap[1]+tmpf[2]*unwrap[2];
+      domain->unmap(x[ii], image[ii], unwrap);
+      efield_fsum[0] -=
+          tmpf[0] * unwrap[0] + tmpf[1] * unwrap[1] + tmpf[2] * unwrap[2];
       efield_fsum[1] += tmpf[0];
       efield_fsum[2] += tmpf[1];
       efield_fsum[3] += tmpf[2];
       if (evflag) {
-	v[0] = tmpf[0] *unwrap[0];
-	v[1] = tmpf[1] *unwrap[1];
-	v[2] = tmpf[2] *unwrap[2];
-	v[3] = tmpf[0] *unwrap[1];
-	v[4] = tmpf[0] *unwrap[2];
-	v[5] = tmpf[1] *unwrap[2];
-	v_tally(ii, v);
+        v[0] = tmpf[0] * unwrap[0];
+        v[1] = tmpf[1] * unwrap[1];
+        v[2] = tmpf[2] * unwrap[2];
+        v[3] = tmpf[0] * unwrap[1];
+        v[4] = tmpf[0] * unwrap[2];
+        v[5] = tmpf[1] * unwrap[2];
+        v_tally(ii, v);
       }
     }
   }
   // lmp nlist
-  NeighList * list = pair_deepmd->list;
-  deepmd::InputNlist lmp_list (list->inum, list->ilist, list->numneigh, list->firstneigh);
+  NeighList *list = pair_deepmd->list;
+  deepmd::InputNlist lmp_list(list->inum, list->ilist, list->numneigh,
+                              list->firstneigh);
   // bonded pairs
-  vector > valid_pairs;
-  get_valid_pairs(valid_pairs);  
+  vector > valid_pairs;
+  get_valid_pairs(valid_pairs);
   // output vects
   vector dfcorr, dvcorr;
   // compute
-  dtm.compute(dfcorr, dvcorr, dcoord, dtype, dbox, valid_pairs, dfele, nghost, lmp_list);
+  dtm.compute(dfcorr, dvcorr, dcoord, dtype, dbox, valid_pairs, dfele, nghost,
+              lmp_list);
   assert(dfcorr.size() == dcoord.size());
   assert(dfcorr.size() == nall * 3);
   // backward communication of fcorr
   dfcorr_buff.resize(dfcorr.size());
   copy(dfcorr.begin(), dfcorr.end(), dfcorr_buff.begin());
-#if LAMMPS_VERSION_NUMBER>=20220324
-  comm->reverse_comm(this,3);
+#if LAMMPS_VERSION_NUMBER >= 20220324
+  comm->reverse_comm(this, 3);
 #else
-  comm->reverse_comm_fix(this,3);
+  comm->reverse_comm_fix(this, 3);
 #endif
   copy(dfcorr_buff.begin(), dfcorr_buff.end(), dfcorr.begin());
   // // check and print
@@ -464,10 +463,10 @@ void FixDPLR::post_force(int vflag)
   //   cout << endl;
   // }
   // apply the force correction
-  double ** f = atom->f;
-  for (int ii = 0; ii < nlocal; ++ii){
-    for(int dd = 0; dd < 3; ++dd){
-      f[ii][dd] += dfcorr[ii*3+dd];
+  double **f = atom->f;
+  for (int ii = 0; ii < nlocal; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      f[ii][dd] += dfcorr[ii * 3 + dd];
     }
   }
   // cout << "virial corr1 ";
@@ -475,21 +474,22 @@ void FixDPLR::post_force(int vflag)
   //   cout << dvcorr[ii] << " " ;
   // }
   // cout << endl;
-  for (int ii = 0; ii < valid_pairs.size(); ++ii){
+  for (int ii = 0; ii < valid_pairs.size(); ++ii) {
     int idx0 = valid_pairs[ii].first;
     int idx1 = valid_pairs[ii].second;
-    for (int dd0 = 0; dd0 < 3; ++dd0){
-      for (int dd1 = 0; dd1 < 3; ++dd1){
-	dvcorr[dd0*3+dd1] -= dfele[idx1*3+dd0] * dipole_recd[idx0*3+dd1];
+    for (int dd0 = 0; dd0 < 3; ++dd0) {
+      for (int dd1 = 0; dd1 < 3; ++dd1) {
+        dvcorr[dd0 * 3 + dd1] -=
+            dfele[idx1 * 3 + dd0] * dipole_recd[idx0 * 3 + dd1];
       }
-    }    
+    }
   }
   // cout << "virial corr2 ";
   // for (int ii = 0; ii < 9; ++ii){
   //   cout << dvcorr[ii] << " " ;
   // }
   // cout << endl;
-  if (evflag){
+  if (evflag) {
     double vv[6] = {0.0};
     vv[0] += dvcorr[0];
     vv[1] += dvcorr[4];
@@ -501,29 +501,26 @@ void FixDPLR::post_force(int vflag)
   }
 }
 
-
-int FixDPLR::pack_reverse_comm(int n, int first, double *buf)
-{
+int FixDPLR::pack_reverse_comm(int n, int first, double *buf) {
   int m = 0;
   int last = first + n;
   for (int i = first; i < last; i++) {
-    buf[m++] = dfcorr_buff[3*i+0];
-    buf[m++] = dfcorr_buff[3*i+1];
-    buf[m++] = dfcorr_buff[3*i+2];
+    buf[m++] = dfcorr_buff[3 * i + 0];
+    buf[m++] = dfcorr_buff[3 * i + 1];
+    buf[m++] = dfcorr_buff[3 * i + 2];
   }
   return m;
 }
 
 /* ---------------------------------------------------------------------- */
 
-void FixDPLR::unpack_reverse_comm(int n, int *list, double *buf)
-{
+void FixDPLR::unpack_reverse_comm(int n, int *list, double *buf) {
   int m = 0;
   for (int i = 0; i < n; i++) {
     int j = list[i];
-    dfcorr_buff[3*j+0] += buf[m++];
-    dfcorr_buff[3*j+1] += buf[m++];
-    dfcorr_buff[3*j+2] += buf[m++];
+    dfcorr_buff[3 * j + 0] += buf[m++];
+    dfcorr_buff[3 * j + 1] += buf[m++];
+    dfcorr_buff[3 * j + 2] += buf[m++];
   }
 }
 
@@ -531,10 +528,10 @@ void FixDPLR::unpack_reverse_comm(int n, int *list, double *buf)
    return energy added by fix
 ------------------------------------------------------------------------- */
 
-double FixDPLR::compute_scalar(void)
-{
+double FixDPLR::compute_scalar(void) {
   if (efield_force_flag == 0) {
-    MPI_Allreduce(&efield_fsum[0],&efield_fsum_all[0],4,MPI_DOUBLE,MPI_SUM,world);
+    MPI_Allreduce(&efield_fsum[0], &efield_fsum_all[0], 4, MPI_DOUBLE, MPI_SUM,
+                  world);
     efield_force_flag = 1;
   }
   return efield_fsum_all[0];
@@ -544,11 +541,11 @@ double FixDPLR::compute_scalar(void)
    return total extra force due to fix
 ------------------------------------------------------------------------- */
 
-double FixDPLR::compute_vector(int n)
-{
+double FixDPLR::compute_vector(int n) {
   if (efield_force_flag == 0) {
-    MPI_Allreduce(&efield_fsum[0],&efield_fsum_all[0],4,MPI_DOUBLE,MPI_SUM,world);
+    MPI_Allreduce(&efield_fsum[0], &efield_fsum_all[0], 4, MPI_DOUBLE, MPI_SUM,
+                  world);
     efield_force_flag = 1;
   }
-  return efield_fsum_all[n+1];
+  return efield_fsum_all[n + 1];
 }
diff --git a/source/lmp/fix_dplr.h b/source/lmp/fix_dplr.h
index 3367285ed3..8bcf0a4a89 100644
--- a/source/lmp/fix_dplr.h
+++ b/source/lmp/fix_dplr.h
@@ -1,6 +1,6 @@
 #ifdef FIX_CLASS
 
-FixStyle(dplr,FixDPLR)
+FixStyle(dplr, FixDPLR)
 
 #else
 
@@ -8,15 +8,17 @@ FixStyle(dplr,FixDPLR)
 #define LMP_FIX_DPLR_H
 
 #include 
+
 #include 
+
 #include "fix.h"
 #include "pair_deepmd.h"
 #ifdef LMPPLUGIN
-#include "DeepTensor.h"
 #include "DataModifier.h"
+#include "DeepTensor.h"
 #else
-#include "deepmd/DeepTensor.h"
 #include "deepmd/DataModifier.h"
+#include "deepmd/DeepTensor.h"
 #endif
 
 #ifdef HIGH_PREC
@@ -26,39 +28,40 @@ FixStyle(dplr,FixDPLR)
 #endif
 
 namespace LAMMPS_NS {
-  class FixDPLR : public Fix {
-public:
-    FixDPLR(class LAMMPS *, int, char **);
-    ~FixDPLR() override {};
-    int setmask() override;
-    void init() override;
-    void setup(int) override;
-    void post_integrate() override;
-    void pre_force(int) override;
-    void post_force(int) override;
-    int pack_reverse_comm(int, int, double *) override;
-    void unpack_reverse_comm(int, int *, double *) override;
-    double compute_scalar(void) override;
-    double compute_vector(int) override;
-private:
-    PairDeepMD * pair_deepmd;
-    deepmd::DeepTensor dpt;
-    deepmd::DipoleChargeModifier dtm;
-    std::string model;
-    int ntypes;
-    std::vector sel_type;
-    std::vector dpl_type;
-    std::vector bond_type;
-    std::map type_asso;
-    std::map bk_type_asso;
-    std::vector dipole_recd;
-    std::vector dfcorr_buff;
-    std::vector efield;
-    std::vector efield_fsum, efield_fsum_all;
-    int efield_force_flag;
-    void get_valid_pairs(std::vector >& pairs);
-  };
-}
+class FixDPLR : public Fix {
+ public:
+  FixDPLR(class LAMMPS *, int, char **);
+  ~FixDPLR() override{};
+  int setmask() override;
+  void init() override;
+  void setup(int) override;
+  void post_integrate() override;
+  void pre_force(int) override;
+  void post_force(int) override;
+  int pack_reverse_comm(int, int, double *) override;
+  void unpack_reverse_comm(int, int *, double *) override;
+  double compute_scalar(void) override;
+  double compute_vector(int) override;
+
+ private:
+  PairDeepMD *pair_deepmd;
+  deepmd::DeepTensor dpt;
+  deepmd::DipoleChargeModifier dtm;
+  std::string model;
+  int ntypes;
+  std::vector sel_type;
+  std::vector dpl_type;
+  std::vector bond_type;
+  std::map type_asso;
+  std::map bk_type_asso;
+  std::vector dipole_recd;
+  std::vector dfcorr_buff;
+  std::vector efield;
+  std::vector efield_fsum, efield_fsum_all;
+  int efield_force_flag;
+  void get_valid_pairs(std::vector > &pairs);
+};
+}  // namespace LAMMPS_NS
 
-#endif // LMP_FIX_DPLR_H
-#endif // FIX_CLASS
+#endif  // LMP_FIX_DPLR_H
+#endif  // FIX_CLASS
diff --git a/source/lmp/fix_ttm_dp.h b/source/lmp/fix_ttm_dp.h
index cf89eb53fb..0356c9b5cb 100644
--- a/source/lmp/fix_ttm_dp.h
+++ b/source/lmp/fix_ttm_dp.h
@@ -1,9 +1,10 @@
-#include "fix_ttm.h"
 #include 
 
+#include "fix_ttm.h"
+
 namespace LAMMPS_NS {
 class FixTTMDP : public FixTTM {
-public:
+ public:
   std::vector get_nodes() const {
     std::vector tmp(3);
     tmp[0] = nxgrid;
@@ -13,4 +14,4 @@ class FixTTMDP : public FixTTM {
   };
   double ***const get_T_electron() const { return T_electron; };
 };
-} // namespace LAMMPS_NS
\ No newline at end of file
+}  // namespace LAMMPS_NS
diff --git a/source/lmp/lmp_version.sh b/source/lmp/lmp_version.sh
index 471c15c4a4..3f769cf3a3 100755
--- a/source/lmp/lmp_version.sh
+++ b/source/lmp/lmp_version.sh
@@ -3,7 +3,7 @@ set -e
 # Read LAMMPS version from version.h
 version_line=$(grep LAMMPS_VERSION ../version.h)
 # extract version
-tmp=${version_line#*\"}   # remove prefix ending in "
-version=${tmp%\"*}   # remove suffix starting with "
+tmp=${version_line#*\"} # remove prefix ending in "
+version=${tmp%\"*}      # remove suffix starting with "
 # string to int
 date --date="$(printf $version)" +"%Y%m%d"
diff --git a/source/lmp/pair_deepmd.cpp b/source/lmp/pair_deepmd.cpp
index 8fb360abb0..8433db28ee 100644
--- a/source/lmp/pair_deepmd.cpp
+++ b/source/lmp/pair_deepmd.cpp
@@ -1,24 +1,26 @@
-#include 
-#include 
 #include 
+
 #include 
+#include 
 #include 
+#include 
+
 #include "atom.h"
-#include "domain.h"
+#include "citeme.h"
 #include "comm.h"
-#include "force.h"
 #include "compute.h"
-#include "memory.h"
-#include "update.h"
-#include "output.h"
+#include "domain.h"
 #include "error.h"
-#include "neighbor.h"
+#include "fix.h"
+#include "force.h"
+#include "memory.h"
+#include "modify.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
-#include "modify.h"
-#include "fix.h"
-#include "citeme.h"
-#if LAMMPS_VERSION_NUMBER>=20210831
+#include "neighbor.h"
+#include "output.h"
+#include "update.h"
+#if LAMMPS_VERSION_NUMBER >= 20210831
 // in lammps #2902, fix_ttm members turns from private to protected
 #define USE_TTM 1
 #include "fix_ttm_dp.h"
@@ -30,7 +32,7 @@ using namespace LAMMPS_NS;
 using namespace std;
 
 static const char cite_user_deepmd_package[] =
-	"USER-DEEPMD package:\n\n"
+    "USER-DEEPMD package:\n\n"
     "@article{Wang_ComputPhysCommun_2018_v228_p178,\n"
     "  author = {Wang, Han and Zhang, Linfeng and Han, Jiequn and E, Weinan},\n"
     "  doi = {10.1016/j.cpc.2018.03.016},\n"
@@ -40,76 +42,73 @@ static const char cite_user_deepmd_package[] =
     "  publisher = {Elsevier {BV}},\n"
     "  volume = 228,\n"
     "  journal = {Comput. Phys. Commun.},\n"
-    "  title = {{DeePMD-kit: A deep learning package for many-body potential energy representation and molecular dynamics}},\n"
+    "  title = {{DeePMD-kit: A deep learning package for many-body potential "
+    "energy representation and molecular dynamics}},\n"
     "  pages = {178--184}\n"
-	"}\n\n";
+    "}\n\n";
 
+static int stringCmp(const void *a, const void *b) {
+  char *m = (char *)a;
+  char *n = (char *)b;
+  int i, sum = 0;
 
-static int stringCmp(const void *a, const void* b)
-{
-    char* m = (char*)a;
-    char* n = (char*)b;
-    int i, sum = 0;
-
-    for(i = 0; i < MPI_MAX_PROCESSOR_NAME; i++)
-        if (m[i] == n[i])
-            continue;
-        else
-        {
-            sum = m[i] - n[i];
-            break;
-        }
-    return sum;
+  for (i = 0; i < MPI_MAX_PROCESSOR_NAME; i++)
+    if (m[i] == n[i])
+      continue;
+    else {
+      sum = m[i] - n[i];
+      break;
+    }
+  return sum;
 }
 
 int PairDeepMD::get_node_rank() {
-    char host_name[MPI_MAX_PROCESSOR_NAME];
-    memset(host_name, '\0', sizeof(char) * MPI_MAX_PROCESSOR_NAME);
-    char (*host_names)[MPI_MAX_PROCESSOR_NAME];
-    int n, namelen, color, rank, nprocs, myrank;
-    size_t bytes;
-    MPI_Comm nodeComm;
-    
-    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
-    MPI_Get_processor_name(host_name,&namelen);
-
-    bytes = nprocs * sizeof(char[MPI_MAX_PROCESSOR_NAME]);
-    host_names = (char (*)[MPI_MAX_PROCESSOR_NAME]) malloc(bytes);
-    for (int ii = 0; ii < nprocs; ii++) {
-        memset(host_names[ii], '\0', sizeof(char) * MPI_MAX_PROCESSOR_NAME);
+  char host_name[MPI_MAX_PROCESSOR_NAME];
+  memset(host_name, '\0', sizeof(char) * MPI_MAX_PROCESSOR_NAME);
+  char(*host_names)[MPI_MAX_PROCESSOR_NAME];
+  int n, namelen, color, rank, nprocs, myrank;
+  size_t bytes;
+  MPI_Comm nodeComm;
+
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+  MPI_Get_processor_name(host_name, &namelen);
+
+  bytes = nprocs * sizeof(char[MPI_MAX_PROCESSOR_NAME]);
+  host_names = (char(*)[MPI_MAX_PROCESSOR_NAME])malloc(bytes);
+  for (int ii = 0; ii < nprocs; ii++) {
+    memset(host_names[ii], '\0', sizeof(char) * MPI_MAX_PROCESSOR_NAME);
+  }
+
+  strcpy(host_names[rank], host_name);
+
+  for (n = 0; n < nprocs; n++)
+    MPI_Bcast(&(host_names[n]), MPI_MAX_PROCESSOR_NAME, MPI_CHAR, n,
+              MPI_COMM_WORLD);
+  qsort(host_names, nprocs, sizeof(char[MPI_MAX_PROCESSOR_NAME]), stringCmp);
+
+  color = 0;
+  for (n = 0; n < nprocs - 1; n++) {
+    if (strcmp(host_name, host_names[n]) == 0) {
+      break;
     }
-    
-    strcpy(host_names[rank], host_name);
-
-    for (n=0; n PairDeepMD::get_file_content(const std::vector & models) {
+std::vector PairDeepMD::get_file_content(
+    const std::vector &models) {
   std::vector file_contents(models.size());
   for (unsigned ii = 0; ii < models.size(); ++ii) {
     file_contents[ii] = get_file_content(models[ii]);
@@ -140,54 +140,45 @@ std::vector PairDeepMD::get_file_content(const std::vector & vec, 
-	const int & nloc) 
-{
+static void ana_st(double &max,
+                   double &min,
+                   double &sum,
+                   const vector &vec,
+                   const int &nloc) {
   if (nloc == 0) return;
   max = vec[0];
   min = vec[0];
   sum = vec[0];
-  for (unsigned ii = 1; ii < nloc; ++ii){
+  for (unsigned ii = 1; ii < nloc; ++ii) {
     if (vec[ii] > max) max = vec[ii];
     if (vec[ii] < min) min = vec[ii];
     sum += vec[ii];
   }
 }
 
-static void 
-make_uniform_aparam(
-#ifdef HIGH_PREC    
-    vector & daparam,
-    const vector & aparam,
-    const int & nlocal
+static void make_uniform_aparam(
+#ifdef HIGH_PREC
+    vector &daparam, const vector &aparam, const int &nlocal
 #else
-    vector & daparam,
-    const vector & aparam,
-    const int & nlocal
+    vector &daparam, const vector &aparam, const int &nlocal
 #endif
-    )
-{
+) {
   unsigned dim_aparam = aparam.size();
   daparam.resize(dim_aparam * nlocal);
-  for (int ii = 0; ii < nlocal; ++ii){
-    for (int jj = 0; jj < dim_aparam; ++jj){
-      daparam[ii*dim_aparam+jj] = aparam[jj];
+  for (int ii = 0; ii < nlocal; ++ii) {
+    for (int jj = 0; jj < dim_aparam; ++jj) {
+      daparam[ii * dim_aparam + jj] = aparam[jj];
     }
   }
 }
 
 void PairDeepMD::make_fparam_from_compute(
 #ifdef HIGH_PREC
-  vector & fparam
+    vector &fparam
 #else
-  vector & fparam
+    vector &fparam
 #endif
-)
-{
+) {
   assert(do_compute);
 
   int icompute = modify->find_compute(compute_id);
@@ -196,15 +187,14 @@ void PairDeepMD::make_fparam_from_compute(
   assert(compute);
   fparam.resize(dim_fparam);
 
-  if (dim_fparam == 1){
+  if (dim_fparam == 1) {
     compute->compute_scalar();
     fparam[0] = compute->scalar;
-  }
-  else if (dim_fparam >1){
+  } else if (dim_fparam > 1) {
     compute->compute_vector();
     double *cvector = compute->vector;
-    for (int jj = 0; jj < dim_aparam; ++jj){
-    fparam[jj] =cvector[jj];
+    for (int jj = 0; jj < dim_aparam; ++jj) {
+      fparam[jj] = cvector[jj];
     }
   }
 }
@@ -212,18 +202,17 @@ void PairDeepMD::make_fparam_from_compute(
 #ifdef USE_TTM
 void PairDeepMD::make_ttm_fparam(
 #ifdef HIGH_PREC
-    vector & fparam
+    vector &fparam
 #else
-    vector & fparam
+    vector &fparam
 #endif
-    )
-{
+) {
   assert(do_ttm);
   // get ttm_fix
-  const FixTTMDP * ttm_fix = NULL;
+  const FixTTMDP *ttm_fix = NULL;
   for (int ii = 0; ii < modify->nfix; ii++) {
-    if (string(modify->fix[ii]->id) == ttm_fix_id){
-      ttm_fix = dynamic_cast(modify->fix[ii]);
+    if (string(modify->fix[ii]->id) == ttm_fix_id) {
+      ttm_fix = dynamic_cast(modify->fix[ii]);
     }
   }
   assert(ttm_fix);
@@ -234,42 +223,39 @@ void PairDeepMD::make_ttm_fparam(
   int nxnodes = nnodes[0];
   int nynodes = nnodes[1];
   int nznodes = nnodes[2];
-  double *** const T_electron = ttm_fix->get_T_electron();
+  double ***const T_electron = ttm_fix->get_T_electron();
 
   int numb_effective_nodes = 0;
   double total_Te = 0;
 
-  // loop over grids to get average electron temperature 
+  // loop over grids to get average electron temperature
   for (int ixnode = 0; ixnode < nxnodes; ixnode++)
-      for (int iynode = 0; iynode < nynodes; iynode++)
-        for (int iznode = 0; iznode < nznodes; iznode++) 
-        {
-          if (T_electron[ixnode][iynode][iznode] != 0)
-          {
-            numb_effective_nodes += 1;
-            total_Te += T_electron[ixnode][iynode][iznode];
-          }
+    for (int iynode = 0; iynode < nynodes; iynode++)
+      for (int iznode = 0; iznode < nznodes; iznode++) {
+        if (T_electron[ixnode][iynode][iznode] != 0) {
+          numb_effective_nodes += 1;
+          total_Te += T_electron[ixnode][iynode][iznode];
         }
+      }
 
-  fparam[0] = total_Te/numb_effective_nodes;
+  fparam[0] = total_Te / numb_effective_nodes;
 }
 #endif
 
 #ifdef USE_TTM
 void PairDeepMD::make_ttm_aparam(
 #ifdef HIGH_PREC
-    vector & daparam
+    vector &daparam
 #else
-    vector & daparam
+    vector &daparam
 #endif
-    )
-{
+) {
   assert(do_ttm);
   // get ttm_fix
-  const FixTTMDP * ttm_fix = NULL;
+  const FixTTMDP *ttm_fix = NULL;
   for (int ii = 0; ii < modify->nfix; ii++) {
-    if (string(modify->fix[ii]->id) == ttm_fix_id){
-      ttm_fix = dynamic_cast(modify->fix[ii]);
+    if (string(modify->fix[ii]->id) == ttm_fix_id) {
+      ttm_fix = dynamic_cast(modify->fix[ii]);
     }
   }
   assert(ttm_fix);
@@ -281,24 +267,24 @@ void PairDeepMD::make_ttm_aparam(
   int nxnodes = nnodes[0];
   int nynodes = nnodes[1];
   int nznodes = nnodes[2];
-  double *** const T_electron = ttm_fix->get_T_electron();
-  double dx = domain->xprd/nxnodes;
-  double dy = domain->yprd/nynodes;
-  double dz = domain->zprd/nynodes;
+  double ***const T_electron = ttm_fix->get_T_electron();
+  double dx = domain->xprd / nxnodes;
+  double dy = domain->yprd / nynodes;
+  double dz = domain->zprd / nynodes;
   // resize daparam
   daparam.resize(nlocal);
   // loop over atoms to assign aparam
   for (int ii = 0; ii < nlocal; ii++) {
     if (mask[ii] & ttm_fix->groupbit) {
-      double xscale = (x[ii][0] - domain->boxlo[0])/domain->xprd;
-      double yscale = (x[ii][1] - domain->boxlo[1])/domain->yprd;
-      double zscale = (x[ii][2] - domain->boxlo[2])/domain->zprd;
-      int ixnode = static_cast(xscale*nxnodes);
-      int iynode = static_cast(yscale*nynodes);
-      int iznode = static_cast(zscale*nznodes);
-      while (ixnode > nxnodes-1) ixnode -= nxnodes;
-      while (iynode > nynodes-1) iynode -= nynodes;
-      while (iznode > nznodes-1) iznode -= nznodes;
+      double xscale = (x[ii][0] - domain->boxlo[0]) / domain->xprd;
+      double yscale = (x[ii][1] - domain->boxlo[1]) / domain->yprd;
+      double zscale = (x[ii][2] - domain->boxlo[2]) / domain->zprd;
+      int ixnode = static_cast(xscale * nxnodes);
+      int iynode = static_cast(yscale * nynodes);
+      int iznode = static_cast(zscale * nznodes);
+      while (ixnode > nxnodes - 1) ixnode -= nxnodes;
+      while (iynode > nynodes - 1) iynode -= nynodes;
+      while (iznode > nznodes - 1) iznode -= nznodes;
       while (ixnode < 0) ixnode += nxnodes;
       while (iynode < 0) iynode += nynodes;
       while (iznode < 0) iznode += nznodes;
@@ -308,19 +294,24 @@ void PairDeepMD::make_ttm_aparam(
 }
 #endif
 
-PairDeepMD::PairDeepMD(LAMMPS *lmp) 
+PairDeepMD::PairDeepMD(LAMMPS *lmp)
     : Pair(lmp)
-      
+
 {
   if (lmp->citeme) lmp->citeme->add(cite_user_deepmd_package);
-  if (strcmp(update->unit_style,"metal") != 0) {
-    error->all(FLERR,"Pair deepmd requires metal unit, please set it by \"units metal\"");
+  if (strcmp(update->unit_style, "metal") != 0) {
+    error->all(
+        FLERR,
+        "Pair deepmd requires metal unit, please set it by \"units metal\"");
   }
   restartinfo = 1;
-#if LAMMPS_VERSION_NUMBER>=20201130
-  centroidstressflag = CENTROID_AVAIL ; // set centroidstressflag = CENTROID_AVAIL to allow the use of the centroid/stress/atom. Added by Davide Tisi
-#else 
-  centroidstressflag = 2 ; // set centroidstressflag = 2 to allow the use of the centroid/stress/atom. Added by Davide Tisi
+#if LAMMPS_VERSION_NUMBER >= 20201130
+  centroidstressflag =
+      CENTROID_AVAIL;  // set centroidstressflag = CENTROID_AVAIL to allow the
+                       // use of the centroid/stress/atom. Added by Davide Tisi
+#else
+  centroidstressflag = 2;  // set centroidstressflag = 2 to allow the use of the
+                           // centroid/stress/atom. Added by Davide Tisi
 #endif
   pppmflag = 1;
   respa_enable = 0;
@@ -346,10 +337,8 @@ PairDeepMD::PairDeepMD(LAMMPS *lmp)
   print_summary("  ");
 }
 
-void
-PairDeepMD::print_summary(const string pre) const
-{
-  if (comm->me == 0){
+void PairDeepMD::print_summary(const string pre) const {
+  if (comm->me == 0) {
     // capture cout to a string, then call LAMMPS's utils::logmesg
     // https://stackoverflow.com/a/4043813/9567349
     std::stringstream buffer;
@@ -366,7 +355,8 @@ PairDeepMD::print_summary(const string pre) const
     cout << pre << "source commit:      " << STR_GIT_HASH << endl;
     cout << pre << "source commit at:   " << STR_GIT_DATE << endl;
     cout << pre << "build float prec:   " << STR_FLOAT_PREC << endl;
-    cout << pre << "build with tf inc:  " << STR_TensorFlow_INCLUDE_DIRS << endl;
+    cout << pre << "build with tf inc:  " << STR_TensorFlow_INCLUDE_DIRS
+         << endl;
     cout << pre << "build with tf lib:  " << STR_TensorFlow_LIBRARY << endl;
 
     std::cout.rdbuf(sbuf);
@@ -374,9 +364,7 @@ PairDeepMD::print_summary(const string pre) const
   }
 }
 
-
-PairDeepMD::~PairDeepMD()
-{
+PairDeepMD::~PairDeepMD() {
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
@@ -384,13 +372,15 @@ PairDeepMD::~PairDeepMD()
   }
 }
 
-void PairDeepMD::compute(int eflag, int vflag)
-{
+void PairDeepMD::compute(int eflag, int vflag) {
   if (numb_models == 0) return;
-  if (eflag || vflag) ev_setup(eflag,vflag);
-  if (vflag_atom) error->all(FLERR, "6-element atomic virial is not supported. Use compute centroid/stress/atom command for 9-element atomic virial.");
+  if (eflag || vflag) ev_setup(eflag, vflag);
+  if (vflag_atom)
+    error->all(FLERR,
+               "6-element atomic virial is not supported. Use compute "
+               "centroid/stress/atom command for 9-element atomic virial.");
   bool do_ghost = true;
-  
+
   double **x = atom->x;
   double **f = atom->f;
   int *type = atom->type;
@@ -402,168 +392,184 @@ void PairDeepMD::compute(int eflag, int vflag)
   int nall = nlocal + nghost;
   int newton_pair = force->newton_pair;
 
-  vector dtype (nall);
-  for (int ii = 0; ii < nall; ++ii){
+  vector dtype(nall);
+  for (int ii = 0; ii < nall; ++ii) {
     dtype[ii] = type_idx_map[type[ii] - 1];
-  }  
+  }
 
-  double dener (0);
-  vector dforce (nall * 3);
-  vector dvirial (9, 0);
-  vector dcoord (nall * 3, 0.);
-  vector dbox (9, 0) ;
+  double dener(0);
+  vector dforce(nall * 3);
+  vector dvirial(9, 0);
+  vector dcoord(nall * 3, 0.);
+  vector dbox(9, 0);
 #ifdef HIGH_PREC
-  vector daparam;
-#else 
-  vector daparam;
+  vector daparam;
+#else
+  vector daparam;
 #endif
 
   // get box
-  dbox[0] = domain->h[0];	// xx
-  dbox[4] = domain->h[1];	// yy
-  dbox[8] = domain->h[2];	// zz
-  dbox[7] = domain->h[3];	// zy
-  dbox[6] = domain->h[4];	// zx
-  dbox[3] = domain->h[5];	// yx
+  dbox[0] = domain->h[0];  // xx
+  dbox[4] = domain->h[1];  // yy
+  dbox[8] = domain->h[2];  // zz
+  dbox[7] = domain->h[3];  // zy
+  dbox[6] = domain->h[4];  // zx
+  dbox[3] = domain->h[5];  // yx
 
   // get coord
-  for (int ii = 0; ii < nall; ++ii){
-    for (int dd = 0; dd < 3; ++dd){
-      dcoord[ii*3+dd] = x[ii][dd] - domain->boxlo[dd];
+  for (int ii = 0; ii < nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      dcoord[ii * 3 + dd] = x[ii][dd] - domain->boxlo[dd];
     }
   }
 
   // uniform aparam
-  if (aparam.size() > 0){
+  if (aparam.size() > 0) {
     make_uniform_aparam(daparam, aparam, nlocal);
-  }
-  else if (do_ttm) {
+  } else if (do_ttm) {
 #ifdef USE_TTM
-    if (dim_aparam > 0){
-    make_ttm_aparam(daparam);
-    }
-    else if (dim_fparam > 0){
-    make_ttm_fparam(fparam);
+    if (dim_aparam > 0) {
+      make_ttm_aparam(daparam);
+    } else if (dim_fparam > 0) {
+      make_ttm_fparam(fparam);
     }
 #endif
   }
-	
-  if (do_compute){
+
+  if (do_compute) {
     make_fparam_from_compute(fparam);
   }
 
   // int ago = numb_models > 1 ? 0 : neighbor->ago;
   int ago = neighbor->ago;
   if (numb_models > 1) {
-      if (multi_models_no_mod_devi && (out_freq > 0 && update->ntimestep % out_freq == 0)) {
-          ago = 0;
-      }
-      else if (multi_models_mod_devi && (out_freq == 0 || update->ntimestep % out_freq != 0)) {
-        ago = 0;
-      }
+    if (multi_models_no_mod_devi &&
+        (out_freq > 0 && update->ntimestep % out_freq == 0)) {
+      ago = 0;
+    } else if (multi_models_mod_devi &&
+               (out_freq == 0 || update->ntimestep % out_freq != 0)) {
+      ago = 0;
+    }
   }
   // compute
   single_model = (numb_models == 1);
-  multi_models_no_mod_devi = (numb_models > 1 && (out_freq == 0 || update->ntimestep % out_freq != 0));
-  multi_models_mod_devi = (numb_models > 1 && (out_freq > 0 && update->ntimestep % out_freq == 0));
+  multi_models_no_mod_devi =
+      (numb_models > 1 && (out_freq == 0 || update->ntimestep % out_freq != 0));
+  multi_models_mod_devi =
+      (numb_models > 1 && (out_freq > 0 && update->ntimestep % out_freq == 0));
   if (do_ghost) {
-    deepmd::InputNlist lmp_list (list->inum, list->ilist, list->numneigh, list->firstneigh);
+    deepmd::InputNlist lmp_list(list->inum, list->ilist, list->numneigh,
+                                list->firstneigh);
     if (single_model || multi_models_no_mod_devi) {
-      //cvflag_atom is the right flag for the cvatom matrix 
-      if ( ! (eflag_atom || cvflag_atom) ) {      
+      // cvflag_atom is the right flag for the cvatom matrix
+      if (!(eflag_atom || cvflag_atom)) {
 #ifdef HIGH_PREC
-  try {
-	deep_pot.compute (dener, dforce, dvirial, dcoord, dtype, dbox, nghost, lmp_list, ago, fparam, daparam);
-  } catch(deepmd::deepmd_exception& e) {
-    error->all(FLERR, e.what());
-  }
+        try {
+          deep_pot.compute(dener, dforce, dvirial, dcoord, dtype, dbox, nghost,
+                           lmp_list, ago, fparam, daparam);
+        } catch (deepmd::deepmd_exception &e) {
+          error->all(FLERR, e.what());
+        }
 #else
-	vector dcoord_(dcoord.size());
-	vector dbox_(dbox.size());
-	for (unsigned dd = 0; dd < dcoord.size(); ++dd) dcoord_[dd] = dcoord[dd];
-	for (unsigned dd = 0; dd < dbox.size(); ++dd) dbox_[dd] = dbox[dd];
-	vector dforce_(dforce.size(), 0);
-	vector dvirial_(dvirial.size(), 0);
-	double dener_ = 0;
-  try {
-	deep_pot.compute (dener_, dforce_, dvirial_, dcoord_, dtype, dbox_, nghost, lmp_list, ago, fparam, daparam);
-  } catch(deepmd::deepmd_exception& e) {
-    error->all(FLERR, e.what());
-  }
-	for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce[dd] = dforce_[dd];	
-	for (unsigned dd = 0; dd < dvirial.size(); ++dd) dvirial[dd] = dvirial_[dd];	
-	dener = dener_;
+        vector dcoord_(dcoord.size());
+        vector dbox_(dbox.size());
+        for (unsigned dd = 0; dd < dcoord.size(); ++dd)
+          dcoord_[dd] = dcoord[dd];
+        for (unsigned dd = 0; dd < dbox.size(); ++dd) dbox_[dd] = dbox[dd];
+        vector dforce_(dforce.size(), 0);
+        vector dvirial_(dvirial.size(), 0);
+        double dener_ = 0;
+        try {
+          deep_pot.compute(dener_, dforce_, dvirial_, dcoord_, dtype, dbox_,
+                           nghost, lmp_list, ago, fparam, daparam);
+        } catch (deepmd::deepmd_exception &e) {
+          error->all(FLERR, e.what());
+        }
+        for (unsigned dd = 0; dd < dforce.size(); ++dd)
+          dforce[dd] = dforce_[dd];
+        for (unsigned dd = 0; dd < dvirial.size(); ++dd)
+          dvirial[dd] = dvirial_[dd];
+        dener = dener_;
 #endif
       }
       // do atomic energy and virial
       else {
-	vector deatom (nall * 1, 0);
-	vector dvatom (nall * 9, 0);
+        vector deatom(nall * 1, 0);
+        vector dvatom(nall * 9, 0);
 #ifdef HIGH_PREC
-  try {
-	deep_pot.compute (dener, dforce, dvirial, deatom, dvatom, dcoord, dtype, dbox, nghost, lmp_list, ago, fparam, daparam);
-  } catch(deepmd::deepmd_exception& e) {
-    error->all(FLERR, e.what());
-  }
-#else 
-	vector dcoord_(dcoord.size());
-	vector dbox_(dbox.size());
-	for (unsigned dd = 0; dd < dcoord.size(); ++dd) dcoord_[dd] = dcoord[dd];
-	for (unsigned dd = 0; dd < dbox.size(); ++dd) dbox_[dd] = dbox[dd];
-	vector dforce_(dforce.size(), 0);
-	vector dvirial_(dvirial.size(), 0);
-	vector deatom_(dforce.size(), 0);
-	vector dvatom_(dforce.size(), 0);
-	double dener_ = 0;
-  try {
-	deep_pot.compute (dener_, dforce_, dvirial_, deatom_, dvatom_, dcoord_, dtype, dbox_, nghost, lmp_list, ago, fparam, daparam);
-  } catch(deepmd::deepmd_exception& e) {
-    error->all(FLERR, e.what());
-  }
-	for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce[dd] = dforce_[dd];	
-	for (unsigned dd = 0; dd < dvirial.size(); ++dd) dvirial[dd] = dvirial_[dd];	
-	for (unsigned dd = 0; dd < deatom.size(); ++dd) deatom[dd] = deatom_[dd];	
-	for (unsigned dd = 0; dd < dvatom.size(); ++dd) dvatom[dd] = dvatom_[dd];	
-	dener = dener_;
-#endif	
-	if (eflag_atom) {
-	  for (int ii = 0; ii < nlocal; ++ii) eatom[ii] += deatom[ii];
-	}
-	// Added by Davide Tisi 2020
-	// interface the atomic virial computed by DeepMD 
-	// with the one used in centroid atoms
-	if (cvflag_atom) {
-	  for (int ii = 0; ii < nall; ++ii){
-	    //vatom[ii][0] += 1.0 * dvatom[9*ii+0];
-	    //vatom[ii][1] += 1.0 * dvatom[9*ii+4];
-	    //vatom[ii][2] += 1.0 * dvatom[9*ii+8];
-	    //vatom[ii][3] += 1.0 * dvatom[9*ii+3];
-	    //vatom[ii][4] += 1.0 * dvatom[9*ii+6];
-	    //vatom[ii][5] += 1.0 * dvatom[9*ii+7];
-            cvatom[ii][0] += -1.0 * dvatom[9*ii+0]; // xx
-            cvatom[ii][1] += -1.0 * dvatom[9*ii+4]; // yy 
-            cvatom[ii][2] += -1.0 * dvatom[9*ii+8]; // zz
-            cvatom[ii][3] += -1.0 * dvatom[9*ii+3]; // xy
-            cvatom[ii][4] += -1.0 * dvatom[9*ii+6]; // xz
-            cvatom[ii][5] += -1.0 * dvatom[9*ii+7]; // yz
-            cvatom[ii][6] += -1.0 * dvatom[9*ii+1]; // yx
-            cvatom[ii][7] += -1.0 * dvatom[9*ii+2]; // zx
-            cvatom[ii][8] += -1.0 * dvatom[9*ii+5]; // zy
-	  }
-	}
+        try {
+          deep_pot.compute(dener, dforce, dvirial, deatom, dvatom, dcoord,
+                           dtype, dbox, nghost, lmp_list, ago, fparam, daparam);
+        } catch (deepmd::deepmd_exception &e) {
+          error->all(FLERR, e.what());
+        }
+#else
+        vector dcoord_(dcoord.size());
+        vector dbox_(dbox.size());
+        for (unsigned dd = 0; dd < dcoord.size(); ++dd)
+          dcoord_[dd] = dcoord[dd];
+        for (unsigned dd = 0; dd < dbox.size(); ++dd) dbox_[dd] = dbox[dd];
+        vector dforce_(dforce.size(), 0);
+        vector dvirial_(dvirial.size(), 0);
+        vector deatom_(dforce.size(), 0);
+        vector dvatom_(dforce.size(), 0);
+        double dener_ = 0;
+        try {
+          deep_pot.compute(dener_, dforce_, dvirial_, deatom_, dvatom_, dcoord_,
+                           dtype, dbox_, nghost, lmp_list, ago, fparam,
+                           daparam);
+        } catch (deepmd::deepmd_exception &e) {
+          error->all(FLERR, e.what());
+        }
+        for (unsigned dd = 0; dd < dforce.size(); ++dd)
+          dforce[dd] = dforce_[dd];
+        for (unsigned dd = 0; dd < dvirial.size(); ++dd)
+          dvirial[dd] = dvirial_[dd];
+        for (unsigned dd = 0; dd < deatom.size(); ++dd)
+          deatom[dd] = deatom_[dd];
+        for (unsigned dd = 0; dd < dvatom.size(); ++dd)
+          dvatom[dd] = dvatom_[dd];
+        dener = dener_;
+#endif
+        if (eflag_atom) {
+          for (int ii = 0; ii < nlocal; ++ii) eatom[ii] += deatom[ii];
+        }
+        // Added by Davide Tisi 2020
+        // interface the atomic virial computed by DeepMD
+        // with the one used in centroid atoms
+        if (cvflag_atom) {
+          for (int ii = 0; ii < nall; ++ii) {
+            // vatom[ii][0] += 1.0 * dvatom[9*ii+0];
+            // vatom[ii][1] += 1.0 * dvatom[9*ii+4];
+            // vatom[ii][2] += 1.0 * dvatom[9*ii+8];
+            // vatom[ii][3] += 1.0 * dvatom[9*ii+3];
+            // vatom[ii][4] += 1.0 * dvatom[9*ii+6];
+            // vatom[ii][5] += 1.0 * dvatom[9*ii+7];
+            cvatom[ii][0] += -1.0 * dvatom[9 * ii + 0];  // xx
+            cvatom[ii][1] += -1.0 * dvatom[9 * ii + 4];  // yy
+            cvatom[ii][2] += -1.0 * dvatom[9 * ii + 8];  // zz
+            cvatom[ii][3] += -1.0 * dvatom[9 * ii + 3];  // xy
+            cvatom[ii][4] += -1.0 * dvatom[9 * ii + 6];  // xz
+            cvatom[ii][5] += -1.0 * dvatom[9 * ii + 7];  // yz
+            cvatom[ii][6] += -1.0 * dvatom[9 * ii + 1];  // yx
+            cvatom[ii][7] += -1.0 * dvatom[9 * ii + 2];  // zx
+            cvatom[ii][8] += -1.0 * dvatom[9 * ii + 5];  // zy
+          }
+        }
       }
-    }
-    else if (multi_models_mod_devi) {
-      vector deatom (nall * 1, 0);
-      vector dvatom (nall * 9, 0);
-      vector> 	all_virial;	       
+    } else if (multi_models_mod_devi) {
+      vector deatom(nall * 1, 0);
+      vector dvatom(nall * 9, 0);
+      vector> all_virial;
 #ifdef HIGH_PREC
-      vector 		all_energy;
-      vector> 	all_atom_energy;
-      vector> 	all_atom_virial;
+      vector all_energy;
+      vector> all_atom_energy;
+      vector> all_atom_virial;
       try {
-      deep_pot_model_devi.compute(all_energy, all_force, all_virial, all_atom_energy, all_atom_virial, dcoord, dtype, dbox, nghost, lmp_list, ago, fparam, daparam);
-      } catch(deepmd::deepmd_exception& e) {
+        deep_pot_model_devi.compute(
+            all_energy, all_force, all_virial, all_atom_energy, all_atom_virial,
+            dcoord, dtype, dbox, nghost, lmp_list, ago, fparam, daparam);
+      } catch (deepmd::deepmd_exception &e) {
         error->all(FLERR, e.what());
       }
       // deep_pot_model_devi.compute_avg (dener, all_energy);
@@ -576,7 +582,7 @@ void PairDeepMD::compute(int eflag, int vflag)
       dvirial = all_virial[0];
       deatom = all_atom_energy[0];
       dvatom = all_atom_virial[0];
-#else 
+#else
       vector dcoord_(dcoord.size());
       vector dbox_(dbox.size());
       for (unsigned dd = 0; dd < dcoord.size(); ++dd) dcoord_[dd] = dcoord[dd];
@@ -586,14 +592,17 @@ void PairDeepMD::compute(int eflag, int vflag)
       vector deatom_(dforce.size(), 0);
       vector dvatom_(dforce.size(), 0);
       double dener_ = 0;
-      vector 		all_energy_;
-      vector>	all_force_;
-      vector> 	all_virial_;	       
-      vector> 	all_atom_energy_;
-      vector> 	all_atom_virial_;
+      vector all_energy_;
+      vector> all_force_;
+      vector> all_virial_;
+      vector> all_atom_energy_;
+      vector> all_atom_virial_;
       try {
-      deep_pot_model_devi.compute(all_energy_, all_force_, all_virial_, all_atom_energy_, all_atom_virial_, dcoord_, dtype, dbox_, nghost, lmp_list, ago, fparam, daparam);
-      } catch(deepmd::deepmd_exception& e) {
+        deep_pot_model_devi.compute(all_energy_, all_force_, all_virial_,
+                                    all_atom_energy_, all_atom_virial_, dcoord_,
+                                    dtype, dbox_, nghost, lmp_list, ago, fparam,
+                                    daparam);
+      } catch (deepmd::deepmd_exception &e) {
         error->all(FLERR, e.what());
       }
       // deep_pot_model_devi.compute_avg (dener_, all_energy_);
@@ -607,211 +616,213 @@ void PairDeepMD::compute(int eflag, int vflag)
       deatom_ = all_atom_energy_[0];
       dvatom_ = all_atom_virial_[0];
       dener = dener_;
-      for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce[dd] = dforce_[dd];	
-      for (unsigned dd = 0; dd < dvirial.size(); ++dd) dvirial[dd] = dvirial_[dd];	
-      for (unsigned dd = 0; dd < deatom.size(); ++dd) deatom[dd] = deatom_[dd];	
-      for (unsigned dd = 0; dd < dvatom.size(); ++dd) dvatom[dd] = dvatom_[dd];	
+      for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce[dd] = dforce_[dd];
+      for (unsigned dd = 0; dd < dvirial.size(); ++dd)
+        dvirial[dd] = dvirial_[dd];
+      for (unsigned dd = 0; dd < deatom.size(); ++dd) deatom[dd] = deatom_[dd];
+      for (unsigned dd = 0; dd < dvatom.size(); ++dd) dvatom[dd] = dvatom_[dd];
       all_force.resize(all_force_.size());
-      for (unsigned ii = 0; ii < all_force_.size(); ++ii){
-	all_force[ii].resize(all_force_[ii].size());
-	for (unsigned jj = 0; jj < all_force_[ii].size(); ++jj){
-	  all_force[ii][jj] = all_force_[ii][jj];
-	}
+      for (unsigned ii = 0; ii < all_force_.size(); ++ii) {
+        all_force[ii].resize(all_force_[ii].size());
+        for (unsigned jj = 0; jj < all_force_[ii].size(); ++jj) {
+          all_force[ii][jj] = all_force_[ii][jj];
+        }
       }
       all_virial.resize(all_virial_.size());
-      for (unsigned ii = 0; ii < all_virial_.size(); ++ii){
+      for (unsigned ii = 0; ii < all_virial_.size(); ++ii) {
         all_virial[ii].resize(all_virial_[ii].size());
-        for (unsigned jj = 0; jj < all_virial_[ii].size(); ++jj){
+        for (unsigned jj = 0; jj < all_virial_[ii].size(); ++jj) {
           all_virial[ii][jj] = all_virial_[ii][jj];
         }
       }
 #endif
       if (eflag_atom) {
-	for (int ii = 0; ii < nlocal; ++ii) eatom[ii] += deatom[ii];
+        for (int ii = 0; ii < nlocal; ++ii) eatom[ii] += deatom[ii];
       }
-	// Added by Davide Tisi 2020
-	// interface the atomic virial computed by DeepMD 
-	// with the one used in centroid atoms
+      // Added by Davide Tisi 2020
+      // interface the atomic virial computed by DeepMD
+      // with the one used in centroid atoms
       if (cvflag_atom) {
-	for (int ii = 0; ii < nall; ++ii){
-	  //vatom[ii][0] += 1.0 * dvatom[9*ii+0];
-	  //vatom[ii][1] += 1.0 * dvatom[9*ii+4];
-	  //vatom[ii][2] += 1.0 * dvatom[9*ii+8];
-	  //vatom[ii][3] += 1.0 * dvatom[9*ii+3];
-	  //vatom[ii][4] += 1.0 * dvatom[9*ii+6];
-	  //vatom[ii][5] += 1.0 * dvatom[9*ii+7];
-            cvatom[ii][0] += -1.0 * dvatom[9*ii+0]; // xx
-            cvatom[ii][1] += -1.0 * dvatom[9*ii+4]; // yy 
-            cvatom[ii][2] += -1.0 * dvatom[9*ii+8]; // zz
-            cvatom[ii][3] += -1.0 * dvatom[9*ii+3]; // xy
-            cvatom[ii][4] += -1.0 * dvatom[9*ii+6]; // xz
-            cvatom[ii][5] += -1.0 * dvatom[9*ii+7]; // yz
-            cvatom[ii][6] += -1.0 * dvatom[9*ii+1]; // yx
-            cvatom[ii][7] += -1.0 * dvatom[9*ii+2]; // zx
-            cvatom[ii][8] += -1.0 * dvatom[9*ii+5]; // zy
-	}
-      }      
+        for (int ii = 0; ii < nall; ++ii) {
+          // vatom[ii][0] += 1.0 * dvatom[9*ii+0];
+          // vatom[ii][1] += 1.0 * dvatom[9*ii+4];
+          // vatom[ii][2] += 1.0 * dvatom[9*ii+8];
+          // vatom[ii][3] += 1.0 * dvatom[9*ii+3];
+          // vatom[ii][4] += 1.0 * dvatom[9*ii+6];
+          // vatom[ii][5] += 1.0 * dvatom[9*ii+7];
+          cvatom[ii][0] += -1.0 * dvatom[9 * ii + 0];  // xx
+          cvatom[ii][1] += -1.0 * dvatom[9 * ii + 4];  // yy
+          cvatom[ii][2] += -1.0 * dvatom[9 * ii + 8];  // zz
+          cvatom[ii][3] += -1.0 * dvatom[9 * ii + 3];  // xy
+          cvatom[ii][4] += -1.0 * dvatom[9 * ii + 6];  // xz
+          cvatom[ii][5] += -1.0 * dvatom[9 * ii + 7];  // yz
+          cvatom[ii][6] += -1.0 * dvatom[9 * ii + 1];  // yx
+          cvatom[ii][7] += -1.0 * dvatom[9 * ii + 2];  // zx
+          cvatom[ii][8] += -1.0 * dvatom[9 * ii + 5];  // zy
+        }
+      }
       if (out_freq > 0 && update->ntimestep % out_freq == 0) {
-	int rank = comm->me;
-	// std force 
-	if (newton_pair) {
-#if LAMMPS_VERSION_NUMBER>=20220324
-	  comm->reverse_comm(this);
+        int rank = comm->me;
+        // std force
+        if (newton_pair) {
+#if LAMMPS_VERSION_NUMBER >= 20220324
+          comm->reverse_comm(this);
 #else
-    comm->reverse_comm_pair(this);
+          comm->reverse_comm_pair(this);
 #endif
-	}
-	vector std_f;
+        }
+        vector std_f;
 #ifdef HIGH_PREC
-	vector tmp_avg_f;
-	deep_pot_model_devi.compute_avg (tmp_avg_f, all_force);  
-	deep_pot_model_devi.compute_std_f (std_f, tmp_avg_f, all_force);
-	if (out_rel == 1){
-	    deep_pot_model_devi.compute_relative_std_f (std_f, tmp_avg_f, eps);
-	}
-#else 
-	vector tmp_avg_f_, std_f_;
-	for (unsigned ii = 0; ii < all_force_.size(); ++ii){
-	  for (unsigned jj = 0; jj < all_force_[ii].size(); ++jj){
-	    all_force_[ii][jj] = all_force[ii][jj];
-	  }
-	}
-	deep_pot_model_devi.compute_avg (tmp_avg_f_, all_force_);  
-	deep_pot_model_devi.compute_std_f (std_f_, tmp_avg_f_, all_force_);
-	std_f.resize(std_f_.size());
-	if (out_rel == 1){
-	    deep_pot_model_devi.compute_relative_std_f (std_f_, tmp_avg_f_, eps);
-	}
-	for (int dd = 0; dd < std_f_.size(); ++dd) std_f[dd] = std_f_[dd];
+        vector tmp_avg_f;
+        deep_pot_model_devi.compute_avg(tmp_avg_f, all_force);
+        deep_pot_model_devi.compute_std_f(std_f, tmp_avg_f, all_force);
+        if (out_rel == 1) {
+          deep_pot_model_devi.compute_relative_std_f(std_f, tmp_avg_f, eps);
+        }
+#else
+        vector tmp_avg_f_, std_f_;
+        for (unsigned ii = 0; ii < all_force_.size(); ++ii) {
+          for (unsigned jj = 0; jj < all_force_[ii].size(); ++jj) {
+            all_force_[ii][jj] = all_force[ii][jj];
+          }
+        }
+        deep_pot_model_devi.compute_avg(tmp_avg_f_, all_force_);
+        deep_pot_model_devi.compute_std_f(std_f_, tmp_avg_f_, all_force_);
+        std_f.resize(std_f_.size());
+        if (out_rel == 1) {
+          deep_pot_model_devi.compute_relative_std_f(std_f_, tmp_avg_f_, eps);
+        }
+        for (int dd = 0; dd < std_f_.size(); ++dd) std_f[dd] = std_f_[dd];
 #endif
-	double min = numeric_limits::max(), max = 0, avg = 0;
-	ana_st(max, min, avg, std_f, nlocal);
-	int all_nlocal = 0;
-	MPI_Reduce (&nlocal, &all_nlocal, 1, MPI_INT, MPI_SUM, 0, world);
-	double all_f_min = 0, all_f_max = 0, all_f_avg = 0;
-	MPI_Reduce (&min, &all_f_min, 1, MPI_DOUBLE, MPI_MIN, 0, world);
-	MPI_Reduce (&max, &all_f_max, 1, MPI_DOUBLE, MPI_MAX, 0, world);
-	MPI_Reduce (&avg, &all_f_avg, 1, MPI_DOUBLE, MPI_SUM, 0, world);
-	all_f_avg /= double(all_nlocal);
-	// std energy
-	vector std_e;
+        double min = numeric_limits::max(), max = 0, avg = 0;
+        ana_st(max, min, avg, std_f, nlocal);
+        int all_nlocal = 0;
+        MPI_Reduce(&nlocal, &all_nlocal, 1, MPI_INT, MPI_SUM, 0, world);
+        double all_f_min = 0, all_f_max = 0, all_f_avg = 0;
+        MPI_Reduce(&min, &all_f_min, 1, MPI_DOUBLE, MPI_MIN, 0, world);
+        MPI_Reduce(&max, &all_f_max, 1, MPI_DOUBLE, MPI_MAX, 0, world);
+        MPI_Reduce(&avg, &all_f_avg, 1, MPI_DOUBLE, MPI_SUM, 0, world);
+        all_f_avg /= double(all_nlocal);
+        // std energy
+        vector std_e;
 #ifdef HIGH_PREC
-	vector tmp_avg_e;
-	deep_pot_model_devi.compute_avg (tmp_avg_e, all_atom_energy);
-	deep_pot_model_devi.compute_std_e (std_e, tmp_avg_e, all_atom_energy);
-#else 
-	vector tmp_avg_e_, std_e_;
-	deep_pot_model_devi.compute_avg (tmp_avg_e_, all_atom_energy_);
-	deep_pot_model_devi.compute_std_e (std_e_, tmp_avg_e_, all_atom_energy_);
-	std_e.resize(std_e_.size());
-	for (int dd = 0; dd < std_e_.size(); ++dd) std_e[dd] = std_e_[dd];
-#endif	
-	max = avg = 0;
-	min = numeric_limits::max();
-	ana_st(max, min, avg, std_e, nlocal);
-	double all_e_min = 0, all_e_max = 0, all_e_avg = 0;
-	MPI_Reduce (&min, &all_e_min, 1, MPI_DOUBLE, MPI_MIN, 0, world);
-	MPI_Reduce (&max, &all_e_max, 1, MPI_DOUBLE, MPI_MAX, 0, world);
-	MPI_Reduce (&avg, &all_e_avg, 1, MPI_DOUBLE, MPI_SUM, 0, world);
-	all_e_avg /= double(all_nlocal);
-	// std v
-	std::vector send_v(9 * numb_models);
-	std::vector recv_v(9 * numb_models);
-	for(int kk = 0; kk < numb_models; ++kk){
-	  for(int ii = 0; ii < 9; ++ii){
-	    send_v[kk*9+ii] = all_virial[kk][ii] / double(atom->natoms);
-	  }
-	}
-	MPI_Reduce(&send_v[0], &recv_v[0], 9 * numb_models, MPI_DOUBLE, MPI_SUM, 0, world);
+        vector tmp_avg_e;
+        deep_pot_model_devi.compute_avg(tmp_avg_e, all_atom_energy);
+        deep_pot_model_devi.compute_std_e(std_e, tmp_avg_e, all_atom_energy);
+#else
+        vector tmp_avg_e_, std_e_;
+        deep_pot_model_devi.compute_avg(tmp_avg_e_, all_atom_energy_);
+        deep_pot_model_devi.compute_std_e(std_e_, tmp_avg_e_, all_atom_energy_);
+        std_e.resize(std_e_.size());
+        for (int dd = 0; dd < std_e_.size(); ++dd) std_e[dd] = std_e_[dd];
+#endif
+        max = avg = 0;
+        min = numeric_limits::max();
+        ana_st(max, min, avg, std_e, nlocal);
+        double all_e_min = 0, all_e_max = 0, all_e_avg = 0;
+        MPI_Reduce(&min, &all_e_min, 1, MPI_DOUBLE, MPI_MIN, 0, world);
+        MPI_Reduce(&max, &all_e_max, 1, MPI_DOUBLE, MPI_MAX, 0, world);
+        MPI_Reduce(&avg, &all_e_avg, 1, MPI_DOUBLE, MPI_SUM, 0, world);
+        all_e_avg /= double(all_nlocal);
+        // std v
+        std::vector send_v(9 * numb_models);
+        std::vector recv_v(9 * numb_models);
+        for (int kk = 0; kk < numb_models; ++kk) {
+          for (int ii = 0; ii < 9; ++ii) {
+            send_v[kk * 9 + ii] = all_virial[kk][ii] / double(atom->natoms);
+          }
+        }
+        MPI_Reduce(&send_v[0], &recv_v[0], 9 * numb_models, MPI_DOUBLE, MPI_SUM,
+                   0, world);
 #ifdef HIGH_PREC
-	std::vector> all_virial_1(numb_models);
-	std::vector avg_virial, std_virial;
+        std::vector> all_virial_1(numb_models);
+        std::vector avg_virial, std_virial;
 #else
-	std::vector> all_virial_1(numb_models);
-	std::vector avg_virial, std_virial;
+        std::vector> all_virial_1(numb_models);
+        std::vector avg_virial, std_virial;
 #endif
-	for(int kk = 0; kk < numb_models; ++kk){
-	  all_virial_1[kk].resize(9);
-	  for(int ii = 0; ii < 9; ++ii){
-	    all_virial_1[kk][ii] = recv_v[kk*9+ii];
-	  }
-	}	
-	double all_v_min = numeric_limits::max(), all_v_max = 0, all_v_avg = 0;
-	if (rank == 0){
-	  deep_pot_model_devi.compute_avg(avg_virial, all_virial_1);
-	  deep_pot_model_devi.compute_std(std_virial, avg_virial, all_virial_1, 1);
-	  if (out_rel_v == 1){
-	    deep_pot_model_devi.compute_relative_std(std_virial, avg_virial, eps_v, 1);
-	  }
-	  for(int ii = 0; ii < 9; ++ii){
-	    if(std_virial[ii] > all_v_max){
-	      all_v_max = std_virial[ii];
-	    }
-	    if(std_virial[ii] < all_v_min){
-	      all_v_min = std_virial[ii];
-	    }
-	    all_v_avg += std_virial[ii] * std_virial[ii];
-	  }
-	  all_v_avg = sqrt(all_v_avg / 9);
-	}
-	// // total e
-	// vector sum_e(numb_models, 0.);
-	// MPI_Reduce (&all_energy[0], &sum_e[0], numb_models, MPI_DOUBLE, MPI_SUM, 0, world);
-	if (rank == 0) {
-	  // double avg_e = 0;
-	  // deep_pot_model_devi.compute_avg(avg_e, sum_e);
-	  // double std_e_1 = 0;
-	  // deep_pot_model_devi.compute_std(std_e_1, avg_e, sum_e);	
-	  fp << setw(12) << update->ntimestep 
-	     << " " << setw(18) << all_v_max
-	     << " " << setw(18) << all_v_min
-	     << " " << setw(18) << all_v_avg
-	     << " " << setw(18) << all_f_max 
-	     << " " << setw(18) << all_f_min
-	     << " " << setw(18) << all_f_avg;
-	     // << " " << setw(18) << avg_e
-	     // << " " << setw(18) << std_e_1 / all_nlocal
-	}
-	if (out_each == 1){
-	  vector std_f_all(all_nlocal);
-	  // Gather std_f and tags
-	  tagint *tag = atom->tag;
-	  int nprocs = comm->nprocs;
-	  for (int ii = 0; ii < nlocal; ii++) {
-	    tagsend[ii] = tag[ii];
-	    stdfsend[ii] = std_f[ii];
-	  }
-	  MPI_Gather(&nlocal, 1, MPI_INT, counts, 1, MPI_INT, 0, world);
-	  displacements[0] = 0;
-	  for (int ii = 0; ii < nprocs-1; ii++) displacements[ii+1] = displacements[ii] + counts[ii];
-	  MPI_Gatherv(tagsend, nlocal, MPI_LMP_TAGINT,
-	              tagrecv, counts, displacements, MPI_LMP_TAGINT, 0, world);
-	  MPI_Gatherv(stdfsend, nlocal, MPI_DOUBLE,
-	              stdfrecv, counts, displacements, MPI_DOUBLE, 0, world);
-	  if (rank == 0) {
-	    for (int dd = 0; dd < all_nlocal; ++dd) {
-	      std_f_all[tagrecv[dd]-1] = stdfrecv[dd];
-	    }
-	    for (int dd = 0; dd < all_nlocal; ++dd) {
-	      fp << " " << setw(18) << std_f_all[dd];	
-	    }
-	  }
-	}
-	if (rank == 0) {
-	  fp << endl;
-	}
+        for (int kk = 0; kk < numb_models; ++kk) {
+          all_virial_1[kk].resize(9);
+          for (int ii = 0; ii < 9; ++ii) {
+            all_virial_1[kk][ii] = recv_v[kk * 9 + ii];
+          }
+        }
+        double all_v_min = numeric_limits::max(), all_v_max = 0,
+               all_v_avg = 0;
+        if (rank == 0) {
+          deep_pot_model_devi.compute_avg(avg_virial, all_virial_1);
+          deep_pot_model_devi.compute_std(std_virial, avg_virial, all_virial_1,
+                                          1);
+          if (out_rel_v == 1) {
+            deep_pot_model_devi.compute_relative_std(std_virial, avg_virial,
+                                                     eps_v, 1);
+          }
+          for (int ii = 0; ii < 9; ++ii) {
+            if (std_virial[ii] > all_v_max) {
+              all_v_max = std_virial[ii];
+            }
+            if (std_virial[ii] < all_v_min) {
+              all_v_min = std_virial[ii];
+            }
+            all_v_avg += std_virial[ii] * std_virial[ii];
+          }
+          all_v_avg = sqrt(all_v_avg / 9);
+        }
+        // // total e
+        // vector sum_e(numb_models, 0.);
+        // MPI_Reduce (&all_energy[0], &sum_e[0], numb_models, MPI_DOUBLE,
+        // MPI_SUM, 0, world);
+        if (rank == 0) {
+          // double avg_e = 0;
+          // deep_pot_model_devi.compute_avg(avg_e, sum_e);
+          // double std_e_1 = 0;
+          // deep_pot_model_devi.compute_std(std_e_1, avg_e, sum_e);
+          fp << setw(12) << update->ntimestep << " " << setw(18) << all_v_max
+             << " " << setw(18) << all_v_min << " " << setw(18) << all_v_avg
+             << " " << setw(18) << all_f_max << " " << setw(18) << all_f_min
+             << " " << setw(18) << all_f_avg;
+          // << " " << setw(18) << avg_e
+          // << " " << setw(18) << std_e_1 / all_nlocal
+        }
+        if (out_each == 1) {
+          vector std_f_all(all_nlocal);
+          // Gather std_f and tags
+          tagint *tag = atom->tag;
+          int nprocs = comm->nprocs;
+          for (int ii = 0; ii < nlocal; ii++) {
+            tagsend[ii] = tag[ii];
+            stdfsend[ii] = std_f[ii];
+          }
+          MPI_Gather(&nlocal, 1, MPI_INT, counts, 1, MPI_INT, 0, world);
+          displacements[0] = 0;
+          for (int ii = 0; ii < nprocs - 1; ii++)
+            displacements[ii + 1] = displacements[ii] + counts[ii];
+          MPI_Gatherv(tagsend, nlocal, MPI_LMP_TAGINT, tagrecv, counts,
+                      displacements, MPI_LMP_TAGINT, 0, world);
+          MPI_Gatherv(stdfsend, nlocal, MPI_DOUBLE, stdfrecv, counts,
+                      displacements, MPI_DOUBLE, 0, world);
+          if (rank == 0) {
+            for (int dd = 0; dd < all_nlocal; ++dd) {
+              std_f_all[tagrecv[dd] - 1] = stdfrecv[dd];
+            }
+            for (int dd = 0; dd < all_nlocal; ++dd) {
+              fp << " " << setw(18) << std_f_all[dd];
+            }
+          }
+        }
+        if (rank == 0) {
+          fp << endl;
+        }
       }
+    } else {
+      error->all(FLERR, "unknown computational branch");
     }
-    else {
-      error->all(FLERR,"unknown computational branch");
-    }
-  }
-  else {
+  } else {
     if (numb_models == 1) {
 #ifdef HIGH_PREC
       try {
-      deep_pot.compute (dener, dforce, dvirial, dcoord, dtype, dbox);
-      } catch(deepmd::deepmd_exception& e) {
+        deep_pot.compute(dener, dforce, dvirial, dcoord, dtype, dbox);
+      } catch (deepmd::deepmd_exception &e) {
         error->all(FLERR, e.what());
       }
 #else
@@ -823,27 +834,27 @@ void PairDeepMD::compute(int eflag, int vflag)
       vector dvirial_(dvirial.size(), 0);
       double dener_ = 0;
       try {
-      deep_pot.compute (dener_, dforce_, dvirial_, dcoord_, dtype, dbox_);
-      } catch(deepmd::deepmd_exception& e) {
+        deep_pot.compute(dener_, dforce_, dvirial_, dcoord_, dtype, dbox_);
+      } catch (deepmd::deepmd_exception &e) {
         error->all(FLERR, e.what());
       }
-      for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce[dd] = dforce_[dd];	
-      for (unsigned dd = 0; dd < dvirial.size(); ++dd) dvirial[dd] = dvirial_[dd];	
-      dener = dener_;      
+      for (unsigned dd = 0; dd < dforce.size(); ++dd) dforce[dd] = dforce_[dd];
+      for (unsigned dd = 0; dd < dvirial.size(); ++dd)
+        dvirial[dd] = dvirial_[dd];
+      dener = dener_;
 #endif
-    }
-    else {
-      error->all(FLERR,"Serial version does not support model devi");
+    } else {
+      error->all(FLERR, "Serial version does not support model devi");
     }
   }
 
   // get force
-  for (int ii = 0; ii < nall; ++ii){
-    for (int dd = 0; dd < 3; ++dd){
-      f[ii][dd] += scale[1][1] * dforce[3*ii+dd];
+  for (int ii = 0; ii < nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      f[ii][dd] += scale[1][1] * dforce[3 * ii + dd];
     }
   }
-  
+
   // accumulate energy and virial
   if (eflag) eng_vdwl += scale[1][1] * dener;
   if (vflag) {
@@ -856,18 +867,16 @@ void PairDeepMD::compute(int eflag, int vflag)
   }
 }
 
-
-void PairDeepMD::allocate()
-{
+void PairDeepMD::allocate() {
   allocated = 1;
   int n = atom->ntypes;
 
-  memory->create(setflag,n+1,n+1,"pair:setflag");
-  memory->create(cutsq,n+1,n+1,"pair:cutsq");
-  memory->create(scale,n+1,n+1,"pair:scale");
+  memory->create(setflag, n + 1, n + 1, "pair:setflag");
+  memory->create(cutsq, n + 1, n + 1, "pair:cutsq");
+  memory->create(scale, n + 1, n + 1, "pair:scale");
 
-  for (int i = 1; i <= n; i++){
-    for (int j = i; j <= n; j++){
+  for (int i = 1; i <= n; i++) {
+    for (int j = i; j <= n; j++) {
       setflag[i][j] = 0;
       scale[i][j] = 0;
     }
@@ -882,11 +891,8 @@ void PairDeepMD::allocate()
   }
 }
 
-
-static bool 
-is_key (const string& input) 
-{
-  vector keys ;
+static bool is_key(const string &input) {
+  vector keys;
   keys.push_back("out_freq");
   keys.push_back("out_file");
   keys.push_back("fparam");
@@ -897,7 +903,7 @@ is_key (const string& input)
   keys.push_back("relative");
   keys.push_back("relative_v");
 
-  for (int ii = 0; ii < keys.size(); ++ii){
+  for (int ii = 0; ii < keys.size(); ++ii) {
     if (input == keys[ii]) {
       return true;
     }
@@ -905,39 +911,37 @@ is_key (const string& input)
   return false;
 }
 
-
-void PairDeepMD::settings(int narg, char **arg)
-{
-  if (narg <= 0) error->all(FLERR,"Illegal pair_style command");
+void PairDeepMD::settings(int narg, char **arg) {
+  if (narg <= 0) error->all(FLERR, "Illegal pair_style command");
 
   vector models;
   int iarg = 0;
-  while (iarg < narg){
+  while (iarg < narg) {
     if (is_key(arg[iarg])) {
       break;
     }
-    iarg ++;
+    iarg++;
   }
-  for (int ii = 0; ii < iarg; ++ii){
+  for (int ii = 0; ii < iarg; ++ii) {
     models.push_back(arg[ii]);
   }
   numb_models = models.size();
   if (numb_models == 1) {
     try {
-    deep_pot.init (arg[0], get_node_rank(), get_file_content(arg[0]));
-    } catch(deepmd::deepmd_exception& e) {
+      deep_pot.init(arg[0], get_node_rank(), get_file_content(arg[0]));
+    } catch (deepmd::deepmd_exception &e) {
       error->all(FLERR, e.what());
     }
-    cutoff = deep_pot.cutoff ();
+    cutoff = deep_pot.cutoff();
     numb_types = deep_pot.numb_types();
     dim_fparam = deep_pot.dim_fparam();
     dim_aparam = deep_pot.dim_aparam();
-  }
-  else {
+  } else {
     try {
-    deep_pot.init (arg[0], get_node_rank(), get_file_content(arg[0]));
-    deep_pot_model_devi.init(models, get_node_rank(), get_file_content(models));
-    } catch(deepmd::deepmd_exception& e) {
+      deep_pot.init(arg[0], get_node_rank(), get_file_content(arg[0]));
+      deep_pot_model_devi.init(models, get_node_rank(),
+                               get_file_content(models));
+    } catch (deepmd::deepmd_exception &e) {
       error->all(FLERR, e.what());
     }
     cutoff = deep_pot_model_devi.cutoff();
@@ -958,176 +962,168 @@ void PairDeepMD::settings(int narg, char **arg)
   fparam.clear();
   aparam.clear();
   while (iarg < narg) {
-    if (! is_key(arg[iarg])) {
-      error->all(FLERR,"Illegal pair_style command\nwrong number of parameters\n");
+    if (!is_key(arg[iarg])) {
+      error->all(FLERR,
+                 "Illegal pair_style command\nwrong number of parameters\n");
     }
     if (string(arg[iarg]) == string("out_freq")) {
-      if (iarg+1 >= narg) error->all(FLERR,"Illegal out_freq, not provided");
-      out_freq = atoi(arg[iarg+1]);
+      if (iarg + 1 >= narg) error->all(FLERR, "Illegal out_freq, not provided");
+      out_freq = atoi(arg[iarg + 1]);
       iarg += 2;
-    }
-    else if (string(arg[iarg]) == string("out_file")) {
-      if (iarg+1 >= narg) error->all(FLERR,"Illegal out_file, not provided");
-      out_file = string(arg[iarg+1]);	
+    } else if (string(arg[iarg]) == string("out_file")) {
+      if (iarg + 1 >= narg) error->all(FLERR, "Illegal out_file, not provided");
+      out_file = string(arg[iarg + 1]);
       iarg += 2;
-    }
-    else if (string(arg[iarg]) == string("fparam")) {
-      for (int ii = 0; ii < dim_fparam; ++ii){
-	if (iarg+1+ii >= narg || is_key(arg[iarg+1+ii])) {
-	  char tmp[1024];
-	  sprintf(tmp, "Illegal fparam, the dimension should be %d", dim_fparam);		  
-	  error->all(FLERR, tmp);
-	}
-	fparam.push_back(atof(arg[iarg+1+ii]));
+    } else if (string(arg[iarg]) == string("fparam")) {
+      for (int ii = 0; ii < dim_fparam; ++ii) {
+        if (iarg + 1 + ii >= narg || is_key(arg[iarg + 1 + ii])) {
+          char tmp[1024];
+          sprintf(tmp, "Illegal fparam, the dimension should be %d",
+                  dim_fparam);
+          error->all(FLERR, tmp);
+        }
+        fparam.push_back(atof(arg[iarg + 1 + ii]));
       }
-      iarg += 1 + dim_fparam ;
-    }
-    else if (string(arg[iarg]) == string("aparam")) {
-      for (int ii = 0; ii < dim_aparam; ++ii){
-	if (iarg+1+ii >= narg || is_key(arg[iarg+1+ii])) {
-	  char tmp[1024];
-	  sprintf(tmp, "Illegal aparam, the dimension should be %d", dim_aparam);		  
-	  error->all(FLERR, tmp);
-	}
-	aparam.push_back(atof(arg[iarg+1+ii]));
-      }      
-      iarg += 1 + dim_aparam ;
-    }
-    else if (string(arg[iarg]) == string("ttm")) {
+      iarg += 1 + dim_fparam;
+    } else if (string(arg[iarg]) == string("aparam")) {
+      for (int ii = 0; ii < dim_aparam; ++ii) {
+        if (iarg + 1 + ii >= narg || is_key(arg[iarg + 1 + ii])) {
+          char tmp[1024];
+          sprintf(tmp, "Illegal aparam, the dimension should be %d",
+                  dim_aparam);
+          error->all(FLERR, tmp);
+        }
+        aparam.push_back(atof(arg[iarg + 1 + ii]));
+      }
+      iarg += 1 + dim_aparam;
+    } else if (string(arg[iarg]) == string("ttm")) {
 #ifdef USE_TTM
-      for (int ii = 0; ii < 1; ++ii){
-	if (iarg+1+ii >= narg || is_key(arg[iarg+1+ii])) {
-	  error->all(FLERR, "invalid ttm key: should be ttm ttm_fix_id(str)");
-	}
-      }	
+      for (int ii = 0; ii < 1; ++ii) {
+        if (iarg + 1 + ii >= narg || is_key(arg[iarg + 1 + ii])) {
+          error->all(FLERR, "invalid ttm key: should be ttm ttm_fix_id(str)");
+        }
+      }
       do_ttm = true;
-      ttm_fix_id = arg[iarg+1];
+      ttm_fix_id = arg[iarg + 1];
       iarg += 1 + 1;
 #else
-      error->all(FLERR, "The deepmd-kit was compiled without support for TTM, please rebuild it with LAMMPS version >=20210831");
-#endif      
+      error->all(FLERR,
+                 "The deepmd-kit was compiled without support for TTM, please "
+                 "rebuild it with LAMMPS version >=20210831");
+#endif
     }
-	  
+
     ///////////////////////////////////////////////
     // pair_style     deepmd cp.pb fparam_from_compute TEMP
     // compute        TEMP all temp
     //////////////////////////////////////////////
     else if (string(arg[iarg]) == string("fparam_from_compute")) {
-      for (int ii = 0; ii < 1; ++ii){
-        if (iarg+1+ii >= narg || is_key(arg[iarg+1+ii])) {
-          error->all(FLERR, "invalid fparam_from_compute key: should be fparam_from_compute compute_id(str)");
+      for (int ii = 0; ii < 1; ++ii) {
+        if (iarg + 1 + ii >= narg || is_key(arg[iarg + 1 + ii])) {
+          error->all(FLERR,
+                     "invalid fparam_from_compute key: should be "
+                     "fparam_from_compute compute_id(str)");
         }
-      }	
+      }
       do_compute = true;
-      compute_id = arg[iarg+1];
+      compute_id = arg[iarg + 1];
       iarg += 1 + 1;
     }
-	  
+
     else if (string(arg[iarg]) == string("atomic")) {
       out_each = 1;
       iarg += 1;
-    }
-    else if (string(arg[iarg]) == string("relative")) {
+    } else if (string(arg[iarg]) == string("relative")) {
       out_rel = 1;
 #ifdef HIGH_PREC
-      eps = atof(arg[iarg+1]);
+      eps = atof(arg[iarg + 1]);
 #else
-      eps = strtof(arg[iarg+1], NULL);
+      eps = strtof(arg[iarg + 1], NULL);
 #endif
       iarg += 2;
-    }
-    else if (string(arg[iarg]) == string("relative_v")) {
+    } else if (string(arg[iarg]) == string("relative_v")) {
       out_rel_v = 1;
 #ifdef HIGH_PREC
-      eps_v = atof(arg[iarg+1]);
+      eps_v = atof(arg[iarg + 1]);
 #else
-      eps_v = strtof(arg[iarg+1], NULL);
+      eps_v = strtof(arg[iarg + 1], NULL);
 #endif
       iarg += 2;
     }
   }
-  if (out_freq < 0) error->all(FLERR,"Illegal out_freq, should be >= 0");
+  if (out_freq < 0) error->all(FLERR, "Illegal out_freq, should be >= 0");
   if (do_ttm && aparam.size() > 0) {
-    error->all(FLERR,"aparam and ttm should NOT be set simultaneously");
+    error->all(FLERR, "aparam and ttm should NOT be set simultaneously");
   }
   if (do_compute && fparam.size() > 0) {
-    error->all(FLERR,"fparam and fparam_from_compute should NOT be set simultaneously");
+    error->all(
+        FLERR,
+        "fparam and fparam_from_compute should NOT be set simultaneously");
   }
-  
-  if (comm->me == 0){
-    if (numb_models > 1 && out_freq > 0){
+
+  if (comm->me == 0) {
+    if (numb_models > 1 && out_freq > 0) {
       if (!is_restart) {
-      fp.open (out_file);
-      fp << scientific;
-      fp << "#"
-	 << setw(12-1) << "step" 
-	 << setw(18+1) << "max_devi_v"
-	 << setw(18+1) << "min_devi_v"
-	 << setw(18+1) << "avg_devi_v"
-	 << setw(18+1) << "max_devi_f"
-	 << setw(18+1) << "min_devi_f"
-	 << setw(18+1) << "avg_devi_f"
-	 << endl;
+        fp.open(out_file);
+        fp << scientific;
+        fp << "#" << setw(12 - 1) << "step" << setw(18 + 1) << "max_devi_v"
+           << setw(18 + 1) << "min_devi_v" << setw(18 + 1) << "avg_devi_v"
+           << setw(18 + 1) << "max_devi_f" << setw(18 + 1) << "min_devi_f"
+           << setw(18 + 1) << "avg_devi_f" << endl;
       } else {
-        fp.open (out_file, std::ofstream::out | std::ofstream::app);
+        fp.open(out_file, std::ofstream::out | std::ofstream::app);
         fp << scientific;
       }
     }
     string pre = "  ";
     cout << pre << ">>> Info of model(s):" << endl
-	 << pre << "using " << setw(3) << numb_models << " model(s): ";
+         << pre << "using " << setw(3) << numb_models << " model(s): ";
     if (narg == 1) {
       cout << arg[0] << " ";
-    }
-    else {
-      for (int ii = 0; ii < models.size(); ++ii){
-      	cout << models[ii] << " ";
+    } else {
+      for (int ii = 0; ii < models.size(); ++ii) {
+        cout << models[ii] << " ";
       }
     }
     cout << endl
-	 << pre << "rcut in model:      " << cutoff << endl
-	 << pre << "ntypes in model:    " << numb_types << endl;
+         << pre << "rcut in model:      " << cutoff << endl
+         << pre << "ntypes in model:    " << numb_types << endl;
     if (fparam.size() > 0) {
-      cout << pre << "using fparam(s):    " ;
-      for (int ii = 0; ii < dim_fparam; ++ii){
-	cout << fparam[ii] << "  " ;
+      cout << pre << "using fparam(s):    ";
+      for (int ii = 0; ii < dim_fparam; ++ii) {
+        cout << fparam[ii] << "  ";
       }
       cout << endl;
     }
-    if (do_compute){
-      cout << pre << "using compute id:      " ;
+    if (do_compute) {
+      cout << pre << "using compute id:      ";
       cout << compute_id << "  " << endl;
     }
     if (aparam.size() > 0) {
-      cout << pre << "using aparam(s):    " ;
-      for (int ii = 0; ii < aparam.size(); ++ii){
-	cout << aparam[ii] << "  " ;
+      cout << pre << "using aparam(s):    ";
+      for (int ii = 0; ii < aparam.size(); ++ii) {
+        cout << aparam[ii] << "  ";
       }
       cout << endl;
     }
-    if (do_ttm){
-      cout << pre << "using ttm fix:      " ;
-      cout << ttm_fix_id << "  " ;
-      if (dim_fparam > 0){
+    if (do_ttm) {
+      cout << pre << "using ttm fix:      ";
+      cout << ttm_fix_id << "  ";
+      if (dim_fparam > 0) {
         cout << "(fparam)" << endl;
-      }
-      else if (dim_aparam > 0){
+      } else if (dim_aparam > 0) {
         cout << "(aparam)" << endl;
       }
-    }  
+    }
   }
-  
+
   comm_reverse = numb_models * 3;
   all_force.resize(numb_models);
 }
 
-void PairDeepMD::read_restart(FILE *)
-{
-  is_restart = true;
-}
+void PairDeepMD::read_restart(FILE *) { is_restart = true; }
 
-void PairDeepMD::write_restart(FILE *)
-{
+void PairDeepMD::write_restart(FILE *) {
   // pass
 }
 
@@ -1135,28 +1131,29 @@ void PairDeepMD::write_restart(FILE *)
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
-void PairDeepMD::coeff(int narg, char **arg)
-{
+void PairDeepMD::coeff(int narg, char **arg) {
   if (!allocated) {
     allocate();
   }
 
   int n = atom->ntypes;
-  int ilo,ihi,jlo,jhi;
+  int ilo, ihi, jlo, jhi;
   ilo = 0;
   jlo = 0;
   ihi = n;
   jhi = n;
   if (narg >= 2) {
-    utils::bounds(FLERR,arg[0],1,atom->ntypes,ilo,ihi,error);
-    utils::bounds(FLERR,arg[1],1,atom->ntypes,jlo,jhi,error);
+    utils::bounds(FLERR, arg[0], 1, atom->ntypes, ilo, ihi, error);
+    utils::bounds(FLERR, arg[1], 1, atom->ntypes, jlo, jhi, error);
     if (ilo != 1 || jlo != 1 || ihi != n || jhi != n) {
-      error->all(FLERR,"deepmd requires that the scale should be set to all atom types, i.e. pair_coeff * *.");
+      error->all(FLERR,
+                 "deepmd requires that the scale should be set to all atom "
+                 "types, i.e. pair_coeff * *.");
     }
   }
   if (narg <= 2) {
     type_idx_map.resize(numb_types);
-    for (int ii = 0; ii < numb_types; ++ii){
+    for (int ii = 0; ii < numb_types; ++ii) {
       type_idx_map[ii] = ii;
     }
   } else {
@@ -1187,57 +1184,64 @@ void PairDeepMD::coeff(int narg, char **arg)
         }
       }
       if (!found_element) {
-        error->all(FLERR, "pair_coeff: element " + type_name + " not found in the model");
+        error->all(FLERR, "pair_coeff: element " + type_name +
+                              " not found in the model");
       }
       iarg += 1;
     }
     numb_types = type_idx_map.size();
   }
   if (numb_types < n) {
-    error->all(FLERR, "number of types assigned by pair_coeff or in the model is less than the number of types in the system");
+    error->all(FLERR,
+               "number of types assigned by pair_coeff or in the model is less "
+               "than the number of types in the system");
   }
   for (int i = ilo; i <= ihi; i++) {
-    for (int j = MAX(jlo,i); j <= jhi; j++) {
+    for (int j = MAX(jlo, i); j <= jhi; j++) {
       setflag[i][j] = 1;
       scale[i][j] = 1.0;
       if (i > numb_types || j > numb_types) {
-	char warning_msg[1024];
-	sprintf(warning_msg, "Interaction between types %d and %d is set with deepmd, but will be ignored.\n Deepmd model has only %d types, it only computes the mulitbody interaction of types: 1-%d.", i, j, numb_types, numb_types);
-	error->warning(FLERR, warning_msg);
+        char warning_msg[1024];
+        sprintf(warning_msg,
+                "Interaction between types %d and %d is set with deepmd, but "
+                "will be ignored.\n Deepmd model has only %d types, it only "
+                "computes the mulitbody interaction of types: 1-%d.",
+                i, j, numb_types, numb_types);
+        error->warning(FLERR, warning_msg);
       }
     }
   }
 }
 
-
-void PairDeepMD::init_style()
-{
-#if LAMMPS_VERSION_NUMBER>=20220324
+void PairDeepMD::init_style() {
+#if LAMMPS_VERSION_NUMBER >= 20220324
   neighbor->add_request(this, NeighConst::REQ_FULL);
 #else
-  int irequest = neighbor->request(this,instance_me);
+  int irequest = neighbor->request(this, instance_me);
   neighbor->requests[irequest]->half = 0;
-  neighbor->requests[irequest]->full = 1;  
-  // neighbor->requests[irequest]->newton = 2;  
+  neighbor->requests[irequest]->full = 1;
+  // neighbor->requests[irequest]->newton = 2;
 #endif
-  if (out_each == 1){
+  if (out_each == 1) {
     int ntotal = atom->natoms;
     int nprocs = comm->nprocs;
     memory->create(counts, nprocs, "deepmd:counts");
     memory->create(displacements, nprocs, "deepmd:displacements");
-    memory->create(stdfsend,ntotal,"deepmd:stdfsendall");
-    memory->create(stdfrecv,ntotal,"deepmd:stdfrecvall");
-    memory->create(tagsend,ntotal,"deepmd:tagsendall");
-    memory->create(tagrecv,ntotal,"deepmd:tagrecvall");
+    memory->create(stdfsend, ntotal, "deepmd:stdfsendall");
+    memory->create(stdfrecv, ntotal, "deepmd:stdfrecvall");
+    memory->create(tagsend, ntotal, "deepmd:tagsendall");
+    memory->create(tagrecv, ntotal, "deepmd:tagrecvall");
   }
 }
 
-
-double PairDeepMD::init_one(int i, int j)
-{
+double PairDeepMD::init_one(int i, int j) {
   if (i > numb_types || j > numb_types) {
     char warning_msg[1024];
-    sprintf(warning_msg, "Interaction between types %d and %d is set with deepmd, but will be ignored.\n Deepmd model has only %d types, it only computes the mulitbody interaction of types: 1-%d.", i, j, numb_types, numb_types);
+    sprintf(warning_msg,
+            "Interaction between types %d and %d is set with deepmd, but will "
+            "be ignored.\n Deepmd model has only %d types, it only computes "
+            "the mulitbody interaction of types: 1-%d.",
+            i, j, numb_types, numb_types);
     error->warning(FLERR, warning_msg);
   }
 
@@ -1247,20 +1251,18 @@ double PairDeepMD::init_one(int i, int j)
   return cutoff;
 }
 
-
 /* ---------------------------------------------------------------------- */
 
-int PairDeepMD::pack_reverse_comm(int n, int first, double *buf)
-{
-  int i,m,last;
+int PairDeepMD::pack_reverse_comm(int n, int first, double *buf) {
+  int i, m, last;
 
   m = 0;
   last = first + n;
   for (i = first; i < last; i++) {
-    for (int dd = 0; dd < numb_models; ++dd){
-      buf[m++] = all_force[dd][3*i+0];
-      buf[m++] = all_force[dd][3*i+1];
-      buf[m++] = all_force[dd][3*i+2];
+    for (int dd = 0; dd < numb_models; ++dd) {
+      buf[m++] = all_force[dd][3 * i + 0];
+      buf[m++] = all_force[dd][3 * i + 1];
+      buf[m++] = all_force[dd][3 * i + 2];
     }
   }
   return m;
@@ -1268,30 +1270,28 @@ int PairDeepMD::pack_reverse_comm(int n, int first, double *buf)
 
 /* ---------------------------------------------------------------------- */
 
-void PairDeepMD::unpack_reverse_comm(int n, int *list, double *buf)
-{
-  int i,j,m;
+void PairDeepMD::unpack_reverse_comm(int n, int *list, double *buf) {
+  int i, j, m;
 
   m = 0;
   for (i = 0; i < n; i++) {
     j = list[i];
-    for (int dd = 0; dd < numb_models; ++dd){
-      all_force[dd][3*j+0] += buf[m++];
-      all_force[dd][3*j+1] += buf[m++];
-      all_force[dd][3*j+2] += buf[m++];
+    for (int dd = 0; dd < numb_models; ++dd) {
+      all_force[dd][3 * j + 0] += buf[m++];
+      all_force[dd][3 * j + 1] += buf[m++];
+      all_force[dd][3 * j + 2] += buf[m++];
     }
   }
 }
 
-void *PairDeepMD::extract(const char *str, int &dim)
-{
-  if (strcmp(str,"cut_coul") == 0) {
+void *PairDeepMD::extract(const char *str, int &dim) {
+  if (strcmp(str, "cut_coul") == 0) {
     dim = 0;
-    return (void *) &cutoff;
+    return (void *)&cutoff;
   }
-  if (strcmp(str,"scale") == 0) {
+  if (strcmp(str, "scale") == 0) {
     dim = 2;
-    return (void *) scale;
+    return (void *)scale;
   }
   return NULL;
 }
diff --git a/source/lmp/pair_deepmd.h.in b/source/lmp/pair_deepmd.h.in
index 1aa72ed1d8..a288b472c2 100644
--- a/source/lmp/pair_deepmd.h.in
+++ b/source/lmp/pair_deepmd.h.in
@@ -63,11 +63,11 @@ class PairDeepMD : public Pair {
   int get_node_rank();
   std::string get_file_content(const std::string & model);
   std::vector get_file_content(const std::vector & models);
- protected:  
+ protected:
   virtual void allocate();
   double **scale;
 
-private:  
+private:
   deepmd::DeepPot deep_pot;
   deepmd::DeepPotModelDevi deep_pot_model_devi;
   unsigned numb_models;
@@ -107,7 +107,7 @@ private:
       );
   bool do_compute;
   std::string compute_id;
-  
+
     void make_ttm_fparam(
 #ifdef HIGH_PREC
       std::vector & fparam
@@ -115,7 +115,7 @@ private:
       std::vector & fparam
 #endif
       );
-      
+
   void make_ttm_aparam(
 #ifdef HIGH_PREC
       std::vector & dparam
diff --git a/source/lmp/plugin/CMakeLists.txt b/source/lmp/plugin/CMakeLists.txt
index ca42a7267b..8d20b0866c 100644
--- a/source/lmp/plugin/CMakeLists.txt
+++ b/source/lmp/plugin/CMakeLists.txt
@@ -1,13 +1,13 @@
-if (DEFINED LAMMPS_SOURCE_ROOT OR DEFINED LAMMPS_VERSION)
+if(DEFINED LAMMPS_SOURCE_ROOT OR DEFINED LAMMPS_VERSION)
   message(STATUS "enable LAMMPS plugin mode")
   add_library(lammps_interface INTERFACE)
-  if (DEFINED LAMMPS_VERSION)
+  if(DEFINED LAMMPS_VERSION)
     cmake_minimum_required(VERSION 3.11)
     include(FetchContent)
-    FetchContent_Declare(lammps_download
-      GIT_REPOSITORY    https://github.com/lammps/lammps
-      GIT_TAG           ${LAMMPS_VERSION}
-    )
+    FetchContent_Declare(
+      lammps_download
+      GIT_REPOSITORY https://github.com/lammps/lammps
+      GIT_TAG ${LAMMPS_VERSION})
     FetchContent_GetProperties(lammps_download)
     if(NOT lammps_download_POPULATED)
       FetchContent_Populate(lammps_download)
@@ -16,7 +16,7 @@ if (DEFINED LAMMPS_SOURCE_ROOT OR DEFINED LAMMPS_VERSION)
   endif()
   set(LAMMPS_HEADER_DIR ${LAMMPS_SOURCE_ROOT}/src)
   message(STATUS "LAMMPS_HEADER_DIR is ${LAMMPS_HEADER_DIR}")
-  
+
   target_include_directories(lammps_interface INTERFACE ${LAMMPS_HEADER_DIR})
 
   find_package(MPI)
@@ -24,98 +24,95 @@ if (DEFINED LAMMPS_SOURCE_ROOT OR DEFINED LAMMPS_VERSION)
     set(LAMMPS_MPI_INCLUDE_DIRS ${MPI_CXX_INCLUDE_DIRS})
     # LAMMPS has linked MPI; do not link twice
     # target_link_libraries(lammps_interface INTERFACE MPI::MPI_CXX)
-    target_include_directories(lammps_interface INTERFACE ${LAMMPS_MPI_INCLUDE_DIRS})
+    target_include_directories(lammps_interface
+                               INTERFACE ${LAMMPS_MPI_INCLUDE_DIRS})
   else()
     # Use LAMMPS serial mpi.h header
-    target_include_directories(lammps_interface INTERFACE "${LAMMPS_HEADER_DIR}/STUBS")
+    target_include_directories(lammps_interface
+                               INTERFACE "${LAMMPS_HEADER_DIR}/STUBS")
   endif()
 
   # get_lammps_version
   # https://github.com/lammps/lammps/blob/c2a12f97c5f665852fb38fdd4922f7dd2e77a0a1/cmake/Modules/LAMMPSUtils.cmake#L27-L46
   include(${LAMMPS_SOURCE_ROOT}/cmake/Modules/LAMMPSUtils.cmake)
   get_lammps_version(${LAMMPS_HEADER_DIR}/version.h LAMMPS_VERSION_NUMBER)
-  set(LAMMPS_VERSION_NUMBER ${LAMMPS_VERSION_NUMBER} PARENT_SCOPE)
+  set(LAMMPS_VERSION_NUMBER
+      ${LAMMPS_VERSION_NUMBER}
+      PARENT_SCOPE)
   message(STATUS "LAMMPS version is ${LAMMPS_VERSION_NUMBER}")
 
-  configure_file("../pair_deepmd.h.in" "${CMAKE_CURRENT_BINARY_DIR}/pair_deepmd.h" @ONLY)
+  configure_file("../pair_deepmd.h.in"
+                 "${CMAKE_CURRENT_BINARY_DIR}/pair_deepmd.h" @ONLY)
 
-  file(GLOB LMP_SRC
-    deepmdplugin.cpp
-    ../*.cpp
-    ${LAMMPS_SOURCE_ROOT}/src/kspace.cpp # for pppm_dplr
-    ${LAMMPS_SOURCE_ROOT}/src/KSPACE/pppm.cpp
-  )
-  if (LAMMPS_VERSION_NUMBER GREATER 20210831)
-    list(APPEND LMP_SRC ${LAMMPS_SOURCE_ROOT}/src/EXTRA-FIX/fix_ttm.cpp) # for ttm
+  file(GLOB LMP_SRC deepmdplugin.cpp ../*.cpp
+       ${LAMMPS_SOURCE_ROOT}/src/kspace.cpp # for pppm_dplr
+       ${LAMMPS_SOURCE_ROOT}/src/KSPACE/pppm.cpp)
+  if(LAMMPS_VERSION_NUMBER GREATER 20210831)
+    list(APPEND LMP_SRC ${LAMMPS_SOURCE_ROOT}/src/EXTRA-FIX/fix_ttm.cpp
+    )# for ttm
   endif()
 
   function(_add_lmp_plugin_variant variant_name prec_def)
-  set (libname "deepmd_lmp${variant_name}")
+    set(libname "deepmd_lmp${variant_name}")
 
-  add_library(${libname} MODULE ${LMP_SRC})
+    add_library(${libname} MODULE ${LMP_SRC})
 
-  # link: libdeepmd
-  target_link_libraries (${libname} PUBLIC
-    lammps_interface
-    ${LIB_DEEPMD_CC}
-  )
-  target_include_directories(${libname} PRIVATE
-    ${CMAKE_CURRENT_BINARY_DIR}
-    ${CMAKE_CURRENT_SOURCE_DIR}/..
-    ${LAMMPS_SOURCE_ROOT}/src/PLUGIN
-    ${LAMMPS_SOURCE_ROOT}/src/KSPACE
-    ${LAMMPS_SOURCE_ROOT}/src
-  )
-  if (LAMMPS_VERSION_NUMBER GREATER 20210831)
-    target_include_directories(${libname} PRIVATE ${LAMMPS_SOURCE_ROOT}/src/EXTRA-FIX) # for ttm
-  endif()
-  if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
-    set_target_properties(${libname} PROPERTIES LINK_FLAGS "-Wl,-undefined,dynamic_lookup")
-  else()
-  set_target_properties(
-    ${libname} 
-    PROPERTIES 
-    INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}"
-    LINK_FLAGS "-rdynamic"
-  )
-  endif()
-  target_compile_definitions(${libname}
-    PUBLIC ${prec_def}
-    PRIVATE "LMPPLUGIN" # fix header path
-    PRIVATE "LAMMPS_VERSION_NUMBER=${LAMMPS_VERSION_NUMBER}"
-  )
+    # link: libdeepmd
+    target_link_libraries(${libname} PUBLIC lammps_interface ${LIB_DEEPMD_CC})
+    target_include_directories(
+      ${libname}
+      PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/..
+              ${LAMMPS_SOURCE_ROOT}/src/PLUGIN ${LAMMPS_SOURCE_ROOT}/src/KSPACE
+              ${LAMMPS_SOURCE_ROOT}/src)
+    if(LAMMPS_VERSION_NUMBER GREATER 20210831)
+      target_include_directories(
+        ${libname} PRIVATE ${LAMMPS_SOURCE_ROOT}/src/EXTRA-FIX) # for ttm
+    endif()
+    if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
+      set_target_properties(
+        ${libname} PROPERTIES LINK_FLAGS "-Wl,-undefined,dynamic_lookup")
+    else()
+      set_target_properties(
+        ${libname} PROPERTIES INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}"
+                              LINK_FLAGS "-rdynamic")
+    endif()
+    target_compile_definitions(
+      ${libname}
+      PUBLIC ${prec_def}
+      PRIVATE "LMPPLUGIN" # fix header path
+      PRIVATE "LAMMPS_VERSION_NUMBER=${LAMMPS_VERSION_NUMBER}")
 
-  if (CMAKE_TESTING_ENABLED)
-    target_link_libraries(${libname} PRIVATE coverage_config)
-  endif()
+    if(CMAKE_TESTING_ENABLED)
+      target_link_libraries(${libname} PRIVATE coverage_config)
+    endif()
 
-  if(BUILD_PY_IF)
-    install(TARGETS ${libname} DESTINATION deepmd/op/)
-  else(BUILD_PY_IF)
-  install(TARGETS ${libname} DESTINATION lib/)
+    if(BUILD_PY_IF)
+      install(TARGETS ${libname} DESTINATION deepmd/op/)
+    else(BUILD_PY_IF)
+      install(TARGETS ${libname} DESTINATION lib/)
 
-  if (${LAMMPS_VERSION_NUMBER} GREATER_EQUAL 20220324)
-    set(PLUGINNAME "dpplugin.so")
-    INSTALL(CODE "execute_process( \
+      if(${LAMMPS_VERSION_NUMBER} GREATER_EQUAL 20220324)
+        set(PLUGINNAME "dpplugin.so")
+        install(
+          CODE "execute_process( \
         COMMAND ${CMAKE_COMMAND} -E make_directory \
 		${CMAKE_INSTALL_PREFIX}/lib/${libname}/   \
-        )"
-	)
-    INSTALL(CODE "execute_process( \
+        )")
+        install(
+          CODE "execute_process( \
         COMMAND ${CMAKE_COMMAND} -E create_symlink \
 		../${CMAKE_SHARED_LIBRARY_PREFIX}${libname}${CMAKE_SHARED_LIBRARY_SUFFIX} \
         ${CMAKE_INSTALL_PREFIX}/lib/${libname}/${PLUGINNAME}   \
-        )"
-    )
-  endif()
-  endif(BUILD_PY_IF)
+        )")
+      endif()
+    endif(BUILD_PY_IF)
 
   endfunction()
   if(BUILD_PY_IF)
     _add_lmp_plugin_variant("plugin" "${HIGH_PREC_DEF}")
   else(BUILD_PY_IF)
-  _add_lmp_plugin_variant("${HIGH_PREC_VARIANT}" "${HIGH_PREC_DEF}")
-  _add_lmp_plugin_variant("${LOW_PREC_VARIANT}" "${LOW_PREC_DEF}")
+    _add_lmp_plugin_variant("${HIGH_PREC_VARIANT}" "${HIGH_PREC_DEF}")
+    _add_lmp_plugin_variant("${LOW_PREC_VARIANT}" "${LOW_PREC_DEF}")
   endif()
 
 else()
diff --git a/source/lmp/plugin/deepmdplugin.cpp b/source/lmp/plugin/deepmdplugin.cpp
index feebec30b1..e8794d1a5c 100644
--- a/source/lmp/plugin/deepmdplugin.cpp
+++ b/source/lmp/plugin/deepmdplugin.cpp
@@ -1,71 +1,62 @@
 /**
-* See https://docs.lammps.org/Developer_plugins.html
-*/
+ * See https://docs.lammps.org/Developer_plugins.html
+ */
+#include "compute_deeptensor_atom.h"
+#include "fix_dplr.h"
 #include "lammpsplugin.h"
-#include "version.h"
 #include "pair_deepmd.h"
-#include "fix_dplr.h"
-#include "compute_deeptensor_atom.h"
-#if LAMMPS_VERSION_NUMBER>=20220328
+#include "version.h"
+#if LAMMPS_VERSION_NUMBER >= 20220328
 #include "pppm_dplr.h"
 #endif
 
 using namespace LAMMPS_NS;
 
-static Pair *pairdeepmd(LAMMPS *lmp)
-{
-  return new PairDeepMD(lmp);
-}
+static Pair *pairdeepmd(LAMMPS *lmp) { return new PairDeepMD(lmp); }
 
-static Compute *computedeepmdtensoratom(LAMMPS *lmp, int narg, char **arg)
-{
+static Compute *computedeepmdtensoratom(LAMMPS *lmp, int narg, char **arg) {
   return new ComputeDeeptensorAtom(lmp, narg, arg);
 }
 
-static Fix *fixdplr(LAMMPS *lmp, int narg, char **arg)
-{
+static Fix *fixdplr(LAMMPS *lmp, int narg, char **arg) {
   return new FixDPLR(lmp, narg, arg);
 }
 
-#if LAMMPS_VERSION_NUMBER>=20220328
-static KSpace *pppmdplr(LAMMPS *lmp)
-{
-  return new PPPMDPLR(lmp);
-}
+#if LAMMPS_VERSION_NUMBER >= 20220328
+static KSpace *pppmdplr(LAMMPS *lmp) { return new PPPMDPLR(lmp); }
 #endif
 
-extern "C" void lammpsplugin_init(void *lmp, void *handle, void *regfunc)
-{
+extern "C" void lammpsplugin_init(void *lmp, void *handle, void *regfunc) {
   lammpsplugin_t plugin;
-  lammpsplugin_regfunc register_plugin = (lammpsplugin_regfunc) regfunc;
+  lammpsplugin_regfunc register_plugin = (lammpsplugin_regfunc)regfunc;
 
   plugin.version = LAMMPS_VERSION;
   plugin.style = "pair";
   plugin.name = "deepmd";
   plugin.info = "deepmd pair style " STR_GIT_SUMM;
   plugin.author = "Han Wang";
-  plugin.creator.v1 = (lammpsplugin_factory1 *) &pairdeepmd;
+  plugin.creator.v1 = (lammpsplugin_factory1 *)&pairdeepmd;
   plugin.handle = handle;
   (*register_plugin)(&plugin, lmp);
 
   plugin.style = "compute";
   plugin.name = "deeptensor/atom";
   plugin.info = "compute deeptensor/atom " STR_GIT_SUMM;
-  plugin.creator.v2 = (lammpsplugin_factory2 *) &computedeepmdtensoratom;
+  plugin.creator.v2 = (lammpsplugin_factory2 *)&computedeepmdtensoratom;
   (*register_plugin)(&plugin, lmp);
 
   plugin.style = "fix";
   plugin.name = "dplr";
   plugin.info = "fix dplr " STR_GIT_SUMM;
-  plugin.creator.v2 = (lammpsplugin_factory2 *) &fixdplr;
+  plugin.creator.v2 = (lammpsplugin_factory2 *)&fixdplr;
   (*register_plugin)(&plugin, lmp);
 
-#if LAMMPS_VERSION_NUMBER>=20220328
+#if LAMMPS_VERSION_NUMBER >= 20220328
   // lammps/lammps#
   plugin.style = "kspace";
   plugin.name = "pppm/dplr";
   plugin.info = "kspace pppm/dplr " STR_GIT_SUMM;
-  plugin.creator.v1 = (lammpsplugin_factory1 *) &pppmdplr;
+  plugin.creator.v1 = (lammpsplugin_factory1 *)&pppmdplr;
   (*register_plugin)(&plugin, lmp);
 #endif
 }
diff --git a/source/lmp/pppm_dplr.cpp b/source/lmp/pppm_dplr.cpp
index 1a1070c04f..0abf8fef7a 100644
--- a/source/lmp/pppm_dplr.cpp
+++ b/source/lmp/pppm_dplr.cpp
@@ -1,40 +1,41 @@
-#include 
 #include "pppm_dplr.h"
+
+#include 
+
 #include "atom.h"
 #include "domain.h"
-#include "force.h"
-#include "memory.h"
 #include "error.h"
+#include "force.h"
+#include "gridcomm.h"
 #include "math_const.h"
+#include "memory.h"
 #include "pppm.h"
-#include "gridcomm.h"
 
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-enum{REVERSE_RHO};
-enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
+enum { REVERSE_RHO };
+enum { FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM };
 
 #define OFFSET 16384
 
 #ifdef FFT_SINGLE
 #define ZEROF 0.0f
-#define ONEF  1.0f
+#define ONEF 1.0f
 #else
 #define ZEROF 0.0
-#define ONEF  1.0
+#define ONEF 1.0
 #endif
 
-
 /* ---------------------------------------------------------------------- */
 
-#if LAMMPS_VERSION_NUMBER<20181109
+#if LAMMPS_VERSION_NUMBER < 20181109
 // See lammps/lammps#1165
-PPPMDPLR::PPPMDPLR(LAMMPS *lmp, int narg, char **arg) :
-  PPPM(lmp, narg, arg)
+PPPMDPLR::PPPMDPLR(LAMMPS *lmp, int narg, char **arg)
+    : PPPM(lmp, narg, arg)
 #else
-PPPMDPLR::PPPMDPLR(LAMMPS *lmp) :
-  PPPM(lmp)
+PPPMDPLR::PPPMDPLR(LAMMPS *lmp)
+    : PPPM(lmp)
 #endif
 {
   triclinic_support = 1;
@@ -42,18 +43,17 @@ PPPMDPLR::PPPMDPLR(LAMMPS *lmp) :
 
 /* ---------------------------------------------------------------------- */
 
-void PPPMDPLR::init()
-{
+void PPPMDPLR::init() {
   // DPLR PPPM requires newton on, b/c it computes forces on ghost atoms
 
   if (force->newton == 0)
-    error->all(FLERR,"Kspace style pppm/dplr requires newton on");
+    error->all(FLERR, "Kspace style pppm/dplr requires newton on");
 
   PPPM::init();
 
   int nlocal = atom->nlocal;
   // cout << " ninit pppm/dplr ---------------------- " << nlocal << endl;
-  fele.resize(nlocal*3);
+  fele.resize(nlocal * 3);
   fill(fele.begin(), fele.end(), 0.0);
 }
 
@@ -61,14 +61,13 @@ void PPPMDPLR::init()
    compute the PPPM long-range force, energy, virial
 ------------------------------------------------------------------------- */
 
-void PPPMDPLR::compute(int eflag, int vflag)
-{
-  int i,j;
+void PPPMDPLR::compute(int eflag, int vflag) {
+  int i, j;
 
   // set energy/virial flags
   // invoke allocate_peratom() if needed for first time
 
-  ev_init(eflag,vflag);
+  ev_init(eflag, vflag);
 
   if (evflag_atom && !peratom_allocate_flag) allocate_peratom();
 
@@ -85,7 +84,8 @@ void PPPMDPLR::compute(int eflag, int vflag)
 
   // convert atoms from box to lamda coords
 
-  if (triclinic == 0) boxlo = domain->boxlo;
+  if (triclinic == 0)
+    boxlo = domain->boxlo;
   else {
     boxlo = domain->boxlo_lamda;
     domain->x2lamda(atom->nlocal);
@@ -96,7 +96,7 @@ void PPPMDPLR::compute(int eflag, int vflag)
   if (atom->nmax > nmax) {
     memory->destroy(part2grid);
     nmax = atom->nmax;
-    memory->create(part2grid,nmax,3,"pppm:part2grid");
+    memory->create(part2grid, nmax, 3, "pppm:part2grid");
   }
 
   // find grid points for all my particles
@@ -109,12 +109,12 @@ void PPPMDPLR::compute(int eflag, int vflag)
   //   to fully sum contribution in their 3d bricks
   // remap from 3d decomposition to FFT decomposition
 
-#if LAMMPS_VERSION_NUMBER>=20210831
-  gc->reverse_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
-                          gc_buf1,gc_buf2,MPI_FFT_SCALAR);
+#if LAMMPS_VERSION_NUMBER >= 20210831
+  gc->reverse_comm(GridComm::KSPACE, this, 1, sizeof(FFT_SCALAR), REVERSE_RHO,
+                   gc_buf1, gc_buf2, MPI_FFT_SCALAR);
 #else
-  gc->reverse_comm_kspace(this,1,sizeof(FFT_SCALAR),REVERSE_RHO,
-                          gc_buf1,gc_buf2,MPI_FFT_SCALAR);
+  gc->reverse_comm_kspace(this, 1, sizeof(FFT_SCALAR), REVERSE_RHO, gc_buf1,
+                          gc_buf2, MPI_FFT_SCALAR);
 #endif
   brick2fft();
 
@@ -129,40 +129,40 @@ void PPPMDPLR::compute(int eflag, int vflag)
   // to fill ghost cells surrounding their 3d bricks
 
   if (differentiation_flag == 1)
-#if LAMMPS_VERSION_NUMBER>=20210831
-    gc->forward_comm(GridComm::KSPACE,this,1,sizeof(FFT_SCALAR),FORWARD_AD,
-                            gc_buf1,gc_buf2,MPI_FFT_SCALAR);
+#if LAMMPS_VERSION_NUMBER >= 20210831
+    gc->forward_comm(GridComm::KSPACE, this, 1, sizeof(FFT_SCALAR), FORWARD_AD,
+                     gc_buf1, gc_buf2, MPI_FFT_SCALAR);
 #else
-    gc->forward_comm_kspace(this,1,sizeof(FFT_SCALAR),FORWARD_AD,
-                            gc_buf1,gc_buf2,MPI_FFT_SCALAR);
+    gc->forward_comm_kspace(this, 1, sizeof(FFT_SCALAR), FORWARD_AD, gc_buf1,
+                            gc_buf2, MPI_FFT_SCALAR);
 #endif
   else
-#if LAMMPS_VERSION_NUMBER>=20210831
-    gc->forward_comm(GridComm::KSPACE,this,3,sizeof(FFT_SCALAR),FORWARD_IK,
-                            gc_buf1,gc_buf2,MPI_FFT_SCALAR);
+#if LAMMPS_VERSION_NUMBER >= 20210831
+    gc->forward_comm(GridComm::KSPACE, this, 3, sizeof(FFT_SCALAR), FORWARD_IK,
+                     gc_buf1, gc_buf2, MPI_FFT_SCALAR);
 #else
-    gc->forward_comm_kspace(this,3,sizeof(FFT_SCALAR),FORWARD_IK,
-                            gc_buf1,gc_buf2,MPI_FFT_SCALAR);
+    gc->forward_comm_kspace(this, 3, sizeof(FFT_SCALAR), FORWARD_IK, gc_buf1,
+                            gc_buf2, MPI_FFT_SCALAR);
 #endif
 
   // extra per-atom energy/virial communication
 
   if (evflag_atom) {
     if (differentiation_flag == 1 && vflag_atom)
-#if LAMMPS_VERSION_NUMBER>=20210831
-      gc->forward_comm(GridComm::KSPACE,this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
-                              gc_buf1,gc_buf2,MPI_FFT_SCALAR);
+#if LAMMPS_VERSION_NUMBER >= 20210831
+      gc->forward_comm(GridComm::KSPACE, this, 6, sizeof(FFT_SCALAR),
+                       FORWARD_AD_PERATOM, gc_buf1, gc_buf2, MPI_FFT_SCALAR);
 #else
-      gc->forward_comm_kspace(this,6,sizeof(FFT_SCALAR),FORWARD_AD_PERATOM,
-                              gc_buf1,gc_buf2,MPI_FFT_SCALAR);
+      gc->forward_comm_kspace(this, 6, sizeof(FFT_SCALAR), FORWARD_AD_PERATOM,
+                              gc_buf1, gc_buf2, MPI_FFT_SCALAR);
 #endif
     else if (differentiation_flag == 0)
-#if LAMMPS_VERSION_NUMBER>=20210831
-      gc->forward_comm(GridComm::KSPACE,this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
-                              gc_buf1,gc_buf2,MPI_FFT_SCALAR);
+#if LAMMPS_VERSION_NUMBER >= 20210831
+      gc->forward_comm(GridComm::KSPACE, this, 7, sizeof(FFT_SCALAR),
+                       FORWARD_IK_PERATOM, gc_buf1, gc_buf2, MPI_FFT_SCALAR);
 #else
-      gc->forward_comm_kspace(this,7,sizeof(FFT_SCALAR),FORWARD_IK_PERATOM,
-                              gc_buf1,gc_buf2,MPI_FFT_SCALAR);
+      gc->forward_comm_kspace(this, 7, sizeof(FFT_SCALAR), FORWARD_IK_PERATOM,
+                              gc_buf1, gc_buf2, MPI_FFT_SCALAR);
 #endif
   }
 
@@ -180,10 +180,10 @@ void PPPMDPLR::compute(int eflag, int vflag)
 
   if (eflag_global) {
     double energy_all;
-    MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
+    MPI_Allreduce(&energy, &energy_all, 1, MPI_DOUBLE, MPI_SUM, world);
     energy = energy_all;
 
-    energy *= 0.5*volume;
+    energy *= 0.5 * volume;
     // do not add self-term, for neutral systems qsum == 0
     // energy -= g_ewald*qsqsum/MY_PIS +
     //   MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
@@ -194,8 +194,8 @@ void PPPMDPLR::compute(int eflag, int vflag)
 
   if (vflag_global) {
     double virial_all[6];
-    MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
-    for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
+    MPI_Allreduce(virial, virial_all, 6, MPI_DOUBLE, MPI_SUM, world);
+    for (i = 0; i < 6; i++) virial[i] = 0.5 * qscale * volume * virial_all[i];
   }
 
   // per-atom energy/virial
@@ -211,16 +211,16 @@ void PPPMDPLR::compute(int eflag, int vflag)
     if (eflag_atom) {
       for (i = 0; i < nlocal; i++) {
         eatom[i] *= 0.5;
-        eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
-          (g_ewald*g_ewald*volume);
+        eatom[i] -= g_ewald * q[i] * q[i] / MY_PIS +
+                    MY_PI2 * q[i] * qsum / (g_ewald * g_ewald * volume);
         eatom[i] *= qscale;
       }
-      for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5*qscale;
+      for (i = nlocal; i < ntotal; i++) eatom[i] *= 0.5 * qscale;
     }
 
     if (vflag_atom) {
       for (i = 0; i < ntotal; i++)
-        for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
+        for (j = 0; j < 6; j++) vatom[i][j] *= 0.5 * qscale;
     }
   }
 
@@ -237,11 +237,10 @@ void PPPMDPLR::compute(int eflag, int vflag)
    interpolate from grid to get electric field & force on my particles for ik
 ------------------------------------------------------------------------- */
 
-void PPPMDPLR::fieldforce_ik()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz,x0,y0,z0;
-  FFT_SCALAR ekx,eky,ekz;
+void PPPMDPLR::fieldforce_ik() {
+  int i, l, m, n, nx, ny, nz, mx, my, mz;
+  FFT_SCALAR dx, dy, dz, x0, y0, z0;
+  FFT_SCALAR ekx, eky, ekz;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
@@ -257,32 +256,32 @@ void PPPMDPLR::fieldforce_ik()
   int nghost = atom->nghost;
   int nall = nlocal + nghost;
 
-  fele.resize(nlocal*3);
+  fele.resize(nlocal * 3);
   fill(fele.begin(), fele.end(), 0.0);
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+    dx = nx + shiftone - (x[i][0] - boxlo[0]) * delxinv;
+    dy = ny + shiftone - (x[i][1] - boxlo[1]) * delyinv;
+    dz = nz + shiftone - (x[i][2] - boxlo[2]) * delzinv;
 
-    compute_rho1d(dx,dy,dz);
+    compute_rho1d(dx, dy, dz);
 
     ekx = eky = ekz = ZEROF;
     for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
+      mz = n + nz;
       z0 = rho1d[2][n];
       for (m = nlower; m <= nupper; m++) {
-        my = m+ny;
-        y0 = z0*rho1d[1][m];
+        my = m + ny;
+        y0 = z0 * rho1d[1][m];
         for (l = nlower; l <= nupper; l++) {
-          mx = l+nx;
-          x0 = y0*rho1d[0][l];
-          ekx -= x0*vdx_brick[mz][my][mx];
-          eky -= x0*vdy_brick[mz][my][mx];
-          ekz -= x0*vdz_brick[mz][my][mx];
+          mx = l + nx;
+          x0 = y0 * rho1d[0][l];
+          ekx -= x0 * vdx_brick[mz][my][mx];
+          eky -= x0 * vdy_brick[mz][my][mx];
+          ekz -= x0 * vdz_brick[mz][my][mx];
         }
       }
     }
@@ -290,9 +289,9 @@ void PPPMDPLR::fieldforce_ik()
     // convert E-field to force
 
     const double qfactor = qqrd2e * scale * q[i];
-    fele[i*3+0] += qfactor*ekx;
-    fele[i*3+1] += qfactor*eky;
-    if (slabflag != 2) fele[i*3+2] += qfactor*ekz;
+    fele[i * 3 + 0] += qfactor * ekx;
+    fele[i * 3 + 1] += qfactor * eky;
+    if (slabflag != 2) fele[i * 3 + 2] += qfactor * ekz;
   }
 }
 
@@ -300,12 +299,11 @@ void PPPMDPLR::fieldforce_ik()
    interpolate from grid to get electric field & force on my particles for ad
 ------------------------------------------------------------------------- */
 
-void PPPMDPLR::fieldforce_ad()
-{
-  int i,l,m,n,nx,ny,nz,mx,my,mz;
-  FFT_SCALAR dx,dy,dz;
-  FFT_SCALAR ekx,eky,ekz;
-  double s1,s2,s3;
+void PPPMDPLR::fieldforce_ad() {
+  int i, l, m, n, nx, ny, nz, mx, my, mz;
+  FFT_SCALAR dx, dy, dz;
+  FFT_SCALAR ekx, eky, ekz;
+  double s1, s2, s3;
   double sf = 0.0;
   double *prd;
 
@@ -314,9 +312,9 @@ void PPPMDPLR::fieldforce_ad()
   double yprd = prd[1];
   double zprd = prd[2];
 
-  double hx_inv = nx_pppm/xprd;
-  double hy_inv = ny_pppm/yprd;
-  double hz_inv = nz_pppm/zprd;
+  double hx_inv = nx_pppm / xprd;
+  double hy_inv = ny_pppm / yprd;
+  double hz_inv = nz_pppm / zprd;
 
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
@@ -332,30 +330,30 @@ void PPPMDPLR::fieldforce_ad()
   int nghost = atom->nghost;
   int nall = nlocal + nghost;
 
-  fele.resize(nlocal*3);
+  fele.resize(nlocal * 3);
   fill(fele.begin(), fele.end(), 0.0);
 
   for (i = 0; i < nlocal; i++) {
     nx = part2grid[i][0];
     ny = part2grid[i][1];
     nz = part2grid[i][2];
-    dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
-    dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
-    dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
+    dx = nx + shiftone - (x[i][0] - boxlo[0]) * delxinv;
+    dy = ny + shiftone - (x[i][1] - boxlo[1]) * delyinv;
+    dz = nz + shiftone - (x[i][2] - boxlo[2]) * delzinv;
 
-    compute_rho1d(dx,dy,dz);
-    compute_drho1d(dx,dy,dz);
+    compute_rho1d(dx, dy, dz);
+    compute_drho1d(dx, dy, dz);
 
     ekx = eky = ekz = ZEROF;
     for (n = nlower; n <= nupper; n++) {
-      mz = n+nz;
+      mz = n + nz;
       for (m = nlower; m <= nupper; m++) {
-        my = m+ny;
+        my = m + ny;
         for (l = nlower; l <= nupper; l++) {
-          mx = l+nx;
-          ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
-          eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
-          ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
+          mx = l + nx;
+          ekx += drho1d[0][l] * rho1d[1][m] * rho1d[2][n] * u_brick[mz][my][mx];
+          eky += rho1d[0][l] * drho1d[1][m] * rho1d[2][n] * u_brick[mz][my][mx];
+          ekz += rho1d[0][l] * rho1d[1][m] * drho1d[2][n] * u_brick[mz][my][mx];
         }
       }
     }
@@ -367,24 +365,22 @@ void PPPMDPLR::fieldforce_ad()
 
     const double qfactor = qqrd2e * scale;
 
-    s1 = x[i][0]*hx_inv;
-    s2 = x[i][1]*hy_inv;
-    s3 = x[i][2]*hz_inv;
-    sf = sf_coeff[0]*sin(2*MY_PI*s1);
-    sf += sf_coeff[1]*sin(4*MY_PI*s1);
-    sf *= 2*q[i]*q[i];
-    fele[i*3+0] += qfactor*(ekx*q[i] - sf);
-
-    sf = sf_coeff[2]*sin(2*MY_PI*s2);
-    sf += sf_coeff[3]*sin(4*MY_PI*s2);
-    sf *= 2*q[i]*q[i];
-    fele[i*3+1] += qfactor*(eky*q[i] - sf);
-
-
-    sf = sf_coeff[4]*sin(2*MY_PI*s3);
-    sf += sf_coeff[5]*sin(4*MY_PI*s3);
-    sf *= 2*q[i]*q[i];
-    if (slabflag != 2) fele[i*3+2] += qfactor*(ekz*q[i] - sf);
+    s1 = x[i][0] * hx_inv;
+    s2 = x[i][1] * hy_inv;
+    s3 = x[i][2] * hz_inv;
+    sf = sf_coeff[0] * sin(2 * MY_PI * s1);
+    sf += sf_coeff[1] * sin(4 * MY_PI * s1);
+    sf *= 2 * q[i] * q[i];
+    fele[i * 3 + 0] += qfactor * (ekx * q[i] - sf);
+
+    sf = sf_coeff[2] * sin(2 * MY_PI * s2);
+    sf += sf_coeff[3] * sin(4 * MY_PI * s2);
+    sf *= 2 * q[i] * q[i];
+    fele[i * 3 + 1] += qfactor * (eky * q[i] - sf);
+
+    sf = sf_coeff[4] * sin(2 * MY_PI * s3);
+    sf += sf_coeff[5] * sin(4 * MY_PI * s3);
+    sf *= 2 * q[i] * q[i];
+    if (slabflag != 2) fele[i * 3 + 2] += qfactor * (ekz * q[i] - sf);
   }
 }
-
diff --git a/source/lmp/pppm_dplr.h b/source/lmp/pppm_dplr.h
index 1a19aea258..b027d493b9 100644
--- a/source/lmp/pppm_dplr.h
+++ b/source/lmp/pppm_dplr.h
@@ -1,6 +1,6 @@
 #ifdef KSPACE_CLASS
 
-KSpaceStyle(pppm/dplr,PPPMDPLR)
+KSpaceStyle(pppm / dplr, PPPMDPLR)
 
 #else
 
@@ -13,33 +13,35 @@ KSpaceStyle(pppm/dplr,PPPMDPLR)
 #define FLOAT_PREC float
 #endif
 
-#include "pppm.h"
 #include 
 #include 
 
+#include "pppm.h"
+
 namespace LAMMPS_NS {
 
-  class PPPMDPLR : public PPPM {
-public:
-#if LAMMPS_VERSION_NUMBER<20181109
-// See lammps/lammps#1165
-    PPPMDPLR(class LAMMPS *, int, char **);
+class PPPMDPLR : public PPPM {
+ public:
+#if LAMMPS_VERSION_NUMBER < 20181109
+  // See lammps/lammps#1165
+  PPPMDPLR(class LAMMPS *, int, char **);
 #else
-    PPPMDPLR(class LAMMPS *);
+  PPPMDPLR(class LAMMPS *);
 #endif
-    ~PPPMDPLR () override {};
-    void init() override;
-    const std::vector & get_fele() const {return fele;};
-protected:
-    void compute(int, int) override;
-    void fieldforce_ik() override;
-    void fieldforce_ad() override;
-private:
-    std::vector fele;
-  };
-
-}
+  ~PPPMDPLR() override{};
+  void init() override;
+  const std::vector &get_fele() const { return fele; };
+
+ protected:
+  void compute(int, int) override;
+  void fieldforce_ik() override;
+  void fieldforce_ad() override;
+
+ private:
+  std::vector fele;
+};
+
+}  // namespace LAMMPS_NS
 
 #endif
 #endif
-
diff --git a/source/lmp/tests/.gitignore b/source/lmp/tests/.gitignore
index 76123e90cd..3fb606e9d6 100644
--- a/source/lmp/tests/.gitignore
+++ b/source/lmp/tests/.gitignore
@@ -1,4 +1,4 @@
 __pycache__/
 graph.pb
 log.lammps
-md.out
\ No newline at end of file
+md.out
diff --git a/source/lmp/tests/data.lmp b/source/lmp/tests/data.lmp
index 37370f245d..1c5104d44d 100644
--- a/source/lmp/tests/data.lmp
+++ b/source/lmp/tests/data.lmp
@@ -13,4 +13,4 @@ Atoms
 3	2	0.25	3.32	1.68
 4	1	3.36	3.00	1.81
 5	2	3.51	2.51	2.60
-6	2	4.27	3.22	1.56 
+6	2	4.27	3.22	1.56
diff --git a/source/lmp/tests/data_type_map.lmp b/source/lmp/tests/data_type_map.lmp
index c1f10c7c36..be6739ca6c 100644
--- a/source/lmp/tests/data_type_map.lmp
+++ b/source/lmp/tests/data_type_map.lmp
@@ -13,4 +13,4 @@ Atoms
 3	1	0.25	3.32	1.68
 4	2	3.36	3.00	1.81
 5	1	3.51	2.51	2.60
-6	1	4.27	3.22	1.56 
+6	1	4.27	3.22	1.56
diff --git a/source/lmp/tests/test_lammps.py b/source/lmp/tests/test_lammps.py
index 9815a962a5..406633f0db 100644
--- a/source/lmp/tests/test_lammps.py
+++ b/source/lmp/tests/test_lammps.py
@@ -1,14 +1,19 @@
-import sys
 import subprocess as sp
-from pathlib import Path
+import sys
+from pathlib import (
+    Path,
+)
 
-import pytest
 import numpy as np
-from lammps import PyLammps
-
+import pytest
+from lammps import (
+    PyLammps,
+)
 
 pbtxt_file = Path(__file__).parent.parent.parent / "tests" / "infer" / "deeppot.pbtxt"
-pbtxt_file2 = Path(__file__).parent.parent.parent / "tests" / "infer" / "deeppot-1.pbtxt"
+pbtxt_file2 = (
+    Path(__file__).parent.parent.parent / "tests" / "infer" / "deeppot-1.pbtxt"
+)
 pb_file = Path(__file__).parent / "graph.pb"
 pb_file2 = Path(__file__).parent / "graph2.pb"
 system_file = Path(__file__).parent.parent.parent / "tests"
@@ -17,60 +22,197 @@
 md_file = Path(__file__).parent / "md.out"
 
 # this is as the same as python and c++ tests, test_deeppot_a.py
-expected_ae = np.array([
-    -9.275780747115504710e+01,-1.863501786584258468e+02,-1.863392472863538103e+02,-9.279281325486221021e+01,-1.863671545232153903e+02,-1.863619822847602165e+02,
-])
+expected_ae = np.array(
+    [
+        -9.275780747115504710e01,
+        -1.863501786584258468e02,
+        -1.863392472863538103e02,
+        -9.279281325486221021e01,
+        -1.863671545232153903e02,
+        -1.863619822847602165e02,
+    ]
+)
 expected_e = np.sum(expected_ae)
-expected_f = np.array([
-    -3.034045420701179663e-01,8.405844663871177014e-01,7.696947487118485642e-02,7.662001266663505117e-01,-1.880601391333554251e-01,-6.183333871091722944e-01,-5.036172391059643427e-01,-6.529525836149027151e-01,5.432962643022043459e-01,6.382357912332115024e-01,-1.748518296794561167e-01,3.457363524891907125e-01,1.286482986991941552e-03,3.757251165286925043e-01,-5.972588700887541124e-01,-5.987006197104716154e-01,-2.004450304880958100e-01,2.495901655353461868e-01
-]).reshape(6, 3)
-
-expected_f2 = np.array([
-    [-0.6454949 , 1.72457783, 0.18897958],
-    [ 1.68936514,-0.36995299,-1.36044464],
-    [-1.09902692,-1.35487928, 1.17416702],
-    [ 1.68426111,-0.50835585, 0.98340415],
-    [ 0.05771758, 1.12515818,-1.77561531],
-    [-1.686822  ,-0.61654789, 0.78950921],
-])
-
-expected_v = np.array([
-    -2.912234126853306959e-01,-3.800610846612756388e-02,2.776624987489437202e-01,-5.053761003913598976e-02,-3.152373041953385746e-01,1.060894290092162379e-01,2.826389131596073745e-01,1.039129970665329250e-01,-2.584378792325942586e-01,-3.121722367954994914e-01,8.483275876786681990e-02,2.524662342344257682e-01,4.142176771106586414e-02,-3.820285230785245428e-02,-2.727311173065460545e-02,2.668859789777112135e-01,-6.448243569420382404e-02,-2.121731470426218846e-01,-8.624335220278558922e-02,-1.809695356746038597e-01,1.529875294531883312e-01,-1.283658185172031341e-01,-1.992682279795223999e-01,1.409924999632362341e-01,1.398322735274434292e-01,1.804318474574856390e-01,-1.470309318999652726e-01,-2.593983661598450730e-01,-4.236536279233147489e-02,3.386387920184946720e-02,-4.174017537818433543e-02,-1.003500282164128260e-01,1.525690815194478966e-01,3.398976109910181037e-02,1.522253908435125536e-01,-2.349125581341701963e-01,9.515545977581392825e-04,-1.643218849228543846e-02,1.993234765412972564e-02,6.027265332209678569e-04,-9.563256398907417355e-02,1.510815124001868293e-01,-7.738094816888557714e-03,1.502832772532304295e-01,-2.380965783745832010e-01,-2.309456719810296654e-01,-6.666961081213038098e-02,7.955566551234216632e-02,-8.099093777937517447e-02,-3.386641099800401927e-02,4.447884755740908608e-02,1.008593228579038742e-01,4.556718179228393811e-02,-6.078081273849572641e-02
-]).reshape(6, 9)
-expected_v2 = np.array([
-    [-0.70008436, -0.06399891,  0.63678391, -0.07642171,
-        -0.70580035,  0.20506145,  0.64098364,  0.20305781,
-        -0.57906794],
-    [-0.6372635 ,  0.14315552,  0.51952246,  0.04604049,
-        -0.06003681, -0.02688702,  0.54489318, -0.10951559,
-        -0.43730539],
-    [-0.25090748, -0.37466262,  0.34085833, -0.26690852,
-        -0.37676917,  0.29080825,  0.31600481,  0.37558276,
-        -0.33251064],
-    [-0.80195614, -0.10273138,  0.06935364, -0.10429256,
-        -0.29693811,  0.45643496,  0.07247872,  0.45604679,
-        -0.71048816],
-    [-0.03840668, -0.07680205,  0.10940472, -0.02374189,
-        -0.27610266,  0.4336071 ,  0.02465248,  0.4290638 ,
-        -0.67496763],
-    [-0.61475065, -0.21163135,  0.26652929, -0.26134659,
-        -0.11560267,  0.15415902,  0.34343952,  0.1589482 ,
-        -0.21370642]
-]).reshape(6, 9)
+expected_f = np.array(
+    [
+        -3.034045420701179663e-01,
+        8.405844663871177014e-01,
+        7.696947487118485642e-02,
+        7.662001266663505117e-01,
+        -1.880601391333554251e-01,
+        -6.183333871091722944e-01,
+        -5.036172391059643427e-01,
+        -6.529525836149027151e-01,
+        5.432962643022043459e-01,
+        6.382357912332115024e-01,
+        -1.748518296794561167e-01,
+        3.457363524891907125e-01,
+        1.286482986991941552e-03,
+        3.757251165286925043e-01,
+        -5.972588700887541124e-01,
+        -5.987006197104716154e-01,
+        -2.004450304880958100e-01,
+        2.495901655353461868e-01,
+    ]
+).reshape(6, 3)
+
+expected_f2 = np.array(
+    [
+        [-0.6454949, 1.72457783, 0.18897958],
+        [1.68936514, -0.36995299, -1.36044464],
+        [-1.09902692, -1.35487928, 1.17416702],
+        [1.68426111, -0.50835585, 0.98340415],
+        [0.05771758, 1.12515818, -1.77561531],
+        [-1.686822, -0.61654789, 0.78950921],
+    ]
+)
+
+expected_v = np.array(
+    [
+        -2.912234126853306959e-01,
+        -3.800610846612756388e-02,
+        2.776624987489437202e-01,
+        -5.053761003913598976e-02,
+        -3.152373041953385746e-01,
+        1.060894290092162379e-01,
+        2.826389131596073745e-01,
+        1.039129970665329250e-01,
+        -2.584378792325942586e-01,
+        -3.121722367954994914e-01,
+        8.483275876786681990e-02,
+        2.524662342344257682e-01,
+        4.142176771106586414e-02,
+        -3.820285230785245428e-02,
+        -2.727311173065460545e-02,
+        2.668859789777112135e-01,
+        -6.448243569420382404e-02,
+        -2.121731470426218846e-01,
+        -8.624335220278558922e-02,
+        -1.809695356746038597e-01,
+        1.529875294531883312e-01,
+        -1.283658185172031341e-01,
+        -1.992682279795223999e-01,
+        1.409924999632362341e-01,
+        1.398322735274434292e-01,
+        1.804318474574856390e-01,
+        -1.470309318999652726e-01,
+        -2.593983661598450730e-01,
+        -4.236536279233147489e-02,
+        3.386387920184946720e-02,
+        -4.174017537818433543e-02,
+        -1.003500282164128260e-01,
+        1.525690815194478966e-01,
+        3.398976109910181037e-02,
+        1.522253908435125536e-01,
+        -2.349125581341701963e-01,
+        9.515545977581392825e-04,
+        -1.643218849228543846e-02,
+        1.993234765412972564e-02,
+        6.027265332209678569e-04,
+        -9.563256398907417355e-02,
+        1.510815124001868293e-01,
+        -7.738094816888557714e-03,
+        1.502832772532304295e-01,
+        -2.380965783745832010e-01,
+        -2.309456719810296654e-01,
+        -6.666961081213038098e-02,
+        7.955566551234216632e-02,
+        -8.099093777937517447e-02,
+        -3.386641099800401927e-02,
+        4.447884755740908608e-02,
+        1.008593228579038742e-01,
+        4.556718179228393811e-02,
+        -6.078081273849572641e-02,
+    ]
+).reshape(6, 9)
+expected_v2 = np.array(
+    [
+        [
+            -0.70008436,
+            -0.06399891,
+            0.63678391,
+            -0.07642171,
+            -0.70580035,
+            0.20506145,
+            0.64098364,
+            0.20305781,
+            -0.57906794,
+        ],
+        [
+            -0.6372635,
+            0.14315552,
+            0.51952246,
+            0.04604049,
+            -0.06003681,
+            -0.02688702,
+            0.54489318,
+            -0.10951559,
+            -0.43730539,
+        ],
+        [
+            -0.25090748,
+            -0.37466262,
+            0.34085833,
+            -0.26690852,
+            -0.37676917,
+            0.29080825,
+            0.31600481,
+            0.37558276,
+            -0.33251064,
+        ],
+        [
+            -0.80195614,
+            -0.10273138,
+            0.06935364,
+            -0.10429256,
+            -0.29693811,
+            0.45643496,
+            0.07247872,
+            0.45604679,
+            -0.71048816,
+        ],
+        [
+            -0.03840668,
+            -0.07680205,
+            0.10940472,
+            -0.02374189,
+            -0.27610266,
+            0.4336071,
+            0.02465248,
+            0.4290638,
+            -0.67496763,
+        ],
+        [
+            -0.61475065,
+            -0.21163135,
+            0.26652929,
+            -0.26134659,
+            -0.11560267,
+            0.15415902,
+            0.34343952,
+            0.1589482,
+            -0.21370642,
+        ],
+    ]
+).reshape(6, 9)
 
 # https://github.com/lammps/lammps/blob/1e1311cf401c5fc2614b5d6d0ff3230642b76597/src/update.cpp#L193
 nktv2p = 1.6021765e6
 
-sp.check_output("{} -m deepmd convert-from pbtxt -i {} -o {}".format(
-    sys.executable,
-    pbtxt_file.resolve(),
-    pb_file.resolve(),
-    ).split())
-sp.check_output("{} -m deepmd convert-from pbtxt -i {} -o {}".format(
-    sys.executable,
-    pbtxt_file2.resolve(),
-    pb_file2.resolve(),
-    ).split())
+sp.check_output(
+    "{} -m deepmd convert-from pbtxt -i {} -o {}".format(
+        sys.executable,
+        pbtxt_file.resolve(),
+        pb_file.resolve(),
+    ).split()
+)
+sp.check_output(
+    "{} -m deepmd convert-from pbtxt -i {} -o {}".format(
+        sys.executable,
+        pbtxt_file2.resolve(),
+        pb_file2.resolve(),
+    ).split()
+)
 
 
 def _lammps(data_file) -> PyLammps:
@@ -92,6 +234,7 @@ def _lammps(data_file) -> PyLammps:
 def lammps():
     yield _lammps(data_file=data_file)
 
+
 @pytest.fixture
 def lammps_type_map():
     yield _lammps(data_file=data_type_map_file)
@@ -114,17 +257,25 @@ def test_pair_deepmd_virial(lammps):
     for ii in range(9):
         jj = [0, 4, 8, 3, 6, 7, 1, 2, 5][ii]
         lammps.variable(f"virial{jj} atom c_virial[{ii+1}]")
-    lammps.dump("1 all custom 1 dump id " + " ".join([f"v_virial{ii}" for ii in range(9)]))
+    lammps.dump(
+        "1 all custom 1 dump id " + " ".join([f"v_virial{ii}" for ii in range(9)])
+    )
     lammps.run(0)
     assert lammps.eval("pe") == pytest.approx(expected_e)
     for ii in range(6):
         assert lammps.atoms[ii].force == pytest.approx(expected_f[ii])
     for ii in range(9):
-        assert np.array(lammps.variables[f'virial{ii}'].value) / nktv2p == pytest.approx(expected_v[:, ii])
+        assert np.array(
+            lammps.variables[f"virial{ii}"].value
+        ) / nktv2p == pytest.approx(expected_v[:, ii])
 
 
 def test_pair_deepmd_model_devi(lammps):
-    lammps.pair_style("deepmd {} {} out_file {} out_freq 1 atomic".format(pb_file.resolve(), pb_file2.resolve(), md_file.resolve()))
+    lammps.pair_style(
+        "deepmd {} {} out_file {} out_freq 1 atomic".format(
+            pb_file.resolve(), pb_file2.resolve(), md_file.resolve()
+        )
+    )
     lammps.pair_coeff("* *")
     lammps.run(0)
     assert lammps.eval("pe") == pytest.approx(expected_e)
@@ -137,26 +288,36 @@ def test_pair_deepmd_model_devi(lammps):
     assert md[4] == pytest.approx(np.max(expected_md_f))
     assert md[5] == pytest.approx(np.min(expected_md_f))
     assert md[6] == pytest.approx(np.mean(expected_md_f))
-    expected_md_v = np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    )
     assert md[1] == pytest.approx(np.max(expected_md_v))
     assert md[2] == pytest.approx(np.min(expected_md_v))
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
 
 def test_pair_deepmd_model_devi_virial(lammps):
-    lammps.pair_style("deepmd {} {} out_file {} out_freq 1 atomic".format(pb_file.resolve(), pb_file2.resolve(), md_file.resolve()))
+    lammps.pair_style(
+        "deepmd {} {} out_file {} out_freq 1 atomic".format(
+            pb_file.resolve(), pb_file2.resolve(), md_file.resolve()
+        )
+    )
     lammps.pair_coeff("* *")
     lammps.compute("virial all centroid/stress/atom NULL pair")
     for ii in range(9):
         jj = [0, 4, 8, 3, 6, 7, 1, 2, 5][ii]
         lammps.variable(f"virial{jj} atom c_virial[{ii+1}]")
-    lammps.dump("1 all custom 1 dump id " + " ".join([f"v_virial{ii}" for ii in range(9)]))
+    lammps.dump(
+        "1 all custom 1 dump id " + " ".join([f"v_virial{ii}" for ii in range(9)])
+    )
     lammps.run(0)
     assert lammps.eval("pe") == pytest.approx(expected_e)
     for ii in range(6):
         assert lammps.atoms[ii].force == pytest.approx(expected_f[ii])
     for ii in range(9):
-        assert np.array(lammps.variables[f'virial{ii}'].value) / nktv2p == pytest.approx(expected_v[:, ii])
+        assert np.array(
+            lammps.variables[f"virial{ii}"].value
+        ) / nktv2p == pytest.approx(expected_v[:, ii])
     # load model devi
     md = np.loadtxt(md_file.resolve())
     expected_md_f = np.linalg.norm(np.std([expected_f, expected_f2], axis=0), axis=1)
@@ -164,7 +325,9 @@ def test_pair_deepmd_model_devi_virial(lammps):
     assert md[4] == pytest.approx(np.max(expected_md_f))
     assert md[5] == pytest.approx(np.min(expected_md_f))
     assert md[6] == pytest.approx(np.mean(expected_md_f))
-    expected_md_v = np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    )
     assert md[1] == pytest.approx(np.max(expected_md_v))
     assert md[2] == pytest.approx(np.min(expected_md_v))
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
@@ -172,7 +335,11 @@ def test_pair_deepmd_model_devi_virial(lammps):
 
 def test_pair_deepmd_model_devi_atomic_relative(lammps):
     relative = 1.0
-    lammps.pair_style("deepmd {} {} out_file {} out_freq 1 atomic relative {}".format(pb_file.resolve(), pb_file2.resolve(), md_file.resolve(), relative))
+    lammps.pair_style(
+        "deepmd {} {} out_file {} out_freq 1 atomic relative {}".format(
+            pb_file.resolve(), pb_file2.resolve(), md_file.resolve(), relative
+        )
+    )
     lammps.pair_coeff("* *")
     lammps.run(0)
     assert lammps.eval("pe") == pytest.approx(expected_e)
@@ -187,7 +354,9 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps):
     assert md[4] == pytest.approx(np.max(expected_md_f))
     assert md[5] == pytest.approx(np.min(expected_md_f))
     assert md[6] == pytest.approx(np.mean(expected_md_f))
-    expected_md_v = np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    )
     assert md[1] == pytest.approx(np.max(expected_md_v))
     assert md[2] == pytest.approx(np.min(expected_md_v))
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
@@ -195,7 +364,11 @@ def test_pair_deepmd_model_devi_atomic_relative(lammps):
 
 def test_pair_deepmd_model_devi_atomic_relative_v(lammps):
     relative = 1.0
-    lammps.pair_style("deepmd {} {} out_file {} out_freq 1 atomic relative_v {}".format(pb_file.resolve(), pb_file2.resolve(), md_file.resolve(), relative))
+    lammps.pair_style(
+        "deepmd {} {} out_file {} out_freq 1 atomic relative_v {}".format(
+            pb_file.resolve(), pb_file2.resolve(), md_file.resolve(), relative
+        )
+    )
     lammps.pair_coeff("* *")
     lammps.run(0)
     assert lammps.eval("pe") == pytest.approx(expected_e)
@@ -207,13 +380,21 @@ def test_pair_deepmd_model_devi_atomic_relative_v(lammps):
     assert md[4] == pytest.approx(np.max(expected_md_f))
     assert md[5] == pytest.approx(np.min(expected_md_f))
     assert md[6] == pytest.approx(np.mean(expected_md_f))
-    expected_md_v = np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
-    norm = np.abs(np.mean([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0)) / 6
+    expected_md_v = (
+        np.std([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0) / 6
+    )
+    norm = (
+        np.abs(
+            np.mean([np.sum(expected_v, axis=0), np.sum(expected_v2, axis=0)], axis=0)
+        )
+        / 6
+    )
     expected_md_v /= norm + relative
     assert md[1] == pytest.approx(np.max(expected_md_v))
     assert md[2] == pytest.approx(np.min(expected_md_v))
     assert md[3] == pytest.approx(np.sqrt(np.mean(np.square(expected_md_v))))
 
+
 def test_pair_deepmd_type_map(lammps_type_map):
     lammps_type_map.pair_style("deepmd {}".format(pb_file.resolve()))
     lammps_type_map.pair_coeff("* * H O")
diff --git a/source/md/CMakeLists.txt b/source/md/CMakeLists.txt
index 5ee7ddabc9..dfd1c547be 100644
--- a/source/md/CMakeLists.txt
+++ b/source/md/CMakeLists.txt
@@ -1,95 +1,82 @@
 # md
-set (MAKE_FF_AD FALSE) 
+set(MAKE_FF_AD FALSE)
 
 list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/)
 find_package(xdrfile REQUIRED)
 
-list (APPEND MD_INCLUDE_PATH "include")
-list (APPEND MD_INCLUDE_PATH ${XDRFILE_INCLUDE_DIRS})
+list(APPEND MD_INCLUDE_PATH "include")
+list(APPEND MD_INCLUDE_PATH ${XDRFILE_INCLUDE_DIRS})
 
 file(GLOB MD_SRC src/*.cc src/*.cpp)
 
 set(MDNN_SOURCE_FILES mdnn.cc)
-if (MAKE_FF_AD)
+if(MAKE_FF_AD)
   set(MDAD_SOURCE_FILES mdad.cc)
   set(MDFF_SOURCE_FILES mdff.cc)
 endif()
 
 function(_add_md_variant variant_name prec_def)
-set (libname "${LIB_DEEPMD_NATIVE}${variant_name}")
-set (dp_mdnn_name "dp_mdnn${variant_name}")
-set (dp_mdff_name "dp_mdff${variant_name}")
-set (dp_mdad_name "dp_mdad${variant_name}")
+  set(libname "${LIB_DEEPMD_NATIVE}${variant_name}")
+  set(dp_mdnn_name "dp_mdnn${variant_name}")
+  set(dp_mdff_name "dp_mdff${variant_name}")
+  set(dp_mdad_name "dp_mdad${variant_name}")
 
-add_library(${libname} SHARED ${MD_SRC})
-target_link_libraries(${libname} PRIVATE ${LIB_DEEPMD})
-target_include_directories(${libname} PUBLIC ${MD_INCLUDE_PATH})
-set_target_properties(
-  ${libname}
-  PROPERTIES
-  COMPILE_DEFINITIONS ${prec_def}
-  INSTALL_RPATH "$ORIGIN"
-)
+  add_library(${libname} SHARED ${MD_SRC})
+  target_link_libraries(${libname} PRIVATE ${LIB_DEEPMD})
+  target_include_directories(${libname} PUBLIC ${MD_INCLUDE_PATH})
+  set_target_properties(${libname} PROPERTIES COMPILE_DEFINITIONS ${prec_def}
+                                              INSTALL_RPATH "$ORIGIN")
 
-add_executable(${dp_mdnn_name} ${MDNN_SOURCE_FILES})
-if (MAKE_FF_AD)
-  add_executable(${dp_mdff_name} ${MDFF_SOURCE_FILES})
-  add_executable(${dp_mdad_name} ${MDAD_SOURCE_FILES})
-endif()
+  add_executable(${dp_mdnn_name} ${MDNN_SOURCE_FILES})
+  if(MAKE_FF_AD)
+    add_executable(${dp_mdff_name} ${MDFF_SOURCE_FILES})
+    add_executable(${dp_mdad_name} ${MDAD_SOURCE_FILES})
+  endif()
 
-# link: libdeepmd_native libdeepmd_cc libxdr
-target_link_libraries(${dp_mdnn_name} PRIVATE ${libname} ${LIB_DEEPMD_CC}${variant_name} ${XDRFILE_LIBRARIES})
-target_include_directories(${dp_mdnn_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../3rdparty/)
-if (MAKE_FF_AD)
-  target_link_libraries(${dp_mdad_name} PRIVATE ${libname} ${LIB_DEEPMD_CC}${variant_name} ${XDRFILE_LIBRARIES})
-  target_include_directories(${dp_mdad_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../3rdparty/)
-  target_link_libraries(${dp_mdff_name} PRIVATE ${libname} ${LIB_DEEPMD_CC}${variant_name} ${XDRFILE_LIBRARIES})
-  target_include_directories(${dp_mdff_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../3rdparty/)
-endif()
+  # link: libdeepmd_native libdeepmd_cc libxdr
+  target_link_libraries(
+    ${dp_mdnn_name} PRIVATE ${libname} ${LIB_DEEPMD_CC}${variant_name}
+                            ${XDRFILE_LIBRARIES})
+  target_include_directories(${dp_mdnn_name}
+                             PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../3rdparty/)
+  if(MAKE_FF_AD)
+    target_link_libraries(
+      ${dp_mdad_name} PRIVATE ${libname} ${LIB_DEEPMD_CC}${variant_name}
+                              ${XDRFILE_LIBRARIES})
+    target_include_directories(${dp_mdad_name}
+                               PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../3rdparty/)
+    target_link_libraries(
+      ${dp_mdff_name} PRIVATE ${libname} ${LIB_DEEPMD_CC}${variant_name}
+                              ${XDRFILE_LIBRARIES})
+    target_include_directories(${dp_mdff_name}
+                               PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../3rdparty/)
+  endif()
 
-set_target_properties(
-  ${dp_mdnn_name}
-  PROPERTIES
-  COMPILE_DEFINITIONS ${prec_def}
-  LINK_FLAGS "-Wl,-rpath,'$ORIGIN'/../lib -Wl,-z,defs"
-  INSTALL_RPATH "$ORIGIN/../lib:${TensorFlow_LIBRARY_PATH}"
-)
-if (MAKE_FF_AD)
-  set_target_properties(
-    ${dp_mdad_name}
-    PROPERTIES
-    COMPILE_DEFINITIONS ${prec_def}
-    LINK_FLAGS "-Wl,-rpath,'$ORIGIN'/../lib -Wl,-z,defs"
-    INSTALL_RPATH "$ORIGIN/../lib:${TensorFlow_LIBRARY_PATH}"
-    )
   set_target_properties(
-    ${dp_mdff_name}
-    PROPERTIES
-    COMPILE_DEFINITIONS ${prec_def}
-    LINK_FLAGS "-Wl,-rpath,'$ORIGIN'/../lib -Wl,-z,defs"
-    INSTALL_RPATH "$ORIGIN/../lib:${TensorFlow_LIBRARY_PATH}"
-    )
-endif()
+    ${dp_mdnn_name}
+    PROPERTIES COMPILE_DEFINITIONS ${prec_def}
+               LINK_FLAGS "-Wl,-rpath,'$ORIGIN'/../lib -Wl,-z,defs"
+               INSTALL_RPATH "$ORIGIN/../lib:${TensorFlow_LIBRARY_PATH}")
+  if(MAKE_FF_AD)
+    set_target_properties(
+      ${dp_mdad_name}
+      PROPERTIES COMPILE_DEFINITIONS ${prec_def}
+                 LINK_FLAGS "-Wl,-rpath,'$ORIGIN'/../lib -Wl,-z,defs"
+                 INSTALL_RPATH "$ORIGIN/../lib:${TensorFlow_LIBRARY_PATH}")
+    set_target_properties(
+      ${dp_mdff_name}
+      PROPERTIES COMPILE_DEFINITIONS ${prec_def}
+                 LINK_FLAGS "-Wl,-rpath,'$ORIGIN'/../lib -Wl,-z,defs"
+                 INSTALL_RPATH "$ORIGIN/../lib:${TensorFlow_LIBRARY_PATH}")
+  endif()
 
-install(
-  TARGETS	${LIB_DEEPMD_NATIVE}   
-  DESTINATION	lib/
-  )
-install(
-  TARGETS	${dp_mdnn_name} 
-  DESTINATION	bin/
-  )
-if (MAKE_FF_AD)
-  install(
-    TARGETS	${dp_mdad_name} 
-    DESTINATION	bin/
-    )
-  install(
-    TARGETS	${dp_mdff_name} 
-    DESTINATION	bin/
-    )
-endif()
+  install(TARGETS ${LIB_DEEPMD_NATIVE} DESTINATION lib/)
+  install(TARGETS ${dp_mdnn_name} DESTINATION bin/)
+  if(MAKE_FF_AD)
+    install(TARGETS ${dp_mdad_name} DESTINATION bin/)
+    install(TARGETS ${dp_mdff_name} DESTINATION bin/)
+  endif()
 endfunction()
 _add_md_variant("${HIGH_PREC_VARIANT}" "${HIGH_PREC_DEF}")
 # TODO: there is hard-code `DOUBLE` in the code
-#_add_md_variant("${LOW_PREC_VARIANT}" "${LOW_PREC_DEF}")
+# _add_md_variant("${LOW_PREC_VARIANT}" "${LOW_PREC_DEF}")
diff --git a/source/md/include/AdWeight.h b/source/md/include/AdWeight.h
index 2195fd7c6e..e43896e3f9 100644
--- a/source/md/include/AdWeight.h
+++ b/source/md/include/AdWeight.h
@@ -6,73 +6,58 @@ using namespace std;
 
 #ifdef HIGH_PREC
 typedef double VALUETYPE;
-#else 
-typedef float  VALUETYPE;
+#else
+typedef float VALUETYPE;
 #endif
 
-class AdWeight 
-{
-public:  
-  AdWeight (const VALUETYPE & pl);
-  virtual void 
-  zone_tag (vector & tag,
-	    const vector & coord) const = 0;
-  virtual void
-  atom_weight (vector & weight,
-	       vector & weight_x,
-	       const vector & coord) const = 0;
-  virtual vector 
-  get_center () const = 0;
-  void 
-  sel_nn_atom (vector & nn_coord,
-	       vector & nn_type,
-	       vector & nn_idx,
-	       vector & nn_tag,
-	       const vector & dcoord,
-	       const vector & dtype) const;
-  void 
-  force_intpl (vector & of,
-	       const vector & dcoord,
-	       const vector & ff_force,
-	       const vector & nn_force,
-	       const vector & nn_idx) const;
-  void 
-  force_intpl (vector & of,
-	       const vector & dcoord,
-	       const vector & ff_bd_force,
-	       const vector & ff_nb_force,
-	       const vector & nn_force,
-	       const vector & nn_idx) const;
- private :
+class AdWeight {
+ public:
+  AdWeight(const VALUETYPE& pl);
+  virtual void zone_tag(vector& tag,
+                        const vector& coord) const = 0;
+  virtual void atom_weight(vector& weight,
+                           vector& weight_x,
+                           const vector& coord) const = 0;
+  virtual vector get_center() const = 0;
+  void sel_nn_atom(vector& nn_coord,
+                   vector& nn_type,
+                   vector& nn_idx,
+                   vector& nn_tag,
+                   const vector& dcoord,
+                   const vector& dtype) const;
+  void force_intpl(vector& of,
+                   const vector& dcoord,
+                   const vector& ff_force,
+                   const vector& nn_force,
+                   const vector& nn_idx) const;
+  void force_intpl(vector& of,
+                   const vector& dcoord,
+                   const vector& ff_bd_force,
+                   const vector& ff_nb_force,
+                   const vector& nn_force,
+                   const vector& nn_idx) const;
+
+ private:
   VALUETYPE protect_level;
-}
-    ;
-
+};
 
 // slab model, axis x
-class SlabWeight : public AdWeight
-{
-public:
-  SlabWeight (const vector & box,
-	      const VALUETYPE & rnn,
-	      const VALUETYPE & rhy,
-	      const VALUETYPE & rc, 
-	      const VALUETYPE & protect_level = 1e-3);
-  virtual void 
-  zone_tag (vector & tag,
-	    const vector & coord) const;
-  virtual void
-  atom_weight (vector & weight,
-	       vector & weight_x,
-	       const vector & coord) const;
-  virtual vector 
-  get_center () const {return center;}
-private:
+class SlabWeight : public AdWeight {
+ public:
+  SlabWeight(const vector& box,
+             const VALUETYPE& rnn,
+             const VALUETYPE& rhy,
+             const VALUETYPE& rc,
+             const VALUETYPE& protect_level = 1e-3);
+  virtual void zone_tag(vector& tag, const vector& coord) const;
+  virtual void atom_weight(vector& weight,
+                           vector& weight_x,
+                           const vector& coord) const;
+  virtual vector get_center() const { return center; }
+
+ private:
   vector center;
   VALUETYPE rnn;
   VALUETYPE rhy;
   VALUETYPE rc;
-}
-    ;
-
-
+};
diff --git a/source/md/include/Convert.h b/source/md/include/Convert.h
index b0355a9f1b..5f6a50bc34 100644
--- a/source/md/include/Convert.h
+++ b/source/md/include/Convert.h
@@ -1,44 +1,38 @@
 #pragma once
 
-#include 
-#include 
 #include 
+#include 
+#include 
 
 using namespace std;
 
 template 
-class Convert 
-{
-public:
-  Convert(const vector &  atomname,
-	  map & name_type_map,
-	  map & name_mass_map,
-	  map & name_charge_map, 
-	  const bool sort = true);
-  void gro2nnp (
-      vector & coord,
-      vector & veloc,
-      vector & box,
-      const vector > & posi,
-      const vector > & velo,
-      const vector & box_size) const ;
-  void nnp2gro (
-      vector > & posi,
-      vector > & velo,
-      vector & box_size,
-      const vector & coord,
-      const vector & veloc,
-      const vector & box) const ;
-  void idx_gro2nnp (
-      vector & out,
-      const vector & in) const;
-  void idx_nnp2gro (
-      vector & out,
-      const vector & in) const;      
-  const vector & get_type () const {return atype;}
-  const vector & get_mass () const {return amass;}
-  const vector & get_charge () const {return acharge;}
-private:
+class Convert {
+ public:
+  Convert(const vector& atomname,
+          map& name_type_map,
+          map& name_mass_map,
+          map& name_charge_map,
+          const bool sort = true);
+  void gro2nnp(vector& coord,
+               vector& veloc,
+               vector& box,
+               const vector >& posi,
+               const vector >& velo,
+               const vector& box_size) const;
+  void nnp2gro(vector >& posi,
+               vector >& velo,
+               vector& box_size,
+               const vector& coord,
+               const vector& veloc,
+               const vector& box) const;
+  void idx_gro2nnp(vector& out, const vector& in) const;
+  void idx_nnp2gro(vector& out, const vector& in) const;
+  const vector& get_type() const { return atype; }
+  const vector& get_mass() const { return amass; }
+  const vector& get_charge() const { return acharge; }
+
+ private:
   vector idx_map_nnp2gro;
   vector idx_map_gro2nnp;
   vector atype;
diff --git a/source/md/include/CosSwitch.h b/source/md/include/CosSwitch.h
index 8cef75f733..ca1cb8b493 100644
--- a/source/md/include/CosSwitch.h
+++ b/source/md/include/CosSwitch.h
@@ -3,70 +3,54 @@
 
 #ifdef HIGH_PREC
 typedef double VALUETYPE;
-#else 
-typedef float  VALUETYPE;
+#else
+typedef float VALUETYPE;
 #endif
 
-class CosSwitch 
-{
-public:
-  CosSwitch (const VALUETYPE & rmin_ = 0, 
-	     const VALUETYPE & rmax_ = 0)
-      {reinit (rmin_, rmax_); }
-  void reinit (const VALUETYPE & rmin_, 
-	       const VALUETYPE & rmax_);
-public:
-  void eval (VALUETYPE & vv,
-	     const VALUETYPE xx) const;
-private:
+class CosSwitch {
+ public:
+  CosSwitch(const VALUETYPE& rmin_ = 0, const VALUETYPE& rmax_ = 0) {
+    reinit(rmin_, rmax_);
+  }
+  void reinit(const VALUETYPE& rmin_, const VALUETYPE& rmax_);
+
+ public:
+  void eval(VALUETYPE& vv, const VALUETYPE xx) const;
+
+ private:
   VALUETYPE rmin, rmax;
 };
 
-
-void
-CosSwitch::
-reinit (const VALUETYPE & rmin_, 
-	const VALUETYPE & rmax_)
-{
+void CosSwitch::reinit(const VALUETYPE& rmin_, const VALUETYPE& rmax_) {
   rmin = rmin_;
   rmax = rmax_;
 }
-    
-void
-CosSwitch::
-eval (VALUETYPE & vv,
-      const VALUETYPE xx) const
-{
+
+void CosSwitch::eval(VALUETYPE& vv, const VALUETYPE xx) const {
   VALUETYPE dd;
-  if (xx >= 0){
+  if (xx >= 0) {
     if (xx < rmin) {
       dd = 0;
       vv = 1;
-    }
-    else if (xx < rmax){
+    } else if (xx < rmax) {
       VALUETYPE value = (xx - rmin) / (rmax - rmin) * M_PI;
       dd = -0.5 * sin(value) * M_PI / (rmax - rmin);
       vv = 0.5 * (cos(value) + 1);
-    }
-    else {
+    } else {
       dd = 0;
       vv = 0;
     }
-  }
-  else {
-    if (xx > -rmin){
+  } else {
+    if (xx > -rmin) {
       dd = 0;
       vv = 1;
-    }
-    else if (xx > -rmax){
+    } else if (xx > -rmax) {
       VALUETYPE value = (-xx - rmin) / (rmax - rmin) * M_PI;
       dd = 0.5 * sin(value) * M_PI / (rmax - rmin);
-      vv = 0.5 * (cos(value) + 1);      
-    }
-    else {
+      vv = 0.5 * (cos(value) + 1);
+    } else {
       dd = 0;
       vv = 0;
     }
   }
 }
-
diff --git a/source/md/include/Gaussian.h b/source/md/include/Gaussian.h
index d2c8e893ef..80a75af204 100644
--- a/source/md/include/Gaussian.h
+++ b/source/md/include/Gaussian.h
@@ -7,11 +7,8 @@
 
 using namespace std;
 
-class Gaussian 
-{
-public:
-  void set_seed (unsigned long seed);
-  void gen (double * vec, const int numb_gen);
+class Gaussian {
+ public:
+  void set_seed(unsigned long seed);
+  void gen(double* vec, const int numb_gen);
 };
-
-
diff --git a/source/md/include/GroFileManager.h b/source/md/include/GroFileManager.h
index f937c7ee43..fb21c56ea5 100644
--- a/source/md/include/GroFileManager.h
+++ b/source/md/include/GroFileManager.h
@@ -1,42 +1,49 @@
 #ifndef __GroFileManager_wanghan__
 #define __GroFileManager_wanghan__
 
-#include 
 #include 
 #include 
+#include 
+
+namespace GroFileManager {
+void read(const std::string& name,
+          std::vector& resdindex,
+          std::vector& resdname,
+          std::vector& atomname,
+          std::vector& atomindex,
+          std::vector >& posi,
+          std::vector >& velo,
+          std::vector& boxsize);
+void write(const std::string& name,
+           const std::vector& resdindex,
+           const std::vector& resdname,
+           const std::vector& atomname,
+           const std::vector& atomindex,
+           const std::vector >& posi,
+           const std::vector >& velo,
+           const std::vector& boxsize);
+
+bool readTop(const std::string& filename,
+             std::vector& molnames,
+             std::vector& nmols);
 
-namespace GroFileManager{
-    void read (const std::string & name ,
-	       std::vector & resdindex,
-	       std::vector &  resdname,
-	       std::vector &  atomname,
-	       std::vector & atomindex,
-	       std::vector > & posi,
-	       std::vector > & velo,
-	       std::vector & boxsize);
-    void write (const std::string & name ,
-		const std::vector & resdindex,
-		const std::vector &  resdname,
-		const std::vector &  atomname,
-		const std::vector & atomindex,
-		const std::vector > & posi,
-		const std::vector > & velo,
-		const std::vector & boxsize);
+template 
+bool writePotenFile(const double& rmin,
+                    const double& rcut,
+                    const double& interval,
+                    UnitaryFunction1& f,
+                    UnitaryFunction2& fp,
+                    UnitaryFunction3& g,
+                    UnitaryFunction4& gp,
+                    UnitaryFunction5& h,
+                    UnitaryFunction6& hp,
+                    const std::string& filename);
 
-    bool readTop (const std::string & filename,
-		  std::vector & molnames,
-		  std::vector & nmols);
-    
-    template 
-    bool writePotenFile (const double & rmin, const double & rcut, 
-			 const double & interval,
-			 UnitaryFunction1 & f, UnitaryFunction2 & fp,
-			 UnitaryFunction3 & g, UnitaryFunction4 & gp,
-			 UnitaryFunction5 & h, UnitaryFunction6 & hp,
-			 const std::string & filename);
-    
-};
+};  // namespace GroFileManager
 
 #endif
diff --git a/source/md/include/HarmonicAngle.h b/source/md/include/HarmonicAngle.h
index 7679bd1ea2..84ad5d8fd3 100644
--- a/source/md/include/HarmonicAngle.h
+++ b/source/md/include/HarmonicAngle.h
@@ -1,35 +1,31 @@
 #pragma once
 
-#include "SimulationRegion.h"
 #include 
 
+#include "SimulationRegion.h"
+
 using namespace std;
 
 #ifdef HIGH_PREC
 typedef double VALUETYPE;
-#else 
-typedef float  VALUETYPE;
+#else
+typedef float VALUETYPE;
 #endif
 
-class HarmonicAngle 
-{
-public:
-  HarmonicAngle (const VALUETYPE & kk,
-		const VALUETYPE & tt);
-public:
-  void compute (VALUETYPE &			ener,
-		vector &		force,
-		vector &		virial,
-		const vector &	coord,
-		const vector &		atype,
-		const SimulationRegion &	region, 
-		const vector &		alist);
-private:
-  VALUETYPE ka, tt;
-  void 
-  hb_inner (VALUETYPE & ae,
-	    VALUETYPE & af,
-	    const VALUETYPE & r2);
-}
-    ;
+class HarmonicAngle {
+ public:
+  HarmonicAngle(const VALUETYPE& kk, const VALUETYPE& tt);
+
+ public:
+  void compute(VALUETYPE& ener,
+               vector& force,
+               vector& virial,
+               const vector& coord,
+               const vector& atype,
+               const SimulationRegion& region,
+               const vector& alist);
 
+ private:
+  VALUETYPE ka, tt;
+  void hb_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r2);
+};
diff --git a/source/md/include/HarmonicBond.h b/source/md/include/HarmonicBond.h
index 0b3a8e7ae7..0c7779db55 100644
--- a/source/md/include/HarmonicBond.h
+++ b/source/md/include/HarmonicBond.h
@@ -1,35 +1,31 @@
 #pragma once
 
-#include "SimulationRegion.h"
 #include 
 
+#include "SimulationRegion.h"
+
 using namespace std;
 
 #ifdef HIGH_PREC
 typedef double VALUETYPE;
-#else 
-typedef float  VALUETYPE;
+#else
+typedef float VALUETYPE;
 #endif
 
-class HarmonicBond 
-{
-public:
-  HarmonicBond (const VALUETYPE & kk,
-		const VALUETYPE & bb);
-public:
-  void compute (VALUETYPE &			ener,
-		vector &		force,
-		vector &		virial,
-		const vector &	coord,
-		const vector &		atype,
-		const SimulationRegion &	region, 
-		const vector &		blist);
-private:
-  VALUETYPE kk, bb;
-  void 
-  hb_inner (VALUETYPE & ae,
-	    VALUETYPE & af,
-	    const VALUETYPE & r2);
-}
-    ;
+class HarmonicBond {
+ public:
+  HarmonicBond(const VALUETYPE& kk, const VALUETYPE& bb);
+
+ public:
+  void compute(VALUETYPE& ener,
+               vector& force,
+               vector& virial,
+               const vector& coord,
+               const vector& atype,
+               const SimulationRegion& region,
+               const vector& blist);
 
+ private:
+  VALUETYPE kk, bb;
+  void hb_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r2);
+};
diff --git a/source/md/include/Integrator.h b/source/md/include/Integrator.h
index 24033bf586..99f720fc51 100644
--- a/source/md/include/Integrator.h
+++ b/source/md/include/Integrator.h
@@ -1,50 +1,50 @@
 #pragma once
 
+#include 
+
 #include "Gaussian.h"
 #include "UnitManager.h"
-
-#include 
 using namespace std;
 
 template 
-class Integrator
-{
-public:
-  Integrator () 
-      : massConst (UnitManager::IntegratorMassConstant) {};
-public:
-  void stepVeloc (vector & vv,
-		  const vector & ff,
-		  const vector & mass, 
-		  const double & dt, 
-		  const vector & freez = vector ()) const;
-  void stepCoord (vector & rr,
-		  const vector & vv, 
-		  const double & dt) const;
-private:
+class Integrator {
+ public:
+  Integrator() : massConst(UnitManager::IntegratorMassConstant){};
+
+ public:
+  void stepVeloc(vector& vv,
+                 const vector& ff,
+                 const vector& mass,
+                 const double& dt,
+                 const vector& freez = vector()) const;
+  void stepCoord(vector& rr,
+                 const vector& vv,
+                 const double& dt) const;
+
+ private:
   VALUETYPE massConst;
 };
 
-template  
-class ThermostatLangevin 
-{
-public:
-  ThermostatLangevin (const VALUETYPE		T = 300.,
-		      const VALUETYPE		tau = 1.,
-		      const long long int	seed = 0);
-  void reinit (const VALUETYPE		T = 300.,
-	       const VALUETYPE		tau = 1.,
-	       const long long int	seed = 0);
-  void stepOU (vector & vv,
-	       const vector & mass,
-	       const double & dt, 
-	       const vector & freez = vector ()) const;
-private:
-  mutable Gaussian	gaussian;
-  string	scheme;
-  VALUETYPE	temperature;
-  VALUETYPE	gamma;
-  VALUETYPE	sigma;
-  VALUETYPE	kT;
-  VALUETYPE	sigmainvsqrt2gamma;
+template 
+class ThermostatLangevin {
+ public:
+  ThermostatLangevin(const VALUETYPE T = 300.,
+                     const VALUETYPE tau = 1.,
+                     const long long int seed = 0);
+  void reinit(const VALUETYPE T = 300.,
+              const VALUETYPE tau = 1.,
+              const long long int seed = 0);
+  void stepOU(vector& vv,
+              const vector& mass,
+              const double& dt,
+              const vector& freez = vector()) const;
+
+ private:
+  mutable Gaussian gaussian;
+  string scheme;
+  VALUETYPE temperature;
+  VALUETYPE gamma;
+  VALUETYPE sigma;
+  VALUETYPE kT;
+  VALUETYPE sigmainvsqrt2gamma;
 };
diff --git a/source/md/include/Interpolation.h b/source/md/include/Interpolation.h
index 37acc065c4..5026f5ee45 100644
--- a/source/md/include/Interpolation.h
+++ b/source/md/include/Interpolation.h
@@ -1,61 +1,69 @@
 #ifndef __wanghan__Interpolation_h__
 #define __wanghan__Interpolation_h__
 
+#include 
 #include 
 #include 
 #include 
-#include 
+
 #include "Poly.h"
 
 namespace Interpolation {
 // linear interpolations
-    void pieceLinearInterpol (const double & a,  const double & b, 
-			      const double & va, const double & vb,
-			      Poly & p);
-    void piecewiseLinear (const std::vector & x,
-			  const std::vector & y,
-			  PiecewisePoly & ps);
+void pieceLinearInterpol(const double& a,
+                         const double& b,
+                         const double& va,
+                         const double& vb,
+                         Poly& p);
+void piecewiseLinear(const std::vector& x,
+                     const std::vector& y,
+                     PiecewisePoly& ps);
 // spline interpolations
-    void pieceHermiteInterpol (const double & a,  const double & b,
-			       const double & va, const double & vb,
-			       const double & da, const double & db,
-			       Poly & p) ;  
-    void pieceSecondDerivativeInterpol (const double & a,  const double & b,
-					const double & va, const double & vb,
-					const double & dda,const double & ddb,
-					Poly & p);
-    void piece6OrderInterpol (const double & a,   const double & b,
-			      const double & va,  const double & vb,
-			      const double & da,  const double & db,
-			      const double & dda, const double & ddb,
-			      Poly & p);
-
-    bool spline (const std::vector & x,
-		 const std::vector & y,
-		 PiecewisePoly & ps);
-    bool spline (const std::vector::const_iterator xbegin,
-		 const std::vector::const_iterator xend,
-		 const std::vector::const_iterator ybegin,
-		 PiecewisePoly & ps);
-    bool splinePeriodic (const std::vector & x,
-			 const std::vector & y,
-			 PiecewisePoly & ps);
-    bool solverForSplinePeriodic (
-	const std::vector::const_iterator & lbegin,
-	const std::vector::const_iterator & lend,
-	const std::vector::iterator & ubegin, 
-	const std::vector::iterator & uend);
-    void secondDerivativeInterpol (
-	const std::vector::const_iterator & xbegin,
-	const std::vector::const_iterator & xend,
-	const std::vector::const_iterator & vbegin,
-	const std::vector::const_iterator & ddbegin,
-	PiecewisePoly & ps);
-  
-}
-
-#endif
-
+void pieceHermiteInterpol(const double& a,
+                          const double& b,
+                          const double& va,
+                          const double& vb,
+                          const double& da,
+                          const double& db,
+                          Poly& p);
+void pieceSecondDerivativeInterpol(const double& a,
+                                   const double& b,
+                                   const double& va,
+                                   const double& vb,
+                                   const double& dda,
+                                   const double& ddb,
+                                   Poly& p);
+void piece6OrderInterpol(const double& a,
+                         const double& b,
+                         const double& va,
+                         const double& vb,
+                         const double& da,
+                         const double& db,
+                         const double& dda,
+                         const double& ddb,
+                         Poly& p);
 
+bool spline(const std::vector& x,
+            const std::vector& y,
+            PiecewisePoly& ps);
+bool spline(const std::vector::const_iterator xbegin,
+            const std::vector::const_iterator xend,
+            const std::vector::const_iterator ybegin,
+            PiecewisePoly& ps);
+bool splinePeriodic(const std::vector& x,
+                    const std::vector& y,
+                    PiecewisePoly& ps);
+bool solverForSplinePeriodic(const std::vector::const_iterator& lbegin,
+                             const std::vector::const_iterator& lend,
+                             const std::vector::iterator& ubegin,
+                             const std::vector::iterator& uend);
+void secondDerivativeInterpol(
+    const std::vector::const_iterator& xbegin,
+    const std::vector::const_iterator& xend,
+    const std::vector::const_iterator& vbegin,
+    const std::vector::const_iterator& ddbegin,
+    PiecewisePoly& ps);
 
+}  // namespace Interpolation
 
+#endif
diff --git a/source/md/include/LJInter.h b/source/md/include/LJInter.h
index 68dc4940b9..267a8367d2 100644
--- a/source/md/include/LJInter.h
+++ b/source/md/include/LJInter.h
@@ -1,37 +1,32 @@
 #pragma once
 
+#include 
+
 #include "SimulationRegion.h"
-#include  
 
 using namespace std;
 
 #ifdef HIGH_PREC
 typedef double VALUETYPE;
-#else 
-typedef float  VALUETYPE;
+#else
+typedef float VALUETYPE;
 #endif
 
-class LJInter
-{
-public:
-  LJInter (const VALUETYPE & c6,
-	   const VALUETYPE & c12,
-	   const VALUETYPE & rc);
-public:
-  void compute (VALUETYPE &			ener,
-		vector &		force,
-		vector &		virial,
-		const vector &	coord,
-		const vector &		atype,
-		const SimulationRegion &	region, 
-		const vector > &	nlist);
-private:
-  VALUETYPE c6, c12, rc, rc2, one_over_6, one_over_12, one_over_rc6, one_over_rc12;
-  void 
-  lj_inner (VALUETYPE & ae,
-	    VALUETYPE & af,
-	    const VALUETYPE & r2);
-}
-    ;
+class LJInter {
+ public:
+  LJInter(const VALUETYPE& c6, const VALUETYPE& c12, const VALUETYPE& rc);
 
+ public:
+  void compute(VALUETYPE& ener,
+               vector& force,
+               vector& virial,
+               const vector& coord,
+               const vector& atype,
+               const SimulationRegion& region,
+               const vector >& nlist);
 
+ private:
+  VALUETYPE c6, c12, rc, rc2, one_over_6, one_over_12, one_over_rc6,
+      one_over_rc12;
+  void lj_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r2);
+};
diff --git a/source/md/include/LJTab.h b/source/md/include/LJTab.h
index f156bdd39e..96c8089da0 100644
--- a/source/md/include/LJTab.h
+++ b/source/md/include/LJTab.h
@@ -1,33 +1,32 @@
 #pragma once
 
-#include  
+#include 
+
 #include "Tabulated.h"
 
 using namespace std;
 
 #ifdef HIGH_PREC
 typedef double VALUETYPE;
-#else 
-typedef float  VALUETYPE;
+#else
+typedef float VALUETYPE;
 #endif
 
-class LJTab 
-{
-public:
-  LJTab (const VALUETYPE & c6,
-	 const VALUETYPE & c12,
-	 const VALUETYPE & rc);
-public:
-  void compute (VALUETYPE &			ener,
-		vector &		force,
-		vector &		virial,
-		const vector &	coord,
-		const vector &		atype,
-		const SimulationRegion &	region, 
-		const vector > &	nlist)
-      {lj_tab.compute (ener, force, virial, coord, atype, region, nlist);};
-private:
-  Tabulated lj_tab;
-}
-    ;
+class LJTab {
+ public:
+  LJTab(const VALUETYPE& c6, const VALUETYPE& c12, const VALUETYPE& rc);
 
+ public:
+  void compute(VALUETYPE& ener,
+               vector& force,
+               vector& virial,
+               const vector& coord,
+               const vector& atype,
+               const SimulationRegion& region,
+               const vector >& nlist) {
+    lj_tab.compute(ener, force, virial, coord, atype, region, nlist);
+  };
+
+ private:
+  Tabulated lj_tab;
+};
diff --git a/source/md/include/MaxShift.h b/source/md/include/MaxShift.h
index f6fb642dab..65971a7037 100644
--- a/source/md/include/MaxShift.h
+++ b/source/md/include/MaxShift.h
@@ -1,30 +1,29 @@
 #pragma once
 
+#include 
+
 #include "SimulationRegion.h"
-#include  
 
 using namespace std;
 
 #ifdef HIGH_PREC
 typedef double VALUETYPE;
-#else 
-typedef float  VALUETYPE;
+#else
+typedef float VALUETYPE;
 #endif
 
-class MaxShift 
-{
-public:
-  MaxShift (const vector & dcoord, 
-	    const VALUETYPE & shell);
-  
-  bool rebuild (const vector & coord, 
-		const SimulationRegion & region) ;
-private:
+class MaxShift {
+ public:
+  MaxShift(const vector& dcoord, const VALUETYPE& shell);
+
+  bool rebuild(const vector& coord,
+               const SimulationRegion& region);
+
+ private:
   VALUETYPE
-  max_shift2 (const vector & coord, 
-	      const SimulationRegion & region) ;
+  max_shift2(const vector& coord,
+             const SimulationRegion& region);
   vector record;
   VALUETYPE shell;
   VALUETYPE max_allow2;
 };
-
diff --git a/source/md/include/Poly.h b/source/md/include/Poly.h
index 01f7603c10..94db89f6db 100644
--- a/source/md/include/Poly.h
+++ b/source/md/include/Poly.h
@@ -5,73 +5,83 @@
 #include 
 #include 
 
-class Poly
-{
-  std::vector a;
+class Poly {
+  std::vector a;
   unsigned order;
-public:
-  Poly ();
-  Poly (const std::vector & out);
-  void reinit (const std::vector & out);
-  void zero () {a.clear(); a.resize(1,0); order = 0;}
-  void one  () {a.clear(); a.resize(1,1); order = 0;}
-public:
-  Poly & operator = (const Poly & poly);
-  Poly & operator += (const Poly & poly);
-  Poly & operator += (const double & b);
-  Poly & operator *= (const Poly & poly);
-  Poly & operator *= (const double & scale);
-  Poly & derivative ();
-public:
-  unsigned & getOrder () {return order;}
-  const unsigned & getOrder () const {return order;}
-  std::vector & getCoeffs () {return a;}
-  const std::vector & getCoeffs () const {return a;}
-public:
-  void print ();
-  void print (const std::string & x);
-  void printCode (const std::string & x);
-public :
-  double value ( const double & x ) const;
-public:
+
+ public:
+  Poly();
+  Poly(const std::vector& out);
+  void reinit(const std::vector& out);
+  void zero() {
+    a.clear();
+    a.resize(1, 0);
+    order = 0;
+  }
+  void one() {
+    a.clear();
+    a.resize(1, 1);
+    order = 0;
+  }
+
+ public:
+  Poly& operator=(const Poly& poly);
+  Poly& operator+=(const Poly& poly);
+  Poly& operator+=(const double& b);
+  Poly& operator*=(const Poly& poly);
+  Poly& operator*=(const double& scale);
+  Poly& derivative();
+
+ public:
+  unsigned& getOrder() { return order; }
+  const unsigned& getOrder() const { return order; }
+  std::vector& getCoeffs() { return a; }
+  const std::vector& getCoeffs() const { return a; }
+
+ public:
+  void print();
+  void print(const std::string& x);
+  void printCode(const std::string& x);
+
+ public:
+  double value(const double& x) const;
+
+ public:
   // p = f(ax + b)
-  Poly & valueLinearPoly (const double & a, const double & b,
-			  Poly & p);
-}
-    ;
+  Poly& valueLinearPoly(const double& a, const double& b, Poly& p);
+};
+
+class PiecewisePoly {
+ public:
+  std::vector& get_x() { return x; }
+  std::vector& get_p() { return p; }
+  const std::vector& get_x() const { return x; }
+  const std::vector& get_p() const { return p; }
 
-class PiecewisePoly
-{
-public:
-  std::vector & get_x () {return x;}
-  std::vector & get_p () {return p;}
-  const std::vector & get_x () const {return x;}
-  const std::vector & get_p () const {return p;}
-public:
-  void clear () {x.clear(); p.clear();}
-  bool valid () const;
-public:
-  double value (const double & r) const;
-  void   value (const std::vector & r,
-		std::vector & y) const;
-  double value_periodic (const double & r) const;
-  void   value_periodic (const std::vector & r,
-			 std::vector & y) const;
-private:
-  std::vector x;
-  std::vector p;
-  void value (const unsigned & xbegin,
-	      const unsigned & xend,
-	      const std::vector & r,
-	      const unsigned & rbegin,
-	      const unsigned & rend,
-	      std::vector & y) const;
-  double value (const double & xx,
-		unsigned & begin,
-		unsigned & end) const;  
-}
-    ;
+ public:
+  void clear() {
+    x.clear();
+    p.clear();
+  }
+  bool valid() const;
 
+ public:
+  double value(const double& r) const;
+  void value(const std::vector& r, std::vector& y) const;
+  double value_periodic(const double& r) const;
+  void value_periodic(const std::vector& r,
+                      std::vector& y) const;
 
+ private:
+  std::vector x;
+  std::vector p;
+  void value(const unsigned& xbegin,
+             const unsigned& xend,
+             const std::vector& r,
+             const unsigned& rbegin,
+             const unsigned& rend,
+             std::vector& y) const;
+  double value(const double& xx, unsigned& begin, unsigned& end) const;
+};
 
 #endif
diff --git a/source/md/include/RandomGenerator.h b/source/md/include/RandomGenerator.h
index da3b06abe9..acccb73531 100644
--- a/source/md/include/RandomGenerator.h
+++ b/source/md/include/RandomGenerator.h
@@ -1,13 +1,12 @@
 #pragma once
 
 namespace RandomGenerator_MT19937 {
-  void init_by_array(unsigned long init_key[], int key_length);
-  void init_genrand(unsigned long s);
-  unsigned long genrand_int32(void);
-  long genrand_int31(void);
-  double genrand_real1(void); // in [0,1]
-  double genrand_real2(void); // in [0,1)
-  double genrand_real3(void); // in (0,1)
-  double genrand_res53(void);
-}
-
+void init_by_array(unsigned long init_key[], int key_length);
+void init_genrand(unsigned long s);
+unsigned long genrand_int32(void);
+long genrand_int31(void);
+double genrand_real1(void);  // in [0,1]
+double genrand_real2(void);  // in [0,1)
+double genrand_real3(void);  // in (0,1)
+double genrand_res53(void);
+}  // namespace RandomGenerator_MT19937
diff --git a/source/md/include/Statistics.h b/source/md/include/Statistics.h
index aa87a657a6..5dd6bad3e0 100644
--- a/source/md/include/Statistics.h
+++ b/source/md/include/Statistics.h
@@ -1,42 +1,40 @@
 #pragma once
 
 #include 
+
 #include "SimulationRegion.h"
 using namespace std;
 
 template 
-class Statistics
-{
-public:
-  Statistics (const VALUETYPE e_corr = 0,
-	      const VALUETYPE p_corr = 0);
-  void record (const VALUETYPE & ener,
-	       const vector & virial,
-	       const vector & veloc,
-	       const vector & mass, 
-	       const SimulationRegion & region);
-public:
-  double get_T () const;
-  double get_V () const;
-  double get_P () const;
-  double get_E () const {return get_ekin() + get_epot();}; 
-  double get_ekin () const {return r_kin_ener;}
-  double get_epot () const {return r_pot_ener + e_corr;}
-public:
-  void print (ostream & os,
-	      const int & step,
-	      const double time) const;
-  void print_head (ostream & os) const;
-private: 
+class Statistics {
+ public:
+  Statistics(const VALUETYPE e_corr = 0, const VALUETYPE p_corr = 0);
+  void record(const VALUETYPE& ener,
+              const vector& virial,
+              const vector& veloc,
+              const vector& mass,
+              const SimulationRegion& region);
+
+ public:
+  double get_T() const;
+  double get_V() const;
+  double get_P() const;
+  double get_E() const { return get_ekin() + get_epot(); };
+  double get_ekin() const { return r_kin_ener; }
+  double get_epot() const { return r_pot_ener + e_corr; }
+
+ public:
+  void print(ostream& os, const int& step, const double time) const;
+  void print_head(ostream& os) const;
+
+ private:
   int natoms;
   double r_ener;
   double r_pot_ener;
   double r_kin_ener;
   // vector r_box;
-  SimulationRegion region;
+  SimulationRegion region;
   vector r_vir;
   double e_corr;
   double p_corr;
-}
-    ;
-
+};
diff --git a/source/md/include/StringSplit.h b/source/md/include/StringSplit.h
index f0cd107f9b..985db38c14 100644
--- a/source/md/include/StringSplit.h
+++ b/source/md/include/StringSplit.h
@@ -1,18 +1,17 @@
 #ifndef __StringSplit_h_wanghan__
 #define __StringSplit_h_wanghan__
 
+#include 
+#include 
+#include 
 #include 
 #include 
-#include 
-#include 
-#include 
 
-namespace StringOperation{
-  void split (const std::string & in,
-	      std::vector & out);
-  void split (const std::string & in,
-	      const std::string & delimiter,
-	      std::vector & out);
-}
+namespace StringOperation {
+void split(const std::string& in, std::vector& out);
+void split(const std::string& in,
+           const std::string& delimiter,
+           std::vector& out);
+}  // namespace StringOperation
 
 #endif
diff --git a/source/md/include/TF.h b/source/md/include/TF.h
index ff6c4defd0..7fc1720b87 100644
--- a/source/md/include/TF.h
+++ b/source/md/include/TF.h
@@ -1,31 +1,31 @@
 #pragma once
 
-#include "common.h"
-#include "AdWeight.h"
-#include 
 #include 
+#include 
+
+#include "AdWeight.h"
+#include "common.h"
 
 using namespace std;
 
 #ifdef HIGH_PREC
 typedef double VALUETYPE;
-#else 
-typedef float  VALUETYPE;
+#else
+typedef float VALUETYPE;
 #endif
 
-class TF 
-{
-public:
-  TF (const string & filename);
-public:
-  void apply (vector & force,
-	      const vector & coord,
-	      const AdWeight & weight) const;
-private:
-  VALUETYPE meas (const VALUETYPE & xx) const;
+class TF {
+ public:
+  TF(const string& filename);
+
+ public:
+  void apply(vector& force,
+             const vector& coord,
+             const AdWeight& weight) const;
+
+ private:
+  VALUETYPE meas(const VALUETYPE& xx) const;
   vector data;
   double hh;
   double xup;
-}
-    ;
-
+};
diff --git a/source/md/include/TableFileLoader.h b/source/md/include/TableFileLoader.h
index ae0f3315d3..4c25e01701 100644
--- a/source/md/include/TableFileLoader.h
+++ b/source/md/include/TableFileLoader.h
@@ -1,22 +1,24 @@
 #ifndef __TableFileLoader_h_wanghan__
 #define __TableFileLoader_h_wanghan__
 
-#include 
 #include 
+#include 
+
+class TableFileLoader {
+ public:
+  unsigned getNumbColumns();
+
+ public:
+  TableFileLoader(const char* file);
+  void reinit(const char* file);
+  void setColumns(const std::vector& cols);
+  void setEvery(const unsigned every);
 
-class TableFileLoader 
-{
-public:
-  unsigned getNumbColumns ();
-public:
-  TableFileLoader	(const char * file);
-  void reinit		(const char * file);
-  void setColumns	(const std::vector & cols);
-  void setEvery		(const unsigned every);
-public:
-  void loadAll		(std::vector > & data);
-  bool loadLine		(std::vector & data);
-private :
+ public:
+  void loadAll(std::vector >& data);
+  bool loadLine(std::vector& data);
+
+ private:
   std::ifstream data;
   std::string file;
   unsigned count_read;
@@ -25,5 +27,3 @@ private :
 };
 
 #endif
-
-
diff --git a/source/md/include/Tabulated.h b/source/md/include/Tabulated.h
index 9a34934426..f74eb6354e 100644
--- a/source/md/include/Tabulated.h
+++ b/source/md/include/Tabulated.h
@@ -1,50 +1,47 @@
 #pragma once
 
+#include 
+
 #include "SimulationRegion.h"
-#include  
 
 using namespace std;
 
 #ifdef HIGH_PREC
 typedef double VALUETYPE;
-#else 
-typedef float  VALUETYPE;
+#else
+typedef float VALUETYPE;
 #endif
 
-class Tabulated
-{
-public:
-  Tabulated () {};
-  Tabulated (const VALUETYPE rc,
-	     const VALUETYPE hh,
-	     const vector & tab);
-  void reinit (const VALUETYPE rc,
-	       const VALUETYPE hh,
-	       const vector & tab);
-public:
-  void compute (VALUETYPE &			ener,
-		vector &		force,
-		vector &		virial,
-		const vector &	coord,
-		const vector &		atype,
-		const SimulationRegion &	region, 
-		const vector > &	nlist);
-  void compute (VALUETYPE &			ener,
-		vector &		force,
-		vector &		virial,
-		const vector &	coord,
-		const vector &	charge,
-		const vector &		atype,
-		const SimulationRegion &	region, 
-		const vector > &	nlist);
-  void tb_inner (VALUETYPE & ae,
-		 VALUETYPE & af,
-		 const VALUETYPE & r2);
-private:
+class Tabulated {
+ public:
+  Tabulated(){};
+  Tabulated(const VALUETYPE rc,
+            const VALUETYPE hh,
+            const vector& tab);
+  void reinit(const VALUETYPE rc,
+              const VALUETYPE hh,
+              const vector& tab);
+
+ public:
+  void compute(VALUETYPE& ener,
+               vector& force,
+               vector& virial,
+               const vector& coord,
+               const vector& atype,
+               const SimulationRegion& region,
+               const vector >& nlist);
+  void compute(VALUETYPE& ener,
+               vector& force,
+               vector& virial,
+               const vector& coord,
+               const vector& charge,
+               const vector& atype,
+               const SimulationRegion& region,
+               const vector >& nlist);
+  void tb_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r2);
+
+ private:
   VALUETYPE rc2, hi;
   vector data;
-  void compute_posi (int & idx, 
-		     VALUETYPE & eps,
-		     const VALUETYPE & rr);
-}
-    ;
+  void compute_posi(int& idx, VALUETYPE& eps, const VALUETYPE& rr);
+};
diff --git a/source/md/include/Trajectory.h b/source/md/include/Trajectory.h
index 5198d105b0..85b73ba9dd 100644
--- a/source/md/include/Trajectory.h
+++ b/source/md/include/Trajectory.h
@@ -2,59 +2,58 @@
 #define __MDFileManager_Trajectory_h_wanghan__
 
 // #include "Defines.h"
+#include 
+
 #include "xdrfile/xdrfile.h"
-#include "xdrfile/xdrfile_xtc.h"
 #include "xdrfile/xdrfile_trr.h"
-#include  
+#include "xdrfile/xdrfile_xtc.h"
 
 using namespace std;
 
-class XtcSaver
-{
-public:
-  XtcSaver () : inited(false), prec(1000) {};
-  ~XtcSaver ();
-  XtcSaver (const char * filename,
-	    const int & natoms);
-  bool reinit (const char * filename,
-	       const int & natoms);
-public:
-  void save (const int & step,
-	     const double & time,
-	     const vector > & frame, 
-	     const vector & box);
-private:
+class XtcSaver {
+ public:
+  XtcSaver() : inited(false), prec(1000){};
+  ~XtcSaver();
+  XtcSaver(const char *filename, const int &natoms);
+  bool reinit(const char *filename, const int &natoms);
+
+ public:
+  void save(const int &step,
+            const double &time,
+            const vector > &frame,
+            const vector &box);
+
+ private:
   XDRFILE *xd;
   int natoms;
-  rvec * xx;
+  rvec *xx;
   float prec;
   bool inited;
-  void clear ();
+  void clear();
 };
 
-class TrrSaver
-{
-public:
-  TrrSaver () : inited(false), lambda(0) {};
-  ~TrrSaver ();
-  TrrSaver (const char * filename,
-	    const int & natoms);
-  bool reinit (const char * filename,
-	       const int & natoms);
-public:
-  void save (const int & step,
-	     const double & time,
-	     const vector > & ixx, 
-	     const vector > & ivv, 
-	     const vector > & iff, 
-	     const vector & box);
-private:
+class TrrSaver {
+ public:
+  TrrSaver() : inited(false), lambda(0){};
+  ~TrrSaver();
+  TrrSaver(const char *filename, const int &natoms);
+  bool reinit(const char *filename, const int &natoms);
+
+ public:
+  void save(const int &step,
+            const double &time,
+            const vector > &ixx,
+            const vector > &ivv,
+            const vector > &iff,
+            const vector &box);
+
+ private:
   XDRFILE *xd;
   int natoms;
-  rvec * xx, *vv, *ff;
+  rvec *xx, *vv, *ff;
   float lambda;
   bool inited;
-  void clear ();
+  void clear();
 };
 
 #endif
diff --git a/source/md/include/UnitManager.h b/source/md/include/UnitManager.h
index 4ba8e87cd5..977505b5ee 100644
--- a/source/md/include/UnitManager.h
+++ b/source/md/include/UnitManager.h
@@ -3,11 +3,11 @@
 #include 
 using namespace std;
 
-class UnitManager
-{
-protected:
-  UnitManager () {};
-public:
+class UnitManager {
+ protected:
+  UnitManager(){};
+
+ public:
   static double Degree2Radian;
   static double Radian2Degree;
 
@@ -19,7 +19,8 @@ class UnitManager
   static double DefaultTableUpperLimit;
   static double DefaultTableStep;
   static double DefaultTableExtension;
-  static void set (const string & name_of_system);
-private :
-  static string	unit_names[];
+  static void set(const string& name_of_system);
+
+ private:
+  static string unit_names[];
 };
diff --git a/source/md/include/XyzFileManager.h b/source/md/include/XyzFileManager.h
index 359c9b0099..5ffbb847be 100644
--- a/source/md/include/XyzFileManager.h
+++ b/source/md/include/XyzFileManager.h
@@ -4,20 +4,17 @@
 #include 
 using namespace std;
 
-namespace XyzFileManager{
+namespace XyzFileManager {
 
-  void
-  read (const string & file,
-	vector & atom_name,
-	vector > & posi,
-	vector > & velo,
-	vector > & forc,
-	vector & boxsize);
+void read(const string& file,
+          vector& atom_name,
+          vector >& posi,
+          vector >& velo,
+          vector >& forc,
+          vector& boxsize);
 
-  void
-  getBoxSize (const string & name,
-	      vector & boxsize);
+void getBoxSize(const string& name, vector& boxsize);
 
-};
+};  // namespace XyzFileManager
 
 #endif
diff --git a/source/md/include/ZM.h b/source/md/include/ZM.h
index 55c7edbe36..1c2647a555 100644
--- a/source/md/include/ZM.h
+++ b/source/md/include/ZM.h
@@ -1,49 +1,46 @@
 #pragma once
 
-#include  
+#include 
+
+#include "SimulationRegion.h"
 #include "Tabulated.h"
 #include "ZMFunctions.h"
-#include "SimulationRegion.h"
 
 using namespace std;
 
 #ifdef HIGH_PREC
 typedef double VALUETYPE;
-#else 
-typedef float  VALUETYPE;
+#else
+typedef float VALUETYPE;
 #endif
 
-class ZM 
-{
-public:
-  ZM (const int & order,
-      const VALUETYPE & alpha,
-      const VALUETYPE & rc);
-public:
-  void compute (VALUETYPE &			ener,
-		vector &		force,
-		vector &		virial,
-		const vector &	coord,
-		const vector &	charge,
-		const vector &		atype,
-		const SimulationRegion &	region, 
-		const vector > &	nlist)
-      {zm_tab.compute (ener, force, virial, coord, charge, atype, region, nlist);};
-  void exclude  (VALUETYPE &			ener,
-		 vector &		force,
-		 vector &		virial,
-		 const vector &	coord,
-		 const vector &	charge,
-		 const vector &		atype,
-		 const SimulationRegion &	region, 
-		 const vector &		elist);
-  VALUETYPE e_corr (const vector & charge) const;
-private:
+class ZM {
+ public:
+  ZM(const int& order, const VALUETYPE& alpha, const VALUETYPE& rc);
+
+ public:
+  void compute(VALUETYPE& ener,
+               vector& force,
+               vector& virial,
+               const vector& coord,
+               const vector& charge,
+               const vector& atype,
+               const SimulationRegion& region,
+               const vector >& nlist) {
+    zm_tab.compute(ener, force, virial, coord, charge, atype, region, nlist);
+  };
+  void exclude(VALUETYPE& ener,
+               vector& force,
+               vector& virial,
+               const vector& coord,
+               const vector& charge,
+               const vector& atype,
+               const SimulationRegion& region,
+               const vector& elist);
+  VALUETYPE e_corr(const vector& charge) const;
+
+ private:
   Tabulated zm_tab;
-  void ex_inner (VALUETYPE & ae,
-		 VALUETYPE & af,
-		 const VALUETYPE & r2);
+  void ex_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r2);
   ZeroMultipole::Potential potzm;
-}
-    ;
-
+};
diff --git a/source/md/include/ZMFunctions.h b/source/md/include/ZMFunctions.h
index 8629aaee5f..9d4206a0a0 100644
--- a/source/md/include/ZMFunctions.h
+++ b/source/md/include/ZMFunctions.h
@@ -5,46 +5,34 @@
 using namespace std;
 
 namespace ZeroMultipole {
-  double funcV (const double & alpha,
-		const double & r);
-  double funcD1V (const double & alpha,
-		  const double & r);
-  double funcD2V (const double & alpha,
-		  const double & r);
-  double funcD3V (const double & alpha,
-		  const double & r);
-  double funcD4V (const double & alpha,
-		  const double & r);
-
-  void calCoefficients (const int & ll,
-			const double & alpha,
-			const double & rc,
-			vector & coeff);
-
-  class Potential
-  {
-    double		alpha, rc;
-    int			ll;
-    vector	coeff;
-public:
-    Potential ();
-    Potential (const int & ll,
-	       const double & alpha,
-	       const double & rc);
-    void reinit (const int & ll,
-		 const double & alpha,
-		 const double & rc);
-    double pot (const double & rr);
-    double ulpot (const double & rr);
-    double mpotp (const double & rr);
-    double mulpotp (const double & rr);
-public:
-    double energyCorr (const vector & charges) const;
-  }
-      ;
-}
-
-
+double funcV(const double& alpha, const double& r);
+double funcD1V(const double& alpha, const double& r);
+double funcD2V(const double& alpha, const double& r);
+double funcD3V(const double& alpha, const double& r);
+double funcD4V(const double& alpha, const double& r);
+
+void calCoefficients(const int& ll,
+                     const double& alpha,
+                     const double& rc,
+                     vector& coeff);
+
+class Potential {
+  double alpha, rc;
+  int ll;
+  vector coeff;
+
+ public:
+  Potential();
+  Potential(const int& ll, const double& alpha, const double& rc);
+  void reinit(const int& ll, const double& alpha, const double& rc);
+  double pot(const double& rr);
+  double ulpot(const double& rr);
+  double mpotp(const double& rr);
+  double mulpotp(const double& rr);
+
+ public:
+  double energyCorr(const vector& charges) const;
+};
+}  // namespace ZeroMultipole
 
 #endif
-
diff --git a/source/md/include/common.h b/source/md/include/common.h
index c82ae197af..54d19880d7 100644
--- a/source/md/include/common.h
+++ b/source/md/include/common.h
@@ -1,44 +1,42 @@
 #pragma once
 
-#include "SimulationRegion.h"
 #include 
+
+#include "SimulationRegion.h"
 using namespace std;
 
 const double b2m_l = 10;
 const double b2m_e = 1.660539040e-21 / 1.602176621e-19;
 
 template 
-void
-clear (VALUETYPE &			ener,
-       vector &		force,
-       vector &		virial)
-{
+void clear(VALUETYPE& ener,
+           vector& force,
+           vector& virial) {
   ener = 0;
-  fill (force.begin(), force.end(), 0.);
-  fill (virial.begin(), virial.end(), 0.);
+  fill(force.begin(), force.end(), 0.);
+  fill(virial.begin(), virial.end(), 0.);
 }
 
 template 
-void 
-normalize_coord (vector & coord,
-		 const SimulationRegion & region)
-{
+void normalize_coord(vector& coord,
+                     const SimulationRegion& region) {
   int natoms = coord.size() / 3;
-  for (int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     double phys[3];
-    for (int dd = 0; dd < 3; ++dd){
-      phys[dd] = coord[ii*3+dd];
+    for (int dd = 0; dd < 3; ++dd) {
+      phys[dd] = coord[ii * 3 + dd];
     }
     double inter[3];
-    region.phys2Inter (inter, phys);
-    for (int dd = 0; dd < 3; ++dd){
-      if      (inter[dd] <  0) inter[dd] += 1.;
-      else if (inter[dd] >= 1) inter[dd] -= 1.;
-    }    
-    region.inter2Phys (phys, inter);
-    for (int dd = 0; dd < 3; ++dd){
-      coord[ii*3+dd] = phys[dd];
+    region.phys2Inter(inter, phys);
+    for (int dd = 0; dd < 3; ++dd) {
+      if (inter[dd] < 0)
+        inter[dd] += 1.;
+      else if (inter[dd] >= 1)
+        inter[dd] -= 1.;
+    }
+    region.inter2Phys(phys, inter);
+    for (int dd = 0; dd < 3; ++dd) {
+      coord[ii * 3 + dd] = phys[dd];
     }
   }
 }
-
diff --git a/source/md/include/mymath.h b/source/md/include/mymath.h
index f4c0f45fe6..64db63878e 100644
--- a/source/md/include/mymath.h
+++ b/source/md/include/mymath.h
@@ -1,36 +1,31 @@
 #pragma once
 
 template 
-inline VALUETYPE
-dot (const VALUETYPE * r0,
-     const VALUETYPE * r1)
-{
-  return ( r0[0] * r1[0] + r0[1] * r1[1] + r0[2] * r1[2] );
+inline VALUETYPE dot(const VALUETYPE* r0, const VALUETYPE* r1) {
+  return (r0[0] * r1[0] + r0[1] * r1[1] + r0[2] * r1[2]);
 }
 
 template 
-inline TYPE
-dot (const TYPE& x0, const TYPE& y0, const TYPE& z0,
-     const TYPE& x1, const TYPE& y1, const TYPE& z1)
-{
+inline TYPE dot(const TYPE& x0,
+                const TYPE& y0,
+                const TYPE& z0,
+                const TYPE& x1,
+                const TYPE& y1,
+                const TYPE& z1) {
   return x0 * x1 + y0 * y1 + z0 * z1;
 }
 
 template 
-inline VALUETYPE
-cos (const VALUETYPE * r0,
-     const VALUETYPE * r1)
-{
-  double ip  = dot (r0, r1);
-  double ip0 = dot (r0, r0);
-  double ip1 = dot (r1, r1);
+inline VALUETYPE cos(const VALUETYPE* r0, const VALUETYPE* r1) {
+  double ip = dot(r0, r1);
+  double ip0 = dot(r0, r0);
+  double ip1 = dot(r1, r1);
   double ip01 = ip0 * ip1;
-  
+
   double cosval;
   if (ip01 > 0) {
     cosval = ip / sqrt(ip01);
-  }
-  else {
+  } else {
     cosval = 1.0;
   }
   if (cosval > 1.0) {
@@ -43,27 +38,28 @@ cos (const VALUETYPE * r0,
 }
 
 template 
-inline TYPE
-cos (const TYPE& x0, const TYPE& y0, const TYPE& z0,
-     const TYPE& x1, const TYPE& y1, const TYPE& z1)
-{
-  double dblx0 = (double) (x0);
-  double dblx1 = (double) (x1);
-  double dbly0 = (double) (y0);
-  double dbly1 = (double) (y1);
-  double dblz0 = (double) (z0);
-  double dblz1 = (double) (z1);
-  
-  double ip  = dot (dblx0, dbly0, dblz0, dblx1, dbly1, dblz1);
-  double ip0 = dot (dblx0, dbly0, dblz0, dblx0, dbly0, dblz0);
-  double ip1 = dot (dblx1, dbly1, dblz1, dblx1, dbly1, dblz1);  
+inline TYPE cos(const TYPE& x0,
+                const TYPE& y0,
+                const TYPE& z0,
+                const TYPE& x1,
+                const TYPE& y1,
+                const TYPE& z1) {
+  double dblx0 = (double)(x0);
+  double dblx1 = (double)(x1);
+  double dbly0 = (double)(y0);
+  double dbly1 = (double)(y1);
+  double dblz0 = (double)(z0);
+  double dblz1 = (double)(z1);
+
+  double ip = dot(dblx0, dbly0, dblz0, dblx1, dbly1, dblz1);
+  double ip0 = dot(dblx0, dbly0, dblz0, dblx0, dbly0, dblz0);
+  double ip1 = dot(dblx1, dbly1, dblz1, dblx1, dbly1, dblz1);
   double ip01 = ip0 * ip1;
-  
+
   double cosval;
   if (ip01 > 0) {
     cosval = ip / sqrt(ip01);
-  }
-  else {
+  } else {
     cosval = 1.0;
   }
   if (cosval > 1.0) {
diff --git a/source/md/mdnn.cc b/source/md/mdnn.cc
index 5674c648b1..1ebb7753f8 100644
--- a/source/md/mdnn.cc
+++ b/source/md/mdnn.cc
@@ -1,13 +1,11 @@
-#include "common.h"
-#include "Integrator.h"
+#include "Convert.h"
 #include "DeepPot.h"
+#include "GroFileManager.h"
+#include "Integrator.h"
 #include "Statistics.h"
-
 #include "Trajectory.h"
-#include "GroFileManager.h"
 #include "XyzFileManager.h"
-#include "Convert.h"
-
+#include "common.h"
 #include "json.hpp"
 using json = nlohmann::json;
 
@@ -15,25 +13,22 @@ using json = nlohmann::json;
 
 #ifdef HIGH_PREC
 typedef double VALUETYPE;
-#else 
-typedef float  VALUETYPE;
+#else
+typedef float VALUETYPE;
 #endif
 
-void 
-print_vec (const vector & vec)
-{
+void print_vec(const vector& vec) {
   int nloc = vec.size() / 3;
-  for (int kk = 0; kk < nloc; ++kk){
-    for (int dd = 0; dd < 3; ++dd){
-      cout << vec[kk*3+dd] << " \t " ;
+  for (int kk = 0; kk < nloc; ++kk) {
+    for (int dd = 0; dd < 3; ++dd) {
+      cout << vec[kk * 3 + dd] << " \t ";
     }
     cout << endl;
   }
 }
 
-int main(int argc, char * argv[])
-{
-  UnitManager::set ("metal");
+int main(int argc, char* argv[]) {
+  UnitManager::set("metal");
 
   if (argc == 0) {
     cerr << "usage " << endl;
@@ -41,7 +36,7 @@ int main(int argc, char * argv[])
     return 1;
   }
 
-  ifstream fp (argv[1]);
+  ifstream fp(argv[1]);
   json jdata;
   fp >> jdata;
   cout << "# using data base" << endl;
@@ -52,78 +47,76 @@ int main(int argc, char * argv[])
   vector dcoord, dveloc, dbox, dmass;
   vector dtype;
   vector freez;
-  
+
   // load_raw (dcoord, dtype, dbox);
   // dveloc.resize(dcoord.size(), 0.);
   string conf_format = jdata["conf_format"];
   string conf_file = jdata["conf_file"];
-  vector resdindex, atomindex;
+  vector resdindex, atomindex;
   vector resdname, atomname;
-  vector > posi, velo, tmp_forc;
+  vector > posi, velo, tmp_forc;
   vector boxsize;
   if (conf_format == "gro") {
-    GroFileManager::read (conf_file, resdindex, resdname, atomname, atomindex, posi, velo, boxsize);
-  }
-  else if (conf_format == "xyz"){
-    XyzFileManager::read (conf_file, atomname, posi, velo, tmp_forc, boxsize);
+    GroFileManager::read(conf_file, resdindex, resdname, atomname, atomindex,
+                         posi, velo, boxsize);
+  } else if (conf_format == "xyz") {
+    XyzFileManager::read(conf_file, atomname, posi, velo, tmp_forc, boxsize);
     if (velo.size() == 0) {
-      for (unsigned ii = 0; ii < posi.size(); ++ii){
-	velo.push_back (vector (3, 0.));
+      for (unsigned ii = 0; ii < posi.size(); ++ii) {
+        velo.push_back(vector(3, 0.));
       }
     }
     // convert to nanometer
-    for (unsigned ii = 0; ii < posi.size(); ++ii){
-      for (unsigned dd = 0; dd < 3; ++dd){
-	posi[ii][dd] *= .1;
-	velo[ii][dd] *= .1;
+    for (unsigned ii = 0; ii < posi.size(); ++ii) {
+      for (unsigned dd = 0; dd < 3; ++dd) {
+        posi[ii][dd] *= .1;
+        velo[ii][dd] *= .1;
       }
     }
-    for (unsigned dd = 0; dd < 9; ++dd){
+    for (unsigned dd = 0; dd < 9; ++dd) {
       boxsize[dd] *= .1;
     }
-    for (unsigned ii = 0; ii < posi.size(); ++ii){
-      resdindex.push_back (ii+1);
-      atomindex.push_back (ii+1);
+    for (unsigned ii = 0; ii < posi.size(); ++ii) {
+      resdindex.push_back(ii + 1);
+      atomindex.push_back(ii + 1);
     }
     resdname = atomname;
-  }
-  else {
+  } else {
     cerr << "unknown conf file format: " << conf_format << endl;
     return 1;
   }
   map name_type_map = jdata["atom_type"];
   map name_mass_map = jdata["atom_mass"];
   map name_charge_map;
-  if (jdata.find ("atom_charge") == jdata.end()) {
+  if (jdata.find("atom_charge") == jdata.end()) {
     for (map::iterator iter = name_mass_map.begin();
-	 iter != name_mass_map.end(); 
-	 ++iter ){
+         iter != name_mass_map.end(); ++iter) {
       name_charge_map[iter->first] = 0.;
     }
-  }
-  else {
+  } else {
     map name_charge_map_tmp = jdata["atom_charge"];
     name_charge_map = name_charge_map_tmp;
   }
-  if (jdata.find ("freeze_atoms") != jdata.end()){
-    freez = jdata["freeze_atoms"].get > ();
+  if (jdata.find("freeze_atoms") != jdata.end()) {
+    freez = jdata["freeze_atoms"].get >();
   }
 
   // convert but do not sort
-  Convert cvt (atomname, name_type_map, name_mass_map, name_charge_map, false);
-  cvt.gro2nnp (dcoord, dveloc, dbox, posi, velo, boxsize);
+  Convert cvt(atomname, name_type_map, name_mass_map,
+                         name_charge_map, false);
+  cvt.gro2nnp(dcoord, dveloc, dbox, posi, velo, boxsize);
   dtype = cvt.get_type();
   dmass = cvt.get_mass();
 
   int nloc = dtype.size();
   SimulationRegion region;
-  region.reinitBox (&dbox[0]);
-  normalize_coord (dcoord, region);
+  region.reinitBox(&dbox[0]);
+  normalize_coord(dcoord, region);
 
-  vector dforce (nloc * 3, 0.);
-  vector dae (nloc * 1, 0.);
-  vector dav (nloc * 9, 0.);
-  vector dvirial (9, 0.0);
+  vector dforce(nloc * 3, 0.);
+  vector dae(nloc * 1, 0.);
+  vector dav(nloc * 9, 0.);
+  vector dvirial(9, 0.0);
   VALUETYPE dener = 0;
 
   string graph_file = jdata["graph_file"];
@@ -138,66 +131,68 @@ int main(int argc, char * argv[])
   double temperature = jdata["T"];
   double tau_t = jdata["tau_T"];
   long long int seed = 0;
-  if (jdata.find ("rand_seed") != jdata.end()) {
+  if (jdata.find("rand_seed") != jdata.end()) {
     seed = jdata["rand_seed"];
   }
   bool print_f = false;
-  if (jdata.find ("print_force") != jdata.end()) {
+  if (jdata.find("print_force") != jdata.end()) {
     print_f = jdata["print_force"];
   }
 
   Integrator inte;
-  ThermostatLangevin thm (temperature, tau_t, seed);
-  deepmd::DeepPot nnp (graph_file);
-  
+  ThermostatLangevin thm(temperature, tau_t, seed);
+  deepmd::DeepPot nnp(graph_file);
+
   Statistics st;
-  XtcSaver sxtc (xtc_file.c_str(), nloc);
-  TrrSaver strr (trr_file.c_str(), nloc);
-  
+  XtcSaver sxtc(xtc_file.c_str(), nloc);
+  TrrSaver strr(trr_file.c_str(), nloc);
+
   // compute force at step 0
-  nnp.compute (dener, dforce, dvirial, dcoord, dtype, dbox);
+  nnp.compute(dener, dforce, dvirial, dcoord, dtype, dbox);
   // change virial to gromacs convention
   for (int ii = 0; ii < 9; ++ii) dvirial[ii] *= -0.5;
-  st.record (dener, dvirial, dveloc, dmass, region);
-  ofstream efout (ener_file);
+  st.record(dener, dvirial, dveloc, dmass, region);
+  ofstream efout(ener_file);
   ofstream pforce;
-  if (print_f) pforce.open ("force.out");
-  st.print_head (efout);
-  st.print (efout, 0, 0);
-
-  for (int ii = 0; ii < nsteps; ++ii){
-    inte.stepVeloc (dveloc, dforce, dmass, 0.5*dt, freez);
-    inte.stepCoord (dcoord, dveloc, 0.5*dt);
-    thm.stepOU (dveloc, dmass, dt, freez);
-    inte.stepCoord (dcoord, dveloc, 0.5*dt);
-    normalize_coord (dcoord, region);
-    nnp.compute (dener, dforce, dvirial, dae, dav, dcoord, dtype, dbox);
+  if (print_f) pforce.open("force.out");
+  st.print_head(efout);
+  st.print(efout, 0, 0);
+
+  for (int ii = 0; ii < nsteps; ++ii) {
+    inte.stepVeloc(dveloc, dforce, dmass, 0.5 * dt, freez);
+    inte.stepCoord(dcoord, dveloc, 0.5 * dt);
+    thm.stepOU(dveloc, dmass, dt, freez);
+    inte.stepCoord(dcoord, dveloc, 0.5 * dt);
+    normalize_coord(dcoord, region);
+    nnp.compute(dener, dforce, dvirial, dae, dav, dcoord, dtype, dbox);
     // change virial to gromacs convention
     for (int ii = 0; ii < 9; ++ii) dvirial[ii] *= -0.5;
-    inte.stepVeloc (dveloc, dforce, dmass, 0.5*dt, freez);
+    inte.stepVeloc(dveloc, dforce, dmass, 0.5 * dt, freez);
     if ((ii + 1) % nener == 0) {
-      st.record (dener, dvirial, dveloc, dmass, region);
-      st.print (efout, ii+1, (ii+1) * dt);
+      st.record(dener, dvirial, dveloc, dmass, region);
+      st.print(efout, ii + 1, (ii + 1) * dt);
       efout.flush();
     }
-    if (nxtc > 0 && (ii + 1) % nxtc == 0){
-      cvt.nnp2gro (posi, velo, boxsize, dcoord, dveloc, dbox);
-      sxtc.save (ii+1, (ii+1) * dt, posi, boxsize);
+    if (nxtc > 0 && (ii + 1) % nxtc == 0) {
+      cvt.nnp2gro(posi, velo, boxsize, dcoord, dveloc, dbox);
+      sxtc.save(ii + 1, (ii + 1) * dt, posi, boxsize);
     }
-    if (ntrr > 0 && (ii + 1) % ntrr == 0){
-      cvt.nnp2gro (posi, velo, boxsize, dcoord, dveloc, dbox);
-      strr.save (ii+1, (ii+1) * dt, posi, velo, vector > (), boxsize);
+    if (ntrr > 0 && (ii + 1) % ntrr == 0) {
+      cvt.nnp2gro(posi, velo, boxsize, dcoord, dveloc, dbox);
+      strr.save(ii + 1, (ii + 1) * dt, posi, velo, vector >(),
+                boxsize);
       if (print_f) {
-	for (int jj = 0;  jj < dforce.size(); ++jj) {
-	  pforce << dforce[jj] << " " ;
-	}
-	pforce << endl;
+        for (int jj = 0; jj < dforce.size(); ++jj) {
+          pforce << dforce[jj] << " ";
+        }
+        pforce << endl;
       }
-    }    
+    }
   }
-  
-  cvt.nnp2gro (posi, velo, boxsize, dcoord, dveloc, dbox);
-  GroFileManager::write ("out.gro", resdindex, resdname, atomname, atomindex, posi, velo, boxsize);
+
+  cvt.nnp2gro(posi, velo, boxsize, dcoord, dveloc, dbox);
+  GroFileManager::write("out.gro", resdindex, resdname, atomname, atomindex,
+                        posi, velo, boxsize);
   // ofstream oxyz ("out.xyz");
   // oxyz << nloc << endl;
   // oxyz << setprecision(12) ;
@@ -212,6 +207,6 @@ int main(int argc, char * argv[])
   //   }
   //   oxyz << endl;
   // }
-  
+
   return 0;
 }
diff --git a/source/md/src/AdWeight.cc b/source/md/src/AdWeight.cc
index 17990203ae..c2562a0c0c 100644
--- a/source/md/src/AdWeight.cc
+++ b/source/md/src/AdWeight.cc
@@ -1,65 +1,58 @@
 #include "AdWeight.h"
-#include "CosSwitch.h"
+
+#include 
 #include 
 #include 
-#include 
 
-AdWeight::
-AdWeight (const VALUETYPE & pl){
-  protect_level = pl;
-}
+#include "CosSwitch.h"
 
-void 
-AdWeight::
-sel_nn_atom (vector & nn_coord,
-	     vector & nn_type,
-	     vector & nn_idx,
-	     vector & nn_tag,
-	     const vector & dcoord,
-	     const vector & dtype) const
-{
+AdWeight::AdWeight(const VALUETYPE& pl) { protect_level = pl; }
+
+void AdWeight::sel_nn_atom(vector& nn_coord,
+                           vector& nn_type,
+                           vector& nn_idx,
+                           vector& nn_tag,
+                           const vector& dcoord,
+                           const vector& dtype) const {
   nn_coord.clear();
   nn_type.clear();
   nn_idx.clear();
 
-  vector & tag(nn_tag);
-  zone_tag (tag, dcoord);
-  for (int ii = 0; ii < tag.size(); ++ii){
-    if (tag[ii] != 0){
-      nn_coord.push_back (dcoord[3*ii+0]);
-      nn_coord.push_back (dcoord[3*ii+1]);
-      nn_coord.push_back (dcoord[3*ii+2]);
-      nn_type .push_back (dtype[ii]);
-      nn_idx  .push_back (ii);
+  vector& tag(nn_tag);
+  zone_tag(tag, dcoord);
+  for (int ii = 0; ii < tag.size(); ++ii) {
+    if (tag[ii] != 0) {
+      nn_coord.push_back(dcoord[3 * ii + 0]);
+      nn_coord.push_back(dcoord[3 * ii + 1]);
+      nn_coord.push_back(dcoord[3 * ii + 2]);
+      nn_type.push_back(dtype[ii]);
+      nn_idx.push_back(ii);
     }
-  }  
+  }
 }
 
-void 
-AdWeight::
-force_intpl (vector & of,
-	     const vector & dcoord,
-	     const vector & ff_force,
-	     const vector & nn_force,
-	     const vector & nn_idx) const
-{
+void AdWeight::force_intpl(vector& of,
+                           const vector& dcoord,
+                           const vector& ff_force,
+                           const vector& nn_force,
+                           const vector& nn_idx) const {
   int nall = dcoord.size() / 3;
-  
+
   vector weight, weight_x;
-  atom_weight (weight, weight_x, dcoord);
-  assert (nall == weight.size());
+  atom_weight(weight, weight_x, dcoord);
+  assert(nall == weight.size());
   // for (unsigned ii = 0; ii < weight.size(); ++ii){
   //   cout << ii << " " << weight[ii] << " " << dcoord[ii*3] << endl;
   // }
-  
+
   // cout << "of " << of.size() <<  endl;
   // cout << "dcoord " << dcoord.size() <<  endl;
   // cout << "ff_f " << ff_force.size() <<  endl;
   // cout << "nn_f " << nn_force.size() <<  endl;
   // cout << "nn_i " << nn_idx.size() <<  endl;
   // cout << "w " << weight.size() <<  endl;
-  vector nn_sum (3, 0.);
-  vector ff_sum (3, 0.);
+  vector nn_sum(3, 0.);
+  vector ff_sum(3, 0.);
   // for (int ii = 0; ii < ff_force.size() / 3; ++ii){
   //   for (int dd = 0; dd < 3; ++dd){
   //     ff_sum[dd] += ff_force[ii*3+dd];
@@ -70,63 +63,60 @@ force_intpl (vector & of,
   //     nn_sum[dd] += nn_force[ii*3+dd];
   //   }
   // }
-  // cout << ff_sum[0]   << " "  << ff_sum[1]   << " "  << ff_sum[2]   << " " < & of,
-	     const vector & dcoord,
-	     const vector & ff_bd_force,
-	     const vector & ff_nb_force,
-	     const vector & nn_force,
-	     const vector & nn_idx) const
-{
+void AdWeight::force_intpl(vector& of,
+                           const vector& dcoord,
+                           const vector& ff_bd_force,
+                           const vector& ff_nb_force,
+                           const vector& nn_force,
+                           const vector& nn_idx) const {
   int nall = dcoord.size() / 3;
-  
+
   vector weight, weight_x;
-  atom_weight (weight, weight_x, dcoord);
-  assert (nall == weight.size());
+  atom_weight(weight, weight_x, dcoord);
+  assert(nall == weight.size());
+
+  vector nn_sum(3, 0.);
+  vector ff_sum(3, 0.);
 
-  vector nn_sum (3, 0.);
-  vector ff_sum (3, 0.);
-  
-  for (int ii = 0; ii < nn_idx.size(); ++ii){
+  for (int ii = 0; ii < nn_idx.size(); ++ii) {
     int idx = nn_idx[ii];
-    for (int dd = 0; dd < 3; ++dd){
+    for (int dd = 0; dd < 3; ++dd) {
       // nn_sum[dd] += weight[idx] * nn_force[ii*3+dd];
       // nn_sum[dd] +=  1 * nn_force[ii*3+dd];
-      of[idx*3+dd] += weight[idx] * nn_force[ii*3+dd];
+      of[idx * 3 + dd] += weight[idx] * nn_force[ii * 3 + dd];
       // if (fabs(nn_force[ii*3+dd]) > 1e6) {
       // 	cout << " ii " << ii
-      // 	     << " dd " << dd 
+      // 	     << " dd " << dd
       // 	     << " coord " << dcoord[ii*3+dd]
       // 	     << " nn_f " << nn_force[ii*3+dd]
       // 	     << " ww " << weight[ii]
@@ -138,14 +128,14 @@ force_intpl (vector & of,
 
   // double protect_level = 1e-3;
   // cout << "with protect_level " << protect_level << endl;
-  for (int ii = 0; ii < nall; ++ii){
-    for (int dd = 0; dd < 3; ++dd){
+  for (int ii = 0; ii < nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
       double pref = (1 - weight[ii]);
       if (fabs(pref) < protect_level) pref = protect_level;
-      of[ii*3+dd] += pref * ff_bd_force[ii*3+dd];
+      of[ii * 3 + dd] += pref * ff_bd_force[ii * 3 + dd];
       // if (fabs(ff_bd_force[ii*3+dd]) > 1e6) {
       // 	cout << " ii " << ii
-      // 	     << " dd " << dd 
+      // 	     << " dd " << dd
       // 	     << " coord " << dcoord[ii*3+dd]
       // 	     << " ff_f " << ff_bd_force[ii*3+dd]
       // 	     << " ww " << 1 - weight[ii]
@@ -154,12 +144,12 @@ force_intpl (vector & of,
     }
     // cout << "ff " << dcoord[ii*3] << " " << 1-weight[ii] << endl;
   }
-  for (int ii = 0; ii < nall; ++ii){
-    for (int dd = 0; dd < 3; ++dd){
-      of[ii*3+dd] += (1 - weight[ii]) * ff_nb_force[ii*3+dd];
+  for (int ii = 0; ii < nall; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      of[ii * 3 + dd] += (1 - weight[ii]) * ff_nb_force[ii * 3 + dd];
       // if (fabs(ff_nb_force[ii*3+dd]) > 1e6) {
       // 	cout << " ii " << ii
-      // 	     << " dd " << dd 
+      // 	     << " dd " << dd
       // 	     << " coord " << dcoord[ii*3+dd]
       // 	     << " ff_f " << ff_nb_force[ii*3+dd]
       // 	     << " ww " << 1 - weight[ii]
@@ -169,80 +159,66 @@ force_intpl (vector & of,
     // cout << "ff " << dcoord[ii*3] << " " << 1-weight[ii] << endl;
   }
 
-  for (int ii = 0; ii < of.size() / 3; ++ii){
-    for (int dd = 0; dd < 3; ++dd){
-      ff_sum[dd] += ff_bd_force[ii*3+dd];
+  for (int ii = 0; ii < of.size() / 3; ++ii) {
+    for (int dd = 0; dd < 3; ++dd) {
+      ff_sum[dd] += ff_bd_force[ii * 3 + dd];
     }
   }
-  // cout << ff_sum[0]   << " "  << ff_sum[1]   << " "  << ff_sum[2]   << " " < & box,
-	    const VALUETYPE & rnn_,
-	    const VALUETYPE & rhy_,
-	    const VALUETYPE & rc_, 
-	    const VALUETYPE & protect_level_)
-  : AdWeight (protect_level_)
-{
-  assert (box.size() == 9);
+SlabWeight::SlabWeight(const vector& box,
+                       const VALUETYPE& rnn_,
+                       const VALUETYPE& rhy_,
+                       const VALUETYPE& rc_,
+                       const VALUETYPE& protect_level_)
+    : AdWeight(protect_level_) {
+  assert(box.size() == 9);
   center.resize(3);
-  for (int ii = 0; ii < 3; ++ii){
-    center[ii] = 0.5 * box[3*ii+ii];
+  for (int ii = 0; ii < 3; ++ii) {
+    center[ii] = 0.5 * box[3 * ii + ii];
   }
   rnn = rnn_;
   rhy = rhy_;
   rc = rc_;
 }
 
-
-void
-SlabWeight::
-zone_tag (vector & tag,
-	  const vector & coord) const
-{
+void SlabWeight::zone_tag(vector& tag,
+                          const vector& coord) const {
   int natoms = coord.size() / 3;
   tag.resize(natoms, 0);
-  
+
   // slab axis x
   VALUETYPE radius = rnn + rhy;
-  for (int ii = 0; ii < natoms; ++ii){
-    VALUETYPE posi = fabs(coord[ii*3] - center[0]);
+  for (int ii = 0; ii < natoms; ++ii) {
+    VALUETYPE posi = fabs(coord[ii * 3] - center[0]);
     if (posi < radius) {
       tag[ii] = 3;
-    }
-    else if (posi < radius + rc){
+    } else if (posi < radius + rc) {
       tag[ii] = 2;
-    }
-    else if (posi < radius + rc * 2){
+    } else if (posi < radius + rc * 2) {
       tag[ii] = 1;
-    }
-    else {
+    } else {
       tag[ii] = 0;
     }
   }
 }
 
-
 // dirty hacking
-void
-SlabWeight::
-atom_weight (vector & weight,
-	     vector & weight_x,
-	     const vector & coord) const
-{
-  CosSwitch cs (rnn, rnn + rhy);
-  
+void SlabWeight::atom_weight(vector& weight,
+                             vector& weight_x,
+                             const vector& coord) const {
+  CosSwitch cs(rnn, rnn + rhy);
+
   int natoms = coord.size() / 3;
   weight.resize(natoms, 0);
   weight_x.resize(natoms, 0);
   // slab axis x
   // for (int ii = 0; ii < natoms; ++ii){
   //   VALUETYPE posi = fabs(coord[ii*3] - center[0]);
-  //   cs.eval (weight[ii], posi);    
+  //   cs.eval (weight[ii], posi);
   //   // if (posi < radius){
   //   //   weight[ii] = 1.;
   //   // }
@@ -258,7 +234,7 @@ atom_weight (vector & weight,
   //   // weight_x
   //   weight_x[ii] = posi;
   //   weight_x[natoms/3 + ii*2 + 0] = posi;
-  //   weight_x[natoms/3 + ii*2 + 1] = posi;    
+  //   weight_x[natoms/3 + ii*2 + 1] = posi;
   //   // if (posi < radius){
   //   //   weight[ii] = 1.;
   //   // }
@@ -266,9 +242,9 @@ atom_weight (vector & weight,
   //   //   weight[ii] = 0.;
   //   // }
   // }
-  for (int ii = 0; ii < natoms; ii += 3){
-    VALUETYPE posi = fabs (coord[ii*3] - center[0]);
-    cs.eval (weight[ii], posi);
+  for (int ii = 0; ii < natoms; ii += 3) {
+    VALUETYPE posi = fabs(coord[ii * 3] - center[0]);
+    cs.eval(weight[ii], posi);
     weight[ii + 1] = weight[ii];
     weight[ii + 2] = weight[ii];
     // weight_x
@@ -277,4 +253,3 @@ atom_weight (vector & weight,
     weight_x[ii + 2] = posi;
   }
 }
-
diff --git a/source/md/src/Convert.cc b/source/md/src/Convert.cc
index 3192ef0305..d40e710084 100644
--- a/source/md/src/Convert.cc
+++ b/source/md/src/Convert.cc
@@ -5,122 +5,108 @@
 #include 
 
 template 
-Convert::
-Convert(const vector &  atomname,
-	map & name_type_map,
-	map & name_mass_map,
-	map & name_charge_map, 
-	const bool sort_)
-{
+Convert::Convert(const vector& atomname,
+                            map& name_type_map,
+                            map& name_mass_map,
+                            map& name_charge_map,
+                            const bool sort_) {
   int natoms = atomname.size();
-  atype.resize (natoms);
-  amass.resize (natoms);
-  vector tmp_charge (natoms);
-  for (unsigned ii = 0; ii < atype.size(); ++ii){
+  atype.resize(natoms);
+  amass.resize(natoms);
+  vector tmp_charge(natoms);
+  for (unsigned ii = 0; ii < atype.size(); ++ii) {
     atype[ii] = name_type_map[atomname[ii]];
     amass[ii] = name_mass_map[atomname[ii]];
     tmp_charge[ii] = name_charge_map[atomname[ii]];
   }
-  vector > > sorting (natoms);
-  for (unsigned ii = 0; ii < sorting.size(); ++ii){
-    sorting[ii] = pair > (atype[ii], pair (ii, amass[ii]));
+  vector > > sorting(natoms);
+  for (unsigned ii = 0; ii < sorting.size(); ++ii) {
+    sorting[ii] = pair >(
+        atype[ii], pair(ii, amass[ii]));
   }
-  if (sort_) sort (sorting.begin(), sorting.end());
+  if (sort_) sort(sorting.begin(), sorting.end());
   idx_map_nnp2gro.resize(natoms);
   idx_map_gro2nnp.resize(natoms);
-  for (unsigned ii = 0; ii < idx_map_nnp2gro.size(); ++ii){
+  for (unsigned ii = 0; ii < idx_map_nnp2gro.size(); ++ii) {
     idx_map_nnp2gro[ii] = sorting[ii].second.first;
     idx_map_gro2nnp[sorting[ii].second.first] = ii;
     atype[ii] = sorting[ii].first;
     amass[ii] = sorting[ii].second.second;
   }
-  acharge.resize (natoms);
-  for (int ii = 0; ii < natoms; ++ii){
+  acharge.resize(natoms);
+  for (int ii = 0; ii < natoms; ++ii) {
     int gro_i = idx_map_nnp2gro[ii];
     acharge[ii] = tmp_charge[gro_i];
   }
 }
 
 template 
-void
-Convert::
-gro2nnp (vector & coord,
-	 vector & veloc,
-	 vector & box,
-	 const vector > & posi,
-	 const vector > & velo,
-	 const vector & box_size) const
-{
-  assert (posi.size() == idx_map_nnp2gro.size());
-  assert (velo.size() == idx_map_nnp2gro.size());
+void Convert::gro2nnp(vector& coord,
+                                 vector& veloc,
+                                 vector& box,
+                                 const vector >& posi,
+                                 const vector >& velo,
+                                 const vector& box_size) const {
+  assert(posi.size() == idx_map_nnp2gro.size());
+  assert(velo.size() == idx_map_nnp2gro.size());
   int natoms = idx_map_nnp2gro.size();
-  coord.resize (3 * natoms);
-  veloc.resize (3 * natoms);
-  for (unsigned ii = 0; ii < natoms; ++ii){
+  coord.resize(3 * natoms);
+  veloc.resize(3 * natoms);
+  for (unsigned ii = 0; ii < natoms; ++ii) {
     int gro_i = idx_map_nnp2gro[ii];
-    for (int dd = 0; dd < 3; ++dd){
-      coord[ii*3+dd] = posi[gro_i][dd] * 10;
-      veloc[ii*3+dd] = velo[gro_i][dd] * 10;
+    for (int dd = 0; dd < 3; ++dd) {
+      coord[ii * 3 + dd] = posi[gro_i][dd] * 10;
+      veloc[ii * 3 + dd] = velo[gro_i][dd] * 10;
     }
   }
   box.resize(9);
-  for (int dd = 0; dd < 9; ++dd){
+  for (int dd = 0; dd < 9; ++dd) {
     box[dd] = box_size[dd] * 10;
   }
 }
 
 template 
-void
-Convert::
-nnp2gro (vector > & posi,
-	 vector > & velo,
-	 vector & box_size,
-	 const vector & coord,
-	 const vector & veloc,
-	 const vector & box) const
-{
+void Convert::nnp2gro(vector >& posi,
+                                 vector >& velo,
+                                 vector& box_size,
+                                 const vector& coord,
+                                 const vector& veloc,
+                                 const vector& box) const {
   int natoms = idx_map_nnp2gro.size();
   posi.resize(natoms);
   velo.resize(natoms);
-  for (unsigned ii = 0; ii < posi.size(); ++ii){
+  for (unsigned ii = 0; ii < posi.size(); ++ii) {
     posi[ii].resize(3);
     velo[ii].resize(3);
   }
-  for (unsigned ii = 0; ii < posi.size(); ++ii){
+  for (unsigned ii = 0; ii < posi.size(); ++ii) {
     int gro_i = idx_map_nnp2gro[ii];
-    for (int dd = 0; dd < 3; ++dd){
-      posi[gro_i][dd] = coord[ii*3+dd] * 0.1;
-      velo[gro_i][dd] = veloc[ii*3+dd] * 0.1;
+    for (int dd = 0; dd < 3; ++dd) {
+      posi[gro_i][dd] = coord[ii * 3 + dd] * 0.1;
+      velo[gro_i][dd] = veloc[ii * 3 + dd] * 0.1;
     }
   }
   box_size.resize(9);
-  for (int dd = 0; dd < 9; ++dd){
+  for (int dd = 0; dd < 9; ++dd) {
     box_size[dd] = box[dd] * 0.1;
   }
 }
 
 template 
-void
-Convert::
-idx_gro2nnp (vector & out,
-	     const vector & in) const
-{
-  for (unsigned ii = 0; ii < in.size(); ++ii){
+void Convert::idx_gro2nnp(vector& out,
+                                     const vector& in) const {
+  for (unsigned ii = 0; ii < in.size(); ++ii) {
     out[ii] = idx_map_gro2nnp[in[ii]];
   }
 }
 
 template 
-void
-Convert::
-idx_nnp2gro (vector & out,
-	     const vector & in) const
-{
-  for (unsigned ii = 0; ii < in.size(); ++ii){
+void Convert::idx_nnp2gro(vector& out,
+                                     const vector& in) const {
+  for (unsigned ii = 0; ii < in.size(); ++ii) {
     out[ii] = idx_map_nnp2gro[in[ii]];
   }
 }
 
 template class Convert;
 template class Convert;
-
diff --git a/source/md/src/Gaussian.cc b/source/md/src/Gaussian.cc
index 7b5c9e6481..4eddd60419 100644
--- a/source/md/src/Gaussian.cc
+++ b/source/md/src/Gaussian.cc
@@ -1,20 +1,14 @@
 #include "Gaussian.h"
 
-void 
-Gaussian::
-set_seed (unsigned long s) 
-{
-  RandomGenerator_MT19937:: init_genrand (s);
+void Gaussian::set_seed(unsigned long s) {
+  RandomGenerator_MT19937::init_genrand(s);
 }
 
-void
-Gaussian::
-gen (double * vec, const int numb_gen)
-{
+void Gaussian::gen(double* vec, const int numb_gen) {
   const double epsilon = std::numeric_limits::min();
-  const double two_pi = 2.0*M_PI;
+  const double two_pi = 2.0 * M_PI;
 
-  for (int ii = 0; ii < numb_gen; ++ii){
+  for (int ii = 0; ii < numb_gen; ++ii) {
     double u0, u1;
     do {
       u0 = RandomGenerator_MT19937::genrand_real3();
@@ -23,5 +17,3 @@ gen (double * vec, const int numb_gen)
     vec[ii] = sqrt(-2.0 * log(u0)) * cos(two_pi * u1);
   }
 }
-
-
diff --git a/source/md/src/GroFileManager.cc b/source/md/src/GroFileManager.cc
index ea3e85633b..0cf33bb54a 100644
--- a/source/md/src/GroFileManager.cc
+++ b/source/md/src/GroFileManager.cc
@@ -1,213 +1,212 @@
 #include "GroFileManager.h"
-#include 
-#include 
-#include 
+
 #include 
+#include 
+#include 
+#include 
 
 using namespace std;
 
-class WrongFileFormat
-{
-};
+class WrongFileFormat {};
 
-bool GroFileManager::readTop (const std::string & filename,
-			      std::vector & molnames,
-			      std::vector & nmols)
-{
+bool GroFileManager::readTop(const std::string &filename,
+                             std::vector &molnames,
+                             std::vector &nmols) {
   molnames.clear();
   nmols.clear();
 
   std::ifstream in(filename.c_str());
-  if (in.bad()){
+  if (in.bad()) {
     std::cerr << "cannot open file " << filename << std::endl;
     return false;
   }
-  char line [1024];
-  std::string target ("[ molecules ]");
+  char line[1024];
+  std::string target("[ molecules ]");
   bool find = false;
-  while (!in.eof()){
-    in.getline (line, 1024, '\n');
-    if (target == std::string (line)) {
+  while (!in.eof()) {
+    in.getline(line, 1024, '\n');
+    if (target == std::string(line)) {
       find = true;
       break;
     }
   }
-  if (!find){
-    std::cerr << "cannot find [ molecules ] in file " << filename 
-	      << ". please check there is no space after \"]\"\n";
+  if (!find) {
+    std::cerr << "cannot find [ molecules ] in file " << filename
+              << ". please check there is no space after \"]\"\n";
     return false;
   }
-  
-//   while (!(in.getline (line, 1024, '\n')).eof()){
-// //     if (line[0] == '['){
-// //       break;
-// //     }
-// //     char name[1024];
-// //     int number;
-// //     sscanf (line, "%s%d", name, &number);
-// //     molnames.push_back (std::string(name));
-// //     nmols.push_back (number);
-//   }
-  
+
+  //   while (!(in.getline (line, 1024, '\n')).eof()){
+  // //     if (line[0] == '['){
+  // //       break;
+  // //     }
+  // //     char name[1024];
+  // //     int number;
+  // //     sscanf (line, "%s%d", name, &number);
+  // //     molnames.push_back (std::string(name));
+  // //     nmols.push_back (number);
+  //   }
 
   std::string name;
   int number;
-  while ( !(in >> name >> number).eof()){
-    if (name[0] == '['){
+  while (!(in >> name >> number).eof()) {
+    if (name[0] == '[') {
       break;
     }
-    if (name.empty()){
+    if (name.empty()) {
       break;
     }
-//     std::cout << name << std::endl;
-    molnames.push_back (name);
-    nmols.push_back (number);
+    //     std::cout << name << std::endl;
+    molnames.push_back(name);
+    nmols.push_back(number);
   }
-  
 
   return true;
 }
 
-
-
-template 
-bool GroFileManager::writePotenFile (const double & rmin, const double & rcut, 
-				     const double & interval,
-				     UnitaryFunction1 & f, UnitaryFunction2 & fp,
-				     UnitaryFunction3 & g, UnitaryFunction4 & gp,
-				     UnitaryFunction5 & h, UnitaryFunction6 & hp,
-				     const std::string & filename)
-{
-  FILE * filep = fopen (filename.c_str(), "w");
-  if(filep == NULL){
+template 
+bool GroFileManager::writePotenFile(const double &rmin,
+                                    const double &rcut,
+                                    const double &interval,
+                                    UnitaryFunction1 &f,
+                                    UnitaryFunction2 &fp,
+                                    UnitaryFunction3 &g,
+                                    UnitaryFunction4 &gp,
+                                    UnitaryFunction5 &h,
+                                    UnitaryFunction6 &hp,
+                                    const std::string &filename) {
+  FILE *filep = fopen(filename.c_str(), "w");
+  if (filep == NULL) {
     std::cerr << "cannot open file " << filename << std::endl;
     return false;
   }
 
   double upper = rcut + 1;
   double nx;
-  if ( int(upper / interval) != upper / interval)
+  if (int(upper / interval) != upper / interval)
     nx = int(upper / interval) + 1;
-  else 
+  else
     nx = int(upper / interval);
   upper = interval * nx;
-  
+
   int i = 0;
-  for (i = 0; i <= nx + 1; ++i){
+  for (i = 0; i <= nx + 1; ++i) {
     double x = i * interval;
-    if (x < rmin){
-      fprintf (filep, "%.12e\t%.12e\t%.12e\t%.12e\t%.12e\t%.12e\t%.12e\n",
-	       x, 0., 0., 0., 0., 0., 0.);
-    }
-    else {
-      fprintf (filep, "%.12e\t%.12e\t%.12e\t%.12e\t%.12e\t%.12e\t%.12e\n",
-	       x, f(x), -fp(x), g(x), -gp(x), h(x), -hp(x));
+    if (x < rmin) {
+      fprintf(filep, "%.12e\t%.12e\t%.12e\t%.12e\t%.12e\t%.12e\t%.12e\n", x, 0.,
+              0., 0., 0., 0., 0.);
+    } else {
+      fprintf(filep, "%.12e\t%.12e\t%.12e\t%.12e\t%.12e\t%.12e\t%.12e\n", x,
+              f(x), -fp(x), g(x), -gp(x), h(x), -hp(x));
     }
   }
-  
-  fclose (filep);
+
+  fclose(filep);
   return true;
 }
 
-
-
-void GroFileManager::read (const std::string & name ,
-			   std::vector & resdindex,
-			   std::vector &  resdname,
-			   std::vector &  atomname,
-			   std::vector & atomindex,
-			   std::vector > & posi,
-			   std::vector > & velo,
-			   std::vector & boxsize_)
-{
-  FILE * fp = fopen (name.c_str(), "r");
-  if (fp == NULL){
+void GroFileManager::read(const std::string &name,
+                          std::vector &resdindex,
+                          std::vector &resdname,
+                          std::vector &atomname,
+                          std::vector &atomindex,
+                          std::vector > &posi,
+                          std::vector > &velo,
+                          std::vector &boxsize_) {
+  FILE *fp = fopen(name.c_str(), "r");
+  if (fp == NULL) {
     std::cerr << "cannot open file " << name << std::endl;
     return;
   }
-  while (fgetc(fp) != '\n');
+  while (fgetc(fp) != '\n')
+    ;
   int npart;
-  fscanf (fp, "%d\n", &npart);
-  fclose (fp);
-  
+  fscanf(fp, "%d\n", &npart);
+  fclose(fp);
+
   resdindex.clear();
   resdname.clear();
   atomname.clear();
   atomindex.clear();
   posi.clear();
   velo.clear();
-  vector boxsize;
+  vector boxsize;
   boxsize.resize(3);
 
-  fp = fopen (name.c_str(), "r");  
-  while (fgetc(fp) != '\n');
-  while (fgetc(fp) != '\n');
+  fp = fopen(name.c_str(), "r");
+  while (fgetc(fp) != '\n')
+    ;
+  while (fgetc(fp) != '\n')
+    ;
   char line[1024];
-  for (int i = 0; i < npart; ++ i){
-    fgets (line, 1024, fp);
+  for (int i = 0; i < npart; ++i) {
+    fgets(line, 1024, fp);
     char tmp[1024];
     int tmpd;
     char tmps[1024];
-    for (unsigned j = 0; j < 5; ++j){
+    for (unsigned j = 0; j < 5; ++j) {
       tmp[j] = line[j];
     }
     tmp[5] = '\0';
-    if (sscanf (tmp, "%d", &tmpd) != 1){
+    if (sscanf(tmp, "%d", &tmpd) != 1) {
       throw WrongFileFormat();
     }
     resdindex.push_back(tmpd);
 
-    for (unsigned j = 0; j < 5; ++j){
-      tmp[j] = line[j+5];
+    for (unsigned j = 0; j < 5; ++j) {
+      tmp[j] = line[j + 5];
     }
     tmp[5] = '\0';
-    if (sscanf (tmp, "%s", tmps) != 1){
+    if (sscanf(tmp, "%s", tmps) != 1) {
       throw WrongFileFormat();
     }
-    resdname.push_back (tmps);
+    resdname.push_back(tmps);
 
-    for (unsigned j = 0; j < 5; ++j){
-      tmp[j] = line[j+10];
+    for (unsigned j = 0; j < 5; ++j) {
+      tmp[j] = line[j + 10];
     }
     tmp[5] = '\0';
-    if (sscanf (tmp, "%s", tmps) != 1){
+    if (sscanf(tmp, "%s", tmps) != 1) {
       throw WrongFileFormat();
     }
-    atomname.push_back (tmps);
+    atomname.push_back(tmps);
 
-    for (unsigned j = 0; j < 5; ++j){
-      tmp[j] = line[j+15];
+    for (unsigned j = 0; j < 5; ++j) {
+      tmp[j] = line[j + 15];
     }
     tmp[5] = '\0';
-    if (sscanf (tmp, "%d", &tmpd) != 1){
+    if (sscanf(tmp, "%d", &tmpd) != 1) {
       throw WrongFileFormat();
     }
     atomindex.push_back(tmpd);
 
     double a, b, c;
     double d, e, f;
-    std::vector tmpp(3);
-    std::vector tmpv(3);
+    std::vector tmpp(3);
+    std::vector tmpv(3);
 
-    int tag = sscanf (&line[20], "%lf%lf%lf%lf%lf%lf", &a, &b, &c, &d, &e, &f);
+    int tag = sscanf(&line[20], "%lf%lf%lf%lf%lf%lf", &a, &b, &c, &d, &e, &f);
     tmpp[0] = a;
     tmpp[1] = b;
     tmpp[2] = c;
-    switch (tag){
-    case 6:
-	tmpv[0] = d;
-	tmpv[1] = e;
-	tmpv[2] = f;
-	break;
-    case 3:
-	tmpv[0] = 0.f;
-	tmpv[1] = 0.f;
-	tmpv[2] = 0.f;
-	break;
-    default:
-	throw WrongFileFormat();
+    switch (tag) {
+      case 6:
+        tmpv[0] = d;
+        tmpv[1] = e;
+        tmpv[2] = f;
+        break;
+      case 3:
+        tmpv[0] = 0.f;
+        tmpv[1] = 0.f;
+        tmpv[2] = 0.f;
+        break;
+      default:
+        throw WrongFileFormat();
     }
 
     posi.push_back(tmpp);
@@ -215,91 +214,71 @@ void GroFileManager::read (const std::string & name ,
   }
   int tag = 0;
   double rbox[9];
-  tag = fscanf (fp, "%lf%lf%lf%lf%lf%lf%lf%lf%lf",
-		rbox+0, rbox+1, rbox+2,
-		rbox+3, rbox+4, rbox+5,
-		rbox+6, rbox+7, rbox+8);
-  fclose (fp);
+  tag = fscanf(fp, "%lf%lf%lf%lf%lf%lf%lf%lf%lf", rbox + 0, rbox + 1, rbox + 2,
+               rbox + 3, rbox + 4, rbox + 5, rbox + 6, rbox + 7, rbox + 8);
+  fclose(fp);
 
-  boxsize_.resize (9, 0.);
-  fill (boxsize_.begin(), boxsize_.end(), 0.);
+  boxsize_.resize(9, 0.);
+  fill(boxsize_.begin(), boxsize_.end(), 0.);
 
-  if (tag == 9){
+  if (tag == 9) {
     boxsize_[0] = rbox[0];
     boxsize_[4] = rbox[1];
     boxsize_[8] = rbox[2];
-    boxsize_[0*3+1] = rbox[3];
-    boxsize_[0*3+2] = rbox[4];
-    boxsize_[1*3+0] = rbox[5];
-    boxsize_[1*3+2] = rbox[6];
-    boxsize_[2*3+0] = rbox[7];
-    boxsize_[2*3+1] = rbox[8];
-  }
-  else {
-    assert (tag == 3);
+    boxsize_[0 * 3 + 1] = rbox[3];
+    boxsize_[0 * 3 + 2] = rbox[4];
+    boxsize_[1 * 3 + 0] = rbox[5];
+    boxsize_[1 * 3 + 2] = rbox[6];
+    boxsize_[2 * 3 + 0] = rbox[7];
+    boxsize_[2 * 3 + 1] = rbox[8];
+  } else {
+    assert(tag == 3);
     boxsize_[0] = rbox[0];
     boxsize_[4] = rbox[1];
-    boxsize_[8] = rbox[2];    
+    boxsize_[8] = rbox[2];
   }
 }
 
-void GroFileManager::write (const std::string & name ,
-			    const std::vector & resdindex,
-			    const std::vector &  resdname,
-			    const std::vector &  atomname,
-			    const std::vector & atomindex,
-			    const std::vector > & posi,
-			    const std::vector > & velo,
-			    const std::vector & boxsize)
-{
-  FILE * fp = fopen(name.c_str(), "w");
-  if (fp == NULL){
+void GroFileManager::write(const std::string &name,
+                           const std::vector &resdindex,
+                           const std::vector &resdname,
+                           const std::vector &atomname,
+                           const std::vector &atomindex,
+                           const std::vector > &posi,
+                           const std::vector > &velo,
+                           const std::vector &boxsize) {
+  FILE *fp = fopen(name.c_str(), "w");
+  if (fp == NULL) {
     std::cerr << "cannot open file " << name << std::endl;
     return;
   }
-  // std::copy (atomname.begin(), atomname.end(), std::ostream_iterator(std::cout, "\n"));
-  
-  fprintf (fp, "\n%d\n", int(resdindex.size()));
-  for (int i = 0; i < int(resdindex.size()); ++i){
-    fprintf (fp, "%5d%5s%5s%5d%8.3f%8.3f%8.3f%8.4f%8.4f%8.4f\n",
-	     resdindex[i] % 100000,
-	     (char *)(resdname[i].c_str()),
-	     (char *)(atomname[i].c_str()),
-	     atomindex[i] % 100000, 
-	     posi[i][0], posi[i][1], posi[i][2],
-	     velo[i][0], velo[i][1], velo[i][2]);
+  // std::copy (atomname.begin(), atomname.end(),
+  // std::ostream_iterator(std::cout, "\n"));
+
+  fprintf(fp, "\n%d\n", int(resdindex.size()));
+  for (int i = 0; i < int(resdindex.size()); ++i) {
+    fprintf(fp, "%5d%5s%5s%5d%8.3f%8.3f%8.3f%8.4f%8.4f%8.4f\n",
+            resdindex[i] % 100000, (char *)(resdname[i].c_str()),
+            (char *)(atomname[i].c_str()), atomindex[i] % 100000, posi[i][0],
+            posi[i][1], posi[i][2], velo[i][0], velo[i][1], velo[i][2]);
   }
   // vector box(3);
   // for (int ii = 0; ii < 3; ++ii) box[ii] = boxsize[3*ii+ii];
-  if (boxsize.size() == 3){
-    fprintf (fp, "%f %f %f\n", boxsize[0], boxsize[1], boxsize[2]);
-  }
-  else if (boxsize.size() == 9){    
-    fprintf (fp, "%f %f %f %f %f %f %f %f %f \n",
-	     boxsize[0*3+0], boxsize[1*3+1], boxsize[2*3+2],
-	     boxsize[0*3+1], boxsize[0*3+2],
-	     boxsize[1*3+0], boxsize[1*3+2],
-	     boxsize[2*3+0], boxsize[2*3+1]);
+  if (boxsize.size() == 3) {
+    fprintf(fp, "%f %f %f\n", boxsize[0], boxsize[1], boxsize[2]);
+  } else if (boxsize.size() == 9) {
+    fprintf(fp, "%f %f %f %f %f %f %f %f %f \n", boxsize[0 * 3 + 0],
+            boxsize[1 * 3 + 1], boxsize[2 * 3 + 2], boxsize[0 * 3 + 1],
+            boxsize[0 * 3 + 2], boxsize[1 * 3 + 0], boxsize[1 * 3 + 2],
+            boxsize[2 * 3 + 0], boxsize[2 * 3 + 1]);
   }
 
-  fclose (fp);
+  fclose(fp);
 }
 
-
-struct F 
-{
-  double operator () (double x)
-      {
-	return 1./x;
-      }
-}
-    ;
-struct Zero
-{
-  double operator () (double x)
-      {
-	return 0;
-      }
-}
-    ;
-
+struct F {
+  double operator()(double x) { return 1. / x; }
+};
+struct Zero {
+  double operator()(double x) { return 0; }
+};
diff --git a/source/md/src/HarmonicAngle.cc b/source/md/src/HarmonicAngle.cc
index 23c2513f15..dabbdb7894 100644
--- a/source/md/src/HarmonicAngle.cc
+++ b/source/md/src/HarmonicAngle.cc
@@ -1,63 +1,55 @@
 #include "HarmonicAngle.h"
-#include "common.h"
-#include  
+
 #include 
+#include 
+
+#include "common.h"
 #include "mymath.h"
 
-HarmonicAngle::
-HarmonicAngle (const VALUETYPE & ka_,
-	      const VALUETYPE & tt_)
-    : ka(ka_), tt(tt_)
-{
-}
+HarmonicAngle::HarmonicAngle(const VALUETYPE& ka_, const VALUETYPE& tt_)
+    : ka(ka_), tt(tt_) {}
+
+inline bool compute_variable(const VALUETYPE* rij,
+                             const VALUETYPE* rkj,
+                             VALUETYPE* var,
+                             VALUETYPE* dvardcos,
+                             VALUETYPE* cos_theta) {
+  *cos_theta = cos(rij[0], rij[1], rij[2], rkj[0], rkj[1], rkj[2]);
+  *var = acos(*cos_theta);
 
-inline bool 
-compute_variable (const VALUETYPE * rij,
-		  const VALUETYPE * rkj,
-		  VALUETYPE * var,
-		  VALUETYPE * dvardcos,
-		  VALUETYPE * cos_theta)
-{
-  *cos_theta = cos (rij[0], rij[1], rij[2], rkj[0], rkj[1], rkj[2]);
-  *var = acos (*cos_theta);
-      
   VALUETYPE cos_theta2 = *cos_theta * *cos_theta;
   if (cos_theta2 >= 1) {
     *dvardcos = 1.;
     return false;
   }
-  *dvardcos = - 1./sqrt(1. - cos_theta2);
+  *dvardcos = -1. / sqrt(1. - cos_theta2);
   return true;
 }
-		  
 
-void
-HarmonicAngle::
-compute (VALUETYPE &			ener,
-	 vector &		force,
-	 vector &		virial,
-	 const vector &	coord,
-	 const vector &		atype,
-	 const SimulationRegion &	region, 
-	 const vector &		alist)
-{
+void HarmonicAngle::compute(VALUETYPE& ener,
+                            vector& force,
+                            vector& virial,
+                            const vector& coord,
+                            const vector& atype,
+                            const SimulationRegion& region,
+                            const vector& alist) {
   // all set zeros
-  for (unsigned _ = 0; _ < alist.size(); _ += 3){
+  for (unsigned _ = 0; _ < alist.size(); _ += 3) {
     int ii = alist[_];
-    int jj = alist[_+1];
-    int kk = alist[_+2];    
+    int jj = alist[_ + 1];
+    int kk = alist[_ + 2];
 
     VALUETYPE rij[3], rkj[3];
-    region.diffNearestNeighbor (&coord[ii*3], &coord[jj*3], rij);      
-    region.diffNearestNeighbor (&coord[kk*3], &coord[jj*3], rkj);      
+    region.diffNearestNeighbor(&coord[ii * 3], &coord[jj * 3], rij);
+    region.diffNearestNeighbor(&coord[kk * 3], &coord[jj * 3], rkj);
 
-    VALUETYPE var(0), dvardcos(0), cos_theta(0);    
-    bool apply_force = compute_variable (rij, rkj, &var, &dvardcos, &cos_theta);
+    VALUETYPE var(0), dvardcos(0), cos_theta(0);
+    bool apply_force = compute_variable(rij, rkj, &var, &dvardcos, &cos_theta);
 
     VALUETYPE dudvar(0), angle_energy(0);
     VALUETYPE diff = var - tt;
     VALUETYPE pdiff = ka * diff;
-    dudvar = - pdiff;
+    dudvar = -pdiff;
     angle_energy = VALUETYPE(0.5) * pdiff * diff;
 
     ener += angle_energy;
@@ -67,12 +59,12 @@ compute (VALUETYPE &			ener,
     VALUETYPE fij[3];
     VALUETYPE fkj[3];
 
-    if (apply_force) {    
+    if (apply_force) {
       VALUETYPE dudcos = dudvar * dvardcos;
-      VALUETYPE rij2 = dot (rij, rij);
-      VALUETYPE rkj2 = dot (rkj, rkj);
-      VALUETYPE invrij = 1./sqrt (rij2);
-      VALUETYPE invrkj = 1./sqrt (rkj2);
+      VALUETYPE rij2 = dot(rij, rij);
+      VALUETYPE rkj2 = dot(rkj, rkj);
+      VALUETYPE invrij = 1. / sqrt(rij2);
+      VALUETYPE invrkj = 1. / sqrt(rkj2);
       VALUETYPE invrij2 = invrij * invrij;
       VALUETYPE invrkj2 = invrkj * invrkj;
       VALUETYPE invrijrkj = invrij * invrkj;
@@ -83,8 +75,7 @@ compute (VALUETYPE &			ener,
       fkj[0] = dudcos * (rij[0] * invrijrkj - rkj[0] * invrkj2 * cos_theta);
       fkj[1] = dudcos * (rij[1] * invrijrkj - rkj[1] * invrkj2 * cos_theta);
       fkj[2] = dudcos * (rij[2] * invrijrkj - rkj[2] * invrkj2 * cos_theta);
-    }
-    else {
+    } else {
       fij[0] = fij[1] = fij[2] = fkj[0] = fkj[1] = fkj[2] = VALUETYPE(0);
     }
 
@@ -99,8 +90,8 @@ compute (VALUETYPE &			ener,
     force[3 * jj + 2] -= fij[2] + fkj[2];
     for (int dd0 = 0; dd0 < 3; ++dd0) {
       for (int dd1 = 0; dd1 < 3; ++dd1) {
-	virial[dd0*3+dd1] -= 0.5 * fij[dd0] * rij[dd1];
-	virial[dd0*3+dd1] -= 0.5 * fkj[dd0] * rkj[dd1];
+        virial[dd0 * 3 + dd1] -= 0.5 * fij[dd0] * rij[dd1];
+        virial[dd0 * 3 + dd1] -= 0.5 * fkj[dd0] * rkj[dd1];
       }
     }
   }
diff --git a/source/md/src/HarmonicBond.cc b/source/md/src/HarmonicBond.cc
index cd13bf5421..010513adc0 100644
--- a/source/md/src/HarmonicBond.cc
+++ b/source/md/src/HarmonicBond.cc
@@ -1,56 +1,45 @@
 #include "HarmonicBond.h"
-#include "common.h"
+
 #include 
-#include  
+#include 
 
-HarmonicBond::
-HarmonicBond (const VALUETYPE & kk_,
-	      const VALUETYPE & bb_)
-    : kk(kk_), bb(bb_)
-{
-}
+#include "common.h"
+
+HarmonicBond::HarmonicBond(const VALUETYPE& kk_, const VALUETYPE& bb_)
+    : kk(kk_), bb(bb_) {}
 
-void 
-HarmonicBond::
-hb_inner (VALUETYPE & ae,
-	  VALUETYPE & af,
-	  const VALUETYPE & r1)
-{
+void HarmonicBond::hb_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r1) {
   VALUETYPE diff = r1 - bb;
   // cout << bb << " " << r1 << endl;
   VALUETYPE pdiff = kk * diff;
-  af = - pdiff / r1;
+  af = -pdiff / r1;
   ae = 0.5 * pdiff * diff;
 }
 
-void
-HarmonicBond::
-compute (VALUETYPE &			ener,
-	 vector &		force,
-	 vector &		virial,
-	 const vector &	coord,
-	 const vector &		atype,
-	 const SimulationRegion &	region, 
-	 const vector &		blist)
-{
+void HarmonicBond::compute(VALUETYPE& ener,
+                           vector& force,
+                           vector& virial,
+                           const vector& coord,
+                           const vector& atype,
+                           const SimulationRegion& region,
+                           const vector& blist) {
   // all set zeros
-  for (unsigned _ = 0; _ < blist.size(); _ += 2){
+  for (unsigned _ = 0; _ < blist.size(); _ += 2) {
     int ii = blist[_];
-    int jj = blist[_+1];
+    int jj = blist[_ + 1];
     VALUETYPE diff[3];
-    region.diffNearestNeighbor (&coord[ii*3], &coord[jj*3], diff);      
+    region.diffNearestNeighbor(&coord[ii * 3], &coord[jj * 3], diff);
     VALUETYPE r2 = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
     VALUETYPE r1 = sqrt(r2);
     VALUETYPE ae, af;
-    hb_inner (ae, af, r1);
-    for (int dd = 0; dd < 3; ++dd) force[ii*3+dd] += af * diff[dd];
-    for (int dd = 0; dd < 3; ++dd) force[jj*3+dd] -= af * diff[dd];    
+    hb_inner(ae, af, r1);
+    for (int dd = 0; dd < 3; ++dd) force[ii * 3 + dd] += af * diff[dd];
+    for (int dd = 0; dd < 3; ++dd) force[jj * 3 + dd] -= af * diff[dd];
     ener += ae;
     for (int dd0 = 0; dd0 < 3; ++dd0) {
       for (int dd1 = 0; dd1 < 3; ++dd1) {
-	virial[dd0*3+dd1] -= 0.5 * diff[dd0] * af * diff[dd1];
+        virial[dd0 * 3 + dd1] -= 0.5 * diff[dd0] * af * diff[dd1];
       }
     }
   }
 }
-
diff --git a/source/md/src/Integrator.cc b/source/md/src/Integrator.cc
index e8d18e473c..8ccac3099e 100644
--- a/source/md/src/Integrator.cc
+++ b/source/md/src/Integrator.cc
@@ -1,103 +1,90 @@
 #include "Integrator.h"
+
 #include 
 
 template 
-void 
-Integrator::
-stepVeloc (vector & vv,
-	   const vector & ff,
-	   const vector & mass, 
-	   const double & dt, 
-	   const vector & freez) const
-{
+void Integrator::stepVeloc(vector &vv,
+                                      const vector &ff,
+                                      const vector &mass,
+                                      const double &dt,
+                                      const vector &freez) const {
   int natoms = ff.size() / 3;
-  for (int kk = 0; kk < natoms; ++kk){
-    VALUETYPE invmdt =  dt / (mass[kk] * massConst);
-    vv[kk*3+0] += ff[kk*3+0] * invmdt;
-    vv[kk*3+1] += ff[kk*3+1] * invmdt;
-    vv[kk*3+2] += ff[kk*3+2] * invmdt;
-  }  
-  for (unsigned ii = 0; ii < freez.size(); ++ii){
+  for (int kk = 0; kk < natoms; ++kk) {
+    VALUETYPE invmdt = dt / (mass[kk] * massConst);
+    vv[kk * 3 + 0] += ff[kk * 3 + 0] * invmdt;
+    vv[kk * 3 + 1] += ff[kk * 3 + 1] * invmdt;
+    vv[kk * 3 + 2] += ff[kk * 3 + 2] * invmdt;
+  }
+  for (unsigned ii = 0; ii < freez.size(); ++ii) {
     int kk = freez[ii];
-    vv[kk*3+0] = 0;
-    vv[kk*3+1] = 0;
-    vv[kk*3+2] = 0;
+    vv[kk * 3 + 0] = 0;
+    vv[kk * 3 + 1] = 0;
+    vv[kk * 3 + 2] = 0;
   }
 }
 
 template 
-void 
-Integrator::
-stepCoord (vector & rr,
-	   const vector & vv, 
-	   const double & dt) const
-{
-  for (unsigned kk = 0; kk < vv.size(); ++kk){
+void Integrator::stepCoord(vector &rr,
+                                      const vector &vv,
+                                      const double &dt) const {
+  for (unsigned kk = 0; kk < vv.size(); ++kk) {
     rr[kk] += dt * vv[kk];
-  }  
+  }
 }
 
-
-template  
-ThermostatLangevin::
-ThermostatLangevin (const VALUETYPE	T_,
-		    const VALUETYPE	tau_,
-		    const long long int	seed)
-{
-  reinit (T_, tau_, seed);
+template 
+ThermostatLangevin::ThermostatLangevin(const VALUETYPE T_,
+                                                  const VALUETYPE tau_,
+                                                  const long long int seed) {
+  reinit(T_, tau_, seed);
 }
 
-
-template  
-void 
-ThermostatLangevin::
-reinit (const VALUETYPE		T_,
-	const VALUETYPE		tau_,
-	const long long int	seed)
-{
-  gaussian.set_seed (seed);
+template 
+void ThermostatLangevin::reinit(const VALUETYPE T_,
+                                           const VALUETYPE tau_,
+                                           const long long int seed) {
+  gaussian.set_seed(seed);
   temperature = T_;
   kT = UnitManager::BoltzmannConstant * T_;
-  gamma = 1./tau_;
+  gamma = 1. / tau_;
   VALUETYPE twogammakT = 2. * gamma * kT;
-  sigma = 1./sqrt (twogammakT) * twogammakT;
-  sigmainvsqrt2gamma = VALUETYPE(sigma / sqrt (2. * gamma));  
+  sigma = 1. / sqrt(twogammakT) * twogammakT;
+  sigmainvsqrt2gamma = VALUETYPE(sigma / sqrt(2. * gamma));
 }
 
-
 template 
-void
-ThermostatLangevin::
-stepOU (vector & vv,
-	const vector & mass,
-	const double & dt, 
-	const vector & freez) const
-{
-  VALUETYPE emgammat = exp (-gamma * dt);
-  VALUETYPE sqrt1memgammat2 = sqrt (1. - emgammat * emgammat);
+void ThermostatLangevin::stepOU(vector &vv,
+                                           const vector &mass,
+                                           const double &dt,
+                                           const vector &freez) const {
+  VALUETYPE emgammat = exp(-gamma * dt);
+  VALUETYPE sqrt1memgammat2 = sqrt(1. - emgammat * emgammat);
   VALUETYPE prefR = sigmainvsqrt2gamma * sqrt1memgammat2;
 
-  int numb_part =  mass.size();
-  assert (int(vv.size() ) == 3 * numb_part);
+  int numb_part = mass.size();
+  assert(int(vv.size()) == 3 * numb_part);
 
-  double * all_rands = (double *) malloc (sizeof(double) * numb_part * 3);
-  gaussian.gen (all_rands, numb_part*3);
+  double *all_rands = (double *)malloc(sizeof(double) * numb_part * 3);
+  gaussian.gen(all_rands, numb_part * 3);
 
-  for (int kk = 0; kk < numb_part; ++kk){
+  for (int kk = 0; kk < numb_part; ++kk) {
     VALUETYPE sm = mass[kk] * UnitManager::IntegratorMassConstant;
-    VALUETYPE invsqrtm = 1./sqrt (sm);
-    vv[kk*3+0] = emgammat * vv[kk*3+0] + prefR * invsqrtm * all_rands[kk*3+0];
-    vv[kk*3+1] = emgammat * vv[kk*3+1] + prefR * invsqrtm * all_rands[kk*3+1];
-    vv[kk*3+2] = emgammat * vv[kk*3+2] + prefR * invsqrtm * all_rands[kk*3+2];
+    VALUETYPE invsqrtm = 1. / sqrt(sm);
+    vv[kk * 3 + 0] =
+        emgammat * vv[kk * 3 + 0] + prefR * invsqrtm * all_rands[kk * 3 + 0];
+    vv[kk * 3 + 1] =
+        emgammat * vv[kk * 3 + 1] + prefR * invsqrtm * all_rands[kk * 3 + 1];
+    vv[kk * 3 + 2] =
+        emgammat * vv[kk * 3 + 2] + prefR * invsqrtm * all_rands[kk * 3 + 2];
   }
-  for (unsigned ii = 0; ii < freez.size(); ++ii){
+  for (unsigned ii = 0; ii < freez.size(); ++ii) {
     int kk = freez[ii];
-    vv[kk*3+0] = 0;
-    vv[kk*3+1] = 0;
-    vv[kk*3+2] = 0;
+    vv[kk * 3 + 0] = 0;
+    vv[kk * 3 + 1] = 0;
+    vv[kk * 3 + 2] = 0;
   }
 
-  free (all_rands);
+  free(all_rands);
 }
 
 template class Integrator;
diff --git a/source/md/src/Interpolation.cpp b/source/md/src/Interpolation.cpp
index 2817e9c72b..40377a292e 100644
--- a/source/md/src/Interpolation.cpp
+++ b/source/md/src/Interpolation.cpp
@@ -1,15 +1,18 @@
 #include "Interpolation.h"
-#include 
 
+#include 
 
-void Interpolation::piece6OrderInterpol (const double & a,   const double & b,
-					 const double & va,  const double & vb,
-					 const double & da,  const double & db,
-					 const double & dda, const double & ddb,
-					 Poly & p)
-{
-  std::vector standardPolys(6);
-  for (unsigned i = 0; i < 6; ++i){
+void Interpolation::piece6OrderInterpol(const double& a,
+                                        const double& b,
+                                        const double& va,
+                                        const double& vb,
+                                        const double& da,
+                                        const double& db,
+                                        const double& dda,
+                                        const double& ddb,
+                                        Poly& p) {
+  std::vector standardPolys(6);
+  for (unsigned i = 0; i < 6; ++i) {
     standardPolys[i].getOrder() = 5;
     standardPolys[i].getCoeffs().resize(6);
   }
@@ -26,7 +29,7 @@ void Interpolation::piece6OrderInterpol (const double & a,   const double & b,
   standardPolys[1].getCoeffs()[3] = 10;
   standardPolys[1].getCoeffs()[4] = -15;
   standardPolys[1].getCoeffs()[5] = 6;
-  
+
   standardPolys[2].getCoeffs()[0] = 0;
   standardPolys[2].getCoeffs()[1] = 1;
   standardPolys[2].getCoeffs()[2] = 0;
@@ -40,7 +43,7 @@ void Interpolation::piece6OrderInterpol (const double & a,   const double & b,
   standardPolys[3].getCoeffs()[3] = -4;
   standardPolys[3].getCoeffs()[4] = 7;
   standardPolys[3].getCoeffs()[5] = -3;
-  
+
   standardPolys[4].getCoeffs()[0] = 0;
   standardPolys[4].getCoeffs()[1] = 0;
   standardPolys[4].getCoeffs()[2] = 0.5;
@@ -55,16 +58,16 @@ void Interpolation::piece6OrderInterpol (const double & a,   const double & b,
   standardPolys[5].getCoeffs()[4] = -1;
   standardPolys[5].getCoeffs()[5] = 0.5;
 
-  std::vector scaledPolys (6);
-  double tmpa (1./(b - a));
-  double tmpb (-a / (b - a));
-  for (unsigned i = 0; i < 6; ++i){
-    standardPolys[i].valueLinearPoly (tmpa, tmpb, scaledPolys[i]);
+  std::vector scaledPolys(6);
+  double tmpa(1. / (b - a));
+  double tmpb(-a / (b - a));
+  for (unsigned i = 0; i < 6; ++i) {
+    standardPolys[i].valueLinearPoly(tmpa, tmpb, scaledPolys[i]);
   }
-  scaledPolys[2] *= 1./tmpa;
-  scaledPolys[3] *= 1./tmpa;
-  scaledPolys[4] *= 1./tmpa/tmpa;
-  scaledPolys[5] *= 1./tmpa/tmpa;
+  scaledPolys[2] *= 1. / tmpa;
+  scaledPolys[3] *= 1. / tmpa;
+  scaledPolys[4] *= 1. / tmpa / tmpa;
+  scaledPolys[5] *= 1. / tmpa / tmpa;
 
   p.zero();
   p += (scaledPolys[0] *= va);
@@ -74,140 +77,136 @@ void Interpolation::piece6OrderInterpol (const double & a,   const double & b,
   p += (scaledPolys[4] *= dda);
   p += (scaledPolys[5] *= ddb);
 
-  return ;
+  return;
 }
 
-
-
-void Interpolation::pieceLinearInterpol (const double & a,  const double & b, 
-					 const double & va, const double & vb,
-					 Poly & p)
-{
-  double k = (vb-va) / (b-a);
-  std::vector tmp (2, 0);
+void Interpolation::pieceLinearInterpol(const double& a,
+                                        const double& b,
+                                        const double& va,
+                                        const double& vb,
+                                        Poly& p) {
+  double k = (vb - va) / (b - a);
+  std::vector tmp(2, 0);
   tmp[0] += va;
   tmp[0] += k * (-a);
   tmp[1] = k;
-  p.reinit (tmp);
+  p.reinit(tmp);
 }
 
-void Interpolation::piecewiseLinear (const std::vector & x,
-				     const std::vector & y,
-				     PiecewisePoly & ps)
-{
-  std::vector::const_iterator pxp1 = x.begin();
-  std::vector::const_iterator px = (pxp1 ++);
-  std::vector::const_iterator pyp1 = y.begin();
-  std::vector::const_iterator py = (pyp1 ++);
+void Interpolation::piecewiseLinear(const std::vector& x,
+                                    const std::vector& y,
+                                    PiecewisePoly& ps) {
+  std::vector::const_iterator pxp1 = x.begin();
+  std::vector::const_iterator px = (pxp1++);
+  std::vector::const_iterator pyp1 = y.begin();
+  std::vector::const_iterator py = (pyp1++);
   ps.clear();
   Poly tmpp;
-  for (; pxp1 != x.end(); ++ pxp1, ++pyp1, ++px, ++py){
-    pieceLinearInterpol (*px, *pxp1, *py, *pyp1, tmpp);
-    ps.get_x().push_back (*px);
-    ps.get_p().push_back (tmpp);
+  for (; pxp1 != x.end(); ++pxp1, ++pyp1, ++px, ++py) {
+    pieceLinearInterpol(*px, *pxp1, *py, *pyp1, tmpp);
+    ps.get_x().push_back(*px);
+    ps.get_p().push_back(tmpp);
   }
-  ps.get_x().push_back (*px);
-}  
-
-void Interpolation::pieceSecondDerivativeInterpol (
-    const double & a,  const double & b,
-    const double & va, const double & vb,
-    const double & dda,const double & ddb,
-    Poly & p)
-{
-  std::vector tmp (2, 0);
-  double k = (vb-va) / (b-a);
+  ps.get_x().push_back(*px);
+}
+
+void Interpolation::pieceSecondDerivativeInterpol(const double& a,
+                                                  const double& b,
+                                                  const double& va,
+                                                  const double& vb,
+                                                  const double& dda,
+                                                  const double& ddb,
+                                                  Poly& p) {
+  std::vector tmp(2, 0);
+  double k = (vb - va) / (b - a);
   tmp[0] += va;
   tmp[0] += k * (-a);
   tmp[1] = k;
-  p.reinit (tmp);
+  p.reinit(tmp);
 
   tmp[1] = 1;
   tmp[0] = -a;
-  Poly l1 (tmp);
+  Poly l1(tmp);
   tmp[0] = -b;
-  Poly l2 (tmp);
+  Poly l2(tmp);
   l1 *= l2;
-  
-  tmp[1] = 1./6. / (a - b);
-  tmp[0] = 1./6. * (a - 2 * b) / (a-b);
-  Poly p1 (tmp);
+
+  tmp[1] = 1. / 6. / (a - b);
+  tmp[0] = 1. / 6. * (a - 2 * b) / (a - b);
+  Poly p1(tmp);
   p1 *= l1;
   p1 *= dda;
-  
+
   tmp[1] *= -1;
-  tmp[0] = 1./6. * (b - 2 * a) / (b-a);
-  Poly p2 (tmp);
+  tmp[0] = 1. / 6. * (b - 2 * a) / (b - a);
+  Poly p2(tmp);
   p2 *= l1;
   p2 *= ddb;
-  
+
   p += p1;
   p += p2;
-}  
-
+}
 
-void Interpolation::secondDerivativeInterpol (
-    const std::vector::const_iterator & xbegin,
-    const std::vector::const_iterator & xend,
-    const std::vector::const_iterator & vbegin,
-    const std::vector::const_iterator & ddbegin,
-    PiecewisePoly & ps)
-{
+void Interpolation::secondDerivativeInterpol(
+    const std::vector::const_iterator& xbegin,
+    const std::vector::const_iterator& xend,
+    const std::vector::const_iterator& vbegin,
+    const std::vector::const_iterator& ddbegin,
+    PiecewisePoly& ps) {
   ps.clear();
-  std::vector::const_iterator xb (xbegin), vb (vbegin), ddb (ddbegin);
-  std::vector::const_iterator xp (xbegin), vp (vbegin), ddp (ddbegin);
+  std::vector::const_iterator xb(xbegin), vb(vbegin), ddb(ddbegin);
+  std::vector::const_iterator xp(xbegin), vp(vbegin), ddp(ddbegin);
   ++xp, ++vp, ++ddp;
-  while (xp != xend){
-    ps.get_x().push_back (*xb);
+  while (xp != xend) {
+    ps.get_x().push_back(*xb);
     Poly tmpp;
-    pieceSecondDerivativeInterpol (*(xb++), *(xp++), 
-				   *(vb++), *(vp++),
-				   *(ddb++), *(ddp++),
-				   tmpp);
-    ps.get_p().push_back (tmpp);
+    pieceSecondDerivativeInterpol(*(xb++), *(xp++), *(vb++), *(vp++), *(ddb++),
+                                  *(ddp++), tmpp);
+    ps.get_p().push_back(tmpp);
   }
-  ps.get_x().push_back (*xb);
+  ps.get_x().push_back(*xb);
 }
 
-
-void Interpolation::pieceHermiteInterpol (const double & a, const double & b,
-					  const double & va, const double & vb,
-					  const double & da, const double & db,
-					  Poly & p)
-{
-  std::vector tmp (2,0);
-  Poly t ;
+void Interpolation::pieceHermiteInterpol(const double& a,
+                                         const double& b,
+                                         const double& va,
+                                         const double& vb,
+                                         const double& da,
+                                         const double& db,
+                                         Poly& p) {
+  std::vector tmp(2, 0);
+  Poly t;
   tmp[0] = (-2 * a / (b - a) + 1);
   tmp[1] = (2 / (b - a));
-  Poly a0 (tmp);
-  tmp[0] = - b / (a - b);
+  Poly a0(tmp);
+  tmp[0] = -b / (a - b);
   tmp[1] = 1 / (a - b);
   t.reinit(tmp);
   a0 *= t;
   a0 *= t;
-  tmp[0] = - 2 * b / (a - b) + 1;
+  tmp[0] = -2 * b / (a - b) + 1;
   tmp[1] = 2 / (a - b);
-  Poly a1 (tmp);
-  tmp[0] = - a / (b - a);
+  Poly a1(tmp);
+  tmp[0] = -a / (b - a);
   tmp[1] = 1 / (b - a);
-  t.reinit (tmp);
+  t.reinit(tmp);
   a1 *= t;
   a1 *= t;
 
   tmp[0] = -a;
   tmp[1] = 1;
-  Poly b0 (tmp);
-  tmp[0] = - b / (a - b);
+  Poly b0(tmp);
+  tmp[0] = -b / (a - b);
   tmp[1] = 1 / (a - b);
   t.reinit(tmp);
   b0 *= t;
   b0 *= t;
   tmp[0] = -b;
   tmp[1] = 1;
-  Poly b1 (tmp);
-  tmp[0] = - a / (b - a);
+  Poly b1(tmp);
+  tmp[0] = -a / (b - a);
   tmp[1] = 1 / (b - a);
-  t.reinit (tmp);
+  t.reinit(tmp);
   b1 *= t;
   b1 *= t;
 
@@ -224,192 +223,181 @@ void Interpolation::pieceHermiteInterpol (const double & a, const double & b,
 
 // lbegin--lend, stores lambda
 // ubegin--uend, stores mu
-bool Interpolation::solverForSplinePeriodic (
-    const std::vector::const_iterator & lbegin,
-    const std::vector::const_iterator & lend,
-    const std::vector::iterator & ubegin, 
-    const std::vector::iterator & uend)
-{
-  std::vector la, lb, lc, ld;
-  for (std::vector::const_iterator i = lbegin;
-       i != lend; ++i){
-    la.push_back (1 - *i);
-    lb.push_back (2);
-    lc.push_back (*i);
-    ld.push_back (0);
+bool Interpolation::solverForSplinePeriodic(
+    const std::vector::const_iterator& lbegin,
+    const std::vector::const_iterator& lend,
+    const std::vector::iterator& ubegin,
+    const std::vector::iterator& uend) {
+  std::vector la, lb, lc, ld;
+  for (std::vector::const_iterator i = lbegin; i != lend; ++i) {
+    la.push_back(1 - *i);
+    lb.push_back(2);
+    lc.push_back(*i);
+    ld.push_back(0);
   }
-//  ld.front() = 1 - *lbegin;
+  //  ld.front() = 1 - *lbegin;
   ld[0] = 1 - lc[0];
   int num = ld.size();
-  ld[num-2] = lc[num-2];
-  ld[num-1] = lb[num-1];
-  
-  std::vector::iterator pu = ubegin;
-  std::vector::iterator pu_1 = pu ++;
-  for (int i = 1; i < num-1; ++i, ++pu, ++pu_1){
-    if (lb[i-1] == 0){
+  ld[num - 2] = lc[num - 2];
+  ld[num - 1] = lb[num - 1];
+
+  std::vector::iterator pu = ubegin;
+  std::vector::iterator pu_1 = pu++;
+  for (int i = 1; i < num - 1; ++i, ++pu, ++pu_1) {
+    if (lb[i - 1] == 0) {
       return false;
     }
-    double ratio = - la[i] / lb[i-1];
-    lb[i] += lc[i-1] * ratio;
-    ld[i] += ld[i-1] * ratio;
+    double ratio = -la[i] / lb[i - 1];
+    lb[i] += lc[i - 1] * ratio;
+    ld[i] += ld[i - 1] * ratio;
     *pu += *pu_1 * ratio;
   }
-  int i = num-1;
-  if (lb[i-1] == 0){
+  int i = num - 1;
+  if (lb[i - 1] == 0) {
     return false;
   }
-  double ratio = - la[i] / lb[i-1];
-  lb[i] += ld[i-1] * ratio;
+  double ratio = -la[i] / lb[i - 1];
+  lb[i] += ld[i - 1] * ratio;
   ld[i] = lb[i];
   *pu += *pu_1 * ratio;
-  
-//   std::cout << lc.back() << std::endl;
-//   std::cout << lc.front() << std::endl;
+
+  //   std::cout << lc.back() << std::endl;
+  //   std::cout << lc.front() << std::endl;
   ratio = -lb[0] / lc.back();
-  ld[0] += ratio * ld[num-1];
+  ld[0] += ratio * ld[num - 1];
   *ubegin += ratio * *pu;
   lb[0] = 0;
 
-//   std::cout << ld.size() << std::endl;
-  ld.insert (ld.begin(), ld.back());
-//   std::cout << ld.size() << std::endl;
+  //   std::cout << ld.size() << std::endl;
+  ld.insert(ld.begin(), ld.back());
+  //   std::cout << ld.size() << std::endl;
   ld.pop_back();
-//   std::cout << ld.size() << std::endl;
+  //   std::cout << ld.size() << std::endl;
   double before = 0.;
-//   std::cout << "##############################" << std::endl;
-//   std::copy(ubegin, uend, std::ostream_iterator(std::cout, "\n"));
-//   std::cout << "##############################" << std::endl;
-  for (std::vector::iterator tmpu = ubegin; tmpu != uend; ++tmpu){
-    if (tmpu ==  ubegin) {
+  //   std::cout << "##############################" << std::endl;
+  //   std::copy(ubegin, uend, std::ostream_iterator(std::cout, "\n"));
+  //   std::cout << "##############################" << std::endl;
+  for (std::vector::iterator tmpu = ubegin; tmpu != uend; ++tmpu) {
+    if (tmpu == ubegin) {
       before = *tmpu;
       *tmpu = *pu;
-    }
-    else {
+    } else {
       double beforetmp = *tmpu;
       *tmpu = before;
       before = beforetmp;
     }
   }
-//   std::copy(ubegin, uend, std::ostream_iterator(std::cout, "\n"));
-//   std::cout << "##############################" << std::endl;
-  lc.insert (lc.begin(), *lbegin);
-  lc.pop_back ();
-  lc.back () = ld.back();
-  lb.insert (lb.begin(), 0.);
-  lb.pop_back ();
-  
+  //   std::copy(ubegin, uend, std::ostream_iterator(std::cout, "\n"));
+  //   std::cout << "##############################" << std::endl;
+  lc.insert(lc.begin(), *lbegin);
+  lc.pop_back();
+  lc.back() = ld.back();
+  lb.insert(lb.begin(), 0.);
+  lb.pop_back();
+
   pu = ubegin;
-  pu ++;
-  pu_1 = pu ++;
-  for (int i = 2; i < num-1; ++i, ++pu, ++pu_1){
-    if (lc[i-1] == 0){
+  pu++;
+  pu_1 = pu++;
+  for (int i = 2; i < num - 1; ++i, ++pu, ++pu_1) {
+    if (lc[i - 1] == 0) {
       return false;
     }
-    double ratio = - lb[i] / lc[i-1];
-    ld[i] += ld[i-1] * ratio;
+    double ratio = -lb[i] / lc[i - 1];
+    ld[i] += ld[i - 1] * ratio;
     *pu += *pu_1 * ratio;
   }
-  i = num-1;
-  if (lc[i-1] == 0){
+  i = num - 1;
+  if (lc[i - 1] == 0) {
     return false;
   }
-  ratio = - lb[i] / lc[i-1];
-  lc[i] += ld[i-1] * ratio;
+  ratio = -lb[i] / lc[i - 1];
+  lc[i] += ld[i - 1] * ratio;
   ld[i] = lc[i];
   *pu += *pu_1 * ratio;
 
-
-  *pu /=lc[num-1];
-  for (int i = num-2; i >= 0; --i, -- pu_1){
+  *pu /= lc[num - 1];
+  for (int i = num - 2; i >= 0; --i, --pu_1) {
     *pu_1 = (*pu_1 - *pu * ld[i]) / lc[i];
   }
 
   return true;
 }
 
-  
-  
-bool Interpolation::splinePeriodic (const std::vector & x,
-				    const std::vector & y,
-				    PiecewisePoly & ps)
-{
-  std::vector lambda (x.size()-1);
-  std::vector mu (x.size()-1);
-  std::vector dx ;
-  
-  std::vector::const_iterator i = x.begin();
-  std::vector::const_iterator j = i;
-  for (++j; j!= x.end(); ++i, ++j){
+bool Interpolation::splinePeriodic(const std::vector& x,
+                                   const std::vector& y,
+                                   PiecewisePoly& ps) {
+  std::vector lambda(x.size() - 1);
+  std::vector mu(x.size() - 1);
+  std::vector dx;
+
+  std::vector::const_iterator i = x.begin();
+  std::vector::const_iterator j = i;
+  for (++j; j != x.end(); ++i, ++j) {
     dx.push_back(*j - *i);
   }
   lambda[0] = dx.back() / (dx.back() + dx.front());
-  mu[0] = 3 * ((1 - lambda.front())/dx.back()*(y[0] - y[y.size()-2]) +
-	       lambda.front() / dx.front() * (y[1] - y[0]));
-  for (unsigned i = 1; i < lambda.size(); ++i){
-    lambda[i] = dx[i-1] / (dx[i-1] + dx[i]);
-    mu[i] = 3 * ((1 - lambda[i]) / dx[i-1] * (y[i] - y[i-1]) +
-		 lambda[i] / dx[i] * (y[i+1] - y[i]));
+  mu[0] = 3 * ((1 - lambda.front()) / dx.back() * (y[0] - y[y.size() - 2]) +
+               lambda.front() / dx.front() * (y[1] - y[0]));
+  for (unsigned i = 1; i < lambda.size(); ++i) {
+    lambda[i] = dx[i - 1] / (dx[i - 1] + dx[i]);
+    mu[i] = 3 * ((1 - lambda[i]) / dx[i - 1] * (y[i] - y[i - 1]) +
+                 lambda[i] / dx[i] * (y[i + 1] - y[i]));
   }
-  
-  bool tag = solverForSplinePeriodic (lambda.begin(), lambda.end(), 
-				      mu.begin(), mu.end());
+
+  bool tag = solverForSplinePeriodic(lambda.begin(), lambda.end(), mu.begin(),
+                                     mu.end());
   if (!tag) return false;
-  
+
   ps.get_x() = x;
   ps.get_p().clear();
-  for (unsigned i = 0; i < x.size() - 2; ++i){
+  for (unsigned i = 0; i < x.size() - 2; ++i) {
     Poly tmpp;
-    pieceHermiteInterpol (x[i], x[i+1], 
-			  y[i], y[i+1], 
-			  mu[i], mu[i+1], tmpp);
-    ps.get_p().push_back (tmpp);
+    pieceHermiteInterpol(x[i], x[i + 1], y[i], y[i + 1], mu[i], mu[i + 1],
+                         tmpp);
+    ps.get_p().push_back(tmpp);
   }
   Poly tmpp;
-  pieceHermiteInterpol (x[x.size()-2], x[x.size()-2+1], 
-			y[x.size()-2], y[x.size()-2+1], 
-			mu[x.size()-2], mu[0], tmpp);
-  ps.get_p().push_back (tmpp);
+  pieceHermiteInterpol(x[x.size() - 2], x[x.size() - 2 + 1], y[x.size() - 2],
+                       y[x.size() - 2 + 1], mu[x.size() - 2], mu[0], tmpp);
+  ps.get_p().push_back(tmpp);
   return true;
 }
 
-
-bool Interpolation::spline (const std::vector & x,
-			    const std::vector & y,
-			    PiecewisePoly & ps)
-{
-  std::vector lambda (x.size());
-  std::vector mu (x.size());
-  std::vector m (x.size());
-  std::vector dx ;
-  
-  std::vector::const_iterator i = x.begin();
-  std::vector::const_iterator j = i;
-  for (++j; j!= x.end(); ++i, ++j){
+bool Interpolation::spline(const std::vector& x,
+                           const std::vector& y,
+                           PiecewisePoly& ps) {
+  std::vector lambda(x.size());
+  std::vector mu(x.size());
+  std::vector m(x.size());
+  std::vector dx;
+
+  std::vector::const_iterator i = x.begin();
+  std::vector::const_iterator j = i;
+  for (++j; j != x.end(); ++i, ++j) {
     dx.push_back(*j - *i);
   }
-  
+
   lambda.front() = 1;
   lambda.back() = 0;
   mu.front() = 3 * ((*(++(y.begin()))) - y.front()) / dx.front();
-  mu.back()  = 3 * (y.back() - (*(++(y.rbegin())))) / dx.back();
-  std::vector::iterator pdx0 = dx.begin();
-  std::vector::iterator pdx1 = pdx0;
-  ++ pdx1 ;
-  std::vector::const_iterator py0 = y.begin();
-  std::vector::const_iterator py1 = py0;
-  ++ py1;
-  std::vector::const_iterator py2 = py1;
-  ++ py2;
-  std::vector::iterator plambda = lambda.begin();
-  ++ plambda;
-  std::vector::iterator pmu = mu.begin();
-  ++ pmu;
-  for (; py2 != y.end(); 
-       ++pdx0, ++pdx1, ++py0, ++py1, ++py2, ++plambda, ++pmu){
+  mu.back() = 3 * (y.back() - (*(++(y.rbegin())))) / dx.back();
+  std::vector::iterator pdx0 = dx.begin();
+  std::vector::iterator pdx1 = pdx0;
+  ++pdx1;
+  std::vector::const_iterator py0 = y.begin();
+  std::vector::const_iterator py1 = py0;
+  ++py1;
+  std::vector::const_iterator py2 = py1;
+  ++py2;
+  std::vector::iterator plambda = lambda.begin();
+  ++plambda;
+  std::vector::iterator pmu = mu.begin();
+  ++pmu;
+  for (; py2 != y.end();
+       ++pdx0, ++pdx1, ++py0, ++py1, ++py2, ++plambda, ++pmu) {
     *plambda = *pdx0 / (*pdx0 + *pdx1);
-    *pmu = 3 * ((1-*plambda) / *pdx0 * (*py1 - *py0) + 
-		*plambda / *pdx1 * (*py2 - *py1));
+    *pmu = 3 * ((1 - *plambda) / *pdx0 * (*py1 - *py0) +
+                *plambda / *pdx1 * (*py2 - *py1));
   }
 
   //   for (unsigned i = 1; i < x.size()-1; ++i){
@@ -417,139 +405,135 @@ bool Interpolation::spline (const std::vector & x,
   //     mu[i] = 3 * ((1-lambda[i]) / dx[i-1] * (y[i] - y[i-1]) +
   // 		 lambda[i] / dx[i] * (y[i+1] - y[i]));
   //   }
-  
+
   double bet;
-  std::vector gam (x.size());
-  m[0] = mu[0] / (bet=2);
-  for (unsigned j = 1; j < x.size(); ++j){
-    gam[j] = lambda[j-1] / bet;
-    bet = 2 - (1-lambda[j]) * gam[j];
+  std::vector gam(x.size());
+  m[0] = mu[0] / (bet = 2);
+  for (unsigned j = 1; j < x.size(); ++j) {
+    gam[j] = lambda[j - 1] / bet;
+    bet = 2 - (1 - lambda[j]) * gam[j];
     if (bet == 0) {
-      std::cerr << "a error in triangle solver\n" ;
+      std::cerr << "a error in triangle solver\n";
       return false;
     }
-    m[j] = (mu[j] - (1-lambda[j]) * m[j-1]) / bet;
+    m[j] = (mu[j] - (1 - lambda[j]) * m[j - 1]) / bet;
   }
-  for (int j = x.size()-2; j >= 0; --j){
-    m[j] -= gam[j+1] * m[j+1];
+  for (int j = x.size() - 2; j >= 0; --j) {
+    m[j] -= gam[j + 1] * m[j + 1];
   }
 
   ps.clear();
   ps.get_x() = x;
-  std::vector::const_iterator px0 = x.begin();
-  std::vector::const_iterator px1 = px0;
-  ++ px1;
+  std::vector::const_iterator px0 = x.begin();
+  std::vector::const_iterator px1 = px0;
+  ++px1;
   py0 = y.begin();
   py1 = py0;
-  ++ py1;
-  std::vector::iterator pm0 = m.begin();
-  std::vector::iterator pm1 = pm0;
-  ++ pm1;
-  for (; px1 != x.end(); 
-       ++px0, ++px1, ++py0, ++py1, ++pm0, ++pm1){
+  ++py1;
+  std::vector::iterator pm0 = m.begin();
+  std::vector::iterator pm1 = pm0;
+  ++pm1;
+  for (; px1 != x.end(); ++px0, ++px1, ++py0, ++py1, ++pm0, ++pm1) {
     Poly tmpp;
-    pieceHermiteInterpol (*px0, *px1, *py0, *py1, *pm0, *pm1, tmpp);
+    pieceHermiteInterpol(*px0, *px1, *py0, *py1, *pm0, *pm1, tmpp);
     ps.get_p().push_back(tmpp);
   }
 
   return true;
 }
 
-
-bool Interpolation::spline (const std::vector::const_iterator xbegin,
-			    const std::vector::const_iterator xend,
-			    const std::vector::const_iterator ybegin,
-			    PiecewisePoly & ps)
-{
+bool Interpolation::spline(const std::vector::const_iterator xbegin,
+                           const std::vector::const_iterator xend,
+                           const std::vector::const_iterator ybegin,
+                           PiecewisePoly& ps) {
   int xsize = 0;
-  std::vector::const_iterator itmp = xbegin;
-  while (itmp ++ != xend) ++ xsize;
-  
-  std::vector lambda (xsize);
-  std::vector mu (xsize);
-  std::vector m (xsize);
-  std::vector dx ;
+  std::vector::const_iterator itmp = xbegin;
+  while (itmp++ != xend) ++xsize;
+
+  std::vector lambda(xsize);
+  std::vector mu(xsize);
+  std::vector m(xsize);
+  std::vector dx;
 
   // setup linear system
-  std::vector::const_iterator i = xbegin;
-  std::vector::const_iterator j = i;
-  for (++j; j!= xend; ++i, ++j){
+  std::vector::const_iterator i = xbegin;
+  std::vector::const_iterator j = i;
+  for (++j; j != xend; ++i, ++j) {
     dx.push_back(*j - *i);
   }
   lambda.front() = 1;
   lambda.back() = 0;
   mu.front() = 3 * ((*(++(itmp = ybegin))) - *ybegin) / dx.front();
-  std::vector::iterator pdx0 = dx.begin();
-  std::vector::iterator pdx1 = pdx0;
-  ++ pdx1 ;
-  std::vector::const_iterator py0 = ybegin;
-  std::vector::const_iterator py1 = py0;
-  ++ py1;
-  std::vector::const_iterator py2 = py1;
-  ++ py2;
-  std::vector::iterator plambda = lambda.begin();
-  ++ plambda;
-  std::vector::iterator pmu = mu.begin();
-  ++ pmu;
-  for (; pdx1 != dx.end(); 
-       ++pdx0, ++pdx1, ++py0, ++py1, ++py2, ++plambda, ++pmu){
+  std::vector::iterator pdx0 = dx.begin();
+  std::vector::iterator pdx1 = pdx0;
+  ++pdx1;
+  std::vector::const_iterator py0 = ybegin;
+  std::vector::const_iterator py1 = py0;
+  ++py1;
+  std::vector::const_iterator py2 = py1;
+  ++py2;
+  std::vector::iterator plambda = lambda.begin();
+  ++plambda;
+  std::vector::iterator pmu = mu.begin();
+  ++pmu;
+  for (; pdx1 != dx.end();
+       ++pdx0, ++pdx1, ++py0, ++py1, ++py2, ++plambda, ++pmu) {
     *plambda = *pdx0 / (*pdx0 + *pdx1);
-    *pmu = 3 * ((1-*plambda) / *pdx0 * (*py1 - *py0) + 
-		*plambda / *pdx1 * (*py2 - *py1));
+    *pmu = 3 * ((1 - *plambda) / *pdx0 * (*py1 - *py0) +
+                *plambda / *pdx1 * (*py2 - *py1));
   }
-  mu.back()  = 3 * (*py1 - *py0) / dx.back();
-  
+  mu.back() = 3 * (*py1 - *py0) / dx.back();
+
   // solve tridiangonal linear system
   double bet;
-  std::vector gam (xsize);
-  m[0] = mu[0] / (bet=2);
-  for (int j = 1; j < xsize; ++j){
-    gam[j] = lambda[j-1] / bet;
-    bet = 2 - (1-lambda[j]) * gam[j];
+  std::vector gam(xsize);
+  m[0] = mu[0] / (bet = 2);
+  for (int j = 1; j < xsize; ++j) {
+    gam[j] = lambda[j - 1] / bet;
+    bet = 2 - (1 - lambda[j]) * gam[j];
     if (bet == 0) {
-      std::cerr << "a error in triangle solver\n" ;
+      std::cerr << "a error in triangle solver\n";
       return false;
     }
-    m[j] = (mu[j] - (1-lambda[j]) * m[j-1]) / bet;
+    m[j] = (mu[j] - (1 - lambda[j]) * m[j - 1]) / bet;
   }
-  for (int j = xsize-2; j >= 0; --j){
-    m[j] -= gam[j+1] * m[j+1];
+  for (int j = xsize - 2; j >= 0; --j) {
+    m[j] -= gam[j + 1] * m[j + 1];
   }
 
   // make piecewise polynominal
   ps.get_p().clear();
   ps.get_x().resize(xsize);
-  std::copy (xbegin, xend, ps.get_x().begin());
-  std::vector::const_iterator px0 = xbegin;
-  std::vector::const_iterator px1 = px0;
-  ++ px1;
+  std::copy(xbegin, xend, ps.get_x().begin());
+  std::vector::const_iterator px0 = xbegin;
+  std::vector::const_iterator px1 = px0;
+  ++px1;
   py0 = ybegin;
   py1 = py0;
-  ++ py1;
-  std::vector::iterator pm0 = m.begin();
-  std::vector::iterator pm1 = pm0;
-  ++ pm1;
-  for (; px1 != xend; 
-       ++px0, ++px1, ++py0, ++py1, ++pm0, ++pm1){
+  ++py1;
+  std::vector::iterator pm0 = m.begin();
+  std::vector::iterator pm1 = pm0;
+  ++pm1;
+  for (; px1 != xend; ++px0, ++px1, ++py0, ++py1, ++pm0, ++pm1) {
     Poly tmpp;
-    pieceHermiteInterpol (*px0, *px1, *py0, *py1, *pm0, *pm1, tmpp);
+    pieceHermiteInterpol(*px0, *px1, *py0, *py1, *pm0, *pm1, tmpp);
     ps.get_p().push_back(tmpp);
   }
 
   return true;
 }
 
-
-
 // void tridag(float a[], float b[], float c[], float r[], float u[],
 // 	    unsigned long n)
-// //Solves for a vector u[1..n] the tridiagonal linear set given by equation (2.4.1). a[1..n],
+// //Solves for a vector u[1..n] the tridiagonal linear set given by equation
+// (2.4.1). a[1..n],
 // //  b[1..n], c[1..n], and r[1..n] are input vectors and are not modified.
 // {
 //   unsigned long j;
 //   float bet,*gam;
 //   gam=vector(1,n); //One vector of workspace, gam is needed.
-//   //If this happens then you should rewrite your equations as a set of order N-1, w ith u2
+//   //If this happens then you should rewrite your equations as a set of order
+//   N-1, w ith u2
 //   //trivially eliminated.
 //   u[0]=r[0]/(bet=2);
 //   for (j=1;j<=n;j++) { //Decomposition and forward substitution.
diff --git a/source/md/src/LJInter.cc b/source/md/src/LJInter.cc
index 793fdee24b..024af5cb26 100644
--- a/source/md/src/LJInter.cc
+++ b/source/md/src/LJInter.cc
@@ -1,74 +1,66 @@
-#include "common.h"
 #include "LJInter.h"
+
 #include 
 
-LJInter::
-LJInter (const VALUETYPE & c6_,
-	 const VALUETYPE & c12_,
-	 const VALUETYPE & rc_)
-    : c6(6. * c6_), c12(12. * c12_), rc(rc_), rc2 (rc * rc)
-{
-  one_over_6 = 1./6.;
-  one_over_12 = 1./12.;
+#include "common.h"
+
+LJInter::LJInter(const VALUETYPE& c6_,
+                 const VALUETYPE& c12_,
+                 const VALUETYPE& rc_)
+    : c6(6. * c6_), c12(12. * c12_), rc(rc_), rc2(rc * rc) {
+  one_over_6 = 1. / 6.;
+  one_over_12 = 1. / 12.;
   VALUETYPE rc6 = rc2 * rc2 * rc2;
-  one_over_rc6 = 1./rc6;
-  one_over_rc12 = 1./rc6/rc6;
+  one_over_rc6 = 1. / rc6;
+  one_over_rc12 = 1. / rc6 / rc6;
 }
 
-void 
-LJInter::
-lj_inner (VALUETYPE & ae,
-	  VALUETYPE & af,
-	  const VALUETYPE & r2)
-{
-  VALUETYPE rinv = 1./sqrt(r2);
+void LJInter::lj_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r2) {
+  VALUETYPE rinv = 1. / sqrt(r2);
   VALUETYPE rinv2 = rinv * rinv;
-  VALUETYPE rinv6   = rinv2 * rinv2 * rinv2;
-  VALUETYPE vvdw6   = c6 * rinv6;
-  VALUETYPE vvdw12  = c12 * rinv6 * rinv6;
-  ae = (vvdw12 - c12 * one_over_rc12) * one_over_12 - (vvdw6  - c6  * one_over_rc6 ) * one_over_6;
-  af = (vvdw12 - vvdw6) * rinv2;  
+  VALUETYPE rinv6 = rinv2 * rinv2 * rinv2;
+  VALUETYPE vvdw6 = c6 * rinv6;
+  VALUETYPE vvdw12 = c12 * rinv6 * rinv6;
+  ae = (vvdw12 - c12 * one_over_rc12) * one_over_12 -
+       (vvdw6 - c6 * one_over_rc6) * one_over_6;
+  af = (vvdw12 - vvdw6) * rinv2;
 }
 
-void
-LJInter::
-compute (VALUETYPE &			ener,
-	 vector &		force,
-	 vector &		virial,
-	 const vector &	coord,
-	 const vector &		atype,
-	 const SimulationRegion &	region, 
-	 const vector > &	nlist)
-{
-  for (unsigned ii = 0; ii < nlist.size(); ++ii){
-    for (unsigned _ = 0; _ < nlist[ii].size(); ++_){
+void LJInter::compute(VALUETYPE& ener,
+                      vector& force,
+                      vector& virial,
+                      const vector& coord,
+                      const vector& atype,
+                      const SimulationRegion& region,
+                      const vector >& nlist) {
+  for (unsigned ii = 0; ii < nlist.size(); ++ii) {
+    for (unsigned _ = 0; _ < nlist[ii].size(); ++_) {
       int jj = nlist[ii][_];
       if (jj < ii) continue;
       VALUETYPE diff[3];
-      region.diffNearestNeighbor (&coord[ii*3], &coord[jj*3], diff);      
+      region.diffNearestNeighbor(&coord[ii * 3], &coord[jj * 3], diff);
       VALUETYPE r2 = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
       if (r2 < rc2) {
-	VALUETYPE ae, af;
-	lj_inner (ae, af, r2);
-	for (int dd = 0; dd < 3; ++dd) force[ii*3+dd] += af * diff[dd];
-	for (int dd = 0; dd < 3; ++dd) force[jj*3+dd] -= af * diff[dd];
-	ener += ae;
-	for (int dd0 = 0; dd0 < 3; ++dd0){
-	  for (int dd1 = 0; dd1 < 3; ++dd1){
-	    virial[dd0*3+dd1] -= 0.5 * diff[dd0] * af * diff[dd1];
-	  }
-	}
-      }      
+        VALUETYPE ae, af;
+        lj_inner(ae, af, r2);
+        for (int dd = 0; dd < 3; ++dd) force[ii * 3 + dd] += af * diff[dd];
+        for (int dd = 0; dd < 3; ++dd) force[jj * 3 + dd] -= af * diff[dd];
+        ener += ae;
+        for (int dd0 = 0; dd0 < 3; ++dd0) {
+          for (int dd1 = 0; dd1 < 3; ++dd1) {
+            virial[dd0 * 3 + dd1] -= 0.5 * diff[dd0] * af * diff[dd1];
+          }
+        }
+      }
     }
   }
 
   // for (int ii = 0; ii < natoms; ++ii){
   //   for (int jj = ii+1; jj < natoms; ++jj){
   //     VALUETYPE diff[3];
-  //     for (int dd = 0; dd < 3; ++dd) diff[dd] = coord[ii*3+dd] - coord[jj*3+dd];
-  //     diff_pbc (diff, box);
-  //     VALUETYPE r2 = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
-  //     if (r2 < rc2) {
+  //     for (int dd = 0; dd < 3; ++dd) diff[dd] = coord[ii*3+dd] -
+  //     coord[jj*3+dd]; diff_pbc (diff, box); VALUETYPE r2 = diff[0] * diff[0]
+  //     + diff[1] * diff[1] + diff[2] * diff[2]; if (r2 < rc2) {
   // 	VALUETYPE ae, af;
   // 	lj_inner (ae, af, r2);
   // 	for (int dd = 0; dd < 3; ++dd) force[ii*3+dd] += af * diff[dd];
@@ -78,4 +70,3 @@ compute (VALUETYPE &			ener,
   //   }
   // }
 }
-
diff --git a/source/md/src/LJTab.cc b/source/md/src/LJTab.cc
index 01dd7a8013..c95b27abf2 100644
--- a/source/md/src/LJTab.cc
+++ b/source/md/src/LJTab.cc
@@ -1,36 +1,27 @@
 #include "LJTab.h"
 
-LJTab::
-LJTab (const VALUETYPE & c6,
-       const VALUETYPE & c12,
-       const VALUETYPE & rc)
-{
+LJTab::LJTab(const VALUETYPE& c6, const VALUETYPE& c12, const VALUETYPE& rc) {
   VALUETYPE rcp = rc + 1;
   VALUETYPE hh = 2e-3;
   int nn = rcp / hh;
   vector tab;
-  VALUETYPE rc6 = rc * rc * rc * rc * rc * rc ;
-  VALUETYPE one_over_rc6 = 1./rc6;
-  VALUETYPE one_over_rc12 = 1./rc6/rc6;
-  for (int ii = 0; ii < nn; ++ii){
+  VALUETYPE rc6 = rc * rc * rc * rc * rc * rc;
+  VALUETYPE one_over_rc6 = 1. / rc6;
+  VALUETYPE one_over_rc12 = 1. / rc6 / rc6;
+  for (int ii = 0; ii < nn; ++ii) {
     VALUETYPE xx = ii * hh;
     VALUETYPE value, deriv;
     if (xx <= rc) {
       VALUETYPE xx3 = xx * xx * xx;
       VALUETYPE xx6 = xx3 * xx3;
-      VALUETYPE xx12 = xx6 * xx6;    
-      value = - c6 / xx6 + c12 / xx12 + c6 * one_over_rc6 - c12 * one_over_rc12;
-      deriv = - (6. * c6 / xx6 - 12. * c12 / xx12) / xx;
-    }
-    else {
+      VALUETYPE xx12 = xx6 * xx6;
+      value = -c6 / xx6 + c12 / xx12 + c6 * one_over_rc6 - c12 * one_over_rc12;
+      deriv = -(6. * c6 / xx6 - 12. * c12 / xx12) / xx;
+    } else {
       value = deriv = 0;
     }
-    tab.push_back (value);
-    tab.push_back (deriv);
+    tab.push_back(value);
+    tab.push_back(deriv);
   }
-  lj_tab.reinit (rcp, hh, tab);
+  lj_tab.reinit(rcp, hh, tab);
 }
-
-
-
-
diff --git a/source/md/src/MaxShift.cc b/source/md/src/MaxShift.cc
index aeb9b293d0..3cab8bc59d 100644
--- a/source/md/src/MaxShift.cc
+++ b/source/md/src/MaxShift.cc
@@ -1,30 +1,26 @@
 #include "MaxShift.h"
-#include "common.h"
 
 #include 
 
-MaxShift::
-MaxShift (const vector & dcoord, 
-	  const VALUETYPE & shell_)
-{
+#include "common.h"
+
+MaxShift::MaxShift(const vector& dcoord, const VALUETYPE& shell_) {
   record = dcoord;
   shell = shell_;
   max_allow2 = shell * 0.5 * shell * 0.5;
 }
 
 VALUETYPE
-MaxShift::
-max_shift2 (const vector & coord, 
-	    const SimulationRegion & region) 
-{
-  assert (coord.size() == record.size());
+MaxShift::max_shift2(const vector& coord,
+                     const SimulationRegion& region) {
+  assert(coord.size() == record.size());
   int natoms = coord.size() / 3;
-  
+
   VALUETYPE maxv = 0;
-  
-  for (int ii = 0; ii < natoms; ++ii){
+
+  for (int ii = 0; ii < natoms; ++ii) {
     VALUETYPE diff[3];
-    region.diffNearestNeighbor (&coord[ii*3], &record[ii*3], diff);
+    region.diffNearestNeighbor(&coord[ii * 3], &record[ii * 3], diff);
     VALUETYPE r2 = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
     if (r2 > maxv) maxv = r2;
   }
@@ -32,20 +28,13 @@ max_shift2 (const vector & coord,
   return maxv;
 }
 
-bool 
-MaxShift::
-rebuild (const vector & coord, 
-	 const SimulationRegion & region) 
-{
-  VALUETYPE maxv2 = max_shift2 (coord, region);
-  if (maxv2 > max_allow2){
+bool MaxShift::rebuild(const vector& coord,
+                       const SimulationRegion& region) {
+  VALUETYPE maxv2 = max_shift2(coord, region);
+  if (maxv2 > max_allow2) {
     record = coord;
     return true;
-  }
-  else {
+  } else {
     return false;
   }
 }
-
-
-
diff --git a/source/md/src/Poly.cpp b/source/md/src/Poly.cpp
index 9414849c01..e3a3a4a060 100644
--- a/source/md/src/Poly.cpp
+++ b/source/md/src/Poly.cpp
@@ -1,30 +1,27 @@
 #include "Poly.h"
 
-bool PiecewisePoly::valid () const
-{
-  if (x.size() != p.size()+1) return false;
-  std::vector::const_iterator i = x.begin();
-  std::vector::const_iterator j = x.begin();
-  for (++j ;j != x.end(); ++i, ++j){
+bool PiecewisePoly::valid() const {
+  if (x.size() != p.size() + 1) return false;
+  std::vector::const_iterator i = x.begin();
+  std::vector::const_iterator j = x.begin();
+  for (++j; j != x.end(); ++i, ++j) {
     if (*i > *j) return false;
   }
   return true;
 }
 
-double PiecewisePoly::value (const double & xx) const
-{
+double PiecewisePoly::value(const double& xx) const {
   unsigned begin = 0;
   unsigned end = x.size() - 1;
-  unsigned mid = end/2;
-  if (end == begin){
+  unsigned mid = end / 2;
+  if (end == begin) {
     return 0;
   }
-  while (end - begin > 1){
-    if (xx < x[mid]){
+  while (end - begin > 1) {
+    if (xx < x[mid]) {
       end = mid;
       mid = (begin + end) / 2;
-    }
-    else{
+    } else {
       begin = mid;
       mid = (begin + end) / 2;
     }
@@ -32,28 +29,27 @@ double PiecewisePoly::value (const double & xx) const
   return p[begin].value(xx);
 }
 
-double PiecewisePoly::value_periodic (const double & xx_) const
-{
+double PiecewisePoly::value_periodic(const double& xx_) const {
   double xx(xx_);
   double T = x.back() - x.front();
-  if (xx < x.front()){
-    while ((xx += T) < x.front()) ;
-  }
-  else if (xx >= x.back()){
-    while ((xx -= T) >= x.back());
+  if (xx < x.front()) {
+    while ((xx += T) < x.front())
+      ;
+  } else if (xx >= x.back()) {
+    while ((xx -= T) >= x.back())
+      ;
   }
   unsigned begin = 0;
   unsigned end = x.size() - 1;
-  unsigned mid = end/2;
-  if (end == begin){
+  unsigned mid = end / 2;
+  if (end == begin) {
     return 0;
   }
-  while (end - begin > 1){
-    if (xx < x[mid]){
+  while (end - begin > 1) {
+    if (xx < x[mid]) {
       end = mid;
       mid = (begin + end) / 2;
-    }
-    else{
+    } else {
       begin = mid;
       mid = (begin + end) / 2;
     }
@@ -61,19 +57,17 @@ double PiecewisePoly::value_periodic (const double & xx_) const
   return p[begin].value(xx);
 }
 
-double PiecewisePoly::value (const double & xx,
-			     unsigned & begin,
-			     unsigned & end) const
-{
+double PiecewisePoly::value(const double& xx,
+                            unsigned& begin,
+                            unsigned& end) const {
   if (end <= begin) return 0;
-  if (end - begin == 1)  return p[begin].value(xx);
-  unsigned mid = (begin + end)/2;
-  while (end - begin > 1){
-    if (xx < x[mid]){
+  if (end - begin == 1) return p[begin].value(xx);
+  unsigned mid = (begin + end) / 2;
+  while (end - begin > 1) {
+    if (xx < x[mid]) {
       end = mid;
       mid = (begin + end) / 2;
-    }
-    else{
+    } else {
       begin = mid;
       mid = (begin + end) / 2;
     }
@@ -81,61 +75,57 @@ double PiecewisePoly::value (const double & xx,
   return p[begin].value(xx);
 }
 
-void PiecewisePoly::value (const unsigned & xbegin,
-			   const unsigned & xend,
-			   const std::vector & r,
-			   const unsigned & rbegin,
-			   const unsigned & rend,
-			   std::vector & y) const
-{
+void PiecewisePoly::value(const unsigned& xbegin,
+                          const unsigned& xend,
+                          const std::vector& r,
+                          const unsigned& rbegin,
+                          const unsigned& rend,
+                          std::vector& y) const {
   unsigned xbegin1 = xbegin;
   unsigned xend1 = xend;
-  if (rend - rbegin <= 1){
+  if (rend - rbegin <= 1) {
     y[rbegin] = value(r[rbegin], xbegin1, xend1);
     xbegin1 = xbegin;
     xend1 = xend;
     y[rend] = value(r[rend], xbegin1, xend1);
-  }
-  else {
+  } else {
     unsigned rmid = (rbegin + rend) / 2;
-    y[rmid] = value (r[rmid], xbegin1, xend1);
-    value (xbegin, xend1, r, rbegin, rmid-1, y);
-    value (xbegin1, xend, r, rmid+1, rend, y);
+    y[rmid] = value(r[rmid], xbegin1, xend1);
+    value(xbegin, xend1, r, rbegin, rmid - 1, y);
+    value(xbegin1, xend, r, rmid + 1, rend, y);
   }
 }
 
 // suppose that
-void PiecewisePoly::value (const std::vector & r,
-			   std::vector & y) const
-{
+void PiecewisePoly::value(const std::vector& r,
+                          std::vector& y) const {
   y.resize(r.size());
-  value (0, x.size()-1, r, 0, r.size()-1, y);
+  value(0, x.size() - 1, r, 0, r.size() - 1, y);
 }
 
 // suppose that
-void PiecewisePoly::value_periodic (const std::vector & r,
-				    std::vector & y) const
-{
-  std::vector tmpr;
-  std::vector tmpy;
-  std::vector > values;
+void PiecewisePoly::value_periodic(const std::vector& r,
+                                   std::vector& y) const {
+  std::vector tmpr;
+  std::vector tmpy;
+  std::vector > values;
   unsigned presentEnd(0), presentStart(0);
   double T = x.back() - x.front();
-  
-  while (presentEnd < r.size()){
+
+  while (presentEnd < r.size()) {
     tmpr.clear();
     presentStart = presentEnd;
     double shift = 0;
-    if (r[presentStart] < x.front()){
-      while (r[presentStart] + (shift += T) < x.front());
+    if (r[presentStart] < x.front()) {
+      while (r[presentStart] + (shift += T) < x.front())
+        ;
+    } else if (r[presentStart] >= x.back()) {
+      while (r[presentStart] + (shift -= T) >= x.back())
+        ;
     }
-    else if (r[presentStart] >= x.back()){
-      while (r[presentStart] + (shift -= T) >= x.back());
-    }
-    while (presentEnd < r.size() && 
-	   r[presentEnd] + shift >= x.front() &&
-	   r[presentEnd] + shift <  x.back()){
-      tmpr.push_back (r[presentEnd++] + shift);
+    while (presentEnd < r.size() && r[presentEnd] + shift >= x.front() &&
+           r[presentEnd] + shift < x.back()) {
+      tmpr.push_back(r[presentEnd++] + shift);
     }
     // while (presentEnd < r.size() && r[presentEnd] - r[presentStart] < T){
     //   tmpr.push_back (r[presentEnd++]);
@@ -143,174 +133,146 @@ void PiecewisePoly::value_periodic (const std::vector & r,
     // for (unsigned i = 0; i < tmpr.size(); ++i){
     //   tmpr[i] += shift;
     // }
-    value (tmpr, tmpy);
-    values.push_back (tmpy);
+    value(tmpr, tmpy);
+    values.push_back(tmpy);
   }
 
   y.clear();
-  for (unsigned i = 0; i < values.size(); ++i){
+  for (unsigned i = 0; i < values.size(); ++i) {
     y.insert(y.end(), values[i].begin(), values[i].end());
   }
 }
 
-
-Poly & Poly::valueLinearPoly (const double & a_, const double & b_,
-			      Poly & p)
-{
-  std::vector tmp(2, a_);
+Poly& Poly::valueLinearPoly(const double& a_, const double& b_, Poly& p) {
+  std::vector tmp(2, a_);
   tmp[0] = b_;
-  Poly axb (tmp);
+  Poly axb(tmp);
   p.one();
   p *= a.back();
-  for (int i = order-1; i >= 0; i--){
+  for (int i = order - 1; i >= 0; i--) {
     (p *= axb) += a[i];
   }
   return p;
 }
-  
 
-double Poly::value (const double & x) const
-{
-  double value = a[a.size()-1];
-  for (int i = a.size() - 2; i >= 0; --i){
+double Poly::value(const double& x) const {
+  double value = a[a.size() - 1];
+  for (int i = a.size() - 2; i >= 0; --i) {
     value = value * x + a[i];
   }
   return value;
 }
 
-Poly::Poly ()
-    : a (1, 0.) , order(0.)
-{
-}
+Poly::Poly() : a(1, 0.), order(0.) {}
 
-Poly::Poly (const std::vector & out)
-    : a(out) 
-{
-  order = out.size() - 1;
-}
+Poly::Poly(const std::vector& out) : a(out) { order = out.size() - 1; }
 
-void Poly::reinit (const std::vector & out)
-{
+void Poly::reinit(const std::vector& out) {
   a = out;
   order = out.size() - 1;
-} 
+}
 
-Poly & Poly::operator = (const Poly & p)
-{
+Poly& Poly::operator=(const Poly& p) {
   a = p.a;
   order = p.order;
   return *this;
 }
 
-Poly & Poly::operator += (const Poly & p)
-{
-  if (p.a.size() > a.size()){
+Poly& Poly::operator+=(const Poly& p) {
+  if (p.a.size() > a.size()) {
     a.resize(p.a.size(), 0);
     order = p.order;
-    for (unsigned i = 0; i <= order; i ++){
+    for (unsigned i = 0; i <= order; i++) {
       a[i] += p.a[i];
     }
-  }
-  else {
-    for (unsigned i = 0; i <= p.order; i ++){
+  } else {
+    for (unsigned i = 0; i <= p.order; i++) {
       a[i] += p.a[i];
     }
   }
-  return * this;
+  return *this;
 }
 
-Poly & Poly::operator += (const double & b)
-{
+Poly& Poly::operator+=(const double& b) {
   a[0] += b;
   return *this;
 }
 
-
-Poly & Poly::derivative ()
-{
+Poly& Poly::derivative() {
   if (order == 0) {
     a[0] = 0;
     return *this;
   }
-  for (unsigned i = 0; i < order; i ++){
-    a[i] = a[i+1] * (i+1);
+  for (unsigned i = 0; i < order; i++) {
+    a[i] = a[i + 1] * (i + 1);
   }
-  order --;
+  order--;
   a.pop_back();
-  return * this;
+  return *this;
 }
 
-
-Poly & Poly::operator *= (const double & scale)
-{
-  if (scale == 0){
+Poly& Poly::operator*=(const double& scale) {
+  if (scale == 0) {
     order = 0;
-    a.resize (1);
+    a.resize(1);
     a[0] = 0;
-  }
-  else {
-    for (std::vector::iterator i = a.begin(); i != a.end(); i ++){
+  } else {
+    for (std::vector::iterator i = a.begin(); i != a.end(); i++) {
       *i *= scale;
     }
   }
-  return * this;
+  return *this;
 }
 
-  
+Poly& Poly::operator*=(const Poly& p) {
+  std::vector a1(a);
+  unsigned order1(order);
 
-Poly & Poly::operator *= (const Poly & p)
-{
-  std::vector a1 (a);
-  unsigned order1 (order);
-  
   order += p.order;
-  a.resize (order+1, 0);
-  
-  for (std::vector::iterator i = a.begin(); i != a.end(); i ++){
+  a.resize(order + 1, 0);
+
+  for (std::vector::iterator i = a.begin(); i != a.end(); i++) {
     *i *= p.a[0];
   }
-  if (p.order >= 1){
-    for (unsigned i = 1; i <= p.order; i ++){
-      for (unsigned j = 0; j <= order1; j ++){
-	a[i+j] += a1[j] * p.a[i];
+  if (p.order >= 1) {
+    for (unsigned i = 1; i <= p.order; i++) {
+      for (unsigned j = 0; j <= order1; j++) {
+        a[i + j] += a1[j] * p.a[i];
       }
     }
   }
   return *this;
 }
 
-void Poly::print ()
-{
-  for (unsigned i = 0; i <= order; i ++){
-    std::cout << a[i] <<'\t' ;
+void Poly::print() {
+  for (unsigned i = 0; i <= order; i++) {
+    std::cout << a[i] << '\t';
   }
   std::cout << std::endl;
 }
 
-void Poly::print (const std::string & x)
-{
+void Poly::print(const std::string& x) {
   std::cout << a[0];
-  for (unsigned i = 1; i <= order; i ++){
-    std::cout << " + " <<  a[i] << x << "^" << i ;
+  for (unsigned i = 1; i <= order; i++) {
+    std::cout << " + " << a[i] << x << "^" << i;
   }
   std::cout << std::endl;
 }
 
-void Poly::printCode (const std::string & x)
-{
-  std::cout.precision (16);
-  if (order == 0){
+void Poly::printCode(const std::string& x) {
+  std::cout.precision(16);
+  if (order == 0) {
     std::cout << a[0] << std::endl;
     return;
   }
-  
-  for (unsigned i = 0; i < order-1; i ++){
-    std::cout << "(" ;
+
+  for (unsigned i = 0; i < order - 1; i++) {
+    std::cout << "(";
   }
-  std::vector::reverse_iterator p = a.rbegin();
-  std::cout << *(p++) << " * " << x << " + " ;
+  std::vector::reverse_iterator p = a.rbegin();
+  std::cout << *(p++) << " * " << x << " + ";
   std::cout << *(p++);
-  for (; p != a.rend(); p ++){
+  for (; p != a.rend(); p++) {
     std::cout << ") * " << x << " + " << *p;
   }
   std::cout << std::endl;
diff --git a/source/md/src/RandomGenerator_MT19937.cc b/source/md/src/RandomGenerator_MT19937.cc
index 48c7613274..59d6a092f8 100644
--- a/source/md/src/RandomGenerator_MT19937.cc
+++ b/source/md/src/RandomGenerator_MT19937.cc
@@ -1,15 +1,16 @@
-#include "RandomGenerator.h"
 #include 
 
-/* 
+#include "RandomGenerator.h"
+
+/*
    A C-program for MT19937, with initialization improved 2002/1/26.
    Coded by Takuji Nishimura and Makoto Matsumoto.
 
-   Before using, initialize the state by using init_genrand(seed)  
+   Before using, initialize the state by using init_genrand(seed)
    or init_by_array(init_key, key_length).
 
    Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
-   All rights reserved.                          
+   All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
@@ -22,15 +23,15 @@
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
 
-     3. The names of its contributors may not be used to endorse or promote 
-        products derived from this software without specific prior written 
+     3. The names of its contributors may not be used to endorse or promote
+        products derived from this software without specific prior written
         permission.
 
    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
@@ -44,7 +45,7 @@
    email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
 */
 
-/* Period parameters */  
+/* Period parameters */
 #define N 624
 #define M 397
 #define MATRIX_A 0x9908b0dfUL   /* constant vector a */
@@ -52,125 +53,124 @@
 #define LOWER_MASK 0x7fffffffUL /* least significant r bits */
 
 static unsigned long mt[N]; /* the array for the state vector  */
-static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */
+static int mti = N + 1;     /* mti==N+1 means mt[N] is not initialized */
 
-//using namespace RandomGenerator_MT19937;
+// using namespace RandomGenerator_MT19937;
 
 /* initializes mt[N] with a seed */
-void RandomGenerator_MT19937::init_genrand(unsigned long s)
-{
-    mt[0]= s & 0xffffffffUL;
-    for (mti=1; mti> 30)) + mti); 
-        /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
-        /* In the previous versions, MSBs of the seed affect   */
-        /* only MSBs of the array mt[].                        */
-        /* 2002/01/09 modified by Makoto Matsumoto             */
-        mt[mti] &= 0xffffffffUL;
-        /* for >32 bit machines */
-    }
+void RandomGenerator_MT19937::init_genrand(unsigned long s) {
+  mt[0] = s & 0xffffffffUL;
+  for (mti = 1; mti < N; mti++) {
+    mt[mti] = (1812433253UL * (mt[mti - 1] ^ (mt[mti - 1] >> 30)) + mti);
+    /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+    /* In the previous versions, MSBs of the seed affect   */
+    /* only MSBs of the array mt[].                        */
+    /* 2002/01/09 modified by Makoto Matsumoto             */
+    mt[mti] &= 0xffffffffUL;
+    /* for >32 bit machines */
+  }
 }
 
 /* initialize by an array with array-length */
 /* init_key is the array for initializing keys */
 /* key_length is its length */
 /* slight change for C++, 2004/2/26 */
-void RandomGenerator_MT19937::init_by_array(unsigned long init_key[], int key_length)
-{
-    int i, j, k;
-    init_genrand(19650218UL);
-    i=1; j=0;
-    k = (N>key_length ? N : key_length);
-    for (; k; k--) {
-        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL))
-          + init_key[j] + j; /* non linear */
-        mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
-        i++; j++;
-        if (i>=N) { mt[0] = mt[N-1]; i=1; }
-        if (j>=key_length) j=0;
+void RandomGenerator_MT19937::init_by_array(unsigned long init_key[],
+                                            int key_length) {
+  int i, j, k;
+  init_genrand(19650218UL);
+  i = 1;
+  j = 0;
+  k = (N > key_length ? N : key_length);
+  for (; k; k--) {
+    mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >> 30)) * 1664525UL)) +
+            init_key[j] + j; /* non linear */
+    mt[i] &= 0xffffffffUL;   /* for WORDSIZE > 32 machines */
+    i++;
+    j++;
+    if (i >= N) {
+      mt[0] = mt[N - 1];
+      i = 1;
     }
-    for (k=N-1; k; k--) {
-        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL))
-          - i; /* non linear */
-        mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
-        i++;
-        if (i>=N) { mt[0] = mt[N-1]; i=1; }
+    if (j >= key_length) j = 0;
+  }
+  for (k = N - 1; k; k--) {
+    mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >> 30)) * 1566083941UL)) -
+            i;             /* non linear */
+    mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
+    i++;
+    if (i >= N) {
+      mt[0] = mt[N - 1];
+      i = 1;
     }
+  }
 
-    mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ 
+  mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */
 }
 
 /* generates a random number on [0,0xffffffff]-interval */
-unsigned long RandomGenerator_MT19937::genrand_int32(void)
-{
-    unsigned long y;
-    static unsigned long mag01[2]={0x0UL, MATRIX_A};
-    /* mag01[x] = x * MATRIX_A  for x=0,1 */
-
-    if (mti >= N) { /* generate N words at one time */
-        int kk;
-
-        if (mti == N+1)   /* if init_genrand() has not been called, */
-            init_genrand(5489UL); /* a default initial seed is used */
-
-        for (kk=0;kk> 1) ^ mag01[y & 0x1UL];
-        }
-        for (;kk> 1) ^ mag01[y & 0x1UL];
-        }
-        y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK);
-        mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL];
-
-        mti = 0;
+unsigned long RandomGenerator_MT19937::genrand_int32(void) {
+  unsigned long y;
+  static unsigned long mag01[2] = {0x0UL, MATRIX_A};
+  /* mag01[x] = x * MATRIX_A  for x=0,1 */
+
+  if (mti >= N) { /* generate N words at one time */
+    int kk;
+
+    if (mti == N + 1)       /* if init_genrand() has not been called, */
+      init_genrand(5489UL); /* a default initial seed is used */
+
+    for (kk = 0; kk < N - M; kk++) {
+      y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+      mt[kk] = mt[kk + M] ^ (y >> 1) ^ mag01[y & 0x1UL];
+    }
+    for (; kk < N - 1; kk++) {
+      y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
+      mt[kk] = mt[kk + (M - N)] ^ (y >> 1) ^ mag01[y & 0x1UL];
     }
-  
-    y = mt[mti++];
+    y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
+    mt[N - 1] = mt[M - 1] ^ (y >> 1) ^ mag01[y & 0x1UL];
+
+    mti = 0;
+  }
 
-    /* Tempering */
-    y ^= (y >> 11);
-    y ^= (y << 7) & 0x9d2c5680UL;
-    y ^= (y << 15) & 0xefc60000UL;
-    y ^= (y >> 18);
+  y = mt[mti++];
 
-    return y;
+  /* Tempering */
+  y ^= (y >> 11);
+  y ^= (y << 7) & 0x9d2c5680UL;
+  y ^= (y << 15) & 0xefc60000UL;
+  y ^= (y >> 18);
+
+  return y;
 }
 
 /* generates a random number on [0,0x7fffffff]-interval */
-long RandomGenerator_MT19937::genrand_int31(void)
-{
-    return (long)(genrand_int32()>>1);
+long RandomGenerator_MT19937::genrand_int31(void) {
+  return (long)(genrand_int32() >> 1);
 }
 
 /* generates a random number on [0,1]-real-interval */
-double RandomGenerator_MT19937::genrand_real1(void)
-{
-    return genrand_int32()*(1.0/4294967295.0); 
-    /* divided by 2^32-1 */ 
+double RandomGenerator_MT19937::genrand_real1(void) {
+  return genrand_int32() * (1.0 / 4294967295.0);
+  /* divided by 2^32-1 */
 }
 
 /* generates a random number on [0,1)-real-interval */
-double RandomGenerator_MT19937::genrand_real2(void)
-{
-    return genrand_int32()*(1.0/4294967296.0); 
-    /* divided by 2^32 */
+double RandomGenerator_MT19937::genrand_real2(void) {
+  return genrand_int32() * (1.0 / 4294967296.0);
+  /* divided by 2^32 */
 }
 
 /* generates a random number on (0,1)-real-interval */
-double RandomGenerator_MT19937::genrand_real3(void)
-{
-    return (((double)genrand_int32()) + 0.5)*(1.0/4294967296.0); 
-    /* divided by 2^32 */
+double RandomGenerator_MT19937::genrand_real3(void) {
+  return (((double)genrand_int32()) + 0.5) * (1.0 / 4294967296.0);
+  /* divided by 2^32 */
 }
 
 /* generates a random number on [0,1) with 53-bit resolution*/
-double RandomGenerator_MT19937::genrand_res53(void) 
-{ 
-    unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6; 
-    return(a*67108864.0+b)*(1.0/9007199254740992.0); 
-} 
+double RandomGenerator_MT19937::genrand_res53(void) {
+  unsigned long a = genrand_int32() >> 5, b = genrand_int32() >> 6;
+  return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
+}
 /* These real versions are due to Isaku Wada, 2002/01/09 added */
-
diff --git a/source/md/src/Statistics.cc b/source/md/src/Statistics.cc
index 6b248f83a1..acf5e80ec5 100644
--- a/source/md/src/Statistics.cc
+++ b/source/md/src/Statistics.cc
@@ -1,5 +1,5 @@
-#include 
 #include 
+#include 
 // #include 
 #include 
 
@@ -7,26 +7,19 @@
 #include "UnitManager.h"
 
 template 
-Statistics::
-Statistics (const VALUETYPE e_corr_,
-	    const VALUETYPE p_corr_)
-    : e_corr(e_corr_), p_corr(p_corr_)
-{
-}
-
+Statistics::Statistics(const VALUETYPE e_corr_,
+                                  const VALUETYPE p_corr_)
+    : e_corr(e_corr_), p_corr(p_corr_) {}
 
 template 
-void
-Statistics::
-record (const VALUETYPE & ener,
-	const vector & virial,
-	const vector & veloc,
-	const vector & mass, 
-	const SimulationRegion & region_)
-{
+void Statistics::record(const VALUETYPE& ener,
+                                   const vector& virial,
+                                   const vector& veloc,
+                                   const vector& mass,
+                                   const SimulationRegion& region_) {
   r_pot_ener = ener;
   r_vir.resize(9);
-  for (unsigned ii = 0; ii < 9; ++ii){
+  for (unsigned ii = 0; ii < 9; ++ii) {
     r_vir[ii] = virial[ii];
   }
   // r_box.resize(6);
@@ -37,58 +30,44 @@ record (const VALUETYPE & ener,
   natoms = mass.size();
   r_kin_ener = 0;
   double pref = 0.5 * UnitManager::IntegratorMassConstant;
-  for (int ii = 0; ii < natoms; ++ii){
-    r_kin_ener += pref * mass[ii] * veloc[3*ii+0] * veloc[3*ii+0];
-    r_kin_ener += pref * mass[ii] * veloc[3*ii+1] * veloc[3*ii+1];
-    r_kin_ener += pref * mass[ii] * veloc[3*ii+2] * veloc[3*ii+2];
+  for (int ii = 0; ii < natoms; ++ii) {
+    r_kin_ener += pref * mass[ii] * veloc[3 * ii + 0] * veloc[3 * ii + 0];
+    r_kin_ener += pref * mass[ii] * veloc[3 * ii + 1] * veloc[3 * ii + 1];
+    r_kin_ener += pref * mass[ii] * veloc[3 * ii + 2] * veloc[3 * ii + 2];
   }
 }
 
 template 
-double
-Statistics::
-get_T () const 
-{
-  return get_ekin () / (natoms * 3. * UnitManager::BoltzmannConstant) * 2.;
+double Statistics::get_T() const {
+  return get_ekin() / (natoms * 3. * UnitManager::BoltzmannConstant) * 2.;
 }
 
 template 
-double
-Statistics::
-get_V () const 
-{
-  // return (r_box[1] - r_box[0]) * (r_box[3] - r_box[2]) * (r_box[5] - r_box[4]);
+double Statistics::get_V() const {
+  // return (r_box[1] - r_box[0]) * (r_box[3] - r_box[2]) * (r_box[5] -
+  // r_box[4]);
   return region.getVolume();
 }
 
 template 
-double
-Statistics::
-get_P () const 
-{
-  return (get_ekin() - (r_vir[0] + r_vir[4] + r_vir[8])) * 2./3. / get_V() * UnitManager::PressureConstant + p_corr;
+double Statistics::get_P() const {
+  return (get_ekin() - (r_vir[0] + r_vir[4] + r_vir[8])) * 2. / 3. / get_V() *
+             UnitManager::PressureConstant +
+         p_corr;
 }
 
 template 
-void
-Statistics::
-print (ostream & os,
-       const int & step,
-       const double time) const  
-{
+void Statistics::print(ostream& os,
+                                  const int& step,
+                                  const double time) const {
   char tmps[65536];
-  sprintf (tmps, "%13.4f %15.6f %15.6f %15.6f %15.6f %15.6f %15.6f %15.6f %15.6f\n", 
-	   time,
-	   get_ekin(),
-	   get_epot(),
-	   get_ekin() + get_epot(),
-	   get_T(),
-	   get_P(),
-	   r_vir[0],
-	   r_vir[4],
-	   r_vir[8]);
-  os << tmps ;
-  // os << setw(7) << setprecision(6) << time << setprecision (8) << setfill (' ')
+  sprintf(tmps,
+          "%13.4f %15.6f %15.6f %15.6f %15.6f %15.6f %15.6f %15.6f %15.6f\n",
+          time, get_ekin(), get_epot(), get_ekin() + get_epot(), get_T(),
+          get_P(), r_vir[0], r_vir[4], r_vir[8]);
+  os << tmps;
+  // os << setw(7) << setprecision(6) << time << setprecision (8) << setfill ('
+  // ')
   //    << setw(15) << get_ekin() << " "
   //    << setw(15) << get_epot() << " "
   //    << setw(15) << get_ekin() + get_epot() << " "
@@ -101,22 +80,12 @@ print (ostream & os,
 }
 
 template 
-void
-Statistics::
-print_head (ostream & os) const  
-{
+void Statistics::print_head(ostream& os) const {
   char tmps[65536];
-  sprintf (tmps, "#%12s %15s %15s %15s %15s %15s %15s %15s %15s\n", 
-	   "time",
-	   "Kinetic",
-	   "Potential",
-	   "E_tot",
-	   "Temperature",
-	   "Pressure",
-	   "Vxx",
-	   "Vyy",
-	   "Vzz");
-  os << tmps ;
+  sprintf(tmps, "#%12s %15s %15s %15s %15s %15s %15s %15s %15s\n", "time",
+          "Kinetic", "Potential", "E_tot", "Temperature", "Pressure", "Vxx",
+          "Vyy", "Vzz");
+  os << tmps;
   // os << "#";
   // os << setw(6) <<  "time" << setfill (' ')
   //    << setw(15) << "Kinetic" << " "
@@ -130,7 +99,5 @@ print_head (ostream & os) const
   //    << endl;
 }
 
-
 template class Statistics;
 template class Statistics;
-
diff --git a/source/md/src/StringSplit.cpp b/source/md/src/StringSplit.cpp
index 123277fa37..6a7f7f62ad 100644
--- a/source/md/src/StringSplit.cpp
+++ b/source/md/src/StringSplit.cpp
@@ -1,44 +1,37 @@
 #include "StringSplit.h"
 
-void StringOperation::
-split (const std::string & in,
-       std::vector & out)
-{
+void StringOperation::split(const std::string& in,
+                            std::vector& out) {
   std::istringstream iss(in);
   out.clear();
-  
+
   do {
     std::string sub;
     iss >> sub;
-    out.push_back (sub);
-  // std::vector tokens;
-  // tokens.push_back (" ");
-  // tokens.push_back ("\t");
-  // std::copy(std::istream_iterator(iss),
-  // 	    std::istream_iterator(),
-  // 	    std::back_inserter >(tokens));
+    out.push_back(sub);
+    // std::vector tokens;
+    // tokens.push_back (" ");
+    // tokens.push_back ("\t");
+    // std::copy(std::istream_iterator(iss),
+    // 	    std::istream_iterator(),
+    // 	    std::back_inserter >(tokens));
   } while (iss);
 
   out.pop_back();
 }
 
-
-void StringOperation::
-split (const std::string & in,
-       const std::string & delimiter,
-       std::vector & out)
-{
+void StringOperation::split(const std::string& in,
+                            const std::string& delimiter,
+                            std::vector& out) {
   size_t pos = 0;
   size_t len = delimiter.length();
-  std::string s (in);
+  std::string s(in);
   std::string token;
 
-  while ( (pos = s.find(delimiter)) != std::string::npos ){
-    token = s.substr (0, pos);
-    out.push_back (token);
-    s.erase (0, pos + len);
+  while ((pos = s.find(delimiter)) != std::string::npos) {
+    token = s.substr(0, pos);
+    out.push_back(token);
+    s.erase(0, pos + len);
   }
-  if (! s.empty() ) out.push_back (s);
+  if (!s.empty()) out.push_back(s);
 }
-
-
diff --git a/source/md/src/TF.cc b/source/md/src/TF.cc
index 696deecab5..24b5927702 100644
--- a/source/md/src/TF.cc
+++ b/source/md/src/TF.cc
@@ -1,62 +1,56 @@
 #include "TF.h"
+
+#include 
+
 #include "Interpolation.h"
 #include "TableFileLoader.h"
-#include 
 
-TF::
-TF (const string & filename)
-{
+TF::TF(const string& filename) {
   vector > tmpdata;
-  TableFileLoader tfl (filename.c_str());
-  tfl.setColumns ({1, 3});
-  tfl.loadAll (tmpdata);
+  TableFileLoader tfl(filename.c_str());
+  tfl.setColumns({1, 3});
+  tfl.loadAll(tmpdata);
   data = tmpdata[1];
   hh = tmpdata[0][1] - tmpdata[0][0];
   xup = tmpdata[0].back();
   xup *= b2m_l;
   hh *= b2m_l;
-  for (unsigned ii = 0; ii < data.size(); ++ii){
+  for (unsigned ii = 0; ii < data.size(); ++ii) {
     data[ii] *= b2m_e / b2m_l;
   }
 }
 
 VALUETYPE
-TF::
-meas (const VALUETYPE & xx) const
-{
+TF::meas(const VALUETYPE& xx) const {
   VALUETYPE ff = 0;
   if (xx >= xup) {
     ff = 0;
-  }
-  else {
-    int posi = int (xx / hh);
-    if (posi < 0) posi = 0;
-    else if (posi >= data.size()-1) posi = data.size() - 2;
+  } else {
+    int posi = int(xx / hh);
+    if (posi < 0)
+      posi = 0;
+    else if (posi >= data.size() - 1)
+      posi = data.size() - 2;
     Poly p;
-    Interpolation::pieceLinearInterpol (posi*hh, (posi+1)*hh, data[posi], data[posi+1], p);
-    ff = p.value (xx);
+    Interpolation::pieceLinearInterpol(posi * hh, (posi + 1) * hh, data[posi],
+                                       data[posi + 1], p);
+    ff = p.value(xx);
   }
   return ff;
 }
 
-void
-TF::
-apply (vector & dforce,
-       const vector & dcoord,
-       const AdWeight & adw) const
-{
+void TF::apply(vector& dforce,
+               const vector& dcoord,
+               const AdWeight& adw) const {
   vector weight, weight_x;
-  adw.atom_weight (weight, weight_x, dcoord);
+  adw.atom_weight(weight, weight_x, dcoord);
   vector center = adw.get_center();
-  
-  for (unsigned ii = 0; ii < weight_x.size(); ++ii){
-    VALUETYPE ff = meas (weight_x[ii]);
-    if (dcoord[ii*3] <  center[0]) {
-      ff=-ff;
+
+  for (unsigned ii = 0; ii < weight_x.size(); ++ii) {
+    VALUETYPE ff = meas(weight_x[ii]);
+    if (dcoord[ii * 3] < center[0]) {
+      ff = -ff;
     }
-    dforce [ii*3] += ff;
+    dforce[ii * 3] += ff;
   }
 }
-
-
-
diff --git a/source/md/src/TableFileLoader.cpp b/source/md/src/TableFileLoader.cpp
index 5b1530fbee..8d372daec1 100644
--- a/source/md/src/TableFileLoader.cpp
+++ b/source/md/src/TableFileLoader.cpp
@@ -1,54 +1,42 @@
 #include "TableFileLoader.h"
-#include "StringSplit.h"
 
-#include 
 #include 
+#include 
+
+#include "StringSplit.h"
 
 #define MaxLineLength 65536
 
 using namespace std;
 
-TableFileLoader::
-TableFileLoader	(const char * file)
-    :
-    every (1)
-{
-  reinit (file);
-}
+TableFileLoader::TableFileLoader(const char* file) : every(1) { reinit(file); }
 
-unsigned
-TableFileLoader::
-getNumbColumns ()
-{
-  char valueline [MaxLineLength];  
-  while (data.getline(valueline, MaxLineLength)){
-    if (valueline[0] == '#' || valueline[0] == '@'){
+unsigned TableFileLoader::getNumbColumns() {
+  char valueline[MaxLineLength];
+  while (data.getline(valueline, MaxLineLength)) {
+    if (valueline[0] == '#' || valueline[0] == '@') {
       continue;
     }
     break;
-  }  
-  if (data.eof()){
-    return 0;
   }
-  else if (! data.good()){
+  if (data.eof()) {
+    return 0;
+  } else if (!data.good()) {
     cerr << "error file reading state!" << endl;
     throw;
   }
-  vector words;
-  StringOperation::split (string(valueline), words);
+  vector words;
+  StringOperation::split(string(valueline), words);
 
   data.close();
-  reinit (file.c_str());
+  reinit(file.c_str());
   return words.size();
 }
 
-void 
-TableFileLoader::
-reinit (const char * file_)
-{
+void TableFileLoader::reinit(const char* file_) {
   file = string(file_);
-  data.open (file.c_str());
-  if (!data){
+  data.open(file.c_str());
+  if (!data) {
     cerr << "cannot open file \"" << file << "\"" << endl;
     throw;
   }
@@ -56,13 +44,10 @@ reinit (const char * file_)
   // inter_cols.push_back (0);
 }
 
-void 
-TableFileLoader::
-setColumns (const vector & cols)
-{
+void TableFileLoader::setColumns(const vector& cols) {
   inter_cols = cols;
-  for (unsigned ii = 0; ii < inter_cols.size(); ++ii){
-    if (inter_cols[ii] == 0){
+  for (unsigned ii = 0; ii < inter_cols.size(); ++ii) {
+    if (inter_cols[ii] == 0) {
       cerr << "invalid col index, should be larger than 0" << endl;
       throw;
     }
@@ -70,59 +55,43 @@ setColumns (const vector & cols)
   }
 }
 
-void 
-TableFileLoader::
-setEvery (const unsigned every_) 
-{
-  every = every_;
-}
+void TableFileLoader::setEvery(const unsigned every_) { every = every_; }
 
+bool TableFileLoader::loadLine(vector& odata) {
+  char valueline[MaxLineLength];
 
-bool
-TableFileLoader::
-loadLine (vector & odata)
-{
-  char valueline [MaxLineLength];
-  
-  while (data.getline(valueline, MaxLineLength)){
-    if (valueline[0] == '#' || valueline[0] == '@'){
+  while (data.getline(valueline, MaxLineLength)) {
+    if (valueline[0] == '#' || valueline[0] == '@') {
       continue;
-    }
-    else if (count_read++ % every == 0){
+    } else if (count_read++ % every == 0) {
       break;
     }
   }
-  
-  if (data.eof()){
+
+  if (data.eof()) {
     return false;
-  }
-  else if (! data.good()){
+  } else if (!data.good()) {
     cerr << "error file reading state!" << endl;
     throw;
   }
 
-  vector words;
-  StringOperation::split (string(valueline), words);
-  odata.resize (inter_cols.size());
+  vector words;
+  StringOperation::split(string(valueline), words);
+  odata.resize(inter_cols.size());
 
-  for (unsigned ii = 0; ii < inter_cols.size(); ++ii){
+  for (unsigned ii = 0; ii < inter_cols.size(); ++ii) {
     odata[ii] = atof(words[inter_cols[ii]].c_str());
   }
-  
+
   return true;
 }
 
-void
-TableFileLoader::
-loadAll (vector > & odata)
-{
+void TableFileLoader::loadAll(vector >& odata) {
   odata.resize(inter_cols.size());
-  vector line;
-  while (loadLine (line)){
-    for (unsigned ii = 0; ii < inter_cols.size(); ++ii){
-      odata[ii].push_back (line[ii]);
+  vector line;
+  while (loadLine(line)) {
+    for (unsigned ii = 0; ii < inter_cols.size(); ++ii) {
+      odata[ii].push_back(line[ii]);
     }
   }
 }
-
-
diff --git a/source/md/src/Tabulated.cc b/source/md/src/Tabulated.cc
index da81e2b4de..297d0614ab 100644
--- a/source/md/src/Tabulated.cc
+++ b/source/md/src/Tabulated.cc
@@ -1,80 +1,70 @@
-#include "UnitManager.h"
 #include "Tabulated.h"
-#include "common.h"
+
 #include 
 #include 
 
-Tabulated::
-Tabulated (const VALUETYPE rc,
-	   const VALUETYPE hh,
-	   const vector & tab) 
-{
-  reinit (rc, hh, tab) ;
+#include "UnitManager.h"
+#include "common.h"
+
+Tabulated::Tabulated(const VALUETYPE rc,
+                     const VALUETYPE hh,
+                     const vector &tab) {
+  reinit(rc, hh, tab);
 }
 
-void 
-Tabulated::
-reinit (const VALUETYPE rc,
-	const VALUETYPE hh,
-	const vector & tab)
-{
+void Tabulated::reinit(const VALUETYPE rc,
+                       const VALUETYPE hh,
+                       const vector &tab) {
   int numbFunc = 1;
   int stride = numbFunc * 4;
   int mystride = numbFunc * 2;
   unsigned tableLength = tab.size() / mystride;
 
-  hi = 1./hh;
+  hi = 1. / hh;
   rc2 = rc * rc;
 
-  data.resize (tableLength * stride);
-  
+  data.resize(tableLength * stride);
+
   int ii;
-  for (ii = 0; ii < tableLength-1; ++ii){
-      const double & v0 (tab[ii*mystride + 0]);
-      const double & f0 (tab[ii*mystride + 1]);
-      const double & v1 (tab[(ii+1)*mystride + 0]);
-      const double & f1 (tab[(ii+1)*mystride + 1]);
-      VALUETYPE &dv (data[ii*stride + 0]);
-      VALUETYPE &df (data[ii*stride + 1]);
-      VALUETYPE &dg (data[ii*stride + 2]);
-      VALUETYPE &dh (data[ii*stride + 3]);
-      dv = v0;
-      df = -f0 * hh;
-      dg =  3*(v1 - v0) + (f1 + 2*f0)*hh;
-      dh = -2*(v1 - v0) - (f1 +   f0)*hh;
+  for (ii = 0; ii < tableLength - 1; ++ii) {
+    const double &v0(tab[ii * mystride + 0]);
+    const double &f0(tab[ii * mystride + 1]);
+    const double &v1(tab[(ii + 1) * mystride + 0]);
+    const double &f1(tab[(ii + 1) * mystride + 1]);
+    VALUETYPE &dv(data[ii * stride + 0]);
+    VALUETYPE &df(data[ii * stride + 1]);
+    VALUETYPE &dg(data[ii * stride + 2]);
+    VALUETYPE &dh(data[ii * stride + 3]);
+    dv = v0;
+    df = -f0 * hh;
+    dg = 3 * (v1 - v0) + (f1 + 2 * f0) * hh;
+    dh = -2 * (v1 - v0) - (f1 + f0) * hh;
   }
   {
-    const double & v0 (tab[ii*mystride + 0]);
-    const double & f0 (tab[ii*mystride + 1]);
-    VALUETYPE &dv (data[ii*stride + 0]);
-    VALUETYPE &df (data[ii*stride + 1]);
-    VALUETYPE &dg (data[ii*stride + 2]);
-    VALUETYPE &dh (data[ii*stride + 3]);
+    const double &v0(tab[ii * mystride + 0]);
+    const double &f0(tab[ii * mystride + 1]);
+    VALUETYPE &dv(data[ii * stride + 0]);
+    VALUETYPE &df(data[ii * stride + 1]);
+    VALUETYPE &dg(data[ii * stride + 2]);
+    VALUETYPE &dh(data[ii * stride + 3]);
     dv = v0;
     df = -f0 * hh;
-    dg = 0; 
+    dg = 0;
     dh = 0;
   }
 }
 
-
-inline void
-Tabulated::
-compute_posi (int & idx, 
-	      VALUETYPE & eps,
-	      const VALUETYPE & rr)
-{
+inline void Tabulated::compute_posi(int &idx,
+                                    VALUETYPE &eps,
+                                    const VALUETYPE &rr) {
   VALUETYPE rt = rr * hi;
   idx = int(rt);
   eps = rt - idx;
 }
 
-inline void
-Tabulated::
-tb_inner (VALUETYPE & ae,
-	  VALUETYPE & af,
-	  const VALUETYPE & r2)
-{
+inline void Tabulated::tb_inner(VALUETYPE &ae,
+                                VALUETYPE &af,
+                                const VALUETYPE &r2) {
   if (r2 > rc2) {
     ae = af = 0;
     return;
@@ -83,12 +73,12 @@ tb_inner (VALUETYPE & ae,
   VALUETYPE rr = sqrt(r2);
   int idx;
   VALUETYPE eps;
-  compute_posi (idx, eps, rr);
-  idx *= 4;  
+  compute_posi(idx, eps, rr);
+  idx *= 4;
 
   VALUETYPE table_param[4];
-  for (int ii = 0; ii < 4; ++ii){
-    table_param[ii] = data[ii+idx];
+  for (int ii = 0; ii < 4; ++ii) {
+    table_param[ii] = data[ii + idx];
   }
   const VALUETYPE &Y(table_param[0]);
   const VALUETYPE &F(table_param[1]);
@@ -100,80 +90,73 @@ tb_inner (VALUETYPE & ae,
   VALUETYPE FF = (Fp + (eps * (G + (Heps + Heps))));
 
   af = FF * hi;
-  af = - af / rr;  
+  af = -af / rr;
   ae = (Y + (eps * Fp));
 }
 
-void
-Tabulated::
-compute (VALUETYPE &			ener,
-	 vector &		force,
-	 vector &		virial,
-	 const vector &	coord,
-	 const vector &		atype,
-	 const SimulationRegion &	region, 
-	 const vector > &	nlist)
-{
-  for (unsigned ii = 0; ii < nlist.size(); ++ii){
-    for (unsigned _ = 0; _ < nlist[ii].size(); ++_){
+void Tabulated::compute(VALUETYPE &ener,
+                        vector &force,
+                        vector &virial,
+                        const vector &coord,
+                        const vector &atype,
+                        const SimulationRegion ®ion,
+                        const vector > &nlist) {
+  for (unsigned ii = 0; ii < nlist.size(); ++ii) {
+    for (unsigned _ = 0; _ < nlist[ii].size(); ++_) {
       int jj = nlist[ii][_];
       if (jj < ii) continue;
       VALUETYPE diff[3];
-      region.diffNearestNeighbor (&coord[ii*3], &coord[jj*3], diff);      
+      region.diffNearestNeighbor(&coord[ii * 3], &coord[jj * 3], diff);
       VALUETYPE r2 = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
       if (r2 < rc2) {
-	VALUETYPE ae, af;
-	tb_inner (ae, af, r2);
-	for (int dd = 0; dd < 3; ++dd) force[ii*3+dd] += af * diff[dd];
-	for (int dd = 0; dd < 3; ++dd) force[jj*3+dd] -= af * diff[dd];
-	ener += ae;
-	for (int dd0 = 0; dd0 < 3; ++dd0){
-	  for (int dd1 = 0; dd1 < 3; ++dd1){
-	    virial[dd0*3+dd1] -= 0.5 * diff[dd0] * af * diff[dd1];
-	  }
-	}
-      }      
+        VALUETYPE ae, af;
+        tb_inner(ae, af, r2);
+        for (int dd = 0; dd < 3; ++dd) force[ii * 3 + dd] += af * diff[dd];
+        for (int dd = 0; dd < 3; ++dd) force[jj * 3 + dd] -= af * diff[dd];
+        ener += ae;
+        for (int dd0 = 0; dd0 < 3; ++dd0) {
+          for (int dd1 = 0; dd1 < 3; ++dd1) {
+            virial[dd0 * 3 + dd1] -= 0.5 * diff[dd0] * af * diff[dd1];
+          }
+        }
+      }
     }
-  }  
+  }
 }
 
-void
-Tabulated::
-compute (VALUETYPE &			ener,
-	 vector &		force,
-	 vector &		virial,
-	 const vector &	coord,
-	 const vector &	charge,
-	 const vector &		atype,
-	 const SimulationRegion &	region, 
-	 const vector > &	nlist)
-{
-  for (unsigned ii = 0; ii < nlist.size(); ++ii){
-    for (unsigned _ = 0; _ < nlist[ii].size(); ++_){
+void Tabulated::compute(VALUETYPE &ener,
+                        vector &force,
+                        vector &virial,
+                        const vector &coord,
+                        const vector &charge,
+                        const vector &atype,
+                        const SimulationRegion ®ion,
+                        const vector > &nlist) {
+  for (unsigned ii = 0; ii < nlist.size(); ++ii) {
+    for (unsigned _ = 0; _ < nlist[ii].size(); ++_) {
       int jj = nlist[ii][_];
       if (jj < ii) continue;
       VALUETYPE diff[3];
-      region.diffNearestNeighbor (&coord[ii*3], &coord[jj*3], diff);      
+      region.diffNearestNeighbor(&coord[ii * 3], &coord[jj * 3], diff);
       VALUETYPE r2 = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
       if (r2 < rc2) {
-	VALUETYPE ae, af;
-	tb_inner (ae, af, r2);
-	{
-	  VALUETYPE qiqj = charge[ii] * charge[jj] * UnitManager::ElectrostaticConvertion;
-	  ae *= qiqj;
-	  af *= qiqj;
-	}
-	for (int dd = 0; dd < 3; ++dd) force[ii*3+dd] += af * diff[dd];
-	for (int dd = 0; dd < 3; ++dd) force[jj*3+dd] -= af * diff[dd];
-	ener += ae;
-	for (int dd0 = 0; dd0 < 3; ++dd0){
-	  for (int dd1 = 0; dd1 < 3; ++dd1){
-	    virial[dd0*3+dd1] -= 0.5 * diff[dd0] * af * diff[dd1];
-	  }
-	}
-      }      
+        VALUETYPE ae, af;
+        tb_inner(ae, af, r2);
+        {
+          VALUETYPE qiqj =
+              charge[ii] * charge[jj] * UnitManager::ElectrostaticConvertion;
+          ae *= qiqj;
+          af *= qiqj;
+        }
+        for (int dd = 0; dd < 3; ++dd) force[ii * 3 + dd] += af * diff[dd];
+        for (int dd = 0; dd < 3; ++dd) force[jj * 3 + dd] -= af * diff[dd];
+        ener += ae;
+        for (int dd0 = 0; dd0 < 3; ++dd0) {
+          for (int dd1 = 0; dd1 < 3; ++dd1) {
+            virial[dd0 * 3 + dd1] -= 0.5 * diff[dd0] * af * diff[dd1];
+          }
+        }
+      }
     }
-  }  
+  }
 }
-
-
diff --git a/source/md/src/Trajectory.cc b/source/md/src/Trajectory.cc
index 1859f1d7bc..673f6ebaad 100644
--- a/source/md/src/Trajectory.cc
+++ b/source/md/src/Trajectory.cc
@@ -1,167 +1,132 @@
 #include "Trajectory.h"
+
 #include 
 #include 
-#include 
+
 #include 
+#include 
 
-bool
-XtcSaver::
-reinit (const char * filename,
-	const int & natoms_)
-{
+bool XtcSaver::reinit(const char *filename, const int &natoms_) {
   char tmpname[2048];
-  strncpy (tmpname, filename, 2047);
-  
-  xd = xdrfile_open (filename, "w");
-  if (xd == NULL){
+  strncpy(tmpname, filename, 2047);
+
+  xd = xdrfile_open(filename, "w");
+  if (xd == NULL) {
     std::cerr << "cannot open file " << filename << std::endl;
     return false;
   }
   natoms = natoms_;
 
-  xx = (rvec *) malloc (sizeof(rvec) * natoms);
+  xx = (rvec *)malloc(sizeof(rvec) * natoms);
   inited = true;
-  return true;  
+  return true;
 }
 
-XtcSaver::
-~XtcSaver ()
-{
-  clear();
-}
+XtcSaver::~XtcSaver() { clear(); }
 
-XtcSaver::
-XtcSaver (const char * filename,
-	  const int & natoms_)
-    : inited(false), prec(1000)
-{
-  reinit (filename, natoms_);
+XtcSaver::XtcSaver(const char *filename, const int &natoms_)
+    : inited(false), prec(1000) {
+  reinit(filename, natoms_);
 }
 
-void
-XtcSaver::
-clear ()
-{
-  if (inited){
-    free (xx);
-    xdrfile_close (xd);
+void XtcSaver::clear() {
+  if (inited) {
+    free(xx);
+    xdrfile_close(xd);
     inited = false;
   }
 }
 
-void
-XtcSaver::
-save (const int & step,
-      const double & time,
-      const vector > & frame, 
-      const vector & box)
-{
-  assert (box.size() == 9);
-  assert (inited);
+void XtcSaver::save(const int &step,
+                    const double &time,
+                    const vector > &frame,
+                    const vector &box) {
+  assert(box.size() == 9);
+  assert(inited);
   matrix tmpBox;
-  for (int dd0 = 0; dd0 < 3; ++dd0){
-    for (int dd1 = 0; dd1 < 3; ++dd1){
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
       tmpBox[dd0][dd1] = 0;
     }
   }
-  for (int dd = 0; dd < 3; ++dd){
-    tmpBox[dd][dd] = box[3*dd+dd];
+  for (int dd = 0; dd < 3; ++dd) {
+    tmpBox[dd][dd] = box[3 * dd + dd];
   }
-  for (int ii = 0; ii < frame.size(); ++ii){
+  for (int ii = 0; ii < frame.size(); ++ii) {
     for (int dd = 0; dd < 3; ++dd) xx[ii][dd] = frame[ii][dd];
   }
-  write_xtc (xd, natoms, step, time, tmpBox, xx, prec);
+  write_xtc(xd, natoms, step, time, tmpBox, xx, prec);
 }
 
-
-bool
-TrrSaver::
-reinit (const char * filename,
-	const int & natoms_)
-{
+bool TrrSaver::reinit(const char *filename, const int &natoms_) {
   char tmpname[2048];
-  strncpy (tmpname, filename, 2047);
-  
-  xd = xdrfile_open (filename, "w");
-  if (xd == NULL){
+  strncpy(tmpname, filename, 2047);
+
+  xd = xdrfile_open(filename, "w");
+  if (xd == NULL) {
     std::cerr << "cannot open file " << filename << std::endl;
     return false;
   }
   natoms = natoms_;
 
-  xx = (rvec *) malloc (sizeof(rvec) * natoms);
-  vv = (rvec *) malloc (sizeof(rvec) * natoms);
-  ff = (rvec *) malloc (sizeof(rvec) * natoms);
-  for (int ii = 0; ii < natoms; ++ii){
+  xx = (rvec *)malloc(sizeof(rvec) * natoms);
+  vv = (rvec *)malloc(sizeof(rvec) * natoms);
+  ff = (rvec *)malloc(sizeof(rvec) * natoms);
+  for (int ii = 0; ii < natoms; ++ii) {
     for (int dd = 0; dd < 3; ++dd) {
       vv[ii][dd] = 0;
       ff[ii][dd] = 0;
     }
   }
   inited = true;
-  return true;  
+  return true;
 }
 
-TrrSaver::
-~TrrSaver ()
-{
-  clear();
-}
+TrrSaver::~TrrSaver() { clear(); }
 
-TrrSaver::
-TrrSaver (const char * filename,
-	  const int & natoms_)
-    : inited(false), lambda(0)
-{
-  reinit (filename, natoms_);
+TrrSaver::TrrSaver(const char *filename, const int &natoms_)
+    : inited(false), lambda(0) {
+  reinit(filename, natoms_);
 }
 
-void
-TrrSaver::
-clear ()
-{
-  if (inited){
-    free (xx);
-    free (vv);
-    free (ff);
-    xdrfile_close (xd);
+void TrrSaver::clear() {
+  if (inited) {
+    free(xx);
+    free(vv);
+    free(ff);
+    xdrfile_close(xd);
     inited = false;
   }
 }
 
-void
-TrrSaver::
-save (const int & step,
-      const double & time,
-      const vector > & ixx, 
-      const vector > & ivv, 
-      const vector > & iff, 
-      const vector & box)
-{
-  assert (box.size() == 9);
-  assert (inited);
+void TrrSaver::save(const int &step,
+                    const double &time,
+                    const vector > &ixx,
+                    const vector > &ivv,
+                    const vector > &iff,
+                    const vector &box) {
+  assert(box.size() == 9);
+  assert(inited);
   matrix tmpBox;
-  for (int dd0 = 0; dd0 < 3; ++dd0){
-    for (int dd1 = 0; dd1 < 3; ++dd1){
-      tmpBox[dd0][dd1] = box[3*dd0 + dd1];
+  for (int dd0 = 0; dd0 < 3; ++dd0) {
+    for (int dd1 = 0; dd1 < 3; ++dd1) {
+      tmpBox[dd0][dd1] = box[3 * dd0 + dd1];
     }
   }
-  for (int ii = 0; ii < ixx.size(); ++ii){
+  for (int ii = 0; ii < ixx.size(); ++ii) {
     for (int dd = 0; dd < 3; ++dd) xx[ii][dd] = ixx[ii][dd];
   }
-  for (int ii = 0; ii < natoms; ++ii){
+  for (int ii = 0; ii < natoms; ++ii) {
     for (int dd = 0; dd < 3; ++dd) {
       vv[ii][dd] = 0;
       ff[ii][dd] = 0;
     }
   }
-  for (int ii = 0; ii < ivv.size(); ++ii){
+  for (int ii = 0; ii < ivv.size(); ++ii) {
     for (int dd = 0; dd < 3; ++dd) vv[ii][dd] = ivv[ii][dd];
   }
-  for (int ii = 0; ii < iff.size(); ++ii){
+  for (int ii = 0; ii < iff.size(); ++ii) {
     for (int dd = 0; dd < 3; ++dd) ff[ii][dd] = iff[ii][dd];
   }
-  write_trr (xd, natoms, step, time, lambda, tmpBox, xx, vv, ff);
+  write_trr(xd, natoms, step, time, lambda, tmpBox, xx, vv, ff);
 }
-
-
diff --git a/source/md/src/UnitManager.cc b/source/md/src/UnitManager.cc
index 370e16a7ae..d070c9631b 100644
--- a/source/md/src/UnitManager.cc
+++ b/source/md/src/UnitManager.cc
@@ -1,37 +1,28 @@
 #include "UnitManager.h"
+
 #include 
 
 // unit independent constants
-double UnitManager::Degree2Radian		= M_PI / 180.;
-double UnitManager::Radian2Degree		= 180. / M_PI;
+double UnitManager::Degree2Radian = M_PI / 180.;
+double UnitManager::Radian2Degree = 180. / M_PI;
 // unit dependent
-double UnitManager::IntegratorMassConstant	= 1.;
-double UnitManager::PressureConstant		= 16.60539040;
-double UnitManager::BoltzmannConstant		= 8.31445986144858164e-3;
-double UnitManager::ElectrostaticConvertion	= 138.93545756169981341199;
+double UnitManager::IntegratorMassConstant = 1.;
+double UnitManager::PressureConstant = 16.60539040;
+double UnitManager::BoltzmannConstant = 8.31445986144858164e-3;
+double UnitManager::ElectrostaticConvertion = 138.93545756169981341199;
 
-string UnitManager::unit_names[] =
-{
-  "biology",
-  "metal",
-  "unitless"
-};
+string UnitManager::unit_names[] = {"biology", "metal", "unitless"};
 
-void
-UnitManager::
-set (const string & unit)
-{
-  if (unit == "metal"){
-    IntegratorMassConstant	= 1.03642695707516506071e-4;
-    PressureConstant		= 1.602176621e6;
-    BoltzmannConstant		= 8.6173303e-5;
-    ElectrostaticConvertion	= 14.39964535475696995031;
-  }
-  else if (unit == "unitless"){
-    IntegratorMassConstant	= 1.;
-    PressureConstant		= 1.;
-    BoltzmannConstant		= 1.;
-    ElectrostaticConvertion	= 1.;
+void UnitManager::set(const string& unit) {
+  if (unit == "metal") {
+    IntegratorMassConstant = 1.03642695707516506071e-4;
+    PressureConstant = 1.602176621e6;
+    BoltzmannConstant = 8.6173303e-5;
+    ElectrostaticConvertion = 14.39964535475696995031;
+  } else if (unit == "unitless") {
+    IntegratorMassConstant = 1.;
+    PressureConstant = 1.;
+    BoltzmannConstant = 1.;
+    ElectrostaticConvertion = 1.;
   }
 }
-
diff --git a/source/md/src/XyzFileManager.cc b/source/md/src/XyzFileManager.cc
index df79d19b54..b1d8fc1e9d 100644
--- a/source/md/src/XyzFileManager.cc
+++ b/source/md/src/XyzFileManager.cc
@@ -1,112 +1,104 @@
-#include "StringSplit.h"
 #include "XyzFileManager.h"
 
 #include 
+
+#include "StringSplit.h"
 // #include 
-#include 
 #include 
 
-void
-XyzFileManager::
-read (const string & file,
-      vector & atom_name,
-      vector > & posi,
-      vector > & velo,
-      vector > & forc,
-      vector & boxsize)
-{
-  getBoxSize (file, boxsize);
-  
+#include 
+
+void XyzFileManager::read(const string& file,
+                          vector& atom_name,
+                          vector >& posi,
+                          vector >& velo,
+                          vector >& forc,
+                          vector& boxsize) {
+  getBoxSize(file, boxsize);
+
   posi.clear();
   velo.clear();
 
-  ifstream data0 (file.c_str());
+  ifstream data0(file.c_str());
   if (!data0.is_open()) {
     cerr << "cannot open file " << file << endl;
     exit(1);
   }
-  
+
   string valueline;
   vector words;
-  words.reserve (10);
+  words.reserve(10);
   string tmpname;
-  vector tmpp(3);
-  vector tmpv(3);
-  vector tmpf(3);
+  vector tmpp(3);
+  vector tmpv(3);
+  vector tmpf(3);
   std::getline(data0, valueline);
-  long long int numb_atom = atoll (valueline.c_str());
+  long long int numb_atom = atoll(valueline.c_str());
   std::getline(data0, valueline);
-  
-  for (long long int ii = 0; ii< numb_atom; ++ii) {
+
+  for (long long int ii = 0; ii < numb_atom; ++ii) {
     std::getline(data0, valueline);
-    StringOperation::split (string(valueline), words);
-    if (words.size() == 10){
-      tmpp[0] = atof (words[1+0].c_str());
-      tmpp[1] = atof (words[1+1].c_str());
-      tmpp[2] = atof (words[1+2].c_str());
-      tmpv[0] = atof (words[1+3].c_str());
-      tmpv[1] = atof (words[1+4].c_str());
-      tmpv[2] = atof (words[1+5].c_str());
-      tmpf[0] = atof (words[1+6].c_str());
-      tmpf[1] = atof (words[1+7].c_str());
-      tmpf[2] = atof (words[1+8].c_str());
-      posi.push_back (tmpp);
-      velo.push_back (tmpv);
-      forc.push_back (tmpf);
-      atom_name.push_back (words[0]);
-    }
-    else if (words.size() == 7){
-      tmpp[0] = atof (words[1+0].c_str());
-      tmpp[1] = atof (words[1+1].c_str());
-      tmpp[2] = atof (words[1+2].c_str());
-      tmpv[0] = atof (words[1+3].c_str());
-      tmpv[1] = atof (words[1+4].c_str());
-      tmpv[2] = atof (words[1+5].c_str());
-      posi.push_back (tmpp);
-      velo.push_back (tmpv);
-      atom_name.push_back (words[0]);
-    }
-    else if (words.size() == 4){
-      tmpp[0] = atof (words[1+0].c_str());
-      tmpp[1] = atof (words[1+1].c_str());
-      tmpp[2] = atof (words[1+2].c_str());
-      posi.push_back (tmpp);
-      atom_name.push_back (words[0]);
-    }
-    else {
-      cerr << "XyzFileManager::read: wrong format, line has "<< words.size() << " words" << endl;
-      exit (1);
+    StringOperation::split(string(valueline), words);
+    if (words.size() == 10) {
+      tmpp[0] = atof(words[1 + 0].c_str());
+      tmpp[1] = atof(words[1 + 1].c_str());
+      tmpp[2] = atof(words[1 + 2].c_str());
+      tmpv[0] = atof(words[1 + 3].c_str());
+      tmpv[1] = atof(words[1 + 4].c_str());
+      tmpv[2] = atof(words[1 + 5].c_str());
+      tmpf[0] = atof(words[1 + 6].c_str());
+      tmpf[1] = atof(words[1 + 7].c_str());
+      tmpf[2] = atof(words[1 + 8].c_str());
+      posi.push_back(tmpp);
+      velo.push_back(tmpv);
+      forc.push_back(tmpf);
+      atom_name.push_back(words[0]);
+    } else if (words.size() == 7) {
+      tmpp[0] = atof(words[1 + 0].c_str());
+      tmpp[1] = atof(words[1 + 1].c_str());
+      tmpp[2] = atof(words[1 + 2].c_str());
+      tmpv[0] = atof(words[1 + 3].c_str());
+      tmpv[1] = atof(words[1 + 4].c_str());
+      tmpv[2] = atof(words[1 + 5].c_str());
+      posi.push_back(tmpp);
+      velo.push_back(tmpv);
+      atom_name.push_back(words[0]);
+    } else if (words.size() == 4) {
+      tmpp[0] = atof(words[1 + 0].c_str());
+      tmpp[1] = atof(words[1 + 1].c_str());
+      tmpp[2] = atof(words[1 + 2].c_str());
+      posi.push_back(tmpp);
+      atom_name.push_back(words[0]);
+    } else {
+      cerr << "XyzFileManager::read: wrong format, line has " << words.size()
+           << " words" << endl;
+      exit(1);
     }
   }
 }
 
-void
-XyzFileManager::
-getBoxSize (const string & file,
-	    vector & boxsize) 
-{
-  ifstream data0 (file.c_str());
+void XyzFileManager::getBoxSize(const string& file, vector& boxsize) {
+  ifstream data0(file.c_str());
   if (!data0.is_open()) {
     cerr << "cannot open file " << file << endl;
   }
   string valueline;
   vector words;
-  words.reserve (9);
-  std::getline (data0, valueline);
-  std::getline (data0, valueline);
-  StringOperation::split (valueline, words);
+  words.reserve(9);
+  std::getline(data0, valueline);
+  std::getline(data0, valueline);
+  StringOperation::split(valueline, words);
 
   boxsize.resize(9);
-  fill (boxsize.begin(), boxsize.end(), 0.);
-  if (words.size() == 3){
-    for (int ii = 0; ii < 3; ++ii) boxsize[3*ii+ii] = atof (words[ii].c_str());
-  }
-  else if (words.size() == 9){
-    for (int ii = 0; ii < 9; ++ii) boxsize[ii] = atof (words[ii].c_str());
+  fill(boxsize.begin(), boxsize.end(), 0.);
+  if (words.size() == 3) {
+    for (int ii = 0; ii < 3; ++ii)
+      boxsize[3 * ii + ii] = atof(words[ii].c_str());
+  } else if (words.size() == 9) {
+    for (int ii = 0; ii < 9; ++ii) boxsize[ii] = atof(words[ii].c_str());
+  } else {
+    cerr << "XyzFileManager::getBoxSize: wrong format, line has "
+         << words.size() << " words" << endl;
+    exit(1);
   }
-  else {
-    cerr << "XyzFileManager::getBoxSize: wrong format, line has "<< words.size() << " words" << endl;
-    exit (1);
-  }  
 }
-
diff --git a/source/md/src/ZM.cc b/source/md/src/ZM.cc
index 512e97105e..61537629c3 100644
--- a/source/md/src/ZM.cc
+++ b/source/md/src/ZM.cc
@@ -1,93 +1,78 @@
 #include "ZM.h"
-#include "UnitManager.h"
-#include "common.h"
+
 #include 
 #include 
 
-ZM::
-ZM (const int & order,
-    const VALUETYPE & alpha,
-    const VALUETYPE & rc)
-    : potzm (order, alpha, rc)
-{
+#include "UnitManager.h"
+#include "common.h"
+
+ZM::ZM(const int& order, const VALUETYPE& alpha, const VALUETYPE& rc)
+    : potzm(order, alpha, rc) {
   VALUETYPE rcp = rc + 2;
   VALUETYPE hh = 2e-3;
   int nn = rcp / hh;
   vector tab;
 
-  for (int ii = 0; ii < nn; ++ii){
+  for (int ii = 0; ii < nn; ++ii) {
     VALUETYPE xx = ii * hh;
     VALUETYPE value, deriv;
     if (xx <= rc) {
-      value = potzm.pot (xx);
-      deriv = potzm.mpotp (xx);      
-    }
-    else {
+      value = potzm.pot(xx);
+      deriv = potzm.mpotp(xx);
+    } else {
       value = deriv = 0;
     }
-    tab.push_back (value);
-    tab.push_back (deriv);
+    tab.push_back(value);
+    tab.push_back(deriv);
   }
-  zm_tab.reinit (rcp, hh, tab);
+  zm_tab.reinit(rcp, hh, tab);
 }
 
 VALUETYPE
-ZM::
-e_corr (const vector & charge) const
-{
+ZM::e_corr(const vector& charge) const {
   double sum = 0;
-  sum += potzm.energyCorr (charge);
+  sum += potzm.energyCorr(charge);
   return sum;
 }
 
-inline void 
-ZM::
-ex_inner (VALUETYPE & ae,
-	  VALUETYPE & af,
-	  const VALUETYPE & r2)
-{
-  VALUETYPE r1 = sqrt (r2);
-  ae = 1./r1;
-  af = 1./(r2 * r1);
+inline void ZM::ex_inner(VALUETYPE& ae, VALUETYPE& af, const VALUETYPE& r2) {
+  VALUETYPE r1 = sqrt(r2);
+  ae = 1. / r1;
+  af = 1. / (r2 * r1);
 }
 
-void 
-ZM::
-exclude  (VALUETYPE &			ener,
-	  vector &		force,
-	  vector &		virial,
-	  const vector &	coord,
-	  const vector &	charge,
-	  const vector &		atype,
-	  const SimulationRegion &	region, 
-	  const vector &		elist)
-{
-  for (unsigned _ = 0; _ < elist.size(); _ += 2){
+void ZM::exclude(VALUETYPE& ener,
+                 vector& force,
+                 vector& virial,
+                 const vector& coord,
+                 const vector& charge,
+                 const vector& atype,
+                 const SimulationRegion& region,
+                 const vector& elist) {
+  for (unsigned _ = 0; _ < elist.size(); _ += 2) {
     int ii = elist[_];
-    int jj = elist[_+1];
+    int jj = elist[_ + 1];
     VALUETYPE diff[3];
-    region.diffNearestNeighbor (&coord[ii*3], &coord[jj*3], diff);      
-    VALUETYPE r2 = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];    
+    region.diffNearestNeighbor(&coord[ii * 3], &coord[jj * 3], diff);
+    VALUETYPE r2 = diff[0] * diff[0] + diff[1] * diff[1] + diff[2] * diff[2];
     VALUETYPE ae, af;
-    ex_inner (ae, af, r2);
+    ex_inner(ae, af, r2);
     // VALUETYPE ae1, af1;
     // zm_tab.tb_inner (ae1, af1, r2);
     // cout << ae << " " << ae1 << endl;
     {
-      VALUETYPE qiqj = charge[ii] * charge[jj] * UnitManager::ElectrostaticConvertion;
+      VALUETYPE qiqj =
+          charge[ii] * charge[jj] * UnitManager::ElectrostaticConvertion;
       ae *= qiqj;
       af *= qiqj;
     }
-    for (int dd = 0; dd < 3; ++dd) force[ii*3+dd] -= af * diff[dd];
-    for (int dd = 0; dd < 3; ++dd) force[jj*3+dd] += af * diff[dd];    
+    for (int dd = 0; dd < 3; ++dd) force[ii * 3 + dd] -= af * diff[dd];
+    for (int dd = 0; dd < 3; ++dd) force[jj * 3 + dd] += af * diff[dd];
     ener -= ae;
-    for (int dd0 = 0; dd0 < 3; ++dd0){
-      for (int dd1 = 0; dd1 < 3; ++dd1){
-	virial[dd0*3+dd1] += 0.5 * diff[dd0] * af * diff[dd1];
+    for (int dd0 = 0; dd0 < 3; ++dd0) {
+      for (int dd1 = 0; dd1 < 3; ++dd1) {
+        virial[dd0 * 3 + dd1] += 0.5 * diff[dd0] * af * diff[dd1];
       }
     }
   }
 }
-
-
-
diff --git a/source/md/src/ZMFunctions.cpp b/source/md/src/ZMFunctions.cpp
index 073ae1c8cf..28f68887dd 100644
--- a/source/md/src/ZMFunctions.cpp
+++ b/source/md/src/ZMFunctions.cpp
@@ -1,270 +1,208 @@
 #include "ZMFunctions.h"
-#include "UnitManager.h"
+
 #include 
 #include 
 
+#include "UnitManager.h"
+
 #define M_inv2 (0.5)
 #define M_inv4 (0.25)
 #define M_inv8 (0.125)
 #define M_inv16 (0.06250000000000000000)
 #define M_inv48 (.02083333333333333333)
 
-static double
-f (const double & r)
-{
-  return 1./r;
-}
+static double f(const double& r) { return 1. / r; }
 
-static double
-D1f (const double & r)
-{
-  return -1./(r*r);
-}
+static double D1f(const double& r) { return -1. / (r * r); }
 
-static double
-D2f (const double & r)
-{
-  return 2./(r*r*r);
-}
+static double D2f(const double& r) { return 2. / (r * r * r); }
 
-static double
-D3f (const double & r)
-{
-  return -6./(r*r*r*r);
-}
+static double D3f(const double& r) { return -6. / (r * r * r * r); }
 
-static double
-D4f (const double & r)
-{
-  return 24./(r*r*r*r*r);
-}
+static double D4f(const double& r) { return 24. / (r * r * r * r * r); }
 
-static double
-g (const double & alpha,
-   const double & r)
-{
+static double g(const double& alpha, const double& r) {
   return erfc(alpha * r);
 }
 
-static double
-D1g (const double & alpha,
-    const double & r)
-{
+static double D1g(const double& alpha, const double& r) {
   double tmp = alpha * r;
-  return - M_2_SQRTPI * alpha * exp (-tmp * tmp);
+  return -M_2_SQRTPI * alpha * exp(-tmp * tmp);
 }
 
-static double
-D2g (const double & alpha,
-     const double & r)
-{
+static double D2g(const double& alpha, const double& r) {
   double tmp = alpha * r;
-  return M_2_SQRTPI * 2 * alpha * alpha * alpha * r * exp (-tmp * tmp);
+  return M_2_SQRTPI * 2 * alpha * alpha * alpha * r * exp(-tmp * tmp);
 }
 
-static double
-D3g (const double & alpha,
-     const double & r)
-{
+static double D3g(const double& alpha, const double& r) {
   double tmp = alpha * r;
-  return M_2_SQRTPI * 2 * alpha * alpha * alpha * (1. - 2. * tmp * tmp) * exp (-tmp * tmp);
+  return M_2_SQRTPI * 2 * alpha * alpha * alpha * (1. - 2. * tmp * tmp) *
+         exp(-tmp * tmp);
 }
 
-static double
-D4g (const double & alpha,
-     const double & r)
-{
+static double D4g(const double& alpha, const double& r) {
   double tmp = alpha * r;
   double alpha5 = alpha * alpha;
   alpha5 = alpha5 * alpha5 * alpha;
-  return M_2_SQRTPI * 4. * alpha5 * (-3. + 2. * tmp * tmp) * r * exp (-tmp * tmp);
+  return M_2_SQRTPI * 4. * alpha5 * (-3. + 2. * tmp * tmp) * r *
+         exp(-tmp * tmp);
 }
 
-
-double ZeroMultipole::
-funcV (const double & alpha,
-       const double & r)
-{
+double ZeroMultipole::funcV(const double& alpha, const double& r) {
   return f(r) * g(alpha, r);
 }
 
-double ZeroMultipole::
-funcD1V (const double & alpha,
-	 const double & r)
-{
+double ZeroMultipole::funcD1V(const double& alpha, const double& r) {
   return D1f(r) * g(alpha, r) + f(r) * D1g(alpha, r);
 }
 
-double ZeroMultipole::
-funcD2V (const double & alpha,
-	 const double & r)
-{
-  return D2f(r) * g(alpha, r) + 2. * D1f(r) * D1g(alpha, r) + f(r) * D2g(alpha, r);
+double ZeroMultipole::funcD2V(const double& alpha, const double& r) {
+  return D2f(r) * g(alpha, r) + 2. * D1f(r) * D1g(alpha, r) +
+         f(r) * D2g(alpha, r);
 }
 
-double ZeroMultipole::
-funcD3V (const double & alpha,
-	 const double & r)
-{
-  return D3f(r) * g(alpha, r) + 3. * D2f(r) * D1g(alpha, r) + 3. * D1f(r) * D2g(alpha, r) + f(r) * D3g(alpha, r);
+double ZeroMultipole::funcD3V(const double& alpha, const double& r) {
+  return D3f(r) * g(alpha, r) + 3. * D2f(r) * D1g(alpha, r) +
+         3. * D1f(r) * D2g(alpha, r) + f(r) * D3g(alpha, r);
 }
 
-double ZeroMultipole::
-funcD4V (const double & alpha,
-	 const double & r)
-{
-  return D4f(r) * g(alpha, r) + 4. * D3f(r) * D1g(alpha, r) + 6. * D2f(r) * D2g(alpha, r) + 4. * D1f(r) * D3g(alpha, r) + f(r) * D4g(alpha, r);
+double ZeroMultipole::funcD4V(const double& alpha, const double& r) {
+  return D4f(r) * g(alpha, r) + 4. * D3f(r) * D1g(alpha, r) +
+         6. * D2f(r) * D2g(alpha, r) + 4. * D1f(r) * D3g(alpha, r) +
+         f(r) * D4g(alpha, r);
 }
 
-
-void ZeroMultipole::
-calCoefficients (const int & ll,
-		 const double & alpha,
-		 const double & rc,
-		 vector & coeff)
-{
-  coeff.clear ();
-  coeff.resize (ll+1);
+void ZeroMultipole::calCoefficients(const int& ll,
+                                    const double& alpha,
+                                    const double& rc,
+                                    vector& coeff) {
+  coeff.clear();
+  coeff.resize(ll + 1);
   double b0, b1, b2, b3, b4;
   double invrc, invrc2, invrc3, invrc4;
   double rc2;
-      
+
   switch (ll) {
-  case 0:
-      b0 = funcV (alpha,rc);
+    case 0:
+      b0 = funcV(alpha, rc);
       coeff[0] = b0;
       break;
-  case 1:
-      b0 = funcV (alpha,rc);
-      b1 = funcD1V (alpha,rc);
+    case 1:
+      b0 = funcV(alpha, rc);
+      b1 = funcD1V(alpha, rc);
       coeff[0] = b0 - M_inv2 * b1 * rc;
       coeff[1] = M_inv2 * b1 / rc;
       break;
-  case 2:
-      b0 = funcV (alpha,rc);
-      b1 = funcD1V (alpha,rc);
-      b2 = funcD2V (alpha,rc);
-      invrc = 1./rc;
-      coeff[0] = M_inv8 * b2 * rc * rc - 5.*M_inv8 * b1 * rc + b0;
-      coeff[1] = 3.*M_inv4 * b1 * invrc - M_inv4 * b2;
-      coeff[2] = M_inv8 * b2 * invrc * invrc - M_inv8 * b1 * invrc * invrc * invrc;
+    case 2:
+      b0 = funcV(alpha, rc);
+      b1 = funcD1V(alpha, rc);
+      b2 = funcD2V(alpha, rc);
+      invrc = 1. / rc;
+      coeff[0] = M_inv8 * b2 * rc * rc - 5. * M_inv8 * b1 * rc + b0;
+      coeff[1] = 3. * M_inv4 * b1 * invrc - M_inv4 * b2;
+      coeff[2] =
+          M_inv8 * b2 * invrc * invrc - M_inv8 * b1 * invrc * invrc * invrc;
       break;
-  case 3:
-      b0 = funcV (alpha,rc);
-      b1 = funcD1V (alpha,rc);
-      b2 = funcD2V (alpha,rc);
-      b3 = funcD3V (alpha,rc);
-      invrc = 1./rc;
+    case 3:
+      b0 = funcV(alpha, rc);
+      b1 = funcD1V(alpha, rc);
+      b2 = funcD2V(alpha, rc);
+      b3 = funcD3V(alpha, rc);
+      invrc = 1. / rc;
       invrc2 = invrc * invrc;
-      coeff[0] = - M_inv48 * b3 * rc * rc * rc + 3.*M_inv16 * b2 * rc * rc - 11.*M_inv16 * b1 * rc + b0;
-      coeff[1] = 15.*M_inv16 * b1 * invrc - 7.*M_inv16 * b2 + M_inv16 * b3 * rc;
-      coeff[2] = 5.*M_inv16 * b2 * invrc2 - 5.*M_inv16 * b1 * invrc2 * invrc - M_inv16 * b3 * invrc;
-      coeff[3] = M_inv16 * b1 * invrc2 * invrc2 * invrc - M_inv16 * b2 * invrc2 * invrc2 + M_inv48 * b3 * invrc2 * invrc;
+      coeff[0] = -M_inv48 * b3 * rc * rc * rc + 3. * M_inv16 * b2 * rc * rc -
+                 11. * M_inv16 * b1 * rc + b0;
+      coeff[1] =
+          15. * M_inv16 * b1 * invrc - 7. * M_inv16 * b2 + M_inv16 * b3 * rc;
+      coeff[2] = 5. * M_inv16 * b2 * invrc2 -
+                 5. * M_inv16 * b1 * invrc2 * invrc - M_inv16 * b3 * invrc;
+      coeff[3] = M_inv16 * b1 * invrc2 * invrc2 * invrc -
+                 M_inv16 * b2 * invrc2 * invrc2 + M_inv48 * b3 * invrc2 * invrc;
       break;
-  case 4:
-      b0 = funcV (alpha,rc);
-      b1 = funcD1V (alpha,rc);
-      b2 = funcD2V (alpha,rc);
-      b3 = funcD3V (alpha,rc);
-      b4 = funcD4V (alpha,rc);
+    case 4:
+      b0 = funcV(alpha, rc);
+      b1 = funcD1V(alpha, rc);
+      b2 = funcD2V(alpha, rc);
+      b3 = funcD3V(alpha, rc);
+      b4 = funcD4V(alpha, rc);
       rc2 = rc * rc;
-      invrc = 1./rc;
+      invrc = 1. / rc;
       invrc2 = invrc * invrc;
       invrc3 = invrc2 * invrc;
       invrc4 = invrc2 * invrc2;
-      coeff[0] = 1./384. * b4 * rc2 * rc2 - 7./192. * b3 * rc2 * rc + 29./128. * b2 * rc2 - 93./128. * b1 * rc + b0;
-      coeff[1] = 35./32. * b1 * invrc - 19./32. * b2 - 1./96. * b4 * rc2 + M_inv8 * b3 * rc;
-      coeff[2] = 1./64. * b4 - 35./64. * b1 * invrc3 + 35./64. * b2 * invrc2 - 5./32. * b3 * invrc;
-      coeff[3] = 7./32. * b1 * invrc4 * invrc - 7./32. * b2 * invrc4 + 1./12. * b3 * invrc3 - 1./96. * b4 * invrc2;
-      coeff[4] = 5./128. * b2 * invrc4 * invrc2 - 5./128. * b1 * invrc4 * invrc3 - 1./64. * b3 * invrc4 * invrc + 1./384 * b4 * invrc4;
+      coeff[0] = 1. / 384. * b4 * rc2 * rc2 - 7. / 192. * b3 * rc2 * rc +
+                 29. / 128. * b2 * rc2 - 93. / 128. * b1 * rc + b0;
+      coeff[1] = 35. / 32. * b1 * invrc - 19. / 32. * b2 - 1. / 96. * b4 * rc2 +
+                 M_inv8 * b3 * rc;
+      coeff[2] = 1. / 64. * b4 - 35. / 64. * b1 * invrc3 +
+                 35. / 64. * b2 * invrc2 - 5. / 32. * b3 * invrc;
+      coeff[3] = 7. / 32. * b1 * invrc4 * invrc - 7. / 32. * b2 * invrc4 +
+                 1. / 12. * b3 * invrc3 - 1. / 96. * b4 * invrc2;
+      coeff[4] = 5. / 128. * b2 * invrc4 * invrc2 -
+                 5. / 128. * b1 * invrc4 * invrc3 -
+                 1. / 64. * b3 * invrc4 * invrc + 1. / 384 * b4 * invrc4;
       break;
-  default:
+    default:
       cerr << "ll larger than 4 is not implemented" << endl;
       break;
   }
 }
 
-
-ZeroMultipole::Potential::
-Potential ()
-    : alpha(0), rc(1.0), ll(0)
-{
-  calCoefficients (ll, alpha, rc, coeff);
+ZeroMultipole::Potential::Potential() : alpha(0), rc(1.0), ll(0) {
+  calCoefficients(ll, alpha, rc, coeff);
 }
 
-ZeroMultipole::Potential::
-Potential (const int & ll,
-	   const double & alpha,
-	   const double & rc)
-{
-  reinit (ll, alpha, rc);
+ZeroMultipole::Potential::Potential(const int& ll,
+                                    const double& alpha,
+                                    const double& rc) {
+  reinit(ll, alpha, rc);
 }
 
-void ZeroMultipole::Potential::
-reinit (const int & ll_,
-	const double & alpha_,
-	const double & rc_)
-{
+void ZeroMultipole::Potential::reinit(const int& ll_,
+                                      const double& alpha_,
+                                      const double& rc_) {
   ll = ll_;
   alpha = alpha_;
-  rc = rc_;  
-  calCoefficients (ll, alpha, rc, coeff);
+  rc = rc_;
+  calCoefficients(ll, alpha, rc, coeff);
 }
 
-double ZeroMultipole::Potential::
-pot (const double & rr)
-{
+double ZeroMultipole::Potential::pot(const double& rr) {
   if (rr > rc) return 0.;
-  double tmp0 = funcV (alpha, rr);
+  double tmp0 = funcV(alpha, rr);
   // double tmp0 = 0.;
   double tmp1 = coeff.back();
-  for (int ii = ll-1; ii >= 0; --ii){
+  for (int ii = ll - 1; ii >= 0; --ii) {
     tmp1 = tmp1 * rr * rr + coeff[ii];
   }
   return tmp0 - tmp1;
 }
 
-double ZeroMultipole::Potential::
-ulpot (const double & rr)
-{
+double ZeroMultipole::Potential::ulpot(const double& rr) {
   return pot(rr) + coeff[0];
 }
 
-
-double ZeroMultipole::Potential::
-mpotp (const double & rr) {
+double ZeroMultipole::Potential::mpotp(const double& rr) {
   if (rr > rc) return 0.;
-  double tmp0 = - funcD1V (alpha, rr);
+  double tmp0 = -funcD1V(alpha, rr);
   double tmp1 = 2 * ll * coeff[ll];
-  for (int ii = ll-1; ii >= 1; --ii){
+  for (int ii = ll - 1; ii >= 1; --ii) {
     tmp1 = tmp1 * rr * rr + coeff[ii] * 2 * ii;
   }
   return tmp0 + tmp1 * rr;
 }
 
-double ZeroMultipole::Potential::
-mulpotp (const double & rr)
-{
-  return mpotp (rr);
-}
-
-
+double ZeroMultipole::Potential::mulpotp(const double& rr) { return mpotp(rr); }
 
-double ZeroMultipole::Potential::
-energyCorr (const vector & charges) const
-{
+double ZeroMultipole::Potential::energyCorr(
+    const vector& charges) const {
   double sum = 0.;
   double factor = UnitManager::ElectrostaticConvertion;
-  for (unsigned ii = 0; ii < charges.size(); ++ii){
+  for (unsigned ii = 0; ii < charges.size(); ++ii) {
     sum += charges[ii] * charges[ii];
   }
-  
+
   // return - (coeff[0] * 0.5 + alpha / sqrt(M_PI)) * sum;
-  return - (coeff[0] * 0.5 + alpha / sqrt(M_PI)) * sum * factor;
+  return -(coeff[0] * 0.5 + alpha / sqrt(M_PI)) * sum * factor;
 }
-
-
-
-
-
-
diff --git a/source/op/CMakeLists.txt b/source/op/CMakeLists.txt
index a4635dfd15..4377944cbe 100644
--- a/source/op/CMakeLists.txt
+++ b/source/op/CMakeLists.txt
@@ -1,55 +1,92 @@
 # libop
 
-file(GLOB OP_SRC prod_env_mat_multi_device_nvnmd.cc add_flt_nvnmd.cc copy_flt_nvnmd.cc flt_nvnmd.cc map_flt_nvnmd.cc mul_flt_nvnmd.cc matmul_flt_nvnmd.cc matmul_flt2fix_nvnmd.cc matmul_fitnet_nvnmd.cc dotmul_flt_nvnmd.cc quantize_nvnmd.cc tanh4_flt_nvnmd.cc custom_op.cc prod_force.cc prod_virial.cc descrpt.cc descrpt_se_a_ef.cc descrpt_se_a_ef.cc descrpt_se_a_ef_para.cc descrpt_se_a_ef_vert.cc pair_tab.cc prod_force_multi_device.cc prod_virial_multi_device.cc soft_min.cc soft_min_force.cc soft_min_virial.cc ewald_recp.cc gelu_multi_device.cc map_aparam.cc neighbor_stat.cc unaggregated_grad.cc tabulate_multi_device.cc prod_env_mat_multi_device.cc)
-file(GLOB OP_GRADS_SRC custom_op.cc prod_force_grad.cc prod_force_grad_multi_device.cc prod_virial_grad.cc prod_virial_grad_multi_device.cc soft_min_force_grad.cc soft_min_virial_grad.cc )
+file(
+  GLOB
+  OP_SRC
+  prod_env_mat_multi_device_nvnmd.cc
+  add_flt_nvnmd.cc
+  copy_flt_nvnmd.cc
+  flt_nvnmd.cc
+  map_flt_nvnmd.cc
+  mul_flt_nvnmd.cc
+  matmul_flt_nvnmd.cc
+  matmul_flt2fix_nvnmd.cc
+  matmul_fitnet_nvnmd.cc
+  dotmul_flt_nvnmd.cc
+  quantize_nvnmd.cc
+  tanh4_flt_nvnmd.cc
+  custom_op.cc
+  prod_force.cc
+  prod_virial.cc
+  descrpt.cc
+  descrpt_se_a_ef.cc
+  descrpt_se_a_ef.cc
+  descrpt_se_a_ef_para.cc
+  descrpt_se_a_ef_vert.cc
+  pair_tab.cc
+  prod_force_multi_device.cc
+  prod_virial_multi_device.cc
+  soft_min.cc
+  soft_min_force.cc
+  soft_min_virial.cc
+  ewald_recp.cc
+  gelu_multi_device.cc
+  map_aparam.cc
+  neighbor_stat.cc
+  unaggregated_grad.cc
+  tabulate_multi_device.cc
+  prod_env_mat_multi_device.cc)
+file(
+  GLOB
+  OP_GRADS_SRC
+  custom_op.cc
+  prod_force_grad.cc
+  prod_force_grad_multi_device.cc
+  prod_virial_grad.cc
+  prod_virial_grad_multi_device.cc
+  soft_min_force_grad.cc
+  soft_min_virial_grad.cc)
 file(GLOB OP_PY *.py)
 file(GLOB OP_REMAPPER_SRC optimizer/parallel.cc)
 
 add_library(${LIB_DEEPMD_OP} MODULE ${OP_SRC} ${OP_REMAPPER_SRC})
 # link: libdeepmd libtensorflow_cc libtensorflow_framework
-target_link_libraries (${LIB_DEEPMD_OP} PRIVATE TensorFlow::tensorflow_framework)
-target_link_libraries (${LIB_DEEPMD_OP} PRIVATE ${LIB_DEEPMD})
+target_link_libraries(${LIB_DEEPMD_OP} PRIVATE TensorFlow::tensorflow_framework)
+target_link_libraries(${LIB_DEEPMD_OP} PRIVATE ${LIB_DEEPMD})
 if(APPLE)
-  set_target_properties(${LIB_DEEPMD_OP} PROPERTIES INSTALL_RPATH "@loader_path;${TensorFlow_LIBRARY_PATH}")
+  set_target_properties(
+    ${LIB_DEEPMD_OP} PROPERTIES INSTALL_RPATH
+                                "@loader_path;${TensorFlow_LIBRARY_PATH}")
 else()
-  set_target_properties(${LIB_DEEPMD_OP} PROPERTIES INSTALL_RPATH "$ORIGIN;${TensorFlow_LIBRARY_PATH}")
+  set_target_properties(
+    ${LIB_DEEPMD_OP} PROPERTIES INSTALL_RPATH
+                                "$ORIGIN;${TensorFlow_LIBRARY_PATH}")
 endif()
-if (CMAKE_TESTING_ENABLED)
+if(CMAKE_TESTING_ENABLED)
   target_link_libraries(${LIB_DEEPMD_OP} PRIVATE coverage_config)
 endif()
 target_precompile_headers(${LIB_DEEPMD_OP} PRIVATE custom_op.h)
 
-if (BUILD_PY_IF)
+if(BUILD_PY_IF)
   add_library(op_grads MODULE ${OP_GRADS_SRC})
   # link: libdeepmd libtensorflow_framework
   target_link_libraries(op_grads PRIVATE ${LIB_DEEPMD})
-  target_link_libraries(
-    op_grads PRIVATE TensorFlow::tensorflow_framework
-    )
+  target_link_libraries(op_grads PRIVATE TensorFlow::tensorflow_framework)
   if(APPLE)
-    set_target_properties(
-      op_grads
-      PROPERTIES
-      INSTALL_RPATH @loader_path
-      )
+    set_target_properties(op_grads PROPERTIES INSTALL_RPATH @loader_path)
   else()
-  set_target_properties(
-    op_grads
-    PROPERTIES
-    INSTALL_RPATH $ORIGIN
-    )
-  endif ()
-  if (CMAKE_TESTING_ENABLED)
+    set_target_properties(op_grads PROPERTIES INSTALL_RPATH $ORIGIN)
+  endif()
+  if(CMAKE_TESTING_ENABLED)
     target_link_libraries(op_grads PRIVATE coverage_config)
   endif()
   target_precompile_headers(op_grads PRIVATE custom_op.h)
-endif (BUILD_PY_IF)
+endif(BUILD_PY_IF)
 
-if (BUILD_PY_IF)
-  install(TARGETS ${LIB_DEEPMD_OP}		DESTINATION deepmd/op/)
-  install(TARGETS op_grads			DESTINATION deepmd/op/)
-  install(FILES  ${OP_PY}			DESTINATION deepmd/op/)
+if(BUILD_PY_IF)
+  install(TARGETS ${LIB_DEEPMD_OP} DESTINATION deepmd/op/)
+  install(TARGETS op_grads DESTINATION deepmd/op/)
+  install(FILES ${OP_PY} DESTINATION deepmd/op/)
 else(BUILD_PY_IF)
-  install(TARGETS ${LIB_DEEPMD_OP}		DESTINATION lib/)
-endif (BUILD_PY_IF)
-
+  install(TARGETS ${LIB_DEEPMD_OP} DESTINATION lib/)
+endif(BUILD_PY_IF)
diff --git a/source/op/_add_flt_nvnmd_grad.py b/source/op/_add_flt_nvnmd_grad.py
index e1e345c386..3a0ed96512 100644
--- a/source/op/_add_flt_nvnmd_grad.py
+++ b/source/op/_add_flt_nvnmd_grad.py
@@ -1,12 +1,17 @@
 #!/usr/bin/env python3
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_module
-from deepmd.env import tf 
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_module,
+    tf,
+)
+
 
 @ops.RegisterGradient("AddFltNvnmd")
 def _AddFltNvnmdGrad(op, grad):
     dx = op_module.flt_nvnmd(grad)
     dw = dx
     return [dx, dw]
-
diff --git a/source/op/_copy_flt_nvnmd_grad.py b/source/op/_copy_flt_nvnmd_grad.py
index dca98f6c80..f85edcf7f1 100644
--- a/source/op/_copy_flt_nvnmd_grad.py
+++ b/source/op/_copy_flt_nvnmd_grad.py
@@ -1,11 +1,16 @@
 #!/usr/bin/env python3
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_module
-from deepmd.env import tf 
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_module,
+    tf,
+)
+
 
 @ops.RegisterGradient("CopyFltNvnmd")
 def _CpoyFltNvnmdGrad(op, grad1, grad2):
     dx = op_module.add_flt_nvnmd(grad1, grad2)
     return [dx]
-
diff --git a/source/op/_dotmul_flt_nvnmd_grad.py b/source/op/_dotmul_flt_nvnmd_grad.py
index 78fe08a1a5..97653b1540 100644
--- a/source/op/_dotmul_flt_nvnmd_grad.py
+++ b/source/op/_dotmul_flt_nvnmd_grad.py
@@ -1,8 +1,14 @@
 #!/usr/bin/env python3
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_module
-from deepmd.env import tf 
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_module,
+    tf,
+)
+
 
 @ops.RegisterGradient("DotmulFltNvnmd")
 def _DotmulFltNvnmdGrad(op, grad):
diff --git a/source/op/_flt_nvnmd_grad.py b/source/op/_flt_nvnmd_grad.py
index 50799ab9ae..5c4b7f1c81 100644
--- a/source/op/_flt_nvnmd_grad.py
+++ b/source/op/_flt_nvnmd_grad.py
@@ -1,11 +1,16 @@
 #!/usr/bin/env python3
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_module
-from deepmd.env import tf 
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_module,
+    tf,
+)
+
 
 @ops.RegisterGradient("FltNvnmd")
 def _FltNvnmdGrad(op, grad):
     dx = op_module.flt_nvnmd(grad)
     return [dx]
-
diff --git a/source/op/_gelu.py b/source/op/_gelu.py
index dc818c1804..d9d4b725a0 100644
--- a/source/op/_gelu.py
+++ b/source/op/_gelu.py
@@ -3,25 +3,38 @@
 First-order derivatives and second-order derivatives for gelu function.
 """
 import tensorflow
-from tensorflow.python.framework import ops
-from deepmd.env import op_module
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_module,
+)
 
 try:
     gelu = tensorflow.nn.gelu
 except AttributeError:
+
     @ops.RegisterGradient("Gelu")
-    def _gelu_cc (op, dy) :
-        return op_module.gelu_grad_custom(dy, op.inputs[0])    
+    def _gelu_cc(op, dy):
+        return op_module.gelu_grad_custom(dy, op.inputs[0])
 
     @ops.RegisterGradient("GeluGrad")
-    def _gelu_grad_cc (op, dy) :
-        return [op_module.gelu_grad_custom(dy, op.inputs[1]), op_module.gelu_grad_grad_custom(dy, op.inputs[0], op.inputs[1])]
+    def _gelu_grad_cc(op, dy):
+        return [
+            op_module.gelu_grad_custom(dy, op.inputs[1]),
+            op_module.gelu_grad_grad_custom(dy, op.inputs[0], op.inputs[1]),
+        ]
 
 
 @ops.RegisterGradient("GeluCustom")
-def _gelu_custom_cc (op, dy):
-    return op_module.gelu_grad_custom(dy, op.inputs[0])      
+def _gelu_custom_cc(op, dy):
+    return op_module.gelu_grad_custom(dy, op.inputs[0])
+
 
 @ops.RegisterGradient("GeluGradCustom")
-def _gelu_grad_custom_cc (op, dy) :
-    return [op_module.gelu_grad_custom(dy, op.inputs[1]), op_module.gelu_grad_grad_custom(dy, op.inputs[0], op.inputs[1])]
\ No newline at end of file
+def _gelu_grad_custom_cc(op, dy):
+    return [
+        op_module.gelu_grad_custom(dy, op.inputs[1]),
+        op_module.gelu_grad_grad_custom(dy, op.inputs[0], op.inputs[1]),
+    ]
diff --git a/source/op/_map_flt_nvnmd_grad.py b/source/op/_map_flt_nvnmd_grad.py
index 8ed03e3127..434701bc6e 100644
--- a/source/op/_map_flt_nvnmd_grad.py
+++ b/source/op/_map_flt_nvnmd_grad.py
@@ -1,8 +1,14 @@
 #!/usr/bin/env python3
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_module
-from deepmd.env import tf 
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_module,
+    tf,
+)
+
 
 @ops.RegisterGradient("MapFltNvnmd")
 def _MapFltNvnmdGrad(op, grad):
@@ -29,4 +35,3 @@ def _MapFltNvnmdGrad(op, grad):
     d_table_grad = None
     d_table_info = None
     return [dx, d_table, d_table_grad, d_table_info]
-
diff --git a/source/op/_matmul_fitnet_nvnmd_grad.py b/source/op/_matmul_fitnet_nvnmd_grad.py
index ce5c731363..77fafb7d24 100644
--- a/source/op/_matmul_fitnet_nvnmd_grad.py
+++ b/source/op/_matmul_fitnet_nvnmd_grad.py
@@ -1,8 +1,14 @@
 #!/usr/bin/env python3
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_module
-from deepmd.env import tf 
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_module,
+    tf,
+)
+
 
 @ops.RegisterGradient("MatmulFitnetNvnmd")
 def _MatmulFitnetNvnmdGrad(op, grad):
diff --git a/source/op/_matmul_flt2fix_nvnmd.py b/source/op/_matmul_flt2fix_nvnmd.py
index b7b105ccdc..3b47b14a09 100644
--- a/source/op/_matmul_flt2fix_nvnmd.py
+++ b/source/op/_matmul_flt2fix_nvnmd.py
@@ -1,8 +1,14 @@
 #!/usr/bin/env python3
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_module
-from deepmd.env import tf 
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_module,
+    tf,
+)
+
 
 @ops.RegisterGradient("MatmulFlt2fixNvnmd")
 def _MatmulFlt2fixNvnmdGrad(op, grad):
diff --git a/source/op/_matmul_flt_nvnmd_grad.py b/source/op/_matmul_flt_nvnmd_grad.py
index 300418da97..383e464e64 100644
--- a/source/op/_matmul_flt_nvnmd_grad.py
+++ b/source/op/_matmul_flt_nvnmd_grad.py
@@ -1,8 +1,14 @@
 #!/usr/bin/env python3
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_module
-from deepmd.env import tf 
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_module,
+    tf,
+)
+
 
 @ops.RegisterGradient("MatmulFltNvnmd")
 def _MatmulFltNvnmdGrad(op, grad):
diff --git a/source/op/_mul_flt_nvnmd_grad.py b/source/op/_mul_flt_nvnmd_grad.py
index 348abac8cd..e1fb72ac6b 100644
--- a/source/op/_mul_flt_nvnmd_grad.py
+++ b/source/op/_mul_flt_nvnmd_grad.py
@@ -1,8 +1,14 @@
 #!/usr/bin/env python3
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_module
-from deepmd.env import tf 
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_module,
+    tf,
+)
+
 
 @ops.RegisterGradient("MulFltNvnmd")
 def _MulFltNvnmdGrad(op, grad):
diff --git a/source/op/_prod_force_grad.py b/source/op/_prod_force_grad.py
index ddd20d9a5b..6d949fda6e 100644
--- a/source/op/_prod_force_grad.py
+++ b/source/op/_prod_force_grad.py
@@ -3,17 +3,25 @@
 Gradients for prod force.
 """
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_grads_module
-     
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_grads_module,
+)
+
+
 @ops.RegisterGradient("ProdForce")
-def _prod_force_grad_cc (op, grad):    
-    net_grad =  op_grads_module.prod_force_grad (grad, 
-                                                 op.inputs[0], 
-                                                 op.inputs[1], 
-                                                 op.inputs[2], 
-                                                 op.inputs[3], 
-                                                 op.inputs[4], 
-                                                 n_a_sel = op.get_attr("n_a_sel"),
-                                                 n_r_sel = op.get_attr("n_r_sel"))
+def _prod_force_grad_cc(op, grad):
+    net_grad = op_grads_module.prod_force_grad(
+        grad,
+        op.inputs[0],
+        op.inputs[1],
+        op.inputs[2],
+        op.inputs[3],
+        op.inputs[4],
+        n_a_sel=op.get_attr("n_a_sel"),
+        n_r_sel=op.get_attr("n_r_sel"),
+    )
     return [net_grad, None, None, None, None]
diff --git a/source/op/_prod_force_se_a_grad.py b/source/op/_prod_force_se_a_grad.py
index 8f69ef5139..4dc7e13e90 100644
--- a/source/op/_prod_force_se_a_grad.py
+++ b/source/op/_prod_force_se_a_grad.py
@@ -3,16 +3,24 @@
 Gradients for prod force.
 """
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_grads_module
-     
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_grads_module,
+)
+
+
 @ops.RegisterGradient("ProdForceSeA")
-def _prod_force_se_a_grad_cc (op, grad):    
-    net_grad =  op_grads_module.prod_force_se_a_grad (grad, 
-                                                       op.inputs[0], 
-                                                       op.inputs[1], 
-                                                       op.inputs[2], 
-                                                       op.inputs[3], 
-                                                       n_a_sel = op.get_attr("n_a_sel"),
-                                                       n_r_sel = op.get_attr("n_r_sel"))
+def _prod_force_se_a_grad_cc(op, grad):
+    net_grad = op_grads_module.prod_force_se_a_grad(
+        grad,
+        op.inputs[0],
+        op.inputs[1],
+        op.inputs[2],
+        op.inputs[3],
+        n_a_sel=op.get_attr("n_a_sel"),
+        n_r_sel=op.get_attr("n_r_sel"),
+    )
     return [net_grad, None, None, None]
diff --git a/source/op/_prod_force_se_r_grad.py b/source/op/_prod_force_se_r_grad.py
index 721ab927da..b2e9335843 100644
--- a/source/op/_prod_force_se_r_grad.py
+++ b/source/op/_prod_force_se_r_grad.py
@@ -3,14 +3,18 @@
 Gradients for prod force.
 """
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_grads_module
-     
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_grads_module,
+)
+
+
 @ops.RegisterGradient("ProdForceSeR")
-def _prod_force_se_a_grad_cc (op, grad):    
-    net_grad =  op_grads_module.prod_force_se_r_grad (grad, 
-                                                      op.inputs[0], 
-                                                      op.inputs[1], 
-                                                      op.inputs[2], 
-                                                      op.inputs[3])
+def _prod_force_se_a_grad_cc(op, grad):
+    net_grad = op_grads_module.prod_force_se_r_grad(
+        grad, op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[3]
+    )
     return [net_grad, None, None, None]
diff --git a/source/op/_prod_virial_grad.py b/source/op/_prod_virial_grad.py
index 8ed49200ed..c25a7e8430 100644
--- a/source/op/_prod_virial_grad.py
+++ b/source/op/_prod_virial_grad.py
@@ -3,18 +3,26 @@
 Gradients for prod virial.
 """
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_grads_module
-     
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_grads_module,
+)
+
+
 @ops.RegisterGradient("ProdVirial")
-def _prod_virial_grad_cc (op, grad, grad_atom):    
-    net_grad =  op_grads_module.prod_virial_grad (grad, 
-                                                  op.inputs[0], 
-                                                  op.inputs[1], 
-                                                  op.inputs[2], 
-                                                  op.inputs[3], 
-                                                  op.inputs[4], 
-                                                  op.inputs[5], 
-                                                  n_a_sel = op.get_attr("n_a_sel"),
-                                                  n_r_sel = op.get_attr("n_r_sel"))
+def _prod_virial_grad_cc(op, grad, grad_atom):
+    net_grad = op_grads_module.prod_virial_grad(
+        grad,
+        op.inputs[0],
+        op.inputs[1],
+        op.inputs[2],
+        op.inputs[3],
+        op.inputs[4],
+        op.inputs[5],
+        n_a_sel=op.get_attr("n_a_sel"),
+        n_r_sel=op.get_attr("n_r_sel"),
+    )
     return [net_grad, None, None, None, None, None]
diff --git a/source/op/_prod_virial_se_a_grad.py b/source/op/_prod_virial_se_a_grad.py
index ea19a3ef14..d3bcf93529 100644
--- a/source/op/_prod_virial_se_a_grad.py
+++ b/source/op/_prod_virial_se_a_grad.py
@@ -3,17 +3,25 @@
 Gradients for prod virial.
 """
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_grads_module
-     
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_grads_module,
+)
+
+
 @ops.RegisterGradient("ProdVirialSeA")
-def _prod_virial_se_a_grad_cc (op, grad, grad_atom):    
-    net_grad =  op_grads_module.prod_virial_se_a_grad (grad, 
-                                                        op.inputs[0], 
-                                                        op.inputs[1], 
-                                                        op.inputs[2], 
-                                                        op.inputs[3], 
-                                                        op.inputs[4], 
-                                                        n_a_sel = op.get_attr("n_a_sel"),
-                                                        n_r_sel = op.get_attr("n_r_sel"))
+def _prod_virial_se_a_grad_cc(op, grad, grad_atom):
+    net_grad = op_grads_module.prod_virial_se_a_grad(
+        grad,
+        op.inputs[0],
+        op.inputs[1],
+        op.inputs[2],
+        op.inputs[3],
+        op.inputs[4],
+        n_a_sel=op.get_attr("n_a_sel"),
+        n_r_sel=op.get_attr("n_r_sel"),
+    )
     return [net_grad, None, None, None, None]
diff --git a/source/op/_prod_virial_se_r_grad.py b/source/op/_prod_virial_se_r_grad.py
index 367f2c90c3..fe085c56d2 100644
--- a/source/op/_prod_virial_se_r_grad.py
+++ b/source/op/_prod_virial_se_r_grad.py
@@ -3,15 +3,18 @@
 Gradients for prod virial.
 """
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_grads_module
-     
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_grads_module,
+)
+
+
 @ops.RegisterGradient("ProdVirialSeR")
-def _prod_virial_se_a_grad_cc (op, grad, grad_atom):    
-    net_grad =  op_grads_module.prod_virial_se_r_grad (grad, 
-                                                       op.inputs[0], 
-                                                       op.inputs[1], 
-                                                       op.inputs[2], 
-                                                       op.inputs[3], 
-                                                       op.inputs[4])
+def _prod_virial_se_a_grad_cc(op, grad, grad_atom):
+    net_grad = op_grads_module.prod_virial_se_r_grad(
+        grad, op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[3], op.inputs[4]
+    )
     return [net_grad, None, None, None, None]
diff --git a/source/op/_quantize_nvnmd_grad.py b/source/op/_quantize_nvnmd_grad.py
index 9356d6f1cf..1a28910f05 100644
--- a/source/op/_quantize_nvnmd_grad.py
+++ b/source/op/_quantize_nvnmd_grad.py
@@ -1,8 +1,14 @@
 #!/usr/bin/env python3
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_module
-from deepmd.env import tf 
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_module,
+    tf,
+)
+
 
 @ops.RegisterGradient("QuantizeNvnmd")
 def _QuantizeNvnmdGrad(op, grad):
diff --git a/source/op/_soft_min_force_grad.py b/source/op/_soft_min_force_grad.py
index be3d2c29d5..b52593bf12 100644
--- a/source/op/_soft_min_force_grad.py
+++ b/source/op/_soft_min_force_grad.py
@@ -3,17 +3,24 @@
 Gradients for soft min force
 """
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_grads_module
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_grads_module,
+)
+
 
-     
 @ops.RegisterGradient("SoftMinForce")
-def _soft_min_force_grad_cc (op, grad):    
-    net_grad = op_grads_module.soft_min_force_grad (grad, 
-                                                    op.inputs[0], 
-                                                    op.inputs[1], 
-                                                    op.inputs[2], 
-                                                    op.inputs[3], 
-                                                    n_a_sel = op.get_attr("n_a_sel"),
-                                                    n_r_sel = op.get_attr("n_r_sel"))
+def _soft_min_force_grad_cc(op, grad):
+    net_grad = op_grads_module.soft_min_force_grad(
+        grad,
+        op.inputs[0],
+        op.inputs[1],
+        op.inputs[2],
+        op.inputs[3],
+        n_a_sel=op.get_attr("n_a_sel"),
+        n_r_sel=op.get_attr("n_r_sel"),
+    )
     return [net_grad, None, None, None]
diff --git a/source/op/_soft_min_virial_grad.py b/source/op/_soft_min_virial_grad.py
index 6c6d980aa2..c72d7eae4a 100644
--- a/source/op/_soft_min_virial_grad.py
+++ b/source/op/_soft_min_virial_grad.py
@@ -3,18 +3,25 @@
 Gradients for soft min virial.
 """
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_grads_module
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_grads_module,
+)
+
 
-     
 @ops.RegisterGradient("SoftMinVirial")
-def _soft_min_virial_grad_cc (op, grad, grad_atom):    
-    net_grad =  op_grads_module.soft_min_virial_grad (grad, 
-                                                      op.inputs[0], 
-                                                      op.inputs[1], 
-                                                      op.inputs[2], 
-                                                      op.inputs[3], 
-                                                      op.inputs[4], 
-                                                      n_a_sel = op.get_attr("n_a_sel"),
-                                                      n_r_sel = op.get_attr("n_r_sel"))
+def _soft_min_virial_grad_cc(op, grad, grad_atom):
+    net_grad = op_grads_module.soft_min_virial_grad(
+        grad,
+        op.inputs[0],
+        op.inputs[1],
+        op.inputs[2],
+        op.inputs[3],
+        op.inputs[4],
+        n_a_sel=op.get_attr("n_a_sel"),
+        n_r_sel=op.get_attr("n_r_sel"),
+    )
     return [net_grad, None, None, None, None]
diff --git a/source/op/_tabulate_grad.py b/source/op/_tabulate_grad.py
index 6fb83966cc..f7bf64cc11 100644
--- a/source/op/_tabulate_grad.py
+++ b/source/op/_tabulate_grad.py
@@ -3,39 +3,63 @@
 Gradients for tabulate.
 """
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_module
-from deepmd.env import tf
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_module,
+    tf,
+)
+
 # from deepmd.DescrptSeATabulate import last_layer_size
 
+
 @ops.RegisterGradient("TabulateFusion")
 @ops.RegisterGradient("TabulateFusionSeA")
-def _tabulate_fusion_se_a_grad_cc (op, dy):    
-    dy_dx, dy_df = op_module.tabulate_fusion_se_a_grad(op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[3], dy, op.outputs[0])
+def _tabulate_fusion_se_a_grad_cc(op, dy):
+    dy_dx, dy_df = op_module.tabulate_fusion_se_a_grad(
+        op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[3], dy, op.outputs[0]
+    )
     return [None, None, dy_dx, dy_df]
 
+
 @ops.RegisterGradient("TabulateFusionGrad")
 @ops.RegisterGradient("TabulateFusionSeAGrad")
-def _tabulate_fusion_se_a_grad_grad_cc (op, dy, dy_):
-    dz_dy = op_module.tabulate_fusion_se_a_grad_grad(op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[3], dy, dy_, op.inputs[5])
+def _tabulate_fusion_se_a_grad_grad_cc(op, dy, dy_):
+    dz_dy = op_module.tabulate_fusion_se_a_grad_grad(
+        op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[3], dy, dy_, op.inputs[5]
+    )
     return [None, None, None, None, dz_dy, None]
 
+
 @ops.RegisterGradient("TabulateFusionSeT")
-def _tabulate_fusion_se_t_grad_cc (op, dy):    
-    dy_dx, dy_df = op_module.tabulate_fusion_se_t_grad(op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[3], dy, op.outputs[0])
+def _tabulate_fusion_se_t_grad_cc(op, dy):
+    dy_dx, dy_df = op_module.tabulate_fusion_se_t_grad(
+        op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[3], dy, op.outputs[0]
+    )
     return [None, None, dy_dx, dy_df]
 
+
 @ops.RegisterGradient("TabulateFusionSeTGrad")
-def _tabulate_fusion_se_t_grad_grad_cc (op, dy, dy_):
-    dz_dy = op_module.tabulate_fusion_se_t_grad_grad(op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[3], dy, dy_, op.inputs[5])
+def _tabulate_fusion_se_t_grad_grad_cc(op, dy, dy_):
+    dz_dy = op_module.tabulate_fusion_se_t_grad_grad(
+        op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[3], dy, dy_, op.inputs[5]
+    )
     return [None, None, None, None, dz_dy, None]
 
+
 @ops.RegisterGradient("TabulateFusionSeR")
-def _tabulate_fusion_se_r_grad_cc (op, dy):    
-    dy_df = op_module.tabulate_fusion_se_r_grad(op.inputs[0], op.inputs[1], op.inputs[2], dy, op.outputs[0])
+def _tabulate_fusion_se_r_grad_cc(op, dy):
+    dy_df = op_module.tabulate_fusion_se_r_grad(
+        op.inputs[0], op.inputs[1], op.inputs[2], dy, op.outputs[0]
+    )
     return [None, None, dy_df]
 
+
 @ops.RegisterGradient("TabulateFusionSeRGrad")
-def _tabulate_fusion_se_r_grad_grad_cc (op, dy):
-    dz_dy = op_module.tabulate_fusion_se_r_grad_grad(op.inputs[0], op.inputs[1], op.inputs[2], dy, op.inputs[4])
-    return [None, None, None, dz_dy, None]
\ No newline at end of file
+def _tabulate_fusion_se_r_grad_grad_cc(op, dy):
+    dz_dy = op_module.tabulate_fusion_se_r_grad_grad(
+        op.inputs[0], op.inputs[1], op.inputs[2], dy, op.inputs[4]
+    )
+    return [None, None, None, dz_dy, None]
diff --git a/source/op/_tanh4_flt_nvnmd_grad.py b/source/op/_tanh4_flt_nvnmd_grad.py
index cbac024edc..38ab75f1ac 100644
--- a/source/op/_tanh4_flt_nvnmd_grad.py
+++ b/source/op/_tanh4_flt_nvnmd_grad.py
@@ -1,13 +1,19 @@
 #!/usr/bin/env python3
 
-from tensorflow.python.framework import ops
-from deepmd.env import op_module
-from deepmd.env import tf 
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.env import (
+    op_module,
+    tf,
+)
+
 
 @ops.RegisterGradient("Tanh4FltNvnmd")
 def _Tanh4FltNvnmdGrad(op, grad):
-    prechi = 2 ** 23
-    preclo = 2 ** 19
+    prechi = 2**23
+    preclo = 2**19
     x = op.inputs[0]
     xa = tf.abs(x)
     xc = tf.clip_by_value(xa, 0, 2)
@@ -17,12 +23,12 @@ def _Tanh4FltNvnmdGrad(op, grad):
     xxhi = xx + tf.stop_gradient(tf.floor(xx * prechi) / prechi - xx)
     xxlo = xx + tf.stop_gradient(tf.floor(xx * preclo) / preclo - xx)
     #
-    dydx = xxlo * (xhi/4 - 3/4) + 1
+    dydx = xxlo * (xhi / 4 - 3 / 4) + 1
     # dydx = xxhi * (xlo/4 - 3/4) + 1
-    dydxhi = dydx + tf.stop_gradient( tf.floor(dydx * prechi) / prechi - dydx)
-    dydxlo = dydx + tf.stop_gradient( tf.floor(dydx * preclo) / preclo - dydx)
+    dydxhi = dydx + tf.stop_gradient(tf.floor(dydx * prechi) / prechi - dydx)
+    dydxlo = dydx + tf.stop_gradient(tf.floor(dydx * preclo) / preclo - dydx)
     #
-    gradhi = grad + tf.stop_gradient( tf.floor(grad * prechi) / prechi - grad) 
+    gradhi = grad + tf.stop_gradient(tf.floor(grad * prechi) / prechi - grad)
     dx = dydxlo * gradhi
-    dx = dx + tf.stop_gradient( tf.floor(dx * prechi) / prechi - dx )
+    dx = dx + tf.stop_gradient(tf.floor(dx * prechi) / prechi - dx)
     return dx
diff --git a/source/op/add_flt_nvnmd.cc b/source/op/add_flt_nvnmd.cc
index fc60db3d3c..f0b762fc1f 100644
--- a/source/op/add_flt_nvnmd.cc
+++ b/source/op/add_flt_nvnmd.cc
@@ -10,7 +10,7 @@ y = float(float(x) + float(w))
 # float64:
 1 bit sign
 11 bits exponent
-52 bits fraction 
+52 bits fraction
 
 # float29
 1 bit sign
@@ -26,102 +26,95 @@ y = float(float(x) + float(w))
 // --------------------------------------------------------------------
 //
 
-
 //- import the library of tensorflow
-#include "custom_op.h"
 #include 
-#include "math.h"
+
+#include "custom_op.h"
 #include "env_mat_nvnmd.h"
+#include "math.h"
 
 using namespace tensorflow;
 
-template  // float and double
-void add_flt_nvnmd(T &y, T x1, T x2);
+template   // float and double
+void add_flt_nvnmd(T& y, T x1, T x2);
 
 //- register the operator
 REGISTER_OP("AddFltNvnmd")
-  .Attr("T: {float, double} = DT_DOUBLE")
-  .Input("x: T")
-  .Input("w: T")
-  .Output("y: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("x: T")
+    .Input("w: T")
+    .Output("y: T");
 
 //- create the operator class
 //* the class must inherit the OpKernel Class
 template 
 class AddFltNvnmdOp : public OpKernel {
-public:
-
-/// Constructor.
-explicit AddFltNvnmdOp(OpKernelConstruction* context) : OpKernel(context) {
-};
-
-
-/// Compute the descriptor
-/// param: context
-void Compute(OpKernelContext* context) override {
-  // check
-  DCHECK_EQ(2, context->num_inputs());
-  const Tensor& X = context->input(0);
-  const Tensor& W = context->input(1);
-
-  const TensorShape& shX = X.shape();
-  const TensorShape& shW = W.shape();
-  TensorShape shY;
-
-  DCHECK_EQ(shW.dims(), shX.dims());
-
-  int H, N, M;
-  if (shX.dims() == 3) {
-    DCHECK_EQ(shW.dim_size(0), shX.dim_size(0));
-    DCHECK_EQ(shW.dim_size(1), shX.dim_size(1));
-    DCHECK_EQ(shW.dim_size(2), shX.dim_size(2));
-
-    H = shX.dim_size(0);
-    N = shX.dim_size(1);
-    M = shX.dim_size(2);
-
-    shY.AddDim(H);
-    shY.AddDim(N);
-    shY.AddDim(M);
-  }
-  if (shX.dims() == 2) {
-    DCHECK_EQ(shW.dim_size(0), shX.dim_size(0));
-    DCHECK_EQ(shW.dim_size(1), shX.dim_size(1));
-
-    H = 1;
-    N = shX.dim_size(0);
-    M = shX.dim_size(1);
-
-    shY.AddDim(N);
-    shY.AddDim(M);
-  }
-
-  // create output
-  Tensor* Y = NULL;
-  OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
-
-  // compute
-  auto x = X.flat().data();
-  auto w = W.flat().data();
-  auto y = Y->flat().data();
-
-  int ii;
-
-  for (ii=0; ii("T"), \
-    AddFltNvnmdOp);
-REGISTER_CPU(float);                  
+ public:
+  /// Constructor.
+  explicit AddFltNvnmdOp(OpKernelConstruction* context) : OpKernel(context){};
+
+  /// Compute the descriptor
+  /// param: context
+  void Compute(OpKernelContext* context) override {
+    // check
+    DCHECK_EQ(2, context->num_inputs());
+    const Tensor& X = context->input(0);
+    const Tensor& W = context->input(1);
+
+    const TensorShape& shX = X.shape();
+    const TensorShape& shW = W.shape();
+    TensorShape shY;
+
+    DCHECK_EQ(shW.dims(), shX.dims());
+
+    int H, N, M;
+    if (shX.dims() == 3) {
+      DCHECK_EQ(shW.dim_size(0), shX.dim_size(0));
+      DCHECK_EQ(shW.dim_size(1), shX.dim_size(1));
+      DCHECK_EQ(shW.dim_size(2), shX.dim_size(2));
+
+      H = shX.dim_size(0);
+      N = shX.dim_size(1);
+      M = shX.dim_size(2);
+
+      shY.AddDim(H);
+      shY.AddDim(N);
+      shY.AddDim(M);
+    }
+    if (shX.dims() == 2) {
+      DCHECK_EQ(shW.dim_size(0), shX.dim_size(0));
+      DCHECK_EQ(shW.dim_size(1), shX.dim_size(1));
+
+      H = 1;
+      N = shX.dim_size(0);
+      M = shX.dim_size(1);
+
+      shY.AddDim(N);
+      shY.AddDim(M);
+    }
+
+    // create output
+    Tensor* Y = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
+
+    // compute
+    auto x = X.flat().data();
+    auto w = W.flat().data();
+    auto y = Y->flat().data();
+
+    int ii;
+
+    for (ii = 0; ii < H * N * M; ii++) {
+      add_flt_nvnmd(y[ii], x[ii], w[ii]);
+    }
+
+  }  // Compute
+
+};  // AddFltNvnmdOp
+
+#define REGISTER_CPU(T)                                              \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name("AddFltNvnmd").Device(DEVICE_CPU).TypeConstraint("T"), \
+      AddFltNvnmdOp);
+REGISTER_CPU(float);
 REGISTER_CPU(double);
-
-
-
diff --git a/source/op/copy_flt_nvnmd.cc b/source/op/copy_flt_nvnmd.cc
index 316867aa35..1a1e723766 100644
--- a/source/op/copy_flt_nvnmd.cc
+++ b/source/op/copy_flt_nvnmd.cc
@@ -11,7 +11,7 @@ y2 = float(x)
 # float64:
 1 bit sign
 11 bits exponent
-52 bits fraction 
+52 bits fraction
 
 # float
 1 bit sign
@@ -27,95 +27,88 @@ y2 = float(x)
 // --------------------------------------------------------------------
 //
 
-
 //- import the library of tensorflow
-#include "custom_op.h"
 #include 
-#include "math.h"
+
+#include "custom_op.h"
 #include "env_mat_nvnmd.h"
+#include "math.h"
 
 using namespace tensorflow;
 
 //- register the operator
 REGISTER_OP("CopyFltNvnmd")
-  .Attr("T: {float, double} = DT_DOUBLE")
-  .Input("x: T")
-  .Output("y1: T")
-  .Output("y2: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("x: T")
+    .Output("y1: T")
+    .Output("y2: T");
 
 //- create the operator class
 //* the class must inherit the OpKernel Class
 template 
 class CopyFltNvnmdOp : public OpKernel {
-public:
-
-/// Constructor.
-explicit CopyFltNvnmdOp(OpKernelConstruction* context) : OpKernel(context) {
-};
-
-
-/// Compute the descriptor
-/// param: context
-void Compute(OpKernelContext* context) override {
-  // check
-  DCHECK_EQ(1, context->num_inputs());
-  const Tensor& X = context->input(0);
-
-  const TensorShape& shX = X.shape();
-  TensorShape shY;
-
-  int H, N, M;
-  if (shX.dims() == 3) {
-    H = shX.dim_size(0);
-    N = shX.dim_size(1);
-    M = shX.dim_size(2);
-
-    shY.AddDim(H);
-    shY.AddDim(N);
-    shY.AddDim(M);
-  }
-  if (shX.dims() == 2) {
-    // process 2-dimension as 3-dimension
-    H = 1;
-    N = shX.dim_size(0);
-    M = shX.dim_size(1);
-
-    shY.AddDim(N);
-    shY.AddDim(M);
-  }
-
-  // create output
-  Tensor* Y1 = NULL;
-  Tensor* Y2 = NULL;
-  OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y1));
-  OP_REQUIRES_OK(context, context->allocate_output(1, shY, &Y2));
-
-  // compute
-  auto x = X.flat().data();
-  auto y1 = Y1->flat().data();
-  auto y2 = Y2->flat().data();
-
-  int ii;
-  U_Flt64_Int64 ufi;
-
-  for (ii=0; ii("T"), \
-    CopyFltNvnmdOp);
-REGISTER_CPU(float);                  
+ public:
+  /// Constructor.
+  explicit CopyFltNvnmdOp(OpKernelConstruction* context) : OpKernel(context){};
+
+  /// Compute the descriptor
+  /// param: context
+  void Compute(OpKernelContext* context) override {
+    // check
+    DCHECK_EQ(1, context->num_inputs());
+    const Tensor& X = context->input(0);
+
+    const TensorShape& shX = X.shape();
+    TensorShape shY;
+
+    int H, N, M;
+    if (shX.dims() == 3) {
+      H = shX.dim_size(0);
+      N = shX.dim_size(1);
+      M = shX.dim_size(2);
+
+      shY.AddDim(H);
+      shY.AddDim(N);
+      shY.AddDim(M);
+    }
+    if (shX.dims() == 2) {
+      // process 2-dimension as 3-dimension
+      H = 1;
+      N = shX.dim_size(0);
+      M = shX.dim_size(1);
+
+      shY.AddDim(N);
+      shY.AddDim(M);
+    }
+
+    // create output
+    Tensor* Y1 = NULL;
+    Tensor* Y2 = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y1));
+    OP_REQUIRES_OK(context, context->allocate_output(1, shY, &Y2));
+
+    // compute
+    auto x = X.flat().data();
+    auto y1 = Y1->flat().data();
+    auto y2 = Y2->flat().data();
+
+    int ii;
+    U_Flt64_Int64 ufi;
+
+    for (ii = 0; ii < H * N * M; ii++) {
+      ufi.nflt = x[ii];
+      // 1.52 - 1.21 = 32
+      ufi.nint &= FLT_MASK;
+      y1[ii] = ufi.nflt;
+      y2[ii] = ufi.nflt;
+    }
+  }  // Compute
+
+};  // CopyFltNvnmdOp
+
+#define REGISTER_CPU(T)                                               \
+  REGISTER_KERNEL_BUILDER(                                            \
+      Name("CopyFltNvnmd").Device(DEVICE_CPU).TypeConstraint("T"), \
+      CopyFltNvnmdOp);
+REGISTER_CPU(float);
 REGISTER_CPU(double);
-
-
-
diff --git a/source/op/custom_op.cc b/source/op/custom_op.cc
index 741fb3ace6..d18d2caa4c 100644
--- a/source/op/custom_op.cc
+++ b/source/op/custom_op.cc
@@ -1,20 +1,20 @@
 #include "custom_op.h"
+
 #include "errors.h"
 
 namespace deepmd {
-  void safe_compute(OpKernelContext* context, std::function ff) {
-    try{
-      ff(context);
-    } catch (deepmd::deepmd_exception_oom& e){
-      OP_REQUIRES_OK(
-          context,
-          errors::ResourceExhausted("Operation received an exception: ", e.what(),
-                          ", in file ",__FILE__, ":", __LINE__));
-    } catch (deepmd::deepmd_exception& e) {
-      OP_REQUIRES_OK(
-          context,
-          errors::Internal("Operation received an exception: ", e.what(),
-                          ", in file ",__FILE__, ":", __LINE__));
-    }
+void safe_compute(OpKernelContext* context,
+                  std::function ff) {
+  try {
+    ff(context);
+  } catch (deepmd::deepmd_exception_oom& e) {
+    OP_REQUIRES_OK(context, errors::ResourceExhausted(
+                                "Operation received an exception: ", e.what(),
+                                ", in file ", __FILE__, ":", __LINE__));
+  } catch (deepmd::deepmd_exception& e) {
+    OP_REQUIRES_OK(
+        context, errors::Internal("Operation received an exception: ", e.what(),
+                                  ", in file ", __FILE__, ":", __LINE__));
   }
-};
\ No newline at end of file
+}
+};  // namespace deepmd
diff --git a/source/op/custom_op.h b/source/op/custom_op.h
index d00b1dfc4b..ee9c7880c7 100644
--- a/source/op/custom_op.h
+++ b/source/op/custom_op.h
@@ -1,9 +1,9 @@
 #pragma once
-#include 
-#include 
 #include 
-#include "device.h"
+#include 
+#include 
 
+#include "device.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/shape_inference.h"
@@ -14,22 +14,13 @@ using GPUDevice = Eigen::GpuDevice;
 
 // functions used in custom ops
 struct DeviceFunctor {
-  void operator()(
-      std::string& device, 
-      const CPUDevice& d) 
-  {
-    device = "CPU";
-  }
-  #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-  void operator()(
-      std::string& device, 
-      const GPUDevice& d) 
-  {
-    device = "GPU";
-  }
-  #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  void operator()(std::string& device, const CPUDevice& d) { device = "CPU"; }
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  void operator()(std::string& device, const GPUDevice& d) { device = "GPU"; }
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 };
 
 namespace deepmd {
-  void safe_compute(OpKernelContext* context, std::function ff);
-};
\ No newline at end of file
+void safe_compute(OpKernelContext* context,
+                  std::function ff);
+};
diff --git a/source/op/descrpt.cc b/source/op/descrpt.cc
index 3731804fef..5560849e2a 100644
--- a/source/op/descrpt.cc
+++ b/source/op/descrpt.cc
@@ -1,44 +1,44 @@
-#include "custom_op.h"
 #include "ComputeDescriptor.h"
-#include "neighbor_list.h"
-#include "fmt_nlist.h"
+#include "custom_op.h"
 #include "errors.h"
+#include "fmt_nlist.h"
+#include "neighbor_list.h"
 
-typedef double boxtensor_t ;
+typedef double boxtensor_t;
 typedef double compute_t;
 
 REGISTER_OP("Descrpt")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("coord: T")
-.Input("type: int32")
-.Input("natoms: int32")
-.Input("box: T")
-.Input("mesh: int32")
-.Input("davg: T")
-.Input("dstd: T")
-.Attr("rcut_a: float")
-.Attr("rcut_r: float")
-.Attr("sel_a: list(int)")
-.Attr("sel_r: list(int)")
-.Attr("axis_rule: list(int)")
-.Output("descrpt: T")
-.Output("descrpt_deriv: T")
-.Output("rij: T")
-.Output("nlist: int32")
-.Output("axis: int32")
-.Output("rot_mat: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("coord: T")
+    .Input("type: int32")
+    .Input("natoms: int32")
+    .Input("box: T")
+    .Input("mesh: int32")
+    .Input("davg: T")
+    .Input("dstd: T")
+    .Attr("rcut_a: float")
+    .Attr("rcut_r: float")
+    .Attr("sel_a: list(int)")
+    .Attr("sel_r: list(int)")
+    .Attr("axis_rule: list(int)")
+    .Output("descrpt: T")
+    .Output("descrpt_deriv: T")
+    .Output("rij: T")
+    .Output("nlist: int32")
+    .Output("axis: int32")
+    .Output("rot_mat: T");
 
-template
+template 
 class DescrptOp : public OpKernel {
-public:
+ public:
   explicit DescrptOp(OpKernelConstruction* context) : OpKernel(context) {
     OP_REQUIRES_OK(context, context->GetAttr("rcut_a", &rcut_a));
     OP_REQUIRES_OK(context, context->GetAttr("rcut_r", &rcut_r));
     OP_REQUIRES_OK(context, context->GetAttr("sel_a", &sel_a));
     OP_REQUIRES_OK(context, context->GetAttr("sel_r", &sel_r));
     OP_REQUIRES_OK(context, context->GetAttr("axis_rule", &axis_rule));
-    cum_sum (sec_a, sel_a);
-    cum_sum (sec_r, sel_r);
+    cum_sum(sec_a, sel_a);
+    cum_sum(sec_r, sel_r);
     ndescrpt_a = sec_a.back() * 4;
     ndescrpt_r = sec_r.back() * 1;
     ndescrpt = ndescrpt_a + ndescrpt_r;
@@ -50,66 +50,81 @@ class DescrptOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
-    const Tensor& coord_tensor	= context->input(0);
-    const Tensor& type_tensor	= context->input(1);
-    const Tensor& natoms_tensor	= context->input(2);
-    const Tensor& box_tensor	= context->input(3);
-    const Tensor& mesh_tensor	= context->input(4);
-    const Tensor& avg_tensor	= context->input(5);
-    const Tensor& std_tensor	= context->input(6);
+    const Tensor& coord_tensor = context->input(0);
+    const Tensor& type_tensor = context->input(1);
+    const Tensor& natoms_tensor = context->input(2);
+    const Tensor& box_tensor = context->input(3);
+    const Tensor& mesh_tensor = context->input(4);
+    const Tensor& avg_tensor = context->input(5);
+    const Tensor& std_tensor = context->input(6);
 
     // set size of the sample
-    OP_REQUIRES (context, (coord_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of coord should be 2"));
-    OP_REQUIRES (context, (type_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of type should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),	errors::InvalidArgument ("Dim of natoms should be 1"));
-    OP_REQUIRES (context, (box_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of box should be 2"));
-    OP_REQUIRES (context, (mesh_tensor.shape().dims() == 1),	errors::InvalidArgument ("Dim of mesh should be 1"));
-    OP_REQUIRES (context, (avg_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of avg should be 2"));
-    OP_REQUIRES (context, (std_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of std should be 2"));
+    OP_REQUIRES(context, (coord_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of coord should be 2"));
+    OP_REQUIRES(context, (type_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of type should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (box_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of box should be 2"));
+    OP_REQUIRES(context, (mesh_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of mesh should be 1"));
+    OP_REQUIRES(context, (avg_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of avg should be 2"));
+    OP_REQUIRES(context, (std_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of std should be 2"));
 
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),		errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
     int nloc = natoms(0);
     int nall = natoms(1);
     int ntypes = natoms_tensor.shape().dim_size(0) - 2;
     int nsamples = coord_tensor.shape().dim_size(0);
 
     // check the sizes
-    OP_REQUIRES (context, (nsamples == type_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nsamples == box_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (ntypes == avg_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of avg should be ntype"));
-    OP_REQUIRES (context, (ntypes == std_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of std should be ntype"));
+    OP_REQUIRES(context, (nsamples == type_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nsamples == box_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (ntypes == avg_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of avg should be ntype"));
+    OP_REQUIRES(context, (ntypes == std_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of std should be ntype"));
 
-    OP_REQUIRES (context, (nall * 3 == coord_tensor.shape().dim_size(1)),	errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (nall == type_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (9 == box_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of box should be 9"));
-    OP_REQUIRES (context, (ndescrpt == avg_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of avg should be ndescrpt"));
-    OP_REQUIRES (context, (ndescrpt == std_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of std should be ndescrpt"));
+    OP_REQUIRES(context, (nall * 3 == coord_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (nall == type_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (9 == box_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of box should be 9"));
+    OP_REQUIRES(context, (ndescrpt == avg_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of avg should be ndescrpt"));
+    OP_REQUIRES(context, (ndescrpt == std_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of std should be ndescrpt"));
 
     int nei_mode = 0;
     if (mesh_tensor.shape().dim_size(0) == 16) {
       // lammps neighbor list
       nei_mode = 3;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 12) {
+    } else if (mesh_tensor.shape().dim_size(0) == 12) {
       // user provided extended mesh
       nei_mode = 2;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 6) {
+    } else if (mesh_tensor.shape().dim_size(0) == 6) {
       // manual copied pbc
-      assert (nloc == nall);
+      assert(nloc == nall);
       nei_mode = 1;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 0) {
+    } else if (mesh_tensor.shape().dim_size(0) == 0) {
       // no pbc
       nei_mode = -1;
-    }
-    else {
+    } else {
       throw deepmd::deepmd_exception("invalid mesh tensor");
     }
     bool b_pbc = true;
@@ -118,55 +133,61 @@ class DescrptOp : public OpKernel {
       b_pbc = false;
     }
     bool b_norm_atom = false;
-    if (nei_mode == 1){
+    if (nei_mode == 1) {
       b_norm_atom = true;
     }
 
     // Create an output tensor
-    TensorShape descrpt_shape ;
-    descrpt_shape.AddDim (nsamples);
-    descrpt_shape.AddDim (nloc * ndescrpt);
-    TensorShape descrpt_deriv_shape ;
-    descrpt_deriv_shape.AddDim (nsamples);
-    descrpt_deriv_shape.AddDim (nloc * ndescrpt * 12);
-    TensorShape rij_shape ;
-    rij_shape.AddDim (nsamples);
-    rij_shape.AddDim (nloc * nnei * 3);
-    TensorShape nlist_shape ;
-    nlist_shape.AddDim (nsamples);
-    nlist_shape.AddDim (nloc * nnei);
-    TensorShape axis_shape ;
-    axis_shape.AddDim (nsamples);
-    axis_shape.AddDim (nloc * 4);
-    TensorShape rot_mat_shape ;
-    rot_mat_shape.AddDim (nsamples);
-    rot_mat_shape.AddDim (nloc * 9);
+    TensorShape descrpt_shape;
+    descrpt_shape.AddDim(nsamples);
+    descrpt_shape.AddDim(nloc * ndescrpt);
+    TensorShape descrpt_deriv_shape;
+    descrpt_deriv_shape.AddDim(nsamples);
+    descrpt_deriv_shape.AddDim(nloc * ndescrpt * 12);
+    TensorShape rij_shape;
+    rij_shape.AddDim(nsamples);
+    rij_shape.AddDim(nloc * nnei * 3);
+    TensorShape nlist_shape;
+    nlist_shape.AddDim(nsamples);
+    nlist_shape.AddDim(nloc * nnei);
+    TensorShape axis_shape;
+    axis_shape.AddDim(nsamples);
+    axis_shape.AddDim(nloc * 4);
+    TensorShape rot_mat_shape;
+    rot_mat_shape.AddDim(nsamples);
+    rot_mat_shape.AddDim(nloc * 9);
 
     Tensor* descrpt_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, descrpt_shape, &descrpt_tensor));
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, descrpt_shape, &descrpt_tensor));
     Tensor* descrpt_deriv_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(1, descrpt_deriv_shape, &descrpt_deriv_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(1, descrpt_deriv_shape,
+                                                     &descrpt_deriv_tensor));
     Tensor* rij_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(2, rij_shape, &rij_tensor));
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(2, rij_shape, &rij_tensor));
     Tensor* nlist_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(3, nlist_shape, &nlist_tensor));
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(3, nlist_shape, &nlist_tensor));
     Tensor* axis_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(4, axis_shape, &axis_tensor));
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(4, axis_shape, &axis_tensor));
     Tensor* rot_mat_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(5, rot_mat_shape, &rot_mat_tensor));
-    
-    auto coord	= coord_tensor	.matrix();
-    auto type	= type_tensor	.matrix();
-    auto box	= box_tensor	.matrix();
-    auto mesh	= mesh_tensor	.flat();
-    auto avg	= avg_tensor	.matrix();
-    auto std	= std_tensor	.matrix();
-    auto descrpt	= descrpt_tensor	->matrix();
-    auto descrpt_deriv	= descrpt_deriv_tensor	->matrix();
-    auto rij		= rij_tensor		->matrix();
-    auto nlist		= nlist_tensor		->matrix();
-    auto axis		= axis_tensor		->matrix();
-    auto rot_mat	= rot_mat_tensor		->matrix();
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(5, rot_mat_shape, &rot_mat_tensor));
+
+    auto coord = coord_tensor.matrix();
+    auto type = type_tensor.matrix();
+    auto box = box_tensor.matrix();
+    auto mesh = mesh_tensor.flat();
+    auto avg = avg_tensor.matrix();
+    auto std = std_tensor.matrix();
+    auto descrpt = descrpt_tensor->matrix();
+    auto descrpt_deriv = descrpt_deriv_tensor->matrix();
+    auto rij = rij_tensor->matrix();
+    auto nlist = nlist_tensor->matrix();
+    auto axis = axis_tensor->matrix();
+    auto rot_mat = rot_mat_tensor->matrix();
 
     // // check the types
     // int max_type_v = 0;
@@ -174,197 +195,199 @@ class DescrptOp : public OpKernel {
     //   if (type(0, ii) > max_type_v) max_type_v = type(0, ii);
     // }
     // int ntypes = max_type_v + 1;
-    OP_REQUIRES (context, (ntypes == int(sel_a.size())),	errors::InvalidArgument ("number of types should match the length of sel array"));
-    OP_REQUIRES (context, (ntypes == int(sel_r.size())),	errors::InvalidArgument ("number of types should match the length of sel array"));
+    OP_REQUIRES(context, (ntypes == int(sel_a.size())),
+                errors::InvalidArgument(
+                    "number of types should match the length of sel array"));
+    OP_REQUIRES(context, (ntypes == int(sel_r.size())),
+                errors::InvalidArgument(
+                    "number of types should match the length of sel array"));
 
-    for (int kk = 0; kk < nsamples; ++kk){
+    for (int kk = 0; kk < nsamples; ++kk) {
       // set region
-      boxtensor_t boxt [9] = {0};
+      boxtensor_t boxt[9] = {0};
       for (int dd = 0; dd < 9; ++dd) {
-	boxt[dd] = box(kk, dd);
+        boxt[dd] = box(kk, dd);
       }
-      SimulationRegion region;
-      region.reinitBox (boxt);
+      SimulationRegion region;
+      region.reinitBox(boxt);
 
       // set & normalize coord
-      std::vector d_coord3 (nall*3);
-      for (int ii = 0; ii < nall; ++ii){
-	for (int dd = 0; dd < 3; ++dd){
-	  d_coord3[ii*3+dd] = coord(kk, ii*3+dd);
-	}
-	if (b_norm_atom){
-	  compute_t inter[3];
-	  region.phys2Inter (inter, &d_coord3[3*ii]);
-	  for (int dd = 0; dd < 3; ++dd){
-	    if      (inter[dd] < 0 ) inter[dd] += 1.;
-	    else if (inter[dd] >= 1) inter[dd] -= 1.;
-	  }
-	  region.inter2Phys (&d_coord3[3*ii], inter);
-	}
+      std::vector d_coord3(nall * 3);
+      for (int ii = 0; ii < nall; ++ii) {
+        for (int dd = 0; dd < 3; ++dd) {
+          d_coord3[ii * 3 + dd] = coord(kk, ii * 3 + dd);
+        }
+        if (b_norm_atom) {
+          compute_t inter[3];
+          region.phys2Inter(inter, &d_coord3[3 * ii]);
+          for (int dd = 0; dd < 3; ++dd) {
+            if (inter[dd] < 0)
+              inter[dd] += 1.;
+            else if (inter[dd] >= 1)
+              inter[dd] -= 1.;
+          }
+          region.inter2Phys(&d_coord3[3 * ii], inter);
+        }
       }
 
       // set type
-      std::vector d_type (nall);
+      std::vector d_type(nall);
       for (int ii = 0; ii < nall; ++ii) d_type[ii] = type(kk, ii);
-      
+
       // build nlist
-      std::vector > d_nlist_a;
-      std::vector > d_nlist_r;
+      std::vector > d_nlist_a;
+      std::vector > d_nlist_r;
       std::vector nlist_map;
       bool b_nlist_map = false;
-      if (nei_mode == 3) {	
-	int *pilist, *pnumneigh, **pfirstneigh;
-	memcpy (&pilist, &mesh(4), sizeof(int *));
-	memcpy (&pnumneigh, &mesh(8), sizeof(int *));
-	memcpy (&pfirstneigh, &mesh(12), sizeof(int **));
-	int inum = mesh(1);
-	assert (inum == nloc);
-	d_nlist_a.resize (inum);
-	d_nlist_r.resize (inum);
-	for (unsigned ii = 0; ii < inum; ++ii){
-	  int i_idx = pilist[ii];
-	  d_nlist_r[i_idx].reserve(pnumneigh[ii]);
-	  for (unsigned jj = 0; jj < pnumneigh[ii]; ++jj){
-	    int j_idx = pfirstneigh[ii][jj];
-	    d_nlist_r[i_idx].push_back(j_idx);
-	  }
-	}
-      }
-      else if (nei_mode == 2) {
-	std::vector nat_stt = {mesh(1-1), mesh(2-1), mesh(3-1)};
-	std::vector nat_end = {mesh(4-1), mesh(5-1), mesh(6-1)};
-	std::vector ext_stt = {mesh(7-1), mesh(8-1), mesh(9-1)};
-	std::vector ext_end = {mesh(10-1), mesh(11-1), mesh(12-1)};
-	std::vector global_grid (3);
-	for (int dd = 0; dd < 3; ++dd) global_grid[dd] = nat_end[dd] - nat_stt[dd];
-	::build_nlist (d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r, nat_stt, nat_end, ext_stt, ext_end, region, global_grid);
-      }
-      else if (nei_mode == 1) {
-	std::vector bk_d_coord3 = d_coord3;
-	std::vector bk_d_type = d_type;
-	std::vector ncell, ngcell;
-	copy_coord(d_coord3, d_type, nlist_map, ncell, ngcell, bk_d_coord3, bk_d_type, rcut_r, region);	
-	b_nlist_map = true;
-	std::vector nat_stt(3, 0);
-	std::vector ext_stt(3), ext_end(3);
-	for (int dd = 0; dd < 3; ++dd){
-	  ext_stt[dd] = -ngcell[dd];
-	  ext_end[dd] = ncell[dd] + ngcell[dd];
-	}
-	::build_nlist (d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-      }
-      else if (nei_mode == -1){
-	::build_nlist (d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL);
-      }
-      else {
-	throw deepmd::deepmd_exception("unknow neighbor mode");
+      if (nei_mode == 3) {
+        int *pilist, *pnumneigh, **pfirstneigh;
+        memcpy(&pilist, &mesh(4), sizeof(int*));
+        memcpy(&pnumneigh, &mesh(8), sizeof(int*));
+        memcpy(&pfirstneigh, &mesh(12), sizeof(int**));
+        int inum = mesh(1);
+        assert(inum == nloc);
+        d_nlist_a.resize(inum);
+        d_nlist_r.resize(inum);
+        for (unsigned ii = 0; ii < inum; ++ii) {
+          int i_idx = pilist[ii];
+          d_nlist_r[i_idx].reserve(pnumneigh[ii]);
+          for (unsigned jj = 0; jj < pnumneigh[ii]; ++jj) {
+            int j_idx = pfirstneigh[ii][jj];
+            d_nlist_r[i_idx].push_back(j_idx);
+          }
+        }
+      } else if (nei_mode == 2) {
+        std::vector nat_stt = {mesh(1 - 1), mesh(2 - 1), mesh(3 - 1)};
+        std::vector nat_end = {mesh(4 - 1), mesh(5 - 1), mesh(6 - 1)};
+        std::vector ext_stt = {mesh(7 - 1), mesh(8 - 1), mesh(9 - 1)};
+        std::vector ext_end = {mesh(10 - 1), mesh(11 - 1), mesh(12 - 1)};
+        std::vector global_grid(3);
+        for (int dd = 0; dd < 3; ++dd)
+          global_grid[dd] = nat_end[dd] - nat_stt[dd];
+        ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r,
+                      nat_stt, nat_end, ext_stt, ext_end, region, global_grid);
+      } else if (nei_mode == 1) {
+        std::vector bk_d_coord3 = d_coord3;
+        std::vector bk_d_type = d_type;
+        std::vector ncell, ngcell;
+        copy_coord(d_coord3, d_type, nlist_map, ncell, ngcell, bk_d_coord3,
+                   bk_d_type, rcut_r, region);
+        b_nlist_map = true;
+        std::vector nat_stt(3, 0);
+        std::vector ext_stt(3), ext_end(3);
+        for (int dd = 0; dd < 3; ++dd) {
+          ext_stt[dd] = -ngcell[dd];
+          ext_end[dd] = ncell[dd] + ngcell[dd];
+        }
+        ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r,
+                      nat_stt, ncell, ext_stt, ext_end, region, ncell);
+      } else if (nei_mode == -1) {
+        ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL);
+      } else {
+        throw deepmd::deepmd_exception("unknow neighbor mode");
       }
 
       // loop over atoms, compute descriptors for each atom
-#pragma omp parallel for 
-      for (int ii = 0; ii < nloc; ++ii){
-	std::vector fmt_nlist_a;
-	std::vector fmt_nlist_r;
-	int ret = -1;
-	if (fill_nei_a){
-	  if ((ret = format_nlist_i_fill_a (fmt_nlist_a, fmt_nlist_r, d_coord3, ntypes, d_type, region, b_pbc, ii, d_nlist_a[ii], d_nlist_r[ii], rcut_r, sec_a, sec_r)) != -1){
-	    if (count_nei_idx_overflow == 0) {
-	      std::cout << "WARNING: Radial neighbor list length of type " << ret << " is not enough" << std::endl;
-	      flush(std::cout);
-	      count_nei_idx_overflow ++;
-	    }
-	  }
-	}
+#pragma omp parallel for
+      for (int ii = 0; ii < nloc; ++ii) {
+        std::vector fmt_nlist_a;
+        std::vector fmt_nlist_r;
+        int ret = -1;
+        if (fill_nei_a) {
+          if ((ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, d_coord3,
+                                           ntypes, d_type, region, b_pbc, ii,
+                                           d_nlist_a[ii], d_nlist_r[ii], rcut_r,
+                                           sec_a, sec_r)) != -1) {
+            if (count_nei_idx_overflow == 0) {
+              std::cout << "WARNING: Radial neighbor list length of type "
+                        << ret << " is not enough" << std::endl;
+              flush(std::cout);
+              count_nei_idx_overflow++;
+            }
+          }
+        }
 
-	// set axis
-	std::vector d_axis_type (2);
-	std::vector d_axis_idx  (2);
-	make_axis (d_axis_type, d_axis_idx, d_type[ii], axis_rule, ii, fmt_nlist_a, fmt_nlist_r, d_coord3, region, b_pbc);
-	// std::cout << ii  << " type " << d_type[ii] 
-	//      << " axis 0: " << d_axis_type[0] << " " << d_axis_idx[0] 
-	//      << " axis 1: " << d_axis_type[1] << " " << d_axis_idx[1] << std::endl;
+        // set axis
+        std::vector d_axis_type(2);
+        std::vector d_axis_idx(2);
+        make_axis(d_axis_type, d_axis_idx, d_type[ii], axis_rule, ii,
+                  fmt_nlist_a, fmt_nlist_r, d_coord3, region, b_pbc);
+        // std::cout << ii  << " type " << d_type[ii]
+        //      << " axis 0: " << d_axis_type[0] << " " << d_axis_idx[0]
+        //      << " axis 1: " << d_axis_type[1] << " " << d_axis_idx[1] <<
+        //      std::endl;
 
-	std::vector d_descrpt_a;
-	std::vector d_descrpt_a_deriv;
-	std::vector d_descrpt_r;
-	std::vector d_descrpt_r_deriv;
-	std::vector d_rij_a;
-	std::vector d_rij_r;
-	std::vector rot;
-	compute_descriptor (d_descrpt_a,
-			    d_descrpt_a_deriv,
-			    d_descrpt_r,
-			    d_descrpt_r_deriv,
-			    d_rij_a,
-			    d_rij_r,
-			    rot,
-			    d_coord3,
-			    ntypes, 
-			    d_type,
-			    region, 
-			    b_pbc,
-			    ii, 
-			    fmt_nlist_a,
-			    fmt_nlist_r,
-			    sec_a, 
-			    sec_r, 
-			    d_axis_type[0],
-			    d_axis_idx [0],
-			    d_axis_type[1],
-			    d_axis_idx [1]);
-	// check sizes
-	assert (d_descrpt_a.size() == ndescrpt_a);
-	assert (d_descrpt_r.size() == ndescrpt_r);
-	assert (d_descrpt_a_deriv.size() == ndescrpt_a * 12);
-	assert (d_descrpt_r_deriv.size() == ndescrpt_r * 12);
-	assert (d_rij_a.size() == nnei_a * 3);
-	assert (d_rij_r.size() == nnei_r * 3);
-	assert (int(fmt_nlist_a.size()) == nnei_a);
-	assert (int(fmt_nlist_r.size()) == nnei_r);
-	// record outputs
-	for (int jj = 0; jj < ndescrpt_a; ++jj) {
-	  descrpt(kk, ii * ndescrpt + jj) = (d_descrpt_a[jj] - avg(d_type[ii], jj)) / std(d_type[ii], jj);
-	}
-	for (int jj = 0; jj < ndescrpt_r; ++jj) {
-	  descrpt(kk, ii * ndescrpt + ndescrpt_a + jj) = (d_descrpt_r[jj] - avg(d_type[ii], ndescrpt_a + jj)) / std(d_type[ii], ndescrpt_a + jj);
-	}
-	for (int jj = 0; jj < ndescrpt_a * 12; ++jj) {
-	  descrpt_deriv(kk, ii * ndescrpt * 12 + jj) = d_descrpt_a_deriv[jj] / std(d_type[ii], jj/12);
-	}
-	for (int jj = 0; jj < ndescrpt_r * 12; ++jj) {
-	  descrpt_deriv(kk, ii * ndescrpt * 12 + ndescrpt_a * 12 + jj) = d_descrpt_r_deriv[jj] / std(d_type[ii], jj/12 + ndescrpt_a);
-	}
-	for (int jj = 0; jj < 9; ++jj){
-	  rot_mat(kk, ii * 9 + jj) = rot[jj];
-	}
-	for (int jj = 0; jj < nnei_a * 3; ++jj){
-	  rij (kk, ii * nnei * 3 + jj) = d_rij_a[jj];
-	}
-	for (int jj = 0; jj < nnei_r * 3; ++jj){
-	  rij (kk, ii * nnei * 3 + nnei_a * 3 + jj) = d_rij_r[jj];
-	}
-	for (int jj = 0; jj < nnei_a; ++jj){
-	  int record = fmt_nlist_a[jj];
-	  if (b_nlist_map && record >= 0) {
-	    record = nlist_map[record];
-	  }
-	  nlist (kk, ii * nnei + jj) = record;
-	}
-	for (int jj = 0; jj < nnei_r; ++jj){
-	  int record = fmt_nlist_r[jj];
-	  if (b_nlist_map && record >= 0) {
-	    record = nlist_map[record];
-	  }
-	  nlist (kk, ii * nnei + nnei_a + jj) = record;
-	}
-	for (int jj = 0; jj < 2; ++jj){
-	  axis (kk, ii * 4 + jj * 2 + 0) = d_axis_type[jj];
-	  axis (kk, ii * 4 + jj * 2 + 1) = d_axis_idx [jj];
-	}
+        std::vector d_descrpt_a;
+        std::vector d_descrpt_a_deriv;
+        std::vector d_descrpt_r;
+        std::vector d_descrpt_r_deriv;
+        std::vector d_rij_a;
+        std::vector d_rij_r;
+        std::vector rot;
+        compute_descriptor(d_descrpt_a, d_descrpt_a_deriv, d_descrpt_r,
+                           d_descrpt_r_deriv, d_rij_a, d_rij_r, rot, d_coord3,
+                           ntypes, d_type, region, b_pbc, ii, fmt_nlist_a,
+                           fmt_nlist_r, sec_a, sec_r, d_axis_type[0],
+                           d_axis_idx[0], d_axis_type[1], d_axis_idx[1]);
+        // check sizes
+        assert(d_descrpt_a.size() == ndescrpt_a);
+        assert(d_descrpt_r.size() == ndescrpt_r);
+        assert(d_descrpt_a_deriv.size() == ndescrpt_a * 12);
+        assert(d_descrpt_r_deriv.size() == ndescrpt_r * 12);
+        assert(d_rij_a.size() == nnei_a * 3);
+        assert(d_rij_r.size() == nnei_r * 3);
+        assert(int(fmt_nlist_a.size()) == nnei_a);
+        assert(int(fmt_nlist_r.size()) == nnei_r);
+        // record outputs
+        for (int jj = 0; jj < ndescrpt_a; ++jj) {
+          descrpt(kk, ii * ndescrpt + jj) =
+              (d_descrpt_a[jj] - avg(d_type[ii], jj)) / std(d_type[ii], jj);
+        }
+        for (int jj = 0; jj < ndescrpt_r; ++jj) {
+          descrpt(kk, ii * ndescrpt + ndescrpt_a + jj) =
+              (d_descrpt_r[jj] - avg(d_type[ii], ndescrpt_a + jj)) /
+              std(d_type[ii], ndescrpt_a + jj);
+        }
+        for (int jj = 0; jj < ndescrpt_a * 12; ++jj) {
+          descrpt_deriv(kk, ii * ndescrpt * 12 + jj) =
+              d_descrpt_a_deriv[jj] / std(d_type[ii], jj / 12);
+        }
+        for (int jj = 0; jj < ndescrpt_r * 12; ++jj) {
+          descrpt_deriv(kk, ii * ndescrpt * 12 + ndescrpt_a * 12 + jj) =
+              d_descrpt_r_deriv[jj] / std(d_type[ii], jj / 12 + ndescrpt_a);
+        }
+        for (int jj = 0; jj < 9; ++jj) {
+          rot_mat(kk, ii * 9 + jj) = rot[jj];
+        }
+        for (int jj = 0; jj < nnei_a * 3; ++jj) {
+          rij(kk, ii * nnei * 3 + jj) = d_rij_a[jj];
+        }
+        for (int jj = 0; jj < nnei_r * 3; ++jj) {
+          rij(kk, ii * nnei * 3 + nnei_a * 3 + jj) = d_rij_r[jj];
+        }
+        for (int jj = 0; jj < nnei_a; ++jj) {
+          int record = fmt_nlist_a[jj];
+          if (b_nlist_map && record >= 0) {
+            record = nlist_map[record];
+          }
+          nlist(kk, ii * nnei + jj) = record;
+        }
+        for (int jj = 0; jj < nnei_r; ++jj) {
+          int record = fmt_nlist_r[jj];
+          if (b_nlist_map && record >= 0) {
+            record = nlist_map[record];
+          }
+          nlist(kk, ii * nnei + nnei_a + jj) = record;
+        }
+        for (int jj = 0; jj < 2; ++jj) {
+          axis(kk, ii * 4 + jj * 2 + 0) = d_axis_type[jj];
+          axis(kk, ii * 4 + jj * 2 + 1) = d_axis_idx[jj];
+        }
       }
     }
   }
-private:
+
+ private:
   float rcut_a;
   float rcut_r;
   std::vector sel_r;
@@ -376,229 +399,213 @@ class DescrptOp : public OpKernel {
   int nnei, nnei_a, nnei_r;
   bool fill_nei_a;
   int count_nei_idx_overflow;
-  void 
-  cum_sum (std::vector & sec,
-	   const std::vector & n_sel) const {
-    sec.resize (n_sel.size() + 1);
+  void cum_sum(std::vector& sec, const std::vector& n_sel) const {
+    sec.resize(n_sel.size() + 1);
     sec[0] = 0;
-    for (int ii = 1; ii < sec.size(); ++ii){
-      sec[ii] = sec[ii-1] + n_sel[ii-1];
+    for (int ii = 1; ii < sec.size(); ++ii) {
+      sec[ii] = sec[ii - 1] + n_sel[ii - 1];
     }
   }
-  void 
-  make_axis (std::vector & axis_type,
-	     std::vector & axis_idx,
-	     const int & type,
-	     const std::vector & rule, 
-	     const int ii,
-	     const std::vector & nlist_a,
-	     const std::vector & nlist_r,
-	     const std::vector & coord3,
-	     const SimulationRegion & region, 
-	     const bool b_pbc) const {
+  void make_axis(std::vector& axis_type,
+                 std::vector& axis_idx,
+                 const int& type,
+                 const std::vector& rule,
+                 const int ii,
+                 const std::vector& nlist_a,
+                 const std::vector& nlist_r,
+                 const std::vector& coord3,
+                 const SimulationRegion& region,
+                 const bool b_pbc) const {
     int backup_axis = -1;
-    if (rule.size() == 0){
-      make_axis_default (axis_type, axis_idx);
-    }
-    else{
+    if (rule.size() == 0) {
+      make_axis_default(axis_type, axis_idx);
+    } else {
       int ntypes = sel_a.size();
       // two axis, for each axis (a_or_r, type, id)
       assert(rule.size() == ntypes * 2 * 3);
       axis_type.resize(2);
-      axis_idx .resize(2);
+      axis_idx.resize(2);
       std::vector::const_iterator iter;
       iter = rule.begin() + type * 6;
-      if (*(iter+1) >= 0) {
-	make_one_axis (axis_type[0], axis_idx[0], iter);
-      }
-      else {
-	make_one_axis (axis_type[0], axis_idx[0], iter, ii, nlist_a, nlist_r, coord3, region, b_pbc);
+      if (*(iter + 1) >= 0) {
+        make_one_axis(axis_type[0], axis_idx[0], iter);
+      } else {
+        make_one_axis(axis_type[0], axis_idx[0], iter, ii, nlist_a, nlist_r,
+                      coord3, region, b_pbc);
       }
       iter = rule.begin() + type * 6 + 3;
-      if (*(iter+1) >= 0) {      
-	make_one_axis (axis_type[1], axis_idx[1], iter);
-      }
-      else {
-	make_one_axis (axis_type[1], axis_idx[1], iter, ii, nlist_a, nlist_r, coord3, region, b_pbc);
+      if (*(iter + 1) >= 0) {
+        make_one_axis(axis_type[1], axis_idx[1], iter);
+      } else {
+        make_one_axis(axis_type[1], axis_idx[1], iter, ii, nlist_a, nlist_r,
+                      coord3, region, b_pbc);
       }
-      std::vector backup_rule (3);
-      copy (iter, iter+3, backup_rule.begin());
-      backup_rule[2] ++;
-      if (*(iter+1) >= 0) {      
-	make_one_axis (axis_type[1], backup_axis, backup_rule.begin());
+      std::vector backup_rule(3);
+      copy(iter, iter + 3, backup_rule.begin());
+      backup_rule[2]++;
+      if (*(iter + 1) >= 0) {
+        make_one_axis(axis_type[1], backup_axis, backup_rule.begin());
+      } else {
+        make_one_axis(axis_type[1], backup_axis, backup_rule.begin(), ii,
+                      nlist_a, nlist_r, coord3, region, b_pbc);
       }
-      else {
-	make_one_axis (axis_type[1], backup_axis, backup_rule.begin(), ii, nlist_a, nlist_r, coord3, region, b_pbc);
-      }      
     }
-    if (! check_axis (axis_type, axis_idx, ii, nlist_a, nlist_r, coord3, region, b_pbc)){
-      if (backup_axis >= 0){
-	axis_idx[1] = backup_axis;
-      }
-      else {
-	axis_idx[1] ++;
-	// std::cerr << "wrong backup axis, exit" << std::endl;
-	// exit (1);
+    if (!check_axis(axis_type, axis_idx, ii, nlist_a, nlist_r, coord3, region,
+                    b_pbc)) {
+      if (backup_axis >= 0) {
+        axis_idx[1] = backup_axis;
+      } else {
+        axis_idx[1]++;
+        // std::cerr << "wrong backup axis, exit" << std::endl;
+        // exit (1);
       }
     }
-    for (int dd = 0; dd < 2; ++dd){
+    for (int dd = 0; dd < 2; ++dd) {
       if (axis_type[dd] == 0) {
-	assert (nlist_a[axis_idx[dd]] >= 0);
-      }
-      else {
-	assert (nlist_r[axis_idx[dd]] >= 0);
+        assert(nlist_a[axis_idx[dd]] >= 0);
+      } else {
+        assert(nlist_r[axis_idx[dd]] >= 0);
       }
     }
-  }	     
-  void
-  make_one_axis (int & axis_type, 
-		 int & axis_idx,
-		 std::vector::const_iterator info_i) const {
+  }
+  void make_one_axis(int& axis_type,
+                     int& axis_idx,
+                     std::vector::const_iterator info_i) const {
     axis_type = *info_i;
-    if (axis_type == 0){
-      axis_idx = sec_a[*(info_i+1)] + *(info_i+2);
-    }
-    else {
-      axis_idx = sec_r[*(info_i+1)] + *(info_i+2);
+    if (axis_type == 0) {
+      axis_idx = sec_a[*(info_i + 1)] + *(info_i + 2);
+    } else {
+      axis_idx = sec_r[*(info_i + 1)] + *(info_i + 2);
     }
-  }		 
-  void
-  make_one_axis (int & axis_type, 
-		 int & axis_idx,
-		 std::vector::const_iterator info_i, 
-		 const int id,
-		 const std::vector & nlist_a,
-		 const std::vector & nlist_r,
-		 const std::vector & coord3,
-		 const SimulationRegion & region, 
-		 const bool b_pbc) const {
+  }
+  void make_one_axis(int& axis_type,
+                     int& axis_idx,
+                     std::vector::const_iterator info_i,
+                     const int id,
+                     const std::vector& nlist_a,
+                     const std::vector& nlist_r,
+                     const std::vector& coord3,
+                     const SimulationRegion& region,
+                     const bool b_pbc) const {
     axis_type = *info_i;
-    if (axis_type == 0){
+    if (axis_type == 0) {
       std::vector > sort_info;
-      int excl_type = - (*(info_i+1) + 1);
+      int excl_type = -(*(info_i + 1) + 1);
       int ntypes = sel_a.size();
-      for (unsigned ii = 0; ii < ntypes; ++ii){
-	if (ii == excl_type) continue;
-	compute_t diff[3];
-	int list_idx, jd;
-	// push axis candidates into sort_info
-	for (int count = 0; count < 3; ++count){
-	  list_idx = sec_a[ii] + count;
-	  if (list_idx >= sec_a[ii+1]) continue;
-	  jd = nlist_a[list_idx];
-	  if (jd < 0) continue;
-	  if (b_pbc){
-	    region.diffNearestNeighbor (coord3[3*id+0], coord3[3*id+1], coord3[3*id+2],
-					coord3[3*jd+0], coord3[3*jd+1], coord3[3*jd+2],
-					diff[0], diff[1], diff[2]);
-	  }
-	  else {
-	    for (int dd = 0; dd < 3; ++dd){
-	      diff[dd] = coord3[3*id+dd] - coord3[3*jd+dd];
-	    }
-	  }
-	  sort_info.push_back (std::pair 
-			       (deepmd::dot3(diff, diff), list_idx) );
-	}
+      for (unsigned ii = 0; ii < ntypes; ++ii) {
+        if (ii == excl_type) continue;
+        compute_t diff[3];
+        int list_idx, jd;
+        // push axis candidates into sort_info
+        for (int count = 0; count < 3; ++count) {
+          list_idx = sec_a[ii] + count;
+          if (list_idx >= sec_a[ii + 1]) continue;
+          jd = nlist_a[list_idx];
+          if (jd < 0) continue;
+          if (b_pbc) {
+            region.diffNearestNeighbor(coord3[3 * id + 0], coord3[3 * id + 1],
+                                       coord3[3 * id + 2], coord3[3 * jd + 0],
+                                       coord3[3 * jd + 1], coord3[3 * jd + 2],
+                                       diff[0], diff[1], diff[2]);
+          } else {
+            for (int dd = 0; dd < 3; ++dd) {
+              diff[dd] = coord3[3 * id + dd] - coord3[3 * jd + dd];
+            }
+          }
+          sort_info.push_back(
+              std::pair(deepmd::dot3(diff, diff), list_idx));
+        }
       }
-      sort (sort_info.begin(), sort_info.end());
-      assert (*(info_i+2) < sort_info.size());
-      axis_idx = sort_info[*(info_i+2)].second;
-    }
-    else {
+      sort(sort_info.begin(), sort_info.end());
+      assert(*(info_i + 2) < sort_info.size());
+      axis_idx = sort_info[*(info_i + 2)].second;
+    } else {
       std::vector > sort_info;
-      int excl_type = - *(info_i+1);
+      int excl_type = -*(info_i + 1);
       int ntypes = sel_r.size();
-      for (unsigned ii = 0; ii < ntypes; ++ii){
-	if (ii == excl_type) continue;
-	compute_t diff[3];
-	int list_idx, jd;
-	// push axis candidates for sort_info
-	for (int count = 0; count < 3; ++count){
-	  list_idx = sec_r[ii] + count;
-	  if (list_idx >= sec_r[ii+1]) continue;
-	  jd = nlist_r[list_idx];
-	  if (jd < 0) continue;
-	  if (b_pbc) {
-	    region.diffNearestNeighbor (coord3[3*id+0], coord3[3*id+1], coord3[3*id+2],
-					coord3[3*jd+0], coord3[3*jd+1], coord3[3*jd+2],
-					diff[0], diff[1], diff[2]);
-	  }
-	  else {
-	    for (int dd = 0; dd < 3; ++dd){
-	      diff[dd] = coord3[3*id+dd] - coord3[3*jd+dd];
-	    }
-	  }
-	  sort_info.push_back (std::pair 
-			       (deepmd::dot3(diff, diff), list_idx) );
-	}
+      for (unsigned ii = 0; ii < ntypes; ++ii) {
+        if (ii == excl_type) continue;
+        compute_t diff[3];
+        int list_idx, jd;
+        // push axis candidates for sort_info
+        for (int count = 0; count < 3; ++count) {
+          list_idx = sec_r[ii] + count;
+          if (list_idx >= sec_r[ii + 1]) continue;
+          jd = nlist_r[list_idx];
+          if (jd < 0) continue;
+          if (b_pbc) {
+            region.diffNearestNeighbor(coord3[3 * id + 0], coord3[3 * id + 1],
+                                       coord3[3 * id + 2], coord3[3 * jd + 0],
+                                       coord3[3 * jd + 1], coord3[3 * jd + 2],
+                                       diff[0], diff[1], diff[2]);
+          } else {
+            for (int dd = 0; dd < 3; ++dd) {
+              diff[dd] = coord3[3 * id + dd] - coord3[3 * jd + dd];
+            }
+          }
+          sort_info.push_back(
+              std::pair(deepmd::dot3(diff, diff), list_idx));
+        }
       }
-      sort (sort_info.begin(), sort_info.end());
-      assert (*(info_i+2) < sort_info.size());
-      axis_idx = sort_info[*(info_i+2)].second;
+      sort(sort_info.begin(), sort_info.end());
+      assert(*(info_i + 2) < sort_info.size());
+      axis_idx = sort_info[*(info_i + 2)].second;
     }
-  }		 
-  void 
-  make_axis_default (std::vector & axis_type,
-		     std::vector & axis_idx) const {
+  }
+  void make_axis_default(std::vector& axis_type,
+                         std::vector& axis_idx) const {
     axis_type.resize(2);
-    axis_idx .resize(2);
+    axis_idx.resize(2);
     if (nnei_a > 1) {
       // use angular neighbors
       axis_type[0] = 0;
       axis_type[1] = 0;
-    }
-    else {
+    } else {
       // use radial neighbors
       axis_type[0] = 1;
       axis_type[1] = 1;
     }
     axis_idx[0] = 0;
-    axis_idx[1] = 1;    
+    axis_idx[1] = 1;
   }
-  bool 
-  check_axis (const std::vector & axis_type,
-	      const std::vector & axis_idx,
-	      const int id,
-	      const std::vector & nlist_a,
-	      const std::vector & nlist_r,
-	      const std::vector & coord3,
-	      const SimulationRegion & region, 
-	      const bool b_pbc) const {
+  bool check_axis(const std::vector& axis_type,
+                  const std::vector& axis_idx,
+                  const int id,
+                  const std::vector& nlist_a,
+                  const std::vector& nlist_r,
+                  const std::vector& coord3,
+                  const SimulationRegion& region,
+                  const bool b_pbc) const {
     compute_t diff[2][3];
-    for (int ii = 0; ii < 2; ++ii){
+    for (int ii = 0; ii < 2; ++ii) {
       int jd = 0;
       if (axis_type[ii] == 0) {
-	jd = nlist_a[axis_idx[ii]];
+        jd = nlist_a[axis_idx[ii]];
+      } else {
+        jd = nlist_r[axis_idx[ii]];
       }
-      else {
-	jd = nlist_r[axis_idx[ii]];
-      }
-      if (b_pbc){
-	region.diffNearestNeighbor (&coord3[3*id], &coord3[3*jd], diff[ii]);
-      }
-      else {
-	for (int dd = 0; dd < 3; ++dd){
-	  diff[ii][dd] = coord3[3*id+dd] - coord3[3*jd+dd];
-	}
+      if (b_pbc) {
+        region.diffNearestNeighbor(&coord3[3 * id], &coord3[3 * jd], diff[ii]);
+      } else {
+        for (int dd = 0; dd < 3; ++dd) {
+          diff[ii][dd] = coord3[3 * id + dd] - coord3[3 * jd + dd];
+        }
       }
     }
     compute_t rij = deepmd::dot3(diff[0], diff[1]);
     compute_t rii = deepmd::dot3(diff[0], diff[0]);
     compute_t rjj = deepmd::dot3(diff[1], diff[1]);
-    if ( fabs (rij / sqrt(rii * rjj) + 1) < 1e-4  ) {
+    if (fabs(rij / sqrt(rii * rjj) + 1) < 1e-4) {
       return false;
-    }
-    else {
+    } else {
       return true;
     }
   }
 };
 
-#define REGISTER_CPU(T)                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("Descrpt").Device(DEVICE_CPU).TypeConstraint("T"),                       \
-    DescrptOp); 
+#define REGISTER_CPU(T)                                          \
+  REGISTER_KERNEL_BUILDER(                                       \
+      Name("Descrpt").Device(DEVICE_CPU).TypeConstraint("T"), \
+      DescrptOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
-
diff --git a/source/op/descrpt_se_a_ef.cc b/source/op/descrpt_se_a_ef.cc
index 121205c9cf..a7f22ee4a7 100644
--- a/source/op/descrpt_se_a_ef.cc
+++ b/source/op/descrpt_se_a_ef.cc
@@ -1,44 +1,44 @@
 
-#include "custom_op.h"
 #include "ComputeDescriptor.h"
-#include "neighbor_list.h"
-#include "fmt_nlist.h"
+#include "custom_op.h"
 #include "errors.h"
+#include "fmt_nlist.h"
+#include "neighbor_list.h"
 
-typedef double boxtensor_t ;
+typedef double boxtensor_t;
 typedef double compute_t;
 
 REGISTER_OP("DescrptSeAEf")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("coord: T")
-.Input("type: int32")
-.Input("natoms: int32")
-.Input("box: T")
-.Input("mesh: int32")
-.Input("ef: T")
-.Input("davg: T")
-.Input("dstd: T")
-.Attr("rcut_a: float")
-.Attr("rcut_r: float")
-.Attr("rcut_r_smth: float")
-.Attr("sel_a: list(int)")
-.Attr("sel_r: list(int)")
-.Output("descrpt: T")
-.Output("descrpt_deriv: T")
-.Output("rij: T")
-.Output("nlist: int32");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("coord: T")
+    .Input("type: int32")
+    .Input("natoms: int32")
+    .Input("box: T")
+    .Input("mesh: int32")
+    .Input("ef: T")
+    .Input("davg: T")
+    .Input("dstd: T")
+    .Attr("rcut_a: float")
+    .Attr("rcut_r: float")
+    .Attr("rcut_r_smth: float")
+    .Attr("sel_a: list(int)")
+    .Attr("sel_r: list(int)")
+    .Output("descrpt: T")
+    .Output("descrpt_deriv: T")
+    .Output("rij: T")
+    .Output("nlist: int32");
 
-template
+template 
 class DescrptSeAEfOp : public OpKernel {
-public:
+ public:
   explicit DescrptSeAEfOp(OpKernelConstruction* context) : OpKernel(context) {
     OP_REQUIRES_OK(context, context->GetAttr("rcut_a", &rcut_a));
     OP_REQUIRES_OK(context, context->GetAttr("rcut_r", &rcut_r));
     OP_REQUIRES_OK(context, context->GetAttr("rcut_r_smth", &rcut_r_smth));
     OP_REQUIRES_OK(context, context->GetAttr("sel_a", &sel_a));
     OP_REQUIRES_OK(context, context->GetAttr("sel_r", &sel_r));
-    cum_sum (sec_a, sel_a);
-    cum_sum (sec_r, sel_r);
+    cum_sum(sec_a, sel_a);
+    cum_sum(sec_r, sel_r);
     ndescrpt_a = sec_a.back() * 4;
     ndescrpt_r = sec_r.back() * 1;
     ndescrpt = ndescrpt_a + ndescrpt_r;
@@ -50,73 +50,97 @@ class DescrptSeAEfOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& coord_tensor	= context->input(context_input_index++);
-    const Tensor& type_tensor	= context->input(context_input_index++);
-    const Tensor& natoms_tensor	= context->input(context_input_index++);
-    const Tensor& box_tensor	= context->input(context_input_index++);
-    const Tensor& mesh_tensor	= context->input(context_input_index++);
-    const Tensor& ef_tensor	= context->input(context_input_index++);
-    const Tensor& avg_tensor	= context->input(context_input_index++);
-    const Tensor& std_tensor	= context->input(context_input_index++);
+    const Tensor& coord_tensor = context->input(context_input_index++);
+    const Tensor& type_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
+    const Tensor& box_tensor = context->input(context_input_index++);
+    const Tensor& mesh_tensor = context->input(context_input_index++);
+    const Tensor& ef_tensor = context->input(context_input_index++);
+    const Tensor& avg_tensor = context->input(context_input_index++);
+    const Tensor& std_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    OP_REQUIRES (context, (coord_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of coord should be 2"));
-    OP_REQUIRES (context, (type_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of type should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),	errors::InvalidArgument ("Dim of natoms should be 1"));
-    OP_REQUIRES (context, (box_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of box should be 2"));
-    OP_REQUIRES (context, (mesh_tensor.shape().dims() == 1),	errors::InvalidArgument ("Dim of mesh should be 1"));
-    OP_REQUIRES (context, (ef_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of ef should be 2"));
-    OP_REQUIRES (context, (avg_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of avg should be 2"));
-    OP_REQUIRES (context, (std_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of std should be 2"));
-    OP_REQUIRES (context, (fill_nei_a),				errors::InvalidArgument ("Rotational free descriptor only support the case rcut_a < 0"));
-    OP_REQUIRES (context, (sec_r.back() == 0),			errors::InvalidArgument ("Rotational free descriptor only support all-angular information: sel_r should be all zero."));
+    OP_REQUIRES(context, (coord_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of coord should be 2"));
+    OP_REQUIRES(context, (type_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of type should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (box_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of box should be 2"));
+    OP_REQUIRES(context, (mesh_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of mesh should be 1"));
+    OP_REQUIRES(context, (ef_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of ef should be 2"));
+    OP_REQUIRES(context, (avg_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of avg should be 2"));
+    OP_REQUIRES(context, (std_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of std should be 2"));
+    OP_REQUIRES(
+        context, (fill_nei_a),
+        errors::InvalidArgument(
+            "Rotational free descriptor only support the case rcut_a < 0"));
+    OP_REQUIRES(context, (sec_r.back() == 0),
+                errors::InvalidArgument(
+                    "Rotational free descriptor only support all-angular "
+                    "information: sel_r should be all zero."));
 
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),		errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
     int nloc = natoms(0);
     int nall = natoms(1);
     int ntypes = natoms_tensor.shape().dim_size(0) - 2;
     int nsamples = coord_tensor.shape().dim_size(0);
 
     // check the sizes
-    OP_REQUIRES (context, (nsamples == type_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nsamples == box_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nsamples == ef_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (ntypes == avg_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of avg should be ntype"));
-    OP_REQUIRES (context, (ntypes == std_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of std should be ntype"));
+    OP_REQUIRES(context, (nsamples == type_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nsamples == box_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nsamples == ef_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (ntypes == avg_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of avg should be ntype"));
+    OP_REQUIRES(context, (ntypes == std_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of std should be ntype"));
 
-    OP_REQUIRES (context, (nall * 3 == coord_tensor.shape().dim_size(1)),	errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (nall == type_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (9 == box_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of box should be 9"));
-    OP_REQUIRES (context, (nloc * 3 == ef_tensor.shape().dim_size(1)),			errors::InvalidArgument ("number of ef should be 3"));
-    OP_REQUIRES (context, (ndescrpt == avg_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of avg should be ndescrpt"));
-    OP_REQUIRES (context, (ndescrpt == std_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of std should be ndescrpt"));
+    OP_REQUIRES(context, (nall * 3 == coord_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (nall == type_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (9 == box_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of box should be 9"));
+    OP_REQUIRES(context, (nloc * 3 == ef_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of ef should be 3"));
+    OP_REQUIRES(context, (ndescrpt == avg_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of avg should be ndescrpt"));
+    OP_REQUIRES(context, (ndescrpt == std_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of std should be ndescrpt"));
 
     int nei_mode = 0;
     if (mesh_tensor.shape().dim_size(0) == 16) {
       // lammps neighbor list
       nei_mode = 3;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 12) {
+    } else if (mesh_tensor.shape().dim_size(0) == 12) {
       // user provided extended mesh
       nei_mode = 2;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 6) {
+    } else if (mesh_tensor.shape().dim_size(0) == 6) {
       // manual copied pbc
-      assert (nloc == nall);
+      assert(nloc == nall);
       nei_mode = 1;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 0) {
+    } else if (mesh_tensor.shape().dim_size(0) == 0) {
       // no pbc
       nei_mode = -1;
-    }
-    else {
+    } else {
       throw deepmd::deepmd_exception("invalid mesh tensor");
     }
     bool b_pbc = true;
@@ -125,53 +149,52 @@ class DescrptSeAEfOp : public OpKernel {
       b_pbc = false;
     }
     bool b_norm_atom = false;
-    if (nei_mode == 1){
+    if (nei_mode == 1) {
       b_norm_atom = true;
     }
 
     // Create an output tensor
-    TensorShape descrpt_shape ;
-    descrpt_shape.AddDim (nsamples);
-    descrpt_shape.AddDim (nloc * ndescrpt);
-    TensorShape descrpt_deriv_shape ;
-    descrpt_deriv_shape.AddDim (nsamples);
-    descrpt_deriv_shape.AddDim (nloc * ndescrpt * 3);
-    TensorShape rij_shape ;
-    rij_shape.AddDim (nsamples);
-    rij_shape.AddDim (nloc * nnei * 3);
-    TensorShape nlist_shape ;
-    nlist_shape.AddDim (nsamples);
-    nlist_shape.AddDim (nloc * nnei);
+    TensorShape descrpt_shape;
+    descrpt_shape.AddDim(nsamples);
+    descrpt_shape.AddDim(nloc * ndescrpt);
+    TensorShape descrpt_deriv_shape;
+    descrpt_deriv_shape.AddDim(nsamples);
+    descrpt_deriv_shape.AddDim(nloc * ndescrpt * 3);
+    TensorShape rij_shape;
+    rij_shape.AddDim(nsamples);
+    rij_shape.AddDim(nloc * nnei * 3);
+    TensorShape nlist_shape;
+    nlist_shape.AddDim(nsamples);
+    nlist_shape.AddDim(nloc * nnei);
 
     int context_output_index = 0;
     Tensor* descrpt_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, 
-						     descrpt_shape, 
-						     &descrpt_tensor));
+    OP_REQUIRES_OK(
+        context, context->allocate_output(context_output_index++, descrpt_shape,
+                                          &descrpt_tensor));
     Tensor* descrpt_deriv_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, 
-						     descrpt_deriv_shape, 
-						     &descrpt_deriv_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     descrpt_deriv_shape,
+                                                     &descrpt_deriv_tensor));
     Tensor* rij_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, 
-						     rij_shape,
-						     &rij_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     rij_shape, &rij_tensor));
     Tensor* nlist_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, 
-						     nlist_shape,
-						     &nlist_tensor));
-    
-    auto coord	= coord_tensor	.matrix();
-    auto type	= type_tensor	.matrix();
-    auto box	= box_tensor	.matrix();
-    auto mesh	= mesh_tensor	.flat();
-    auto ef	= ef_tensor	.matrix();
-    auto avg	= avg_tensor	.matrix();
-    auto std	= std_tensor	.matrix();
-    auto descrpt	= descrpt_tensor	->matrix();
-    auto descrpt_deriv	= descrpt_deriv_tensor	->matrix();
-    auto rij		= rij_tensor		->matrix();
-    auto nlist		= nlist_tensor		->matrix();
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++, nlist_shape,
+                                            &nlist_tensor));
+
+    auto coord = coord_tensor.matrix();
+    auto type = type_tensor.matrix();
+    auto box = box_tensor.matrix();
+    auto mesh = mesh_tensor.flat();
+    auto ef = ef_tensor.matrix();
+    auto avg = avg_tensor.matrix();
+    auto std = std_tensor.matrix();
+    auto descrpt = descrpt_tensor->matrix();
+    auto descrpt_deriv = descrpt_deriv_tensor->matrix();
+    auto rij = rij_tensor->matrix();
+    auto nlist = nlist_tensor->matrix();
 
     // // check the types
     // int max_type_v = 0;
@@ -179,165 +202,167 @@ class DescrptSeAEfOp : public OpKernel {
     //   if (type(0, ii) > max_type_v) max_type_v = type(0, ii);
     // }
     // int ntypes = max_type_v + 1;
-    OP_REQUIRES (context, (ntypes == int(sel_a.size())),	errors::InvalidArgument ("number of types should match the length of sel array"));
-    OP_REQUIRES (context, (ntypes == int(sel_r.size())),	errors::InvalidArgument ("number of types should match the length of sel array"));
+    OP_REQUIRES(context, (ntypes == int(sel_a.size())),
+                errors::InvalidArgument(
+                    "number of types should match the length of sel array"));
+    OP_REQUIRES(context, (ntypes == int(sel_r.size())),
+                errors::InvalidArgument(
+                    "number of types should match the length of sel array"));
 
-    for (int kk = 0; kk < nsamples; ++kk){
+    for (int kk = 0; kk < nsamples; ++kk) {
       // set region
-      boxtensor_t boxt [9] = {0};
+      boxtensor_t boxt[9] = {0};
       for (int dd = 0; dd < 9; ++dd) {
-	boxt[dd] = box(kk, dd);
+        boxt[dd] = box(kk, dd);
       }
-      SimulationRegion region;
-      region.reinitBox (boxt);
+      SimulationRegion region;
+      region.reinitBox(boxt);
 
       // set & normalize coord
-      std::vector d_coord3 (nall*3);
-      for (int ii = 0; ii < nall; ++ii){
-	for (int dd = 0; dd < 3; ++dd){
-	  d_coord3[ii*3+dd] = coord(kk, ii*3+dd);
-	}
-	if (b_norm_atom){
-	  compute_t inter[3];
-	  region.phys2Inter (inter, &d_coord3[3*ii]);
-	  for (int dd = 0; dd < 3; ++dd){
-	    if      (inter[dd] < 0 ) inter[dd] += 1.;
-	    else if (inter[dd] >= 1) inter[dd] -= 1.;
-	  }
-	  region.inter2Phys (&d_coord3[3*ii], inter);
-	}
+      std::vector d_coord3(nall * 3);
+      for (int ii = 0; ii < nall; ++ii) {
+        for (int dd = 0; dd < 3; ++dd) {
+          d_coord3[ii * 3 + dd] = coord(kk, ii * 3 + dd);
+        }
+        if (b_norm_atom) {
+          compute_t inter[3];
+          region.phys2Inter(inter, &d_coord3[3 * ii]);
+          for (int dd = 0; dd < 3; ++dd) {
+            if (inter[dd] < 0)
+              inter[dd] += 1.;
+            else if (inter[dd] >= 1)
+              inter[dd] -= 1.;
+          }
+          region.inter2Phys(&d_coord3[3 * ii], inter);
+        }
       }
-      
+
       // set efield
       std::vector d_ef(nloc * 3);
-      for (int ii = 0; ii < nloc; ++ii){
-	for (int dd = 0; dd < 3; ++dd){
-	  d_ef[ii*3+dd] = ef(kk, ii*3+dd);
-	}
+      for (int ii = 0; ii < nloc; ++ii) {
+        for (int dd = 0; dd < 3; ++dd) {
+          d_ef[ii * 3 + dd] = ef(kk, ii * 3 + dd);
+        }
       }
 
       // set type
-      std::vector d_type (nall);
+      std::vector d_type(nall);
       for (int ii = 0; ii < nall; ++ii) d_type[ii] = type(kk, ii);
 
       // build nlist
-      std::vector > d_nlist_a;
-      std::vector > d_nlist_r;
+      std::vector > d_nlist_a;
+      std::vector > d_nlist_r;
       std::vector nlist_map;
       bool b_nlist_map = false;
-      if (nei_mode == 3) {	
-	int * pilist, *pjrange, *pjlist;
-	memcpy (&pilist, &mesh(4), sizeof(int *));
-	memcpy (&pjrange, &mesh(8), sizeof(int *));
-	memcpy (&pjlist, &mesh(12), sizeof(int *));
-	int inum = mesh(1);
-	assert (inum == nloc);
-	d_nlist_a.resize (inum);
-	d_nlist_r.resize (inum);
-	for (unsigned ii = 0; ii < inum; ++ii){
-	  d_nlist_r.reserve (pjrange[inum] / inum + 10);
-	}
-	for (unsigned ii = 0; ii < inum; ++ii){
-	  int i_idx = pilist[ii];
-	  for (unsigned jj = pjrange[ii]; jj < pjrange[ii+1]; ++jj){
-	    int j_idx = pjlist[jj];
-	    d_nlist_r[i_idx].push_back (j_idx);
-	  }
-	}
-      }
-      else if (nei_mode == 2) {
-	std::vector nat_stt = {mesh(1-1), mesh(2-1), mesh(3-1)};
-	std::vector nat_end = {mesh(4-1), mesh(5-1), mesh(6-1)};
-	std::vector ext_stt = {mesh(7-1), mesh(8-1), mesh(9-1)};
-	std::vector ext_end = {mesh(10-1), mesh(11-1), mesh(12-1)};
-	std::vector global_grid (3);
-	for (int dd = 0; dd < 3; ++dd) global_grid[dd] = nat_end[dd] - nat_stt[dd];
-	::build_nlist (d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r, nat_stt, nat_end, ext_stt, ext_end, region, global_grid);
-      }
-      else if (nei_mode == 1) {
-	std::vector bk_d_coord3 = d_coord3;
-	std::vector bk_d_type = d_type;
-	std::vector ncell, ngcell;
-	copy_coord(d_coord3, d_type, nlist_map, ncell, ngcell, bk_d_coord3, bk_d_type, rcut_r, region);	
-	b_nlist_map = true;
-	std::vector nat_stt(3, 0);
-	std::vector ext_stt(3), ext_end(3);
-	for (int dd = 0; dd < 3; ++dd){
-	  ext_stt[dd] = -ngcell[dd];
-	  ext_end[dd] = ncell[dd] + ngcell[dd];
-	}
-	::build_nlist (d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-      }
-      else if (nei_mode == -1){
-	::build_nlist (d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL);
-      }
-      else {
-	throw deepmd::deepmd_exception("unknow neighbor mode");
+      if (nei_mode == 3) {
+        int *pilist, *pjrange, *pjlist;
+        memcpy(&pilist, &mesh(4), sizeof(int*));
+        memcpy(&pjrange, &mesh(8), sizeof(int*));
+        memcpy(&pjlist, &mesh(12), sizeof(int*));
+        int inum = mesh(1);
+        assert(inum == nloc);
+        d_nlist_a.resize(inum);
+        d_nlist_r.resize(inum);
+        for (unsigned ii = 0; ii < inum; ++ii) {
+          d_nlist_r.reserve(pjrange[inum] / inum + 10);
+        }
+        for (unsigned ii = 0; ii < inum; ++ii) {
+          int i_idx = pilist[ii];
+          for (unsigned jj = pjrange[ii]; jj < pjrange[ii + 1]; ++jj) {
+            int j_idx = pjlist[jj];
+            d_nlist_r[i_idx].push_back(j_idx);
+          }
+        }
+      } else if (nei_mode == 2) {
+        std::vector nat_stt = {mesh(1 - 1), mesh(2 - 1), mesh(3 - 1)};
+        std::vector nat_end = {mesh(4 - 1), mesh(5 - 1), mesh(6 - 1)};
+        std::vector ext_stt = {mesh(7 - 1), mesh(8 - 1), mesh(9 - 1)};
+        std::vector ext_end = {mesh(10 - 1), mesh(11 - 1), mesh(12 - 1)};
+        std::vector global_grid(3);
+        for (int dd = 0; dd < 3; ++dd)
+          global_grid[dd] = nat_end[dd] - nat_stt[dd];
+        ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r,
+                      nat_stt, nat_end, ext_stt, ext_end, region, global_grid);
+      } else if (nei_mode == 1) {
+        std::vector bk_d_coord3 = d_coord3;
+        std::vector bk_d_type = d_type;
+        std::vector ncell, ngcell;
+        copy_coord(d_coord3, d_type, nlist_map, ncell, ngcell, bk_d_coord3,
+                   bk_d_type, rcut_r, region);
+        b_nlist_map = true;
+        std::vector nat_stt(3, 0);
+        std::vector ext_stt(3), ext_end(3);
+        for (int dd = 0; dd < 3; ++dd) {
+          ext_stt[dd] = -ngcell[dd];
+          ext_end[dd] = ncell[dd] + ngcell[dd];
+        }
+        ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r,
+                      nat_stt, ncell, ext_stt, ext_end, region, ncell);
+      } else if (nei_mode == -1) {
+        ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL);
+      } else {
+        throw deepmd::deepmd_exception("unknow neighbor mode");
       }
 
       // loop over atoms, compute descriptors for each atom
-#pragma omp parallel for 
-      for (int ii = 0; ii < nloc; ++ii){
-	std::vector fmt_nlist_a;
-	std::vector fmt_nlist_r;
-	int ret = -1;
-	if (fill_nei_a){
-	  if ((ret = format_nlist_i_fill_a (fmt_nlist_a, fmt_nlist_r, d_coord3, ntypes, d_type, region, b_pbc, ii, d_nlist_a[ii], d_nlist_r[ii], rcut_r, sec_a, sec_r)) != -1){
-	    if (count_nei_idx_overflow == 0) {
-	      std::cout << "WARNING: Radial neighbor list length of type " << ret << " is not enough" << std::endl;
-	      flush(std::cout);
-	      count_nei_idx_overflow ++;
-	    }
-	  }
-	}
+#pragma omp parallel for
+      for (int ii = 0; ii < nloc; ++ii) {
+        std::vector fmt_nlist_a;
+        std::vector fmt_nlist_r;
+        int ret = -1;
+        if (fill_nei_a) {
+          if ((ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, d_coord3,
+                                           ntypes, d_type, region, b_pbc, ii,
+                                           d_nlist_a[ii], d_nlist_r[ii], rcut_r,
+                                           sec_a, sec_r)) != -1) {
+            if (count_nei_idx_overflow == 0) {
+              std::cout << "WARNING: Radial neighbor list length of type "
+                        << ret << " is not enough" << std::endl;
+              flush(std::cout);
+              count_nei_idx_overflow++;
+            }
+          }
+        }
 
-	std::vector d_descrpt_a;
-	std::vector d_descrpt_a_deriv;
-	std::vector d_descrpt_r;
-	std::vector d_descrpt_r_deriv;
-	std::vector d_rij_a;
-	std::vector d_rij_r;      
-	compute_descriptor_se_a_extf (d_descrpt_a,
-				      d_descrpt_a_deriv,
-				      d_rij_a,
-				      d_coord3,
-				      ntypes, 
-				      d_type,
-				      region, 
-				      b_pbc,
-				      d_ef,
-				      ii, 
-				      fmt_nlist_a,
-				      sec_a, 
-				      rcut_r_smth, 
-				      rcut_r);
+        std::vector d_descrpt_a;
+        std::vector d_descrpt_a_deriv;
+        std::vector d_descrpt_r;
+        std::vector d_descrpt_r_deriv;
+        std::vector d_rij_a;
+        std::vector d_rij_r;
+        compute_descriptor_se_a_extf(
+            d_descrpt_a, d_descrpt_a_deriv, d_rij_a, d_coord3, ntypes, d_type,
+            region, b_pbc, d_ef, ii, fmt_nlist_a, sec_a, rcut_r_smth, rcut_r);
 
-	// check sizes
-	assert (d_descrpt_a.size() == ndescrpt_a);
-	assert (d_descrpt_a_deriv.size() == ndescrpt_a * 3);
-	assert (d_rij_a.size() == nnei_a * 3);
-	assert (int(fmt_nlist_a.size()) == nnei_a);
-	// record outputs
-	for (int jj = 0; jj < ndescrpt_a; ++jj) {
-	  descrpt(kk, ii * ndescrpt + jj) = (d_descrpt_a[jj] - avg(d_type[ii], jj)) / std(d_type[ii], jj);
-	}
-	for (int jj = 0; jj < ndescrpt_a * 3; ++jj) {
-	  descrpt_deriv(kk, ii * ndescrpt * 3 + jj) = d_descrpt_a_deriv[jj] / std(d_type[ii], jj/3);
-	}
-	for (int jj = 0; jj < nnei_a * 3; ++jj){
-	  rij (kk, ii * nnei * 3 + jj) = d_rij_a[jj];
-	}
-	for (int jj = 0; jj < nnei_a; ++jj){
-	  int record = fmt_nlist_a[jj];
-	  if (b_nlist_map && record >= 0) {
-	    record = nlist_map[record];
-	  }
-	  nlist (kk, ii * nnei + jj) = record;
-	}
+        // check sizes
+        assert(d_descrpt_a.size() == ndescrpt_a);
+        assert(d_descrpt_a_deriv.size() == ndescrpt_a * 3);
+        assert(d_rij_a.size() == nnei_a * 3);
+        assert(int(fmt_nlist_a.size()) == nnei_a);
+        // record outputs
+        for (int jj = 0; jj < ndescrpt_a; ++jj) {
+          descrpt(kk, ii * ndescrpt + jj) =
+              (d_descrpt_a[jj] - avg(d_type[ii], jj)) / std(d_type[ii], jj);
+        }
+        for (int jj = 0; jj < ndescrpt_a * 3; ++jj) {
+          descrpt_deriv(kk, ii * ndescrpt * 3 + jj) =
+              d_descrpt_a_deriv[jj] / std(d_type[ii], jj / 3);
+        }
+        for (int jj = 0; jj < nnei_a * 3; ++jj) {
+          rij(kk, ii * nnei * 3 + jj) = d_rij_a[jj];
+        }
+        for (int jj = 0; jj < nnei_a; ++jj) {
+          int record = fmt_nlist_a[jj];
+          if (b_nlist_map && record >= 0) {
+            record = nlist_map[record];
+          }
+          nlist(kk, ii * nnei + jj) = record;
+        }
       }
     }
   }
-private:
+
+ private:
   float rcut_a;
   float rcut_r;
   float rcut_r_smth;
@@ -349,20 +374,18 @@ class DescrptSeAEfOp : public OpKernel {
   int nnei, nnei_a, nnei_r;
   bool fill_nei_a;
   int count_nei_idx_overflow;
-  void 
-  cum_sum (std::vector & sec,
-	   const std::vector & n_sel) const {
-    sec.resize (n_sel.size() + 1);
+  void cum_sum(std::vector& sec, const std::vector& n_sel) const {
+    sec.resize(n_sel.size() + 1);
     sec[0] = 0;
-    for (int ii = 1; ii < sec.size(); ++ii){
-      sec[ii] = sec[ii-1] + n_sel[ii-1];
+    for (int ii = 1; ii < sec.size(); ++ii) {
+      sec[ii] = sec[ii - 1] + n_sel[ii - 1];
     }
   }
 };
 
-#define REGISTER_CPU(T)                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("DescrptSeAEf").Device(DEVICE_CPU).TypeConstraint("T"),                     \
-    DescrptSeAEfOp); 
+#define REGISTER_CPU(T)                                               \
+  REGISTER_KERNEL_BUILDER(                                            \
+      Name("DescrptSeAEf").Device(DEVICE_CPU).TypeConstraint("T"), \
+      DescrptSeAEfOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
diff --git a/source/op/descrpt_se_a_ef_para.cc b/source/op/descrpt_se_a_ef_para.cc
index 952c53d473..254b54eb23 100644
--- a/source/op/descrpt_se_a_ef_para.cc
+++ b/source/op/descrpt_se_a_ef_para.cc
@@ -1,43 +1,44 @@
-#include "custom_op.h"
 #include "ComputeDescriptor.h"
-#include "neighbor_list.h"
-#include "fmt_nlist.h"
+#include "custom_op.h"
 #include "errors.h"
+#include "fmt_nlist.h"
+#include "neighbor_list.h"
 
-typedef double boxtensor_t ;
+typedef double boxtensor_t;
 typedef double compute_t;
 
 REGISTER_OP("DescrptSeAEfPara")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("coord: T")
-.Input("type: int32")
-.Input("natoms: int32")
-.Input("box: T")
-.Input("mesh: int32")
-.Input("ef: T")
-.Input("davg: T")
-.Input("dstd: T")
-.Attr("rcut_a: float")
-.Attr("rcut_r: float")
-.Attr("rcut_r_smth: float")
-.Attr("sel_a: list(int)")
-.Attr("sel_r: list(int)")
-.Output("descrpt: T")
-.Output("descrpt_deriv: T")
-.Output("rij: T")
-.Output("nlist: int32");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("coord: T")
+    .Input("type: int32")
+    .Input("natoms: int32")
+    .Input("box: T")
+    .Input("mesh: int32")
+    .Input("ef: T")
+    .Input("davg: T")
+    .Input("dstd: T")
+    .Attr("rcut_a: float")
+    .Attr("rcut_r: float")
+    .Attr("rcut_r_smth: float")
+    .Attr("sel_a: list(int)")
+    .Attr("sel_r: list(int)")
+    .Output("descrpt: T")
+    .Output("descrpt_deriv: T")
+    .Output("rij: T")
+    .Output("nlist: int32");
 
-template
+template 
 class DescrptSeAEfParaOp : public OpKernel {
-public:
-  explicit DescrptSeAEfParaOp(OpKernelConstruction* context) : OpKernel(context) {
+ public:
+  explicit DescrptSeAEfParaOp(OpKernelConstruction* context)
+      : OpKernel(context) {
     OP_REQUIRES_OK(context, context->GetAttr("rcut_a", &rcut_a));
     OP_REQUIRES_OK(context, context->GetAttr("rcut_r", &rcut_r));
     OP_REQUIRES_OK(context, context->GetAttr("rcut_r_smth", &rcut_r_smth));
     OP_REQUIRES_OK(context, context->GetAttr("sel_a", &sel_a));
     OP_REQUIRES_OK(context, context->GetAttr("sel_r", &sel_r));
-    cum_sum (sec_a, sel_a);
-    cum_sum (sec_r, sel_r);
+    cum_sum(sec_a, sel_a);
+    cum_sum(sec_r, sel_r);
     ndescrpt_a = sec_a.back() * 4;
     ndescrpt_r = sec_r.back() * 1;
     ndescrpt = ndescrpt_a + ndescrpt_r;
@@ -49,73 +50,97 @@ class DescrptSeAEfParaOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& coord_tensor	= context->input(context_input_index++);
-    const Tensor& type_tensor	= context->input(context_input_index++);
-    const Tensor& natoms_tensor	= context->input(context_input_index++);
-    const Tensor& box_tensor	= context->input(context_input_index++);
-    const Tensor& mesh_tensor	= context->input(context_input_index++);
-    const Tensor& ef_tensor	= context->input(context_input_index++);
-    const Tensor& avg_tensor	= context->input(context_input_index++);
-    const Tensor& std_tensor	= context->input(context_input_index++);
+    const Tensor& coord_tensor = context->input(context_input_index++);
+    const Tensor& type_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
+    const Tensor& box_tensor = context->input(context_input_index++);
+    const Tensor& mesh_tensor = context->input(context_input_index++);
+    const Tensor& ef_tensor = context->input(context_input_index++);
+    const Tensor& avg_tensor = context->input(context_input_index++);
+    const Tensor& std_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    OP_REQUIRES (context, (coord_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of coord should be 2"));
-    OP_REQUIRES (context, (type_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of type should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),	errors::InvalidArgument ("Dim of natoms should be 1"));
-    OP_REQUIRES (context, (box_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of box should be 2"));
-    OP_REQUIRES (context, (mesh_tensor.shape().dims() == 1),	errors::InvalidArgument ("Dim of mesh should be 1"));
-    OP_REQUIRES (context, (ef_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of ef should be 2"));
-    OP_REQUIRES (context, (avg_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of avg should be 2"));
-    OP_REQUIRES (context, (std_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of std should be 2"));
-    OP_REQUIRES (context, (fill_nei_a),				errors::InvalidArgument ("Rotational free descriptor only support the case rcut_a < 0"));
-    OP_REQUIRES (context, (sec_r.back() == 0),			errors::InvalidArgument ("Rotational free descriptor only support all-angular information: sel_r should be all zero."));
+    OP_REQUIRES(context, (coord_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of coord should be 2"));
+    OP_REQUIRES(context, (type_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of type should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (box_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of box should be 2"));
+    OP_REQUIRES(context, (mesh_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of mesh should be 1"));
+    OP_REQUIRES(context, (ef_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of ef should be 2"));
+    OP_REQUIRES(context, (avg_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of avg should be 2"));
+    OP_REQUIRES(context, (std_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of std should be 2"));
+    OP_REQUIRES(
+        context, (fill_nei_a),
+        errors::InvalidArgument(
+            "Rotational free descriptor only support the case rcut_a < 0"));
+    OP_REQUIRES(context, (sec_r.back() == 0),
+                errors::InvalidArgument(
+                    "Rotational free descriptor only support all-angular "
+                    "information: sel_r should be all zero."));
 
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),		errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
     int nloc = natoms(0);
     int nall = natoms(1);
     int ntypes = natoms_tensor.shape().dim_size(0) - 2;
     int nsamples = coord_tensor.shape().dim_size(0);
 
     // check the sizes
-    OP_REQUIRES (context, (nsamples == type_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nsamples == box_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nsamples == ef_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (ntypes == avg_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of avg should be ntype"));
-    OP_REQUIRES (context, (ntypes == std_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of std should be ntype"));
+    OP_REQUIRES(context, (nsamples == type_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nsamples == box_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nsamples == ef_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (ntypes == avg_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of avg should be ntype"));
+    OP_REQUIRES(context, (ntypes == std_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of std should be ntype"));
 
-    OP_REQUIRES (context, (nall * 3 == coord_tensor.shape().dim_size(1)),	errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (nall == type_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (9 == box_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of box should be 9"));
-    OP_REQUIRES (context, (nloc * 3 == ef_tensor.shape().dim_size(1)),			errors::InvalidArgument ("number of ef should be 3"));
-    OP_REQUIRES (context, (ndescrpt == avg_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of avg should be ndescrpt"));
-    OP_REQUIRES (context, (ndescrpt == std_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of std should be ndescrpt"));
+    OP_REQUIRES(context, (nall * 3 == coord_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (nall == type_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (9 == box_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of box should be 9"));
+    OP_REQUIRES(context, (nloc * 3 == ef_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of ef should be 3"));
+    OP_REQUIRES(context, (ndescrpt == avg_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of avg should be ndescrpt"));
+    OP_REQUIRES(context, (ndescrpt == std_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of std should be ndescrpt"));
 
     int nei_mode = 0;
     if (mesh_tensor.shape().dim_size(0) == 16) {
       // lammps neighbor list
       nei_mode = 3;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 12) {
+    } else if (mesh_tensor.shape().dim_size(0) == 12) {
       // user provided extended mesh
       nei_mode = 2;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 6) {
+    } else if (mesh_tensor.shape().dim_size(0) == 6) {
       // manual copied pbc
-      assert (nloc == nall);
+      assert(nloc == nall);
       nei_mode = 1;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 0) {
+    } else if (mesh_tensor.shape().dim_size(0) == 0) {
       // no pbc
       nei_mode = -1;
-    }
-    else {
+    } else {
       throw deepmd::deepmd_exception("invalid mesh tensor");
     }
     bool b_pbc = true;
@@ -124,53 +149,52 @@ class DescrptSeAEfParaOp : public OpKernel {
       b_pbc = false;
     }
     bool b_norm_atom = false;
-    if (nei_mode == 1){
+    if (nei_mode == 1) {
       b_norm_atom = true;
     }
 
     // Create an output tensor
-    TensorShape descrpt_shape ;
-    descrpt_shape.AddDim (nsamples);
-    descrpt_shape.AddDim (nloc * ndescrpt);
-    TensorShape descrpt_deriv_shape ;
-    descrpt_deriv_shape.AddDim (nsamples);
-    descrpt_deriv_shape.AddDim (nloc * ndescrpt * 3);
-    TensorShape rij_shape ;
-    rij_shape.AddDim (nsamples);
-    rij_shape.AddDim (nloc * nnei * 3);
-    TensorShape nlist_shape ;
-    nlist_shape.AddDim (nsamples);
-    nlist_shape.AddDim (nloc * nnei);
+    TensorShape descrpt_shape;
+    descrpt_shape.AddDim(nsamples);
+    descrpt_shape.AddDim(nloc * ndescrpt);
+    TensorShape descrpt_deriv_shape;
+    descrpt_deriv_shape.AddDim(nsamples);
+    descrpt_deriv_shape.AddDim(nloc * ndescrpt * 3);
+    TensorShape rij_shape;
+    rij_shape.AddDim(nsamples);
+    rij_shape.AddDim(nloc * nnei * 3);
+    TensorShape nlist_shape;
+    nlist_shape.AddDim(nsamples);
+    nlist_shape.AddDim(nloc * nnei);
 
     int context_output_index = 0;
     Tensor* descrpt_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, 
-						     descrpt_shape, 
-						     &descrpt_tensor));
+    OP_REQUIRES_OK(
+        context, context->allocate_output(context_output_index++, descrpt_shape,
+                                          &descrpt_tensor));
     Tensor* descrpt_deriv_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, 
-						     descrpt_deriv_shape, 
-						     &descrpt_deriv_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     descrpt_deriv_shape,
+                                                     &descrpt_deriv_tensor));
     Tensor* rij_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, 
-						     rij_shape,
-						     &rij_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     rij_shape, &rij_tensor));
     Tensor* nlist_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, 
-						     nlist_shape,
-						     &nlist_tensor));
-    
-    auto coord	= coord_tensor	.matrix();
-    auto type	= type_tensor	.matrix();
-    auto box	= box_tensor	.matrix();
-    auto mesh	= mesh_tensor	.flat();
-    auto ef	= ef_tensor	.matrix();
-    auto avg	= avg_tensor	.matrix();
-    auto std	= std_tensor	.matrix();
-    auto descrpt	= descrpt_tensor	->matrix();
-    auto descrpt_deriv	= descrpt_deriv_tensor	->matrix();
-    auto rij		= rij_tensor		->matrix();
-    auto nlist		= nlist_tensor		->matrix();
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++, nlist_shape,
+                                            &nlist_tensor));
+
+    auto coord = coord_tensor.matrix();
+    auto type = type_tensor.matrix();
+    auto box = box_tensor.matrix();
+    auto mesh = mesh_tensor.flat();
+    auto ef = ef_tensor.matrix();
+    auto avg = avg_tensor.matrix();
+    auto std = std_tensor.matrix();
+    auto descrpt = descrpt_tensor->matrix();
+    auto descrpt_deriv = descrpt_deriv_tensor->matrix();
+    auto rij = rij_tensor->matrix();
+    auto nlist = nlist_tensor->matrix();
 
     // // check the types
     // int max_type_v = 0;
@@ -178,165 +202,167 @@ class DescrptSeAEfParaOp : public OpKernel {
     //   if (type(0, ii) > max_type_v) max_type_v = type(0, ii);
     // }
     // int ntypes = max_type_v + 1;
-    OP_REQUIRES (context, (ntypes == int(sel_a.size())),	errors::InvalidArgument ("number of types should match the length of sel array"));
-    OP_REQUIRES (context, (ntypes == int(sel_r.size())),	errors::InvalidArgument ("number of types should match the length of sel array"));
+    OP_REQUIRES(context, (ntypes == int(sel_a.size())),
+                errors::InvalidArgument(
+                    "number of types should match the length of sel array"));
+    OP_REQUIRES(context, (ntypes == int(sel_r.size())),
+                errors::InvalidArgument(
+                    "number of types should match the length of sel array"));
 
-    for (int kk = 0; kk < nsamples; ++kk){
+    for (int kk = 0; kk < nsamples; ++kk) {
       // set region
-      boxtensor_t boxt [9] = {0};
+      boxtensor_t boxt[9] = {0};
       for (int dd = 0; dd < 9; ++dd) {
-	boxt[dd] = box(kk, dd);
+        boxt[dd] = box(kk, dd);
       }
-      SimulationRegion region;
-      region.reinitBox (boxt);
+      SimulationRegion region;
+      region.reinitBox(boxt);
 
       // set & normalize coord
-      std::vector d_coord3 (nall*3);
-      for (int ii = 0; ii < nall; ++ii){
-	for (int dd = 0; dd < 3; ++dd){
-	  d_coord3[ii*3+dd] = coord(kk, ii*3+dd);
-	}
-	if (b_norm_atom){
-	  compute_t inter[3];
-	  region.phys2Inter (inter, &d_coord3[3*ii]);
-	  for (int dd = 0; dd < 3; ++dd){
-	    if      (inter[dd] < 0 ) inter[dd] += 1.;
-	    else if (inter[dd] >= 1) inter[dd] -= 1.;
-	  }
-	  region.inter2Phys (&d_coord3[3*ii], inter);
-	}
+      std::vector d_coord3(nall * 3);
+      for (int ii = 0; ii < nall; ++ii) {
+        for (int dd = 0; dd < 3; ++dd) {
+          d_coord3[ii * 3 + dd] = coord(kk, ii * 3 + dd);
+        }
+        if (b_norm_atom) {
+          compute_t inter[3];
+          region.phys2Inter(inter, &d_coord3[3 * ii]);
+          for (int dd = 0; dd < 3; ++dd) {
+            if (inter[dd] < 0)
+              inter[dd] += 1.;
+            else if (inter[dd] >= 1)
+              inter[dd] -= 1.;
+          }
+          region.inter2Phys(&d_coord3[3 * ii], inter);
+        }
       }
-      
+
       // set efield
       std::vector d_ef(nloc * 3);
-      for (int ii = 0; ii < nloc; ++ii){
-	for (int dd = 0; dd < 3; ++dd){
-	  d_ef[ii*3+dd] = ef(kk, ii*3+dd);
-	}
+      for (int ii = 0; ii < nloc; ++ii) {
+        for (int dd = 0; dd < 3; ++dd) {
+          d_ef[ii * 3 + dd] = ef(kk, ii * 3 + dd);
+        }
       }
 
       // set type
-      std::vector d_type (nall);
+      std::vector d_type(nall);
       for (int ii = 0; ii < nall; ++ii) d_type[ii] = type(kk, ii);
 
       // build nlist
-      std::vector > d_nlist_a;
-      std::vector > d_nlist_r;
+      std::vector > d_nlist_a;
+      std::vector > d_nlist_r;
       std::vector nlist_map;
       bool b_nlist_map = false;
-      if (nei_mode == 3) {	
-	int * pilist, *pjrange, *pjlist;
-	memcpy (&pilist, &mesh(4), sizeof(int *));
-	memcpy (&pjrange, &mesh(8), sizeof(int *));
-	memcpy (&pjlist, &mesh(12), sizeof(int *));
-	int inum = mesh(1);
-	assert (inum == nloc);
-	d_nlist_a.resize (inum);
-	d_nlist_r.resize (inum);
-	for (unsigned ii = 0; ii < inum; ++ii){
-	  d_nlist_r.reserve (pjrange[inum] / inum + 10);
-	}
-	for (unsigned ii = 0; ii < inum; ++ii){
-	  int i_idx = pilist[ii];
-	  for (unsigned jj = pjrange[ii]; jj < pjrange[ii+1]; ++jj){
-	    int j_idx = pjlist[jj];
-	    d_nlist_r[i_idx].push_back (j_idx);
-	  }
-	}
-      }
-      else if (nei_mode == 2) {
-	std::vector nat_stt = {mesh(1-1), mesh(2-1), mesh(3-1)};
-	std::vector nat_end = {mesh(4-1), mesh(5-1), mesh(6-1)};
-	std::vector ext_stt = {mesh(7-1), mesh(8-1), mesh(9-1)};
-	std::vector ext_end = {mesh(10-1), mesh(11-1), mesh(12-1)};
-	std::vector global_grid (3);
-	for (int dd = 0; dd < 3; ++dd) global_grid[dd] = nat_end[dd] - nat_stt[dd];
-	::build_nlist (d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r, nat_stt, nat_end, ext_stt, ext_end, region, global_grid);
-      }
-      else if (nei_mode == 1) {
-	std::vector bk_d_coord3 = d_coord3;
-	std::vector bk_d_type = d_type;
-	std::vector ncell, ngcell;
-	copy_coord(d_coord3, d_type, nlist_map, ncell, ngcell, bk_d_coord3, bk_d_type, rcut_r, region);	
-	b_nlist_map = true;
-	std::vector nat_stt(3, 0);
-	std::vector ext_stt(3), ext_end(3);
-	for (int dd = 0; dd < 3; ++dd){
-	  ext_stt[dd] = -ngcell[dd];
-	  ext_end[dd] = ncell[dd] + ngcell[dd];
-	}
-	::build_nlist (d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-      }
-      else if (nei_mode == -1){
-	::build_nlist (d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL);
-      }
-      else {
-	throw deepmd::deepmd_exception("unknow neighbor mode");
+      if (nei_mode == 3) {
+        int *pilist, *pjrange, *pjlist;
+        memcpy(&pilist, &mesh(4), sizeof(int*));
+        memcpy(&pjrange, &mesh(8), sizeof(int*));
+        memcpy(&pjlist, &mesh(12), sizeof(int*));
+        int inum = mesh(1);
+        assert(inum == nloc);
+        d_nlist_a.resize(inum);
+        d_nlist_r.resize(inum);
+        for (unsigned ii = 0; ii < inum; ++ii) {
+          d_nlist_r.reserve(pjrange[inum] / inum + 10);
+        }
+        for (unsigned ii = 0; ii < inum; ++ii) {
+          int i_idx = pilist[ii];
+          for (unsigned jj = pjrange[ii]; jj < pjrange[ii + 1]; ++jj) {
+            int j_idx = pjlist[jj];
+            d_nlist_r[i_idx].push_back(j_idx);
+          }
+        }
+      } else if (nei_mode == 2) {
+        std::vector nat_stt = {mesh(1 - 1), mesh(2 - 1), mesh(3 - 1)};
+        std::vector nat_end = {mesh(4 - 1), mesh(5 - 1), mesh(6 - 1)};
+        std::vector ext_stt = {mesh(7 - 1), mesh(8 - 1), mesh(9 - 1)};
+        std::vector ext_end = {mesh(10 - 1), mesh(11 - 1), mesh(12 - 1)};
+        std::vector global_grid(3);
+        for (int dd = 0; dd < 3; ++dd)
+          global_grid[dd] = nat_end[dd] - nat_stt[dd];
+        ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r,
+                      nat_stt, nat_end, ext_stt, ext_end, region, global_grid);
+      } else if (nei_mode == 1) {
+        std::vector bk_d_coord3 = d_coord3;
+        std::vector bk_d_type = d_type;
+        std::vector ncell, ngcell;
+        copy_coord(d_coord3, d_type, nlist_map, ncell, ngcell, bk_d_coord3,
+                   bk_d_type, rcut_r, region);
+        b_nlist_map = true;
+        std::vector nat_stt(3, 0);
+        std::vector ext_stt(3), ext_end(3);
+        for (int dd = 0; dd < 3; ++dd) {
+          ext_stt[dd] = -ngcell[dd];
+          ext_end[dd] = ncell[dd] + ngcell[dd];
+        }
+        ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r,
+                      nat_stt, ncell, ext_stt, ext_end, region, ncell);
+      } else if (nei_mode == -1) {
+        ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL);
+      } else {
+        throw deepmd::deepmd_exception("unknow neighbor mode");
       }
 
       // loop over atoms, compute descriptors for each atom
-#pragma omp parallel for 
-      for (int ii = 0; ii < nloc; ++ii){
-	std::vector fmt_nlist_a;
-	std::vector fmt_nlist_r;
-	int ret = -1;
-	if (fill_nei_a){
-	  if ((ret = format_nlist_i_fill_a (fmt_nlist_a, fmt_nlist_r, d_coord3, ntypes, d_type, region, b_pbc, ii, d_nlist_a[ii], d_nlist_r[ii], rcut_r, sec_a, sec_r)) != -1){
-	    if (count_nei_idx_overflow == 0) {
-	      std::cout << "WARNING: Radial neighbor list length of type " << ret << " is not enough" << std::endl;
-	      flush(std::cout);
-	      count_nei_idx_overflow ++;
-	    }
-	  }
-	}
+#pragma omp parallel for
+      for (int ii = 0; ii < nloc; ++ii) {
+        std::vector fmt_nlist_a;
+        std::vector fmt_nlist_r;
+        int ret = -1;
+        if (fill_nei_a) {
+          if ((ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, d_coord3,
+                                           ntypes, d_type, region, b_pbc, ii,
+                                           d_nlist_a[ii], d_nlist_r[ii], rcut_r,
+                                           sec_a, sec_r)) != -1) {
+            if (count_nei_idx_overflow == 0) {
+              std::cout << "WARNING: Radial neighbor list length of type "
+                        << ret << " is not enough" << std::endl;
+              flush(std::cout);
+              count_nei_idx_overflow++;
+            }
+          }
+        }
 
-	std::vector d_descrpt_a;
-	std::vector d_descrpt_a_deriv;
-	std::vector d_descrpt_r;
-	std::vector d_descrpt_r_deriv;
-	std::vector d_rij_a;
-	std::vector d_rij_r;      
-	compute_descriptor_se_a_ef_para (d_descrpt_a,
-					 d_descrpt_a_deriv,
-					 d_rij_a,
-					 d_coord3,
-					 ntypes, 
-					 d_type,
-					 region, 
-					 b_pbc,
-					 d_ef,
-					 ii, 
-					 fmt_nlist_a,
-					 sec_a, 
-					 rcut_r_smth, 
-					 rcut_r);
+        std::vector d_descrpt_a;
+        std::vector d_descrpt_a_deriv;
+        std::vector d_descrpt_r;
+        std::vector d_descrpt_r_deriv;
+        std::vector d_rij_a;
+        std::vector d_rij_r;
+        compute_descriptor_se_a_ef_para(
+            d_descrpt_a, d_descrpt_a_deriv, d_rij_a, d_coord3, ntypes, d_type,
+            region, b_pbc, d_ef, ii, fmt_nlist_a, sec_a, rcut_r_smth, rcut_r);
 
-	// check sizes
-	assert (d_descrpt_a.size() == ndescrpt_a);
-	assert (d_descrpt_a_deriv.size() == ndescrpt_a * 3);
-	assert (d_rij_a.size() == nnei_a * 3);
-	assert (int(fmt_nlist_a.size()) == nnei_a);
-	// record outputs
-	for (int jj = 0; jj < ndescrpt_a; ++jj) {
-	  descrpt(kk, ii * ndescrpt + jj) = (d_descrpt_a[jj] - avg(d_type[ii], jj)) / std(d_type[ii], jj);
-	}
-	for (int jj = 0; jj < ndescrpt_a * 3; ++jj) {
-	  descrpt_deriv(kk, ii * ndescrpt * 3 + jj) = d_descrpt_a_deriv[jj] / std(d_type[ii], jj/3);
-	}
-	for (int jj = 0; jj < nnei_a * 3; ++jj){
-	  rij (kk, ii * nnei * 3 + jj) = d_rij_a[jj];
-	}
-	for (int jj = 0; jj < nnei_a; ++jj){
-	  int record = fmt_nlist_a[jj];
-	  if (b_nlist_map && record >= 0) {
-	    record = nlist_map[record];
-	  }
-	  nlist (kk, ii * nnei + jj) = record;
-	}
+        // check sizes
+        assert(d_descrpt_a.size() == ndescrpt_a);
+        assert(d_descrpt_a_deriv.size() == ndescrpt_a * 3);
+        assert(d_rij_a.size() == nnei_a * 3);
+        assert(int(fmt_nlist_a.size()) == nnei_a);
+        // record outputs
+        for (int jj = 0; jj < ndescrpt_a; ++jj) {
+          descrpt(kk, ii * ndescrpt + jj) =
+              (d_descrpt_a[jj] - avg(d_type[ii], jj)) / std(d_type[ii], jj);
+        }
+        for (int jj = 0; jj < ndescrpt_a * 3; ++jj) {
+          descrpt_deriv(kk, ii * ndescrpt * 3 + jj) =
+              d_descrpt_a_deriv[jj] / std(d_type[ii], jj / 3);
+        }
+        for (int jj = 0; jj < nnei_a * 3; ++jj) {
+          rij(kk, ii * nnei * 3 + jj) = d_rij_a[jj];
+        }
+        for (int jj = 0; jj < nnei_a; ++jj) {
+          int record = fmt_nlist_a[jj];
+          if (b_nlist_map && record >= 0) {
+            record = nlist_map[record];
+          }
+          nlist(kk, ii * nnei + jj) = record;
+        }
       }
     }
   }
-private:
+
+ private:
   float rcut_a;
   float rcut_r;
   float rcut_r_smth;
@@ -348,20 +374,18 @@ class DescrptSeAEfParaOp : public OpKernel {
   int nnei, nnei_a, nnei_r;
   bool fill_nei_a;
   int count_nei_idx_overflow;
-  void 
-  cum_sum (std::vector & sec,
-	   const std::vector & n_sel) const {
-    sec.resize (n_sel.size() + 1);
+  void cum_sum(std::vector& sec, const std::vector& n_sel) const {
+    sec.resize(n_sel.size() + 1);
     sec[0] = 0;
-    for (int ii = 1; ii < sec.size(); ++ii){
-      sec[ii] = sec[ii-1] + n_sel[ii-1];
+    for (int ii = 1; ii < sec.size(); ++ii) {
+      sec[ii] = sec[ii - 1] + n_sel[ii - 1];
     }
   }
 };
 
-#define REGISTER_CPU(T)                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("DescrptSeAEfPara").Device(DEVICE_CPU).TypeConstraint("T"),                 \
-    DescrptSeAEfParaOp); 
+#define REGISTER_CPU(T)                                                   \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("DescrptSeAEfPara").Device(DEVICE_CPU).TypeConstraint("T"), \
+      DescrptSeAEfParaOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
diff --git a/source/op/descrpt_se_a_ef_vert.cc b/source/op/descrpt_se_a_ef_vert.cc
index 4ef76f8e0f..1be705ea77 100644
--- a/source/op/descrpt_se_a_ef_vert.cc
+++ b/source/op/descrpt_se_a_ef_vert.cc
@@ -1,43 +1,44 @@
-#include "custom_op.h"
 #include "ComputeDescriptor.h"
-#include "neighbor_list.h"
-#include "fmt_nlist.h"
+#include "custom_op.h"
 #include "errors.h"
+#include "fmt_nlist.h"
+#include "neighbor_list.h"
 
-typedef double boxtensor_t ;
+typedef double boxtensor_t;
 typedef double compute_t;
 
 REGISTER_OP("DescrptSeAEfVert")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("coord: T")
-.Input("type: int32")
-.Input("natoms: int32")
-.Input("box: T")
-.Input("mesh: int32")
-.Input("ef: T")
-.Input("davg: T")
-.Input("dstd: T")
-.Attr("rcut_a: float")
-.Attr("rcut_r: float")
-.Attr("rcut_r_smth: float")
-.Attr("sel_a: list(int)")
-.Attr("sel_r: list(int)")
-.Output("descrpt: T")
-.Output("descrpt_deriv: T")
-.Output("rij: T")
-.Output("nlist: int32");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("coord: T")
+    .Input("type: int32")
+    .Input("natoms: int32")
+    .Input("box: T")
+    .Input("mesh: int32")
+    .Input("ef: T")
+    .Input("davg: T")
+    .Input("dstd: T")
+    .Attr("rcut_a: float")
+    .Attr("rcut_r: float")
+    .Attr("rcut_r_smth: float")
+    .Attr("sel_a: list(int)")
+    .Attr("sel_r: list(int)")
+    .Output("descrpt: T")
+    .Output("descrpt_deriv: T")
+    .Output("rij: T")
+    .Output("nlist: int32");
 
-template
+template 
 class DescrptSeAEfVertOp : public OpKernel {
-public:
-  explicit DescrptSeAEfVertOp(OpKernelConstruction* context) : OpKernel(context) {
+ public:
+  explicit DescrptSeAEfVertOp(OpKernelConstruction* context)
+      : OpKernel(context) {
     OP_REQUIRES_OK(context, context->GetAttr("rcut_a", &rcut_a));
     OP_REQUIRES_OK(context, context->GetAttr("rcut_r", &rcut_r));
     OP_REQUIRES_OK(context, context->GetAttr("rcut_r_smth", &rcut_r_smth));
     OP_REQUIRES_OK(context, context->GetAttr("sel_a", &sel_a));
     OP_REQUIRES_OK(context, context->GetAttr("sel_r", &sel_r));
-    cum_sum (sec_a, sel_a);
-    cum_sum (sec_r, sel_r);
+    cum_sum(sec_a, sel_a);
+    cum_sum(sec_r, sel_r);
     ndescrpt_a = sec_a.back() * 4;
     ndescrpt_r = sec_r.back() * 1;
     ndescrpt = ndescrpt_a + ndescrpt_r;
@@ -49,73 +50,97 @@ class DescrptSeAEfVertOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& coord_tensor	= context->input(context_input_index++);
-    const Tensor& type_tensor	= context->input(context_input_index++);
-    const Tensor& natoms_tensor	= context->input(context_input_index++);
-    const Tensor& box_tensor	= context->input(context_input_index++);
-    const Tensor& mesh_tensor	= context->input(context_input_index++);
-    const Tensor& ef_tensor	= context->input(context_input_index++);
-    const Tensor& avg_tensor	= context->input(context_input_index++);
-    const Tensor& std_tensor	= context->input(context_input_index++);
+    const Tensor& coord_tensor = context->input(context_input_index++);
+    const Tensor& type_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
+    const Tensor& box_tensor = context->input(context_input_index++);
+    const Tensor& mesh_tensor = context->input(context_input_index++);
+    const Tensor& ef_tensor = context->input(context_input_index++);
+    const Tensor& avg_tensor = context->input(context_input_index++);
+    const Tensor& std_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    OP_REQUIRES (context, (coord_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of coord should be 2"));
-    OP_REQUIRES (context, (type_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of type should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),	errors::InvalidArgument ("Dim of natoms should be 1"));
-    OP_REQUIRES (context, (box_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of box should be 2"));
-    OP_REQUIRES (context, (mesh_tensor.shape().dims() == 1),	errors::InvalidArgument ("Dim of mesh should be 1"));
-    OP_REQUIRES (context, (ef_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of ef should be 2"));
-    OP_REQUIRES (context, (avg_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of avg should be 2"));
-    OP_REQUIRES (context, (std_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of std should be 2"));
-    OP_REQUIRES (context, (fill_nei_a),				errors::InvalidArgument ("Rotational free descriptor only support the case rcut_a < 0"));
-    OP_REQUIRES (context, (sec_r.back() == 0),			errors::InvalidArgument ("Rotational free descriptor only support all-angular information: sel_r should be all zero."));
+    OP_REQUIRES(context, (coord_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of coord should be 2"));
+    OP_REQUIRES(context, (type_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of type should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (box_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of box should be 2"));
+    OP_REQUIRES(context, (mesh_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of mesh should be 1"));
+    OP_REQUIRES(context, (ef_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of ef should be 2"));
+    OP_REQUIRES(context, (avg_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of avg should be 2"));
+    OP_REQUIRES(context, (std_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of std should be 2"));
+    OP_REQUIRES(
+        context, (fill_nei_a),
+        errors::InvalidArgument(
+            "Rotational free descriptor only support the case rcut_a < 0"));
+    OP_REQUIRES(context, (sec_r.back() == 0),
+                errors::InvalidArgument(
+                    "Rotational free descriptor only support all-angular "
+                    "information: sel_r should be all zero."));
 
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),		errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
     int nloc = natoms(0);
     int nall = natoms(1);
     int ntypes = natoms_tensor.shape().dim_size(0) - 2;
     int nsamples = coord_tensor.shape().dim_size(0);
 
     // check the sizes
-    OP_REQUIRES (context, (nsamples == type_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nsamples == box_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nsamples == ef_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (ntypes == avg_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of avg should be ntype"));
-    OP_REQUIRES (context, (ntypes == std_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of std should be ntype"));
+    OP_REQUIRES(context, (nsamples == type_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nsamples == box_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nsamples == ef_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (ntypes == avg_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of avg should be ntype"));
+    OP_REQUIRES(context, (ntypes == std_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of std should be ntype"));
 
-    OP_REQUIRES (context, (nall * 3 == coord_tensor.shape().dim_size(1)),	errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (nall == type_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (9 == box_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of box should be 9"));
-    OP_REQUIRES (context, (nloc * 3 == ef_tensor.shape().dim_size(1)),			errors::InvalidArgument ("number of ef should be 3"));
-    OP_REQUIRES (context, (ndescrpt == avg_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of avg should be ndescrpt"));
-    OP_REQUIRES (context, (ndescrpt == std_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of std should be ndescrpt"));
+    OP_REQUIRES(context, (nall * 3 == coord_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (nall == type_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (9 == box_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of box should be 9"));
+    OP_REQUIRES(context, (nloc * 3 == ef_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of ef should be 3"));
+    OP_REQUIRES(context, (ndescrpt == avg_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of avg should be ndescrpt"));
+    OP_REQUIRES(context, (ndescrpt == std_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of std should be ndescrpt"));
 
     int nei_mode = 0;
     if (mesh_tensor.shape().dim_size(0) == 16) {
       // lammps neighbor list
       nei_mode = 3;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 12) {
+    } else if (mesh_tensor.shape().dim_size(0) == 12) {
       // user provided extended mesh
       nei_mode = 2;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 6) {
+    } else if (mesh_tensor.shape().dim_size(0) == 6) {
       // manual copied pbc
-      assert (nloc == nall);
+      assert(nloc == nall);
       nei_mode = 1;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 0) {
+    } else if (mesh_tensor.shape().dim_size(0) == 0) {
       // no pbc
       nei_mode = -1;
-    }
-    else {
+    } else {
       throw deepmd::deepmd_exception("invalid mesh tensor");
     }
     bool b_pbc = true;
@@ -124,53 +149,52 @@ class DescrptSeAEfVertOp : public OpKernel {
       b_pbc = false;
     }
     bool b_norm_atom = false;
-    if (nei_mode == 1){
+    if (nei_mode == 1) {
       b_norm_atom = true;
     }
 
     // Create an output tensor
-    TensorShape descrpt_shape ;
-    descrpt_shape.AddDim (nsamples);
-    descrpt_shape.AddDim (nloc * ndescrpt);
-    TensorShape descrpt_deriv_shape ;
-    descrpt_deriv_shape.AddDim (nsamples);
-    descrpt_deriv_shape.AddDim (nloc * ndescrpt * 3);
-    TensorShape rij_shape ;
-    rij_shape.AddDim (nsamples);
-    rij_shape.AddDim (nloc * nnei * 3);
-    TensorShape nlist_shape ;
-    nlist_shape.AddDim (nsamples);
-    nlist_shape.AddDim (nloc * nnei);
+    TensorShape descrpt_shape;
+    descrpt_shape.AddDim(nsamples);
+    descrpt_shape.AddDim(nloc * ndescrpt);
+    TensorShape descrpt_deriv_shape;
+    descrpt_deriv_shape.AddDim(nsamples);
+    descrpt_deriv_shape.AddDim(nloc * ndescrpt * 3);
+    TensorShape rij_shape;
+    rij_shape.AddDim(nsamples);
+    rij_shape.AddDim(nloc * nnei * 3);
+    TensorShape nlist_shape;
+    nlist_shape.AddDim(nsamples);
+    nlist_shape.AddDim(nloc * nnei);
 
     int context_output_index = 0;
     Tensor* descrpt_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, 
-						     descrpt_shape, 
-						     &descrpt_tensor));
+    OP_REQUIRES_OK(
+        context, context->allocate_output(context_output_index++, descrpt_shape,
+                                          &descrpt_tensor));
     Tensor* descrpt_deriv_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, 
-						     descrpt_deriv_shape, 
-						     &descrpt_deriv_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     descrpt_deriv_shape,
+                                                     &descrpt_deriv_tensor));
     Tensor* rij_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, 
-						     rij_shape,
-						     &rij_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     rij_shape, &rij_tensor));
     Tensor* nlist_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, 
-						     nlist_shape,
-						     &nlist_tensor));
-    
-    auto coord	= coord_tensor	.matrix();
-    auto type	= type_tensor	.matrix();
-    auto box	= box_tensor	.matrix();
-    auto mesh	= mesh_tensor	.flat();
-    auto ef	= ef_tensor	.matrix();
-    auto avg	= avg_tensor	.matrix();
-    auto std	= std_tensor	.matrix();
-    auto descrpt	= descrpt_tensor	->matrix();
-    auto descrpt_deriv	= descrpt_deriv_tensor	->matrix();
-    auto rij		= rij_tensor		->matrix();
-    auto nlist		= nlist_tensor		->matrix();
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++, nlist_shape,
+                                            &nlist_tensor));
+
+    auto coord = coord_tensor.matrix();
+    auto type = type_tensor.matrix();
+    auto box = box_tensor.matrix();
+    auto mesh = mesh_tensor.flat();
+    auto ef = ef_tensor.matrix();
+    auto avg = avg_tensor.matrix();
+    auto std = std_tensor.matrix();
+    auto descrpt = descrpt_tensor->matrix();
+    auto descrpt_deriv = descrpt_deriv_tensor->matrix();
+    auto rij = rij_tensor->matrix();
+    auto nlist = nlist_tensor->matrix();
 
     // // check the types
     // int max_type_v = 0;
@@ -178,165 +202,167 @@ class DescrptSeAEfVertOp : public OpKernel {
     //   if (type(0, ii) > max_type_v) max_type_v = type(0, ii);
     // }
     // int ntypes = max_type_v + 1;
-    OP_REQUIRES (context, (ntypes == int(sel_a.size())),	errors::InvalidArgument ("number of types should match the length of sel array"));
-    OP_REQUIRES (context, (ntypes == int(sel_r.size())),	errors::InvalidArgument ("number of types should match the length of sel array"));
+    OP_REQUIRES(context, (ntypes == int(sel_a.size())),
+                errors::InvalidArgument(
+                    "number of types should match the length of sel array"));
+    OP_REQUIRES(context, (ntypes == int(sel_r.size())),
+                errors::InvalidArgument(
+                    "number of types should match the length of sel array"));
 
-    for (int kk = 0; kk < nsamples; ++kk){
+    for (int kk = 0; kk < nsamples; ++kk) {
       // set region
-      boxtensor_t boxt [9] = {0};
+      boxtensor_t boxt[9] = {0};
       for (int dd = 0; dd < 9; ++dd) {
-	boxt[dd] = box(kk, dd);
+        boxt[dd] = box(kk, dd);
       }
-      SimulationRegion region;
-      region.reinitBox (boxt);
+      SimulationRegion region;
+      region.reinitBox(boxt);
 
       // set & normalize coord
-      std::vector d_coord3 (nall*3);
-      for (int ii = 0; ii < nall; ++ii){
-	for (int dd = 0; dd < 3; ++dd){
-	  d_coord3[ii*3+dd] = coord(kk, ii*3+dd);
-	}
-	if (b_norm_atom){
-	  compute_t inter[3];
-	  region.phys2Inter (inter, &d_coord3[3*ii]);
-	  for (int dd = 0; dd < 3; ++dd){
-	    if      (inter[dd] < 0 ) inter[dd] += 1.;
-	    else if (inter[dd] >= 1) inter[dd] -= 1.;
-	  }
-	  region.inter2Phys (&d_coord3[3*ii], inter);
-	}
+      std::vector d_coord3(nall * 3);
+      for (int ii = 0; ii < nall; ++ii) {
+        for (int dd = 0; dd < 3; ++dd) {
+          d_coord3[ii * 3 + dd] = coord(kk, ii * 3 + dd);
+        }
+        if (b_norm_atom) {
+          compute_t inter[3];
+          region.phys2Inter(inter, &d_coord3[3 * ii]);
+          for (int dd = 0; dd < 3; ++dd) {
+            if (inter[dd] < 0)
+              inter[dd] += 1.;
+            else if (inter[dd] >= 1)
+              inter[dd] -= 1.;
+          }
+          region.inter2Phys(&d_coord3[3 * ii], inter);
+        }
       }
-      
+
       // set efield
       std::vector d_ef(nloc * 3);
-      for (int ii = 0; ii < nloc; ++ii){
-	for (int dd = 0; dd < 3; ++dd){
-	  d_ef[ii*3+dd] = ef(kk, ii*3+dd);
-	}
+      for (int ii = 0; ii < nloc; ++ii) {
+        for (int dd = 0; dd < 3; ++dd) {
+          d_ef[ii * 3 + dd] = ef(kk, ii * 3 + dd);
+        }
       }
 
       // set type
-      std::vector d_type (nall);
+      std::vector d_type(nall);
       for (int ii = 0; ii < nall; ++ii) d_type[ii] = type(kk, ii);
 
       // build nlist
-      std::vector > d_nlist_a;
-      std::vector > d_nlist_r;
+      std::vector > d_nlist_a;
+      std::vector > d_nlist_r;
       std::vector nlist_map;
       bool b_nlist_map = false;
-      if (nei_mode == 3) {	
-	int * pilist, *pjrange, *pjlist;
-	memcpy (&pilist, &mesh(4), sizeof(int *));
-	memcpy (&pjrange, &mesh(8), sizeof(int *));
-	memcpy (&pjlist, &mesh(12), sizeof(int *));
-	int inum = mesh(1);
-	assert (inum == nloc);
-	d_nlist_a.resize (inum);
-	d_nlist_r.resize (inum);
-	for (unsigned ii = 0; ii < inum; ++ii){
-	  d_nlist_r.reserve (pjrange[inum] / inum + 10);
-	}
-	for (unsigned ii = 0; ii < inum; ++ii){
-	  int i_idx = pilist[ii];
-	  for (unsigned jj = pjrange[ii]; jj < pjrange[ii+1]; ++jj){
-	    int j_idx = pjlist[jj];
-	    d_nlist_r[i_idx].push_back (j_idx);
-	  }
-	}
-      }
-      else if (nei_mode == 2) {
-	std::vector nat_stt = {mesh(1-1), mesh(2-1), mesh(3-1)};
-	std::vector nat_end = {mesh(4-1), mesh(5-1), mesh(6-1)};
-	std::vector ext_stt = {mesh(7-1), mesh(8-1), mesh(9-1)};
-	std::vector ext_end = {mesh(10-1), mesh(11-1), mesh(12-1)};
-	std::vector global_grid (3);
-	for (int dd = 0; dd < 3; ++dd) global_grid[dd] = nat_end[dd] - nat_stt[dd];
-	::build_nlist (d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r, nat_stt, nat_end, ext_stt, ext_end, region, global_grid);
-      }
-      else if (nei_mode == 1) {
-	std::vector bk_d_coord3 = d_coord3;
-	std::vector bk_d_type = d_type;
-	std::vector ncell, ngcell;
-	copy_coord(d_coord3, d_type, nlist_map, ncell, ngcell, bk_d_coord3, bk_d_type, rcut_r, region);	
-	b_nlist_map = true;
-	std::vector nat_stt(3, 0);
-	std::vector ext_stt(3), ext_end(3);
-	for (int dd = 0; dd < 3; ++dd){
-	  ext_stt[dd] = -ngcell[dd];
-	  ext_end[dd] = ncell[dd] + ngcell[dd];
-	}
-	::build_nlist (d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-      }
-      else if (nei_mode == -1){
-	::build_nlist (d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL);
-      }
-      else {
-	throw deepmd::deepmd_exception("unknow neighbor mode");
+      if (nei_mode == 3) {
+        int *pilist, *pjrange, *pjlist;
+        memcpy(&pilist, &mesh(4), sizeof(int*));
+        memcpy(&pjrange, &mesh(8), sizeof(int*));
+        memcpy(&pjlist, &mesh(12), sizeof(int*));
+        int inum = mesh(1);
+        assert(inum == nloc);
+        d_nlist_a.resize(inum);
+        d_nlist_r.resize(inum);
+        for (unsigned ii = 0; ii < inum; ++ii) {
+          d_nlist_r.reserve(pjrange[inum] / inum + 10);
+        }
+        for (unsigned ii = 0; ii < inum; ++ii) {
+          int i_idx = pilist[ii];
+          for (unsigned jj = pjrange[ii]; jj < pjrange[ii + 1]; ++jj) {
+            int j_idx = pjlist[jj];
+            d_nlist_r[i_idx].push_back(j_idx);
+          }
+        }
+      } else if (nei_mode == 2) {
+        std::vector nat_stt = {mesh(1 - 1), mesh(2 - 1), mesh(3 - 1)};
+        std::vector nat_end = {mesh(4 - 1), mesh(5 - 1), mesh(6 - 1)};
+        std::vector ext_stt = {mesh(7 - 1), mesh(8 - 1), mesh(9 - 1)};
+        std::vector ext_end = {mesh(10 - 1), mesh(11 - 1), mesh(12 - 1)};
+        std::vector global_grid(3);
+        for (int dd = 0; dd < 3; ++dd)
+          global_grid[dd] = nat_end[dd] - nat_stt[dd];
+        ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r,
+                      nat_stt, nat_end, ext_stt, ext_end, region, global_grid);
+      } else if (nei_mode == 1) {
+        std::vector bk_d_coord3 = d_coord3;
+        std::vector bk_d_type = d_type;
+        std::vector ncell, ngcell;
+        copy_coord(d_coord3, d_type, nlist_map, ncell, ngcell, bk_d_coord3,
+                   bk_d_type, rcut_r, region);
+        b_nlist_map = true;
+        std::vector nat_stt(3, 0);
+        std::vector ext_stt(3), ext_end(3);
+        for (int dd = 0; dd < 3; ++dd) {
+          ext_stt[dd] = -ngcell[dd];
+          ext_end[dd] = ncell[dd] + ngcell[dd];
+        }
+        ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, nloc, rcut_a, rcut_r,
+                      nat_stt, ncell, ext_stt, ext_end, region, ncell);
+      } else if (nei_mode == -1) {
+        ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, rcut_a, rcut_r, NULL);
+      } else {
+        throw deepmd::deepmd_exception("unknow neighbor mode");
       }
 
       // loop over atoms, compute descriptors for each atom
-#pragma omp parallel for 
-      for (int ii = 0; ii < nloc; ++ii){
-	std::vector fmt_nlist_a;
-	std::vector fmt_nlist_r;
-	int ret = -1;
-	if (fill_nei_a){
-	  if ((ret = format_nlist_i_fill_a (fmt_nlist_a, fmt_nlist_r, d_coord3, ntypes, d_type, region, b_pbc, ii, d_nlist_a[ii], d_nlist_r[ii], rcut_r, sec_a, sec_r)) != -1){
-	    if (count_nei_idx_overflow == 0) {
-	      std::cout << "WARNING: Radial neighbor list length of type " << ret << " is not enough" << std::endl;
-	      flush(std::cout);
-	      count_nei_idx_overflow ++;
-	    }
-	  }
-	}
+#pragma omp parallel for
+      for (int ii = 0; ii < nloc; ++ii) {
+        std::vector fmt_nlist_a;
+        std::vector fmt_nlist_r;
+        int ret = -1;
+        if (fill_nei_a) {
+          if ((ret = format_nlist_i_fill_a(fmt_nlist_a, fmt_nlist_r, d_coord3,
+                                           ntypes, d_type, region, b_pbc, ii,
+                                           d_nlist_a[ii], d_nlist_r[ii], rcut_r,
+                                           sec_a, sec_r)) != -1) {
+            if (count_nei_idx_overflow == 0) {
+              std::cout << "WARNING: Radial neighbor list length of type "
+                        << ret << " is not enough" << std::endl;
+              flush(std::cout);
+              count_nei_idx_overflow++;
+            }
+          }
+        }
 
-	std::vector d_descrpt_a;
-	std::vector d_descrpt_a_deriv;
-	std::vector d_descrpt_r;
-	std::vector d_descrpt_r_deriv;
-	std::vector d_rij_a;
-	std::vector d_rij_r;      
-	compute_descriptor_se_a_ef_vert (d_descrpt_a,
-					 d_descrpt_a_deriv,
-					 d_rij_a,
-					 d_coord3,
-					 ntypes, 
-					 d_type,
-					 region, 
-					 b_pbc,
-					 d_ef,
-					 ii, 
-					 fmt_nlist_a,
-					 sec_a, 
-					 rcut_r_smth, 
-					 rcut_r);
+        std::vector d_descrpt_a;
+        std::vector d_descrpt_a_deriv;
+        std::vector d_descrpt_r;
+        std::vector d_descrpt_r_deriv;
+        std::vector d_rij_a;
+        std::vector d_rij_r;
+        compute_descriptor_se_a_ef_vert(
+            d_descrpt_a, d_descrpt_a_deriv, d_rij_a, d_coord3, ntypes, d_type,
+            region, b_pbc, d_ef, ii, fmt_nlist_a, sec_a, rcut_r_smth, rcut_r);
 
-	// check sizes
-	assert (d_descrpt_a.size() == ndescrpt_a);
-	assert (d_descrpt_a_deriv.size() == ndescrpt_a * 3);
-	assert (d_rij_a.size() == nnei_a * 3);
-	assert (int(fmt_nlist_a.size()) == nnei_a);
-	// record outputs
-	for (int jj = 0; jj < ndescrpt_a; ++jj) {
-	  descrpt(kk, ii * ndescrpt + jj) = (d_descrpt_a[jj] - avg(d_type[ii], jj)) / std(d_type[ii], jj);
-	}
-	for (int jj = 0; jj < ndescrpt_a * 3; ++jj) {
-	  descrpt_deriv(kk, ii * ndescrpt * 3 + jj) = d_descrpt_a_deriv[jj] / std(d_type[ii], jj/3);
-	}
-	for (int jj = 0; jj < nnei_a * 3; ++jj){
-	  rij (kk, ii * nnei * 3 + jj) = d_rij_a[jj];
-	}
-	for (int jj = 0; jj < nnei_a; ++jj){
-	  int record = fmt_nlist_a[jj];
-	  if (b_nlist_map && record >= 0) {
-	    record = nlist_map[record];
-	  }
-	  nlist (kk, ii * nnei + jj) = record;
-	}
+        // check sizes
+        assert(d_descrpt_a.size() == ndescrpt_a);
+        assert(d_descrpt_a_deriv.size() == ndescrpt_a * 3);
+        assert(d_rij_a.size() == nnei_a * 3);
+        assert(int(fmt_nlist_a.size()) == nnei_a);
+        // record outputs
+        for (int jj = 0; jj < ndescrpt_a; ++jj) {
+          descrpt(kk, ii * ndescrpt + jj) =
+              (d_descrpt_a[jj] - avg(d_type[ii], jj)) / std(d_type[ii], jj);
+        }
+        for (int jj = 0; jj < ndescrpt_a * 3; ++jj) {
+          descrpt_deriv(kk, ii * ndescrpt * 3 + jj) =
+              d_descrpt_a_deriv[jj] / std(d_type[ii], jj / 3);
+        }
+        for (int jj = 0; jj < nnei_a * 3; ++jj) {
+          rij(kk, ii * nnei * 3 + jj) = d_rij_a[jj];
+        }
+        for (int jj = 0; jj < nnei_a; ++jj) {
+          int record = fmt_nlist_a[jj];
+          if (b_nlist_map && record >= 0) {
+            record = nlist_map[record];
+          }
+          nlist(kk, ii * nnei + jj) = record;
+        }
       }
     }
   }
-private:
+
+ private:
   float rcut_a;
   float rcut_r;
   float rcut_r_smth;
@@ -348,20 +374,18 @@ class DescrptSeAEfVertOp : public OpKernel {
   int nnei, nnei_a, nnei_r;
   bool fill_nei_a;
   int count_nei_idx_overflow;
-  void 
-  cum_sum (std::vector & sec,
-	   const std::vector & n_sel) const {
-    sec.resize (n_sel.size() + 1);
+  void cum_sum(std::vector& sec, const std::vector& n_sel) const {
+    sec.resize(n_sel.size() + 1);
     sec[0] = 0;
-    for (int ii = 1; ii < sec.size(); ++ii){
-      sec[ii] = sec[ii-1] + n_sel[ii-1];
+    for (int ii = 1; ii < sec.size(); ++ii) {
+      sec[ii] = sec[ii - 1] + n_sel[ii - 1];
     }
   }
 };
 
-#define REGISTER_CPU(T)                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("DescrptSeAEfVert").Device(DEVICE_CPU).TypeConstraint("T"),                 \
-    DescrptSeAEfVertOp); 
+#define REGISTER_CPU(T)                                                   \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("DescrptSeAEfVert").Device(DEVICE_CPU).TypeConstraint("T"), \
+      DescrptSeAEfVertOp);
 REGISTER_CPU(float);
-REGISTER_CPU(double);
\ No newline at end of file
+REGISTER_CPU(double);
diff --git a/source/op/dotmul_flt_nvnmd.cc b/source/op/dotmul_flt_nvnmd.cc
index b120c1a547..dad89e033d 100644
--- a/source/op/dotmul_flt_nvnmd.cc
+++ b/source/op/dotmul_flt_nvnmd.cc
@@ -14,7 +14,7 @@ we change the DSP into 22 x 22
 in the float64:
 1 bit sign
 11 bits exponent
-52 bits fraction 
+52 bits fraction
 
 # Attr
 modx = 0: normalize x[hh, : , : ]
@@ -26,152 +26,145 @@ modw = 1: normalize w[hh, : , kk]
 // --------------------------------------------------------------------
 //
 
-
 //- import the library of tensorflow
-#include "custom_op.h"
 #include 
-#include "math.h"
+
+#include "custom_op.h"
 #include "env_mat_nvnmd.h"
+#include "math.h"
 
 using namespace tensorflow;
 
-
 template 
 void split_flt(T x, int64_t &sign, int64_t &expo, int64_t &mant);
 
 // read matmul_flt_nvnmd.cc
-template  // float and double
+template   // float and double
 void find_max_expo(int64_t &max_expo, T *x, int64_t M);
 
 // read matmul_flt_nvnmd.cc
-template  // float and double
+template   // float and double
 void find_max_expo(int64_t &max_expo, T *x, int64_t N, int64_t M);
 
 //- register the operator
 REGISTER_OP("DotmulFltNvnmd")
-  .Attr("T: {float, double} = DT_DOUBLE")
-  .Input("x: T")
-  .Input("w: T")
-  .Output("y: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("x: T")
+    .Input("w: T")
+    .Output("y: T");
 
 //- create the operator class
 //* the class must inherit the OpKernel Class
 template 
 class DotmulFltNvnmdOp : public OpKernel {
-public:
-
-/// Constructor.
-explicit DotmulFltNvnmdOp(OpKernelConstruction* context) : OpKernel(context) {
-};
-
-
-/// Compute the descriptor
-/// param: context
-void Compute(OpKernelContext* context) override {
-  // check
-  DCHECK_EQ(2, context->num_inputs());
-  const Tensor& X = context->input(0);
-  const Tensor& W = context->input(1);
-
-  const TensorShape& shX = X.shape();
-  const TensorShape& shW = W.shape();
-  TensorShape shY;
-  DCHECK_EQ(shW.dims(), shX.dims());
-
-  int H, N, M;
-  if (shX.dims() == 3) {
-    H = shX.dim_size(0);
-    N = shX.dim_size(1);
-    M = shX.dim_size(2);
-
-    DCHECK_EQ(H, shW.dim_size(0));
-    DCHECK_EQ(N, shW.dim_size(1));
-    DCHECK_EQ(M, shW.dim_size(2));
-
-    shY.AddDim(H);
-    shY.AddDim(N);
-    shY.AddDim(1);
-  }
-  if (shX.dims() == 2) {
-    // process 2-dimension as 3-dimension
-    H = 1;
-    N = shX.dim_size(0);
-    M = shX.dim_size(1);
-
-    DCHECK_EQ(N, shW.dim_size(0));
-    DCHECK_EQ(M, shW.dim_size(1));
-
-    shY.AddDim(N);
-    shY.AddDim(1);
-  }
-
-  // create output
-  Tensor* Y = NULL;
-  OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
-
-  // compute
-  auto x = X.flat().data();
-  auto w = W.flat().data();
-  auto y = Y->flat().data();
-
-  int hh, ii, jj;
-
-  int nshift1, nshift2;
-  int64_t s;
-
-  U_Flt64_Int64 ufi1, ufi2, ufi3;
-  int64_t sign1, sign2, sign3;
-  int64_t expo1, expo2, expo3;
-  int64_t mant1, mant2, mant3;
-  int64_t expos;
-
-  int64_t expo_max1, expo_max2;
-  std::vector expo_max1s;
-  std::vector expo_max2s;
-  expo_max1s.resize(N);
-  expo_max2s.resize(N);
-
-  for (ii=0; ii>= NBIT_CUTF;
-      expos = expo_max1 - expo1;
-      expos = (expos > 63) ? 63 : expos;
-      mant1 >>= expos;
-      // w
-      split_flt(w[ii*M+jj], sign2, expo2, mant2);
-      mant2 >>= NBIT_CUTF;
-      expos = expo_max2 - expo2;
-      expos = (expos > 63) ? 63 : expos;
-      mant2 >>= expos;
-      // multiply
-      mant3 = mant1 * mant2;
-      mant3 = (sign1 ^ sign2) ? -mant3 : mant3;
-      s += mant3;
+ public:
+  /// Constructor.
+  explicit DotmulFltNvnmdOp(OpKernelConstruction *context)
+      : OpKernel(context){};
+
+  /// Compute the descriptor
+  /// param: context
+  void Compute(OpKernelContext *context) override {
+    // check
+    DCHECK_EQ(2, context->num_inputs());
+    const Tensor &X = context->input(0);
+    const Tensor &W = context->input(1);
+
+    const TensorShape &shX = X.shape();
+    const TensorShape &shW = W.shape();
+    TensorShape shY;
+    DCHECK_EQ(shW.dims(), shX.dims());
+
+    int H, N, M;
+    if (shX.dims() == 3) {
+      H = shX.dim_size(0);
+      N = shX.dim_size(1);
+      M = shX.dim_size(2);
+
+      DCHECK_EQ(H, shW.dim_size(0));
+      DCHECK_EQ(N, shW.dim_size(1));
+      DCHECK_EQ(M, shW.dim_size(2));
+
+      shY.AddDim(H);
+      shY.AddDim(N);
+      shY.AddDim(1);
     }
-    // y * 2^(e_a+e_b)
-    ufi3.nflt = FPTYPE(s) * pow(2.0, expo_max1 + expo_max2 - NBIT_FLTF - NBIT_FLTF);
-    ufi3.nint &= FLT_MASK;
-    y[ii] = ufi3.nflt;
-  } // loop ii
-} // Compute
-
-}; // DotmulFltNvnmdOp
+    if (shX.dims() == 2) {
+      // process 2-dimension as 3-dimension
+      H = 1;
+      N = shX.dim_size(0);
+      M = shX.dim_size(1);
 
+      DCHECK_EQ(N, shW.dim_size(0));
+      DCHECK_EQ(M, shW.dim_size(1));
 
+      shY.AddDim(N);
+      shY.AddDim(1);
+    }
 
-#define REGISTER_CPU(T) \
-REGISTER_KERNEL_BUILDER( \
-    Name("DotmulFltNvnmd").Device(DEVICE_CPU).TypeConstraint("T"), \
-    DotmulFltNvnmdOp);
-REGISTER_CPU(float);                  
+    // create output
+    Tensor *Y = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
+
+    // compute
+    auto x = X.flat().data();
+    auto w = W.flat().data();
+    auto y = Y->flat().data();
+
+    int hh, ii, jj;
+
+    int nshift1, nshift2;
+    int64_t s;
+
+    U_Flt64_Int64 ufi1, ufi2, ufi3;
+    int64_t sign1, sign2, sign3;
+    int64_t expo1, expo2, expo3;
+    int64_t mant1, mant2, mant3;
+    int64_t expos;
+
+    int64_t expo_max1, expo_max2;
+    std::vector expo_max1s;
+    std::vector expo_max2s;
+    expo_max1s.resize(N);
+    expo_max2s.resize(N);
+
+    for (ii = 0; ii < H * N; ii++) {
+      // find x max exponent
+      find_max_expo(expo_max1, (FPTYPE *)&x[ii * M], M);
+      find_max_expo(expo_max2, (FPTYPE *)&w[ii * M], M);
+      //
+      s = 0;
+      for (jj = 0; jj < M; jj++) {
+        // x
+        split_flt(x[ii * M + jj], sign1, expo1, mant1);
+        mant1 >>= NBIT_CUTF;
+        expos = expo_max1 - expo1;
+        expos = (expos > 63) ? 63 : expos;
+        mant1 >>= expos;
+        // w
+        split_flt(w[ii * M + jj], sign2, expo2, mant2);
+        mant2 >>= NBIT_CUTF;
+        expos = expo_max2 - expo2;
+        expos = (expos > 63) ? 63 : expos;
+        mant2 >>= expos;
+        // multiply
+        mant3 = mant1 * mant2;
+        mant3 = (sign1 ^ sign2) ? -mant3 : mant3;
+        s += mant3;
+      }
+      // y * 2^(e_a+e_b)
+      ufi3.nflt =
+          FPTYPE(s) * pow(2.0, expo_max1 + expo_max2 - NBIT_FLTF - NBIT_FLTF);
+      ufi3.nint &= FLT_MASK;
+      y[ii] = ufi3.nflt;
+    }  // loop ii
+  }    // Compute
+
+};  // DotmulFltNvnmdOp
+
+#define REGISTER_CPU(T)                                                 \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("DotmulFltNvnmd").Device(DEVICE_CPU).TypeConstraint("T"), \
+      DotmulFltNvnmdOp);
+REGISTER_CPU(float);
 REGISTER_CPU(double);
-
-
-
diff --git a/source/op/ewald_recp.cc b/source/op/ewald_recp.cc
index c9cc22b480..cbf93f5916 100644
--- a/source/op/ewald_recp.cc
+++ b/source/op/ewald_recp.cc
@@ -1,24 +1,24 @@
 #include "custom_op.h"
 #include "ewald.h"
 
-typedef double boxtensor_t ;
+typedef double boxtensor_t;
 typedef double compute_t;
 
 REGISTER_OP("EwaldRecp")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("coord: T")
-.Input("charge: T")
-.Input("natoms: int32")
-.Input("box: T")
-.Attr("ewald_beta: float")
-.Attr("ewald_h: float")
-.Output("energy: T")
-.Output("force: T")
-.Output("virial: T");
-
-template
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("coord: T")
+    .Input("charge: T")
+    .Input("natoms: int32")
+    .Input("box: T")
+    .Attr("ewald_beta: float")
+    .Attr("ewald_h: float")
+    .Output("energy: T")
+    .Output("force: T")
+    .Output("virial: T");
+
+template 
 class EwaldRecpOp : public OpKernel {
-public:
+ public:
   explicit EwaldRecpOp(OpKernelConstruction* context) : OpKernel(context) {
     float beta, spacing;
     OP_REQUIRES_OK(context, context->GetAttr("ewald_beta", &(beta)));
@@ -28,57 +28,71 @@ class EwaldRecpOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int cc = 0;
-    const Tensor& coord_tensor	= context->input(cc++);
-    const Tensor& charge_tensor	= context->input(cc++);
-    const Tensor& natoms_tensor	= context->input(cc++);
-    const Tensor& box_tensor	= context->input(cc++);
+    const Tensor& coord_tensor = context->input(cc++);
+    const Tensor& charge_tensor = context->input(cc++);
+    const Tensor& natoms_tensor = context->input(cc++);
+    const Tensor& box_tensor = context->input(cc++);
 
     // set size of the sample
-    OP_REQUIRES (context, (coord_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of coord should be 1"));
-    OP_REQUIRES (context, (charge_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of charge should be 1"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) == 1),	errors::InvalidArgument ("size of natoms should be 1"));
-    OP_REQUIRES (context, (box_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of box should be 1"));
-    auto natoms	= natoms_tensor.flat();
+    OP_REQUIRES(context, (coord_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of coord should be 1"));
+    OP_REQUIRES(context, (charge_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of charge should be 1"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) == 1),
+                errors::InvalidArgument("size of natoms should be 1"));
+    OP_REQUIRES(context, (box_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of box should be 1"));
+    auto natoms = natoms_tensor.flat();
     int nloc = natoms(0);
     int nsamples = coord_tensor.shape().dim_size(0) / (nloc * 3);
 
     // check the sizes
-    OP_REQUIRES (context, (nsamples * nloc * 3 == coord_tensor.shape().dim_size(0)),	errors::InvalidArgument ("coord  number of samples should match"));
-    OP_REQUIRES (context, (nsamples * nloc * 1 == charge_tensor.shape().dim_size(0)),	errors::InvalidArgument ("charge number of samples should match"));
-    OP_REQUIRES (context, (nsamples * 9 == box_tensor.shape().dim_size(0)),		errors::InvalidArgument ("box    number of samples should match"));
+    OP_REQUIRES(
+        context, (nsamples * nloc * 3 == coord_tensor.shape().dim_size(0)),
+        errors::InvalidArgument("coord  number of samples should match"));
+    OP_REQUIRES(
+        context, (nsamples * nloc * 1 == charge_tensor.shape().dim_size(0)),
+        errors::InvalidArgument("charge number of samples should match"));
+    OP_REQUIRES(
+        context, (nsamples * 9 == box_tensor.shape().dim_size(0)),
+        errors::InvalidArgument("box    number of samples should match"));
 
     // Create an output tensor
-    TensorShape energy_shape ;
-    energy_shape.AddDim (nsamples);
-    TensorShape force_shape ;
-    force_shape.AddDim (nsamples);
-    force_shape.AddDim (nloc * 3);
-    TensorShape virial_shape ;
-    virial_shape.AddDim (nsamples);
-    virial_shape.AddDim (9);
+    TensorShape energy_shape;
+    energy_shape.AddDim(nsamples);
+    TensorShape force_shape;
+    force_shape.AddDim(nsamples);
+    force_shape.AddDim(nloc * 3);
+    TensorShape virial_shape;
+    virial_shape.AddDim(nsamples);
+    virial_shape.AddDim(9);
 
     cc = 0;
     Tensor* energy_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(cc++, energy_shape, &energy_tensor));
+    OP_REQUIRES_OK(
+        context, context->allocate_output(cc++, energy_shape, &energy_tensor));
     Tensor* force_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(cc++, force_shape, &force_tensor));
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(cc++, force_shape, &force_tensor));
     Tensor* virial_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(cc++, virial_shape, &virial_tensor));
-    
-    auto coord	= coord_tensor	.flat();
-    auto charge	= charge_tensor	.flat();
-    auto box	= box_tensor	.flat();
-    auto energy	= energy_tensor	->flat();
-    auto force	= force_tensor	->matrix();
-    auto virial	= virial_tensor	->matrix();
-
-    for (int kk = 0; kk < nsamples; ++kk){
+    OP_REQUIRES_OK(
+        context, context->allocate_output(cc++, virial_shape, &virial_tensor));
+
+    auto coord = coord_tensor.flat();
+    auto charge = charge_tensor.flat();
+    auto box = box_tensor.flat();
+    auto energy = energy_tensor->flat();
+    auto force = force_tensor->matrix();
+    auto virial = virial_tensor->matrix();
+
+    for (int kk = 0; kk < nsamples; ++kk) {
       int box_iter = kk * 9;
       int coord_iter = kk * nloc * 3;
       int charge_iter = kk * nloc;
@@ -87,24 +101,26 @@ class EwaldRecpOp : public OpKernel {
       init_region_cpu(region, &box(box_iter));
 
       // set & normalize coord
-      std::vector d_coord3 (nloc*3);
-      for (int ii = 0; ii < nloc; ++ii){
-	FPTYPE inter[3];
-	convert_to_inter_cpu(inter, region, &coord(coord_iter + ii*3));
-	for (int dd = 0; dd < 3; ++dd){
-	  if      (inter[dd] < 0 ) inter[dd] += 1.;
-	  else if (inter[dd] >= 1) inter[dd] -= 1.;
-	}
-	convert_to_phys_cpu(&d_coord3[ii*3], region, inter);
+      std::vector d_coord3(nloc * 3);
+      for (int ii = 0; ii < nloc; ++ii) {
+        FPTYPE inter[3];
+        convert_to_inter_cpu(inter, region, &coord(coord_iter + ii * 3));
+        for (int dd = 0; dd < 3; ++dd) {
+          if (inter[dd] < 0)
+            inter[dd] += 1.;
+          else if (inter[dd] >= 1)
+            inter[dd] -= 1.;
+        }
+        convert_to_phys_cpu(&d_coord3[ii * 3], region, inter);
       }
 
       // set charge
-      std::vector d_charge (nloc);
+      std::vector d_charge(nloc);
       for (int ii = 0; ii < nloc; ++ii) d_charge[ii] = charge(charge_iter + ii);
 
       // prepare outputs std::vectors
       FPTYPE d_ener;
-      std::vector d_force(nloc*3);
+      std::vector d_force(nloc * 3);
       std::vector d_virial(9);
 
       // compute
@@ -112,21 +128,22 @@ class EwaldRecpOp : public OpKernel {
 
       // copy output
       energy(kk) = d_ener;
-      for (int ii = 0; ii < nloc * 3; ++ii){
-	force(kk, ii) = d_force[ii];
+      for (int ii = 0; ii < nloc * 3; ++ii) {
+        force(kk, ii) = d_force[ii];
       }
-      for (int ii = 0; ii < 9; ++ii){
-	virial(kk, ii) = d_virial[ii];
+      for (int ii = 0; ii < 9; ++ii) {
+        virial(kk, ii) = d_virial[ii];
       }
     }
   }
-private:
+
+ private:
   deepmd::EwaldParameters ep;
 };
 
-#define REGISTER_CPU(T)                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("EwaldRecp").Device(DEVICE_CPU).TypeConstraint("T"),                       \
-    EwaldRecpOp); 
+#define REGISTER_CPU(T)                                            \
+  REGISTER_KERNEL_BUILDER(                                         \
+      Name("EwaldRecp").Device(DEVICE_CPU).TypeConstraint("T"), \
+      EwaldRecpOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
diff --git a/source/op/flt_nvnmd.cc b/source/op/flt_nvnmd.cc
index f62fc7e984..1af9f21498 100644
--- a/source/op/flt_nvnmd.cc
+++ b/source/op/flt_nvnmd.cc
@@ -10,7 +10,7 @@ y = float(x)
 # float64:
 1 bit sign
 11 bits exponent
-52 bits fraction 
+52 bits fraction
 
 # float
 1 bit sign
@@ -26,91 +26,83 @@ y = float(x)
 // --------------------------------------------------------------------
 //
 
-
 //- import the library of tensorflow
-#include "custom_op.h"
 #include 
-#include "math.h"
+
+#include "custom_op.h"
 #include "env_mat_nvnmd.h"
+#include "math.h"
 
 using namespace tensorflow;
 
-
 //- register the operator
 REGISTER_OP("FltNvnmd")
-  .Attr("T: {float, double} = DT_DOUBLE")
-  .Input("x: T")
-  .Output("y: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("x: T")
+    .Output("y: T");
 
 //- create the operator class
 //* the class must inherit the OpKernel Class
 template 
 class FltNvnmdOp : public OpKernel {
-public:
-
-/// Constructor.
-explicit FltNvnmdOp(OpKernelConstruction* context) : OpKernel(context) {
-};
-
-
-/// Compute the descriptor
-/// param: context
-void Compute(OpKernelContext* context) override {
-  // check
-  DCHECK_EQ(1, context->num_inputs());
-  const Tensor& X = context->input(0);
-
-  const TensorShape& shX = X.shape();
-  TensorShape shY;
-
-  int H, N, M;
-  if (shX.dims() == 3) {
-    H = shX.dim_size(0);
-    N = shX.dim_size(1);
-    M = shX.dim_size(2);
-
-    shY.AddDim(H);
-    shY.AddDim(N);
-    shY.AddDim(M);
-  }
-  if (shX.dims() == 2) {
-    // process 2-dimension as 3-dimension
-    H = 1;
-    N = shX.dim_size(0);
-    M = shX.dim_size(1);
-
-    shY.AddDim(N);
-    shY.AddDim(M);
-  }
-
-  // create output
-  Tensor* Y = NULL;
-  OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
-
-  // compute
-  auto x = X.flat().data();
-  auto y = Y->flat().data();
-
-  int ii;
-  U_Flt64_Int64 ufi;
-
-  for (ii=0; ii("T"), \
-    FltNvnmdOp);
-REGISTER_CPU(float);                  
+ public:
+  /// Constructor.
+  explicit FltNvnmdOp(OpKernelConstruction* context) : OpKernel(context){};
+
+  /// Compute the descriptor
+  /// param: context
+  void Compute(OpKernelContext* context) override {
+    // check
+    DCHECK_EQ(1, context->num_inputs());
+    const Tensor& X = context->input(0);
+
+    const TensorShape& shX = X.shape();
+    TensorShape shY;
+
+    int H, N, M;
+    if (shX.dims() == 3) {
+      H = shX.dim_size(0);
+      N = shX.dim_size(1);
+      M = shX.dim_size(2);
+
+      shY.AddDim(H);
+      shY.AddDim(N);
+      shY.AddDim(M);
+    }
+    if (shX.dims() == 2) {
+      // process 2-dimension as 3-dimension
+      H = 1;
+      N = shX.dim_size(0);
+      M = shX.dim_size(1);
+
+      shY.AddDim(N);
+      shY.AddDim(M);
+    }
+
+    // create output
+    Tensor* Y = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
+
+    // compute
+    auto x = X.flat().data();
+    auto y = Y->flat().data();
+
+    int ii;
+    U_Flt64_Int64 ufi;
+
+    for (ii = 0; ii < H * N * M; ii++) {
+      ufi.nflt = x[ii];
+      ufi.nint &= FLT_MASK;
+      y[ii] = ufi.nflt;
+    }
+
+  }  // Compute
+
+};  // FltNvnmdOp
+
+#define REGISTER_CPU(T)                                           \
+  REGISTER_KERNEL_BUILDER(                                        \
+      Name("FltNvnmd").Device(DEVICE_CPU).TypeConstraint("T"), \
+      FltNvnmdOp);
+REGISTER_CPU(float);
 REGISTER_CPU(double);
-
-
-
diff --git a/source/op/gelu_multi_device.cc b/source/op/gelu_multi_device.cc
index d1c9974b70..24083c507a 100644
--- a/source/op/gelu_multi_device.cc
+++ b/source/op/gelu_multi_device.cc
@@ -41,50 +41,41 @@ REGISTER_OP("GeluGradGradCustom")
 // template parameter  is the datatype of the tensors.
 template 
 class GeluOp : public OpKernel {
- public :
+ public:
   explicit GeluOp(OpKernelConstruction* context) : OpKernel(context) {}
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     const Tensor& x_tensor = context->input(0);
-    Tensor * output_tensor = NULL;
+    Tensor* output_tensor = NULL;
     int context_output_index = 0;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++, 
-			  x_tensor.shape(),
-			  &output_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++,
+                                            x_tensor.shape(), &output_tensor));
+    DeviceFunctor()(device, context->eigen_device());
     // flat the tensors
-    FPTYPE * out = output_tensor->flat().data();
-    const FPTYPE * x = x_tensor.flat().data();
+    FPTYPE* out = output_tensor->flat().data();
+    const FPTYPE* x = x_tensor.flat().data();
     const int_64 size = static_cast(output_tensor->NumElements());
 
     if (device == "GPU") {
-      #if GOOGLE_CUDA
-      deepmd::gelu_gpu_cuda(
-          out, 
-          x, size);
-      #endif // GOOGLE_CUDA
-
-      #if TENSORFLOW_USE_ROCM
-      deepmd::gelu_gpu_rocm(
-        out,
-        x,size);
-      #endif//TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::gelu_cpu(
-          out, 
-          x, size);
+#if GOOGLE_CUDA
+      deepmd::gelu_gpu_cuda(out, x, size);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+      deepmd::gelu_gpu_rocm(out, x, size);
+#endif  // TENSORFLOW_USE_ROCM
+    } else if (device == "CPU") {
+      deepmd::gelu_cpu(out, x, size);
     }
   }
- private :
+
+ private:
   std::string device;
 };
 
@@ -92,52 +83,43 @@ class GeluOp : public OpKernel {
 // template parameter  is the datatype of the tensors.
 template 
 class GeluGradOp : public OpKernel {
- public :
+ public:
   explicit GeluGradOp(OpKernelConstruction* context) : OpKernel(context) {}
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     const Tensor& dy_tensor = context->input(0);
-    const Tensor& x_tensor  = context->input(1);
-    Tensor * output_tensor = NULL;
+    const Tensor& x_tensor = context->input(1);
+    Tensor* output_tensor = NULL;
     int context_output_index = 0;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++, 
-        x_tensor.shape(),
-        &output_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++,
+                                            x_tensor.shape(), &output_tensor));
+    DeviceFunctor()(device, context->eigen_device());
     // flat the tensors
-    FPTYPE * out = output_tensor->flat().data();
-    const FPTYPE * x = x_tensor.flat().data();
-    const FPTYPE * dy = dy_tensor.flat().data();
+    FPTYPE* out = output_tensor->flat().data();
+    const FPTYPE* x = x_tensor.flat().data();
+    const FPTYPE* dy = dy_tensor.flat().data();
     const int_64 size = static_cast(output_tensor->NumElements());
 
     if (device == "GPU") {
-      #if GOOGLE_CUDA
-      deepmd::gelu_grad_gpu_cuda(
-          out, 
-          x, dy, size);
-      #endif // GOOGLE_CUDA
-      
-      #if TENSORFLOW_USE_ROCM
-      deepmd::gelu_grad_gpu_rocm(
-          out, 
-          x, dy, size);
-      #endif // TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::gelu_grad_cpu(
-          out, 
-          x, dy, size);
+#if GOOGLE_CUDA
+      deepmd::gelu_grad_gpu_cuda(out, x, dy, size);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+      deepmd::gelu_grad_gpu_rocm(out, x, dy, size);
+#endif  // TENSORFLOW_USE_ROCM
+    } else if (device == "CPU") {
+      deepmd::gelu_grad_cpu(out, x, dy, size);
     }
   }
- private :
+
+ private:
   std::string device;
 };
 
@@ -145,99 +127,90 @@ class GeluGradOp : public OpKernel {
 // template parameter  is the datatype of the tensors.
 template 
 class GeluGradGradOp : public OpKernel {
- public :
+ public:
   explicit GeluGradGradOp(OpKernelConstruction* context) : OpKernel(context) {}
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     const Tensor& dy_tensor = context->input(0);
     const Tensor& dy_2_tensor = context->input(1);
-    const Tensor& x_tensor  = context->input(2);
-		Tensor * output_tensor = NULL;
-		int context_output_index = 0;	
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++, 
-        x_tensor.shape(),
-        &output_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
+    const Tensor& x_tensor = context->input(2);
+    Tensor* output_tensor = NULL;
+    int context_output_index = 0;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++,
+                                            x_tensor.shape(), &output_tensor));
+    DeviceFunctor()(device, context->eigen_device());
     // flat the tensors
-    FPTYPE * out = output_tensor->flat().data();
-    const FPTYPE * x = x_tensor.flat().data();
-    const FPTYPE * dy = dy_tensor.flat().data();
-    const FPTYPE * dy_2 = dy_2_tensor.flat().data();
+    FPTYPE* out = output_tensor->flat().data();
+    const FPTYPE* x = x_tensor.flat().data();
+    const FPTYPE* dy = dy_tensor.flat().data();
+    const FPTYPE* dy_2 = dy_2_tensor.flat().data();
     const int_64 size = static_cast(output_tensor->NumElements());
 
     if (device == "GPU") {
-      #if GOOGLE_CUDA
-      deepmd::gelu_grad_grad_gpu_cuda(
-          out, 
-          x, dy, dy_2, size);
-      #endif // GOOGLE_CUDA
-      
-      #if TENSORFLOW_USE_ROCM
-      deepmd::gelu_grad_grad_gpu_rocm(
-          out, 
-          x, dy, dy_2, size);
-      #endif // TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::gelu_grad_grad_cpu(
-          out, 
-          x, dy, dy_2, size);
+#if GOOGLE_CUDA
+      deepmd::gelu_grad_grad_gpu_cuda(out, x, dy, dy_2, size);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+      deepmd::gelu_grad_grad_gpu_rocm(out, x, dy, dy_2, size);
+#endif  // TENSORFLOW_USE_ROCM
+    } else if (device == "CPU") {
+      deepmd::gelu_grad_grad_cpu(out, x, dy, dy_2, size);
     }
   }
- private :
+
+ private:
   std::string device;
 };
 
-#define REGISTER_CPU(T)                                                   \
-REGISTER_KERNEL_BUILDER(                                                  \
-    Name("Gelu").Device(DEVICE_CPU).TypeConstraint("T"),               \
-    GeluOp);                                                \
-REGISTER_KERNEL_BUILDER(                                                  \
-    Name("GeluGrad").Device(DEVICE_CPU).TypeConstraint("T"),           \
-    GeluGradOp);                                            \
-REGISTER_KERNEL_BUILDER(                                                  \
-    Name("GeluGradGrad").Device(DEVICE_CPU).TypeConstraint("T"),       \
-    GeluGradGradOp);                                        \
-REGISTER_KERNEL_BUILDER(                                                  \
-    Name("GeluCustom").Device(DEVICE_CPU).TypeConstraint("T"),         \
-    GeluOp);                                                \
-REGISTER_KERNEL_BUILDER(                                                  \
-    Name("GeluGradCustom").Device(DEVICE_CPU).TypeConstraint("T"),     \
-    GeluGradOp);                                            \
-REGISTER_KERNEL_BUILDER(                                                  \
-    Name("GeluGradGradCustom").Device(DEVICE_CPU).TypeConstraint("T"), \
-    GeluGradGradOp);                                     
+#define REGISTER_CPU(T)                                                     \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("Gelu").Device(DEVICE_CPU).TypeConstraint("T"),               \
+      GeluOp);                                                \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("GeluGrad").Device(DEVICE_CPU).TypeConstraint("T"),           \
+      GeluGradOp);                                            \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("GeluGradGrad").Device(DEVICE_CPU).TypeConstraint("T"),       \
+      GeluGradGradOp);                                        \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("GeluCustom").Device(DEVICE_CPU).TypeConstraint("T"),         \
+      GeluOp);                                                \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("GeluGradCustom").Device(DEVICE_CPU).TypeConstraint("T"),     \
+      GeluGradOp);                                            \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("GeluGradGradCustom").Device(DEVICE_CPU).TypeConstraint("T"), \
+      GeluGradGradOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#define REGISTER_GPU(T)                                                   \
-REGISTER_KERNEL_BUILDER(                                                  \
-    Name("Gelu").Device(DEVICE_GPU).TypeConstraint("T"),               \
-    GeluOp);                                                \
-REGISTER_KERNEL_BUILDER(                                                  \
-    Name("GeluGrad").Device(DEVICE_GPU).TypeConstraint("T"),           \
-    GeluGradOp);                                            \
-REGISTER_KERNEL_BUILDER(                                                  \
-    Name("GeluGradGrad").Device(DEVICE_GPU).TypeConstraint("T"),       \
-    GeluGradGradOp);                                        \
-REGISTER_KERNEL_BUILDER(                                                  \
-    Name("GeluCustom").Device(DEVICE_GPU).TypeConstraint("T"),         \
-    GeluOp);                                                \
-REGISTER_KERNEL_BUILDER(                                                  \
-    Name("GeluGradCustom").Device(DEVICE_GPU).TypeConstraint("T"),     \
-    GeluGradOp);                                            \
-REGISTER_KERNEL_BUILDER(                                                  \
-    Name("GeluGradGradCustom").Device(DEVICE_GPU).TypeConstraint("T"), \
-    GeluGradGradOp);                                      
+#define REGISTER_GPU(T)                                                     \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("Gelu").Device(DEVICE_GPU).TypeConstraint("T"),               \
+      GeluOp);                                                \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("GeluGrad").Device(DEVICE_GPU).TypeConstraint("T"),           \
+      GeluGradOp);                                            \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("GeluGradGrad").Device(DEVICE_GPU).TypeConstraint("T"),       \
+      GeluGradGradOp);                                        \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("GeluCustom").Device(DEVICE_GPU).TypeConstraint("T"),         \
+      GeluOp);                                                \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("GeluGradCustom").Device(DEVICE_GPU).TypeConstraint("T"),     \
+      GeluGradOp);                                            \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("GeluGradGradCustom").Device(DEVICE_GPU).TypeConstraint("T"), \
+      GeluGradGradOp);
 REGISTER_GPU(float);
 REGISTER_GPU(double);
-#endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/source/op/map_aparam.cc b/source/op/map_aparam.cc
index cd70435f99..c503c3b75b 100644
--- a/source/op/map_aparam.cc
+++ b/source/op/map_aparam.cc
@@ -1,14 +1,15 @@
-#include "custom_op.h"
 #include "map_aparam.h"
 
+#include "custom_op.h"
+
 REGISTER_OP("MapAparam")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("aparam: T")
-.Input("nlist: int32")
-.Input("natoms: int32")
-.Attr("n_a_sel: int")
-.Attr("n_r_sel: int")
-.Output("output: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("aparam: T")
+    .Input("nlist: int32")
+    .Input("natoms: int32")
+    .Attr("n_a_sel: int")
+    .Attr("n_r_sel: int")
+    .Output("output: T");
 
 template 
 class MapAparamOp : public OpKernel {
@@ -20,23 +21,29 @@ class MapAparamOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& aparam_tensor		= context->input(context_input_index++);
-    const Tensor& nlist_tensor		= context->input(context_input_index++);
-    const Tensor& natoms_tensor		= context->input(context_input_index++);
+    const Tensor& aparam_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    OP_REQUIRES (context, (aparam_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of aparam should be 2"));
-    OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (aparam_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of aparam should be 2"));
+    OP_REQUIRES(context, (nlist_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
 
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = aparam_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -45,46 +52,42 @@ class MapAparamOp : public OpKernel {
     int numb_aparam = aparam_tensor.shape().dim_size(1) / nall;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel),				errors::InvalidArgument ("number of neighbors should match"));
+    OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
+                errors::InvalidArgument("number of neighbors should match"));
 
     // Create an output tensor
-    TensorShape output_shape ;
-    output_shape.AddDim (nframes);
-    output_shape.AddDim (nloc * nnei * numb_aparam);
+    TensorShape output_shape;
+    output_shape.AddDim(nframes);
+    output_shape.AddDim(nloc * nnei * numb_aparam);
     Tensor* output_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output_tensor));
-    
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, output_shape, &output_tensor));
+
     // flat the tensors
     auto aparam = aparam_tensor.flat();
     auto nlist = nlist_tensor.flat();
     auto output = output_tensor->flat();
 
     // loop over samples
-#pragma omp parallel for 
-    for (int kk = 0; kk < nframes; ++kk){
-      int output_iter	= kk * nloc * nnei * numb_aparam;
-      int aparam_iter	= kk * nall * numb_aparam;
-      int nlist_iter	= kk * nloc * nnei;
-      deepmd::map_aparam_cpu(
-	  &output(output_iter),
-	  &aparam(aparam_iter),
-	  &nlist(nlist_iter),
-	  nloc,
-	  nnei,
-	  numb_aparam);
+#pragma omp parallel for
+    for (int kk = 0; kk < nframes; ++kk) {
+      int output_iter = kk * nloc * nnei * numb_aparam;
+      int aparam_iter = kk * nall * numb_aparam;
+      int nlist_iter = kk * nloc * nnei;
+      deepmd::map_aparam_cpu(&output(output_iter), &aparam(aparam_iter),
+                             &nlist(nlist_iter), nloc, nnei, numb_aparam);
     }
   }
-private:
+
+ private:
   int n_r_sel, n_a_sel, n_a_shift;
 };
 
-#define REGISTER_CPU(T)                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("MapAparam").Device(DEVICE_CPU).TypeConstraint("T"),                        \
-    MapAparamOp); 
+#define REGISTER_CPU(T)                                            \
+  REGISTER_KERNEL_BUILDER(                                         \
+      Name("MapAparam").Device(DEVICE_CPU).TypeConstraint("T"), \
+      MapAparamOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
-
-
-
diff --git a/source/op/map_flt_nvnmd.cc b/source/op/map_flt_nvnmd.cc
index e871d7727e..f497896775 100644
--- a/source/op/map_flt_nvnmd.cc
+++ b/source/op/map_flt_nvnmd.cc
@@ -26,136 +26,129 @@ y output
 
 using namespace tensorflow;
 
+template   // float and double
+void mul_flt_nvnmd(T& y, T x1, T x2);
 
-template  // float and double
-void mul_flt_nvnmd(T &y, T x1, T x2);
-
-template  // float and double
-void add_flt_nvnmd(T &y, T x1, T x2);
+template   // float and double
+void add_flt_nvnmd(T& y, T x1, T x2);
 
 //- register the operator
 // prec = 2^n, so it doesn't need to match `T`
 REGISTER_OP("MapFltNvnmd")
-  .Attr("T: {float, double} = DT_DOUBLE")
-  .Input("x: T")
-  .Input("table: T")
-  .Input("table_grad: T")
-  .Input("table_info: T")
-  .Output("y: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("x: T")
+    .Input("table: T")
+    .Input("table_grad: T")
+    .Input("table_info: T")
+    .Output("y: T");
 
 //- create the operator class
 //* the class must inherit the OpKernel Class
 template 
 class MapFltNvnmdOp : public OpKernel {
-public:
-
-/// Constructor.
-explicit MapFltNvnmdOp(OpKernelConstruction* context) : OpKernel(context) {	  
-}
-
-  
-/// Compute the descriptor
-/// param: context
-void Compute(OpKernelContext* context) override {
-  DCHECK_EQ(3, context->num_inputs());
-  
-  const Tensor& t_x = context->input(0);
-  const Tensor& t_table = context->input(1);
-  const Tensor& t_table_info = context->input(3);
-  
-  const TensorShape& shX = t_x.shape();
-  const TensorShape& shT = t_table.shape();
-  const TensorShape& shI = t_table_info.shape();
-
-  int N = shX.dim_size(0);
-  int D = shX.dim_size(1);
-  int M = shT.dim_size(1) / 4;
-  int S = shI.dim_size(0) / 5;
-
-  DCHECK_EQ(shX.dims(), 2);
-  DCHECK_EQ(shT.dims(), 2);
-  
-  /*
-    * Calculate the output
-    * 1.create tensor
-    * 2.allocate the memory
-    * 3.calculate
-    */
-  
-  //- 1.create tensor
-  TensorShape shY;
-  shY.AddDim(N);
-  shY.AddDim(D);
-  shY.AddDim(M);
-  Tensor* t_y = NULL;
-  
-  //- 2.allocate the memory
-  //* allocate memory for the Y tensor which is called output 0
-  OP_REQUIRES_OK(context, context->allocate_output(0, shY, &t_y));
-  auto x = t_x.flat().data();
-  auto table = t_table.flat().data();
-  auto info = t_table_info.flat().data();
-  auto y = t_y->flat().data();
-
-  int ss, ii, jj;
-  FPTYPE xi, x0, x1, dx;
-  FPTYPE xx, id;
-  int idx;
-  int N0, N1;
-
-  U_Flt64_Int64 ufi;
-
-  FPTYPE ytmp;
-  for (ii=0; iinum_inputs());
+
+    const Tensor& t_x = context->input(0);
+    const Tensor& t_table = context->input(1);
+    const Tensor& t_table_info = context->input(3);
+
+    const TensorShape& shX = t_x.shape();
+    const TensorShape& shT = t_table.shape();
+    const TensorShape& shI = t_table_info.shape();
+
+    int N = shX.dim_size(0);
+    int D = shX.dim_size(1);
+    int M = shT.dim_size(1) / 4;
+    int S = shI.dim_size(0) / 5;
+
+    DCHECK_EQ(shX.dims(), 2);
+    DCHECK_EQ(shT.dims(), 2);
+
+    /*
+     * Calculate the output
+     * 1.create tensor
+     * 2.allocate the memory
+     * 3.calculate
+     */
+
+    //- 1.create tensor
+    TensorShape shY;
+    shY.AddDim(N);
+    shY.AddDim(D);
+    shY.AddDim(M);
+    Tensor* t_y = NULL;
+
+    //- 2.allocate the memory
+    //* allocate memory for the Y tensor which is called output 0
+    OP_REQUIRES_OK(context, context->allocate_output(0, shY, &t_y));
+    auto x = t_x.flat().data();
+    auto table = t_table.flat().data();
+    auto info = t_table_info.flat().data();
+    auto y = t_y->flat().data();
+
+    int ss, ii, jj;
+    FPTYPE xi, x0, x1, dx;
+    FPTYPE xx, id;
+    int idx;
+    int N0, N1;
+
+    U_Flt64_Int64 ufi;
+
+    FPTYPE ytmp;
+    for (ii = 0; ii < N * D; ii++) {
+      // cal idx and xx
+      xi = x[ii];
+      for (ss = 0; ss < S; ss++) {
+        x1 = info[ss * 5 + 1];
+        if (xi <= x1) {
+          x0 = info[ss * 5 + 0];
+          dx = info[ss * 5 + 2];
+          N0 = int(info[ss * 5 + 3]);
+          N1 = int(info[ss * 5 + 4]);
+          break;
+        }
+      }
+      //
+      xx = xi - x0;
+      id = floor(xx / dx);
+      xx -= id * dx;
+      idx = id + N0;
+      if (idx >= N1) {
+        idx = N1 - 1;
+        xx = dx;
+      }
+      //
+      ufi.nflt = xx;
+      ufi.nint &= 0xfffffff000000000;  // 52 - 16 = 36 = 9 * 4
+      xx = ufi.nflt;
+      for (jj = 0; jj < M; jj++) {
+        FPTYPE a = table[idx * M * 4 + jj * 4 + 0];
+        FPTYPE b = table[idx * M * 4 + jj * 4 + 1];
+        FPTYPE c = table[idx * M * 4 + jj * 4 + 2];
+        FPTYPE d = table[idx * M * 4 + jj * 4 + 3];
+        mul_flt_nvnmd(ytmp, a, xx);
+        add_flt_nvnmd(ytmp, b, ytmp);
+        mul_flt_nvnmd(ytmp, ytmp, xx);
+        add_flt_nvnmd(ytmp, c, ytmp);
+        mul_flt_nvnmd(ytmp, ytmp, xx);
+        add_flt_nvnmd(ytmp, d, ytmp);
+        y[ii * M + jj] = ytmp;
       }
     }
-    //
-    xx = xi - x0;
-    id = floor(xx / dx);
-    xx -= id*dx;
-    idx = id + N0;
-    if (idx >= N1) {
-      idx = N1 - 1;
-      xx = dx;
-    }
-    //
-    ufi.nflt = xx;
-    ufi.nint &= 0xfffffff000000000; // 52 - 16 = 36 = 9 * 4
-    xx = ufi.nflt;
-    for (jj=0; jj("T"), \
-    MapFltNvnmdOp);
+#define REGISTER_CPU(T)                                              \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name("MapFltNvnmd").Device(DEVICE_CPU).TypeConstraint("T"), \
+      MapFltNvnmdOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
-
-
-
diff --git a/source/op/matmul_fitnet_nvnmd.cc b/source/op/matmul_fitnet_nvnmd.cc
index f6652120ed..66370c7e3a 100644
--- a/source/op/matmul_fitnet_nvnmd.cc
+++ b/source/op/matmul_fitnet_nvnmd.cc
@@ -9,174 +9,169 @@ y = matmul(x, w)
 
 # Note
 consider DSP is 27bit x 18bit
-integer part of x is set as 27 bit 
+integer part of x is set as 27 bit
 integer part of w is set as 18 bit
 
 in the float64:
 1 bit sign
 11 bits exponent
-52 bits fraction 
+52 bits fraction
 
 x use 27.23 bit fixed point number
-w use 18.16 bit fixed point number add a exponent of normalization 
+w use 18.16 bit fixed point number add a exponent of normalization
 
 */
 // --------------------------------------------------------------------
 //
 
-
 //- import the library of tensorflow
-#include "custom_op.h"
 #include 
-#include "math.h"
+
+#include "custom_op.h"
 #include "env_mat_nvnmd.h"
+#include "math.h"
 
 using namespace tensorflow;
 
 // read matmul_flt_nvnmd.cc
-template  // float and double
-void find_max_expo(int64_t &max_expo, T *x, int64_t M);
+template   // float and double
+void find_max_expo(int64_t& max_expo, T* x, int64_t M);
 
 // read matmul_flt_nvnmd.cc
-template  // float and double
-void find_max_expo(int64_t &max_expo, T *x, int64_t N, int64_t M);
+template   // float and double
+void find_max_expo(int64_t& max_expo, T* x, int64_t N, int64_t M);
 
 //- register the operator
 REGISTER_OP("MatmulFitnetNvnmd")
-  .Attr("T: {float, double} = DT_DOUBLE")
-  .Input("x: T")
-  .Input("w: T")
-  .Attr("nbitx: int")
-  .Attr("nbitw: int")
-  .Attr("normw: int")
-  .Output("y: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("x: T")
+    .Input("w: T")
+    .Attr("nbitx: int")
+    .Attr("nbitw: int")
+    .Attr("normw: int")
+    .Output("y: T");
 
 //- create the operator class
 //* the class must inherit the OpKernel Class
 template 
 class MatmulFitnetNvnmdOp : public OpKernel {
-public:
-
-/// Constructor.
-explicit MatmulFitnetNvnmdOp(OpKernelConstruction* context) : OpKernel(context) {
-  OP_REQUIRES_OK(context, context->GetAttr("nbitx", &nbitx));
-  OP_REQUIRES_OK(context, context->GetAttr("nbitw", &nbitw));
-  OP_REQUIRES_OK(context, context->GetAttr("normw", &normw));
-}
-
-
-/// Compute the descriptor
-/// param: context
-void Compute(OpKernelContext* context) override {
-
-  /* 
-    * Get input
-    * 1.check
-    * 2.get tensor
-    * 3.get shape and check
-    */
-
-  //- 1.check
-  DCHECK_EQ(2, context->num_inputs());
-  
-  //- 2.get tensor
-  const Tensor& X = context->input(0);
-  const Tensor& W = context->input(1);
-  
-  //- 3. get shape and check
-  const TensorShape& shX = X.shape();
-  const TensorShape& shW = W.shape();
-  
-  int N = shX.dim_size(0);
-  int M = shX.dim_size(1);
-  int K = shW.dim_size(1);
-
-  DCHECK_EQ(M, shW.dim_size(0));
-  
-  /*
-    * Calculate the output
-    * 1.create tensor
-    * 2.allocate the memory
-    * 3.calculate
-    */
-  
-  //- 1.create tensor
-  TensorShape shY;
-  shY.AddDim(N);
-  shY.AddDim(K);
-  
-  Tensor* Y = NULL;
-  
-  //- 2.allocate the memory
-  //* allocate memory for the Y tensor which is called output 0
-  OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
-  auto x = X.flat().data();
-  auto w = W.flat().data();
-  auto y = Y->flat().data();
-  
-  // calculate
-  int ii, jj, kk;
-
-  U_Flt64_Int64 ufi;
-  int64_t expo, expo_max;
-  FPTYPE prec, div_prec;
-  FPTYPE precx, div_precx;
-  FPTYPE precw, div_precw;
-  precw = pow((FPTYPE)2.0, nbitw);
-  div_precw = (FPTYPE)1.0 / precw;
-  precx = pow((FPTYPE)2.0, nbitx);
-  div_precx = (FPTYPE)1.0 / precx;
-
-  FPTYPE xij, wjk, s;
-
-  // find max exponent of w
-  std::vector expo_maxs;
-  expo_maxs.resize(K);
-
-  if (normw == 0) {
-    find_max_expo(expo_max, (FPTYPE *)&w[0], M*K);
-    for (kk=0; kkGetAttr("nbitx", &nbitx));
+    OP_REQUIRES_OK(context, context->GetAttr("nbitw", &nbitw));
+    OP_REQUIRES_OK(context, context->GetAttr("normw", &normw));
   }
-  
-  // calculate
-  for (kk=0; kk("T"), \
-    MatmulFitnetNvnmdOp);
-REGISTER_CPU(float);                  
-REGISTER_CPU(double);
-
 
+  /// Compute the descriptor
+  /// param: context
+  void Compute(OpKernelContext* context) override {
+    /*
+     * Get input
+     * 1.check
+     * 2.get tensor
+     * 3.get shape and check
+     */
+
+    //- 1.check
+    DCHECK_EQ(2, context->num_inputs());
+
+    //- 2.get tensor
+    const Tensor& X = context->input(0);
+    const Tensor& W = context->input(1);
+
+    //- 3. get shape and check
+    const TensorShape& shX = X.shape();
+    const TensorShape& shW = W.shape();
+
+    int N = shX.dim_size(0);
+    int M = shX.dim_size(1);
+    int K = shW.dim_size(1);
+
+    DCHECK_EQ(M, shW.dim_size(0));
+
+    /*
+     * Calculate the output
+     * 1.create tensor
+     * 2.allocate the memory
+     * 3.calculate
+     */
+
+    //- 1.create tensor
+    TensorShape shY;
+    shY.AddDim(N);
+    shY.AddDim(K);
+
+    Tensor* Y = NULL;
+
+    //- 2.allocate the memory
+    //* allocate memory for the Y tensor which is called output 0
+    OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
+    auto x = X.flat().data();
+    auto w = W.flat().data();
+    auto y = Y->flat().data();
+
+    // calculate
+    int ii, jj, kk;
+
+    U_Flt64_Int64 ufi;
+    int64_t expo, expo_max;
+    FPTYPE prec, div_prec;
+    FPTYPE precx, div_precx;
+    FPTYPE precw, div_precw;
+    precw = pow((FPTYPE)2.0, nbitw);
+    div_precw = (FPTYPE)1.0 / precw;
+    precx = pow((FPTYPE)2.0, nbitx);
+    div_precx = (FPTYPE)1.0 / precx;
+
+    FPTYPE xij, wjk, s;
+
+    // find max exponent of w
+    std::vector expo_maxs;
+    expo_maxs.resize(K);
+
+    if (normw == 0) {
+      find_max_expo(expo_max, (FPTYPE*)&w[0], M * K);
+      for (kk = 0; kk < K; kk++) {
+        expo_maxs[kk] = expo_max;
+      }
+    } else {
+      for (kk = 0; kk < K; kk++) {
+        find_max_expo(expo_max, (FPTYPE*)&w[kk], M, K);
+        expo_maxs[kk] = expo_max;
+      }
+    }
 
+    // calculate
+    for (kk = 0; kk < K; kk++) {
+      expo_max = expo_maxs[kk];
+      prec = pow((FPTYPE)2.0, expo_max);
+      div_prec = (FPTYPE)1.0 / prec;
+      // matmul
+      for (ii = 0; ii < N; ii++) {
+        s = 0;
+        for (jj = 0; jj < M; jj++) {
+          wjk = floor(w[jj * K + kk] * div_prec * precw) * div_precw;
+          xij = floor(x[ii * M + jj] * precx) * div_precx;
+          s += xij * wjk;
+        }
+        s = floor(s * prec * precx) * div_precx;
+        y[ii * K + kk] = s;
+      }  // loop xx
+    }    // loop kk
+
+  }  // Compute
+
+  //- define the private variable for calculation
+ private:
+  int nbitx, nbitw;
+  int normw;
+};  // MatmulFitnetNvnmd
+
+#define REGISTER_CPU(T)                                                    \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("MatmulFitnetNvnmd").Device(DEVICE_CPU).TypeConstraint("T"), \
+      MatmulFitnetNvnmdOp);
+REGISTER_CPU(float);
+REGISTER_CPU(double);
diff --git a/source/op/matmul_flt2fix_nvnmd.cc b/source/op/matmul_flt2fix_nvnmd.cc
index 22486fabcd..3426743192 100644
--- a/source/op/matmul_flt2fix_nvnmd.cc
+++ b/source/op/matmul_flt2fix_nvnmd.cc
@@ -14,147 +14,140 @@ we change the DSP into 22 x 22
 in the float64:
 1 bit sign
 11 bits exponent
-52 bits fraction 
+52 bits fraction
 
 
 */
 // --------------------------------------------------------------------
 //
 
-
 //- import the library of tensorflow
-#include "custom_op.h"
 #include 
-#include "math.h"
+
+#include "custom_op.h"
 #include "env_mat_nvnmd.h"
+#include "math.h"
 
 using namespace tensorflow;
 
 template 
-void split_flt(T x, int64_t &sign, int64_t &expo, int64_t &mant);
+void split_flt(T x, int64_t& sign, int64_t& expo, int64_t& mant);
 
 //- register the operator
 REGISTER_OP("MatmulFlt2fixNvnmd")
-  .Attr("T: {float, double} = DT_DOUBLE")
-  .Input("x: T")
-  .Input("w: T")
-  .Attr("nbit: int")
-  .Output("y: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("x: T")
+    .Input("w: T")
+    .Attr("nbit: int")
+    .Output("y: T");
 
 //- create the operator class
 //* the class must inherit the OpKernel Class
 template 
 class MatmulFlt2fixNvnmdOp : public OpKernel {
-public:
-
-/// Constructor.
-explicit MatmulFlt2fixNvnmdOp(OpKernelConstruction* context) : OpKernel(context) {
-  // nbit is nits of fraction part of fixed-point number
-  OP_REQUIRES_OK(context, context->GetAttr("nbit", &nbit));
-};
-
-
-/// Compute the descriptor
-/// param: context
-void Compute(OpKernelContext* context) override {
-  // check
-  DCHECK_EQ(2, context->num_inputs());
-  const Tensor& X = context->input(0);
-  const Tensor& W = context->input(1);
-
-  const TensorShape& shX = X.shape();
-  const TensorShape& shW = W.shape();
-  TensorShape shY;
-  DCHECK_EQ(shW.dims(), shX.dims());
-
-  int H, N, M, K;
-  if (shX.dims() == 3) {
-    H = shX.dim_size(0);
-    N = shX.dim_size(1);
-    M = shX.dim_size(2);
-    K = shW.dim_size(2);
-
-    shY.AddDim(H);
-    shY.AddDim(N);
-    shY.AddDim(K);
-  }
-  if (shX.dims() == 2) {
-    // process 2-dimension as 3-dimension
-    H = 1;
-    N = shX.dim_size(0);
-    M = shX.dim_size(1);
-    K = shW.dim_size(1);
-
-    shY.AddDim(N);
-    shY.AddDim(K);
-  }
-
-  // create output
-  Tensor* Y = NULL;
-  OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
-
-  // compute
-  auto x = X.flat().data();
-  auto w = W.flat().data();
-  auto y = Y->flat().data();
-
-  int hh, ii, jj, kk;
-
-  U_Flt64_Int64 ufi;
-  int64_t sign1, sign2, sign3;
-  int64_t expo1, expo2, expo3;
-  int64_t mant1, mant2, mant3;
-  int64_t expos;
-  
-  int64_t s;
-
-  for (hh=0; hh>= NBIT_CUTF;
-          // w
-          split_flt(w[hh*M*K+jj*K+kk], sign2, expo2, mant2);
-          mant2 >>= NBIT_CUTF;
-          // 
-          mant3 = mant1 * mant2;
-          expos = expo1 + expo2 - NBIT_FLTF - NBIT_FLTF - (-nbit);
-          if (expos > 0) {
-            mant3 <<= expos;
-          } else {
-            expos = -expos;
-            expos = (expos > 63) ? 63 : expos;
-            mant3 >>= expos;
+ public:
+  /// Constructor.
+  explicit MatmulFlt2fixNvnmdOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    // nbit is bits of fraction part of fixed-point number
+    OP_REQUIRES_OK(context, context->GetAttr("nbit", &nbit));
+  };
+
+  /// Compute the descriptor
+  /// param: context
+  void Compute(OpKernelContext* context) override {
+    // check
+    DCHECK_EQ(2, context->num_inputs());
+    const Tensor& X = context->input(0);
+    const Tensor& W = context->input(1);
+
+    const TensorShape& shX = X.shape();
+    const TensorShape& shW = W.shape();
+    TensorShape shY;
+    DCHECK_EQ(shW.dims(), shX.dims());
+
+    int H, N, M, K;
+    if (shX.dims() == 3) {
+      H = shX.dim_size(0);
+      N = shX.dim_size(1);
+      M = shX.dim_size(2);
+      K = shW.dim_size(2);
+
+      shY.AddDim(H);
+      shY.AddDim(N);
+      shY.AddDim(K);
+    }
+    if (shX.dims() == 2) {
+      // process 2-dimension as 3-dimension
+      H = 1;
+      N = shX.dim_size(0);
+      M = shX.dim_size(1);
+      K = shW.dim_size(1);
+
+      shY.AddDim(N);
+      shY.AddDim(K);
+    }
+
+    // create output
+    Tensor* Y = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
+
+    // compute
+    auto x = X.flat().data();
+    auto w = W.flat().data();
+    auto y = Y->flat().data();
+
+    int hh, ii, jj, kk;
+
+    U_Flt64_Int64 ufi;
+    int64_t sign1, sign2, sign3;
+    int64_t expo1, expo2, expo3;
+    int64_t mant1, mant2, mant3;
+    int64_t expos;
+
+    int64_t s;
+
+    for (hh = 0; hh < H; hh++) {
+      // matmul
+      for (ii = 0; ii < N; ii++) {
+        for (kk = 0; kk < K; kk++) {
+          s = 0;
+          for (jj = 0; jj < M; jj++) {
+            // x
+            split_flt(x[hh * N * M + ii * M + jj], sign1, expo1, mant1);
+            mant1 >>= NBIT_CUTF;
+            // w
+            split_flt(w[hh * M * K + jj * K + kk], sign2, expo2, mant2);
+            mant2 >>= NBIT_CUTF;
+            //
+            mant3 = mant1 * mant2;
+            expos = expo1 + expo2 - NBIT_FLTF - NBIT_FLTF - (-nbit);
+            if (expos > 0) {
+              mant3 <<= expos;
+            } else {
+              expos = -expos;
+              expos = (expos > 63) ? 63 : expos;
+              mant3 >>= expos;
+            }
+            //
+            mant3 = (sign1 ^ sign2) ? -mant3 : mant3;
+            s += mant3;
           }
-          //
-          mant3 = (sign1 ^ sign2) ? -mant3 : mant3;
-          s += mant3;
-        }
-        ufi.nflt = FPTYPE(s) * pow(2.0, -nbit);
-        ufi.nint &= FLT_MASK;
-        y[hh*N*K+ii*K+kk] = ufi.nflt;
-      } // loop jj
-    } // loop ii
-  }// loop hh
-} // Compute
-
-
-private:
-int nbit;
-}; // MatmulFlt2fixNvnmdOp
-
-
-
-#define REGISTER_CPU(T) \
-REGISTER_KERNEL_BUILDER( \
-    Name("MatmulFlt2fixNvnmd").Device(DEVICE_CPU).TypeConstraint("T"), \
-    MatmulFlt2fixNvnmdOp);
-REGISTER_CPU(float);                  
+          ufi.nflt = FPTYPE(s) * pow(2.0, -nbit);
+          ufi.nint &= FLT_MASK;
+          y[hh * N * K + ii * K + kk] = ufi.nflt;
+        }  // loop jj
+      }    // loop ii
+    }      // loop hh
+  }        // Compute
+
+ private:
+  int nbit;
+};  // MatmulFlt2fixNvnmdOp
+
+#define REGISTER_CPU(T)                                                     \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("MatmulFlt2fixNvnmd").Device(DEVICE_CPU).TypeConstraint("T"), \
+      MatmulFlt2fixNvnmdOp);
+REGISTER_CPU(float);
 REGISTER_CPU(double);
-
-
-
diff --git a/source/op/matmul_flt_nvnmd.cc b/source/op/matmul_flt_nvnmd.cc
index c163b9485e..f86e97ea51 100644
--- a/source/op/matmul_flt_nvnmd.cc
+++ b/source/op/matmul_flt_nvnmd.cc
@@ -14,7 +14,7 @@ we change the DSP into 22 x 22
 in the float64:
 1 bit sign
 11 bits exponent
-52 bits fraction 
+52 bits fraction
 
 # Attr
 modx = 0: normalize x[hh, : , : ]
@@ -26,12 +26,12 @@ modw = 1: normalize w[hh, : , kk]
 // --------------------------------------------------------------------
 //
 
-
 //- import the library of tensorflow
-#include "custom_op.h"
 #include 
-#include "math.h"
+
+#include "custom_op.h"
 #include "env_mat_nvnmd.h"
+#include "math.h"
 
 using namespace tensorflow;
 
@@ -39,168 +39,160 @@ template 
 void split_flt(T x, int64_t &sign, int64_t &expo, int64_t &mant);
 
 // read matmul_flt_nvnmd.cc
-template  // float and double
+template   // float and double
 void find_max_expo(int64_t &max_expo, T *x, int64_t M);
 
 // read matmul_flt_nvnmd.cc
-template  // float and double
+template   // float and double
 void find_max_expo(int64_t &max_expo, T *x, int64_t N, int64_t M);
 
 //- register the operator
 REGISTER_OP("MatmulFltNvnmd")
-  .Attr("T: {float, double} = DT_DOUBLE")
-  .Input("x: T")
-  .Input("w: T")
-  .Attr("normx: int")
-  .Attr("normw: int")
-  .Output("y: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("x: T")
+    .Input("w: T")
+    .Attr("normx: int")
+    .Attr("normw: int")
+    .Output("y: T");
 
 //- create the operator class
 //* the class must inherit the OpKernel Class
 template 
 class MatmulFltNvnmdOp : public OpKernel {
-public:
-
-/// Constructor.
-explicit MatmulFltNvnmdOp(OpKernelConstruction* context) : OpKernel(context) {
-  OP_REQUIRES_OK(context, context->GetAttr("normx", &normx));
-  OP_REQUIRES_OK(context, context->GetAttr("normw", &normw));
-};
-
-
-/// Compute the descriptor
-/// param: context
-void Compute(OpKernelContext* context) override {
-  // check
-  DCHECK_EQ(2, context->num_inputs());
-  const Tensor& X = context->input(0);
-  const Tensor& W = context->input(1);
-
-  const TensorShape& shX = X.shape();
-  const TensorShape& shW = W.shape();
-  TensorShape shY;
-  DCHECK_EQ(shW.dims(), shX.dims());
-
-  int H, N, M, K;
-  if (shX.dims() == 3) {
-    H = shX.dim_size(0);
-    N = shX.dim_size(1);
-    M = shX.dim_size(2);
-    K = shW.dim_size(2);
-
-    shY.AddDim(H);
-    shY.AddDim(N);
-    shY.AddDim(K);
-  }
-  if (shX.dims() == 2) {
-    // process 2-dimension as 3-dimension
-    H = 1;
-    N = shX.dim_size(0);
-    M = shX.dim_size(1);
-    K = shW.dim_size(1);
-
-    shY.AddDim(N);
-    shY.AddDim(K);
-  }
-
-  // create output
-  Tensor* Y = NULL;
-  OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
-
-  // compute
-  auto x = X.flat().data();
-  auto w = W.flat().data();
-  auto y = Y->flat().data();
-
-  int hh, ii, jj, kk;
-
-  U_Flt64_Int64 ufi1, ufi2, ufi3;
-  int64_t sign1, sign2, sign3;
-  int64_t expo1, expo2, expo3;
-  int64_t mant1, mant2, mant3;
-  int64_t expos;
-
-  int64_t expo_max1, expo_max2;
-  std::vector expo_max1s;
-  std::vector expo_max2s;
-  expo_max1s.resize(N);
-  expo_max2s.resize(K);
-
-  int64_t s;
-
-
-  for (hh=0; hhGetAttr("normx", &normx));
+    OP_REQUIRES_OK(context, context->GetAttr("normw", &normw));
+  };
+
+  /// Compute the descriptor
+  /// param: context
+  void Compute(OpKernelContext *context) override {
+    // check
+    DCHECK_EQ(2, context->num_inputs());
+    const Tensor &X = context->input(0);
+    const Tensor &W = context->input(1);
+
+    const TensorShape &shX = X.shape();
+    const TensorShape &shW = W.shape();
+    TensorShape shY;
+    DCHECK_EQ(shW.dims(), shX.dims());
+
+    int H, N, M, K;
+    if (shX.dims() == 3) {
+      H = shX.dim_size(0);
+      N = shX.dim_size(1);
+      M = shX.dim_size(2);
+      K = shW.dim_size(2);
+
+      shY.AddDim(H);
+      shY.AddDim(N);
+      shY.AddDim(K);
     }
-
-    // find w max exponnet
-    if (normw&0x0f == 0) { // normalize w[:,:]
-      find_max_expo(expo_max2, (FPTYPE *)&w[hh*M*K], M*K);
-      for (kk=0; kk>= NBIT_CUTF;
-          expos = expo_max1 - expo1;
-          expos = (expos > 63) ? 63 : expos;
-          mant1 >>= expos;
-          // w
-          split_flt(w[hh*M*K+jj*K+kk], sign2, expo2, mant2);
-          mant2 >>= NBIT_CUTF;
-          expos = expo_max2 - expo2;
-          expos = (expos > 63) ? 63 : expos;
-          mant2 >>= expos;
-          // multiply
-          mant3 = mant1 * mant2;
-          mant3 = (sign1 ^ sign2) ? -mant3 : mant3;
-          s += mant3;
-        } // loop jj
-        // y * 2^(e_a+e_b)
-        ufi3.nflt = FPTYPE(s) * pow(2.0, expo_max1 + expo_max2 - NBIT_FLTF - NBIT_FLTF);
-        ufi3.nint &= FLT_MASK;
-        y[hh*N*K+ii*K+kk] = ufi3.nflt;
-      } // loop kk
-    } // loop ii
-  } // loop hh
-} // Compute
-
-
-private:
-int normx;
-int normw;
-}; // MatmulFltNvnmdOp
-
-
-
-#define REGISTER_CPU(T) \
-REGISTER_KERNEL_BUILDER( \
-    Name("MatmulFltNvnmd").Device(DEVICE_CPU).TypeConstraint("T"), \
-    MatmulFltNvnmdOp);
-REGISTER_CPU(float);                  
-REGISTER_CPU(double);
+    // create output
+    Tensor *Y = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
+
+    // compute
+    auto x = X.flat().data();
+    auto w = W.flat().data();
+    auto y = Y->flat().data();
+
+    int hh, ii, jj, kk;
+
+    U_Flt64_Int64 ufi1, ufi2, ufi3;
+    int64_t sign1, sign2, sign3;
+    int64_t expo1, expo2, expo3;
+    int64_t mant1, mant2, mant3;
+    int64_t expos;
+
+    int64_t expo_max1, expo_max2;
+    std::vector expo_max1s;
+    std::vector expo_max2s;
+    expo_max1s.resize(N);
+    expo_max2s.resize(K);
+
+    int64_t s;
+
+    for (hh = 0; hh < H; hh++) {
+      // find x max exponent
+      if (normx & 0x0f == 0) {  // normalize x[:,:] — NOTE(review): '==' binds tighter than '&', so this is 'normx & (0x0f == 0)' (always false); likely intended '(normx & 0x0f) == 0' — confirm
+        find_max_expo(expo_max1, (FPTYPE *)&x[hh * N * M], N * M);
+        for (ii = 0; ii < N; ii++) expo_max1s[ii] = expo_max1;
+
+      } else {  // normalize x[ii,:]
+        for (ii = 0; ii < N; ii++) {
+          find_max_expo(expo_max1, (FPTYPE *)&x[hh * N * M + ii * M], M);
+          expo_max1s[ii] = expo_max1;
+        }
+      }
 
+      // find w max exponent
+      if (normw & 0x0f == 0) {  // normalize w[:,:] — NOTE(review): '==' binds tighter than '&', so this is 'normw & (0x0f == 0)' (always false); likely intended '(normw & 0x0f) == 0' — confirm
+        find_max_expo(expo_max2, (FPTYPE *)&w[hh * M * K], M * K);
+        for (kk = 0; kk < K; kk++) expo_max2s[kk] = expo_max2;
 
+      } else {  // normalize w[:,kk]
+        for (kk = 0; kk < K; kk++) {
+          find_max_expo(expo_max2, (FPTYPE *)&w[hh * M * K + kk], M, K);
+          expo_max2s[kk] = expo_max2;
+        }
+      }
 
+      // matmul
+      for (ii = 0; ii < N; ii++) {
+        for (kk = 0; kk < K; kk++) {
+          s = 0;
+          expo_max1 = expo_max1s[ii];
+          expo_max2 = expo_max2s[kk];
+          for (jj = 0; jj < M; jj++) {
+            // x
+            split_flt(x[hh * N * M + ii * M + jj], sign1, expo1, mant1);
+            mant1 >>= NBIT_CUTF;
+            expos = expo_max1 - expo1;
+            expos = (expos > 63) ? 63 : expos;
+            mant1 >>= expos;
+            // w
+            split_flt(w[hh * M * K + jj * K + kk], sign2, expo2, mant2);
+            mant2 >>= NBIT_CUTF;
+            expos = expo_max2 - expo2;
+            expos = (expos > 63) ? 63 : expos;
+            mant2 >>= expos;
+            // multiply
+            mant3 = mant1 * mant2;
+            mant3 = (sign1 ^ sign2) ? -mant3 : mant3;
+            s += mant3;
+          }  // loop jj
+          // y * 2^(e_a+e_b)
+          ufi3.nflt = FPTYPE(s) *
+                      pow(2.0, expo_max1 + expo_max2 - NBIT_FLTF - NBIT_FLTF);
+          ufi3.nint &= FLT_MASK;
+          y[hh * N * K + ii * K + kk] = ufi3.nflt;
+        }  // loop kk
+      }    // loop ii
+    }      // loop hh
+  }        // Compute
+
+ private:
+  int normx;
+  int normw;
+};  // MatmulFltNvnmdOp
+
+#define REGISTER_CPU(T)                                                 \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("MatmulFltNvnmd").Device(DEVICE_CPU).TypeConstraint("T"), \
+      MatmulFltNvnmdOp);
+REGISTER_CPU(float);
+REGISTER_CPU(double);
diff --git a/source/op/mul_flt_nvnmd.cc b/source/op/mul_flt_nvnmd.cc
index 53b74376f7..bdf320c386 100644
--- a/source/op/mul_flt_nvnmd.cc
+++ b/source/op/mul_flt_nvnmd.cc
@@ -10,7 +10,7 @@ y = float(float(x) + float(w))
 # float64:
 1 bit sign
 11 bits exponent
-52 bits fraction 
+52 bits fraction
 
 # float29
 1 bit sign
@@ -26,121 +26,114 @@ y = float(float(x) + float(w))
 // --------------------------------------------------------------------
 //
 
-
 //- import the library of tensorflow
-#include "custom_op.h"
 #include 
-#include "math.h"
+
+#include "custom_op.h"
 #include "env_mat_nvnmd.h"
+#include "math.h"
 
 using namespace tensorflow;
 
-template  // float and double
-void mul_flt_nvnmd(T &y, T x1, T x2);
+template   // float and double
+void mul_flt_nvnmd(T& y, T x1, T x2);
 
 //- register the operator
 REGISTER_OP("MulFltNvnmd")
-  .Attr("T: {float, double} = DT_DOUBLE")
-  .Input("x: T")
-  .Input("w: T")
-  .Output("y: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("x: T")
+    .Input("w: T")
+    .Output("y: T");
 
 //- create the operator class
 //* the class must inherit the OpKernel Class
 template 
 class MulFltNvnmdOp : public OpKernel {
-public:
-
-/// Constructor.
-explicit MulFltNvnmdOp(OpKernelConstruction* context) : OpKernel(context) {
-};
-
-
-/// Compute the descriptor
-/// param: context
-void Compute(OpKernelContext* context) override {
-  // check
-  DCHECK_EQ(2, context->num_inputs());
-  const Tensor& X = context->input(0);
-  const Tensor& W = context->input(1);
-
-  const TensorShape& shX = X.shape();
-  const TensorShape& shW = W.shape();
-  TensorShape shY;
-
-  DCHECK_EQ(shW.dims(), shX.dims());
-
-  int H, N, M1, M2;
-  if (shX.dims() == 3) {
-    DCHECK_EQ(shW.dim_size(0), shX.dim_size(0));
-    DCHECK_EQ(shW.dim_size(1), shX.dim_size(1));
-    DCHECK_EQ(shW.dim_size(2), shX.dim_size(2));
-
-    H = shX.dim_size(0);
-    N = shX.dim_size(1);
-    M1 = shX.dim_size(2);
-    M2 = shW.dim_size(2);
-
-    shY.AddDim(H);
-    shY.AddDim(N);
-    shY.AddDim(M2);
-  }
-  if (shX.dims() == 2) {
-    DCHECK_EQ(shW.dim_size(0), shX.dim_size(0));
-    DCHECK_EQ(shW.dim_size(1), shX.dim_size(1));
-
-    H = 1;
-    N = shX.dim_size(0);
-    M1 = shX.dim_size(1);
-    M2 = shW.dim_size(1);
-
-    shY.AddDim(N);
-    shY.AddDim(M2);
-  }
-
-  if (M1 != M2) {
-    DCHECK_EQ(1, M1);
-  }
-
-  // create output
-  Tensor* Y = NULL;
-  OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
-
-  // compute
-  auto x = X.flat().data();
-  auto w = W.flat().data();
-  auto y = Y->flat().data();
-
-  int ii, jj;
-  U_Flt64_Int64 ufi1, ufi2, ufi3;
-  int64_t sign1, sign2, sign3;
-  int64_t expo1, expo2, expo3;
-  int64_t mant1, mant2, mant3;
-  int64_t expos;
-
-  if (M1 == M2) {
-    for (ii=0; iinum_inputs());
+    const Tensor& X = context->input(0);
+    const Tensor& W = context->input(1);
+
+    const TensorShape& shX = X.shape();
+    const TensorShape& shW = W.shape();
+    TensorShape shY;
+
+    DCHECK_EQ(shW.dims(), shX.dims());
+
+    int H, N, M1, M2;
+    if (shX.dims() == 3) {
+      DCHECK_EQ(shW.dim_size(0), shX.dim_size(0));
+      DCHECK_EQ(shW.dim_size(1), shX.dim_size(1));
+      DCHECK_EQ(shW.dim_size(2), shX.dim_size(2));
+
+      H = shX.dim_size(0);
+      N = shX.dim_size(1);
+      M1 = shX.dim_size(2);
+      M2 = shW.dim_size(2);
+
+      shY.AddDim(H);
+      shY.AddDim(N);
+      shY.AddDim(M2);
     }
-  }
+    if (shX.dims() == 2) {
+      DCHECK_EQ(shW.dim_size(0), shX.dim_size(0));
+      DCHECK_EQ(shW.dim_size(1), shX.dim_size(1));
 
-} // Compute
+      H = 1;
+      N = shX.dim_size(0);
+      M1 = shX.dim_size(1);
+      M2 = shW.dim_size(1);
 
+      shY.AddDim(N);
+      shY.AddDim(M2);
+    }
 
-}; // MulFltNvnmdOp
+    if (M1 != M2) {
+      DCHECK_EQ(1, M1);
+    }
 
-#define REGISTER_CPU(T) \
-REGISTER_KERNEL_BUILDER( \
-    Name("MulFltNvnmd").Device(DEVICE_CPU).TypeConstraint("T"), \
-    MulFltNvnmdOp);
-REGISTER_CPU(float);                  
-REGISTER_CPU(double);
+    // create output
+    Tensor* Y = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
+
+    // compute
+    auto x = X.flat().data();
+    auto w = W.flat().data();
+    auto y = Y->flat().data();
+
+    int ii, jj;
+    U_Flt64_Int64 ufi1, ufi2, ufi3;
+    int64_t sign1, sign2, sign3;
+    int64_t expo1, expo2, expo3;
+    int64_t mant1, mant2, mant3;
+    int64_t expos;
+
+    if (M1 == M2) {
+      for (ii = 0; ii < H * N * M2; ii++) {
+        mul_flt_nvnmd(y[ii], x[ii], w[ii]);
+      }
+    } else {
+      for (ii = 0; ii < H * N; ii++) {
+        for (jj = 0; jj < M2; jj++) {
+          mul_flt_nvnmd(y[ii * M2 + jj], x[ii], w[ii * M2 + jj]);
+        }
+      }
+    }
 
+  }  // Compute
 
+};  // MulFltNvnmdOp
 
+#define REGISTER_CPU(T)                                              \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name("MulFltNvnmd").Device(DEVICE_CPU).TypeConstraint("T"), \
+      MulFltNvnmdOp);
+REGISTER_CPU(float);
+REGISTER_CPU(double);
diff --git a/source/op/neighbor_stat.cc b/source/op/neighbor_stat.cc
index fad4617cc5..6c58096824 100644
--- a/source/op/neighbor_stat.cc
+++ b/source/op/neighbor_stat.cc
@@ -1,8 +1,8 @@
 #include "custom_op.h"
-#include "neighbor_list.h"
 #include "errors.h"
+#include "neighbor_list.h"
 
-typedef double boxtensor_t ;
+typedef double boxtensor_t;
 typedef double compute_t;
 
 REGISTER_OP("NeighborStat")
@@ -12,176 +12,195 @@ REGISTER_OP("NeighborStat")
     .Input("natoms: int32")
     .Input("box : T")
     .Input("mesh : int32")
-    .Attr("rcut: float")   
+    .Attr("rcut: float")
     .Output("max_nbor_size: int32")
     .Output("min_nbor_dist: T");
 
-template
+template 
 class NeighborStatOp : public OpKernel {
-public:
-    explicit NeighborStatOp(OpKernelConstruction* context) : OpKernel(context) {
-        OP_REQUIRES_OK(context, context->GetAttr("rcut", &rcut));
+ public:
+  explicit NeighborStatOp(OpKernelConstruction* context) : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("rcut", &rcut));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
+  }
+
+  void _Compute(OpKernelContext* context) {
+    // Grab the input tensor
+    int context_input_index = 0;
+    const Tensor& coord_tensor = context->input(context_input_index++);
+    const Tensor& type_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
+    const Tensor& box_tensor = context->input(context_input_index++);
+    const Tensor& mesh_tensor = context->input(context_input_index++);
+
+    OP_REQUIRES(context, (coord_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of coord should be 2"));
+    OP_REQUIRES(context, (type_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of type should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (box_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of box should be 2"));
+    OP_REQUIRES(context, (mesh_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of mesh should be 1"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    int nloc = natoms_tensor.flat().data()[0];
+    int nall = natoms_tensor.flat().data()[1];
+    int nsamples = coord_tensor.shape().dim_size(0);
+    int ntypes = natoms_tensor.shape().dim_size(0) - 2;
+    // check the sizes
+    OP_REQUIRES(context, (nsamples == type_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nsamples == box_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nall * 3 == coord_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (nall == type_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (9 == box_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of box should be 9"));
+
+    int nei_mode = 0;
+    if (mesh_tensor.shape().dim_size(0) == 6) {
+      // manual copied pbc
+      assert(nloc == nall);
+      nei_mode = 1;
+    } else if (mesh_tensor.shape().dim_size(0) == 0) {
+      // no pbc
+      nei_mode = -1;
+    } else {
+      throw deepmd::deepmd_exception("invalid mesh tensor");
     }
-
-    void Compute(OpKernelContext* context) override {
-        deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    // if region is given extended, do not use pbc
+    bool b_pbc = (nei_mode >= 1 || nei_mode == -1) ? false : true;
+    bool b_norm_atom = (nei_mode == 1) ? true : false;
+
+    TensorShape max_nbor_size_shape;
+    max_nbor_size_shape.AddDim(nloc);
+    max_nbor_size_shape.AddDim(ntypes);
+
+    int context_output_index = 0;
+    Tensor* max_nbor_size_tensor = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     max_nbor_size_shape,
+                                                     &max_nbor_size_tensor));
+
+    const FPTYPE* coord = coord_tensor.flat().data();
+    const int* type = type_tensor.flat().data();
+    const FPTYPE* box = box_tensor.flat().data();
+    const int* mesh = mesh_tensor.flat().data();
+    int* max_nbor_size = max_nbor_size_tensor->flat().data();
+
+    for (int ii = 0; ii < static_cast(max_nbor_size_tensor->NumElements());
+         ii++) {
+      max_nbor_size[ii] = 0;
     }
 
-    void _Compute(OpKernelContext* context) {
-        // Grab the input tensor
-        int context_input_index = 0;
-        const Tensor& coord_tensor	= context->input(context_input_index++);
-        const Tensor& type_tensor	= context->input(context_input_index++);
-        const Tensor& natoms_tensor	= context->input(context_input_index++);
-        const Tensor& box_tensor	= context->input(context_input_index++);
-        const Tensor& mesh_tensor	= context->input(context_input_index++);
-
-
-        OP_REQUIRES (context, (coord_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of coord should be 2"));
-        OP_REQUIRES (context, (type_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of type should be 2"));
-        OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),	errors::InvalidArgument ("Dim of natoms should be 1"));
-        OP_REQUIRES (context, (box_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of box should be 2"));
-        OP_REQUIRES (context, (mesh_tensor.shape().dims() == 1),	errors::InvalidArgument ("Dim of mesh should be 1"));
-        OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),		errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-        int nloc = natoms_tensor.flat().data()[0];
-        int nall = natoms_tensor.flat().data()[1];
-        int nsamples = coord_tensor.shape().dim_size(0);
-        int ntypes = natoms_tensor.shape().dim_size(0) - 2;
-        // check the sizes
-        OP_REQUIRES (context, (nsamples == type_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-        OP_REQUIRES (context, (nsamples == box_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-        OP_REQUIRES (context, (nall * 3 == coord_tensor.shape().dim_size(1)),	errors::InvalidArgument ("number of atoms should match"));
-        OP_REQUIRES (context, (nall == type_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of atoms should match"));
-        OP_REQUIRES (context, (9 == box_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of box should be 9"));
-
-        int nei_mode = 0;
-        if (mesh_tensor.shape().dim_size(0) == 6) {
-            // manual copied pbc
-            assert (nloc == nall);
-            nei_mode = 1;
-        }
-        else if (mesh_tensor.shape().dim_size(0) == 0) {
-            // no pbc
-            nei_mode = -1;
-        }
-        else {
-            throw deepmd::deepmd_exception("invalid mesh tensor");
-        }
-        // if region is given extended, do not use pbc
-        bool b_pbc = (nei_mode >= 1 || nei_mode == -1) ? false : true;
-        bool b_norm_atom = (nei_mode == 1) ? true : false;
-
-        TensorShape max_nbor_size_shape ;
-        max_nbor_size_shape.AddDim (nloc);
-        max_nbor_size_shape.AddDim (ntypes);
-
-        int context_output_index = 0;
-        Tensor* max_nbor_size_tensor = NULL;
-        OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, 
-	    					     max_nbor_size_shape,
-	    					     &max_nbor_size_tensor));
-
-        const FPTYPE* coord	= coord_tensor.flat().data();
-        const int* type	= type_tensor	  .flat().data();
-        const FPTYPE* box	= box_tensor  .flat().data();
-        const int* mesh	= mesh_tensor	  .flat().data();
-        int* max_nbor_size = max_nbor_size_tensor ->flat().data();
-
-        for (int ii = 0; ii < static_cast(max_nbor_size_tensor->NumElements()); ii++) {
-            max_nbor_size[ii] = 0;
-        }
-
-        // set region
-        boxtensor_t boxt [9] = {0};
-        for (int dd = 0; dd < 9; ++dd) {
-	        boxt[dd] = box[dd];
-        }
-        SimulationRegion region;
-        region.reinitBox (boxt);
-        // set & normalize coord
-        std::vector d_coord3 (nall * 3);
-        for (int ii = 0; ii < nall; ++ii) {
-	        for (int dd = 0; dd < 3; ++dd) {
-	            d_coord3[ii * 3 + dd] = coord[ii * 3 + dd];
-	        }
-	        if (b_norm_atom) {
-	            compute_t inter[3];
-	            region.phys2Inter (inter, &d_coord3[3 * ii]);
-	            for (int dd = 0; dd < 3; ++dd) {
-	                if      (inter[dd] < 0 ) inter[dd] += 1.;
-	                else if (inter[dd] >= 1) inter[dd] -= 1.;
-	            }
-	            region.inter2Phys (&d_coord3[3 * ii], inter);
-	        }
+    // set region
+    boxtensor_t boxt[9] = {0};
+    for (int dd = 0; dd < 9; ++dd) {
+      boxt[dd] = box[dd];
+    }
+    SimulationRegion region;
+    region.reinitBox(boxt);
+    // set & normalize coord
+    std::vector d_coord3(nall * 3);
+    for (int ii = 0; ii < nall; ++ii) {
+      for (int dd = 0; dd < 3; ++dd) {
+        d_coord3[ii * 3 + dd] = coord[ii * 3 + dd];
+      }
+      if (b_norm_atom) {
+        compute_t inter[3];
+        region.phys2Inter(inter, &d_coord3[3 * ii]);
+        for (int dd = 0; dd < 3; ++dd) {
+          if (inter[dd] < 0)
+            inter[dd] += 1.;
+          else if (inter[dd] >= 1)
+            inter[dd] -= 1.;
         }
+        region.inter2Phys(&d_coord3[3 * ii], inter);
+      }
+    }
 
-        // set type
-        std::vector d_type (nall);
-        for (int ii = 0; ii < nall; ++ii) d_type[ii] = type[ii];
-      
-        // build nlist
-        std::vector > d_nlist_a;
-        std::vector > d_nlist_r;
-        std::vector nlist_map;
-        bool b_nlist_map = false;
-
-        if (nei_mode == 1) {
-            // std::cout << "I'm in nei_mode 1" << std::endl;
-	        std::vector bk_d_coord3 = d_coord3;
-	        std::vector bk_d_type = d_type;
-	        std::vector ncell, ngcell;
-	        copy_coord(d_coord3, d_type, nlist_map, ncell, ngcell, bk_d_coord3, bk_d_type, rcut, region);	
-	        b_nlist_map = true;
-	        std::vector nat_stt(3, 0);
-	        std::vector ext_stt(3), ext_end(3);
-	        for (int dd = 0; dd < 3; ++dd) {
-	            ext_stt[dd] = -ngcell[dd];
-	            ext_end[dd] = ncell[dd] + ngcell[dd];
-	        }
-	        ::build_nlist (d_nlist_a, d_nlist_r, d_coord3, nloc, -1, rcut, nat_stt, ncell, ext_stt, ext_end, region, ncell);
-        }
-        else if (nei_mode == -1) {
-	        ::build_nlist (d_nlist_a, d_nlist_r, d_coord3, -1, rcut, NULL);
-        }
-        else {
-	        throw deepmd::deepmd_exception("unknow neighbor mode");
-        }
+    // set type
+    std::vector d_type(nall);
+    for (int ii = 0; ii < nall; ++ii) d_type[ii] = type[ii];
+
+    // build nlist
+    std::vector > d_nlist_a;
+    std::vector > d_nlist_r;
+    std::vector nlist_map;
+    bool b_nlist_map = false;
+
+    if (nei_mode == 1) {
+      // std::cout << "I'm in nei_mode 1" << std::endl;
+      std::vector bk_d_coord3 = d_coord3;
+      std::vector bk_d_type = d_type;
+      std::vector ncell, ngcell;
+      copy_coord(d_coord3, d_type, nlist_map, ncell, ngcell, bk_d_coord3,
+                 bk_d_type, rcut, region);
+      b_nlist_map = true;
+      std::vector nat_stt(3, 0);
+      std::vector ext_stt(3), ext_end(3);
+      for (int dd = 0; dd < 3; ++dd) {
+        ext_stt[dd] = -ngcell[dd];
+        ext_end[dd] = ncell[dd] + ngcell[dd];
+      }
+      ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, nloc, -1, rcut, nat_stt,
+                    ncell, ext_stt, ext_end, region, ncell);
+    } else if (nei_mode == -1) {
+      ::build_nlist(d_nlist_a, d_nlist_r, d_coord3, -1, rcut, NULL);
+    } else {
+      throw deepmd::deepmd_exception("unknow neighbor mode");
+    }
 
-        int MAX_NNEI = 0;
-        for (int ii = 0; ii < nloc; ii++) { 
-            MAX_NNEI = MAX_NNEI < d_nlist_r[ii].size() ? d_nlist_r[ii].size() : MAX_NNEI;
-        }
-        // allocate output tensor for deepmd-kit
-        TensorShape min_nbor_dist_shape;
-        min_nbor_dist_shape.AddDim (nloc * MAX_NNEI);
-        Tensor* min_nbor_dist_tensor = NULL;
-        OP_REQUIRES_OK(context, context->allocate_output(context_output_index++, 
-	    					     min_nbor_dist_shape,
-	    					     &min_nbor_dist_tensor));
-        FPTYPE* min_nbor_dist	= min_nbor_dist_tensor ->flat().data();
-        for (int ii = 0; ii < static_cast(min_nbor_dist_tensor->NumElements()); ii++) {
-            min_nbor_dist[ii] = 10000.0;
-        }
+    int MAX_NNEI = 0;
+    for (int ii = 0; ii < nloc; ii++) {
+      MAX_NNEI =
+          MAX_NNEI < d_nlist_r[ii].size() ? d_nlist_r[ii].size() : MAX_NNEI;
+    }
+    // allocate output tensor for deepmd-kit
+    TensorShape min_nbor_dist_shape;
+    min_nbor_dist_shape.AddDim(nloc * MAX_NNEI);
+    Tensor* min_nbor_dist_tensor = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     min_nbor_dist_shape,
+                                                     &min_nbor_dist_tensor));
+    FPTYPE* min_nbor_dist = min_nbor_dist_tensor->flat().data();
+    for (int ii = 0; ii < static_cast(min_nbor_dist_tensor->NumElements());
+         ii++) {
+      min_nbor_dist[ii] = 10000.0;
+    }
 
-        #pragma omp parallel for 
-        for (int ii = 0; ii < nloc; ii++) {
-            for (int jj = 0; jj < d_nlist_r[ii].size(); jj++) {
-                int type = d_type[d_nlist_r[ii][jj]];
-                max_nbor_size[ii * ntypes + type] += 1;
-                compute_t rij[3] = {d_coord3[d_nlist_r[ii][jj] * 3 + 0] - d_coord3[ii * 3 + 0], d_coord3[d_nlist_r[ii][jj] * 3 + 1] - d_coord3[ii * 3 + 1], d_coord3[d_nlist_r[ii][jj] * 3 + 2] - d_coord3[ii * 3 + 2]};
-                min_nbor_dist[ii * MAX_NNEI + jj] = sqrt(rij[0] * rij[0] + rij[1] * rij[1] + rij[2] * rij[2]);
-            }
-        }
+#pragma omp parallel for
+    for (int ii = 0; ii < nloc; ii++) {
+      for (int jj = 0; jj < d_nlist_r[ii].size(); jj++) {
+        int type = d_type[d_nlist_r[ii][jj]];
+        max_nbor_size[ii * ntypes + type] += 1;
+        compute_t rij[3] = {
+            d_coord3[d_nlist_r[ii][jj] * 3 + 0] - d_coord3[ii * 3 + 0],
+            d_coord3[d_nlist_r[ii][jj] * 3 + 1] - d_coord3[ii * 3 + 1],
+            d_coord3[d_nlist_r[ii][jj] * 3 + 2] - d_coord3[ii * 3 + 2]};
+        min_nbor_dist[ii * MAX_NNEI + jj] =
+            sqrt(rij[0] * rij[0] + rij[1] * rij[1] + rij[2] * rij[2]);
+      }
     }
+  }
 
-private:
+ private:
   int nnei;
   float rcut;
 };
 
-#define REGISTER_CPU(T)                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("NeighborStat").Device(DEVICE_CPU).TypeConstraint("T"),                     \
-    NeighborStatOp); 
+#define REGISTER_CPU(T)                                               \
+  REGISTER_KERNEL_BUILDER(                                            \
+      Name("NeighborStat").Device(DEVICE_CPU).TypeConstraint("T"), \
+      NeighborStatOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
diff --git a/source/op/optimizer/parallel.cc b/source/op/optimizer/parallel.cc
index b0542fa94e..f3a052ba90 100644
--- a/source/op/optimizer/parallel.cc
+++ b/source/op/optimizer/parallel.cc
@@ -6,13 +6,12 @@
 
 #if TF_MAJOR_VERSION >= 2 && TF_MINOR_VERSION >= 7
 // breaking change in tf 2.7: Renaming of tensorflow::int64 to int_64_t
-#define TF_INT64 int64_t 
+#define TF_INT64 int64_t
 #else
-#define TF_INT64 tensorflow::int64 
+#define TF_INT64 tensorflow::int64
 #endif
 
 #include "parallel.h"
-
 #include "tensorflow/core/grappler/devices.h"
 #include "tensorflow/core/grappler/graph_view.h"
 #include "tensorflow/core/grappler/grappler_item.h"
@@ -55,18 +54,17 @@ TF_INT64 GetNThreads() {
   return tot;
 }
 
-Status ParallelProdForce(RemapperContext *ctx, int node_index,
+Status ParallelProdForce(RemapperContext *ctx,
+                         int node_index,
                          std::vector *invalidated_nodes,
                          std::vector *nodes_to_delete) {
   // skip on GPUs
-  if (GetNumAvailableGPUs() > 0)
-    return Status();
+  if (GetNumAvailableGPUs() > 0) return Status();
 
   const NodeDef *ori_node = ctx->graph_view.GetNode(node_index)->node();
   auto &src_attr = ori_node->attr();
   TF_INT64 tot = GetNThreads();
-  if (tot <= 1)
-    return Status();
+  if (tot <= 1) return Status();
 
   NodeDef sum_node;
   sum_node.set_name(ori_node->name());
@@ -85,8 +83,7 @@ Status ParallelProdForce(RemapperContext *ctx, int node_index,
     sub_node.set_op("ParallelProdForceSeA");
     sub_node.set_device(ori_node->device());
     // copy input
-    for (int jj = 0; jj < 4; ++jj)
-      sub_node.add_input(ori_node->input(jj));
+    for (int jj = 0; jj < 4; ++jj) sub_node.add_input(ori_node->input(jj));
     // set frac
     auto *sub_attr = sub_node.mutable_attr();
     (*sub_attr)["T"] = src_attr.at("T");
@@ -107,7 +104,8 @@ Status ParallelProdForce(RemapperContext *ctx, int node_index,
   return Status();
 }
 
-Status DPParallel::Optimize(Cluster *cluster, const GrapplerItem &item,
+Status DPParallel::Optimize(Cluster *cluster,
+                            const GrapplerItem &item,
                             GraphDef *optimized_graph) {
   GrapplerItem mutable_item = item;
   Status status;
@@ -130,7 +128,6 @@ Status DPParallel::Optimize(Cluster *cluster, const GrapplerItem &item,
       continue;
     }
     if (!item.optimization_options().is_eager_mode) {
-
       // Remap gelu
       std::map matched_nodes_map;
       std::set remove_node_indices;
diff --git a/source/op/optimizer/parallel.h b/source/op/optimizer/parallel.h
index f7f6b1816c..ab169113c5 100644
--- a/source/op/optimizer/parallel.h
+++ b/source/op/optimizer/parallel.h
@@ -14,12 +14,15 @@ class DPParallel : public CustomGraphOptimizer {
   }
   std::string name() const override { return "dpparallel"; };
   bool UsesFunctionLibrary() const override { return false; }
-  Status Optimize(Cluster* cluster, const GrapplerItem& item,
+  Status Optimize(Cluster* cluster,
+                  const GrapplerItem& item,
                   GraphDef* optimized_graph) override;
 #if (TF_MAJOR_VERSION >= 2 && TF_MINOR_VERSION < 6) || TF_MAJOR_VERSION < 2
-// TF 3457a2b122e50b4d44ceaaed5a663d635e5c22df
-  void Feedback(Cluster* cluster, const GrapplerItem& item,
-                const GraphDef& optimized_graph, double result) override {}
+  // TF 3457a2b122e50b4d44ceaaed5a663d635e5c22df
+  void Feedback(Cluster* cluster,
+                const GrapplerItem& item,
+                const GraphDef& optimized_graph,
+                double result) override {}
 #endif
 };
 
diff --git a/source/op/pair_tab.cc b/source/op/pair_tab.cc
index 2a22e17102..9929137d97 100644
--- a/source/op/pair_tab.cc
+++ b/source/op/pair_tab.cc
@@ -1,64 +1,75 @@
-#include "custom_op.h"
 #include "pair_tab.h"
 
+#include "custom_op.h"
+
 REGISTER_OP("PairTab")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("table_info: double")
-.Input("table_data: double")
-.Input("type: int32")
-.Input("rij: T")
-.Input("nlist: int32")
-.Input("natoms: int32")
-.Input("scale: T")
-.Attr("sel_a: list(int)")
-.Attr("sel_r: list(int)")
-.Output("atom_energy: T")
-.Output("force: T")
-.Output("atom_virial: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("table_info: double")
+    .Input("table_data: double")
+    .Input("type: int32")
+    .Input("rij: T")
+    .Input("nlist: int32")
+    .Input("natoms: int32")
+    .Input("scale: T")
+    .Attr("sel_a: list(int)")
+    .Attr("sel_r: list(int)")
+    .Output("atom_energy: T")
+    .Output("force: T")
+    .Output("atom_virial: T");
 
 using namespace tensorflow;
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-template
+template 
 class PairTabOp : public OpKernel {
  public:
   explicit PairTabOp(OpKernelConstruction* context) : OpKernel(context) {
     OP_REQUIRES_OK(context, context->GetAttr("sel_a", &sel_a));
     OP_REQUIRES_OK(context, context->GetAttr("sel_r", &sel_r));
-    cum_sum (sec_a, sel_a);
-    cum_sum (sec_r, sel_r);
+    cum_sum(sec_a, sel_a);
+    cum_sum(sec_r, sel_r);
     nnei_a = sec_a.back();
     nnei_r = sec_r.back();
     nnei = nnei_a + nnei_r;
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int tmp_idx = 0;
-    const Tensor& table_info_tensor	= context->input(tmp_idx++);
-    const Tensor& table_data_tensor	= context->input(tmp_idx++);
-    const Tensor& type_tensor	= context->input(tmp_idx++);
-    const Tensor& rij_tensor	= context->input(tmp_idx++);
-    const Tensor& nlist_tensor	= context->input(tmp_idx++);
-    const Tensor& natoms_tensor	= context->input(tmp_idx++);
-    const Tensor& scale_tensor	= context->input(tmp_idx++);
+    const Tensor& table_info_tensor = context->input(tmp_idx++);
+    const Tensor& table_data_tensor = context->input(tmp_idx++);
+    const Tensor& type_tensor = context->input(tmp_idx++);
+    const Tensor& rij_tensor = context->input(tmp_idx++);
+    const Tensor& nlist_tensor = context->input(tmp_idx++);
+    const Tensor& natoms_tensor = context->input(tmp_idx++);
+    const Tensor& scale_tensor = context->input(tmp_idx++);
 
     // set size of the sample
-    OP_REQUIRES (context, (table_info_tensor.shape().dims() == 1),	errors::InvalidArgument ("Dim of table_info should be 1"));
-    OP_REQUIRES (context, (table_data_tensor.shape().dims() == 1),	errors::InvalidArgument ("Dim of table_data should be 1"));
-    OP_REQUIRES (context, (type_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of type should be 2"));
-    OP_REQUIRES (context, (rij_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of rij should be 2"));
-    OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
-    OP_REQUIRES (context, (scale_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of scale should be 2"));
+    OP_REQUIRES(context, (table_info_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of table_info should be 1"));
+    OP_REQUIRES(context, (table_data_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of table_data should be 1"));
+    OP_REQUIRES(context, (type_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of type should be 2"));
+    OP_REQUIRES(context, (rij_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of rij should be 2"));
+    OP_REQUIRES(context, (nlist_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (scale_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of scale should be 2"));
 
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = type_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -68,105 +79,109 @@ class PairTabOp : public OpKernel {
     assert(sel_r.size() == ntypes);
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == type_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == rij_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nall == type_tensor.shape().dim_size(1)),		errors::InvalidArgument ("shape of type should be nall"));
-    OP_REQUIRES (context, (3 * nnei * nloc == rij_tensor.shape().dim_size(1)),	errors::InvalidArgument ("shape of rij should be 3 * nloc * nnei"));
-    OP_REQUIRES (context, (nnei * nloc == nlist_tensor.shape().dim_size(1)),	errors::InvalidArgument ("shape of nlist should be nloc * nnei"));
-    OP_REQUIRES (context, (nloc == scale_tensor.shape().dim_size(1)),		errors::InvalidArgument ("shape of scale should be nloc"));
+    OP_REQUIRES(context, (nframes == type_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == rij_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nall == type_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("shape of type should be nall"));
+    OP_REQUIRES(
+        context, (3 * nnei * nloc == rij_tensor.shape().dim_size(1)),
+        errors::InvalidArgument("shape of rij should be 3 * nloc * nnei"));
+    OP_REQUIRES(
+        context, (nnei * nloc == nlist_tensor.shape().dim_size(1)),
+        errors::InvalidArgument("shape of nlist should be nloc * nnei"));
+    OP_REQUIRES(context, (nloc == scale_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("shape of scale should be nloc"));
 
     // Create an output tensor
-    TensorShape energy_shape ;
-    energy_shape.AddDim (nframes);
-    energy_shape.AddDim (nloc);
-    TensorShape force_shape ;
-    force_shape.AddDim (nframes);
-    force_shape.AddDim (3 * nall);
-    TensorShape virial_shape ;
-    virial_shape.AddDim (nframes);
-    virial_shape.AddDim (9 * nall);
+    TensorShape energy_shape;
+    energy_shape.AddDim(nframes);
+    energy_shape.AddDim(nloc);
+    TensorShape force_shape;
+    force_shape.AddDim(nframes);
+    force_shape.AddDim(3 * nall);
+    TensorShape virial_shape;
+    virial_shape.AddDim(nframes);
+    virial_shape.AddDim(9 * nall);
     Tensor* energy_tensor = NULL;
     Tensor* force_tensor = NULL;
     Tensor* virial_tensor = NULL;
     tmp_idx = 0;
-    OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, energy_shape, &energy_tensor));
-    OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, force_shape,  &force_tensor ));
-    OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, virial_shape, &virial_tensor));
-    
+    OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, energy_shape,
+                                                     &energy_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, force_shape,
+                                                     &force_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, virial_shape,
+                                                     &virial_tensor));
+
     // flat the tensors
     auto table_info = table_info_tensor.flat();
     auto table_data = table_data_tensor.flat();
-    auto type	= type_tensor	.matrix();
-    auto rij	= rij_tensor	.matrix();
-    auto nlist	= nlist_tensor	.matrix();
-    auto scale  = scale_tensor	.matrix();
-    auto energy = energy_tensor	->matrix();
-    auto force	= force_tensor	->matrix();
-    auto virial = virial_tensor	->matrix();
-
-    OP_REQUIRES (context, (ntypes == int(table_info(3)+0.1)),	errors::InvalidArgument ("ntypes provided in table does not match deeppot"));
-    int nspline = table_info(2)+0.1;
+    auto type = type_tensor.matrix();
+    auto rij = rij_tensor.matrix();
+    auto nlist = nlist_tensor.matrix();
+    auto scale = scale_tensor.matrix();
+    auto energy = energy_tensor->matrix();
+    auto force = force_tensor->matrix();
+    auto virial = virial_tensor->matrix();
+
+    OP_REQUIRES(context, (ntypes == int(table_info(3) + 0.1)),
+                errors::InvalidArgument(
+                    "ntypes provided in table does not match deeppot"));
+    int nspline = table_info(2) + 0.1;
     int tab_stride = 4 * nspline;
-    assert(ntypes * ntypes * tab_stride == table_data_tensor.shape().dim_size(0));
-    std::vector d_table_info(4);
-    std::vector d_table_data(ntypes * ntypes * tab_stride);
-    for (unsigned ii = 0; ii < d_table_info.size(); ++ii){
+    assert(ntypes * ntypes * tab_stride ==
+           table_data_tensor.shape().dim_size(0));
+    std::vector d_table_info(4);
+    std::vector d_table_data(ntypes * ntypes * tab_stride);
+    for (unsigned ii = 0; ii < d_table_info.size(); ++ii) {
       d_table_info[ii] = table_info(ii);
     }
-    for (unsigned ii = 0; ii < d_table_data.size(); ++ii){
+    for (unsigned ii = 0; ii < d_table_data.size(); ++ii) {
       d_table_data[ii] = table_data(ii);
     }
-    const double * p_table_info = &(d_table_info[0]);
-    const double * p_table_data = &(d_table_data[0]);
+    const double* p_table_info = &(d_table_info[0]);
+    const double* p_table_data = &(d_table_data[0]);
 
-    std::vector t_sel_a(sel_a.size()), t_sel_r(sel_r.size());
-    for (int ii = 0; ii < sel_a.size(); ++ii){
+    std::vector t_sel_a(sel_a.size()), t_sel_r(sel_r.size());
+    for (int ii = 0; ii < sel_a.size(); ++ii) {
       t_sel_a[ii] = sel_a[ii];
     }
-    for (int ii = 0; ii < sel_r.size(); ++ii){
+    for (int ii = 0; ii < sel_r.size(); ++ii) {
       t_sel_r[ii] = sel_r[ii];
     }
     // loop over samples
-#pragma omp parallel for 
-    for (int kk = 0; kk < nframes; ++kk){
-      deepmd::pair_tab_cpu(
-	  &energy(kk,0),
-	  &force(kk,0),
-	  &virial(kk,0),
-	  p_table_info,
-	  p_table_data,
-	  &rij(kk,0),
-	  &scale(kk,0),
-	  &type(kk,0),
-	  &nlist(kk,0),
-	  &natoms(0),
-	  t_sel_a,
-	  t_sel_r);
+#pragma omp parallel for
+    for (int kk = 0; kk < nframes; ++kk) {
+      deepmd::pair_tab_cpu(&energy(kk, 0), &force(kk, 0),
+                                   &virial(kk, 0), p_table_info, p_table_data,
+                                   &rij(kk, 0), &scale(kk, 0), &type(kk, 0),
+                                   &nlist(kk, 0), &natoms(0), t_sel_a, t_sel_r);
     }
   }
-private:
+
+ private:
   std::vector sel_r;
   std::vector sel_a;
   std::vector sec_a;
   std::vector sec_r;
   int nnei, nnei_a, nnei_r;
-  void
-  cum_sum (std::vector & sec,
-	   const std::vector & n_sel) const {
-    sec.resize (n_sel.size() + 1);
+  void cum_sum(std::vector& sec, const std::vector& n_sel) const {
+    sec.resize(n_sel.size() + 1);
     sec[0] = 0;
-    for (int ii = 1; ii < sec.size(); ++ii){
-      sec[ii] = sec[ii-1] + n_sel[ii-1];
+    for (int ii = 1; ii < sec.size(); ++ii) {
+      sec[ii] = sec[ii - 1] + n_sel[ii - 1];
     }
   }
 };
 
 // Register the CPU kernels.
-#define REGISTER_CPU(T)                                                                   \
-REGISTER_KERNEL_BUILDER(                                                                  \
-    Name("PairTab").Device(DEVICE_CPU).TypeConstraint("T"),                      \
-    PairTabOp); 
+#define REGISTER_CPU(T)                                          \
+  REGISTER_KERNEL_BUILDER(                                       \
+      Name("PairTab").Device(DEVICE_CPU).TypeConstraint("T"), \
+      PairTabOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
-
diff --git a/source/op/prod_env_mat_multi_device.cc b/source/op/prod_env_mat_multi_device.cc
index 4e32904907..2a9aa7d2cb 100644
--- a/source/op/prod_env_mat_multi_device.cc
+++ b/source/op/prod_env_mat_multi_device.cc
@@ -1,25 +1,25 @@
-#include "custom_op.h"
-#include "utilities.h"
 #include "coord.h"
-#include "region.h"
+#include "custom_op.h"
+#include "errors.h"
 #include "neighbor_list.h"
 #include "prod_env_mat.h"
-#include "errors.h"
+#include "region.h"
+#include "utilities.h"
 
 REGISTER_OP("ProdEnvMatA")
     .Attr("T: {float, double} = DT_DOUBLE")
-    .Input("coord: T")          //atomic coordinates
-    .Input("type: int32")       //atomic type
-    .Input("natoms: int32")     //local atomic number; each type atomic number
+    .Input("coord: T")       // atomic coordinates
+    .Input("type: int32")    // atomic type
+    .Input("natoms: int32")  // local atomic number; each type atomic number
     .Input("box : T")
     .Input("mesh : int32")
-    .Input("davg: T")           //average value of data
-    .Input("dstd: T")           //standard deviation
-    .Attr("rcut_a: float")      //no use
+    .Input("davg: T")       // average value of data
+    .Input("dstd: T")       // standard deviation
+    .Attr("rcut_a: float")  // no use
     .Attr("rcut_r: float")
     .Attr("rcut_r_smth: float")
     .Attr("sel_a: list(int)")
-    .Attr("sel_r: list(int)")   //all zero
+    .Attr("sel_r: list(int)")  // all zero
     .Output("descrpt: T")
     .Output("descrpt_deriv: T")
     .Output("rij: T")
@@ -34,7 +34,7 @@ Each row of the environment matrix :math:`\mathcal{R}^i` can be constructed as f
         \end{array}
         ]
 
-In the above equation, :math:`\mathbf{R}_{ji}=\mathbf{R}_j-\mathbf{R}_i = (x_{ji}, y_{ji}, z_{ji})` is 
+In the above equation, :math:`\mathbf{R}_{ji}=\mathbf{R}_j-\mathbf{R}_i = (x_{ji}, y_{ji}, z_{ji})` is
 the relative coordinate and :math:`r_{ji}=\lVert \mathbf{R}_{ji} \lVert` is its norm.
 The switching function :math:`s(r)` is defined as:
 
@@ -68,7 +68,7 @@ descrpt: The environment matrix.
 descrpt_deriv: The derivative of the environment matrix.
 rij: The distance between the atoms.
 nlist: The neighbor list of each atom.)");
-    // only sel_a and rcut_r used.
+// only sel_a and rcut_r used.
 
 // an alias of ProdEnvMatA -- Compatible with v1.3
 REGISTER_OP("DescrptSeA")
@@ -145,22 +145,22 @@ REGISTER_OP("DescrptSeR")
     .Output("descrpt: T")
     .Output("descrpt_deriv: T")
     .Output("rij: T")
-    .Output("nlist: int32"); 
+    .Output("nlist: int32");
 
 REGISTER_OP("ProdEnvMatAMix")
     .Attr("T: {float, double} = DT_DOUBLE")
-    .Input("coord: T")          //atomic coordinates
-    .Input("type: int32")       //atomic type
-    .Input("natoms: int32")     //local atomic number; each type atomic number
+    .Input("coord: T")       // atomic coordinates
+    .Input("type: int32")    // atomic type
+    .Input("natoms: int32")  // local atomic number; each type atomic number
     .Input("box : T")
     .Input("mesh : int32")
-    .Input("davg: T")           //average value of data
-    .Input("dstd: T")           //standard deviation
-    .Attr("rcut_a: float")      //no use
+    .Input("davg: T")       // average value of data
+    .Input("dstd: T")       // standard deviation
+    .Attr("rcut_a: float")  // no use
     .Attr("rcut_r: float")
     .Attr("rcut_r_smth: float")
     .Attr("sel_a: list(int)")
-    .Attr("sel_r: list(int)")   //all zero
+    .Attr("sel_r: list(int)")  // all zero
     .Output("descrpt: T")
     .Output("descrpt_deriv: T")
     .Output("rij: T")
@@ -168,8 +168,8 @@ REGISTER_OP("ProdEnvMatAMix")
     .Output("ntype: int32")
     .Output("nmask: bool")
     .Doc(R"(Compute the environment matrix mixing the atom types.
-The sorting of neighbor atoms depends not on atom types, but on the distance and index. 
-The atoms in nlist matrix will gather forward and thus save space for gaps of types in ProdEnvMatA, 
+The sorting of neighbor atoms depends not on atom types, but on the distance and index.
+The atoms in nlist matrix will gather forward and thus save space for gaps of types in ProdEnvMatA,
 resulting in optimized and relative small sel_a.
 
 The additional outputs are listed as following:
@@ -177,256 +177,224 @@ ntype: The corresponding atom types in nlist.
 nmask: The atom mask in nlist.
 )");
 
-template
-static int
-_norm_copy_coord_cpu(
-    std::vector & coord_cpy,
-    std::vector & type_cpy,
-    std::vector & mapping,
-    int & nall,
-    int & mem_cpy,
-    const FPTYPE * coord,
-    const FPTYPE * box,
-    const int * type,
-    const int &nloc, 
-    const int &max_cpy_trial, 
-    const float & rcut_r);
-
-template
-static int
-_build_nlist_cpu(
-    std::vector &ilist, 
-    std::vector &numneigh,
-    std::vector &firstneigh,
-    std::vector> &jlist,
-    int & max_nnei,
-    int & mem_nnei,
-    const FPTYPE *coord,
-    const int & nloc,
-    const int & new_nall,
-    const int & max_nnei_trial,
-    const float & rcut_r);
-
-static void
-_map_nlist_cpu(
-    int * nlist,
-    const int * idx_mapping,
-    const int & nloc,
-    const int & nnei);
-
-static void
-_map_nei_info_cpu(
-    int * nlist,
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int * idx_mapping,
-    const int & nloc,
-    const int & nnei,
-    const int & ntypes,
-    const bool & b_nlist_map);
+template 
+static int _norm_copy_coord_cpu(std::vector& coord_cpy,
+                                std::vector& type_cpy,
+                                std::vector& mapping,
+                                int& nall,
+                                int& mem_cpy,
+                                const FPTYPE* coord,
+                                const FPTYPE* box,
+                                const int* type,
+                                const int& nloc,
+                                const int& max_cpy_trial,
+                                const float& rcut_r);
+
+template 
+static int _build_nlist_cpu(std::vector& ilist,
+                            std::vector& numneigh,
+                            std::vector& firstneigh,
+                            std::vector>& jlist,
+                            int& max_nnei,
+                            int& mem_nnei,
+                            const FPTYPE* coord,
+                            const int& nloc,
+                            const int& new_nall,
+                            const int& max_nnei_trial,
+                            const float& rcut_r);
+
+static void _map_nlist_cpu(int* nlist,
+                           const int* idx_mapping,
+                           const int& nloc,
+                           const int& nnei);
+
+static void _map_nei_info_cpu(int* nlist,
+                              int* ntype,
+                              bool* nmask,
+                              const int* type,
+                              const int* idx_mapping,
+                              const int& nloc,
+                              const int& nnei,
+                              const int& ntypes,
+                              const bool& b_nlist_map);
 
 template 
-static void
-_prepare_coord_nlist_cpu(
-    OpKernelContext* context,
-    FPTYPE const ** coord,
-    std::vector & coord_cpy,
-    int const** type,
-    std::vector & type_cpy,
-    std::vector & idx_mapping,
-    deepmd::InputNlist & inlist,
-    std::vector & ilist,
-    std::vector & numneigh,
-    std::vector & firstneigh,
-    std::vector> & jlist,
-    int & new_nall,
-    int & mem_cpy,
-    int & mem_nnei,
-    int & max_nbor_size,
-    const FPTYPE * box,
-    const int * mesh_tensor_data,
-    const int & nloc,
-    const int & nei_mode,
-    const float & rcut_r,
-    const int & max_cpy_trial,
-    const int & max_nnei_trial);
+static void _prepare_coord_nlist_cpu(OpKernelContext* context,
+                                     FPTYPE const** coord,
+                                     std::vector& coord_cpy,
+                                     int const** type,
+                                     std::vector& type_cpy,
+                                     std::vector& idx_mapping,
+                                     deepmd::InputNlist& inlist,
+                                     std::vector& ilist,
+                                     std::vector& numneigh,
+                                     std::vector& firstneigh,
+                                     std::vector>& jlist,
+                                     int& new_nall,
+                                     int& mem_cpy,
+                                     int& mem_nnei,
+                                     int& max_nbor_size,
+                                     const FPTYPE* box,
+                                     const int* mesh_tensor_data,
+                                     const int& nloc,
+                                     const int& nei_mode,
+                                     const float& rcut_r,
+                                     const int& max_cpy_trial,
+                                     const int& max_nnei_trial);
 
 #if GOOGLE_CUDA
-template
-static int
-_norm_copy_coord_gpu(
-    OpKernelContext* context,
-    Tensor * tensor_list,
-    FPTYPE * & coord_cpy,
-    int * & type_cpy,
-    int * & idx_mapping,
-    int & nall,
-    int & mem_cpy,
-    const FPTYPE * coord,
-    const FPTYPE * box,
-    const int * type,
-    const int &nloc, 
-    const int &max_cpy_trial, 
-    const float & rcut_r);
-
-template
-static int
-_build_nlist_gpu(
-    OpKernelContext* context,
-    Tensor * tensor_list,
-    int * &ilist, 
-    int * &numneigh,
-    int ** &firstneigh,
-    int * &jlist,
-    int & max_nnei,
-    int & mem_nnei,
-    const FPTYPE *coord,
-    const int & nloc,
-    const int & new_nall,
-    const int & max_nnei_trial,
-    const float & rcut_r);
-
-static void
-_map_nlist_gpu(
-    int * nlist,
-    const int * idx_mapping,
-    const int & nloc,
-    const int & nnei);
-
-static void
-_map_nei_info_gpu(
-    int * nlist,
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int * idx_mapping,
-    const int & nloc,
-    const int & nnei,
-    const int & ntypes,
-    const bool & b_nlist_map);
+template 
+static int _norm_copy_coord_gpu(OpKernelContext* context,
+                                Tensor* tensor_list,
+                                FPTYPE*& coord_cpy,
+                                int*& type_cpy,
+                                int*& idx_mapping,
+                                int& nall,
+                                int& mem_cpy,
+                                const FPTYPE* coord,
+                                const FPTYPE* box,
+                                const int* type,
+                                const int& nloc,
+                                const int& max_cpy_trial,
+                                const float& rcut_r);
+
+template 
+static int _build_nlist_gpu(OpKernelContext* context,
+                            Tensor* tensor_list,
+                            int*& ilist,
+                            int*& numneigh,
+                            int**& firstneigh,
+                            int*& jlist,
+                            int& max_nnei,
+                            int& mem_nnei,
+                            const FPTYPE* coord,
+                            const int& nloc,
+                            const int& new_nall,
+                            const int& max_nnei_trial,
+                            const float& rcut_r);
+
+static void _map_nlist_gpu(int* nlist,
+                           const int* idx_mapping,
+                           const int& nloc,
+                           const int& nnei);
+
+static void _map_nei_info_gpu(int* nlist,
+                              int* ntype,
+                              bool* nmask,
+                              const int* type,
+                              const int* idx_mapping,
+                              const int& nloc,
+                              const int& nnei,
+                              const int& ntypes,
+                              const bool& b_nlist_map);
 
 template 
-static void
-_prepare_coord_nlist_gpu(
-    OpKernelContext* context,
-    Tensor * tensor_list,
-    FPTYPE const ** coord,
-    FPTYPE * & coord_cpy,
-    int const** type,
-    int * & type_cpy,
-    int * & idx_mapping,
-    deepmd::InputNlist & inlist,
-    int * & ilist,
-    int * & numneigh,
-    int ** & firstneigh,
-    int * & jlist,
-    int * & nbor_list_dev,
-    int & new_nall,
-    int & mem_cpy,
-    int & mem_nnei,
-    int & max_nbor_size,
-    const FPTYPE * box,
-    const int * mesh_tensor_data,
-    const int mesh_tensor_size,
-    const int & nloc,
-    const int & nei_mode,
-    const float & rcut_r,
-    const int & max_cpy_trial,
-    const int & max_nnei_trial);
-    
-#endif //GOOGLE_CUDA
+static void _prepare_coord_nlist_gpu(OpKernelContext* context,
+                                     Tensor* tensor_list,
+                                     FPTYPE const** coord,
+                                     FPTYPE*& coord_cpy,
+                                     int const** type,
+                                     int*& type_cpy,
+                                     int*& idx_mapping,
+                                     deepmd::InputNlist& inlist,
+                                     int*& ilist,
+                                     int*& numneigh,
+                                     int**& firstneigh,
+                                     int*& jlist,
+                                     int*& nbor_list_dev,
+                                     int& new_nall,
+                                     int& mem_cpy,
+                                     int& mem_nnei,
+                                     int& max_nbor_size,
+                                     const FPTYPE* box,
+                                     const int* mesh_tensor_data,
+                                     const int mesh_tensor_size,
+                                     const int& nloc,
+                                     const int& nei_mode,
+                                     const float& rcut_r,
+                                     const int& max_cpy_trial,
+                                     const int& max_nnei_trial);
 
+#endif  // GOOGLE_CUDA
 
 #if TENSORFLOW_USE_ROCM
-template
-static int
-_norm_copy_coord_gpu_rocm(
-    OpKernelContext* context,
-    Tensor * tensor_list,
-    FPTYPE * & coord_cpy,
-    int * & type_cpy,
-    int * & idx_mapping,
-    int & nall,
-    int & mem_cpy,
-    const FPTYPE * coord,
-    const FPTYPE * box,
-    const int * type,
-    const int &nloc, 
-    const int &max_cpy_trial, 
-    const float & rcut_r);
-
-template
-static int
-_build_nlist_gpu_rocm(
-    OpKernelContext* context,
-    Tensor * tensor_list,
-    int * &ilist, 
-    int * &numneigh,
-    int ** &firstneigh,
-    int * &jlist,
-    int & max_nnei,
-    int & mem_nnei,
-    const FPTYPE *coord,
-    const int & nloc,
-    const int & new_nall,
-    const int & max_nnei_trial,
-    const float & rcut_r);
-
-static void
-_map_nlist_gpu_rocm(
-    int * nlist,
-    const int * idx_mapping,
-    const int & nloc,
-    const int & nnei);
-
-static void
-_map_nei_info_gpu_rocm(
-    int * nlist,
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int * idx_mapping,
-    const int & nloc,
-    const int & nnei,
-    const int & ntypes,
-    const bool & b_nlist_map);
+template 
+static int _norm_copy_coord_gpu_rocm(OpKernelContext* context,
+                                     Tensor* tensor_list,
+                                     FPTYPE*& coord_cpy,
+                                     int*& type_cpy,
+                                     int*& idx_mapping,
+                                     int& nall,
+                                     int& mem_cpy,
+                                     const FPTYPE* coord,
+                                     const FPTYPE* box,
+                                     const int* type,
+                                     const int& nloc,
+                                     const int& max_cpy_trial,
+                                     const float& rcut_r);
 
 template 
-static void
-_prepare_coord_nlist_gpu_rocm(
-    OpKernelContext* context,
-    Tensor * tensor_list,
-    FPTYPE const ** coord,
-    FPTYPE * & coord_cpy,
-    int const** type,
-    int * & type_cpy,
-    int * & idx_mapping,
-    deepmd::InputNlist & inlist,
-    int * & ilist,
-    int * & numneigh,
-    int ** & firstneigh,
-    int * & jlist,
-    int * & nbor_list_dev,
-    int & new_nall,
-    int & mem_cpy,
-    int & mem_nnei,
-    int & max_nbor_size,
-    const FPTYPE * box,
-    const int * mesh_tensor_data,
-    const int mesh_tensor_size,
-    const int & nloc,
-    const int & nei_mode,
-    const float & rcut_r,
-    const int & max_cpy_trial,
-    const int & max_nnei_trial);
-    
-#endif //TENSORFLOW_USE_ROCM
+static int _build_nlist_gpu_rocm(OpKernelContext* context,
+                                 Tensor* tensor_list,
+                                 int*& ilist,
+                                 int*& numneigh,
+                                 int**& firstneigh,
+                                 int*& jlist,
+                                 int& max_nnei,
+                                 int& mem_nnei,
+                                 const FPTYPE* coord,
+                                 const int& nloc,
+                                 const int& new_nall,
+                                 const int& max_nnei_trial,
+                                 const float& rcut_r);
+
+static void _map_nlist_gpu_rocm(int* nlist,
+                                const int* idx_mapping,
+                                const int& nloc,
+                                const int& nnei);
+
+static void _map_nei_info_gpu_rocm(int* nlist,
+                                   int* ntype,
+                                   bool* nmask,
+                                   const int* type,
+                                   const int* idx_mapping,
+                                   const int& nloc,
+                                   const int& nnei,
+                                   const int& ntypes,
+                                   const bool& b_nlist_map);
 
+template 
+static void _prepare_coord_nlist_gpu_rocm(OpKernelContext* context,
+                                          Tensor* tensor_list,
+                                          FPTYPE const** coord,
+                                          FPTYPE*& coord_cpy,
+                                          int const** type,
+                                          int*& type_cpy,
+                                          int*& idx_mapping,
+                                          deepmd::InputNlist& inlist,
+                                          int*& ilist,
+                                          int*& numneigh,
+                                          int**& firstneigh,
+                                          int*& jlist,
+                                          int*& nbor_list_dev,
+                                          int& new_nall,
+                                          int& mem_cpy,
+                                          int& mem_nnei,
+                                          int& max_nbor_size,
+                                          const FPTYPE* box,
+                                          const int* mesh_tensor_data,
+                                          const int mesh_tensor_size,
+                                          const int& nloc,
+                                          const int& nei_mode,
+                                          const float& rcut_r,
+                                          const int& max_cpy_trial,
+                                          const int& max_nnei_trial);
+
+#endif  // TENSORFLOW_USE_ROCM
 
 template 
 class ProdEnvMatAOp : public OpKernel {
-public:
+ public:
   explicit ProdEnvMatAOp(OpKernelConstruction* context) : OpKernel(context) {
     float nloc_f, nall_f;
     OP_REQUIRES_OK(context, context->GetAttr("rcut_a", &rcut_a));
@@ -436,8 +404,8 @@ class ProdEnvMatAOp : public OpKernel {
     OP_REQUIRES_OK(context, context->GetAttr("sel_r", &sel_r));
     // OP_REQUIRES_OK(context, context->GetAttr("nloc", &nloc_f));
     // OP_REQUIRES_OK(context, context->GetAttr("nall", &nall_f));
-    deepmd::cum_sum (sec_a, sel_a);
-    deepmd::cum_sum (sec_r, sel_r);
+    deepmd::cum_sum(sec_a, sel_a);
+    deepmd::cum_sum(sec_r, sel_r);
     ndescrpt_a = sec_a.back() * 4;
     ndescrpt_r = sec_r.back() * 1;
     ndescrpt = ndescrpt_a + ndescrpt_r;
@@ -452,235 +420,261 @@ class ProdEnvMatAOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& coord_tensor	= context->input(context_input_index++);
-    const Tensor& type_tensor	= context->input(context_input_index++);
-    const Tensor& natoms_tensor	= context->input(context_input_index++);
-    const Tensor& box_tensor	= context->input(context_input_index++);
-    const Tensor& mesh_tensor   = context->input(context_input_index++);
-    const Tensor& avg_tensor	= context->input(context_input_index++);
-    const Tensor& std_tensor	= context->input(context_input_index++);
-    // set size of the sample. assume 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]], then shape(t) ==> [2, 2, 3]
-    OP_REQUIRES (context, (coord_tensor.shape().dims() == 2),       errors::InvalidArgument ("Dim of coord should be 2"));
-    OP_REQUIRES (context, (type_tensor.shape().dims() == 2),        errors::InvalidArgument ("Dim of type should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),      errors::InvalidArgument ("Dim of natoms should be 1"));
-    OP_REQUIRES (context, (box_tensor.shape().dims() == 2),         errors::InvalidArgument ("Dim of box should be 2"));
-    OP_REQUIRES (context, (mesh_tensor.shape().dims() == 1),        errors::InvalidArgument ("Dim of mesh should be 1"));
-    OP_REQUIRES (context, (avg_tensor.shape().dims() == 2),         errors::InvalidArgument ("Dim of avg should be 2"));
-    OP_REQUIRES (context, (std_tensor.shape().dims() == 2),         errors::InvalidArgument ("Dim of std should be 2"));
-    OP_REQUIRES (context, (sec_r.back() == 0),                      errors::InvalidArgument ("Rotational free descriptor only support all-angular information: sel_r should be all zero."));
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
-    const int * natoms = natoms_tensor.flat().data();
+    const Tensor& coord_tensor = context->input(context_input_index++);
+    const Tensor& type_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
+    const Tensor& box_tensor = context->input(context_input_index++);
+    const Tensor& mesh_tensor = context->input(context_input_index++);
+    const Tensor& avg_tensor = context->input(context_input_index++);
+    const Tensor& std_tensor = context->input(context_input_index++);
+    // set size of the sample. assume 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3,
+    // 3], [4, 4, 4]]], then shape(t) ==> [2, 2, 3]
+    OP_REQUIRES(context, (coord_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of coord should be 2"));
+    OP_REQUIRES(context, (type_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of type should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (box_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of box should be 2"));
+    OP_REQUIRES(context, (mesh_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of mesh should be 1"));
+    OP_REQUIRES(context, (avg_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of avg should be 2"));
+    OP_REQUIRES(context, (std_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of std should be 2"));
+    OP_REQUIRES(context, (sec_r.back() == 0),
+                errors::InvalidArgument(
+                    "Rotational free descriptor only support all-angular "
+                    "information: sel_r should be all zero."));
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    DeviceFunctor()(device, context->eigen_device());
+    const int* natoms = natoms_tensor.flat().data();
     int nloc = natoms[0];
     int nall = natoms[1];
-    int ntypes = natoms_tensor.shape().dim_size(0) - 2; //nloc and nall mean something.
+    int ntypes =
+        natoms_tensor.shape().dim_size(0) - 2;  // nloc and nall mean something.
     int nsamples = coord_tensor.shape().dim_size(0);
     //// check the sizes
-    OP_REQUIRES (context, (nsamples == type_tensor.shape().dim_size(0)),  errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nsamples == box_tensor.shape().dim_size(0)),   errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (ntypes == avg_tensor.shape().dim_size(0)),     errors::InvalidArgument ("number of avg should be ntype"));
-    OP_REQUIRES (context, (ntypes == std_tensor.shape().dim_size(0)),     errors::InvalidArgument ("number of std should be ntype"));
-    
-    OP_REQUIRES (context, (nall * 3 == coord_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (nall == type_tensor.shape().dim_size(1)),      errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (9 == box_tensor.shape().dim_size(1)),          errors::InvalidArgument ("number of box should be 9"));
-    OP_REQUIRES (context, (ndescrpt == avg_tensor.shape().dim_size(1)),   errors::InvalidArgument ("number of avg should be ndescrpt"));
-    OP_REQUIRES (context, (ndescrpt == std_tensor.shape().dim_size(1)),   errors::InvalidArgument ("number of std should be ndescrpt"));   
-    
-    OP_REQUIRES (context, (ntypes == int(sel_a.size())),  errors::InvalidArgument ("number of types should match the length of sel array"));
-    OP_REQUIRES (context, (ntypes == int(sel_r.size())),  errors::InvalidArgument ("number of types should match the length of sel array"));
+    OP_REQUIRES(context, (nsamples == type_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nsamples == box_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (ntypes == avg_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of avg should be ntype"));
+    OP_REQUIRES(context, (ntypes == std_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of std should be ntype"));
+
+    OP_REQUIRES(context, (nall * 3 == coord_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (nall == type_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (9 == box_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of box should be 9"));
+    OP_REQUIRES(context, (ndescrpt == avg_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of avg should be ndescrpt"));
+    OP_REQUIRES(context, (ndescrpt == std_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of std should be ndescrpt"));
+
+    OP_REQUIRES(context, (ntypes == int(sel_a.size())),
+                errors::InvalidArgument(
+                    "number of types should match the length of sel array"));
+    OP_REQUIRES(context, (ntypes == int(sel_r.size())),
+                errors::InvalidArgument(
+                    "number of types should match the length of sel array"));
 
     int nei_mode = 0;
     bool b_nlist_map = false;
     if (mesh_tensor.shape().dim_size(0) == 16) {
       // lammps neighbor list
       nei_mode = 3;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 6) {
+    } else if (mesh_tensor.shape().dim_size(0) == 6) {
       // manual copied pbc
-      assert (nloc == nall);
+      assert(nloc == nall);
       nei_mode = 1;
       b_nlist_map = true;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 0) {
+    } else if (mesh_tensor.shape().dim_size(0) == 0) {
       // no pbc
-      assert (nloc == nall);
+      assert(nloc == nall);
       nei_mode = -1;
-    }
-    else {
+    } else {
       throw deepmd::deepmd_exception("invalid mesh tensor");
     }
 
     // Create output tensors
-    TensorShape descrpt_shape ;
-    descrpt_shape.AddDim (nsamples);
-    descrpt_shape.AddDim (int_64(nloc) * ndescrpt);
-    TensorShape descrpt_deriv_shape ;
-    descrpt_deriv_shape.AddDim (nsamples);
-    descrpt_deriv_shape.AddDim (int_64(nloc) * ndescrpt * 3);
-    TensorShape rij_shape ;
-    rij_shape.AddDim (nsamples);
-    rij_shape.AddDim (int_64(nloc) * nnei * 3);
-    TensorShape nlist_shape ;
-    nlist_shape.AddDim (nsamples);
-    nlist_shape.AddDim (int_64(nloc) * nnei);
+    TensorShape descrpt_shape;
+    descrpt_shape.AddDim(nsamples);
+    descrpt_shape.AddDim(int_64(nloc) * ndescrpt);
+    TensorShape descrpt_deriv_shape;
+    descrpt_deriv_shape.AddDim(nsamples);
+    descrpt_deriv_shape.AddDim(int_64(nloc) * ndescrpt * 3);
+    TensorShape rij_shape;
+    rij_shape.AddDim(nsamples);
+    rij_shape.AddDim(int_64(nloc) * nnei * 3);
+    TensorShape nlist_shape;
+    nlist_shape.AddDim(nsamples);
+    nlist_shape.AddDim(int_64(nloc) * nnei);
     // define output tensor
     int context_output_index = 0;
     Tensor* descrpt_tensor = NULL;
     Tensor* descrpt_deriv_tensor = NULL;
     Tensor* rij_tensor = NULL;
     Tensor* nlist_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        descrpt_shape,
-        &descrpt_tensor));
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        descrpt_deriv_shape,
-        &descrpt_deriv_tensor));
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        rij_shape,
-        &rij_tensor));
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        nlist_shape,
-        &nlist_tensor));
-
-    FPTYPE * p_em = descrpt_tensor->flat().data();
-    FPTYPE * p_em_deriv = descrpt_deriv_tensor->flat().data();
-    FPTYPE * p_rij = rij_tensor->flat().data();
-    int * p_nlist = nlist_tensor->flat().data();
-    const FPTYPE * p_coord = coord_tensor.flat().data();
-    const FPTYPE * p_box = box_tensor.flat().data();
-    const FPTYPE * avg = avg_tensor.flat().data();
-    const FPTYPE * std = std_tensor.flat().data();
-    const int * p_type = type_tensor.flat().data();
+    OP_REQUIRES_OK(
+        context, context->allocate_output(context_output_index++, descrpt_shape,
+                                          &descrpt_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     descrpt_deriv_shape,
+                                                     &descrpt_deriv_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     rij_shape, &rij_tensor));
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++, nlist_shape,
+                                            &nlist_tensor));
+
+    FPTYPE* p_em = descrpt_tensor->flat().data();
+    FPTYPE* p_em_deriv = descrpt_deriv_tensor->flat().data();
+    FPTYPE* p_rij = rij_tensor->flat().data();
+    int* p_nlist = nlist_tensor->flat().data();
+    const FPTYPE* p_coord = coord_tensor.flat().data();
+    const FPTYPE* p_box = box_tensor.flat().data();
+    const FPTYPE* avg = avg_tensor.flat().data();
+    const FPTYPE* std = std_tensor.flat().data();
+    const int* p_type = type_tensor.flat().data();
 
     // loop over samples
-    for(int_64 ff = 0; ff < nsamples; ++ff){
-      FPTYPE * em = p_em + ff*nloc*ndescrpt;
-      FPTYPE * em_deriv = p_em_deriv + ff*nloc*ndescrpt*3;
-      FPTYPE * rij = p_rij + ff*nloc*nnei*3;
-      int * nlist = p_nlist + ff*nloc*nnei;
-      const FPTYPE * coord = p_coord + ff*nall*3;
-      const FPTYPE * box = p_box + ff*9;
-      const int * type = p_type + ff*nall;
-
-    if(device == "GPU") {
-      #if GOOGLE_CUDA
-      int * idx_mapping = NULL;
-      int * ilist = NULL, * numneigh = NULL;
-      int ** firstneigh = NULL;
-      deepmd::malloc_device_memory(firstneigh, nloc);
-      int * jlist = NULL;
-      FPTYPE * coord_cpy;
-      int * type_cpy;
-      int frame_nall = nall;
-      int mesh_tensor_size = static_cast(mesh_tensor.NumElements());
-      std::vector tensor_list(7);
-      // prepare coord and nlist
-      _prepare_coord_nlist_gpu(
-          context, &tensor_list[0], &coord, coord_cpy, &type, type_cpy, idx_mapping, 
-          gpu_inlist, ilist, numneigh, firstneigh, jlist, nbor_list_dev,
-          frame_nall, mem_cpy, mem_nnei, max_nbor_size,
-          box, mesh_tensor.flat().data(), mesh_tensor_size, nloc, nei_mode, rcut_r, max_cpy_trial, max_nnei_trial);
-
-      // allocate temp memory, temp memory must not be used after this operation!
-      Tensor int_temp;
-      TensorShape int_shape;
-      int_shape.AddDim(sec_a.size() + int_64(nloc) * sec_a.size() + nloc);
-      OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, int_shape, &int_temp));
-      Tensor uint64_temp;
-      TensorShape uint64_shape;
-      uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
-      OP_REQUIRES_OK(context, context->allocate_temp(DT_UINT64, uint64_shape, &uint64_temp));
-      array_int = int_temp.flat().data(); 
-      array_longlong = uint64_temp.flat().data();
-
-      // launch the gpu(nv) compute function
-      deepmd::prod_env_mat_a_gpu_cuda(
-          em, em_deriv, rij, nlist, 
-          coord, type, gpu_inlist, array_int, array_longlong, max_nbor_size, avg, std, nloc, frame_nall, rcut_r, rcut_r_smth, sec_a);
-      if(b_nlist_map) _map_nlist_gpu(nlist, idx_mapping, nloc, nnei);
-      deepmd::delete_device_memory(firstneigh);
-      #endif //GOOGLE_CUDA
-
-      #if TENSORFLOW_USE_ROCM
-      int * idx_mapping = NULL;
-      int * ilist = NULL, * numneigh = NULL;
-      int ** firstneigh = NULL;
-      deepmd::malloc_device_memory(firstneigh, nloc);
-      int * jlist = NULL;
-      FPTYPE * coord_cpy;
-      int * type_cpy;
-      int frame_nall = nall;
-      int mesh_tensor_size = static_cast(mesh_tensor.NumElements());
-      std::vector tensor_list(7);
-      // prepare coord and nlist
-      _prepare_coord_nlist_gpu_rocm(
-          context, &tensor_list[0], &coord, coord_cpy, &type, type_cpy, idx_mapping, 
-          gpu_inlist, ilist, numneigh, firstneigh, jlist, nbor_list_dev,
-          frame_nall, mem_cpy, mem_nnei, max_nbor_size,
-          box, mesh_tensor.flat().data(), mesh_tensor_size, nloc, nei_mode, rcut_r, max_cpy_trial, max_nnei_trial);
-
-      // allocate temp memory, temp memory must not be used after this operation!
-      Tensor int_temp;
-      TensorShape int_shape;
-      int_shape.AddDim(sec_a.size() + int_64(nloc) * sec_a.size() + nloc);
-      OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, int_shape, &int_temp));
-      Tensor uint64_temp;
-      TensorShape uint64_shape;
-      uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
-      OP_REQUIRES_OK(context, context->allocate_temp(DT_UINT64, uint64_shape, &uint64_temp));
-      array_int = int_temp.flat().data(); 
-      array_longlong = uint64_temp.flat().data();
-
-      // launch the gpu(nv) compute function
-      deepmd::prod_env_mat_a_gpu_rocm(
-          em, em_deriv, rij, nlist, 
-          coord, type, gpu_inlist, array_int, array_longlong, max_nbor_size, avg, std, nloc, frame_nall, rcut_r, rcut_r_smth, sec_a);
-      if(b_nlist_map) _map_nlist_gpu_rocm(nlist, idx_mapping, nloc, nnei);
-      deepmd::delete_device_memory(firstneigh);
-      #endif //TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::InputNlist inlist;
-      // some buffers, be freed after the evaluation of this frame
-      std::vector idx_mapping;
-      std::vector ilist(nloc), numneigh(nloc);
-      std::vector firstneigh(nloc);
-      std::vector> jlist(nloc);
-      std::vector coord_cpy;
-      std::vector type_cpy;
-      int frame_nall = nall;
-      // prepare coord and nlist
-      _prepare_coord_nlist_cpu(
-	  context, &coord, coord_cpy, &type, type_cpy, idx_mapping, 
-	  inlist, ilist, numneigh, firstneigh, jlist,
-	  frame_nall, mem_cpy, mem_nnei, max_nbor_size,
-	  box, mesh_tensor.flat().data(), nloc, nei_mode, rcut_r, max_cpy_trial, max_nnei_trial);
-      // launch the cpu compute function
-      deepmd::prod_env_mat_a_cpu(
-	  em, em_deriv, rij, nlist, 
-	  coord, type, inlist, max_nbor_size, avg, std, nloc, frame_nall, rcut_r, rcut_r_smth, sec_a);
-      // do nlist mapping if coords were copied
-      if(b_nlist_map) _map_nlist_cpu(nlist, &idx_mapping[0], nloc, nnei);
-    }
+    for (int_64 ff = 0; ff < nsamples; ++ff) {
+      FPTYPE* em = p_em + ff * nloc * ndescrpt;
+      FPTYPE* em_deriv = p_em_deriv + ff * nloc * ndescrpt * 3;
+      FPTYPE* rij = p_rij + ff * nloc * nnei * 3;
+      int* nlist = p_nlist + ff * nloc * nnei;
+      const FPTYPE* coord = p_coord + ff * nall * 3;
+      const FPTYPE* box = p_box + ff * 9;
+      const int* type = p_type + ff * nall;
+
+      if (device == "GPU") {
+#if GOOGLE_CUDA
+        int* idx_mapping = NULL;
+        int *ilist = NULL, *numneigh = NULL;
+        int** firstneigh = NULL;
+        deepmd::malloc_device_memory(firstneigh, nloc);
+        int* jlist = NULL;
+        FPTYPE* coord_cpy;
+        int* type_cpy;
+        int frame_nall = nall;
+        int mesh_tensor_size = static_cast(mesh_tensor.NumElements());
+        std::vector tensor_list(7);
+        // prepare coord and nlist
+        _prepare_coord_nlist_gpu(
+            context, &tensor_list[0], &coord, coord_cpy, &type, type_cpy,
+            idx_mapping, gpu_inlist, ilist, numneigh, firstneigh, jlist,
+            nbor_list_dev, frame_nall, mem_cpy, mem_nnei, max_nbor_size, box,
+            mesh_tensor.flat().data(), mesh_tensor_size, nloc, nei_mode,
+            rcut_r, max_cpy_trial, max_nnei_trial);
+
+        // allocate temp memory, temp memory must not be used after this
+        // operation!
+        Tensor int_temp;
+        TensorShape int_shape;
+        int_shape.AddDim(sec_a.size() + int_64(nloc) * sec_a.size() + nloc);
+        OP_REQUIRES_OK(context,
+                       context->allocate_temp(DT_INT32, int_shape, &int_temp));
+        Tensor uint64_temp;
+        TensorShape uint64_shape;
+        uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
+        OP_REQUIRES_OK(context, context->allocate_temp(DT_UINT64, uint64_shape,
+                                                       &uint64_temp));
+        array_int = int_temp.flat().data();
+        array_longlong = uint64_temp.flat().data();
+
+        // launch the gpu(nv) compute function
+        deepmd::prod_env_mat_a_gpu_cuda(em, em_deriv, rij, nlist, coord, type,
+                                        gpu_inlist, array_int, array_longlong,
+                                        max_nbor_size, avg, std, nloc,
+                                        frame_nall, rcut_r, rcut_r_smth, sec_a);
+        if (b_nlist_map) _map_nlist_gpu(nlist, idx_mapping, nloc, nnei);
+        deepmd::delete_device_memory(firstneigh);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+        int* idx_mapping = NULL;
+        int *ilist = NULL, *numneigh = NULL;
+        int** firstneigh = NULL;
+        deepmd::malloc_device_memory(firstneigh, nloc);
+        int* jlist = NULL;
+        FPTYPE* coord_cpy;
+        int* type_cpy;
+        int frame_nall = nall;
+        int mesh_tensor_size = static_cast(mesh_tensor.NumElements());
+        std::vector tensor_list(7);
+        // prepare coord and nlist
+        _prepare_coord_nlist_gpu_rocm(
+            context, &tensor_list[0], &coord, coord_cpy, &type, type_cpy,
+            idx_mapping, gpu_inlist, ilist, numneigh, firstneigh, jlist,
+            nbor_list_dev, frame_nall, mem_cpy, mem_nnei, max_nbor_size, box,
+            mesh_tensor.flat().data(), mesh_tensor_size, nloc, nei_mode,
+            rcut_r, max_cpy_trial, max_nnei_trial);
+
+        // allocate temp memory, temp memory must not be used after this
+        // operation!
+        Tensor int_temp;
+        TensorShape int_shape;
+        int_shape.AddDim(sec_a.size() + int_64(nloc) * sec_a.size() + nloc);
+        OP_REQUIRES_OK(context,
+                       context->allocate_temp(DT_INT32, int_shape, &int_temp));
+        Tensor uint64_temp;
+        TensorShape uint64_shape;
+        uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
+        OP_REQUIRES_OK(context, context->allocate_temp(DT_UINT64, uint64_shape,
+                                                       &uint64_temp));
+        array_int = int_temp.flat().data();
+        array_longlong = uint64_temp.flat().data();
+
+        // launch the gpu(nv) compute function
+        deepmd::prod_env_mat_a_gpu_rocm(em, em_deriv, rij, nlist, coord, type,
+                                        gpu_inlist, array_int, array_longlong,
+                                        max_nbor_size, avg, std, nloc,
+                                        frame_nall, rcut_r, rcut_r_smth, sec_a);
+        if (b_nlist_map) _map_nlist_gpu_rocm(nlist, idx_mapping, nloc, nnei);
+        deepmd::delete_device_memory(firstneigh);
+#endif  // TENSORFLOW_USE_ROCM
+      } else if (device == "CPU") {
+        deepmd::InputNlist inlist;
+        // some buffers, be freed after the evaluation of this frame
+        std::vector idx_mapping;
+        std::vector ilist(nloc), numneigh(nloc);
+        std::vector firstneigh(nloc);
+        std::vector> jlist(nloc);
+        std::vector coord_cpy;
+        std::vector type_cpy;
+        int frame_nall = nall;
+        // prepare coord and nlist
+        _prepare_coord_nlist_cpu(
+            context, &coord, coord_cpy, &type, type_cpy, idx_mapping, inlist,
+            ilist, numneigh, firstneigh, jlist, frame_nall, mem_cpy, mem_nnei,
+            max_nbor_size, box, mesh_tensor.flat().data(), nloc, nei_mode,
+            rcut_r, max_cpy_trial, max_nnei_trial);
+        // launch the cpu compute function
+        deepmd::prod_env_mat_a_cpu(em, em_deriv, rij, nlist, coord, type,
+                                   inlist, max_nbor_size, avg, std, nloc,
+                                   frame_nall, rcut_r, rcut_r_smth, sec_a);
+        // do nlist mapping if coords were copied
+        if (b_nlist_map) _map_nlist_cpu(nlist, &idx_mapping[0], nloc, nnei);
+      }
     }
   }
 
-/////////////////////////////////////////////////////////////////////////////////////////////
-private:
+  /////////////////////////////////////////////////////////////////////////////////////////////
+ private:
   float rcut_a;
   float rcut_r;
   float rcut_r_smth;
@@ -693,22 +687,22 @@ class ProdEnvMatAOp : public OpKernel {
   int mem_cpy, max_cpy_trial;
   int mem_nnei, max_nnei_trial;
   std::string device;
-  int * array_int = NULL;
-  unsigned long long * array_longlong = NULL;
+  int* array_int = NULL;
+  unsigned long long* array_longlong = NULL;
   deepmd::InputNlist gpu_inlist;
-  int * nbor_list_dev = NULL;
+  int* nbor_list_dev = NULL;
 };
 
-template
+template 
 class ProdEnvMatROp : public OpKernel {
-public:
+ public:
   explicit ProdEnvMatROp(OpKernelConstruction* context) : OpKernel(context) {
     OP_REQUIRES_OK(context, context->GetAttr("rcut", &rcut));
     OP_REQUIRES_OK(context, context->GetAttr("rcut_smth", &rcut_smth));
     OP_REQUIRES_OK(context, context->GetAttr("sel", &sel));
-    deepmd::cum_sum (sec, sel);
+    deepmd::cum_sum(sec, sel);
     sel_null.resize(3, 0);
-    deepmd::cum_sum (sec_null, sel_null);
+    deepmd::cum_sum(sec_null, sel_null);
     ndescrpt = sec.back() * 1;
     nnei = sec.back();
     max_nbor_size = 1024;
@@ -719,233 +713,250 @@ class ProdEnvMatROp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& coord_tensor  = context->input(context_input_index++);
-    const Tensor& type_tensor   = context->input(context_input_index++);
+    const Tensor& coord_tensor = context->input(context_input_index++);
+    const Tensor& type_tensor = context->input(context_input_index++);
     const Tensor& natoms_tensor = context->input(context_input_index++);
-    const Tensor& box_tensor    = context->input(context_input_index++);
-    const Tensor& mesh_tensor   = context->input(context_input_index++);
-    const Tensor& avg_tensor    = context->input(context_input_index++);
-    const Tensor& std_tensor    = context->input(context_input_index++);
+    const Tensor& box_tensor = context->input(context_input_index++);
+    const Tensor& mesh_tensor = context->input(context_input_index++);
+    const Tensor& avg_tensor = context->input(context_input_index++);
+    const Tensor& std_tensor = context->input(context_input_index++);
     // set size of the sample
-    OP_REQUIRES (context, (coord_tensor.shape().dims() == 2),	      errors::InvalidArgument ("Dim of coord should be 2"));
-    OP_REQUIRES (context, (type_tensor.shape().dims() == 2),	      errors::InvalidArgument ("Dim of type should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),      errors::InvalidArgument ("Dim of natoms should be 1"));
-    OP_REQUIRES (context, (box_tensor.shape().dims() == 2),         errors::InvalidArgument ("Dim of box should be 2"));
-    OP_REQUIRES (context, (mesh_tensor.shape().dims() == 1),        errors::InvalidArgument ("Dim of mesh should be 1"));
-    OP_REQUIRES (context, (avg_tensor.shape().dims() == 2),         errors::InvalidArgument ("Dim of avg should be 2"));
-    OP_REQUIRES (context, (std_tensor.shape().dims() == 2),         errors::InvalidArgument ("Dim of std should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
-    const int * natoms = natoms_tensor.flat().data();
+    OP_REQUIRES(context, (coord_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of coord should be 2"));
+    OP_REQUIRES(context, (type_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of type should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (box_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of box should be 2"));
+    OP_REQUIRES(context, (mesh_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of mesh should be 1"));
+    OP_REQUIRES(context, (avg_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of avg should be 2"));
+    OP_REQUIRES(context, (std_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of std should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    DeviceFunctor()(device, context->eigen_device());
+    const int* natoms = natoms_tensor.flat().data();
     int nloc = natoms[0];
     int nall = natoms[1];
-    int ntypes = natoms_tensor.shape().dim_size(0) - 2; //nloc and nall mean something.
+    int ntypes =
+        natoms_tensor.shape().dim_size(0) - 2;  // nloc and nall mean something.
     int nsamples = coord_tensor.shape().dim_size(0);
     //
     //// check the sizes
     // check the sizes
-    OP_REQUIRES (context, (nsamples == type_tensor.shape().dim_size(0)),  errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nsamples == box_tensor.shape().dim_size(0)),   errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (ntypes == avg_tensor.shape().dim_size(0)),     errors::InvalidArgument ("number of avg should be ntype"));
-    OP_REQUIRES (context, (ntypes == std_tensor.shape().dim_size(0)),     errors::InvalidArgument ("number of std should be ntype"));
-    OP_REQUIRES (context, (nall * 3 == coord_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (nall == type_tensor.shape().dim_size(1)),      errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (9 == box_tensor.shape().dim_size(1)),          errors::InvalidArgument ("number of box should be 9"));
-    OP_REQUIRES (context, (ndescrpt == avg_tensor.shape().dim_size(1)),   errors::InvalidArgument ("number of avg should be ndescrpt"));
-    OP_REQUIRES (context, (ndescrpt == std_tensor.shape().dim_size(1)),   errors::InvalidArgument ("number of std should be ndescrpt"));
+    OP_REQUIRES(context, (nsamples == type_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nsamples == box_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (ntypes == avg_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of avg should be ntype"));
+    OP_REQUIRES(context, (ntypes == std_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of std should be ntype"));
+    OP_REQUIRES(context, (nall * 3 == coord_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (nall == type_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (9 == box_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of box should be 9"));
+    OP_REQUIRES(context, (ndescrpt == avg_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of avg should be ndescrpt"));
+    OP_REQUIRES(context, (ndescrpt == std_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of std should be ndescrpt"));
 
     int nei_mode = 0;
     bool b_nlist_map = false;
     if (mesh_tensor.shape().dim_size(0) == 16) {
       // lammps neighbor list
       nei_mode = 3;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 6) {
+    } else if (mesh_tensor.shape().dim_size(0) == 6) {
       // manual copied pbc
-      assert (nloc == nall);
+      assert(nloc == nall);
       nei_mode = 1;
       b_nlist_map = true;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 0) {
+    } else if (mesh_tensor.shape().dim_size(0) == 0) {
       // no pbc
-      assert (nloc == nall);
+      assert(nloc == nall);
       nei_mode = -1;
-    }
-    else {
+    } else {
       throw deepmd::deepmd_exception("invalid mesh tensor");
     }
 
     // Create an output tensor
-    TensorShape descrpt_shape ;
-    descrpt_shape.AddDim (nsamples);
-    descrpt_shape.AddDim (int_64(nloc) * ndescrpt);
-    TensorShape descrpt_deriv_shape ;
-    descrpt_deriv_shape.AddDim (nsamples);
-    descrpt_deriv_shape.AddDim (int_64(nloc) * ndescrpt * 3);
-    TensorShape rij_shape ;
-    rij_shape.AddDim (nsamples);
-    rij_shape.AddDim (int_64(nloc) * nnei * 3);
-    TensorShape nlist_shape ;
-    nlist_shape.AddDim (nsamples);
-    nlist_shape.AddDim (int_64(nloc) * nnei);
+    TensorShape descrpt_shape;
+    descrpt_shape.AddDim(nsamples);
+    descrpt_shape.AddDim(int_64(nloc) * ndescrpt);
+    TensorShape descrpt_deriv_shape;
+    descrpt_deriv_shape.AddDim(nsamples);
+    descrpt_deriv_shape.AddDim(int_64(nloc) * ndescrpt * 3);
+    TensorShape rij_shape;
+    rij_shape.AddDim(nsamples);
+    rij_shape.AddDim(int_64(nloc) * nnei * 3);
+    TensorShape nlist_shape;
+    nlist_shape.AddDim(nsamples);
+    nlist_shape.AddDim(int_64(nloc) * nnei);
 
     int context_output_index = 0;
     Tensor* descrpt_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++, 
-        descrpt_shape, 
-        &descrpt_tensor));
+    OP_REQUIRES_OK(
+        context, context->allocate_output(context_output_index++, descrpt_shape,
+                                          &descrpt_tensor));
     Tensor* descrpt_deriv_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++, 
-        descrpt_deriv_shape, 
-        &descrpt_deriv_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     descrpt_deriv_shape,
+                                                     &descrpt_deriv_tensor));
     Tensor* rij_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++, 
-        rij_shape,
-        &rij_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     rij_shape, &rij_tensor));
     Tensor* nlist_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++, 
-        nlist_shape,
-        &nlist_tensor));
-
-    FPTYPE * p_em = descrpt_tensor->flat().data();
-    FPTYPE * p_em_deriv = descrpt_deriv_tensor->flat().data();
-    FPTYPE * p_rij = rij_tensor->flat().data();
-    int * p_nlist = nlist_tensor->flat().data();
-    const FPTYPE * p_coord = coord_tensor.flat().data();
-    const FPTYPE * p_box = box_tensor.flat().data();
-    const FPTYPE * avg = avg_tensor.flat().data();
-    const FPTYPE * std = std_tensor.flat().data();
-    const int * p_type = type_tensor.flat().data();
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++, nlist_shape,
+                                            &nlist_tensor));
+
+    FPTYPE* p_em = descrpt_tensor->flat().data();
+    FPTYPE* p_em_deriv = descrpt_deriv_tensor->flat().data();
+    FPTYPE* p_rij = rij_tensor->flat().data();
+    int* p_nlist = nlist_tensor->flat().data();
+    const FPTYPE* p_coord = coord_tensor.flat().data();
+    const FPTYPE* p_box = box_tensor.flat().data();
+    const FPTYPE* avg = avg_tensor.flat().data();
+    const FPTYPE* std = std_tensor.flat().data();
+    const int* p_type = type_tensor.flat().data();
 
     // loop over samples
-    for(int_64 ff = 0; ff < nsamples; ++ff){
-      FPTYPE * em = p_em + ff*nloc*ndescrpt;
-      FPTYPE * em_deriv = p_em_deriv + ff*nloc*ndescrpt*3;
-      FPTYPE * rij = p_rij + ff*nloc*nnei*3;
-      int * nlist = p_nlist + ff*nloc*nnei;
-      const FPTYPE * coord = p_coord + ff*nall*3;
-      const FPTYPE * box = p_box + ff*9;
-      const int * type = p_type + ff*nall;
-
-    if(device == "GPU") {
-      #if GOOGLE_CUDA
-      int * idx_mapping = NULL;
-      int * ilist = NULL, * numneigh = NULL;
-      int ** firstneigh = NULL;
-      deepmd::malloc_device_memory(firstneigh, nloc);
-      int * jlist = NULL;
-      FPTYPE * coord_cpy;
-      int * type_cpy;
-      int frame_nall = nall;
-      int mesh_tensor_size = static_cast(mesh_tensor.NumElements());
-      std::vector tensor_list(7);
-      // prepare coord and nlist
-      _prepare_coord_nlist_gpu(
-          context, &tensor_list[0], &coord, coord_cpy, &type, type_cpy, idx_mapping, 
-          gpu_inlist, ilist, numneigh, firstneigh, jlist, nbor_list_dev,
-          frame_nall, mem_cpy, mem_nnei, max_nbor_size,
-          box, mesh_tensor.flat().data(), mesh_tensor_size, nloc, nei_mode, rcut, max_cpy_trial, max_nnei_trial);
-  
-      // allocate temp memory, temp memory must not be used after this operation!
-      Tensor int_temp;
-      TensorShape int_shape;
-      int_shape.AddDim(sec.size() + int_64(nloc) * sec.size() + nloc);
-      OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, int_shape, &int_temp));
-      Tensor uint64_temp;
-      TensorShape uint64_shape;
-      uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
-      OP_REQUIRES_OK(context, context->allocate_temp(DT_UINT64, uint64_shape, &uint64_temp));
-      array_int = int_temp.flat().data(); 
-      array_longlong = uint64_temp.flat().data();
-      
-      // launch the gpu(nv) compute function
-      deepmd::prod_env_mat_r_gpu_cuda(
-          em, em_deriv, rij, nlist, 
-          coord, type, gpu_inlist, array_int, array_longlong, max_nbor_size, avg, std, nloc, frame_nall, rcut, rcut_smth, sec);
-      if(b_nlist_map) _map_nlist_gpu(nlist, idx_mapping, nloc, nnei);
-      deepmd::delete_device_memory(firstneigh);
-      #endif //GOOGLE_CUDA
-
-      
-      #if TENSORFLOW_USE_ROCM
-      int * idx_mapping = NULL;
-      int * ilist = NULL, * numneigh = NULL;
-      int ** firstneigh = NULL;
-      deepmd::malloc_device_memory(firstneigh, nloc);
-      int * jlist = NULL;
-      FPTYPE * coord_cpy;
-      int * type_cpy;
-      int frame_nall = nall;
-      int mesh_tensor_size = static_cast(mesh_tensor.NumElements());
-      std::vector tensor_list(7);
-      // prepare coord and nlist
-      _prepare_coord_nlist_gpu_rocm(
-          context, &tensor_list[0], &coord, coord_cpy, &type, type_cpy, idx_mapping, 
-          gpu_inlist, ilist, numneigh, firstneigh, jlist, nbor_list_dev,
-          frame_nall, mem_cpy, mem_nnei, max_nbor_size,
-          box, mesh_tensor.flat().data(), mesh_tensor_size, nloc, nei_mode, rcut, max_cpy_trial, max_nnei_trial);
-  
-      // allocate temp memory, temp memory must not be used after this operation!
-      Tensor int_temp;
-      TensorShape int_shape;
-      int_shape.AddDim(sec.size() + int_64(nloc) * sec.size() + nloc);
-      OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, int_shape, &int_temp));
-      Tensor uint64_temp;
-      TensorShape uint64_shape;
-      uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
-      OP_REQUIRES_OK(context, context->allocate_temp(DT_UINT64, uint64_shape, &uint64_temp));
-      array_int = int_temp.flat().data(); 
-      array_longlong = uint64_temp.flat().data();
-      
-      // launch the gpu(nv) compute function
-      deepmd::prod_env_mat_r_gpu_rocm(
-          em, em_deriv, rij, nlist, 
-          coord, type, gpu_inlist, array_int, array_longlong, max_nbor_size, avg, std, nloc, frame_nall, rcut, rcut_smth, sec);
-      if(b_nlist_map) _map_nlist_gpu_rocm(nlist, idx_mapping, nloc, nnei);
-      deepmd::delete_device_memory(firstneigh);
-      #endif //TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::InputNlist inlist;
-      // some buffers, be freed after the evaluation of this frame
-      std::vector idx_mapping;
-      std::vector ilist(nloc), numneigh(nloc);
-      std::vector firstneigh(nloc);
-      std::vector> jlist(nloc);
-      std::vector coord_cpy;
-      std::vector type_cpy;
-      int frame_nall = nall;
-      // prepare coord and nlist
-      _prepare_coord_nlist_cpu(
-	  context, &coord, coord_cpy, &type, type_cpy, idx_mapping, 
-	  inlist, ilist, numneigh, firstneigh, jlist,
-	  frame_nall, mem_cpy, mem_nnei, max_nbor_size,
-	  box, mesh_tensor.flat().data(), nloc, nei_mode, rcut, max_cpy_trial, max_nnei_trial);
-      // launch the cpu compute function
-      deepmd::prod_env_mat_r_cpu(
-          em, em_deriv, rij, nlist, 
-          coord, type, inlist, max_nbor_size, avg, std, nloc, frame_nall, rcut, rcut_smth, sec);
-      if(b_nlist_map) _map_nlist_cpu(nlist, &idx_mapping[0], nloc, nnei);
-    }
+    for (int_64 ff = 0; ff < nsamples; ++ff) {
+      FPTYPE* em = p_em + ff * nloc * ndescrpt;
+      FPTYPE* em_deriv = p_em_deriv + ff * nloc * ndescrpt * 3;
+      FPTYPE* rij = p_rij + ff * nloc * nnei * 3;
+      int* nlist = p_nlist + ff * nloc * nnei;
+      const FPTYPE* coord = p_coord + ff * nall * 3;
+      const FPTYPE* box = p_box + ff * 9;
+      const int* type = p_type + ff * nall;
+
+      if (device == "GPU") {
+#if GOOGLE_CUDA
+        int* idx_mapping = NULL;
+        int *ilist = NULL, *numneigh = NULL;
+        int** firstneigh = NULL;
+        deepmd::malloc_device_memory(firstneigh, nloc);
+        int* jlist = NULL;
+        FPTYPE* coord_cpy;
+        int* type_cpy;
+        int frame_nall = nall;
+        int mesh_tensor_size = static_cast(mesh_tensor.NumElements());
+        std::vector tensor_list(7);
+        // prepare coord and nlist
+        _prepare_coord_nlist_gpu(
+            context, &tensor_list[0], &coord, coord_cpy, &type, type_cpy,
+            idx_mapping, gpu_inlist, ilist, numneigh, firstneigh, jlist,
+            nbor_list_dev, frame_nall, mem_cpy, mem_nnei, max_nbor_size, box,
+            mesh_tensor.flat().data(), mesh_tensor_size, nloc, nei_mode,
+            rcut, max_cpy_trial, max_nnei_trial);
+
+        // allocate temp memory, temp memory must not be used after this
+        // operation!
+        Tensor int_temp;
+        TensorShape int_shape;
+        int_shape.AddDim(sec.size() + int_64(nloc) * sec.size() + nloc);
+        OP_REQUIRES_OK(context,
+                       context->allocate_temp(DT_INT32, int_shape, &int_temp));
+        Tensor uint64_temp;
+        TensorShape uint64_shape;
+        uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
+        OP_REQUIRES_OK(context, context->allocate_temp(DT_UINT64, uint64_shape,
+                                                       &uint64_temp));
+        array_int = int_temp.flat().data();
+        array_longlong = uint64_temp.flat().data();
+
+        // launch the gpu(nv) compute function
+        deepmd::prod_env_mat_r_gpu_cuda(em, em_deriv, rij, nlist, coord, type,
+                                        gpu_inlist, array_int, array_longlong,
+                                        max_nbor_size, avg, std, nloc,
+                                        frame_nall, rcut, rcut_smth, sec);
+        if (b_nlist_map) _map_nlist_gpu(nlist, idx_mapping, nloc, nnei);
+        deepmd::delete_device_memory(firstneigh);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+        int* idx_mapping = NULL;
+        int *ilist = NULL, *numneigh = NULL;
+        int** firstneigh = NULL;
+        deepmd::malloc_device_memory(firstneigh, nloc);
+        int* jlist = NULL;
+        FPTYPE* coord_cpy;
+        int* type_cpy;
+        int frame_nall = nall;
+        int mesh_tensor_size = static_cast(mesh_tensor.NumElements());
+        std::vector tensor_list(7);
+        // prepare coord and nlist
+        _prepare_coord_nlist_gpu_rocm(
+            context, &tensor_list[0], &coord, coord_cpy, &type, type_cpy,
+            idx_mapping, gpu_inlist, ilist, numneigh, firstneigh, jlist,
+            nbor_list_dev, frame_nall, mem_cpy, mem_nnei, max_nbor_size, box,
+            mesh_tensor.flat().data(), mesh_tensor_size, nloc, nei_mode,
+            rcut, max_cpy_trial, max_nnei_trial);
+
+        // allocate temp memory, temp memory must not be used after this
+        // operation!
+        Tensor int_temp;
+        TensorShape int_shape;
+        int_shape.AddDim(sec.size() + int_64(nloc) * sec.size() + nloc);
+        OP_REQUIRES_OK(context,
+                       context->allocate_temp(DT_INT32, int_shape, &int_temp));
+        Tensor uint64_temp;
+        TensorShape uint64_shape;
+        uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
+        OP_REQUIRES_OK(context, context->allocate_temp(DT_UINT64, uint64_shape,
+                                                       &uint64_temp));
+        array_int = int_temp.flat().data();
+        array_longlong = uint64_temp.flat().data();
+
+        // launch the gpu(nv) compute function
+        deepmd::prod_env_mat_r_gpu_rocm(em, em_deriv, rij, nlist, coord, type,
+                                        gpu_inlist, array_int, array_longlong,
+                                        max_nbor_size, avg, std, nloc,
+                                        frame_nall, rcut, rcut_smth, sec);
+        if (b_nlist_map) _map_nlist_gpu_rocm(nlist, idx_mapping, nloc, nnei);
+        deepmd::delete_device_memory(firstneigh);
+#endif  // TENSORFLOW_USE_ROCM
+      } else if (device == "CPU") {
+        deepmd::InputNlist inlist;
+        // some buffers, be freed after the evaluation of this frame
+        std::vector idx_mapping;
+        std::vector ilist(nloc), numneigh(nloc);
+        std::vector firstneigh(nloc);
+        std::vector> jlist(nloc);
+        std::vector coord_cpy;
+        std::vector type_cpy;
+        int frame_nall = nall;
+        // prepare coord and nlist
+        _prepare_coord_nlist_cpu(
+            context, &coord, coord_cpy, &type, type_cpy, idx_mapping, inlist,
+            ilist, numneigh, firstneigh, jlist, frame_nall, mem_cpy, mem_nnei,
+            max_nbor_size, box, mesh_tensor.flat().data(), nloc, nei_mode,
+            rcut, max_cpy_trial, max_nnei_trial);
+        // launch the cpu compute function
+        deepmd::prod_env_mat_r_cpu(em, em_deriv, rij, nlist, coord, type,
+                                   inlist, max_nbor_size, avg, std, nloc,
+                                   frame_nall, rcut, rcut_smth, sec);
+        if (b_nlist_map) _map_nlist_cpu(nlist, &idx_mapping[0], nloc, nnei);
+      }
     }
   }
 
-/////////////////////////////////////////////////////////////////////////////////////////////
+  /////////////////////////////////////////////////////////////////////////////////////////////
 
-private:
+ private:
   float rcut;
   float rcut_smth;
   std::vector sel;
@@ -956,15 +967,15 @@ class ProdEnvMatROp : public OpKernel {
   int mem_cpy, max_cpy_trial;
   int mem_nnei, max_nnei_trial;
   std::string device;
-  int * array_int = NULL;
-  unsigned long long * array_longlong = NULL;
+  int* array_int = NULL;
+  unsigned long long* array_longlong = NULL;
   deepmd::InputNlist gpu_inlist;
-  int * nbor_list_dev = NULL;
+  int* nbor_list_dev = NULL;
 };
 
 template 
 class ProdEnvMatAMixOp : public OpKernel {
-public:
+ public:
   explicit ProdEnvMatAMixOp(OpKernelConstruction* context) : OpKernel(context) {
     float nloc_f, nall_f;
     OP_REQUIRES_OK(context, context->GetAttr("rcut_a", &rcut_a));
@@ -974,8 +985,8 @@ class ProdEnvMatAMixOp : public OpKernel {
     OP_REQUIRES_OK(context, context->GetAttr("sel_r", &sel_r));
     // OP_REQUIRES_OK(context, context->GetAttr("nloc", &nloc_f));
     // OP_REQUIRES_OK(context, context->GetAttr("nall", &nall_f));
-    deepmd::cum_sum (sec_a, sel_a);
-    deepmd::cum_sum (sec_r, sel_r);
+    deepmd::cum_sum(sec_a, sel_a);
+    deepmd::cum_sum(sec_r, sel_r);
     ndescrpt_a = sec_a.back() * 4;
     ndescrpt_r = sec_r.back() * 1;
     ndescrpt = ndescrpt_a + ndescrpt_r;
@@ -990,93 +1001,114 @@ class ProdEnvMatAMixOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& coord_tensor	= context->input(context_input_index++);
-    const Tensor& type_tensor	= context->input(context_input_index++);
-    const Tensor& natoms_tensor	= context->input(context_input_index++);
-    const Tensor& box_tensor	= context->input(context_input_index++);
-    const Tensor& mesh_tensor   = context->input(context_input_index++);
-    const Tensor& avg_tensor	= context->input(context_input_index++);
-    const Tensor& std_tensor	= context->input(context_input_index++);
-    // set size of the sample. assume 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]], then shape(t) ==> [2, 2, 3]
-    OP_REQUIRES (context, (coord_tensor.shape().dims() == 2),       errors::InvalidArgument ("Dim of coord should be 2"));
-    OP_REQUIRES (context, (type_tensor.shape().dims() == 2),        errors::InvalidArgument ("Dim of type should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),      errors::InvalidArgument ("Dim of natoms should be 1"));
-    OP_REQUIRES (context, (box_tensor.shape().dims() == 2),         errors::InvalidArgument ("Dim of box should be 2"));
-    OP_REQUIRES (context, (mesh_tensor.shape().dims() == 1),        errors::InvalidArgument ("Dim of mesh should be 1"));
-    OP_REQUIRES (context, (avg_tensor.shape().dims() == 2),         errors::InvalidArgument ("Dim of avg should be 2"));
-    OP_REQUIRES (context, (std_tensor.shape().dims() == 2),         errors::InvalidArgument ("Dim of std should be 2"));
-    OP_REQUIRES (context, (sec_r.back() == 0),                      errors::InvalidArgument ("Rotational free descriptor only support all-angular information: sel_r should be all zero."));
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
-    const int * natoms = natoms_tensor.flat().data();
+    const Tensor& coord_tensor = context->input(context_input_index++);
+    const Tensor& type_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
+    const Tensor& box_tensor = context->input(context_input_index++);
+    const Tensor& mesh_tensor = context->input(context_input_index++);
+    const Tensor& avg_tensor = context->input(context_input_index++);
+    const Tensor& std_tensor = context->input(context_input_index++);
+    // set size of the sample. assume 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3,
+    // 3], [4, 4, 4]]], then shape(t) ==> [2, 2, 3]
+    OP_REQUIRES(context, (coord_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of coord should be 2"));
+    OP_REQUIRES(context, (type_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of type should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (box_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of box should be 2"));
+    OP_REQUIRES(context, (mesh_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of mesh should be 1"));
+    OP_REQUIRES(context, (avg_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of avg should be 2"));
+    OP_REQUIRES(context, (std_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of std should be 2"));
+    OP_REQUIRES(context, (sec_r.back() == 0),
+                errors::InvalidArgument(
+                    "Rotational free descriptor only support all-angular "
+                    "information: sel_r should be all zero."));
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    DeviceFunctor()(device, context->eigen_device());
+    const int* natoms = natoms_tensor.flat().data();
     int nloc = natoms[0];
     int nall = natoms[1];
     int ntypes = natoms_tensor.shape().dim_size(0) - 2;
     int nsamples = coord_tensor.shape().dim_size(0);
     //// check the sizes
-    OP_REQUIRES (context, (nsamples == type_tensor.shape().dim_size(0)),  errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nsamples == box_tensor.shape().dim_size(0)),   errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (ntypes == avg_tensor.shape().dim_size(0)),     errors::InvalidArgument ("number of avg should be ntype"));
-    OP_REQUIRES (context, (ntypes == std_tensor.shape().dim_size(0)),     errors::InvalidArgument ("number of std should be ntype"));
-    
-    OP_REQUIRES (context, (nall * 3 == coord_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (nall == type_tensor.shape().dim_size(1)),      errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (9 == box_tensor.shape().dim_size(1)),          errors::InvalidArgument ("number of box should be 9"));
-    OP_REQUIRES (context, (ndescrpt == avg_tensor.shape().dim_size(1)),   errors::InvalidArgument ("number of avg should be ndescrpt"));
-    OP_REQUIRES (context, (ndescrpt == std_tensor.shape().dim_size(1)),   errors::InvalidArgument ("number of std should be ndescrpt"));   
-    
-    OP_REQUIRES (context, (1 == int(sel_a.size())),  errors::InvalidArgument ("the length of sel array should be 1 in this op"));
-    OP_REQUIRES (context, (1 == int(sel_r.size())),  errors::InvalidArgument ("the length of sel array should be 1 in this op"));
+    OP_REQUIRES(context, (nsamples == type_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nsamples == box_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (ntypes == avg_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of avg should be ntype"));
+    OP_REQUIRES(context, (ntypes == std_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of std should be ntype"));
+
+    OP_REQUIRES(context, (nall * 3 == coord_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (nall == type_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (9 == box_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of box should be 9"));
+    OP_REQUIRES(context, (ndescrpt == avg_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of avg should be ndescrpt"));
+    OP_REQUIRES(context, (ndescrpt == std_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of std should be ndescrpt"));
+
+    OP_REQUIRES(context, (1 == int(sel_a.size())),
+                errors::InvalidArgument(
+                    "the length of sel array should be 1 in this op"));
+    OP_REQUIRES(context, (1 == int(sel_r.size())),
+                errors::InvalidArgument(
+                    "the length of sel array should be 1 in this op"));
 
     int nei_mode = 0;
     bool b_nlist_map = false;
     if (mesh_tensor.shape().dim_size(0) == 16) {
       // lammps neighbor list
       nei_mode = 3;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 6) {
+    } else if (mesh_tensor.shape().dim_size(0) == 6) {
       // manual copied pbc
-      assert (nloc == nall);
+      assert(nloc == nall);
       nei_mode = 1;
       b_nlist_map = true;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 0) {
+    } else if (mesh_tensor.shape().dim_size(0) == 0) {
       // no pbc
-      assert (nloc == nall);
+      assert(nloc == nall);
       nei_mode = -1;
-    }
-    else {
+    } else {
       throw deepmd::deepmd_exception("invalid mesh tensor");
     }
 
     // Create output tensors
-    TensorShape descrpt_shape ;
-    descrpt_shape.AddDim (nsamples);
-    descrpt_shape.AddDim (int_64(nloc) * ndescrpt);
-    TensorShape descrpt_deriv_shape ;
-    descrpt_deriv_shape.AddDim (nsamples);
-    descrpt_deriv_shape.AddDim (int_64(nloc) * ndescrpt * 3);
-    TensorShape rij_shape ;
-    rij_shape.AddDim (nsamples);
-    rij_shape.AddDim (int_64(nloc) * nnei * 3);
-    TensorShape nlist_shape ;
-    nlist_shape.AddDim (nsamples);
-    nlist_shape.AddDim (int_64(nloc) * nnei);
-    TensorShape ntype_shape ;
-    ntype_shape.AddDim (nsamples);
-    ntype_shape.AddDim (nloc * nnei);
-    TensorShape nmask_shape ;
-    nmask_shape.AddDim (nsamples);
-    nmask_shape.AddDim (nloc * nnei);
+    TensorShape descrpt_shape;
+    descrpt_shape.AddDim(nsamples);
+    descrpt_shape.AddDim(int_64(nloc) * ndescrpt);
+    TensorShape descrpt_deriv_shape;
+    descrpt_deriv_shape.AddDim(nsamples);
+    descrpt_deriv_shape.AddDim(int_64(nloc) * ndescrpt * 3);
+    TensorShape rij_shape;
+    rij_shape.AddDim(nsamples);
+    rij_shape.AddDim(int_64(nloc) * nnei * 3);
+    TensorShape nlist_shape;
+    nlist_shape.AddDim(nsamples);
+    nlist_shape.AddDim(int_64(nloc) * nnei);
+    TensorShape ntype_shape;
+    ntype_shape.AddDim(nsamples);
+    ntype_shape.AddDim(nloc * nnei);
+    TensorShape nmask_shape;
+    nmask_shape.AddDim(nsamples);
+    nmask_shape.AddDim(nloc * nnei);
     // define output tensor
     int context_output_index = 0;
     Tensor* descrpt_tensor = NULL;
@@ -1085,174 +1117,181 @@ class ProdEnvMatAMixOp : public OpKernel {
     Tensor* nlist_tensor = NULL;
     Tensor* ntype_tensor = NULL;
     Tensor* nmask_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        descrpt_shape,
-        &descrpt_tensor));
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        descrpt_deriv_shape,
-        &descrpt_deriv_tensor));
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        rij_shape,
-        &rij_tensor));
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        nlist_shape,
-        &nlist_tensor));
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        ntype_shape,
-        &ntype_tensor));
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        nmask_shape,
-        &nmask_tensor));
-
-    FPTYPE * p_em = descrpt_tensor->flat().data();
-    FPTYPE * p_em_deriv = descrpt_deriv_tensor->flat().data();
-    FPTYPE * p_rij = rij_tensor->flat().data();
-    int * p_nlist = nlist_tensor->flat().data();
-    int * p_ntype = ntype_tensor->flat().data();
-    bool * p_nmask = nmask_tensor->flat().data();
-    const FPTYPE * p_coord = coord_tensor.flat().data();
-    const FPTYPE * p_box = box_tensor.flat().data();
-    const FPTYPE * avg = avg_tensor.flat().data();
-    const FPTYPE * std = std_tensor.flat().data();
-    const int * p_type = type_tensor.flat().data();
+    OP_REQUIRES_OK(
+        context, context->allocate_output(context_output_index++, descrpt_shape,
+                                          &descrpt_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     descrpt_deriv_shape,
+                                                     &descrpt_deriv_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     rij_shape, &rij_tensor));
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++, nlist_shape,
+                                            &nlist_tensor));
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++, ntype_shape,
+                                            &ntype_tensor));
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++, nmask_shape,
+                                            &nmask_tensor));
+
+    FPTYPE* p_em = descrpt_tensor->flat().data();
+    FPTYPE* p_em_deriv = descrpt_deriv_tensor->flat().data();
+    FPTYPE* p_rij = rij_tensor->flat().data();
+    int* p_nlist = nlist_tensor->flat().data();
+    int* p_ntype = ntype_tensor->flat().data();
+    bool* p_nmask = nmask_tensor->flat().data();
+    const FPTYPE* p_coord = coord_tensor.flat().data();
+    const FPTYPE* p_box = box_tensor.flat().data();
+    const FPTYPE* avg = avg_tensor.flat().data();
+    const FPTYPE* std = std_tensor.flat().data();
+    const int* p_type = type_tensor.flat().data();
 
     // loop over samples
-    for(int_64 ff = 0; ff < nsamples; ++ff){
-      FPTYPE * em = p_em + ff*nloc*ndescrpt;
-      FPTYPE * em_deriv = p_em_deriv + ff*nloc*ndescrpt*3;
-      FPTYPE * rij = p_rij + ff*nloc*nnei*3;
-      int * nlist = p_nlist + ff*nloc*nnei;
-      int * ntype = p_ntype + ff*nloc*nnei;
-      bool * nmask = p_nmask + ff*nloc*nnei;
-      const FPTYPE * coord = p_coord + ff*nall*3;
-      const FPTYPE * box = p_box + ff*9;
-      const int * type = p_type + ff*nall;
-
-    if(device == "GPU") {
-      #if GOOGLE_CUDA
-      int * idx_mapping = NULL;
-      int * ilist = NULL, * numneigh = NULL;
-      int ** firstneigh = NULL;
-      deepmd::malloc_device_memory(firstneigh, nloc);
-      int * jlist = NULL;
-      FPTYPE * coord_cpy;
-      int * type_cpy;
-      int frame_nall = nall;
-      int mesh_tensor_size = static_cast(mesh_tensor.NumElements());
-      std::vector tensor_list(7);
-      Tensor fake_type; // all zeros
-      TensorShape fake_type_shape;
-      fake_type_shape.AddDim(nall);
-      OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, fake_type_shape, &fake_type));
-      deepmd::memset_device_memory(fake_type.flat().data(), 0, nall);
-      const int * f_type = fake_type.flat().data();
-      // prepare coord and nlist
-      _prepare_coord_nlist_gpu(
-          context, &tensor_list[0], &coord, coord_cpy, &f_type, type_cpy, idx_mapping, 
-          gpu_inlist, ilist, numneigh, firstneigh, jlist, nbor_list_dev,
-          frame_nall, mem_cpy, mem_nnei, max_nbor_size,
-          box, mesh_tensor.flat().data(), mesh_tensor_size, nloc, nei_mode, rcut_r, max_cpy_trial, max_nnei_trial);
-
-      // allocate temp memory, temp memory must not be used after this operation!
-      Tensor int_temp;
-      TensorShape int_shape;
-      int_shape.AddDim(sec_a.size() + int_64(nloc) * sec_a.size() + nloc);
-      OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, int_shape, &int_temp));
-      Tensor uint64_temp;
-      TensorShape uint64_shape;
-      uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
-      OP_REQUIRES_OK(context, context->allocate_temp(DT_UINT64, uint64_shape, &uint64_temp));
-      array_int = int_temp.flat().data(); 
-      array_longlong = uint64_temp.flat().data();
-
-      // launch the gpu(nv) compute function
-      deepmd::prod_env_mat_a_gpu_cuda(
-          em, em_deriv, rij, nlist, 
-          coord, type, gpu_inlist, array_int, array_longlong, max_nbor_size, avg, std, nloc, frame_nall, rcut_r, rcut_r_smth, sec_a, f_type);
-      _map_nei_info_gpu(nlist, ntype, nmask, type, idx_mapping, nloc, nnei, ntypes, b_nlist_map);
-      deepmd::delete_device_memory(firstneigh);
-      #endif //GOOGLE_CUDA
-
-      #if TENSORFLOW_USE_ROCM
-      int * idx_mapping = NULL;
-      int * ilist = NULL, * numneigh = NULL;
-      int ** firstneigh = NULL;
-      deepmd::malloc_device_memory(firstneigh, nloc);
-      int * jlist = NULL;
-      FPTYPE * coord_cpy;
-      int * type_cpy;
-      int frame_nall = nall;
-      int mesh_tensor_size = static_cast(mesh_tensor.NumElements());
-      std::vector tensor_list(7);
-      Tensor fake_type; // all zeros
-      TensorShape fake_type_shape;
-      fake_type_shape.AddDim(nall);
-      OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, fake_type_shape, &fake_type));
-      deepmd::memset_device_memory(fake_type.flat().data(), 0, nall);
-      const int * f_type = fake_type.flat().data();
-      // prepare coord and nlist
-      _prepare_coord_nlist_gpu_rocm(
-          context, &tensor_list[0], &coord, coord_cpy, &f_type, type_cpy, idx_mapping, 
-          gpu_inlist, ilist, numneigh, firstneigh, jlist, nbor_list_dev,
-          frame_nall, mem_cpy, mem_nnei, max_nbor_size,
-          box, mesh_tensor.flat().data(), mesh_tensor_size, nloc, nei_mode, rcut_r, max_cpy_trial, max_nnei_trial);
-
-      // allocate temp memory, temp memory must not be used after this operation!
-      Tensor int_temp;
-      TensorShape int_shape;
-      int_shape.AddDim(sec_a.size() + int_64(nloc) * sec_a.size() + nloc);
-      OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, int_shape, &int_temp));
-      Tensor uint64_temp;
-      TensorShape uint64_shape;
-      uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
-      OP_REQUIRES_OK(context, context->allocate_temp(DT_UINT64, uint64_shape, &uint64_temp));
-      array_int = int_temp.flat().data(); 
-      array_longlong = uint64_temp.flat().data();
-
-      // launch the gpu(nv) compute function
-      deepmd::prod_env_mat_a_gpu_rocm(
-          em, em_deriv, rij, nlist, 
-          coord, type, gpu_inlist, array_int, array_longlong, max_nbor_size, avg, std, nloc, frame_nall, rcut_r, rcut_r_smth, sec_a, f_type);
-      _map_nei_info_gpu_rocm(nlist, ntype, nmask, type, idx_mapping, nloc, nnei, ntypes, b_nlist_map);
-      deepmd::delete_device_memory(firstneigh);
-      #endif //TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::InputNlist inlist;
-      // some buffers, be freed after the evaluation of this frame
-      std::vector idx_mapping;
-      std::vector ilist(nloc), numneigh(nloc);
-      std::vector firstneigh(nloc);
-      std::vector> jlist(nloc);
-      std::vector coord_cpy;
-      std::vector type_cpy;
-      int frame_nall = nall;
-      std::vector fake_type(nall, 0);
-      const int * f_type = &fake_type[0];
-      // prepare coord and nlist
-      _prepare_coord_nlist_cpu(
-	  context, &coord, coord_cpy, &f_type, type_cpy, idx_mapping, 
-	  inlist, ilist, numneigh, firstneigh, jlist,
-	  frame_nall, mem_cpy, mem_nnei, max_nbor_size,
-	  box, mesh_tensor.flat().data(), nloc, nei_mode, rcut_r, max_cpy_trial, max_nnei_trial);
-      // launch the cpu compute function
-      deepmd::prod_env_mat_a_cpu(
-	  em, em_deriv, rij, nlist, 
-	  coord, type, inlist, max_nbor_size, avg, std, nloc, frame_nall, rcut_r, rcut_r_smth, sec_a, f_type);
-      // do nlist mapping if coords were copied
-    _map_nei_info_cpu(nlist, ntype, nmask, type, &idx_mapping[0], nloc, nnei, ntypes, b_nlist_map);
-    }
+    for (int_64 ff = 0; ff < nsamples; ++ff) {
+      FPTYPE* em = p_em + ff * nloc * ndescrpt;
+      FPTYPE* em_deriv = p_em_deriv + ff * nloc * ndescrpt * 3;
+      FPTYPE* rij = p_rij + ff * nloc * nnei * 3;
+      int* nlist = p_nlist + ff * nloc * nnei;
+      int* ntype = p_ntype + ff * nloc * nnei;
+      bool* nmask = p_nmask + ff * nloc * nnei;
+      const FPTYPE* coord = p_coord + ff * nall * 3;
+      const FPTYPE* box = p_box + ff * 9;
+      const int* type = p_type + ff * nall;
+
+      if (device == "GPU") {
+#if GOOGLE_CUDA
+        int* idx_mapping = NULL;
+        int *ilist = NULL, *numneigh = NULL;
+        int** firstneigh = NULL;
+        deepmd::malloc_device_memory(firstneigh, nloc);
+        int* jlist = NULL;
+        FPTYPE* coord_cpy;
+        int* type_cpy;
+        int frame_nall = nall;
+        int mesh_tensor_size = static_cast(mesh_tensor.NumElements());
+        std::vector tensor_list(7);
+        Tensor fake_type;  // all zeros
+        TensorShape fake_type_shape;
+        fake_type_shape.AddDim(nall);
+        OP_REQUIRES_OK(context, context->allocate_temp(
+                                    DT_INT32, fake_type_shape, &fake_type));
+        deepmd::memset_device_memory(fake_type.flat().data(), 0, nall);
+        const int* f_type = fake_type.flat().data();
+        // prepare coord and nlist
+        _prepare_coord_nlist_gpu(
+            context, &tensor_list[0], &coord, coord_cpy, &f_type, type_cpy,
+            idx_mapping, gpu_inlist, ilist, numneigh, firstneigh, jlist,
+            nbor_list_dev, frame_nall, mem_cpy, mem_nnei, max_nbor_size, box,
+            mesh_tensor.flat().data(), mesh_tensor_size, nloc, nei_mode,
+            rcut_r, max_cpy_trial, max_nnei_trial);
+
+        // allocate temp memory, temp memory must not be used after this
+        // operation!
+        Tensor int_temp;
+        TensorShape int_shape;
+        int_shape.AddDim(sec_a.size() + int_64(nloc) * sec_a.size() + nloc);
+        OP_REQUIRES_OK(context,
+                       context->allocate_temp(DT_INT32, int_shape, &int_temp));
+        Tensor uint64_temp;
+        TensorShape uint64_shape;
+        uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
+        OP_REQUIRES_OK(context, context->allocate_temp(DT_UINT64, uint64_shape,
+                                                       &uint64_temp));
+        array_int = int_temp.flat().data();
+        array_longlong = uint64_temp.flat().data();
+
+        // launch the gpu(nv) compute function
+        deepmd::prod_env_mat_a_gpu_cuda(
+            em, em_deriv, rij, nlist, coord, type, gpu_inlist, array_int,
+            array_longlong, max_nbor_size, avg, std, nloc, frame_nall, rcut_r,
+            rcut_r_smth, sec_a, f_type);
+        _map_nei_info_gpu(nlist, ntype, nmask, type, idx_mapping, nloc, nnei,
+                          ntypes, b_nlist_map);
+        deepmd::delete_device_memory(firstneigh);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+        int* idx_mapping = NULL;
+        int *ilist = NULL, *numneigh = NULL;
+        int** firstneigh = NULL;
+        deepmd::malloc_device_memory(firstneigh, nloc);
+        int* jlist = NULL;
+        FPTYPE* coord_cpy;
+        int* type_cpy;
+        int frame_nall = nall;
+        int mesh_tensor_size = static_cast(mesh_tensor.NumElements());
+        std::vector tensor_list(7);
+        Tensor fake_type;  // all zeros
+        TensorShape fake_type_shape;
+        fake_type_shape.AddDim(nall);
+        OP_REQUIRES_OK(context, context->allocate_temp(
+                                    DT_INT32, fake_type_shape, &fake_type));
+        deepmd::memset_device_memory(fake_type.flat().data(), 0, nall);
+        const int* f_type = fake_type.flat().data();
+        // prepare coord and nlist
+        _prepare_coord_nlist_gpu_rocm(
+            context, &tensor_list[0], &coord, coord_cpy, &f_type, type_cpy,
+            idx_mapping, gpu_inlist, ilist, numneigh, firstneigh, jlist,
+            nbor_list_dev, frame_nall, mem_cpy, mem_nnei, max_nbor_size, box,
+            mesh_tensor.flat().data(), mesh_tensor_size, nloc, nei_mode,
+            rcut_r, max_cpy_trial, max_nnei_trial);
+
+        // allocate temp memory, temp memory must not be used after this
+        // operation!
+        Tensor int_temp;
+        TensorShape int_shape;
+        int_shape.AddDim(sec_a.size() + int_64(nloc) * sec_a.size() + nloc);
+        OP_REQUIRES_OK(context,
+                       context->allocate_temp(DT_INT32, int_shape, &int_temp));
+        Tensor uint64_temp;
+        TensorShape uint64_shape;
+        uint64_shape.AddDim(int_64(nloc) * max_nbor_size * 2);
+        OP_REQUIRES_OK(context, context->allocate_temp(DT_UINT64, uint64_shape,
+                                                       &uint64_temp));
+        array_int = int_temp.flat().data();
+        array_longlong = uint64_temp.flat().data();
+
+        // launch the gpu(nv) compute function
+        deepmd::prod_env_mat_a_gpu_rocm(
+            em, em_deriv, rij, nlist, coord, type, gpu_inlist, array_int,
+            array_longlong, max_nbor_size, avg, std, nloc, frame_nall, rcut_r,
+            rcut_r_smth, sec_a, f_type);
+        _map_nei_info_gpu_rocm(nlist, ntype, nmask, type, idx_mapping, nloc,
+                               nnei, ntypes, b_nlist_map);
+        deepmd::delete_device_memory(firstneigh);
+#endif  // TENSORFLOW_USE_ROCM
+      } else if (device == "CPU") {
+        deepmd::InputNlist inlist;
+        // some buffers, be freed after the evaluation of this frame
+        std::vector idx_mapping;
+        std::vector ilist(nloc), numneigh(nloc);
+        std::vector firstneigh(nloc);
+        std::vector> jlist(nloc);
+        std::vector coord_cpy;
+        std::vector type_cpy;
+        int frame_nall = nall;
+        std::vector fake_type(nall, 0);
+        const int* f_type = &fake_type[0];
+        // prepare coord and nlist
+        _prepare_coord_nlist_cpu(
+            context, &coord, coord_cpy, &f_type, type_cpy, idx_mapping, inlist,
+            ilist, numneigh, firstneigh, jlist, frame_nall, mem_cpy, mem_nnei,
+            max_nbor_size, box, mesh_tensor.flat().data(), nloc, nei_mode,
+            rcut_r, max_cpy_trial, max_nnei_trial);
+        // launch the cpu compute function
+        deepmd::prod_env_mat_a_cpu(
+            em, em_deriv, rij, nlist, coord, type, inlist, max_nbor_size, avg,
+            std, nloc, frame_nall, rcut_r, rcut_r_smth, sec_a, f_type);
+        // do nlist mapping if coords were copied
+        _map_nei_info_cpu(nlist, ntype, nmask, type, &idx_mapping[0], nloc,
+                          nnei, ntypes, b_nlist_map);
+      }
     }
   }
 
-/////////////////////////////////////////////////////////////////////////////////////////////
-private:
+  /////////////////////////////////////////////////////////////////////////////////////////////
+ private:
   float rcut_a;
   float rcut_r;
   float rcut_r_smth;
@@ -1265,247 +1304,232 @@ class ProdEnvMatAMixOp : public OpKernel {
   int mem_cpy, max_cpy_trial;
   int mem_nnei, max_nnei_trial;
   std::string device;
-  int * array_int = NULL;
-  unsigned long long * array_longlong = NULL;
+  int* array_int = NULL;
+  unsigned long long* array_longlong = NULL;
   deepmd::InputNlist gpu_inlist;
-  int * nbor_list_dev = NULL;
+  int* nbor_list_dev = NULL;
 };
 
-
-template
-static int
-_norm_copy_coord_cpu(
-    std::vector & coord_cpy,
-    std::vector & type_cpy,
-    std::vector & idx_mapping,
-    int & nall,
-    int & mem_cpy,
-    const FPTYPE * coord,
-    const FPTYPE * box,
-    const int * type,
-    const int &nloc, 
-    const int &max_cpy_trial, 
-    const float & rcut_r)
-{
-  std::vector tmp_coord(nall*3);
-  std::copy(coord, coord+nall*3, tmp_coord.begin());
+template 
+static int _norm_copy_coord_cpu(std::vector& coord_cpy,
+                                std::vector& type_cpy,
+                                std::vector& idx_mapping,
+                                int& nall,
+                                int& mem_cpy,
+                                const FPTYPE* coord,
+                                const FPTYPE* box,
+                                const int* type,
+                                const int& nloc,
+                                const int& max_cpy_trial,
+                                const float& rcut_r) {
+  std::vector tmp_coord(nall * 3);
+  std::copy(coord, coord + nall * 3, tmp_coord.begin());
   deepmd::Region region;
   init_region_cpu(region, box);
   normalize_coord_cpu(&tmp_coord[0], nall, region);
   int tt;
-  for(tt = 0; tt < max_cpy_trial; ++tt){
-    coord_cpy.resize(mem_cpy*3);
+  for (tt = 0; tt < max_cpy_trial; ++tt) {
+    coord_cpy.resize(mem_cpy * 3);
     type_cpy.resize(mem_cpy);
     idx_mapping.resize(mem_cpy);
-    int ret = copy_coord_cpu(
-	&coord_cpy[0], &type_cpy[0], &idx_mapping[0], &nall, 
-	&tmp_coord[0], type, nloc, mem_cpy, rcut_r, region);
-    if(ret == 0){
+    int ret =
+        copy_coord_cpu(&coord_cpy[0], &type_cpy[0], &idx_mapping[0], &nall,
+                       &tmp_coord[0], type, nloc, mem_cpy, rcut_r, region);
+    if (ret == 0) {
       break;
-    }
-    else{
+    } else {
       mem_cpy *= 2;
     }
   }
   return (tt != max_cpy_trial);
 }
 
-template
-static int
-_build_nlist_cpu(
-    std::vector &ilist, 
-    std::vector &numneigh,
-    std::vector &firstneigh,
-    std::vector> &jlist,
-    int & max_nnei,
-    int & mem_nnei,
-    const FPTYPE *coord,
-    const int & nloc,
-    const int & new_nall,
-    const int & max_nnei_trial,
-    const float & rcut_r)
-{
+template 
+static int _build_nlist_cpu(std::vector& ilist,
+                            std::vector& numneigh,
+                            std::vector& firstneigh,
+                            std::vector>& jlist,
+                            int& max_nnei,
+                            int& mem_nnei,
+                            const FPTYPE* coord,
+                            const int& nloc,
+                            const int& new_nall,
+                            const int& max_nnei_trial,
+                            const float& rcut_r) {
   int tt;
-  for(tt = 0; tt < max_nnei_trial; ++tt){
-    for(int ii = 0; ii < nloc; ++ii){
+  for (tt = 0; tt < max_nnei_trial; ++tt) {
+    for (int ii = 0; ii < nloc; ++ii) {
       jlist[ii].resize(mem_nnei);
       firstneigh[ii] = &jlist[ii][0];
     }
     deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-    int ret = build_nlist_cpu(
-	inlist, &max_nnei, 
-	coord, nloc, new_nall, mem_nnei, rcut_r);
-    if(ret == 0){
+    int ret = build_nlist_cpu(inlist, &max_nnei, coord, nloc, new_nall,
+                              mem_nnei, rcut_r);
+    if (ret == 0) {
       break;
-    }
-    else{
+    } else {
       mem_nnei *= 2;
     }
   }
   return (tt != max_nnei_trial);
 }
-    
-static void
-_map_nlist_cpu(
-    int * nlist,
-    const int * idx_mapping,
-    const int & nloc,
-    const int & nnei)
-{
-  for (int_64 ii = 0; ii < nloc; ++ii){
-    for (int_64 jj = 0; jj < nnei; ++jj){
-      int record = nlist[ii*nnei+jj];
-      if (record >= 0) {		
-	nlist[ii*nnei+jj] = idx_mapping[record];	      
+
+static void _map_nlist_cpu(int* nlist,
+                           const int* idx_mapping,
+                           const int& nloc,
+                           const int& nnei) {
+  for (int_64 ii = 0; ii < nloc; ++ii) {
+    for (int_64 jj = 0; jj < nnei; ++jj) {
+      int record = nlist[ii * nnei + jj];
+      if (record >= 0) {
+        nlist[ii * nnei + jj] = idx_mapping[record];
       }
     }
-  }  
+  }
 }
 
-static void
-_map_nei_info_cpu(
-    int * nlist,
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int * idx_mapping,
-    const int & nloc,
-    const int & nnei,
-    const int & ntypes,
-    const bool & b_nlist_map)
-{
-  deepmd::use_nei_info_cpu(nlist, ntype, nmask, type, idx_mapping, nloc, nnei, ntypes, b_nlist_map);
+static void _map_nei_info_cpu(int* nlist,
+                              int* ntype,
+                              bool* nmask,
+                              const int* type,
+                              const int* idx_mapping,
+                              const int& nloc,
+                              const int& nnei,
+                              const int& ntypes,
+                              const bool& b_nlist_map) {
+  deepmd::use_nei_info_cpu(nlist, ntype, nmask, type, idx_mapping, nloc, nnei,
+                           ntypes, b_nlist_map);
 }
 
 template 
-static void
-_prepare_coord_nlist_cpu(
-    OpKernelContext* context,
-    FPTYPE const ** coord,
-    std::vector & coord_cpy,
-    int const** type,
-    std::vector & type_cpy,
-    std::vector & idx_mapping,
-    deepmd::InputNlist & inlist,
-    std::vector & ilist,
-    std::vector & numneigh,
-    std::vector & firstneigh,
-    std::vector> & jlist,
-    int & new_nall,
-    int & mem_cpy,
-    int & mem_nnei,
-    int & max_nbor_size,
-    const FPTYPE * box,
-    const int * mesh_tensor_data,
-    const int & nloc,
-    const int & nei_mode,
-    const float & rcut_r,
-    const int & max_cpy_trial,
-    const int & max_nnei_trial)
-{    
+static void _prepare_coord_nlist_cpu(OpKernelContext* context,
+                                     FPTYPE const** coord,
+                                     std::vector& coord_cpy,
+                                     int const** type,
+                                     std::vector& type_cpy,
+                                     std::vector& idx_mapping,
+                                     deepmd::InputNlist& inlist,
+                                     std::vector& ilist,
+                                     std::vector& numneigh,
+                                     std::vector& firstneigh,
+                                     std::vector>& jlist,
+                                     int& new_nall,
+                                     int& mem_cpy,
+                                     int& mem_nnei,
+                                     int& max_nbor_size,
+                                     const FPTYPE* box,
+                                     const int* mesh_tensor_data,
+                                     const int& nloc,
+                                     const int& nei_mode,
+                                     const float& rcut_r,
+                                     const int& max_cpy_trial,
+                                     const int& max_nnei_trial) {
   inlist.inum = nloc;
-  if(nei_mode != 3){
+  if (nei_mode != 3) {
     // build nlist by myself
     // normalize and copy coord
-    if(nei_mode == 1){
-      int copy_ok = _norm_copy_coord_cpu(
-	  coord_cpy, type_cpy, idx_mapping, new_nall, mem_cpy,
-	  *coord, box, *type, nloc, max_cpy_trial, rcut_r);
-      OP_REQUIRES (context, copy_ok, errors::Aborted("cannot allocate mem for copied coords"));
+    if (nei_mode == 1) {
+      int copy_ok = _norm_copy_coord_cpu(coord_cpy, type_cpy, idx_mapping,
+                                         new_nall, mem_cpy, *coord, box, *type,
+                                         nloc, max_cpy_trial, rcut_r);
+      OP_REQUIRES(context, copy_ok,
+                  errors::Aborted("cannot allocate mem for copied coords"));
       *coord = &coord_cpy[0];
       *type = &type_cpy[0];
     }
     // build nlist
-    int build_ok = _build_nlist_cpu(
-	ilist, numneigh, firstneigh, jlist, max_nbor_size, mem_nnei,
-	*coord, nloc, new_nall, max_nnei_trial, rcut_r);
-    OP_REQUIRES (context, build_ok, errors::Aborted("cannot allocate mem for nlist"));
+    int build_ok = _build_nlist_cpu(ilist, numneigh, firstneigh, jlist,
+                                    max_nbor_size, mem_nnei, *coord, nloc,
+                                    new_nall, max_nnei_trial, rcut_r);
+    OP_REQUIRES(context, build_ok,
+                errors::Aborted("cannot allocate mem for nlist"));
     inlist.ilist = &ilist[0];
     inlist.numneigh = &numneigh[0];
     inlist.firstneigh = &firstneigh[0];
-  }
-  else{
+  } else {
     // copy pointers to nlist data
-    memcpy(&inlist.ilist, 4 + mesh_tensor_data, sizeof(int *));
-    memcpy(&inlist.numneigh, 8 + mesh_tensor_data, sizeof(int *));
-    memcpy(&inlist.firstneigh, 12 + mesh_tensor_data, sizeof(int **));
+    memcpy(&inlist.ilist, 4 + mesh_tensor_data, sizeof(int*));
+    memcpy(&inlist.numneigh, 8 + mesh_tensor_data, sizeof(int*));
+    memcpy(&inlist.firstneigh, 12 + mesh_tensor_data, sizeof(int**));
     max_nbor_size = max_numneigh(inlist);
   }
 }
 
 #if GOOGLE_CUDA
-template
-static int
-_norm_copy_coord_gpu(
-    OpKernelContext* context,
-    Tensor * tensor_list,
-    FPTYPE * & coord_cpy,
-    int * & type_cpy,
-    int * & idx_mapping,
-    int & nall,
-    int & mem_cpy,
-    const FPTYPE * coord,
-    const FPTYPE * box,
-    const int * type,
-    const int &nloc, 
-    const int &max_cpy_trial, 
-    const float & rcut_r)
-{
+template 
+static int _norm_copy_coord_gpu(OpKernelContext* context,
+                                Tensor* tensor_list,
+                                FPTYPE*& coord_cpy,
+                                int*& type_cpy,
+                                int*& idx_mapping,
+                                int& nall,
+                                int& mem_cpy,
+                                const FPTYPE* coord,
+                                const FPTYPE* box,
+                                const int* type,
+                                const int& nloc,
+                                const int& max_cpy_trial,
+                                const float& rcut_r) {
   // Tensor FPTYPE_temp;
   TensorShape FPTYPE_shape;
-  FPTYPE_shape.AddDim(nall*3);
-  context->allocate_temp(DataTypeToEnum::value, FPTYPE_shape, tensor_list);
-  FPTYPE * tmp_coord = (*tensor_list).flat().data();
-  DPErrcheck(cudaMemcpy(tmp_coord, coord, sizeof(FPTYPE) * nall * 3, cudaMemcpyDeviceToDevice));
-  
+  FPTYPE_shape.AddDim(nall * 3);
+  context->allocate_temp(DataTypeToEnum::value, FPTYPE_shape,
+                         tensor_list);
+  FPTYPE* tmp_coord = (*tensor_list).flat().data();
+  DPErrcheck(cudaMemcpy(tmp_coord, coord, sizeof(FPTYPE) * nall * 3,
+                        cudaMemcpyDeviceToDevice));
+
   deepmd::Region region;
   init_region_cpu(region, box);
   FPTYPE box_info[18];
-  std::copy(region.boxt, region.boxt+9, box_info);
-  std::copy(region.rec_boxt, region.rec_boxt+9, box_info+9);
+  std::copy(region.boxt, region.boxt + 9, box_info);
+  std::copy(region.rec_boxt, region.rec_boxt + 9, box_info + 9);
   int cell_info[23];
   deepmd::compute_cell_info(cell_info, rcut_r, region);
-  const int loc_cellnum=cell_info[21];
-  const int total_cellnum=cell_info[22];
-  //Tensor double_temp;
+  const int loc_cellnum = cell_info[21];
+  const int total_cellnum = cell_info[22];
+  // Tensor double_temp;
   TensorShape double_shape;
   double_shape.AddDim(18);
-  context->allocate_temp(DataTypeToEnum::value, double_shape, tensor_list+1);
-  //Tensor int_temp;
+  context->allocate_temp(DataTypeToEnum::value, double_shape,
+                         tensor_list + 1);
+  // Tensor int_temp;
   TensorShape int_shape;
-  int_shape.AddDim(23+nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1+nloc);
-  context, context->allocate_temp(DT_INT32, int_shape, tensor_list+2);
-  FPTYPE * box_info_dev = (*(tensor_list+1)).flat().data();
-  int * cell_info_dev = (*(tensor_list+2)).flat().data();
-  int * int_data_dev = cell_info_dev + 23;
+  int_shape.AddDim(23 + nloc * 3 + loc_cellnum + total_cellnum * 3 +
+                   total_cellnum * 3 + loc_cellnum + 1 + total_cellnum + 1 +
+                   nloc);
+  context, context->allocate_temp(DT_INT32, int_shape, tensor_list + 2);
+  FPTYPE* box_info_dev = (*(tensor_list + 1)).flat().data();
+  int* cell_info_dev = (*(tensor_list + 2)).flat().data();
+  int* int_data_dev = cell_info_dev + 23;
   deepmd::memcpy_host_to_device(box_info_dev, box_info, 18);
   deepmd::memcpy_host_to_device(cell_info_dev, cell_info, 23);
   deepmd::Region region_dev;
-  FPTYPE * new_boxt = region_dev.boxt;
-  FPTYPE * new_rec_boxt = region_dev.rec_boxt;
+  FPTYPE* new_boxt = region_dev.boxt;
+  FPTYPE* new_rec_boxt = region_dev.rec_boxt;
   region_dev.boxt = box_info_dev;
   region_dev.rec_boxt = box_info_dev + 9;
   deepmd::normalize_coord_gpu(tmp_coord, nall, region_dev);
   int tt;
-  for(tt = 0; tt < max_cpy_trial; ++tt){
-    //Tensor cpy_temp;
+  for (tt = 0; tt < max_cpy_trial; ++tt) {
+    // Tensor cpy_temp;
     TensorShape cpy_shape;
-    cpy_shape.AddDim(mem_cpy*3);
-    context->allocate_temp(DataTypeToEnum::value, cpy_shape, tensor_list+3);
-    //Tensor t_temp;
+    cpy_shape.AddDim(mem_cpy * 3);
+    context->allocate_temp(DataTypeToEnum::value, cpy_shape,
+                           tensor_list + 3);
+    // Tensor t_temp;
     TensorShape t_shape;
-    t_shape.AddDim(mem_cpy*2);
-    context, context->allocate_temp(DT_INT32, t_shape, tensor_list+4);
-    coord_cpy = (*(tensor_list+3)).flat().data();
-    type_cpy = (*(tensor_list+4)).flat().data();
+    t_shape.AddDim(mem_cpy * 2);
+    context, context->allocate_temp(DT_INT32, t_shape, tensor_list + 4);
+    coord_cpy = (*(tensor_list + 3)).flat().data();
+    type_cpy = (*(tensor_list + 4)).flat().data();
     idx_mapping = type_cpy + mem_cpy;
     int ret = deepmd::copy_coord_gpu(
-        coord_cpy, type_cpy, idx_mapping, &nall, int_data_dev,
-        tmp_coord, type, nloc, mem_cpy, loc_cellnum, total_cellnum, cell_info_dev, region_dev);
-    if(ret == 0){
+        coord_cpy, type_cpy, idx_mapping, &nall, int_data_dev, tmp_coord, type,
+        nloc, mem_cpy, loc_cellnum, total_cellnum, cell_info_dev, region_dev);
+    if (ret == 0) {
       break;
-    }
-    else{
+    } else {
       mem_cpy *= 2;
     }
   }
@@ -1514,228 +1538,221 @@ _norm_copy_coord_gpu(
   return (tt != max_cpy_trial);
 }
 
-template
-static int
-_build_nlist_gpu(
-    OpKernelContext* context,
-    Tensor * tensor_list,
-    int * &ilist, 
-    int * &numneigh,
-    int ** &firstneigh,
-    int * &jlist,
-    int & max_nnei,
-    int & mem_nnei,
-    const FPTYPE *coord,
-    const int & nloc,
-    const int & new_nall,
-    const int & max_nnei_trial,
-    const float & rcut_r)
-{
-  //Tensor nlist_temp;
+template 
+static int _build_nlist_gpu(OpKernelContext* context,
+                            Tensor* tensor_list,
+                            int*& ilist,
+                            int*& numneigh,
+                            int**& firstneigh,
+                            int*& jlist,
+                            int& max_nnei,
+                            int& mem_nnei,
+                            const FPTYPE* coord,
+                            const int& nloc,
+                            const int& new_nall,
+                            const int& max_nnei_trial,
+                            const float& rcut_r) {
+  // Tensor nlist_temp;
   TensorShape nlist_shape;
-  nlist_shape.AddDim(nloc*2);
+  nlist_shape.AddDim(nloc * 2);
   context->allocate_temp(DT_INT32, nlist_shape, tensor_list);
   ilist = (*tensor_list).flat().data();
   numneigh = ilist + nloc;
-  //Tensor jlist_temp;
-  int * ind_data = NULL;
-  
+  // Tensor jlist_temp;
+  int* ind_data = NULL;
+
   std::vector firstneigh_host(nloc);
   int tt;
-  for(tt = 0; tt < max_nnei_trial; ++tt){
+  for (tt = 0; tt < max_nnei_trial; ++tt) {
     TensorShape jlist_shape;
-    jlist_shape.AddDim(3*int_64(nloc)*mem_nnei);
-    context->allocate_temp(DT_INT32, jlist_shape, tensor_list+1);
-    jlist = (*(tensor_list+1)).flat().data();
+    jlist_shape.AddDim(3 * int_64(nloc) * mem_nnei);
+    context->allocate_temp(DT_INT32, jlist_shape, tensor_list + 1);
+    jlist = (*(tensor_list + 1)).flat().data();
     ind_data = jlist + nloc * mem_nnei;
-    for(int_64 ii = 0; ii < nloc; ++ii){
+    for (int_64 ii = 0; ii < nloc; ++ii) {
       firstneigh_host[ii] = jlist + ii * mem_nnei;
     }
     deepmd::memcpy_host_to_device(firstneigh, firstneigh_host);
     deepmd::InputNlist inlist(nloc, ilist, numneigh, firstneigh);
-    int ret = deepmd::build_nlist_gpu(
-        inlist, &max_nnei, ind_data, 
-        coord, nloc, new_nall, mem_nnei, rcut_r);
-    if(ret == 0){
+    int ret = deepmd::build_nlist_gpu(inlist, &max_nnei, ind_data, coord, nloc,
+                                      new_nall, mem_nnei, rcut_r);
+    if (ret == 0) {
       break;
-    }
-    else{
+    } else {
       mem_nnei *= 2;
     }
   }
   return (tt != max_nnei_trial);
 }
 
-static void
-_map_nlist_gpu(
-    int * nlist,
-    const int * idx_mapping,
-    const int & nloc,
-    const int & nnei)
-{
+static void _map_nlist_gpu(int* nlist,
+                           const int* idx_mapping,
+                           const int& nloc,
+                           const int& nnei) {
   deepmd::use_nlist_map(nlist, idx_mapping, nloc, nnei);
 }
 
-static void
-_map_nei_info_gpu(
-    int * nlist,
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int * idx_mapping,
-    const int & nloc,
-    const int & nnei,
-    const int & ntypes,
-    const bool & b_nlist_map)
-{
-  deepmd::use_nei_info_gpu(nlist, ntype, nmask, type, idx_mapping, nloc, nnei, ntypes, b_nlist_map);
+static void _map_nei_info_gpu(int* nlist,
+                              int* ntype,
+                              bool* nmask,
+                              const int* type,
+                              const int* idx_mapping,
+                              const int& nloc,
+                              const int& nnei,
+                              const int& ntypes,
+                              const bool& b_nlist_map) {
+  deepmd::use_nei_info_gpu(nlist, ntype, nmask, type, idx_mapping, nloc, nnei,
+                           ntypes, b_nlist_map);
 }
 
 template 
-static void
-_prepare_coord_nlist_gpu(
-    OpKernelContext* context,
-    Tensor * tensor_list,
-    FPTYPE const ** coord,
-    FPTYPE * & coord_cpy,
-    int const** type,
-    int * & type_cpy,
-    int * & idx_mapping,
-    deepmd::InputNlist & inlist,
-    int * & ilist,
-    int * & numneigh,
-    int ** & firstneigh,
-    int * & jlist,
-    int * & nbor_list_dev,
-    int & new_nall,
-    int & mem_cpy,
-    int & mem_nnei,
-    int & max_nbor_size,
-    const FPTYPE * box,
-    const int * mesh_tensor_data,
-    const int mesh_tensor_size,
-    const int & nloc,
-    const int & nei_mode,
-    const float & rcut_r,
-    const int & max_cpy_trial,
-    const int & max_nnei_trial)
-{    
-  if(nei_mode != 3){
+static void _prepare_coord_nlist_gpu(OpKernelContext* context,
+                                     Tensor* tensor_list,
+                                     FPTYPE const** coord,
+                                     FPTYPE*& coord_cpy,
+                                     int const** type,
+                                     int*& type_cpy,
+                                     int*& idx_mapping,
+                                     deepmd::InputNlist& inlist,
+                                     int*& ilist,
+                                     int*& numneigh,
+                                     int**& firstneigh,
+                                     int*& jlist,
+                                     int*& nbor_list_dev,
+                                     int& new_nall,
+                                     int& mem_cpy,
+                                     int& mem_nnei,
+                                     int& max_nbor_size,
+                                     const FPTYPE* box,
+                                     const int* mesh_tensor_data,
+                                     const int mesh_tensor_size,
+                                     const int& nloc,
+                                     const int& nei_mode,
+                                     const float& rcut_r,
+                                     const int& max_cpy_trial,
+                                     const int& max_nnei_trial) {
+  if (nei_mode != 3) {
     inlist.inum = nloc;
     // build nlist by myself
     // normalize and copy coord
-    if(nei_mode == 1){
+    if (nei_mode == 1) {
       int copy_ok = _norm_copy_coord_gpu(
-        context, tensor_list, coord_cpy, type_cpy, idx_mapping, new_nall, mem_cpy,
-        *coord, box, *type, nloc, max_cpy_trial, rcut_r);
-      OP_REQUIRES (context, copy_ok, errors::Aborted("cannot allocate mem for copied coords"));
+          context, tensor_list, coord_cpy, type_cpy, idx_mapping, new_nall,
+          mem_cpy, *coord, box, *type, nloc, max_cpy_trial, rcut_r);
+      OP_REQUIRES(context, copy_ok,
+                  errors::Aborted("cannot allocate mem for copied coords"));
       *coord = coord_cpy;
       *type = type_cpy;
     }
-    //build nlist
-    int build_ok = _build_nlist_gpu(
-      context, tensor_list + 5, ilist, numneigh, firstneigh, jlist, max_nbor_size, mem_nnei,
-      *coord, nloc, new_nall, max_nnei_trial, rcut_r);
-    OP_REQUIRES (context, build_ok, errors::Aborted("cannot allocate mem for nlist"));
+    // build nlist
+    int build_ok =
+        _build_nlist_gpu(context, tensor_list + 5, ilist, numneigh, firstneigh,
+                         jlist, max_nbor_size, mem_nnei, *coord, nloc, new_nall,
+                         max_nnei_trial, rcut_r);
+    OP_REQUIRES(context, build_ok,
+                errors::Aborted("cannot allocate mem for nlist"));
     if (max_nbor_size <= 1024) {
       max_nbor_size = 1024;
-    }
-    else if (max_nbor_size <= 2048) {
+    } else if (max_nbor_size <= 2048) {
       max_nbor_size = 2048;
-    }
-    else {
+    } else {
       max_nbor_size = 4096;
     }
     inlist.ilist = ilist;
     inlist.numneigh = numneigh;
     inlist.firstneigh = firstneigh;
-  }
-  else{
+  } else {
     // update nbor list
     deepmd::InputNlist inlist_temp;
     inlist_temp.inum = nloc;
-    deepmd::env_mat_nbor_update(
-        inlist_temp, inlist, max_nbor_size, nbor_list_dev,
-        mesh_tensor_data, mesh_tensor_size);
-    OP_REQUIRES (context, (max_numneigh(inlist_temp) <= max_nbor_size), errors::InvalidArgument ("Assert failed, max neighbor size of atom(lammps) " + std::to_string(max_numneigh(inlist_temp)) + " is larger than " + std::to_string(max_nbor_size) + ", which currently is not supported by deepmd-kit."));
+    deepmd::env_mat_nbor_update(inlist_temp, inlist, max_nbor_size,
+                                nbor_list_dev, mesh_tensor_data,
+                                mesh_tensor_size);
+    OP_REQUIRES(context, (max_numneigh(inlist_temp) <= max_nbor_size),
+                errors::InvalidArgument(
+                    "Assert failed, max neighbor size of atom(lammps) " +
+                    std::to_string(max_numneigh(inlist_temp)) +
+                    " is larger than " + std::to_string(max_nbor_size) +
+                    ", which currently is not supported by deepmd-kit."));
   }
 }
 #endif  // GOOGLE_CUDA
 
-
 #if TENSORFLOW_USE_ROCM
-template
-static int
-_norm_copy_coord_gpu_rocm(
-    OpKernelContext* context,
-    Tensor * tensor_list,
-    FPTYPE * & coord_cpy,
-    int * & type_cpy,
-    int * & idx_mapping,
-    int & nall,
-    int & mem_cpy,
-    const FPTYPE * coord,
-    const FPTYPE * box,
-    const int * type,
-    const int &nloc, 
-    const int &max_cpy_trial, 
-    const float & rcut_r)
-{
+template 
+static int _norm_copy_coord_gpu_rocm(OpKernelContext* context,
+                                     Tensor* tensor_list,
+                                     FPTYPE*& coord_cpy,
+                                     int*& type_cpy,
+                                     int*& idx_mapping,
+                                     int& nall,
+                                     int& mem_cpy,
+                                     const FPTYPE* coord,
+                                     const FPTYPE* box,
+                                     const int* type,
+                                     const int& nloc,
+                                     const int& max_cpy_trial,
+                                     const float& rcut_r) {
   // Tensor FPTYPE_temp;
   TensorShape FPTYPE_shape;
-  FPTYPE_shape.AddDim(nall*3);
-  context->allocate_temp(DataTypeToEnum::value, FPTYPE_shape, tensor_list);
-  FPTYPE * tmp_coord = (*tensor_list).flat().data();
-  DPErrcheck(hipMemcpy(tmp_coord, coord, sizeof(FPTYPE) * nall * 3, hipMemcpyDeviceToDevice));
-  
+  FPTYPE_shape.AddDim(nall * 3);
+  context->allocate_temp(DataTypeToEnum::value, FPTYPE_shape,
+                         tensor_list);
+  FPTYPE* tmp_coord = (*tensor_list).flat().data();
+  DPErrcheck(hipMemcpy(tmp_coord, coord, sizeof(FPTYPE) * nall * 3,
+                       hipMemcpyDeviceToDevice));
+
   deepmd::Region region;
   init_region_cpu(region, box);
   FPTYPE box_info[18];
-  std::copy(region.boxt, region.boxt+9, box_info);
-  std::copy(region.rec_boxt, region.rec_boxt+9, box_info+9);
+  std::copy(region.boxt, region.boxt + 9, box_info);
+  std::copy(region.rec_boxt, region.rec_boxt + 9, box_info + 9);
   int cell_info[23];
   deepmd::compute_cell_info(cell_info, rcut_r, region);
-  const int loc_cellnum=cell_info[21];
-  const int total_cellnum=cell_info[22];
-  //Tensor double_temp;
+  const int loc_cellnum = cell_info[21];
+  const int total_cellnum = cell_info[22];
+  // Tensor double_temp;
   TensorShape double_shape;
   double_shape.AddDim(18);
-  context->allocate_temp(DataTypeToEnum::value, double_shape, tensor_list+1);
-  //Tensor int_temp;
+  context->allocate_temp(DataTypeToEnum::value, double_shape,
+                         tensor_list + 1);
+  // Tensor int_temp;
   TensorShape int_shape;
-  int_shape.AddDim(23+nloc*3+loc_cellnum+total_cellnum*3+total_cellnum*3+loc_cellnum+1+total_cellnum+1+nloc);
-  context, context->allocate_temp(DT_INT32, int_shape, tensor_list+2);
-  FPTYPE * box_info_dev = (*(tensor_list+1)).flat().data();
-  int * cell_info_dev = (*(tensor_list+2)).flat().data();
-  int * int_data_dev = cell_info_dev + 23;
+  int_shape.AddDim(23 + nloc * 3 + loc_cellnum + total_cellnum * 3 +
+                   total_cellnum * 3 + loc_cellnum + 1 + total_cellnum + 1 +
+                   nloc);
+  context, context->allocate_temp(DT_INT32, int_shape, tensor_list + 2);
+  FPTYPE* box_info_dev = (*(tensor_list + 1)).flat().data();
+  int* cell_info_dev = (*(tensor_list + 2)).flat().data();
+  int* int_data_dev = cell_info_dev + 23;
   deepmd::memcpy_host_to_device(box_info_dev, box_info, 18);
   deepmd::memcpy_host_to_device(cell_info_dev, cell_info, 23);
   deepmd::Region region_dev;
-  FPTYPE * new_boxt = region_dev.boxt;
-  FPTYPE * new_rec_boxt = region_dev.rec_boxt;
+  FPTYPE* new_boxt = region_dev.boxt;
+  FPTYPE* new_rec_boxt = region_dev.rec_boxt;
   region_dev.boxt = box_info_dev;
   region_dev.rec_boxt = box_info_dev + 9;
   deepmd::normalize_coord_gpu_rocm(tmp_coord, nall, region_dev);
   int tt;
-  for(tt = 0; tt < max_cpy_trial; ++tt){
-    //Tensor cpy_temp;
+  for (tt = 0; tt < max_cpy_trial; ++tt) {
+    // Tensor cpy_temp;
     TensorShape cpy_shape;
-    cpy_shape.AddDim(mem_cpy*3);
-    context->allocate_temp(DataTypeToEnum::value, cpy_shape, tensor_list+3);
-    //Tensor t_temp;
+    cpy_shape.AddDim(mem_cpy * 3);
+    context->allocate_temp(DataTypeToEnum::value, cpy_shape,
+                           tensor_list + 3);
+    // Tensor t_temp;
     TensorShape t_shape;
-    t_shape.AddDim(mem_cpy*2);
-    context, context->allocate_temp(DT_INT32, t_shape, tensor_list+4);
-    coord_cpy = (*(tensor_list+3)).flat().data();
-    type_cpy = (*(tensor_list+4)).flat().data();
+    t_shape.AddDim(mem_cpy * 2);
+    context, context->allocate_temp(DT_INT32, t_shape, tensor_list + 4);
+    coord_cpy = (*(tensor_list + 3)).flat().data();
+    type_cpy = (*(tensor_list + 4)).flat().data();
     idx_mapping = type_cpy + mem_cpy;
     int ret = deepmd::copy_coord_gpu_rocm(
-        coord_cpy, type_cpy, idx_mapping, &nall, int_data_dev,
-        tmp_coord, type, nloc, mem_cpy, loc_cellnum, total_cellnum, cell_info_dev, region_dev);
-    if(ret == 0){
+        coord_cpy, type_cpy, idx_mapping, &nall, int_data_dev, tmp_coord, type,
+        nloc, mem_cpy, loc_cellnum, total_cellnum, cell_info_dev, region_dev);
+    if (ret == 0) {
       break;
-    }
-    else{
+    } else {
       mem_cpy *= 2;
     }
   }
@@ -1744,202 +1761,210 @@ _norm_copy_coord_gpu_rocm(
   return (tt != max_cpy_trial);
 }
 
-template
-static int
-_build_nlist_gpu_rocm(
-    OpKernelContext* context,
-    Tensor * tensor_list,
-    int * &ilist, 
-    int * &numneigh,
-    int ** &firstneigh,
-    int * &jlist,
-    int & max_nnei,
-    int & mem_nnei,
-    const FPTYPE *coord,
-    const int & nloc,
-    const int & new_nall,
-    const int & max_nnei_trial,
-    const float & rcut_r)
-{
-  //Tensor nlist_temp;
+template 
+static int _build_nlist_gpu_rocm(OpKernelContext* context,
+                                 Tensor* tensor_list,
+                                 int*& ilist,
+                                 int*& numneigh,
+                                 int**& firstneigh,
+                                 int*& jlist,
+                                 int& max_nnei,
+                                 int& mem_nnei,
+                                 const FPTYPE* coord,
+                                 const int& nloc,
+                                 const int& new_nall,
+                                 const int& max_nnei_trial,
+                                 const float& rcut_r) {
+  // Tensor nlist_temp;
   TensorShape nlist_shape;
-  nlist_shape.AddDim(nloc*2);
+  nlist_shape.AddDim(nloc * 2);
   context->allocate_temp(DT_INT32, nlist_shape, tensor_list);
   ilist = (*tensor_list).flat().data();
   numneigh = ilist + nloc;
-  //Tensor jlist_temp;
-  int * ind_data = NULL;
-  
+  // Tensor jlist_temp;
+  int* ind_data = NULL;
+
   std::vector firstneigh_host(nloc);
   int tt;
-  for(tt = 0; tt < max_nnei_trial; ++tt){
+  for (tt = 0; tt < max_nnei_trial; ++tt) {
     TensorShape jlist_shape;
-    jlist_shape.AddDim(3*int_64(nloc)*mem_nnei);
-    context->allocate_temp(DT_INT32, jlist_shape, tensor_list+1);
-    jlist = (*(tensor_list+1)).flat().data();
+    jlist_shape.AddDim(3 * int_64(nloc) * mem_nnei);
+    context->allocate_temp(DT_INT32, jlist_shape, tensor_list + 1);
+    jlist = (*(tensor_list + 1)).flat().data();
     ind_data = jlist + nloc * mem_nnei;
-    for(int_64 ii = 0; ii < nloc; ++ii){
+    for (int_64 ii = 0; ii < nloc; ++ii) {
       firstneigh_host[ii] = jlist + ii * mem_nnei;
     }
     deepmd::memcpy_host_to_device(firstneigh, firstneigh_host);
     deepmd::InputNlist inlist(nloc, ilist, numneigh, firstneigh);
-    int ret = deepmd::build_nlist_gpu_rocm(
-        inlist, &max_nnei, ind_data, 
-        coord, nloc, new_nall, mem_nnei, rcut_r);
-    if(ret == 0){
+    int ret = deepmd::build_nlist_gpu_rocm(inlist, &max_nnei, ind_data, coord,
+                                           nloc, new_nall, mem_nnei, rcut_r);
+    if (ret == 0) {
       break;
-    }
-    else{
+    } else {
       mem_nnei *= 2;
     }
   }
   return (tt != max_nnei_trial);
 }
 
-static void
-_map_nlist_gpu_rocm(
-    int * nlist,
-    const int * idx_mapping,
-    const int & nloc,
-    const int & nnei)
-{
+static void _map_nlist_gpu_rocm(int* nlist,
+                                const int* idx_mapping,
+                                const int& nloc,
+                                const int& nnei) {
   deepmd::use_nlist_map(nlist, idx_mapping, nloc, nnei);
 }
 
-static void
-_map_nei_info_gpu_rocm(
-    int * nlist,
-    int * ntype,
-    bool * nmask,
-    const int * type,
-    const int * idx_mapping,
-    const int & nloc,
-    const int & nnei,
-    const int & ntypes,
-    const bool & b_nlist_map)
-{
-  deepmd::use_nei_info_gpu_rocm(nlist, ntype, nmask, type, idx_mapping, nloc, nnei, ntypes, b_nlist_map);
+static void _map_nei_info_gpu_rocm(int* nlist,
+                                   int* ntype,
+                                   bool* nmask,
+                                   const int* type,
+                                   const int* idx_mapping,
+                                   const int& nloc,
+                                   const int& nnei,
+                                   const int& ntypes,
+                                   const bool& b_nlist_map) {
+  deepmd::use_nei_info_gpu_rocm(nlist, ntype, nmask, type, idx_mapping, nloc,
+                                nnei, ntypes, b_nlist_map);
 }
 
-
 template 
-static void
-_prepare_coord_nlist_gpu_rocm(
-    OpKernelContext* context,
-    Tensor * tensor_list,
-    FPTYPE const ** coord,
-    FPTYPE * & coord_cpy,
-    int const** type,
-    int * & type_cpy,
-    int * & idx_mapping,
-    deepmd::InputNlist & inlist,
-    int * & ilist,
-    int * & numneigh,
-    int ** & firstneigh,
-    int * & jlist,
-    int * & nbor_list_dev,
-    int & new_nall,
-    int & mem_cpy,
-    int & mem_nnei,
-    int & max_nbor_size,
-    const FPTYPE * box,
-    const int * mesh_tensor_data,
-    const int mesh_tensor_size,
-    const int & nloc,
-    const int & nei_mode,
-    const float & rcut_r,
-    const int & max_cpy_trial,
-    const int & max_nnei_trial)
-{    
-  if(nei_mode != 3){
+static void _prepare_coord_nlist_gpu_rocm(OpKernelContext* context,
+                                          Tensor* tensor_list,
+                                          FPTYPE const** coord,
+                                          FPTYPE*& coord_cpy,
+                                          int const** type,
+                                          int*& type_cpy,
+                                          int*& idx_mapping,
+                                          deepmd::InputNlist& inlist,
+                                          int*& ilist,
+                                          int*& numneigh,
+                                          int**& firstneigh,
+                                          int*& jlist,
+                                          int*& nbor_list_dev,
+                                          int& new_nall,
+                                          int& mem_cpy,
+                                          int& mem_nnei,
+                                          int& max_nbor_size,
+                                          const FPTYPE* box,
+                                          const int* mesh_tensor_data,
+                                          const int mesh_tensor_size,
+                                          const int& nloc,
+                                          const int& nei_mode,
+                                          const float& rcut_r,
+                                          const int& max_cpy_trial,
+                                          const int& max_nnei_trial) {
+  if (nei_mode != 3) {
     inlist.inum = nloc;
     // build nlist by myself
     // normalize and copy coord
-    if(nei_mode == 1){
+    if (nei_mode == 1) {
       int copy_ok = _norm_copy_coord_gpu_rocm(
-        context, tensor_list, coord_cpy, type_cpy, idx_mapping, new_nall, mem_cpy,
-        *coord, box, *type, nloc, max_cpy_trial, rcut_r);
-      OP_REQUIRES (context, copy_ok, errors::Aborted("cannot allocate mem for copied coords"));
+          context, tensor_list, coord_cpy, type_cpy, idx_mapping, new_nall,
+          mem_cpy, *coord, box, *type, nloc, max_cpy_trial, rcut_r);
+      OP_REQUIRES(context, copy_ok,
+                  errors::Aborted("cannot allocate mem for copied coords"));
       *coord = coord_cpy;
       *type = type_cpy;
     }
-    //build nlist
-    int build_ok = _build_nlist_gpu_rocm(
-      context, tensor_list + 5, ilist, numneigh, firstneigh, jlist, max_nbor_size, mem_nnei,
-      *coord, nloc, new_nall, max_nnei_trial, rcut_r);
-    OP_REQUIRES (context, build_ok, errors::Aborted("cannot allocate mem for nlist"));
+    // build nlist
+    int build_ok =
+        _build_nlist_gpu_rocm(context, tensor_list + 5, ilist, numneigh,
+                              firstneigh, jlist, max_nbor_size, mem_nnei,
+                              *coord, nloc, new_nall, max_nnei_trial, rcut_r);
+    OP_REQUIRES(context, build_ok,
+                errors::Aborted("cannot allocate mem for nlist"));
     if (max_nbor_size <= 1024) {
       max_nbor_size = 1024;
-    }
-    else if (max_nbor_size <= 2048) {
+    } else if (max_nbor_size <= 2048) {
       max_nbor_size = 2048;
-    }
-    else {
+    } else {
       max_nbor_size = 4096;
     }
     inlist.ilist = ilist;
     inlist.numneigh = numneigh;
     inlist.firstneigh = firstneigh;
-  }
-  else{
+  } else {
     // update nbor list
     deepmd::InputNlist inlist_temp;
     inlist_temp.inum = nloc;
-    deepmd::env_mat_nbor_update(
-        inlist_temp, inlist, max_nbor_size, nbor_list_dev,
-        mesh_tensor_data, mesh_tensor_size);
-    OP_REQUIRES (context, (max_numneigh(inlist_temp) <= max_nbor_size), errors::InvalidArgument ("Assert failed, max neighbor size of atom(lammps) " + std::to_string(max_numneigh(inlist_temp)) + " is larger than " + std::to_string(max_nbor_size) + ", which currently is not supported by deepmd-kit."));
+    deepmd::env_mat_nbor_update(inlist_temp, inlist, max_nbor_size,
+                                nbor_list_dev, mesh_tensor_data,
+                                mesh_tensor_size);
+    OP_REQUIRES(context, (max_numneigh(inlist_temp) <= max_nbor_size),
+                errors::InvalidArgument(
+                    "Assert failed, max neighbor size of atom(lammps) " +
+                    std::to_string(max_numneigh(inlist_temp)) +
+                    " is larger than " + std::to_string(max_nbor_size) +
+                    ", which currently is not supported by deepmd-kit."));
   }
 }
 #endif  // TENSORFLOW_USE_ROCM
 
-
 // Register the CPU kernels.
 // Compatible with v1.3
-#define REGISTER_CPU(T)                                                                                   \
-REGISTER_KERNEL_BUILDER(                                                                                  \
-    Name("ProdEnvMatA").Device(DEVICE_CPU).TypeConstraint("T"),                                        \
-    ProdEnvMatAOp);                                                                         \
-REGISTER_KERNEL_BUILDER(                                                                                  \
-    Name("ProdEnvMatR").Device(DEVICE_CPU).TypeConstraint("T"),                                        \
-    ProdEnvMatROp);                                                                         \
-REGISTER_KERNEL_BUILDER(                                                                                  \
-    Name("ProdEnvMatAMix").Device(DEVICE_CPU).TypeConstraint("T"),                                     \
-    ProdEnvMatAMixOp);                                                                      \
-REGISTER_KERNEL_BUILDER(                                                                                  \
-    Name("DescrptSeA").Device(DEVICE_CPU).TypeConstraint("T"),                                        \
-    ProdEnvMatAOp);                                                                         \
-REGISTER_KERNEL_BUILDER(                                                                                  \
-    Name("DescrptNorot").Device(DEVICE_CPU).TypeConstraint("T"),                                        \
-    ProdEnvMatAOp);                                                                         \
-REGISTER_KERNEL_BUILDER(                                                                                  \
-    Name("DescrptSeR").Device(DEVICE_CPU).TypeConstraint("T"),                                        \
-    ProdEnvMatROp);   
-REGISTER_CPU(float);                  
-REGISTER_CPU(double);                 
-            
-// Register the GPU kernels.                  
+#define REGISTER_CPU(T)                                                 \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("ProdEnvMatA").Device(DEVICE_CPU).TypeConstraint("T"),    \
+      ProdEnvMatAOp);                                     \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("ProdEnvMatR").Device(DEVICE_CPU).TypeConstraint("T"),    \
+      ProdEnvMatROp);                                     \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("ProdEnvMatAMix").Device(DEVICE_CPU).TypeConstraint("T"), \
+      ProdEnvMatAMixOp);                                  \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("DescrptSeA").Device(DEVICE_CPU).TypeConstraint("T"),     \
+      ProdEnvMatAOp);                                     \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("DescrptNorot").Device(DEVICE_CPU).TypeConstraint("T"),   \
+      ProdEnvMatAOp);                                     \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("DescrptSeR").Device(DEVICE_CPU).TypeConstraint("T"),     \
+      ProdEnvMatROp);
+REGISTER_CPU(float);
+REGISTER_CPU(double);
+
+// Register the GPU kernels.
 // Compatible with v1.3
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM            
-#define REGISTER_GPU(T)                                                                                   \
-REGISTER_KERNEL_BUILDER(                                                                                  \
-    Name("ProdEnvMatA").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms").HostMemory("box"), \
-    ProdEnvMatAOp);                                                                         \
-REGISTER_KERNEL_BUILDER(                                                                                  \
-    Name("ProdEnvMatR").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms").HostMemory("box"), \
-    ProdEnvMatROp);                                                                         \
-REGISTER_KERNEL_BUILDER(                                                                                  \
-    Name("ProdEnvMatAMix").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms").HostMemory("box"), \
-    ProdEnvMatAMixOp);                                                                      \
-REGISTER_KERNEL_BUILDER(                                                                                  \
-    Name("DescrptSeA").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms").HostMemory("box"), \
-    ProdEnvMatAOp);                                                                         \
-REGISTER_KERNEL_BUILDER(                                                                                  \
-    Name("DescrptNorot").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms").HostMemory("box"), \
-    ProdEnvMatAOp);                                                                         \
-REGISTER_KERNEL_BUILDER(                                                                                  \
-    Name("DescrptSeR").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms").HostMemory("box"), \
-    ProdEnvMatROp);
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#define REGISTER_GPU(T)                                    \
+  REGISTER_KERNEL_BUILDER(Name("ProdEnvMatA")              \
+                              .Device(DEVICE_GPU)          \
+                              .TypeConstraint("T")      \
+                              .HostMemory("natoms")        \
+                              .HostMemory("box"),          \
+                          ProdEnvMatAOp);    \
+  REGISTER_KERNEL_BUILDER(Name("ProdEnvMatR")              \
+                              .Device(DEVICE_GPU)          \
+                              .TypeConstraint("T")      \
+                              .HostMemory("natoms")        \
+                              .HostMemory("box"),          \
+                          ProdEnvMatROp);    \
+  REGISTER_KERNEL_BUILDER(Name("ProdEnvMatAMix")           \
+                              .Device(DEVICE_GPU)          \
+                              .TypeConstraint("T")      \
+                              .HostMemory("natoms")        \
+                              .HostMemory("box"),          \
+                          ProdEnvMatAMixOp); \
+  REGISTER_KERNEL_BUILDER(Name("DescrptSeA")               \
+                              .Device(DEVICE_GPU)          \
+                              .TypeConstraint("T")      \
+                              .HostMemory("natoms")        \
+                              .HostMemory("box"),          \
+                          ProdEnvMatAOp);    \
+  REGISTER_KERNEL_BUILDER(Name("DescrptNorot")             \
+                              .Device(DEVICE_GPU)          \
+                              .TypeConstraint("T")      \
+                              .HostMemory("natoms")        \
+                              .HostMemory("box"),          \
+                          ProdEnvMatAOp);    \
+  REGISTER_KERNEL_BUILDER(Name("DescrptSeR")               \
+                              .Device(DEVICE_GPU)          \
+                              .TypeConstraint("T")      \
+                              .HostMemory("natoms")        \
+                              .HostMemory("box"),          \
+                          ProdEnvMatROp);
 REGISTER_GPU(float);
 REGISTER_GPU(double);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/source/op/prod_env_mat_multi_device_nvnmd.cc b/source/op/prod_env_mat_multi_device_nvnmd.cc
index b614d185a4..cfccd6605a 100644
--- a/source/op/prod_env_mat_multi_device_nvnmd.cc
+++ b/source/op/prod_env_mat_multi_device_nvnmd.cc
@@ -1,10 +1,10 @@
 /*
 //==================================================
- _   _  __     __  _   _   __  __   ____  
-| \ | | \ \   / / | \ | | |  \/  | |  _ \ 
+ _   _  __     __  _   _   __  __   ____
+| \ | | \ \   / / | \ | | |  \/  | |  _ \
 |  \| |  \ \ / /  |  \| | | |\/| | | | | |
 | |\  |   \ V /   | |\  | | |  | | | |_| |
-|_| \_|    \_/    |_| \_| |_|  |_| |____/ 
+|_| \_|    \_/    |_| \_| |_|  |_| |____/
 
 //==================================================
 
@@ -15,250 +15,226 @@ date: 2021-12-6
 
 */
 
-
-#include "custom_op.h"
-#include "utilities.h"
 #include "coord.h"
-#include "region.h"
+#include "custom_op.h"
+#include "errors.h"
 #include "neighbor_list.h"
 #include "prod_env_mat_nvnmd.h"
-#include "errors.h"
+#include "region.h"
+#include "utilities.h"
 
 // ProdEnvMatANvnmd
 // have been remove for the same function
 
 REGISTER_OP("ProdEnvMatANvnmdQuantize")
     .Attr("T: {float, double} = DT_DOUBLE")
-    .Input("coord: T")          //atomic coordinates
-    .Input("type: int32")       //atomic type
-    .Input("natoms: int32")     //local atomic number; each type atomic number
+    .Input("coord: T")       // atomic coordinates
+    .Input("type: int32")    // atomic type
+    .Input("natoms: int32")  // local atomic number; each type atomic number
     .Input("box : T")
     .Input("mesh : int32")
-    .Input("davg: T")           //average value of data
-    .Input("dstd: T")           //standard deviation
-    .Attr("rcut_a: float")      //no use
+    .Input("davg: T")       // average value of data
+    .Input("dstd: T")       // standard deviation
+    .Attr("rcut_a: float")  // no use
     .Attr("rcut_r: float")
     .Attr("rcut_r_smth: float")
     .Attr("sel_a: list(int)")
-    .Attr("sel_r: list(int)")   //all zero
+    .Attr("sel_r: list(int)")  // all zero
     .Output("descrpt: T")
     .Output("descrpt_deriv: T")
     .Output("rij: T")
     .Output("nlist: int32");
-    // only sel_a and rcut_r uesd.
-
-template
-static int
-_norm_copy_coord_cpu(
-    std::vector & coord_cpy,
-    std::vector & type_cpy,
-    std::vector & mapping,
-    int & nall,
-    int & mem_cpy,
-    const FPTYPE * coord,
-    const FPTYPE * box,
-    const int * type,
-    const int &nloc, 
-    const int &max_cpy_trial, 
-    const float & rcut_r);
-
-template
-static int
-_build_nlist_cpu(
-    std::vector &ilist, 
-    std::vector &numneigh,
-    std::vector &firstneigh,
-    std::vector> &jlist,
-    int & max_nnei,
-    int & mem_nnei,
-    const FPTYPE *coord,
-    const int & nloc,
-    const int & new_nall,
-    const int & max_nnei_trial,
-    const float & rcut_r);
-
-static void
-_map_nlist_cpu(
-    int * nlist,
-    const int * idx_mapping,
-    const int & nloc,
-    const int & nnei);
+// only sel_a and rcut_r uesd.
 
 template 
-static void
-_prepare_coord_nlist_cpu(
-    OpKernelContext* context,
-    FPTYPE const ** coord,
-    std::vector & coord_cpy,
-    int const** type,
-    std::vector & type_cpy,
-    std::vector & idx_mapping,
-    deepmd::InputNlist & inlist,
-    std::vector & ilist,
-    std::vector & numneigh,
-    std::vector & firstneigh,
-    std::vector> & jlist,
-    int & new_nall,
-    int & mem_cpy,
-    int & mem_nnei,
-    int & max_nbor_size,
-    const FPTYPE * box,
-    const int * mesh_tensor_data,
-    const int & nloc,
-    const int & nei_mode,
-    const float & rcut_r,
-    const int & max_cpy_trial,
-    const int & max_nnei_trial);
+static int _norm_copy_coord_cpu(std::vector& coord_cpy,
+                                std::vector& type_cpy,
+                                std::vector& mapping,
+                                int& nall,
+                                int& mem_cpy,
+                                const FPTYPE* coord,
+                                const FPTYPE* box,
+                                const int* type,
+                                const int& nloc,
+                                const int& max_cpy_trial,
+                                const float& rcut_r);
+
+template 
+static int _build_nlist_cpu(std::vector& ilist,
+                            std::vector& numneigh,
+                            std::vector& firstneigh,
+                            std::vector>& jlist,
+                            int& max_nnei,
+                            int& mem_nnei,
+                            const FPTYPE* coord,
+                            const int& nloc,
+                            const int& new_nall,
+                            const int& max_nnei_trial,
+                            const float& rcut_r);
+
+static void _map_nlist_cpu(int* nlist,
+                           const int* idx_mapping,
+                           const int& nloc,
+                           const int& nnei);
+
+template 
+static void _prepare_coord_nlist_cpu(OpKernelContext* context,
+                                     FPTYPE const** coord,
+                                     std::vector& coord_cpy,
+                                     int const** type,
+                                     std::vector& type_cpy,
+                                     std::vector& idx_mapping,
+                                     deepmd::InputNlist& inlist,
+                                     std::vector& ilist,
+                                     std::vector& numneigh,
+                                     std::vector& firstneigh,
+                                     std::vector>& jlist,
+                                     int& new_nall,
+                                     int& mem_cpy,
+                                     int& mem_nnei,
+                                     int& max_nbor_size,
+                                     const FPTYPE* box,
+                                     const int* mesh_tensor_data,
+                                     const int& nloc,
+                                     const int& nei_mode,
+                                     const float& rcut_r,
+                                     const int& max_cpy_trial,
+                                     const int& max_nnei_trial);
 
 // instance of function
 
-template
-static int
-_norm_copy_coord_cpu(
-    std::vector & coord_cpy,
-    std::vector & type_cpy,
-    std::vector & idx_mapping,
-    int & nall,
-    int & mem_cpy,
-    const FPTYPE * coord,
-    const FPTYPE * box,
-    const int * type,
-    const int &nloc, 
-    const int &max_cpy_trial, 
-    const float & rcut_r)
-{
-  std::vector tmp_coord(nall*3);
-  std::copy(coord, coord+nall*3, tmp_coord.begin());
+template 
+static int _norm_copy_coord_cpu(std::vector& coord_cpy,
+                                std::vector& type_cpy,
+                                std::vector& idx_mapping,
+                                int& nall,
+                                int& mem_cpy,
+                                const FPTYPE* coord,
+                                const FPTYPE* box,
+                                const int* type,
+                                const int& nloc,
+                                const int& max_cpy_trial,
+                                const float& rcut_r) {
+  std::vector tmp_coord(nall * 3);
+  std::copy(coord, coord + nall * 3, tmp_coord.begin());
   deepmd::Region region;
   init_region_cpu(region, box);
   normalize_coord_cpu(&tmp_coord[0], nall, region);
   int tt;
-  for(tt = 0; tt < max_cpy_trial; ++tt){
-    coord_cpy.resize(mem_cpy*3);
+  for (tt = 0; tt < max_cpy_trial; ++tt) {
+    coord_cpy.resize(mem_cpy * 3);
     type_cpy.resize(mem_cpy);
     idx_mapping.resize(mem_cpy);
-    int ret = copy_coord_cpu(
-	&coord_cpy[0], &type_cpy[0], &idx_mapping[0], &nall, 
-	&tmp_coord[0], type, nloc, mem_cpy, rcut_r, region);
-    if(ret == 0){
+    int ret =
+        copy_coord_cpu(&coord_cpy[0], &type_cpy[0], &idx_mapping[0], &nall,
+                       &tmp_coord[0], type, nloc, mem_cpy, rcut_r, region);
+    if (ret == 0) {
       break;
-    }
-    else{
+    } else {
       mem_cpy *= 2;
     }
   }
   return (tt != max_cpy_trial);
 }
 
-template
-static int
-_build_nlist_cpu(
-    std::vector &ilist, 
-    std::vector &numneigh,
-    std::vector &firstneigh,
-    std::vector> &jlist,
-    int & max_nnei,
-    int & mem_nnei,
-    const FPTYPE *coord,
-    const int & nloc,
-    const int & new_nall,
-    const int & max_nnei_trial,
-    const float & rcut_r)
-{
+template 
+static int _build_nlist_cpu(std::vector& ilist,
+                            std::vector& numneigh,
+                            std::vector& firstneigh,
+                            std::vector>& jlist,
+                            int& max_nnei,
+                            int& mem_nnei,
+                            const FPTYPE* coord,
+                            const int& nloc,
+                            const int& new_nall,
+                            const int& max_nnei_trial,
+                            const float& rcut_r) {
   int tt;
-  for(tt = 0; tt < max_nnei_trial; ++tt){
-    for(int ii = 0; ii < nloc; ++ii){
+  for (tt = 0; tt < max_nnei_trial; ++tt) {
+    for (int ii = 0; ii < nloc; ++ii) {
       jlist[ii].resize(mem_nnei);
       firstneigh[ii] = &jlist[ii][0];
     }
     deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
-    int ret = build_nlist_cpu(
-	inlist, &max_nnei, 
-	coord, nloc, new_nall, mem_nnei, rcut_r);
-    if(ret == 0){
+    int ret = build_nlist_cpu(inlist, &max_nnei, coord, nloc, new_nall,
+                              mem_nnei, rcut_r);
+    if (ret == 0) {
       break;
-    }
-    else{
+    } else {
       mem_nnei *= 2;
     }
   }
   return (tt != max_nnei_trial);
 }
-    
-static void
-_map_nlist_cpu(
-    int * nlist,
-    const int * idx_mapping,
-    const int & nloc,
-    const int & nnei)
-{
-  for (int ii = 0; ii < nloc; ++ii){
-    for (int jj = 0; jj < nnei; ++jj){
-      int record = nlist[ii*nnei+jj];
-      if (record >= 0) {		
-	nlist[ii*nnei+jj] = idx_mapping[record];	      
+
+static void _map_nlist_cpu(int* nlist,
+                           const int* idx_mapping,
+                           const int& nloc,
+                           const int& nnei) {
+  for (int ii = 0; ii < nloc; ++ii) {
+    for (int jj = 0; jj < nnei; ++jj) {
+      int record = nlist[ii * nnei + jj];
+      if (record >= 0) {
+        nlist[ii * nnei + jj] = idx_mapping[record];
       }
     }
-  }  
+  }
 }
 
 template 
-static void
-_prepare_coord_nlist_cpu(
-    OpKernelContext* context,
-    FPTYPE const ** coord,
-    std::vector & coord_cpy,
-    int const** type,
-    std::vector & type_cpy,
-    std::vector & idx_mapping,
-    deepmd::InputNlist & inlist,
-    std::vector & ilist,
-    std::vector & numneigh,
-    std::vector & firstneigh,
-    std::vector> & jlist,
-    int & new_nall,
-    int & mem_cpy,
-    int & mem_nnei,
-    int & max_nbor_size,
-    const FPTYPE * box,
-    const int * mesh_tensor_data,
-    const int & nloc,
-    const int & nei_mode,
-    const float & rcut_r,
-    const int & max_cpy_trial,
-    const int & max_nnei_trial)
-{    
+static void _prepare_coord_nlist_cpu(OpKernelContext* context,
+                                     FPTYPE const** coord,
+                                     std::vector& coord_cpy,
+                                     int const** type,
+                                     std::vector& type_cpy,
+                                     std::vector& idx_mapping,
+                                     deepmd::InputNlist& inlist,
+                                     std::vector& ilist,
+                                     std::vector& numneigh,
+                                     std::vector& firstneigh,
+                                     std::vector>& jlist,
+                                     int& new_nall,
+                                     int& mem_cpy,
+                                     int& mem_nnei,
+                                     int& max_nbor_size,
+                                     const FPTYPE* box,
+                                     const int* mesh_tensor_data,
+                                     const int& nloc,
+                                     const int& nei_mode,
+                                     const float& rcut_r,
+                                     const int& max_cpy_trial,
+                                     const int& max_nnei_trial) {
   inlist.inum = nloc;
-  if(nei_mode != 3){
+  if (nei_mode != 3) {
     // build nlist by myself
     // normalize and copy coord
-    if(nei_mode == 1){
-      int copy_ok = _norm_copy_coord_cpu(
-	  coord_cpy, type_cpy, idx_mapping, new_nall, mem_cpy,
-	  *coord, box, *type, nloc, max_cpy_trial, rcut_r);
-      OP_REQUIRES (context, copy_ok, errors::Aborted("cannot allocate mem for copied coords"));
+    if (nei_mode == 1) {
+      int copy_ok = _norm_copy_coord_cpu(coord_cpy, type_cpy, idx_mapping,
+                                         new_nall, mem_cpy, *coord, box, *type,
+                                         nloc, max_cpy_trial, rcut_r);
+      OP_REQUIRES(context, copy_ok,
+                  errors::Aborted("cannot allocate mem for copied coords"));
       *coord = &coord_cpy[0];
       *type = &type_cpy[0];
     }
     // build nlist
-    int build_ok = _build_nlist_cpu(
-	ilist, numneigh, firstneigh, jlist, max_nbor_size, mem_nnei,
-	*coord, nloc, new_nall, max_nnei_trial, rcut_r);
-    OP_REQUIRES (context, build_ok, errors::Aborted("cannot allocate mem for nlist"));
+    int build_ok = _build_nlist_cpu(ilist, numneigh, firstneigh, jlist,
+                                    max_nbor_size, mem_nnei, *coord, nloc,
+                                    new_nall, max_nnei_trial, rcut_r);
+    OP_REQUIRES(context, build_ok,
+                errors::Aborted("cannot allocate mem for nlist"));
     inlist.ilist = &ilist[0];
     inlist.numneigh = &numneigh[0];
     inlist.firstneigh = &firstneigh[0];
-  }
-  else{
+  } else {
     // copy pointers to nlist data
-    memcpy(&inlist.ilist, 4 + mesh_tensor_data, sizeof(int *));
-    memcpy(&inlist.numneigh, 8 + mesh_tensor_data, sizeof(int *));
-    memcpy(&inlist.firstneigh, 12 + mesh_tensor_data, sizeof(int **));
+    memcpy(&inlist.ilist, 4 + mesh_tensor_data, sizeof(int*));
+    memcpy(&inlist.numneigh, 8 + mesh_tensor_data, sizeof(int*));
+    memcpy(&inlist.firstneigh, 12 + mesh_tensor_data, sizeof(int**));
     max_nbor_size = max_numneigh(inlist);
   }
 }
 
-
 /*
 //==================================================
   ProdEnvMatANvnmdOp
@@ -273,12 +249,11 @@ _prepare_coord_nlist_cpu(
 //==================================================
 */
 
-
-
 template 
 class ProdEnvMatANvnmdQuantizeOp : public OpKernel {
-public:
-  explicit ProdEnvMatANvnmdQuantizeOp(OpKernelConstruction* context) : OpKernel(context) {
+ public:
+  explicit ProdEnvMatANvnmdQuantizeOp(OpKernelConstruction* context)
+      : OpKernel(context) {
     float nloc_f, nall_f;
     OP_REQUIRES_OK(context, context->GetAttr("rcut_a", &rcut_a));
     OP_REQUIRES_OK(context, context->GetAttr("rcut_r", &rcut_r));
@@ -287,8 +262,8 @@ class ProdEnvMatANvnmdQuantizeOp : public OpKernel {
     OP_REQUIRES_OK(context, context->GetAttr("sel_r", &sel_r));
     // OP_REQUIRES_OK(context, context->GetAttr("nloc", &nloc_f));
     // OP_REQUIRES_OK(context, context->GetAttr("nall", &nall_f));
-    deepmd::cum_sum (sec_a, sel_a);
-    deepmd::cum_sum (sec_r, sel_r);
+    deepmd::cum_sum(sec_a, sel_a);
+    deepmd::cum_sum(sec_r, sel_r);
     ndescrpt_a = sec_a.back() * 4;
     ndescrpt_r = sec_r.back() * 1;
     ndescrpt = ndescrpt_a + ndescrpt_r;
@@ -303,167 +278,183 @@ class ProdEnvMatANvnmdQuantizeOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& coord_tensor	= context->input(context_input_index++);
-    const Tensor& type_tensor	= context->input(context_input_index++);
-    const Tensor& natoms_tensor	= context->input(context_input_index++);
-    const Tensor& box_tensor	= context->input(context_input_index++);
-    const Tensor& mesh_tensor   = context->input(context_input_index++);
-    const Tensor& avg_tensor	= context->input(context_input_index++);
-    const Tensor& std_tensor	= context->input(context_input_index++);
-    // set size of the sample. assume 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]], then shape(t) ==> [2, 2, 3]
-    OP_REQUIRES (context, (coord_tensor.shape().dims() == 2),       errors::InvalidArgument ("Dim of coord should be 2"));
-    OP_REQUIRES (context, (type_tensor.shape().dims() == 2),        errors::InvalidArgument ("Dim of type should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),      errors::InvalidArgument ("Dim of natoms should be 1"));
-    OP_REQUIRES (context, (box_tensor.shape().dims() == 2),         errors::InvalidArgument ("Dim of box should be 2"));
-    OP_REQUIRES (context, (mesh_tensor.shape().dims() == 1),        errors::InvalidArgument ("Dim of mesh should be 1"));
-    OP_REQUIRES (context, (avg_tensor.shape().dims() == 2),         errors::InvalidArgument ("Dim of avg should be 2"));
-    OP_REQUIRES (context, (std_tensor.shape().dims() == 2),         errors::InvalidArgument ("Dim of std should be 2"));
-    OP_REQUIRES (context, (sec_r.back() == 0),                      errors::InvalidArgument ("Rotational free descriptor only support all-angular information: sel_r should be all zero."));
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
-    const int * natoms = natoms_tensor.flat().data();
+    const Tensor& coord_tensor = context->input(context_input_index++);
+    const Tensor& type_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
+    const Tensor& box_tensor = context->input(context_input_index++);
+    const Tensor& mesh_tensor = context->input(context_input_index++);
+    const Tensor& avg_tensor = context->input(context_input_index++);
+    const Tensor& std_tensor = context->input(context_input_index++);
+    // set size of the sample. assume 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3,
+    // 3], [4, 4, 4]]], then shape(t) ==> [2, 2, 3]
+    OP_REQUIRES(context, (coord_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of coord should be 2"));
+    OP_REQUIRES(context, (type_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of type should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (box_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of box should be 2"));
+    OP_REQUIRES(context, (mesh_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of mesh should be 1"));
+    OP_REQUIRES(context, (avg_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of avg should be 2"));
+    OP_REQUIRES(context, (std_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of std should be 2"));
+    OP_REQUIRES(context, (sec_r.back() == 0),
+                errors::InvalidArgument(
+                    "Rotational free descriptor only support all-angular "
+                    "information: sel_r should be all zero."));
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    DeviceFunctor()(device, context->eigen_device());
+    const int* natoms = natoms_tensor.flat().data();
     int nloc = natoms[0];
     int nall = natoms[1];
-    int ntypes = natoms_tensor.shape().dim_size(0) - 2; //nloc and nall mean something.
+    int ntypes =
+        natoms_tensor.shape().dim_size(0) - 2;  // nloc and nall mean something.
     int nsamples = coord_tensor.shape().dim_size(0);
     //// check the sizes
-    OP_REQUIRES (context, (nsamples == type_tensor.shape().dim_size(0)),  errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nsamples == box_tensor.shape().dim_size(0)),   errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (ntypes == avg_tensor.shape().dim_size(0)),     errors::InvalidArgument ("number of avg should be ntype"));
-    OP_REQUIRES (context, (ntypes == std_tensor.shape().dim_size(0)),     errors::InvalidArgument ("number of std should be ntype"));
-    
-    OP_REQUIRES (context, (nall * 3 == coord_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (nall == type_tensor.shape().dim_size(1)),      errors::InvalidArgument ("number of atoms should match"));
-    OP_REQUIRES (context, (9 == box_tensor.shape().dim_size(1)),          errors::InvalidArgument ("number of box should be 9"));
-    OP_REQUIRES (context, (ndescrpt == avg_tensor.shape().dim_size(1)),   errors::InvalidArgument ("number of avg should be ndescrpt"));
-    OP_REQUIRES (context, (ndescrpt == std_tensor.shape().dim_size(1)),   errors::InvalidArgument ("number of std should be ndescrpt"));   
-    
-    OP_REQUIRES (context, (ntypes == int(sel_a.size())),  errors::InvalidArgument ("number of types should match the length of sel array"));
-    OP_REQUIRES (context, (ntypes == int(sel_r.size())),  errors::InvalidArgument ("number of types should match the length of sel array"));
+    OP_REQUIRES(context, (nsamples == type_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nsamples == box_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (ntypes == avg_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of avg should be ntype"));
+    OP_REQUIRES(context, (ntypes == std_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of std should be ntype"));
+
+    OP_REQUIRES(context, (nall * 3 == coord_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (nall == type_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of atoms should match"));
+    OP_REQUIRES(context, (9 == box_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of box should be 9"));
+    OP_REQUIRES(context, (ndescrpt == avg_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of avg should be ndescrpt"));
+    OP_REQUIRES(context, (ndescrpt == std_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of std should be ndescrpt"));
+
+    OP_REQUIRES(context, (ntypes == int(sel_a.size())),
+                errors::InvalidArgument(
+                    "number of types should match the length of sel array"));
+    OP_REQUIRES(context, (ntypes == int(sel_r.size())),
+                errors::InvalidArgument(
+                    "number of types should match the length of sel array"));
 
     int nei_mode = 0;
     bool b_nlist_map = false;
     if (mesh_tensor.shape().dim_size(0) == 16) {
       // lammps neighbor list
       nei_mode = 3;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 6) {
+    } else if (mesh_tensor.shape().dim_size(0) == 6) {
       // manual copied pbc
-      assert (nloc == nall);
+      assert(nloc == nall);
       nei_mode = 1;
       b_nlist_map = true;
-    }
-    else if (mesh_tensor.shape().dim_size(0) == 0) {
+    } else if (mesh_tensor.shape().dim_size(0) == 0) {
       // no pbc
-      assert (nloc == nall);
+      assert(nloc == nall);
       nei_mode = -1;
-    }
-    else {
+    } else {
       throw deepmd::deepmd_exception("invalid mesh tensor");
     }
 
     // Create output tensors
-    TensorShape descrpt_shape ;
-    descrpt_shape.AddDim (nsamples);
-    descrpt_shape.AddDim (nloc * ndescrpt);
-    TensorShape descrpt_deriv_shape ;
-    descrpt_deriv_shape.AddDim (nsamples);
-    descrpt_deriv_shape.AddDim (nloc * ndescrpt * 3);
-    TensorShape rij_shape ;
-    rij_shape.AddDim (nsamples);
-    rij_shape.AddDim (nloc * nnei * 3);
-    TensorShape nlist_shape ;
-    nlist_shape.AddDim (nsamples);
-    nlist_shape.AddDim (nloc * nnei);
+    TensorShape descrpt_shape;
+    descrpt_shape.AddDim(nsamples);
+    descrpt_shape.AddDim(nloc * ndescrpt);
+    TensorShape descrpt_deriv_shape;
+    descrpt_deriv_shape.AddDim(nsamples);
+    descrpt_deriv_shape.AddDim(nloc * ndescrpt * 3);
+    TensorShape rij_shape;
+    rij_shape.AddDim(nsamples);
+    rij_shape.AddDim(nloc * nnei * 3);
+    TensorShape nlist_shape;
+    nlist_shape.AddDim(nsamples);
+    nlist_shape.AddDim(nloc * nnei);
     // define output tensor
     int context_output_index = 0;
     Tensor* descrpt_tensor = NULL;
     Tensor* descrpt_deriv_tensor = NULL;
     Tensor* rij_tensor = NULL;
     Tensor* nlist_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        descrpt_shape,
-        &descrpt_tensor));
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        descrpt_deriv_shape,
-        &descrpt_deriv_tensor));
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        rij_shape,
-        &rij_tensor));
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        nlist_shape,
-        &nlist_tensor));
-
-    FPTYPE * p_em = descrpt_tensor->flat().data();
-    FPTYPE * p_em_deriv = descrpt_deriv_tensor->flat().data();
-    FPTYPE * p_rij = rij_tensor->flat().data();
-    int * p_nlist = nlist_tensor->flat().data();
-    const FPTYPE * p_coord = coord_tensor.flat().data();
-    const FPTYPE * p_box = box_tensor.flat().data();
-    const FPTYPE * avg = avg_tensor.flat().data();
-    const FPTYPE * std = std_tensor.flat().data();
-    const int * p_type = type_tensor.flat().data();
+    OP_REQUIRES_OK(
+        context, context->allocate_output(context_output_index++, descrpt_shape,
+                                          &descrpt_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     descrpt_deriv_shape,
+                                                     &descrpt_deriv_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     rij_shape, &rij_tensor));
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++, nlist_shape,
+                                            &nlist_tensor));
+
+    FPTYPE* p_em = descrpt_tensor->flat().data();
+    FPTYPE* p_em_deriv = descrpt_deriv_tensor->flat().data();
+    FPTYPE* p_rij = rij_tensor->flat().data();
+    int* p_nlist = nlist_tensor->flat().data();
+    const FPTYPE* p_coord = coord_tensor.flat().data();
+    const FPTYPE* p_box = box_tensor.flat().data();
+    const FPTYPE* avg = avg_tensor.flat().data();
+    const FPTYPE* std = std_tensor.flat().data();
+    const int* p_type = type_tensor.flat().data();
 
     // loop over samples
-    for(int ff = 0; ff < nsamples; ++ff){
-      FPTYPE * em = p_em + ff*nloc*ndescrpt;
-      FPTYPE * em_deriv = p_em_deriv + ff*nloc*ndescrpt*3;
-      FPTYPE * rij = p_rij + ff*nloc*nnei*3;
-      int * nlist = p_nlist + ff*nloc*nnei;
-      const FPTYPE * coord = p_coord + ff*nall*3;
-      const FPTYPE * box = p_box + ff*9;
-      const int * type = p_type + ff*nall;
-
-    if(device == "GPU") {
-      #if GOOGLE_CUDA
-      // UNDEFINE
-      #endif //GOOGLE_CUDA
-
-      #if TENSORFLOW_USE_ROCM
-      // UNDEFINE
-      #endif //TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::InputNlist inlist;
-      // some buffers, be freed after the evaluation of this frame
-      std::vector idx_mapping;
-      std::vector ilist(nloc), numneigh(nloc);
-      std::vector firstneigh(nloc);
-      std::vector> jlist(nloc);
-      std::vector coord_cpy;
-      std::vector type_cpy;
-      int frame_nall = nall;
-      // prepare coord and nlist
-      _prepare_coord_nlist_cpu(
-	  context, &coord, coord_cpy, &type, type_cpy, idx_mapping, 
-	  inlist, ilist, numneigh, firstneigh, jlist,
-	  frame_nall, mem_cpy, mem_nnei, max_nbor_size,
-	  box, mesh_tensor.flat().data(), nloc, nei_mode, rcut_r, max_cpy_trial, max_nnei_trial);
-      // launch the cpu compute function
-      deepmd::prod_env_mat_a_nvnmd_quantize_cpu(
-	  em, em_deriv, rij, nlist, 
-	  coord, type, inlist, max_nbor_size, avg, std, nloc, frame_nall, rcut_r, rcut_r_smth, sec_a);
-      // do nlist mapping if coords were copied
-      if(b_nlist_map) _map_nlist_cpu(nlist, &idx_mapping[0], nloc, nnei);
-    }
+    for (int ff = 0; ff < nsamples; ++ff) {
+      FPTYPE* em = p_em + ff * nloc * ndescrpt;
+      FPTYPE* em_deriv = p_em_deriv + ff * nloc * ndescrpt * 3;
+      FPTYPE* rij = p_rij + ff * nloc * nnei * 3;
+      int* nlist = p_nlist + ff * nloc * nnei;
+      const FPTYPE* coord = p_coord + ff * nall * 3;
+      const FPTYPE* box = p_box + ff * 9;
+      const int* type = p_type + ff * nall;
+
+      if (device == "GPU") {
+#if GOOGLE_CUDA
+// UNDEFINE
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+// UNDEFINE
+#endif  // TENSORFLOW_USE_ROCM
+      } else if (device == "CPU") {
+        deepmd::InputNlist inlist;
+        // some buffers, be freed after the evaluation of this frame
+        std::vector idx_mapping;
+        std::vector ilist(nloc), numneigh(nloc);
+        std::vector firstneigh(nloc);
+        std::vector> jlist(nloc);
+        std::vector coord_cpy;
+        std::vector type_cpy;
+        int frame_nall = nall;
+        // prepare coord and nlist
+        _prepare_coord_nlist_cpu(
+            context, &coord, coord_cpy, &type, type_cpy, idx_mapping, inlist,
+            ilist, numneigh, firstneigh, jlist, frame_nall, mem_cpy, mem_nnei,
+            max_nbor_size, box, mesh_tensor.flat().data(), nloc, nei_mode,
+            rcut_r, max_cpy_trial, max_nnei_trial);
+        // launch the cpu compute function
+        deepmd::prod_env_mat_a_nvnmd_quantize_cpu(
+            em, em_deriv, rij, nlist, coord, type, inlist, max_nbor_size, avg,
+            std, nloc, frame_nall, rcut_r, rcut_r_smth, sec_a);
+        // do nlist mapping if coords were copied
+        if (b_nlist_map) _map_nlist_cpu(nlist, &idx_mapping[0], nloc, nnei);
+      }
     }
   }
 
-/////////////////////////////////////////////////////////////////////////////////////////////
-private:
+  /////////////////////////////////////////////////////////////////////////////////////////////
+ private:
   float rcut_a;
   float rcut_r;
   float rcut_r_smth;
@@ -476,25 +467,25 @@ class ProdEnvMatANvnmdQuantizeOp : public OpKernel {
   int mem_cpy, max_cpy_trial;
   int mem_nnei, max_nnei_trial;
   std::string device;
-  int * array_int = NULL;
-  unsigned long long * array_longlong = NULL;
+  int* array_int = NULL;
+  unsigned long long* array_longlong = NULL;
   deepmd::InputNlist gpu_inlist;
-  int * nbor_list_dev = NULL;
+  int* nbor_list_dev = NULL;
 };
 
-
 // Register the CPU kernels.
 // Compatible with v1.3
-#define REGISTER_CPU(T) \
-REGISTER_KERNEL_BUILDER( \
-    Name("ProdEnvMatANvnmdQuantize").Device(DEVICE_CPU).TypeConstraint("T"),  \
-    ProdEnvMatANvnmdQuantizeOp); 
-
-REGISTER_CPU(float);                  
-REGISTER_CPU(double);              
-            
-// Register the GPU kernels.                  
+#define REGISTER_CPU(T)                                    \
+  REGISTER_KERNEL_BUILDER(Name("ProdEnvMatANvnmdQuantize") \
+                              .Device(DEVICE_CPU)          \
+                              .TypeConstraint("T"),     \
+                          ProdEnvMatANvnmdQuantizeOp);
+
+REGISTER_CPU(float);
+REGISTER_CPU(double);
+
+// Register the GPU kernels.
 // Compatible with v1.3
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM            
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 // UNDEFINE
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/source/op/prod_force.cc b/source/op/prod_force.cc
index a97fb6c575..591a56ffda 100644
--- a/source/op/prod_force.cc
+++ b/source/op/prod_force.cc
@@ -1,22 +1,21 @@
 #include "custom_op.h"
 
 REGISTER_OP("ProdForce")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("net_deriv: T")
-.Input("in_deriv: T")
-.Input("nlist: int32")
-.Input("axis: int32")
-.Input("natoms: int32")
-.Attr("n_a_sel: int")
-.Attr("n_r_sel: int")
-.Output("force: T");
-
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("net_deriv: T")
+    .Input("in_deriv: T")
+    .Input("nlist: int32")
+    .Input("axis: int32")
+    .Input("natoms: int32")
+    .Attr("n_a_sel: int")
+    .Attr("n_r_sel: int")
+    .Output("force: T");
 
 using namespace tensorflow;
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-template
+template 
 class ProdForceOp : public OpKernel {
  public:
   explicit ProdForceOp(OpKernelConstruction* context) : OpKernel(context) {
@@ -26,26 +25,34 @@ class ProdForceOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
-    const Tensor& net_deriv_tensor	= context->input(0);
-    const Tensor& in_deriv_tensor	= context->input(1);
-    const Tensor& nlist_tensor		= context->input(2);
-    const Tensor& axis_tensor		= context->input(3);
-    const Tensor& natoms_tensor		= context->input(4);
+    const Tensor& net_deriv_tensor = context->input(0);
+    const Tensor& in_deriv_tensor = context->input(1);
+    const Tensor& nlist_tensor = context->input(2);
+    const Tensor& axis_tensor = context->input(3);
+    const Tensor& natoms_tensor = context->input(4);
 
     // set size of the sample
-    OP_REQUIRES (context, (net_deriv_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (axis_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of axis should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
-
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    OP_REQUIRES(context, (net_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (nlist_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (axis_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of axis should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -54,22 +61,32 @@ class ProdForceOp : public OpKernel {
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == in_deriv_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == axis_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-
-    OP_REQUIRES (context, (nloc * ndescrpt * 12 == in_deriv_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of descriptors should match"));
-    OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel),				errors::InvalidArgument ("number of neighbors should match"));
-    OP_REQUIRES (context, (nloc * 4 == axis_tensor.shape().dim_size(1)),	errors::InvalidArgument ("number of axis type+id should match 2+2"));
+    OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == axis_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+
+    OP_REQUIRES(context,
+                (nloc * ndescrpt * 12 == in_deriv_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
+                errors::InvalidArgument("number of neighbors should match"));
+    OP_REQUIRES(
+        context, (nloc * 4 == axis_tensor.shape().dim_size(1)),
+        errors::InvalidArgument("number of axis type+id should match 2+2"));
 
     // Create an output tensor
-    TensorShape force_shape ;
-    force_shape.AddDim (nframes);
-    force_shape.AddDim (3 * nall);
-    // std::cout << "forcesahpe " << force_shape.dim_size(0) << " " << force_shape.dim_size(1) << std::endl;
+    TensorShape force_shape;
+    force_shape.AddDim(nframes);
+    force_shape.AddDim(3 * nall);
+    // std::cout << "forcesahpe " << force_shape.dim_size(0) << " " <<
+    // force_shape.dim_size(1) << std::endl;
     Tensor* force_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, force_shape, &force_tensor));
-    
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, force_shape, &force_tensor));
+
     // flat the tensors
     auto net_deriv = net_deriv_tensor.flat();
     auto in_deriv = in_deriv_tensor.flat();
@@ -78,94 +95,113 @@ class ProdForceOp : public OpKernel {
     auto force = force_tensor->flat();
 
     // loop over samples
-#pragma omp parallel for 
-    for (int kk = 0; kk < nframes; ++kk){
-      int force_iter	= kk * nall * 3;
-      int net_iter	= kk * nloc * ndescrpt;
-      int in_iter	= kk * nloc * ndescrpt * 12;
-      int nlist_iter	= kk * nloc * nnei;
-      int axis_iter	= kk * nloc * 4;
-
-      for (int ii = 0; ii < nall; ++ii){
-	int i_idx = ii;
-	force (force_iter + i_idx * 3 + 0) = 0;
-	force (force_iter + i_idx * 3 + 1) = 0;
-	force (force_iter + i_idx * 3 + 2) = 0;
+#pragma omp parallel for
+    for (int kk = 0; kk < nframes; ++kk) {
+      int force_iter = kk * nall * 3;
+      int net_iter = kk * nloc * ndescrpt;
+      int in_iter = kk * nloc * ndescrpt * 12;
+      int nlist_iter = kk * nloc * nnei;
+      int axis_iter = kk * nloc * 4;
+
+      for (int ii = 0; ii < nall; ++ii) {
+        int i_idx = ii;
+        force(force_iter + i_idx * 3 + 0) = 0;
+        force(force_iter + i_idx * 3 + 1) = 0;
+        force(force_iter + i_idx * 3 + 2) = 0;
       }
 
       // compute force of a frame
-      for (int ii = 0; ii < nloc; ++ii){
-	int i_idx = ii;
-	
-	// deriv wrt center atom
-	for (int aa = 0; aa < ndescrpt; ++aa){
-	  force (force_iter + i_idx * 3 + 0) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 0);
-	  force (force_iter + i_idx * 3 + 1) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 1);
-	  force (force_iter + i_idx * 3 + 2) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 2);
-	}
-
-	// set axes
-	int axis0_type = axis (axis_iter + i_idx * 4 + 0);
-	int axis1_type = axis (axis_iter + i_idx * 4 + 2);
-	int axis_0  = axis (axis_iter + i_idx * 4 + 1);
-	int axis_1  = axis (axis_iter + i_idx * 4 + 3);
-	if (axis0_type == 1) axis_0 += n_a_sel;
-	if (axis1_type == 1) axis_1 += n_a_sel;
-
-	// deriv wrt neighbors
-	for (int jj = 0; jj < nnei; ++jj){
-	  int j_idx = nlist (nlist_iter + i_idx * nnei + jj);
-	  // if (j_idx > nloc) j_idx = j_idx % nloc;
-	  if (j_idx < 0) continue;
-	  if (jj == axis_0) {
-	    for (int aa = 0; aa < ndescrpt; ++aa){
-	      force (force_iter + j_idx * 3 + 0) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 + 0);
-	      force (force_iter + j_idx * 3 + 1) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 + 1);
-	      force (force_iter + j_idx * 3 + 2) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 + 2);
-	    }
-	  }
-	  else if (jj == axis_1) {
-	    for (int aa = 0; aa < ndescrpt; ++aa){
-	      force (force_iter + j_idx * 3 + 0) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 + 0);
-	      force (force_iter + j_idx * 3 + 1) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 + 1);
-	      force (force_iter + j_idx * 3 + 2) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 + 2);
-	    }
-	  }
-	  else {
-	    int aa_start, aa_end;
-	    make_descript_range (aa_start, aa_end, jj);
-	    for (int aa = aa_start; aa < aa_end; ++aa) {
-	      force (force_iter + j_idx * 3 + 0) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 + 0);
-	      force (force_iter + j_idx * 3 + 1) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 + 1);
-	      force (force_iter + j_idx * 3 + 2) -= net_deriv (net_iter + i_idx * ndescrpt + aa) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 + 2);
-	    }
-	  }
-	}
+      for (int ii = 0; ii < nloc; ++ii) {
+        int i_idx = ii;
+
+        // deriv wrt center atom
+        for (int aa = 0; aa < ndescrpt; ++aa) {
+          force(force_iter + i_idx * 3 + 0) -=
+              net_deriv(net_iter + i_idx * ndescrpt + aa) *
+              in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 0);
+          force(force_iter + i_idx * 3 + 1) -=
+              net_deriv(net_iter + i_idx * ndescrpt + aa) *
+              in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 1);
+          force(force_iter + i_idx * 3 + 2) -=
+              net_deriv(net_iter + i_idx * ndescrpt + aa) *
+              in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 2);
+        }
+
+        // set axes
+        int axis0_type = axis(axis_iter + i_idx * 4 + 0);
+        int axis1_type = axis(axis_iter + i_idx * 4 + 2);
+        int axis_0 = axis(axis_iter + i_idx * 4 + 1);
+        int axis_1 = axis(axis_iter + i_idx * 4 + 3);
+        if (axis0_type == 1) axis_0 += n_a_sel;
+        if (axis1_type == 1) axis_1 += n_a_sel;
+
+        // deriv wrt neighbors
+        for (int jj = 0; jj < nnei; ++jj) {
+          int j_idx = nlist(nlist_iter + i_idx * nnei + jj);
+          // if (j_idx > nloc) j_idx = j_idx % nloc;
+          if (j_idx < 0) continue;
+          if (jj == axis_0) {
+            for (int aa = 0; aa < ndescrpt; ++aa) {
+              force(force_iter + j_idx * 3 + 0) -=
+                  net_deriv(net_iter + i_idx * ndescrpt + aa) *
+                  in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 + 0);
+              force(force_iter + j_idx * 3 + 1) -=
+                  net_deriv(net_iter + i_idx * ndescrpt + aa) *
+                  in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 + 1);
+              force(force_iter + j_idx * 3 + 2) -=
+                  net_deriv(net_iter + i_idx * ndescrpt + aa) *
+                  in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 + 2);
+            }
+          } else if (jj == axis_1) {
+            for (int aa = 0; aa < ndescrpt; ++aa) {
+              force(force_iter + j_idx * 3 + 0) -=
+                  net_deriv(net_iter + i_idx * ndescrpt + aa) *
+                  in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 + 0);
+              force(force_iter + j_idx * 3 + 1) -=
+                  net_deriv(net_iter + i_idx * ndescrpt + aa) *
+                  in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 + 1);
+              force(force_iter + j_idx * 3 + 2) -=
+                  net_deriv(net_iter + i_idx * ndescrpt + aa) *
+                  in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 + 2);
+            }
+          } else {
+            int aa_start, aa_end;
+            make_descript_range(aa_start, aa_end, jj);
+            for (int aa = aa_start; aa < aa_end; ++aa) {
+              force(force_iter + j_idx * 3 + 0) -=
+                  net_deriv(net_iter + i_idx * ndescrpt + aa) *
+                  in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 + 0);
+              force(force_iter + j_idx * 3 + 1) -=
+                  net_deriv(net_iter + i_idx * ndescrpt + aa) *
+                  in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 + 1);
+              force(force_iter + j_idx * 3 + 2) -=
+                  net_deriv(net_iter + i_idx * ndescrpt + aa) *
+                  in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 + 2);
+            }
+          }
+        }
       }
     }
   }
-private:
+
+ private:
   int n_r_sel, n_a_sel, n_a_shift;
-  inline void 
-  make_descript_range (int & idx_start,
-		       int & idx_end,
-		       const int & nei_idx) {
+  inline void make_descript_range(int& idx_start,
+                                  int& idx_end,
+                                  const int& nei_idx) {
     if (nei_idx < n_a_sel) {
       idx_start = nei_idx * 4;
-      idx_end   = nei_idx * 4 + 4;
-    }
-    else {
+      idx_end = nei_idx * 4 + 4;
+    } else {
       idx_start = n_a_shift + (nei_idx - n_a_sel);
-      idx_end   = n_a_shift + (nei_idx - n_a_sel) + 1;
+      idx_end = n_a_shift + (nei_idx - n_a_sel) + 1;
     }
   }
 };
 
-#define REGISTER_CPU(T)                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("ProdForce").Device(DEVICE_CPU).TypeConstraint("T"),                       \
-    ProdForceOp); 
+#define REGISTER_CPU(T)                                            \
+  REGISTER_KERNEL_BUILDER(                                         \
+      Name("ProdForce").Device(DEVICE_CPU).TypeConstraint("T"), \
+      ProdForceOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
-
-
diff --git a/source/op/prod_force_grad.cc b/source/op/prod_force_grad.cc
index 67423d7489..5e63ff2308 100644
--- a/source/op/prod_force_grad.cc
+++ b/source/op/prod_force_grad.cc
@@ -1,58 +1,66 @@
 #include "custom_op.h"
 
 REGISTER_OP("ProdForceGrad")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("grad: T")
-.Input("net_deriv: T")
-.Input("in_deriv: T")
-.Input("nlist: int32")
-.Input("axis: int32")
-.Input("natoms: int32")
-.Attr("n_a_sel: int")
-.Attr("n_r_sel: int")
-.Output("grad_net: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("grad: T")
+    .Input("net_deriv: T")
+    .Input("in_deriv: T")
+    .Input("nlist: int32")
+    .Input("axis: int32")
+    .Input("natoms: int32")
+    .Attr("n_a_sel: int")
+    .Attr("n_r_sel: int")
+    .Output("grad_net: T");
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-template
-class ProdForceGradOp : public OpKernel 
-{
-public:
+template 
+class ProdForceGradOp : public OpKernel {
+ public:
   explicit ProdForceGradOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));    
-    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));    
+    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));
+    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));
     n_a_shift = n_a_sel * 4;
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
-    const Tensor& grad_tensor		= context->input(0);
-    const Tensor& net_deriv_tensor	= context->input(1);
-    const Tensor& in_deriv_tensor	= context->input(2);
-    const Tensor& nlist_tensor		= context->input(3);
-    const Tensor& axis_tensor		= context->input(4);
-    const Tensor& natoms_tensor		= context->input(5);
+    const Tensor& grad_tensor = context->input(0);
+    const Tensor& net_deriv_tensor = context->input(1);
+    const Tensor& in_deriv_tensor = context->input(2);
+    const Tensor& nlist_tensor = context->input(3);
+    const Tensor& axis_tensor = context->input(4);
+    const Tensor& natoms_tensor = context->input(5);
 
     // set size of the sample
-    TensorShape grad_shape		= grad_tensor.shape();
-    TensorShape net_deriv_shape		= net_deriv_tensor.shape();
-    TensorShape in_deriv_shape		= in_deriv_tensor.shape();
-    TensorShape nlist_shape		= nlist_tensor.shape();
-    TensorShape axis_shape		= axis_tensor.shape();
-
-    OP_REQUIRES (context, (grad_shape.dims() == 2),	errors::InvalidArgument ("Dim of grad should be 2"));
-    OP_REQUIRES (context, (net_deriv_shape.dims() == 2),errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (nlist_shape.dims() == 2),	errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (axis_shape.dims() == 2),	errors::InvalidArgument ("Dim of axis should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
-
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    TensorShape grad_shape = grad_tensor.shape();
+    TensorShape net_deriv_shape = net_deriv_tensor.shape();
+    TensorShape in_deriv_shape = in_deriv_tensor.shape();
+    TensorShape nlist_shape = nlist_tensor.shape();
+    TensorShape axis_shape = axis_tensor.shape();
+
+    OP_REQUIRES(context, (grad_shape.dims() == 2),
+                errors::InvalidArgument("Dim of grad should be 2"));
+    OP_REQUIRES(context, (net_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (nlist_shape.dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (axis_shape.dims() == 2),
+                errors::InvalidArgument("Dim of axis should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -60,122 +68,140 @@ class ProdForceGradOp : public OpKernel
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == in_deriv_shape.dim_size(0)),	errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == axis_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    
-    OP_REQUIRES (context, (nloc * 3 == grad_shape.dim_size(1)),		errors::InvalidArgument ("input grad shape should be 3 x natoms"));
-    OP_REQUIRES (context, (nloc * ndescrpt * 12 == in_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of descriptors should match"));
-    OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel),			errors::InvalidArgument ("number of neighbors should match"));
-    OP_REQUIRES (context, (nloc * 4 == axis_shape.dim_size(1)),		errors::InvalidArgument ("number of axis type+id should be 2+2"));
+    OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == in_deriv_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == nlist_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == axis_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+
+    OP_REQUIRES(
+        context, (nloc * 3 == grad_shape.dim_size(1)),
+        errors::InvalidArgument("input grad shape should be 3 x natoms"));
+    OP_REQUIRES(context, (nloc * ndescrpt * 12 == in_deriv_shape.dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
+                errors::InvalidArgument("number of neighbors should match"));
+    OP_REQUIRES(
+        context, (nloc * 4 == axis_shape.dim_size(1)),
+        errors::InvalidArgument("number of axis type+id should be 2+2"));
 
     // Create an output tensor
-    TensorShape grad_net_shape ;
-    grad_net_shape.AddDim (nframes);
-    grad_net_shape.AddDim (nloc * ndescrpt);
+    TensorShape grad_net_shape;
+    grad_net_shape.AddDim(nframes);
+    grad_net_shape.AddDim(nloc * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
-    
+    OP_REQUIRES_OK(
+        context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
+
     // flat the tensors
-    auto grad		= grad_tensor		.flat();
-    auto net_deriv	= net_deriv_tensor	.flat();
-    auto in_deriv	= in_deriv_tensor	.flat();
-    auto nlist		= nlist_tensor		.flat();
-    auto axis		= axis_tensor		.flat();
-    auto grad_net	= grad_net_tensor	->flat();
+    auto grad = grad_tensor.flat();
+    auto net_deriv = net_deriv_tensor.flat();
+    auto in_deriv = in_deriv_tensor.flat();
+    auto nlist = nlist_tensor.flat();
+    auto axis = axis_tensor.flat();
+    auto grad_net = grad_net_tensor->flat();
 
     // loop over frames
 #pragma omp parallel for
-    for (int kk = 0; kk < nframes; ++kk){
-
-      int grad_iter	= kk * nloc * 3;
-      int net_iter	= kk * nloc * ndescrpt;
-      int in_iter	= kk * nloc * ndescrpt * 12;
-      int nlist_iter	= kk * nloc * nnei;
-      int axis_iter	= kk * nloc * 4;
-      int grad_net_iter	= kk * nloc * ndescrpt;
+    for (int kk = 0; kk < nframes; ++kk) {
+      int grad_iter = kk * nloc * 3;
+      int net_iter = kk * nloc * ndescrpt;
+      int in_iter = kk * nloc * ndescrpt * 12;
+      int nlist_iter = kk * nloc * nnei;
+      int axis_iter = kk * nloc * 4;
+      int grad_net_iter = kk * nloc * ndescrpt;
 
       // reset the frame to 0
-      for (int ii = 0; ii < nloc; ++ii){
-	for (int aa = 0; aa < ndescrpt; ++aa){
-	  grad_net (grad_net_iter + ii * ndescrpt + aa) = 0;
-	}
-      }      
+      for (int ii = 0; ii < nloc; ++ii) {
+        for (int aa = 0; aa < ndescrpt; ++aa) {
+          grad_net(grad_net_iter + ii * ndescrpt + aa) = 0;
+        }
+      }
 
       // compute grad of one frame
-      for (int ii = 0; ii < nloc; ++ii){
-	int i_idx = ii;
-	
-	// deriv wrt center atom
-	for (int aa = 0; aa < ndescrpt; ++aa){
-	  for (int dd = 0; dd < 3; ++dd){
-	    grad_net (grad_net_iter + i_idx * ndescrpt + aa) -= grad (grad_iter + i_idx * 3 + dd) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + dd);
-	  }
-	}
-
-	// set axes
-	int axis0_type = axis (axis_iter + i_idx * 4 + 0);
-	int axis1_type = axis (axis_iter + i_idx * 4 + 2);
-	int axis_0  = axis (axis_iter + i_idx * 4 + 1);
-	int axis_1  = axis (axis_iter + i_idx * 4 + 3);
-	if (axis0_type == 1) axis_0 += n_a_sel;
-	if (axis1_type == 1) axis_1 += n_a_sel;
-
-	// loop over neighbors
-	for (int jj = 0; jj < nnei; ++jj){
-	  int j_idx = nlist (nlist_iter + i_idx * nnei + jj);	  
-	  if (j_idx > nloc) j_idx = j_idx % nloc;
-	  if (j_idx < 0) continue;
-	  if (jj == axis_0) {
-	    for (int aa = 0; aa < ndescrpt; ++aa){
-	      for (int dd = 0; dd < 3; ++dd){
-		grad_net (grad_net_iter + i_idx * ndescrpt + aa) -= grad (grad_iter + j_idx * 3 + dd) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 + dd);
-	      }
-	    }
-	  }
-	  else if (jj == axis_1) {
-	    for (int aa = 0; aa < ndescrpt; ++aa){
-	      for (int dd = 0; dd < 3; ++dd){
-		grad_net (grad_net_iter + i_idx * ndescrpt + aa) -= grad (grad_iter + j_idx * 3 + dd) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 + dd);
-	      }
-	    }
-	  }
-	  else {
-	    int aa_start, aa_end;
-	    make_descript_range (aa_start, aa_end, jj);
-	    for (int aa = aa_start; aa < aa_end; ++aa){
-	      for (int dd = 0; dd < 3; ++dd){
-		grad_net (grad_net_iter + i_idx * ndescrpt + aa) -= grad (grad_iter + j_idx * 3 + dd) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 + dd);
-	      }
-	    }
-	  }
-	}
+      for (int ii = 0; ii < nloc; ++ii) {
+        int i_idx = ii;
+
+        // deriv wrt center atom
+        for (int aa = 0; aa < ndescrpt; ++aa) {
+          for (int dd = 0; dd < 3; ++dd) {
+            grad_net(grad_net_iter + i_idx * ndescrpt + aa) -=
+                grad(grad_iter + i_idx * 3 + dd) *
+                in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + dd);
+          }
+        }
+
+        // set axes
+        int axis0_type = axis(axis_iter + i_idx * 4 + 0);
+        int axis1_type = axis(axis_iter + i_idx * 4 + 2);
+        int axis_0 = axis(axis_iter + i_idx * 4 + 1);
+        int axis_1 = axis(axis_iter + i_idx * 4 + 3);
+        if (axis0_type == 1) axis_0 += n_a_sel;
+        if (axis1_type == 1) axis_1 += n_a_sel;
+
+        // loop over neighbors
+        for (int jj = 0; jj < nnei; ++jj) {
+          int j_idx = nlist(nlist_iter + i_idx * nnei + jj);
+          if (j_idx > nloc) j_idx = j_idx % nloc;
+          if (j_idx < 0) continue;
+          if (jj == axis_0) {
+            for (int aa = 0; aa < ndescrpt; ++aa) {
+              for (int dd = 0; dd < 3; ++dd) {
+                grad_net(grad_net_iter + i_idx * ndescrpt + aa) -=
+                    grad(grad_iter + j_idx * 3 + dd) *
+                    in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 +
+                             dd);
+              }
+            }
+          } else if (jj == axis_1) {
+            for (int aa = 0; aa < ndescrpt; ++aa) {
+              for (int dd = 0; dd < 3; ++dd) {
+                grad_net(grad_net_iter + i_idx * ndescrpt + aa) -=
+                    grad(grad_iter + j_idx * 3 + dd) *
+                    in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 +
+                             dd);
+              }
+            }
+          } else {
+            int aa_start, aa_end;
+            make_descript_range(aa_start, aa_end, jj);
+            for (int aa = aa_start; aa < aa_end; ++aa) {
+              for (int dd = 0; dd < 3; ++dd) {
+                grad_net(grad_net_iter + i_idx * ndescrpt + aa) -=
+                    grad(grad_iter + j_idx * 3 + dd) *
+                    in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 +
+                             dd);
+              }
+            }
+          }
+        }
       }
     }
   }
-private:
+
+ private:
   int n_r_sel, n_a_sel, n_a_shift;
-  inline void 
-  make_descript_range (int & idx_start,
-		       int & idx_end,
-		       const int & nei_idx) {
+  inline void make_descript_range(int& idx_start,
+                                  int& idx_end,
+                                  const int& nei_idx) {
     if (nei_idx < n_a_sel) {
       idx_start = nei_idx * 4;
-      idx_end   = nei_idx * 4 + 4;
-    }
-    else {
+      idx_end = nei_idx * 4 + 4;
+    } else {
       idx_start = n_a_shift + (nei_idx - n_a_sel);
-      idx_end   = n_a_shift + (nei_idx - n_a_sel) + 1;
+      idx_end = n_a_shift + (nei_idx - n_a_sel) + 1;
     }
   }
 };
 
-#define REGISTER_CPU(T)                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("ProdForceGrad").Device(DEVICE_CPU).TypeConstraint("T"),                       \
-    ProdForceGradOp);
+#define REGISTER_CPU(T)                                                \
+  REGISTER_KERNEL_BUILDER(                                             \
+      Name("ProdForceGrad").Device(DEVICE_CPU).TypeConstraint("T"), \
+      ProdForceGradOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
diff --git a/source/op/prod_force_grad_multi_device.cc b/source/op/prod_force_grad_multi_device.cc
index 2316fa3029..3383eb9f6c 100644
--- a/source/op/prod_force_grad_multi_device.cc
+++ b/source/op/prod_force_grad_multi_device.cc
@@ -21,42 +21,51 @@ REGISTER_OP("ProdForceSeRGrad")
     .Input("natoms: int32")
     .Output("grad_net: T");
 
-template
+template 
 class ProdForceSeAGradOp : public OpKernel {
-public:
-  explicit ProdForceSeAGradOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));    
-    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));    
+ public:
+  explicit ProdForceSeAGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));
+    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));
     n_a_shift = n_a_sel * 4;
   }
 
   void Compute(OpKernelContext* context) override {
-      deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& grad_tensor		= context->input(context_input_index++);
-    const Tensor& net_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& in_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& nlist_tensor		= context->input(context_input_index++);
-    const Tensor& natoms_tensor		= context->input(context_input_index++);
+    const Tensor& grad_tensor = context->input(context_input_index++);
+    const Tensor& net_deriv_tensor = context->input(context_input_index++);
+    const Tensor& in_deriv_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    TensorShape grad_shape		= grad_tensor.shape();
-    TensorShape net_deriv_shape		= net_deriv_tensor.shape();
-    TensorShape in_deriv_shape		= in_deriv_tensor.shape();
-    TensorShape nlist_shape		= nlist_tensor.shape();
+    TensorShape grad_shape = grad_tensor.shape();
+    TensorShape net_deriv_shape = net_deriv_tensor.shape();
+    TensorShape in_deriv_shape = in_deriv_tensor.shape();
+    TensorShape nlist_shape = nlist_tensor.shape();
 
-    OP_REQUIRES (context, (grad_shape.dims() == 2),	errors::InvalidArgument ("Dim of grad should be 2"));
-    OP_REQUIRES (context, (net_deriv_shape.dims() == 2),errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (nlist_shape.dims() == 2),	errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (grad_shape.dims() == 2),
+                errors::InvalidArgument("Dim of grad should be 2"));
+    OP_REQUIRES(context, (net_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (nlist_shape.dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
 
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -64,111 +73,120 @@ class ProdForceSeAGradOp : public OpKernel {
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == in_deriv_shape.dim_size(0)),	errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    
-    OP_REQUIRES (context, (nloc * 3 == grad_shape.dim_size(1)),		errors::InvalidArgument ("input grad shape should be 3 x natoms"));
-    OP_REQUIRES (context, (int_64(nloc) * ndescrpt * 3 == in_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of descriptors should match"));
-    OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel),			errors::InvalidArgument ("number of neighbors should match"));
+    OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == in_deriv_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == nlist_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+
+    OP_REQUIRES(
+        context, (nloc * 3 == grad_shape.dim_size(1)),
+        errors::InvalidArgument("input grad shape should be 3 x natoms"));
+    OP_REQUIRES(context,
+                (int_64(nloc) * ndescrpt * 3 == in_deriv_shape.dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
+                errors::InvalidArgument("number of neighbors should match"));
 
     // Create an output tensor
-    TensorShape grad_net_shape ;
-    grad_net_shape.AddDim (nframes);
-    grad_net_shape.AddDim (int_64(nloc) * ndescrpt);
+    TensorShape grad_net_shape;
+    grad_net_shape.AddDim(nframes);
+    grad_net_shape.AddDim(int_64(nloc) * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
     int context_output_index = 0;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        grad_net_shape, 
-        &grad_net_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
-    assert (nframes == grad_net_shape.dim_size(0));
-    assert (nframes == grad_shape.dim_size(0));
-    assert (nframes == net_deriv_tensor.shape().dim_size(0));
-    assert (nframes == in_deriv_tensor.shape().dim_size(0));
-    assert (nframes == nlist_tensor.shape().dim_size(0));
-    assert (nloc * ndescrpt == grad_net_shape.dim_size(1));
-    assert (nloc * 3 == grad_shape.dim_size(1));
-    assert (nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
-    assert (nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
-    assert (nloc * nnei == nlist_tensor.shape().dim_size(1));
-    assert (nnei * 4 == ndescrpt);	
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++,
+                                            grad_net_shape, &grad_net_tensor));
+    DeviceFunctor()(device, context->eigen_device());
+    assert(nframes == grad_net_shape.dim_size(0));
+    assert(nframes == grad_shape.dim_size(0));
+    assert(nframes == net_deriv_tensor.shape().dim_size(0));
+    assert(nframes == in_deriv_tensor.shape().dim_size(0));
+    assert(nframes == nlist_tensor.shape().dim_size(0));
+    assert(nloc * ndescrpt == grad_net_shape.dim_size(1));
+    assert(nloc * 3 == grad_shape.dim_size(1));
+    assert(nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
+    assert(nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
+    assert(nloc * nnei == nlist_tensor.shape().dim_size(1));
+    assert(nnei * 4 == ndescrpt);
     // flat the tensors
-    FPTYPE * p_grad_net = grad_net_tensor->flat().data();
-    const FPTYPE * p_grad = grad_tensor.flat().data();
-    const FPTYPE * p_net_deriv = net_deriv_tensor.flat().data();
-    const FPTYPE * p_in_deriv = in_deriv_tensor.flat().data();
-    const int * p_nlist	= nlist_tensor.flat().data();
+    FPTYPE* p_grad_net = grad_net_tensor->flat().data();
+    const FPTYPE* p_grad = grad_tensor.flat().data();
+    const FPTYPE* p_net_deriv = net_deriv_tensor.flat().data();
+    const FPTYPE* p_in_deriv = in_deriv_tensor.flat().data();
+    const int* p_nlist = nlist_tensor.flat().data();
+
+    for (int_64 kk = 0; kk < nframes; ++kk) {
+      FPTYPE* grad_net = p_grad_net + kk * nloc * ndescrpt;
+      const FPTYPE* grad = p_grad + kk * nloc * 3;
+      const FPTYPE* in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
+      const int* nlist = p_nlist + kk * nloc * nnei;
+      if (device == "GPU") {
+#if GOOGLE_CUDA
+        deepmd::prod_force_grad_a_gpu_cuda(grad_net, grad, in_deriv, nlist,
+                                           nloc, nnei);
+#endif  // GOOGLE_CUDA
 
-    for (int_64 kk = 0; kk < nframes; ++kk){
-        FPTYPE * grad_net = p_grad_net + kk * nloc * ndescrpt;
-        const FPTYPE * grad = p_grad + kk * nloc * 3;
-        const FPTYPE * in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
-        const int * nlist = p_nlist + kk * nloc * nnei; 
-        if (device == "GPU") {
-        #if GOOGLE_CUDA
-        deepmd::prod_force_grad_a_gpu_cuda(    
-            grad_net, 
-            grad, in_deriv, nlist, nloc, nnei);
-        #endif // GOOGLE_CUDA
-        
-        #if TENSORFLOW_USE_ROCM
-        deepmd::prod_force_grad_a_gpu_rocm(    
-            grad_net, 
-            grad, in_deriv, nlist, nloc, nnei);
-        #endif // TENSORFLOW_USE_ROCM
-        }
-        else if (device == "CPU") {
-        deepmd::prod_force_grad_a_cpu(    
-            grad_net, 
-            grad, in_deriv, nlist, nloc, nnei);
-        }
+#if TENSORFLOW_USE_ROCM
+        deepmd::prod_force_grad_a_gpu_rocm(grad_net, grad, in_deriv, nlist,
+                                           nloc, nnei);
+#endif  // TENSORFLOW_USE_ROCM
+      } else if (device == "CPU") {
+        deepmd::prod_force_grad_a_cpu(grad_net, grad, in_deriv, nlist, nloc,
+                                      nnei);
+      }
     }
   }
-private:
+
+ private:
   std::string device;
   int n_r_sel, n_a_sel, n_a_shift;
 };
 
-template
-class ProdForceSeRGradOp : public OpKernel 
-{
-public:
-  explicit ProdForceSeRGradOp(OpKernelConstruction* context) : OpKernel(context) {}
+template 
+class ProdForceSeRGradOp : public OpKernel {
+ public:
+  explicit ProdForceSeRGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
 
   void Compute(OpKernelContext* context) override {
-      deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& grad_tensor		= context->input(context_input_index++);
-    const Tensor& net_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& in_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& nlist_tensor		= context->input(context_input_index++);
-    const Tensor& natoms_tensor		= context->input(context_input_index++);
+    const Tensor& grad_tensor = context->input(context_input_index++);
+    const Tensor& net_deriv_tensor = context->input(context_input_index++);
+    const Tensor& in_deriv_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    TensorShape grad_shape		= grad_tensor.shape();
-    TensorShape net_deriv_shape		= net_deriv_tensor.shape();
-    TensorShape in_deriv_shape		= in_deriv_tensor.shape();
-    TensorShape nlist_shape		= nlist_tensor.shape();
+    TensorShape grad_shape = grad_tensor.shape();
+    TensorShape net_deriv_shape = net_deriv_tensor.shape();
+    TensorShape in_deriv_shape = in_deriv_tensor.shape();
+    TensorShape nlist_shape = nlist_tensor.shape();
 
-    OP_REQUIRES (context, (grad_shape.dims() == 2),	errors::InvalidArgument ("Dim of grad should be 2"));
-    OP_REQUIRES (context, (net_deriv_shape.dims() == 2),errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (nlist_shape.dims() == 2),	errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (grad_shape.dims() == 2),
+                errors::InvalidArgument("Dim of grad should be 2"));
+    OP_REQUIRES(context, (net_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (nlist_shape.dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
 
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -176,96 +194,100 @@ class ProdForceSeRGradOp : public OpKernel
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == in_deriv_shape.dim_size(0)),	errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    
-    OP_REQUIRES (context, (nloc * 3 == grad_shape.dim_size(1)),		errors::InvalidArgument ("input grad shape should be 3 x natoms"));
-    OP_REQUIRES (context, (int_64(nloc) * ndescrpt * 3 == in_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of descriptors should match"));
+    OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == in_deriv_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == nlist_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+
+    OP_REQUIRES(
+        context, (nloc * 3 == grad_shape.dim_size(1)),
+        errors::InvalidArgument("input grad shape should be 3 x natoms"));
+    OP_REQUIRES(context,
+                (int_64(nloc) * ndescrpt * 3 == in_deriv_shape.dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
 
     // Create an output tensor
-    TensorShape grad_net_shape ;
-    grad_net_shape.AddDim (nframes);
-    grad_net_shape.AddDim (int_64(nloc) * ndescrpt);
+    TensorShape grad_net_shape;
+    grad_net_shape.AddDim(nframes);
+    grad_net_shape.AddDim(int_64(nloc) * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
     int context_output_index = 0;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        grad_net_shape, 
-        &grad_net_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
-    assert (nframes == grad_net_shape.dim_size(0));
-    assert (nframes == grad_shape.dim_size(0));
-    assert (nframes == net_deriv_tensor.shape().dim_size(0));
-    assert (nframes == in_deriv_tensor.shape().dim_size(0));
-    assert (nframes == nlist_tensor.shape().dim_size(0));
-    assert (nloc * ndescrpt == grad_net_shape.dim_size(1));
-    assert (nloc * 3 == grad_shape.dim_size(1));
-    assert (nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
-    assert (nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
-    assert (nloc * nnei == nlist_tensor.shape().dim_size(1));
-    assert (nnei * 1 == ndescrpt);	
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++,
+                                            grad_net_shape, &grad_net_tensor));
+    DeviceFunctor()(device, context->eigen_device());
+    assert(nframes == grad_net_shape.dim_size(0));
+    assert(nframes == grad_shape.dim_size(0));
+    assert(nframes == net_deriv_tensor.shape().dim_size(0));
+    assert(nframes == in_deriv_tensor.shape().dim_size(0));
+    assert(nframes == nlist_tensor.shape().dim_size(0));
+    assert(nloc * ndescrpt == grad_net_shape.dim_size(1));
+    assert(nloc * 3 == grad_shape.dim_size(1));
+    assert(nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
+    assert(nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
+    assert(nloc * nnei == nlist_tensor.shape().dim_size(1));
+    assert(nnei * 1 == ndescrpt);
     // flat the tensors
-    FPTYPE * p_grad_net = grad_net_tensor->flat().data();
-    const FPTYPE * p_grad = grad_tensor.flat().data();
-    const FPTYPE * p_net_deriv = net_deriv_tensor.flat().data();
-    const FPTYPE * p_in_deriv = in_deriv_tensor.flat().data();
-    const int * p_nlist	= nlist_tensor.flat().data();
+    FPTYPE* p_grad_net = grad_net_tensor->flat().data();
+    const FPTYPE* p_grad = grad_tensor.flat().data();
+    const FPTYPE* p_net_deriv = net_deriv_tensor.flat().data();
+    const FPTYPE* p_in_deriv = in_deriv_tensor.flat().data();
+    const int* p_nlist = nlist_tensor.flat().data();
 
     // loop over frames
-    for (int_64 kk = 0; kk < nframes; ++kk){
-        FPTYPE * grad_net = p_grad_net + kk * nloc * ndescrpt;
-        const FPTYPE * grad = p_grad + kk * nloc * 3;
-        const FPTYPE * in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
-        const int * nlist = p_nlist + kk * nloc * nnei; 
-        if (device == "GPU") {
-          #if GOOGLE_CUDA
-          deepmd::prod_force_grad_r_gpu_cuda(    
-              grad_net, 
-              grad, in_deriv, nlist, nloc, nnei);
-          #endif // GOOGLE_CUDA
-          
-          #if TENSORFLOW_USE_ROCM
-          deepmd::prod_force_grad_r_gpu_rocm(    
-              grad_net, 
-              grad, in_deriv, nlist, nloc, nnei);
-          #endif // TENSORFLOW_USE_ROCM
-        }
-        else if (device == "CPU") {
-          deepmd::prod_force_grad_r_cpu(    
-              grad_net, 
-              grad, in_deriv, nlist, nloc, nnei);
-        }
+    for (int_64 kk = 0; kk < nframes; ++kk) {
+      FPTYPE* grad_net = p_grad_net + kk * nloc * ndescrpt;
+      const FPTYPE* grad = p_grad + kk * nloc * 3;
+      const FPTYPE* in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
+      const int* nlist = p_nlist + kk * nloc * nnei;
+      if (device == "GPU") {
+#if GOOGLE_CUDA
+        deepmd::prod_force_grad_r_gpu_cuda(grad_net, grad, in_deriv, nlist,
+                                           nloc, nnei);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+        deepmd::prod_force_grad_r_gpu_rocm(grad_net, grad, in_deriv, nlist,
+                                           nloc, nnei);
+#endif  // TENSORFLOW_USE_ROCM
+      } else if (device == "CPU") {
+        deepmd::prod_force_grad_r_cpu(grad_net, grad, in_deriv, nlist, nloc,
+                                      nnei);
+      }
     }
   }
-  private:
+
+ private:
   std::string device;
 };
 
 // Register the CPU kernels.
-#define REGISTER_CPU(T)                                                                       \
-REGISTER_KERNEL_BUILDER(                                                                      \
-    Name("ProdForceSeAGrad").Device(DEVICE_CPU).TypeConstraint("T"),                       \
-    ProdForceSeAGradOp);                                                        \
-REGISTER_KERNEL_BUILDER(                                                                      \
-    Name("ProdForceSeRGrad").Device(DEVICE_CPU).TypeConstraint("T"),                       \
-    ProdForceSeRGradOp);
+#define REGISTER_CPU(T)                                                   \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("ProdForceSeAGrad").Device(DEVICE_CPU).TypeConstraint("T"), \
+      ProdForceSeAGradOp);                                  \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("ProdForceSeRGrad").Device(DEVICE_CPU).TypeConstraint("T"), \
+      ProdForceSeRGradOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
 // Register the GPU kernels.
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#define REGISTER_GPU(T)                                                                      \
-REGISTER_KERNEL_BUILDER(                                                                     \
-    Name("ProdForceSeAGrad").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms"), \
-    ProdForceSeAGradOp);                                                       \
-REGISTER_KERNEL_BUILDER(                                                                     \
-    Name("ProdForceSeRGrad").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms"), \
-    ProdForceSeRGradOp);
+#define REGISTER_GPU(T)                                      \
+  REGISTER_KERNEL_BUILDER(Name("ProdForceSeAGrad")           \
+                              .Device(DEVICE_GPU)            \
+                              .TypeConstraint("T")        \
+                              .HostMemory("natoms"),         \
+                          ProdForceSeAGradOp); \
+  REGISTER_KERNEL_BUILDER(Name("ProdForceSeRGrad")           \
+                              .Device(DEVICE_GPU)            \
+                              .TypeConstraint("T")        \
+                              .HostMemory("natoms"),         \
+                          ProdForceSeRGradOp);
 REGISTER_GPU(float);
 REGISTER_GPU(double);
-#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
\ No newline at end of file
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/source/op/prod_force_multi_device.cc b/source/op/prod_force_multi_device.cc
index 08c77bca65..73914292db 100644
--- a/source/op/prod_force_multi_device.cc
+++ b/source/op/prod_force_multi_device.cc
@@ -1,6 +1,6 @@
 #include "custom_op.h"
-#include "prod_force.h"
 #include "errors.h"
+#include "prod_force.h"
 
 REGISTER_OP("ProdForceSeA")
     .Attr("T: {float, double} = DT_DOUBLE")
@@ -45,76 +45,88 @@ REGISTER_OP("ProdForceSeR")
     .Input("natoms: int32")
     .Output("force: T");
 
-template
+template 
 class ProdForceSeAOp : public OpKernel {
-public:
+ public:
   explicit ProdForceSeAOp(OpKernelConstruction* context) : OpKernel(context) {
-    if(context->HasAttr("parallel")) OP_REQUIRES_OK(context, context->GetAttr("parallel", ¶llel));
-    if(context->HasAttr("start_frac")) OP_REQUIRES_OK(context, context->GetAttr("start_frac", &start_frac));
-    if(context->HasAttr("end_frac")) OP_REQUIRES_OK(context, context->GetAttr("end_frac", &end_frac));
+    if (context->HasAttr("parallel"))
+      OP_REQUIRES_OK(context, context->GetAttr("parallel", ¶llel));
+    if (context->HasAttr("start_frac"))
+      OP_REQUIRES_OK(context, context->GetAttr("start_frac", &start_frac));
+    if (context->HasAttr("end_frac"))
+      OP_REQUIRES_OK(context, context->GetAttr("end_frac", &end_frac));
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& net_deriv_tensor  = context->input(context_input_index++);
-    const Tensor& in_deriv_tensor   = context->input(context_input_index++);
-    const Tensor& nlist_tensor      = context->input(context_input_index++);
-    const Tensor& natoms_tensor     = context->input(context_input_index++);
+    const Tensor& net_deriv_tensor = context->input(context_input_index++);
+    const Tensor& in_deriv_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
     // set size of the sample
-    OP_REQUIRES (context, (net_deriv_tensor.shape().dims() == 2),   errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_tensor.shape().dims() == 2),    errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2),       errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),      errors::InvalidArgument ("Dim of natoms should be 1"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    const int * natoms = natoms_tensor.flat().data();
+    OP_REQUIRES(context, (net_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (nlist_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    const int* natoms = natoms_tensor.flat().data();
     int nloc = natoms[0];
     int nall = natoms[1];
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
     // check the sizes
-    OP_REQUIRES (context, (nframes == in_deriv_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)),    errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (int_64(nloc) * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of descriptors should match"));
+    OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(
+        context,
+        (int_64(nloc) * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1)),
+        errors::InvalidArgument("number of descriptors should match"));
     // Create an output tensor
-    TensorShape force_shape ;
-    force_shape.AddDim (nframes);
-    force_shape.AddDim (3 * nall);
+    TensorShape force_shape;
+    force_shape.AddDim(nframes);
+    force_shape.AddDim(3 * nall);
     Tensor* force_tensor = NULL;
     int context_output_index = 0;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        force_shape, 
-        &force_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
-    assert (nframes == force_shape.dim_size(0));
-    assert (nframes == net_deriv_tensor.shape().dim_size(0));
-    assert (nframes == in_deriv_tensor.shape().dim_size(0));
-    assert (nframes == nlist_tensor.shape().dim_size(0));
-    assert (nall * 3 == force_shape.dim_size(1));
-    assert (nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
-    assert (nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
-    assert (nloc * nnei == nlist_tensor.shape().dim_size(1));
-    assert (nnei * 4 == ndescrpt);	  
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++, force_shape,
+                                            &force_tensor));
+    DeviceFunctor()(device, context->eigen_device());
+    assert(nframes == force_shape.dim_size(0));
+    assert(nframes == net_deriv_tensor.shape().dim_size(0));
+    assert(nframes == in_deriv_tensor.shape().dim_size(0));
+    assert(nframes == nlist_tensor.shape().dim_size(0));
+    assert(nall * 3 == force_shape.dim_size(1));
+    assert(nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
+    assert(nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
+    assert(nloc * nnei == nlist_tensor.shape().dim_size(1));
+    assert(nnei * 4 == ndescrpt);
 
     // flat the tensors
-    FPTYPE * p_force = force_tensor->flat().data();
-    const FPTYPE * p_net_deriv = net_deriv_tensor.flat().data();
-    const FPTYPE * p_in_deriv = in_deriv_tensor.flat().data();
-    const int * p_nlist = nlist_tensor.flat().data();
+    FPTYPE* p_force = force_tensor->flat().data();
+    const FPTYPE* p_net_deriv = net_deriv_tensor.flat().data();
+    const FPTYPE* p_in_deriv = in_deriv_tensor.flat().data();
+    const int* p_nlist = nlist_tensor.flat().data();
 
     int start_index = 0, end_index = nloc, nloc_loc = nloc;
     if (parallel) {
       if (device != "CPU")
-        throw deepmd::deepmd_exception("Auto parallelization for ProdForceA is not supported on GPUs!");
+        throw deepmd::deepmd_exception(
+            "Auto parallelization for ProdForceA is not supported on GPUs!");
       // we split in_deriv, net_deriv, and nlist along nloc
       // compute start and end index along nloc
       // frac belongs to [0, 1]
@@ -124,31 +136,28 @@ class ProdForceSeAOp : public OpKernel {
       nloc_loc = end_index - start_index;
     }
 
-    for(int_64 kk = 0; kk < nframes; ++kk){
-      FPTYPE * force = p_force + kk * nall * 3;
-      const FPTYPE * net_deriv = p_net_deriv + kk * nloc * ndescrpt;
-      const FPTYPE * in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
-      const int * nlist = p_nlist + kk * nloc * nnei;      
-    if (device == "GPU") {
-      #if GOOGLE_CUDA
-      deepmd::prod_force_a_gpu_cuda(    
-          force, 
-          net_deriv, in_deriv, nlist, nloc, nall, nnei);
-      #endif // GOOGLE_CUDA
-      
-      #if TENSORFLOW_USE_ROCM
-      deepmd::prod_force_a_gpu_rocm(    
-          force, 
-          net_deriv, in_deriv, nlist, nloc, nall, nnei);
-      #endif // TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::prod_force_a_cpu(    
-          force, 
-          net_deriv, in_deriv, nlist, nloc_loc, nall, nnei, start_index=start_index);
-    }
+    for (int_64 kk = 0; kk < nframes; ++kk) {
+      FPTYPE* force = p_force + kk * nall * 3;
+      const FPTYPE* net_deriv = p_net_deriv + kk * nloc * ndescrpt;
+      const FPTYPE* in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
+      const int* nlist = p_nlist + kk * nloc * nnei;
+      if (device == "GPU") {
+#if GOOGLE_CUDA
+        deepmd::prod_force_a_gpu_cuda(force, net_deriv, in_deriv, nlist, nloc,
+                                      nall, nnei);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+        deepmd::prod_force_a_gpu_rocm(force, net_deriv, in_deriv, nlist, nloc,
+                                      nall, nnei);
+#endif  // TENSORFLOW_USE_ROCM
+      } else if (device == "CPU") {
+        deepmd::prod_force_a_cpu(force, net_deriv, in_deriv, nlist, nloc_loc,
+                                 nall, nnei, start_index = start_index);
+      }
     }
   }
+
  private:
   std::string device;
   bool parallel = false;
@@ -156,120 +165,128 @@ class ProdForceSeAOp : public OpKernel {
   float end_frac = 1.f;
 };
 
-template
+template 
 class ProdForceSeROp : public OpKernel {
-public:
+ public:
   explicit ProdForceSeROp(OpKernelConstruction* context) : OpKernel(context) {}
   void Compute(OpKernelContext* context) override {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& net_deriv_tensor  = context->input(context_input_index++);
-    const Tensor& in_deriv_tensor   = context->input(context_input_index++);
-    const Tensor& nlist_tensor      = context->input(context_input_index++);
-    const Tensor& natoms_tensor     = context->input(context_input_index++);
+    const Tensor& net_deriv_tensor = context->input(context_input_index++);
+    const Tensor& in_deriv_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
     // set size of the sample
-    OP_REQUIRES (context, (net_deriv_tensor.shape().dims() == 2),   errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_tensor.shape().dims() == 2),    errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2),       errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),      errors::InvalidArgument ("Dim of natoms should be 1"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    const int * natoms = natoms_tensor.flat().data();
+    OP_REQUIRES(context, (net_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (nlist_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    const int* natoms = natoms_tensor.flat().data();
     int nloc = natoms[0];
     int nall = natoms[1];
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
     // check the sizes
-    OP_REQUIRES (context, (nframes == in_deriv_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)),    errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of descriptors should match"));
+    OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context,
+                (nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
     // Create an output tensor
-    TensorShape force_shape ;
-    force_shape.AddDim (nframes);
-    force_shape.AddDim (3 * nall);
+    TensorShape force_shape;
+    force_shape.AddDim(nframes);
+    force_shape.AddDim(3 * nall);
     Tensor* force_tensor = NULL;
     int context_output_index = 0;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        force_shape, 
-        &force_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
-    assert (nframes == force_shape.dim_size(0));
-    assert (nframes == net_deriv_tensor.shape().dim_size(0));
-    assert (nframes == in_deriv_tensor.shape().dim_size(0));
-    assert (nframes == nlist_tensor.shape().dim_size(0));
-    assert (nall * 3 == force_shape.dim_size(1));
-    assert (nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
-    assert (nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
-    assert (nloc * nnei == nlist_tensor.shape().dim_size(1));
-    assert (nnei * 1 == ndescrpt);	 
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++, force_shape,
+                                            &force_tensor));
+    DeviceFunctor()(device, context->eigen_device());
+    assert(nframes == force_shape.dim_size(0));
+    assert(nframes == net_deriv_tensor.shape().dim_size(0));
+    assert(nframes == in_deriv_tensor.shape().dim_size(0));
+    assert(nframes == nlist_tensor.shape().dim_size(0));
+    assert(nall * 3 == force_shape.dim_size(1));
+    assert(nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
+    assert(nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
+    assert(nloc * nnei == nlist_tensor.shape().dim_size(1));
+    assert(nnei * 1 == ndescrpt);
     // flat the tensors
-    FPTYPE * p_force = force_tensor->flat().data();
-    const FPTYPE * p_net_deriv = net_deriv_tensor.flat().data();
-    const FPTYPE * p_in_deriv = in_deriv_tensor.flat().data();
-    const int * p_nlist = nlist_tensor.flat().data();
+    FPTYPE* p_force = force_tensor->flat().data();
+    const FPTYPE* p_net_deriv = net_deriv_tensor.flat().data();
+    const FPTYPE* p_in_deriv = in_deriv_tensor.flat().data();
+    const int* p_nlist = nlist_tensor.flat().data();
 
-    for(int_64 kk = 0; kk < nframes; ++kk){
-      FPTYPE * force = p_force + kk * nall * 3;
-      const FPTYPE * net_deriv = p_net_deriv + kk * nloc * ndescrpt;
-      const FPTYPE * in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
-      const int * nlist = p_nlist + kk * nloc * nnei;      
-    if (device == "GPU") {
-      #if GOOGLE_CUDA
-      deepmd::prod_force_r_gpu_cuda(    
-          force, 
-          net_deriv, in_deriv, nlist, nloc, nall, nnei);
-      #endif // GOOGLE_CUDA
-      
-      #if TENSORFLOW_USE_ROCM
-      deepmd::prod_force_r_gpu_rocm(    
-          force, 
-          net_deriv, in_deriv, nlist, nloc, nall, nnei);
-      #endif // TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::prod_force_r_cpu(    
-          force, 
-          net_deriv, in_deriv, nlist, nloc, nall, nnei);
-    }
+    for (int_64 kk = 0; kk < nframes; ++kk) {
+      FPTYPE* force = p_force + kk * nall * 3;
+      const FPTYPE* net_deriv = p_net_deriv + kk * nloc * ndescrpt;
+      const FPTYPE* in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
+      const int* nlist = p_nlist + kk * nloc * nnei;
+      if (device == "GPU") {
+#if GOOGLE_CUDA
+        deepmd::prod_force_r_gpu_cuda(force, net_deriv, in_deriv, nlist, nloc,
+                                      nall, nnei);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+        deepmd::prod_force_r_gpu_rocm(force, net_deriv, in_deriv, nlist, nloc,
+                                      nall, nnei);
+#endif  // TENSORFLOW_USE_ROCM
+      } else if (device == "CPU") {
+        deepmd::prod_force_r_cpu(force, net_deriv, in_deriv, nlist, nloc, nall,
+                                 nnei);
+      }
     }
   }
+
  private:
   std::string device;
 };
 
 // Register the CPU kernels.
-#define REGISTER_CPU(T)                                                                  \
-REGISTER_KERNEL_BUILDER(                                                                 \
-    Name("ProdForceSeA").Device(DEVICE_CPU).TypeConstraint("T"),                      \
-    ProdForceSeAOp);                                                       \
-REGISTER_KERNEL_BUILDER(                                                                 \
-    Name("ProdForceNorot").Device(DEVICE_CPU).TypeConstraint("T"),                      \
-    ProdForceSeAOp);                                                       \
-REGISTER_KERNEL_BUILDER(                                                                 \
-    Name("ParallelProdForceSeA").Device(DEVICE_CPU).TypeConstraint("T"),             \
-    ProdForceSeAOp);                                                       \
-REGISTER_KERNEL_BUILDER(                                                                 \
-    Name("ProdForceSeR").Device(DEVICE_CPU).TypeConstraint("T"),                      \
-    ProdForceSeROp); 
+#define REGISTER_CPU(T)                                                       \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("ProdForceSeA").Device(DEVICE_CPU).TypeConstraint("T"),         \
+      ProdForceSeAOp);                                          \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("ProdForceNorot").Device(DEVICE_CPU).TypeConstraint("T"),       \
+      ProdForceSeAOp);                                          \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("ParallelProdForceSeA").Device(DEVICE_CPU).TypeConstraint("T"), \
+      ProdForceSeAOp);                                          \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("ProdForceSeR").Device(DEVICE_CPU).TypeConstraint("T"),         \
+      ProdForceSeROp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
 // Register the GPU kernels.
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#define REGISTER_GPU(T)                                                                  \
-REGISTER_KERNEL_BUILDER(                                                                 \
-    Name("ProdForceSeA").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms"), \
-    ProdForceSeAOp);                                                       \
-REGISTER_KERNEL_BUILDER(                                                                 \
-    Name("ProdForceNorot").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms"), \
-    ProdForceSeAOp);                                                       \
-REGISTER_KERNEL_BUILDER(                                                                 \
-    Name("ProdForceSeR").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms"), \
-    ProdForceSeROp);
+#define REGISTER_GPU(T)                                  \
+  REGISTER_KERNEL_BUILDER(Name("ProdForceSeA")           \
+                              .Device(DEVICE_GPU)        \
+                              .TypeConstraint("T")    \
+                              .HostMemory("natoms"),     \
+                          ProdForceSeAOp); \
+  REGISTER_KERNEL_BUILDER(Name("ProdForceNorot")         \
+                              .Device(DEVICE_GPU)        \
+                              .TypeConstraint("T")    \
+                              .HostMemory("natoms"),     \
+                          ProdForceSeAOp); \
+  REGISTER_KERNEL_BUILDER(Name("ProdForceSeR")           \
+                              .Device(DEVICE_GPU)        \
+                              .TypeConstraint("T")    \
+                              .HostMemory("natoms"),     \
+                          ProdForceSeROp);
 REGISTER_GPU(float);
 REGISTER_GPU(double);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-
diff --git a/source/op/prod_force_se_a_grad.cc b/source/op/prod_force_se_a_grad.cc
index 84b2a7ed3b..1d81b7b26e 100644
--- a/source/op/prod_force_se_a_grad.cc
+++ b/source/op/prod_force_se_a_grad.cc
@@ -2,55 +2,63 @@
 #include "prod_force_grad.h"
 
 REGISTER_OP("ProdForceSeAGrad")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("grad: T")
-.Input("net_deriv: T")
-.Input("in_deriv: T")
-.Input("nlist: int32")
-.Input("natoms: int32")
-.Attr("n_a_sel: int")
-.Attr("n_r_sel: int")
-.Output("grad_net: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("grad: T")
+    .Input("net_deriv: T")
+    .Input("in_deriv: T")
+    .Input("nlist: int32")
+    .Input("natoms: int32")
+    .Attr("n_a_sel: int")
+    .Attr("n_r_sel: int")
+    .Output("grad_net: T");
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-template
-class ProdForceSeAGradOp : public OpKernel 
-{
-public:
-  explicit ProdForceSeAGradOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));    
-    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));    
+template 
+class ProdForceSeAGradOp : public OpKernel {
+ public:
+  explicit ProdForceSeAGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));
+    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));
     n_a_shift = n_a_sel * 4;
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& grad_tensor		= context->input(context_input_index++);
-    const Tensor& net_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& in_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& nlist_tensor		= context->input(context_input_index++);
-    const Tensor& natoms_tensor		= context->input(context_input_index++);
+    const Tensor& grad_tensor = context->input(context_input_index++);
+    const Tensor& net_deriv_tensor = context->input(context_input_index++);
+    const Tensor& in_deriv_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    TensorShape grad_shape		= grad_tensor.shape();
-    TensorShape net_deriv_shape		= net_deriv_tensor.shape();
-    TensorShape in_deriv_shape		= in_deriv_tensor.shape();
-    TensorShape nlist_shape		= nlist_tensor.shape();
-
-    OP_REQUIRES (context, (grad_shape.dims() == 2),	errors::InvalidArgument ("Dim of grad should be 2"));
-    OP_REQUIRES (context, (net_deriv_shape.dims() == 2),errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (nlist_shape.dims() == 2),	errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
-
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    TensorShape grad_shape = grad_tensor.shape();
+    TensorShape net_deriv_shape = net_deriv_tensor.shape();
+    TensorShape in_deriv_shape = in_deriv_tensor.shape();
+    TensorShape nlist_shape = nlist_tensor.shape();
+
+    OP_REQUIRES(context, (grad_shape.dims() == 2),
+                errors::InvalidArgument("Dim of grad should be 2"));
+    OP_REQUIRES(context, (net_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (nlist_shape.dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -58,68 +66,70 @@ class ProdForceSeAGradOp : public OpKernel
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == in_deriv_shape.dim_size(0)),	errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    
-    OP_REQUIRES (context, (nloc * 3 == grad_shape.dim_size(1)),		errors::InvalidArgument ("input grad shape should be 3 x natoms"));
-    OP_REQUIRES (context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of descriptors should match"));
-    OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel),			errors::InvalidArgument ("number of neighbors should match"));
+    OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == in_deriv_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == nlist_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+
+    OP_REQUIRES(
+        context, (nloc * 3 == grad_shape.dim_size(1)),
+        errors::InvalidArgument("input grad shape should be 3 x natoms"));
+    OP_REQUIRES(context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
+                errors::InvalidArgument("number of neighbors should match"));
 
     // Create an output tensor
-    TensorShape grad_net_shape ;
-    grad_net_shape.AddDim (nframes);
-    grad_net_shape.AddDim (nloc * ndescrpt);
+    TensorShape grad_net_shape;
+    grad_net_shape.AddDim(nframes);
+    grad_net_shape.AddDim(nloc * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
-    
+    OP_REQUIRES_OK(
+        context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
+
     // flat the tensors
-    auto grad		= grad_tensor		.flat();
-    auto net_deriv	= net_deriv_tensor	.flat();
-    auto in_deriv	= in_deriv_tensor	.flat();
-    auto nlist		= nlist_tensor		.flat();
-    auto grad_net	= grad_net_tensor	->flat();
+    auto grad = grad_tensor.flat();
+    auto net_deriv = net_deriv_tensor.flat();
+    auto in_deriv = in_deriv_tensor.flat();
+    auto nlist = nlist_tensor.flat();
+    auto grad_net = grad_net_tensor->flat();
 
     // loop over frames
 #pragma omp parallel for
-    for (int kk = 0; kk < nframes; ++kk){
-
-      int grad_iter	= kk * nloc * 3;
-      int in_iter	= kk * nloc * ndescrpt * 3;
-      int nlist_iter	= kk * nloc * nnei;
-      int grad_net_iter	= kk * nloc * ndescrpt;
-
-      deepmd::prod_force_grad_a_cpu(
-	  &grad_net(grad_net_iter),
-	  &grad(grad_iter),
-	  &in_deriv(in_iter),
-	  &nlist(nlist_iter),
-	  nloc, 
-	  nnei);
+    for (int kk = 0; kk < nframes; ++kk) {
+      int grad_iter = kk * nloc * 3;
+      int in_iter = kk * nloc * ndescrpt * 3;
+      int nlist_iter = kk * nloc * nnei;
+      int grad_net_iter = kk * nloc * ndescrpt;
+
+      deepmd::prod_force_grad_a_cpu(&grad_net(grad_net_iter), &grad(grad_iter),
+                                    &in_deriv(in_iter), &nlist(nlist_iter),
+                                    nloc, nnei);
     }
   }
-private:
+
+ private:
   int n_r_sel, n_a_sel, n_a_shift;
-  inline void 
-  make_descript_range (int & idx_start,
-		       int & idx_end,
-		       const int & nei_idx) {
+  inline void make_descript_range(int& idx_start,
+                                  int& idx_end,
+                                  const int& nei_idx) {
     if (nei_idx < n_a_sel) {
       idx_start = nei_idx * 4;
-      idx_end   = nei_idx * 4 + 4;
-    }
-    else {
+      idx_end = nei_idx * 4 + 4;
+    } else {
       idx_start = n_a_shift + (nei_idx - n_a_sel);
-      idx_end   = n_a_shift + (nei_idx - n_a_sel) + 1;
+      idx_end = n_a_shift + (nei_idx - n_a_sel) + 1;
     }
   }
 };
 
-#define REGISTER_CPU(T)                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("ProdForceSeAGrad").Device(DEVICE_CPU).TypeConstraint("T"),                       \
-    ProdForceSeAGradOp);
+#define REGISTER_CPU(T)                                                   \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("ProdForceSeAGrad").Device(DEVICE_CPU).TypeConstraint("T"), \
+      ProdForceSeAGradOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
diff --git a/source/op/prod_force_se_r_grad.cc b/source/op/prod_force_se_r_grad.cc
index e02f0c8750..178cf0c732 100644
--- a/source/op/prod_force_se_r_grad.cc
+++ b/source/op/prod_force_se_r_grad.cc
@@ -2,50 +2,57 @@
 #include "prod_force_grad.h"
 
 REGISTER_OP("ProdForceSeRGrad")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("grad: T")
-.Input("net_deriv: T")
-.Input("in_deriv: T")
-.Input("nlist: int32")
-.Input("natoms: int32")
-.Output("grad_net: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("grad: T")
+    .Input("net_deriv: T")
+    .Input("in_deriv: T")
+    .Input("nlist: int32")
+    .Input("natoms: int32")
+    .Output("grad_net: T");
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-template
-class ProdForceSeRGradOp : public OpKernel 
-{
-public:
-  explicit ProdForceSeRGradOp(OpKernelConstruction* context) : OpKernel(context) {
-  }
+template 
+class ProdForceSeRGradOp : public OpKernel {
+ public:
+  explicit ProdForceSeRGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& grad_tensor		= context->input(context_input_index++);
-    const Tensor& net_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& in_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& nlist_tensor		= context->input(context_input_index++);
-    const Tensor& natoms_tensor		= context->input(context_input_index++);
+    const Tensor& grad_tensor = context->input(context_input_index++);
+    const Tensor& net_deriv_tensor = context->input(context_input_index++);
+    const Tensor& in_deriv_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    TensorShape grad_shape		= grad_tensor.shape();
-    TensorShape net_deriv_shape		= net_deriv_tensor.shape();
-    TensorShape in_deriv_shape		= in_deriv_tensor.shape();
-    TensorShape nlist_shape		= nlist_tensor.shape();
+    TensorShape grad_shape = grad_tensor.shape();
+    TensorShape net_deriv_shape = net_deriv_tensor.shape();
+    TensorShape in_deriv_shape = in_deriv_tensor.shape();
+    TensorShape nlist_shape = nlist_tensor.shape();
 
-    OP_REQUIRES (context, (grad_shape.dims() == 2),	errors::InvalidArgument ("Dim of grad should be 2"));
-    OP_REQUIRES (context, (net_deriv_shape.dims() == 2),errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (nlist_shape.dims() == 2),	errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (grad_shape.dims() == 2),
+                errors::InvalidArgument("Dim of grad should be 2"));
+    OP_REQUIRES(context, (net_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (nlist_shape.dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
 
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -53,52 +60,54 @@ class ProdForceSeRGradOp : public OpKernel
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == in_deriv_shape.dim_size(0)),	errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    
-    OP_REQUIRES (context, (nloc * 3 == grad_shape.dim_size(1)),		errors::InvalidArgument ("input grad shape should be 3 x natoms"));
-    OP_REQUIRES (context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of descriptors should match"));
+    OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == in_deriv_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == nlist_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+
+    OP_REQUIRES(
+        context, (nloc * 3 == grad_shape.dim_size(1)),
+        errors::InvalidArgument("input grad shape should be 3 x natoms"));
+    OP_REQUIRES(context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
 
     // Create an output tensor
-    TensorShape grad_net_shape ;
-    grad_net_shape.AddDim (nframes);
-    grad_net_shape.AddDim (nloc * ndescrpt);
+    TensorShape grad_net_shape;
+    grad_net_shape.AddDim(nframes);
+    grad_net_shape.AddDim(nloc * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
-    
+    OP_REQUIRES_OK(
+        context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
+
     // flat the tensors
-    auto grad		= grad_tensor		.flat();
-    auto net_deriv	= net_deriv_tensor	.flat();
-    auto in_deriv	= in_deriv_tensor	.flat();
-    auto nlist		= nlist_tensor		.flat();
-    auto grad_net	= grad_net_tensor	->flat();
+    auto grad = grad_tensor.flat();
+    auto net_deriv = net_deriv_tensor.flat();
+    auto in_deriv = in_deriv_tensor.flat();
+    auto nlist = nlist_tensor.flat();
+    auto grad_net = grad_net_tensor->flat();
 
     // loop over frames
 #pragma omp parallel for
-    for (int kk = 0; kk < nframes; ++kk){
-
-      int grad_iter	= kk * nloc * 3;
-      int in_iter	= kk * nloc * ndescrpt * 3;
-      int nlist_iter	= kk * nloc * nnei;
-      int grad_net_iter	= kk * nloc * ndescrpt;
-      
-      deepmd::prod_force_grad_r_cpu(
-	  &grad_net(grad_net_iter),
-	  &grad(grad_iter),
-	  &in_deriv(in_iter),
-	  &nlist(nlist_iter),
-	  nloc,
-	  nnei);
+    for (int kk = 0; kk < nframes; ++kk) {
+      int grad_iter = kk * nloc * 3;
+      int in_iter = kk * nloc * ndescrpt * 3;
+      int nlist_iter = kk * nloc * nnei;
+      int grad_net_iter = kk * nloc * ndescrpt;
+
+      deepmd::prod_force_grad_r_cpu(&grad_net(grad_net_iter), &grad(grad_iter),
+                                    &in_deriv(in_iter), &nlist(nlist_iter),
+                                    nloc, nnei);
     }
   }
 };
 
-#define REGISTER_CPU(T)                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("ProdForceSeRGrad").Device(DEVICE_CPU).TypeConstraint("T"),                       \
-    ProdForceSeRGradOp);
+#define REGISTER_CPU(T)                                                   \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("ProdForceSeRGrad").Device(DEVICE_CPU).TypeConstraint("T"), \
+      ProdForceSeRGradOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
diff --git a/source/op/prod_virial.cc b/source/op/prod_virial.cc
index a8df2bc848..8769ccf8f1 100644
--- a/source/op/prod_virial.cc
+++ b/source/op/prod_virial.cc
@@ -1,24 +1,23 @@
 #include "custom_op.h"
 
 REGISTER_OP("ProdVirial")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("net_deriv: T")
-.Input("in_deriv: T")
-.Input("rij: T")
-.Input("nlist: int32")
-.Input("axis: int32")
-.Input("natoms: int32")
-.Attr("n_a_sel: int")
-.Attr("n_r_sel: int")
-.Output("virial: T")
-.Output("atom_virial: T");
-
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("net_deriv: T")
+    .Input("in_deriv: T")
+    .Input("rij: T")
+    .Input("nlist: int32")
+    .Input("axis: int32")
+    .Input("natoms: int32")
+    .Attr("n_a_sel: int")
+    .Attr("n_r_sel: int")
+    .Output("virial: T")
+    .Output("atom_virial: T");
 
 using namespace tensorflow;
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-template
+template 
 class ProdVirialOp : public OpKernel {
  public:
   explicit ProdVirialOp(OpKernelConstruction* context) : OpKernel(context) {
@@ -28,28 +27,37 @@ class ProdVirialOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
-    const Tensor& net_deriv_tensor	= context->input(0);
-    const Tensor& in_deriv_tensor	= context->input(1);
-    const Tensor& rij_tensor		= context->input(2);
-    const Tensor& nlist_tensor		= context->input(3);
-    const Tensor& axis_tensor		= context->input(4);
-    const Tensor& natoms_tensor		= context->input(5);
+    const Tensor& net_deriv_tensor = context->input(0);
+    const Tensor& in_deriv_tensor = context->input(1);
+    const Tensor& rij_tensor = context->input(2);
+    const Tensor& nlist_tensor = context->input(3);
+    const Tensor& axis_tensor = context->input(4);
+    const Tensor& natoms_tensor = context->input(5);
 
     // set size of the sample
-    OP_REQUIRES (context, (net_deriv_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (rij_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of rij should be 2"));
-    OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (axis_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of axis should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
-
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    OP_REQUIRES(context, (net_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (rij_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of rij should be 2"));
+    OP_REQUIRES(context, (nlist_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (axis_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of axis should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -58,28 +66,40 @@ class ProdVirialOp : public OpKernel {
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == in_deriv_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == rij_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == axis_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-
-    OP_REQUIRES (context, (nloc * ndescrpt * 12 == in_deriv_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of descriptors should match"));
-    OP_REQUIRES (context, (nloc * nnei * 3 == rij_tensor.shape().dim_size(1)),	errors::InvalidArgument ("dim of rij should be nnei * 3"));
-    OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel),				errors::InvalidArgument ("number of neighbors should match"));
-    OP_REQUIRES (context, (nloc * 4 == axis_tensor.shape().dim_size(1)),	errors::InvalidArgument ("number of axis type+id should be 2+2"));
+    OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == rij_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == axis_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+
+    OP_REQUIRES(context,
+                (nloc * ndescrpt * 12 == in_deriv_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(context, (nloc * nnei * 3 == rij_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("dim of rij should be nnei * 3"));
+    OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
+                errors::InvalidArgument("number of neighbors should match"));
+    OP_REQUIRES(
+        context, (nloc * 4 == axis_tensor.shape().dim_size(1)),
+        errors::InvalidArgument("number of axis type+id should be 2+2"));
 
     // Create an output tensor
-    TensorShape virial_shape ;
-    virial_shape.AddDim (nframes);
-    virial_shape.AddDim (9);
+    TensorShape virial_shape;
+    virial_shape.AddDim(nframes);
+    virial_shape.AddDim(9);
     Tensor* virial_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, virial_shape, &virial_tensor));
-    TensorShape atom_virial_shape ;
-    atom_virial_shape.AddDim (nframes);
-    atom_virial_shape.AddDim (9 * nall);
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, virial_shape, &virial_tensor));
+    TensorShape atom_virial_shape;
+    atom_virial_shape.AddDim(nframes);
+    atom_virial_shape.AddDim(9 * nall);
     Tensor* atom_virial_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(1, atom_virial_shape, &atom_virial_tensor));
-    
+    OP_REQUIRES_OK(context, context->allocate_output(1, atom_virial_shape,
+                                                     &atom_virial_tensor));
+
     // flat the tensors
     auto net_deriv = net_deriv_tensor.flat();
     auto in_deriv = in_deriv_tensor.flat();
@@ -91,102 +111,109 @@ class ProdVirialOp : public OpKernel {
 
     // loop over samples
 #pragma omp parallel for
-    for (int kk = 0; kk < nframes; ++kk){
-      int net_iter	= kk * nloc * ndescrpt;
-      int in_iter	= kk * nloc * ndescrpt * 12;
-      int rij_iter	= kk * nloc * nnei * 3;
-      int nlist_iter	= kk * nloc * nnei;
-      int axis_iter	= kk * nloc * 4;
-      int virial_iter	= kk * 9;
-      int atom_virial_iter	= kk * nall * 9;
-
-      for (int ii = 0; ii < 9; ++ ii){
-	virial (virial_iter + ii) = 0.;
+    for (int kk = 0; kk < nframes; ++kk) {
+      int net_iter = kk * nloc * ndescrpt;
+      int in_iter = kk * nloc * ndescrpt * 12;
+      int rij_iter = kk * nloc * nnei * 3;
+      int nlist_iter = kk * nloc * nnei;
+      int axis_iter = kk * nloc * 4;
+      int virial_iter = kk * 9;
+      int atom_virial_iter = kk * nall * 9;
+
+      for (int ii = 0; ii < 9; ++ii) {
+        virial(virial_iter + ii) = 0.;
       }
-      for (int ii = 0; ii < 9 * nall; ++ ii){
-	atom_virial (atom_virial_iter + ii) = 0.;
+      for (int ii = 0; ii < 9 * nall; ++ii) {
+        atom_virial(atom_virial_iter + ii) = 0.;
       }
 
       // compute virial of a frame
-      for (int ii = 0; ii < nloc; ++ii){
-	int i_idx = ii;
-	
-	// set axes
-	int axis0_type = axis (axis_iter + i_idx * 4 + 0);
-	int axis1_type = axis (axis_iter + i_idx * 4 + 2);
-	int axis_0  = axis (axis_iter + i_idx * 4 + 1);
-	int axis_1  = axis (axis_iter + i_idx * 4 + 3);
-	if (axis0_type == 1) axis_0 += n_a_sel;
-	if (axis1_type == 1) axis_1 += n_a_sel;
-
-	// deriv wrt neighbors
-	for (int jj = 0; jj < nnei; ++jj){
-	  int j_idx = nlist (nlist_iter + i_idx * nnei + jj);
-	  if (j_idx < 0) continue;
-	  if (jj == axis_0) {
-	    for (int aa = 0; aa < ndescrpt; ++aa){
-	      FPTYPE pref = -1.0 * net_deriv (net_iter + i_idx * ndescrpt + aa);
-	      for (int dd0 = 0; dd0 < 3; ++dd0){
-		for (int dd1 = 0; dd1 < 3; ++dd1){
-		  FPTYPE tmp_v = pref * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *  in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 + dd0);
-		  virial (virial_iter + dd0 * 3 + dd1) += tmp_v;
-		  atom_virial (atom_virial_iter + j_idx * 9 + dd0 * 3 + dd1) += tmp_v;
-		}
-	      }
-	    }
-	  }
-	  else if (jj == axis_1) {
-	    for (int aa = 0; aa < ndescrpt; ++aa){
-	      FPTYPE pref = -1.0 * net_deriv (net_iter + i_idx * ndescrpt + aa);
-	      for (int dd0 = 0; dd0 < 3; ++dd0){
-		for (int dd1 = 0; dd1 < 3; ++dd1){
-		  FPTYPE tmp_v = pref * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *  in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 + dd0);
-		  virial (virial_iter + dd0 * 3 + dd1) += tmp_v;
-		  atom_virial (atom_virial_iter + j_idx * 9 + dd0 * 3 + dd1) += tmp_v;
-		}
-	      }
-	    }
-	  }
-	  else {
-	    int aa_start, aa_end;
-	    make_descript_range (aa_start, aa_end, jj);
-	    for (int aa = aa_start; aa < aa_end; ++aa) {
-	      FPTYPE pref = -1.0 * net_deriv (net_iter + i_idx * ndescrpt + aa);
-	      for (int dd0 = 0; dd0 < 3; ++dd0){
-		for (int dd1 = 0; dd1 < 3; ++dd1){
-		  FPTYPE tmp_v = pref * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *  in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 + dd0);
-		  virial (virial_iter + dd0 * 3 + dd1) += tmp_v;
-		  atom_virial (atom_virial_iter + j_idx * 9 + dd0 * 3 + dd1) += tmp_v;
-		}
-	      }
-	    }
-	  }
-	}
+      for (int ii = 0; ii < nloc; ++ii) {
+        int i_idx = ii;
+
+        // set axes
+        int axis0_type = axis(axis_iter + i_idx * 4 + 0);
+        int axis1_type = axis(axis_iter + i_idx * 4 + 2);
+        int axis_0 = axis(axis_iter + i_idx * 4 + 1);
+        int axis_1 = axis(axis_iter + i_idx * 4 + 3);
+        if (axis0_type == 1) axis_0 += n_a_sel;
+        if (axis1_type == 1) axis_1 += n_a_sel;
+
+        // deriv wrt neighbors
+        for (int jj = 0; jj < nnei; ++jj) {
+          int j_idx = nlist(nlist_iter + i_idx * nnei + jj);
+          if (j_idx < 0) continue;
+          if (jj == axis_0) {
+            for (int aa = 0; aa < ndescrpt; ++aa) {
+              FPTYPE pref = -1.0 * net_deriv(net_iter + i_idx * ndescrpt + aa);
+              for (int dd0 = 0; dd0 < 3; ++dd0) {
+                for (int dd1 = 0; dd1 < 3; ++dd1) {
+                  FPTYPE tmp_v =
+                      pref * rij(rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *
+                      in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 +
+                               dd0);
+                  virial(virial_iter + dd0 * 3 + dd1) += tmp_v;
+                  atom_virial(atom_virial_iter + j_idx * 9 + dd0 * 3 + dd1) +=
+                      tmp_v;
+                }
+              }
+            }
+          } else if (jj == axis_1) {
+            for (int aa = 0; aa < ndescrpt; ++aa) {
+              FPTYPE pref = -1.0 * net_deriv(net_iter + i_idx * ndescrpt + aa);
+              for (int dd0 = 0; dd0 < 3; ++dd0) {
+                for (int dd1 = 0; dd1 < 3; ++dd1) {
+                  FPTYPE tmp_v =
+                      pref * rij(rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *
+                      in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 +
+                               dd0);
+                  virial(virial_iter + dd0 * 3 + dd1) += tmp_v;
+                  atom_virial(atom_virial_iter + j_idx * 9 + dd0 * 3 + dd1) +=
+                      tmp_v;
+                }
+              }
+            }
+          } else {
+            int aa_start, aa_end;
+            make_descript_range(aa_start, aa_end, jj);
+            for (int aa = aa_start; aa < aa_end; ++aa) {
+              FPTYPE pref = -1.0 * net_deriv(net_iter + i_idx * ndescrpt + aa);
+              for (int dd0 = 0; dd0 < 3; ++dd0) {
+                for (int dd1 = 0; dd1 < 3; ++dd1) {
+                  FPTYPE tmp_v =
+                      pref * rij(rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *
+                      in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 +
+                               dd0);
+                  virial(virial_iter + dd0 * 3 + dd1) += tmp_v;
+                  atom_virial(atom_virial_iter + j_idx * 9 + dd0 * 3 + dd1) +=
+                      tmp_v;
+                }
+              }
+            }
+          }
+        }
       }
     }
   }
-private:
+
+ private:
   int n_r_sel, n_a_sel, n_a_shift;
-  inline void 
-  make_descript_range (int & idx_start,
-		       int & idx_end,
-		       const int & nei_idx) {
+  inline void make_descript_range(int& idx_start,
+                                  int& idx_end,
+                                  const int& nei_idx) {
     if (nei_idx < n_a_sel) {
       idx_start = nei_idx * 4;
-      idx_end   = nei_idx * 4 + 4;
-    }
-    else {
+      idx_end = nei_idx * 4 + 4;
+    } else {
       idx_start = n_a_shift + (nei_idx - n_a_sel);
-      idx_end   = n_a_shift + (nei_idx - n_a_sel) + 1;
+      idx_end = n_a_shift + (nei_idx - n_a_sel) + 1;
     }
   }
 };
 
-#define REGISTER_CPU(T)                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("ProdVirial").Device(DEVICE_CPU).TypeConstraint("T"),                       \
-    ProdVirialOp); 
+#define REGISTER_CPU(T)                                             \
+  REGISTER_KERNEL_BUILDER(                                          \
+      Name("ProdVirial").Device(DEVICE_CPU).TypeConstraint("T"), \
+      ProdVirialOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
-
-
diff --git a/source/op/prod_virial_grad.cc b/source/op/prod_virial_grad.cc
index 33fa0348dc..e7683111b2 100644
--- a/source/op/prod_virial_grad.cc
+++ b/source/op/prod_virial_grad.cc
@@ -1,62 +1,71 @@
 #include "custom_op.h"
 
 REGISTER_OP("ProdVirialGrad")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("grad: T")
-.Input("net_deriv: T")
-.Input("in_deriv: T")
-.Input("rij: T")
-.Input("nlist: int32")
-.Input("axis: int32")
-.Input("natoms: int32")
-.Attr("n_a_sel: int")
-.Attr("n_r_sel: int")
-.Output("grad_net: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("grad: T")
+    .Input("net_deriv: T")
+    .Input("in_deriv: T")
+    .Input("rij: T")
+    .Input("nlist: int32")
+    .Input("axis: int32")
+    .Input("natoms: int32")
+    .Attr("n_a_sel: int")
+    .Attr("n_r_sel: int")
+    .Output("grad_net: T");
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-template
-class ProdVirialGradOp : public OpKernel 
-{
-public:
+template 
+class ProdVirialGradOp : public OpKernel {
+ public:
   explicit ProdVirialGradOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));    
-    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));    
+    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));
+    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));
     n_a_shift = n_a_sel * 4;
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
-    const Tensor& grad_tensor		= context->input(0);
-    const Tensor& net_deriv_tensor	= context->input(1);
-    const Tensor& in_deriv_tensor	= context->input(2);
-    const Tensor& rij_tensor		= context->input(3);
-    const Tensor& nlist_tensor		= context->input(4);
-    const Tensor& axis_tensor		= context->input(5);
-    const Tensor& natoms_tensor		= context->input(6);
+    const Tensor& grad_tensor = context->input(0);
+    const Tensor& net_deriv_tensor = context->input(1);
+    const Tensor& in_deriv_tensor = context->input(2);
+    const Tensor& rij_tensor = context->input(3);
+    const Tensor& nlist_tensor = context->input(4);
+    const Tensor& axis_tensor = context->input(5);
+    const Tensor& natoms_tensor = context->input(6);
 
     // set size of the sample
-    TensorShape grad_shape		= grad_tensor.shape();
-    TensorShape net_deriv_shape		= net_deriv_tensor.shape();
-    TensorShape in_deriv_shape		= in_deriv_tensor.shape();
-    TensorShape rij_shape		= rij_tensor.shape();
-    TensorShape nlist_shape		= nlist_tensor.shape();
-    TensorShape axis_shape		= axis_tensor.shape();
-
-    OP_REQUIRES (context, (grad_shape.dims() == 2),	errors::InvalidArgument ("Dim of grad should be 2"));
-    OP_REQUIRES (context, (net_deriv_shape.dims() == 2),errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (rij_shape.dims() == 2),	errors::InvalidArgument ("Dim of rij should be 2"));
-    OP_REQUIRES (context, (nlist_shape.dims() == 2),	errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (axis_shape.dims() == 2),	errors::InvalidArgument ("Dim of axis should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
-
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    TensorShape grad_shape = grad_tensor.shape();
+    TensorShape net_deriv_shape = net_deriv_tensor.shape();
+    TensorShape in_deriv_shape = in_deriv_tensor.shape();
+    TensorShape rij_shape = rij_tensor.shape();
+    TensorShape nlist_shape = nlist_tensor.shape();
+    TensorShape axis_shape = axis_tensor.shape();
+
+    OP_REQUIRES(context, (grad_shape.dims() == 2),
+                errors::InvalidArgument("Dim of grad should be 2"));
+    OP_REQUIRES(context, (net_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (rij_shape.dims() == 2),
+                errors::InvalidArgument("Dim of rij should be 2"));
+    OP_REQUIRES(context, (nlist_shape.dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (axis_shape.dims() == 2),
+                errors::InvalidArgument("Dim of axis should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -64,127 +73,145 @@ class ProdVirialGradOp : public OpKernel
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == in_deriv_shape.dim_size(0)),	errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == rij_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == axis_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    
-    OP_REQUIRES (context, (9 == grad_shape.dim_size(1)),		errors::InvalidArgument ("input grad shape should be 3 x natoms"));
-    OP_REQUIRES (context, (nloc * ndescrpt * 12 == in_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of descriptors should match"));
-    OP_REQUIRES (context, (nloc * nnei * 3 == rij_shape.dim_size(1)),	errors::InvalidArgument ("dim of rij should be  nnei * 3"));
-    OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel),			errors::InvalidArgument ("number of neighbors should match"));
-    OP_REQUIRES (context, (nloc * 4 == axis_shape.dim_size(1)),		errors::InvalidArgument ("number of axis type+id should be 2+2"));
+    OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == in_deriv_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == rij_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == nlist_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == axis_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+
+    OP_REQUIRES(
+        context, (9 == grad_shape.dim_size(1)),
+        errors::InvalidArgument("input grad shape should be 3 x natoms"));
+    OP_REQUIRES(context, (nloc * ndescrpt * 12 == in_deriv_shape.dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(context, (nloc * nnei * 3 == rij_shape.dim_size(1)),
+                errors::InvalidArgument("dim of rij should be  nnei * 3"));
+    OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
+                errors::InvalidArgument("number of neighbors should match"));
+    OP_REQUIRES(
+        context, (nloc * 4 == axis_shape.dim_size(1)),
+        errors::InvalidArgument("number of axis type+id should be 2+2"));
 
     // Create an output tensor
-    TensorShape grad_net_shape ;
-    grad_net_shape.AddDim (nframes);
-    grad_net_shape.AddDim (nloc * ndescrpt);
+    TensorShape grad_net_shape;
+    grad_net_shape.AddDim(nframes);
+    grad_net_shape.AddDim(nloc * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
-    
+    OP_REQUIRES_OK(
+        context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
+
     // flat the tensors
-    auto grad		= grad_tensor		.flat();
-    auto net_deriv	= net_deriv_tensor	.flat();
-    auto in_deriv	= in_deriv_tensor	.flat();
-    auto rij		= rij_tensor		.flat();
-    auto nlist		= nlist_tensor		.flat();
-    auto axis		= axis_tensor		.flat();
-    auto grad_net	= grad_net_tensor	->flat();
+    auto grad = grad_tensor.flat();
+    auto net_deriv = net_deriv_tensor.flat();
+    auto in_deriv = in_deriv_tensor.flat();
+    auto rij = rij_tensor.flat();
+    auto nlist = nlist_tensor.flat();
+    auto axis = axis_tensor.flat();
+    auto grad_net = grad_net_tensor->flat();
 
     // loop over frames
 #pragma omp parallel for
-    for (int kk = 0; kk < nframes; ++kk){
-
-      int grad_iter	= kk * 9;
-      int net_iter	= kk * nloc * ndescrpt;
-      int in_iter	= kk * nloc * ndescrpt * 12;
-      int rij_iter	= kk * nloc * nnei * 3;
-      int nlist_iter	= kk * nloc * nnei;
-      int axis_iter	= kk * nloc * 4;
-      int grad_net_iter	= kk * nloc * ndescrpt;
+    for (int kk = 0; kk < nframes; ++kk) {
+      int grad_iter = kk * 9;
+      int net_iter = kk * nloc * ndescrpt;
+      int in_iter = kk * nloc * ndescrpt * 12;
+      int rij_iter = kk * nloc * nnei * 3;
+      int nlist_iter = kk * nloc * nnei;
+      int axis_iter = kk * nloc * 4;
+      int grad_net_iter = kk * nloc * ndescrpt;
 
       // reset the frame to 0
-      for (int ii = 0; ii < nloc; ++ii){
-	for (int aa = 0; aa < ndescrpt; ++aa){
-	  grad_net (grad_net_iter + ii * ndescrpt + aa) = 0;
-	}
-      }      
+      for (int ii = 0; ii < nloc; ++ii) {
+        for (int aa = 0; aa < ndescrpt; ++aa) {
+          grad_net(grad_net_iter + ii * ndescrpt + aa) = 0;
+        }
+      }
 
       // compute grad of one frame
-      for (int ii = 0; ii < nloc; ++ii){
-	int i_idx = ii;
-	
-	// set axes
-	int axis0_type = axis (axis_iter + i_idx * 4 + 0);
-	int axis1_type = axis (axis_iter + i_idx * 4 + 2);
-	int axis_0  = axis (axis_iter + i_idx * 4 + 1);
-	int axis_1  = axis (axis_iter + i_idx * 4 + 3);
-	if (axis0_type == 1) axis_0 += n_a_sel;
-	if (axis1_type == 1) axis_1 += n_a_sel;
-
-	// loop over neighbors
-	for (int jj = 0; jj < nnei; ++jj){
-	  int j_idx = nlist (nlist_iter + i_idx * nnei + jj);	  
-	  if (j_idx < 0) continue;
-	  if (jj == axis_0) {
-	    for (int aa = 0; aa < ndescrpt; ++aa){
-	      for (int dd0 = 0; dd0 < 3; ++dd0){
-		for (int dd1 = 0; dd1 < 3; ++dd1){
-		  grad_net (grad_net_iter + i_idx * ndescrpt + aa) += 
-		      -1.0 * grad (grad_iter + dd0 * 3 + dd1) * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 + dd0);
-		}
-	      }
-	    }
-	  }
-	  else if (jj == axis_1) {
-	    for (int aa = 0; aa < ndescrpt; ++aa){
-	      for (int dd0 = 0; dd0 < 3; ++dd0){
-		for (int dd1 = 0; dd1 < 3; ++dd1){
-		  grad_net (grad_net_iter + i_idx * ndescrpt + aa) += 
-		      -1.0 * grad (grad_iter + dd0 * 3 + dd1) * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 + dd0);
-		}
-	      }
-	    }
-	  }
-	  else {
-	    int aa_start, aa_end;
-	    make_descript_range (aa_start, aa_end, jj);
-	    for (int aa = aa_start; aa < aa_end; ++aa){
-	      for (int dd0 = 0; dd0 < 3; ++dd0){
-		for (int dd1 = 0; dd1 < 3; ++dd1){
-		  grad_net (grad_net_iter + i_idx * ndescrpt + aa) += 
-		      -1.0 * grad (grad_iter + dd0 * 3 + dd1) * rij (rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) * in_deriv (in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 + dd0);
-		}
-	      }
-	    }
-	  }
-	}
+      for (int ii = 0; ii < nloc; ++ii) {
+        int i_idx = ii;
+
+        // set axes
+        int axis0_type = axis(axis_iter + i_idx * 4 + 0);
+        int axis1_type = axis(axis_iter + i_idx * 4 + 2);
+        int axis_0 = axis(axis_iter + i_idx * 4 + 1);
+        int axis_1 = axis(axis_iter + i_idx * 4 + 3);
+        if (axis0_type == 1) axis_0 += n_a_sel;
+        if (axis1_type == 1) axis_1 += n_a_sel;
+
+        // loop over neighbors
+        for (int jj = 0; jj < nnei; ++jj) {
+          int j_idx = nlist(nlist_iter + i_idx * nnei + jj);
+          if (j_idx < 0) continue;
+          if (jj == axis_0) {
+            for (int aa = 0; aa < ndescrpt; ++aa) {
+              for (int dd0 = 0; dd0 < 3; ++dd0) {
+                for (int dd1 = 0; dd1 < 3; ++dd1) {
+                  grad_net(grad_net_iter + i_idx * ndescrpt + aa) +=
+                      -1.0 * grad(grad_iter + dd0 * 3 + dd1) *
+                      rij(rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *
+                      in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 3 +
+                               dd0);
+                }
+              }
+            }
+          } else if (jj == axis_1) {
+            for (int aa = 0; aa < ndescrpt; ++aa) {
+              for (int dd0 = 0; dd0 < 3; ++dd0) {
+                for (int dd1 = 0; dd1 < 3; ++dd1) {
+                  grad_net(grad_net_iter + i_idx * ndescrpt + aa) +=
+                      -1.0 * grad(grad_iter + dd0 * 3 + dd1) *
+                      rij(rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *
+                      in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 6 +
+                               dd0);
+                }
+              }
+            }
+          } else {
+            int aa_start, aa_end;
+            make_descript_range(aa_start, aa_end, jj);
+            for (int aa = aa_start; aa < aa_end; ++aa) {
+              for (int dd0 = 0; dd0 < 3; ++dd0) {
+                for (int dd1 = 0; dd1 < 3; ++dd1) {
+                  grad_net(grad_net_iter + i_idx * ndescrpt + aa) +=
+                      -1.0 * grad(grad_iter + dd0 * 3 + dd1) *
+                      rij(rij_iter + i_idx * nnei * 3 + jj * 3 + dd1) *
+                      in_deriv(in_iter + i_idx * ndescrpt * 12 + aa * 12 + 9 +
+                               dd0);
+                }
+              }
+            }
+          }
+        }
       }
     }
   }
-private:
+
+ private:
   int n_r_sel, n_a_sel, n_a_shift;
-  inline void 
-  make_descript_range (int & idx_start,
-		       int & idx_end,
-		       const int & nei_idx) {
+  inline void make_descript_range(int& idx_start,
+                                  int& idx_end,
+                                  const int& nei_idx) {
     if (nei_idx < n_a_sel) {
       idx_start = nei_idx * 4;
-      idx_end   = nei_idx * 4 + 4;
-    }
-    else {
+      idx_end = nei_idx * 4 + 4;
+    } else {
       idx_start = n_a_shift + (nei_idx - n_a_sel);
-      idx_end   = n_a_shift + (nei_idx - n_a_sel) + 1;
+      idx_end = n_a_shift + (nei_idx - n_a_sel) + 1;
     }
   }
 };
 
-#define REGISTER_CPU(T)                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("ProdVirialGrad").Device(DEVICE_CPU).TypeConstraint("T"),                       \
-    ProdVirialGradOp); 
+#define REGISTER_CPU(T)                                                 \
+  REGISTER_KERNEL_BUILDER(                                              \
+      Name("ProdVirialGrad").Device(DEVICE_CPU).TypeConstraint("T"), \
+      ProdVirialGradOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
diff --git a/source/op/prod_virial_grad_multi_device.cc b/source/op/prod_virial_grad_multi_device.cc
index 8007d72acc..5c45e8d416 100644
--- a/source/op/prod_virial_grad_multi_device.cc
+++ b/source/op/prod_virial_grad_multi_device.cc
@@ -23,46 +23,55 @@ REGISTER_OP("ProdVirialSeRGrad")
     .Input("natoms: int32")
     .Output("grad_net: T");
 
-template
-class ProdVirialSeAGradOp : public OpKernel 
-{
-public:
-  explicit ProdVirialSeAGradOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));    
-    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));    
+template 
+class ProdVirialSeAGradOp : public OpKernel {
+ public:
+  explicit ProdVirialSeAGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));
+    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));
     n_a_shift = n_a_sel * 4;
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& grad_tensor		= context->input(context_input_index++);
-    const Tensor& net_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& in_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& rij_tensor		= context->input(context_input_index++);
-    const Tensor& nlist_tensor		= context->input(context_input_index++);
-    const Tensor& natoms_tensor		= context->input(context_input_index++);
+    const Tensor& grad_tensor = context->input(context_input_index++);
+    const Tensor& net_deriv_tensor = context->input(context_input_index++);
+    const Tensor& in_deriv_tensor = context->input(context_input_index++);
+    const Tensor& rij_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    TensorShape grad_shape		= grad_tensor.shape();
-    TensorShape net_deriv_shape		= net_deriv_tensor.shape();
-    TensorShape in_deriv_shape		= in_deriv_tensor.shape();
-    TensorShape rij_shape		= rij_tensor.shape();
-    TensorShape nlist_shape		= nlist_tensor.shape();
-
-    OP_REQUIRES (context, (grad_shape.dims() == 2),	errors::InvalidArgument ("Dim of grad should be 2"));
-    OP_REQUIRES (context, (net_deriv_shape.dims() == 2),errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (rij_shape.dims() == 2),	errors::InvalidArgument ("Dim of rij should be 2"));
-    OP_REQUIRES (context, (nlist_shape.dims() == 2),	errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
-
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    TensorShape grad_shape = grad_tensor.shape();
+    TensorShape net_deriv_shape = net_deriv_tensor.shape();
+    TensorShape in_deriv_shape = in_deriv_tensor.shape();
+    TensorShape rij_shape = rij_tensor.shape();
+    TensorShape nlist_shape = nlist_tensor.shape();
+
+    OP_REQUIRES(context, (grad_shape.dims() == 2),
+                errors::InvalidArgument("Dim of grad should be 2"));
+    OP_REQUIRES(context, (net_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (rij_shape.dims() == 2),
+                errors::InvalidArgument("Dim of rij should be 2"));
+    OP_REQUIRES(context, (nlist_shape.dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -70,122 +79,134 @@ class ProdVirialSeAGradOp : public OpKernel
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == in_deriv_shape.dim_size(0)),	errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == rij_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    
-    OP_REQUIRES (context, (9 == grad_shape.dim_size(1)),		errors::InvalidArgument ("input grad shape should be 3 x natoms"));
-    OP_REQUIRES (context, (int_64(nloc) * ndescrpt * 3 == in_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of descriptors should match"));
-    OP_REQUIRES (context, (int_64(nloc) * nnei * 3 == rij_shape.dim_size(1)),	errors::InvalidArgument ("dim of rij should be  nnei * 3"));
-    OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel),			errors::InvalidArgument ("number of neighbors should match"));
+    OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == in_deriv_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == rij_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == nlist_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+
+    OP_REQUIRES(
+        context, (9 == grad_shape.dim_size(1)),
+        errors::InvalidArgument("input grad shape should be 3 x natoms"));
+    OP_REQUIRES(context,
+                (int_64(nloc) * ndescrpt * 3 == in_deriv_shape.dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(context, (int_64(nloc) * nnei * 3 == rij_shape.dim_size(1)),
+                errors::InvalidArgument("dim of rij should be  nnei * 3"));
+    OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
+                errors::InvalidArgument("number of neighbors should match"));
 
     // Create an output tensor
-    TensorShape grad_net_shape ;
-    grad_net_shape.AddDim (nframes);
-    grad_net_shape.AddDim (int_64(nloc) * ndescrpt);
+    TensorShape grad_net_shape;
+    grad_net_shape.AddDim(nframes);
+    grad_net_shape.AddDim(int_64(nloc) * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
     int context_output_index = 0;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        grad_net_shape, 
-        &grad_net_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
-    assert (nframes == grad_net_shape.dim_size(0));
-    assert (nframes == grad_shape.dim_size(0));
-    assert (nframes == net_deriv_tensor.shape().dim_size(0));
-    assert (nframes == in_deriv_tensor.shape().dim_size(0));
-    assert (nframes == rij_tensor.shape().dim_size(0));
-    assert (nframes == nlist_tensor.shape().dim_size(0));
-    assert (nloc * ndescrpt == grad_net_shape.dim_size(1));
-    assert (9 == grad_shape.dim_size(1));
-    assert (nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
-    assert (nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
-    assert (nloc * nnei * 3 == rij_tensor.shape().dim_size(1));
-    assert (nloc * nnei == nlist_tensor.shape().dim_size(1));
-    assert (nnei * 4 == ndescrpt);
-    
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++,
+                                            grad_net_shape, &grad_net_tensor));
+    DeviceFunctor()(device, context->eigen_device());
+    assert(nframes == grad_net_shape.dim_size(0));
+    assert(nframes == grad_shape.dim_size(0));
+    assert(nframes == net_deriv_tensor.shape().dim_size(0));
+    assert(nframes == in_deriv_tensor.shape().dim_size(0));
+    assert(nframes == rij_tensor.shape().dim_size(0));
+    assert(nframes == nlist_tensor.shape().dim_size(0));
+    assert(nloc * ndescrpt == grad_net_shape.dim_size(1));
+    assert(9 == grad_shape.dim_size(1));
+    assert(nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
+    assert(nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
+    assert(nloc * nnei * 3 == rij_tensor.shape().dim_size(1));
+    assert(nloc * nnei == nlist_tensor.shape().dim_size(1));
+    assert(nnei * 4 == ndescrpt);
+
     // flat the tensors
-    FPTYPE * p_grad_net = grad_net_tensor->flat().data();
-    const FPTYPE * p_grad = grad_tensor.flat().data();
-    const FPTYPE * p_net_deriv = net_deriv_tensor.flat().data();
-    const FPTYPE * p_in_deriv = in_deriv_tensor.flat().data();
-    const FPTYPE * p_rij = rij_tensor.flat().data();
-    const int * p_nlist	= nlist_tensor.flat().data();
+    FPTYPE* p_grad_net = grad_net_tensor->flat().data();
+    const FPTYPE* p_grad = grad_tensor.flat().data();
+    const FPTYPE* p_net_deriv = net_deriv_tensor.flat().data();
+    const FPTYPE* p_in_deriv = in_deriv_tensor.flat().data();
+    const FPTYPE* p_rij = rij_tensor.flat().data();
+    const int* p_nlist = nlist_tensor.flat().data();
 
     // loop over frames
-    for (int_64 kk = 0; kk < nframes; ++kk){
-      FPTYPE * grad_net = p_grad_net + kk * nloc * ndescrpt;
-      const FPTYPE * grad = p_grad + kk * 9;
-      const FPTYPE * in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
-      const FPTYPE * rij = p_rij + kk * nloc * nnei * 3;
-      const int * nlist = p_nlist + kk * nloc * nnei; 
+    for (int_64 kk = 0; kk < nframes; ++kk) {
+      FPTYPE* grad_net = p_grad_net + kk * nloc * ndescrpt;
+      const FPTYPE* grad = p_grad + kk * 9;
+      const FPTYPE* in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
+      const FPTYPE* rij = p_rij + kk * nloc * nnei * 3;
+      const int* nlist = p_nlist + kk * nloc * nnei;
       if (device == "GPU") {
-        #if GOOGLE_CUDA
-        deepmd::prod_virial_grad_a_gpu_cuda(    
-          grad_net, 
-          grad, in_deriv, rij, nlist, nloc, nnei);
-        #endif // GOOGLE_CUDA
-        
-        #if TENSORFLOW_USE_ROCM
-        deepmd::prod_virial_grad_a_gpu_rocm(    
-          grad_net, 
-          grad, in_deriv, rij, nlist, nloc, nnei);
-        #endif // TENSORFLOW_USE_ROCM
-      }
-      else if (device == "CPU") {
-        deepmd::prod_virial_grad_a_cpu(    
-          grad_net, 
-          grad, in_deriv, rij, nlist, nloc, nnei);
+#if GOOGLE_CUDA
+        deepmd::prod_virial_grad_a_gpu_cuda(grad_net, grad, in_deriv, rij,
+                                            nlist, nloc, nnei);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+        deepmd::prod_virial_grad_a_gpu_rocm(grad_net, grad, in_deriv, rij,
+                                            nlist, nloc, nnei);
+#endif  // TENSORFLOW_USE_ROCM
+      } else if (device == "CPU") {
+        deepmd::prod_virial_grad_a_cpu(grad_net, grad, in_deriv, rij, nlist,
+                                       nloc, nnei);
       }
     }
   }
-private:
+
+ private:
   std::string device;
   int n_r_sel, n_a_sel, n_a_shift;
 };
 
-template
-class ProdVirialSeRGradOp : public OpKernel 
-{
-public:
-  explicit ProdVirialSeRGradOp(OpKernelConstruction* context) : OpKernel(context) {}
+template 
+class ProdVirialSeRGradOp : public OpKernel {
+ public:
+  explicit ProdVirialSeRGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& grad_tensor		= context->input(context_input_index++);
-    const Tensor& net_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& in_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& rij_tensor		= context->input(context_input_index++);
-    const Tensor& nlist_tensor		= context->input(context_input_index++);
-    const Tensor& natoms_tensor		= context->input(context_input_index++);
+    const Tensor& grad_tensor = context->input(context_input_index++);
+    const Tensor& net_deriv_tensor = context->input(context_input_index++);
+    const Tensor& in_deriv_tensor = context->input(context_input_index++);
+    const Tensor& rij_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    TensorShape grad_shape		= grad_tensor.shape();
-    TensorShape net_deriv_shape		= net_deriv_tensor.shape();
-    TensorShape in_deriv_shape		= in_deriv_tensor.shape();
-    TensorShape rij_shape		= rij_tensor.shape();
-    TensorShape nlist_shape		= nlist_tensor.shape();
-
-    OP_REQUIRES (context, (grad_shape.dims() == 2),	errors::InvalidArgument ("Dim of grad should be 2"));
-    OP_REQUIRES (context, (net_deriv_shape.dims() == 2),errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (rij_shape.dims() == 2),	errors::InvalidArgument ("Dim of rij should be 2"));
-    OP_REQUIRES (context, (nlist_shape.dims() == 2),	errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
-
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    TensorShape grad_shape = grad_tensor.shape();
+    TensorShape net_deriv_shape = net_deriv_tensor.shape();
+    TensorShape in_deriv_shape = in_deriv_tensor.shape();
+    TensorShape rij_shape = rij_tensor.shape();
+    TensorShape nlist_shape = nlist_tensor.shape();
+
+    OP_REQUIRES(context, (grad_shape.dims() == 2),
+                errors::InvalidArgument("Dim of grad should be 2"));
+    OP_REQUIRES(context, (net_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (rij_shape.dims() == 2),
+                errors::InvalidArgument("Dim of rij should be 2"));
+    OP_REQUIRES(context, (nlist_shape.dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -193,103 +214,109 @@ class ProdVirialSeRGradOp : public OpKernel
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == in_deriv_shape.dim_size(0)),	errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == rij_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    
-    OP_REQUIRES (context, (9 == grad_shape.dim_size(1)),		errors::InvalidArgument ("input grad shape should be 3 x natoms"));
-    OP_REQUIRES (context, (int_64(nloc) * ndescrpt * 3 == in_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of descriptors should match"));
-    OP_REQUIRES (context, (int_64(nloc) * nnei * 3 == rij_shape.dim_size(1)),	errors::InvalidArgument ("dim of rij should be  nnei * 3"));
+    OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == in_deriv_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == rij_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == nlist_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+
+    OP_REQUIRES(
+        context, (9 == grad_shape.dim_size(1)),
+        errors::InvalidArgument("input grad shape should be 3 x natoms"));
+    OP_REQUIRES(context,
+                (int_64(nloc) * ndescrpt * 3 == in_deriv_shape.dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(context, (int_64(nloc) * nnei * 3 == rij_shape.dim_size(1)),
+                errors::InvalidArgument("dim of rij should be  nnei * 3"));
 
     // Create an output tensor
-    TensorShape grad_net_shape ;
-    grad_net_shape.AddDim (nframes);
-    grad_net_shape.AddDim (int_64(nloc) * ndescrpt);
+    TensorShape grad_net_shape;
+    grad_net_shape.AddDim(nframes);
+    grad_net_shape.AddDim(int_64(nloc) * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
     int context_output_index = 0;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        grad_net_shape, 
-        &grad_net_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
-    assert (nframes == grad_net_shape.dim_size(0));
-    assert (nframes == grad_shape.dim_size(0));
-    assert (nframes == net_deriv_tensor.shape().dim_size(0));
-    assert (nframes == in_deriv_tensor.shape().dim_size(0));
-    assert (nframes == rij_tensor.shape().dim_size(0));
-    assert (nframes == nlist_tensor.shape().dim_size(0));
-    assert (nloc * ndescrpt == grad_net_shape.dim_size(1));
-    assert (9 == grad_shape.dim_size(1));
-    assert (nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
-    assert (nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
-    assert (nloc * nnei * 3 == rij_tensor.shape().dim_size(1));
-    assert (nloc * nnei == nlist_tensor.shape().dim_size(1));
-    assert (nnei * 1 == ndescrpt);
-    
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++,
+                                            grad_net_shape, &grad_net_tensor));
+    DeviceFunctor()(device, context->eigen_device());
+    assert(nframes == grad_net_shape.dim_size(0));
+    assert(nframes == grad_shape.dim_size(0));
+    assert(nframes == net_deriv_tensor.shape().dim_size(0));
+    assert(nframes == in_deriv_tensor.shape().dim_size(0));
+    assert(nframes == rij_tensor.shape().dim_size(0));
+    assert(nframes == nlist_tensor.shape().dim_size(0));
+    assert(nloc * ndescrpt == grad_net_shape.dim_size(1));
+    assert(9 == grad_shape.dim_size(1));
+    assert(nloc * ndescrpt == net_deriv_tensor.shape().dim_size(1));
+    assert(nloc * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1));
+    assert(nloc * nnei * 3 == rij_tensor.shape().dim_size(1));
+    assert(nloc * nnei == nlist_tensor.shape().dim_size(1));
+    assert(nnei * 1 == ndescrpt);
+
     // flat the tensors
-    FPTYPE * p_grad_net = grad_net_tensor->flat().data();
-    const FPTYPE * p_grad = grad_tensor.flat().data();
-    const FPTYPE * p_net_deriv = net_deriv_tensor.flat().data();
-    const FPTYPE * p_in_deriv = in_deriv_tensor.flat().data();
-    const FPTYPE * p_rij = rij_tensor.flat().data();
-    const int * p_nlist	= nlist_tensor.flat().data();
+    FPTYPE* p_grad_net = grad_net_tensor->flat().data();
+    const FPTYPE* p_grad = grad_tensor.flat().data();
+    const FPTYPE* p_net_deriv = net_deriv_tensor.flat().data();
+    const FPTYPE* p_in_deriv = in_deriv_tensor.flat().data();
+    const FPTYPE* p_rij = rij_tensor.flat().data();
+    const int* p_nlist = nlist_tensor.flat().data();
 
     // loop over frames
-    for (int_64 kk = 0; kk < nframes; ++kk){
-      FPTYPE * grad_net = p_grad_net + kk * nloc * ndescrpt;
-      const FPTYPE * grad = p_grad + kk * 9;
-      const FPTYPE * in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
-      const FPTYPE * rij = p_rij + kk * nloc * nnei * 3;
-      const int * nlist = p_nlist + kk * nloc * nnei; 
+    for (int_64 kk = 0; kk < nframes; ++kk) {
+      FPTYPE* grad_net = p_grad_net + kk * nloc * ndescrpt;
+      const FPTYPE* grad = p_grad + kk * 9;
+      const FPTYPE* in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
+      const FPTYPE* rij = p_rij + kk * nloc * nnei * 3;
+      const int* nlist = p_nlist + kk * nloc * nnei;
       if (device == "GPU") {
-        #if GOOGLE_CUDA
-        deepmd::prod_virial_grad_r_gpu_cuda(    
-          grad_net, 
-          grad, in_deriv, rij, nlist, nloc, nnei);
-        #endif // GOOGLE_CUDA
-        
-        #if TENSORFLOW_USE_ROCM
-        deepmd::prod_virial_grad_r_gpu_rocm(    
-          grad_net, 
-          grad, in_deriv, rij, nlist, nloc, nnei);
-        #endif // TENSORFLOW_USE_ROCM
-      }
-      else if (device == "CPU") {
-        deepmd::prod_virial_grad_r_cpu(    
-          grad_net, 
-          grad, in_deriv, rij, nlist, nloc, nnei);
+#if GOOGLE_CUDA
+        deepmd::prod_virial_grad_r_gpu_cuda(grad_net, grad, in_deriv, rij,
+                                            nlist, nloc, nnei);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+        deepmd::prod_virial_grad_r_gpu_rocm(grad_net, grad, in_deriv, rij,
+                                            nlist, nloc, nnei);
+#endif  // TENSORFLOW_USE_ROCM
+      } else if (device == "CPU") {
+        deepmd::prod_virial_grad_r_cpu(grad_net, grad, in_deriv, rij, nlist,
+                                       nloc, nnei);
       }
     }
   }
-private:
+
+ private:
   std::string device;
 };
 
 // Register the CPU kernels.
-#define REGISTER_CPU(T)                                                                        \
-REGISTER_KERNEL_BUILDER(                                                                       \
-    Name("ProdVirialSeAGrad").Device(DEVICE_CPU).TypeConstraint("T"),                       \
-    ProdVirialSeAGradOp);                                                        \
-REGISTER_KERNEL_BUILDER(                                                                       \
-    Name("ProdVirialSeRGrad").Device(DEVICE_CPU).TypeConstraint("T"),                       \
-    ProdVirialSeRGradOp);
+#define REGISTER_CPU(T)                                                    \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("ProdVirialSeAGrad").Device(DEVICE_CPU).TypeConstraint("T"), \
+      ProdVirialSeAGradOp);                                  \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("ProdVirialSeRGrad").Device(DEVICE_CPU).TypeConstraint("T"), \
+      ProdVirialSeRGradOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
 // Register the GPU kernels.
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#define REGISTER_GPU(T)                                                                       \
-REGISTER_KERNEL_BUILDER(                                                                      \
-    Name("ProdVirialSeAGrad").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms"), \
-    ProdVirialSeAGradOp);                                                       \
-REGISTER_KERNEL_BUILDER(                                                                      \
-    Name("ProdVirialSeRGrad").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms"), \
-    ProdVirialSeRGradOp);
+#define REGISTER_GPU(T)                                       \
+  REGISTER_KERNEL_BUILDER(Name("ProdVirialSeAGrad")           \
+                              .Device(DEVICE_GPU)             \
+                              .TypeConstraint("T")         \
+                              .HostMemory("natoms"),          \
+                          ProdVirialSeAGradOp); \
+  REGISTER_KERNEL_BUILDER(Name("ProdVirialSeRGrad")           \
+                              .Device(DEVICE_GPU)             \
+                              .TypeConstraint("T")         \
+                              .HostMemory("natoms"),          \
+                          ProdVirialSeRGradOp);
 REGISTER_GPU(float);
 REGISTER_GPU(double);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/source/op/prod_virial_multi_device.cc b/source/op/prod_virial_multi_device.cc
index 31cf5fff9a..2df92eb19a 100644
--- a/source/op/prod_virial_multi_device.cc
+++ b/source/op/prod_virial_multi_device.cc
@@ -35,223 +35,245 @@ REGISTER_OP("ProdVirialSeR")
     .Output("virial: T")
     .Output("atom_virial: T");
 
-template
+template 
 class ProdVirialSeAOp : public OpKernel {
  public:
   explicit ProdVirialSeAOp(OpKernelConstruction* context) : OpKernel(context) {}
   void Compute(OpKernelContext* context) override {
-      deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& net_deriv_tensor  = context->input(context_input_index++);
-    const Tensor& in_deriv_tensor   = context->input(context_input_index++);
-    const Tensor& rij_tensor        = context->input(context_input_index++);
-    const Tensor& nlist_tensor      = context->input(context_input_index++);
-    const Tensor& natoms_tensor     = context->input(context_input_index++);
+    const Tensor& net_deriv_tensor = context->input(context_input_index++);
+    const Tensor& in_deriv_tensor = context->input(context_input_index++);
+    const Tensor& rij_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
     // set size of the sample
-    OP_REQUIRES (context, (net_deriv_tensor.shape().dims() == 2),   errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_tensor.shape().dims() == 2),    errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (rij_tensor.shape().dims() == 2),         errors::InvalidArgument ("Dim of rij should be 2"));
-    OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2),       errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),      errors::InvalidArgument ("Dim of natoms should be 1"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    const int * natoms = natoms_tensor.flat().data();
+    OP_REQUIRES(context, (net_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (rij_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of rij should be 2"));
+    OP_REQUIRES(context, (nlist_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    const int* natoms = natoms_tensor.flat().data();
     int nloc = natoms[0];
     int nall = natoms[1];
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
     // check the sizes
-    OP_REQUIRES (context, (nframes == in_deriv_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == rij_tensor.shape().dim_size(0)),      errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)),    errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (int_64(nloc) * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of descriptors should match"));
-    OP_REQUIRES (context, (int_64(nloc) * nnei * 3 == rij_tensor.shape().dim_size(1)),  errors::InvalidArgument ("dim of rij should be nnei * 3"));
+    OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == rij_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(
+        context,
+        (int_64(nloc) * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1)),
+        errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(context,
+                (int_64(nloc) * nnei * 3 == rij_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("dim of rij should be nnei * 3"));
     // Create an output tensor
-    TensorShape virial_shape ;
-    virial_shape.AddDim (nframes);
-    virial_shape.AddDim (9);
+    TensorShape virial_shape;
+    virial_shape.AddDim(nframes);
+    virial_shape.AddDim(9);
     TensorShape atom_virial_shape;
-    atom_virial_shape.AddDim (nframes);
-    atom_virial_shape.AddDim (9 * nall);
+    atom_virial_shape.AddDim(nframes);
+    atom_virial_shape.AddDim(9 * nall);
     int context_output_index = 0;
     Tensor* virial_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++, 
-        virial_shape, 
-        &virial_tensor));
+    OP_REQUIRES_OK(
+        context, context->allocate_output(context_output_index++, virial_shape,
+                                          &virial_tensor));
     Tensor* atom_virial_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        atom_virial_shape, 
-        &atom_virial_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     atom_virial_shape,
+                                                     &atom_virial_tensor));
+    DeviceFunctor()(device, context->eigen_device());
     // flat the tensors
-    FPTYPE * p_virial = virial_tensor->flat().data();
-    FPTYPE * p_atom_virial = atom_virial_tensor->flat().data();
-    const FPTYPE * p_net_deriv = net_deriv_tensor.flat().data();
-    const FPTYPE * p_in_deriv = in_deriv_tensor.flat().data();
-    const FPTYPE * p_rij = rij_tensor.flat().data();
-    const int * p_nlist = nlist_tensor.flat().data();
-    
-    for(int_64 kk = 0; kk < nframes; ++kk){
-      FPTYPE * virial = p_virial + kk * 9;
-      FPTYPE * atom_virial = p_atom_virial + kk * nall * 9;
-      const FPTYPE * net_deriv = p_net_deriv + kk * nloc * ndescrpt;
-      const FPTYPE * in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
-      const FPTYPE * rij = p_rij + kk * nloc * nnei * 3;
-      const int * nlist = p_nlist + kk * nloc * nnei;      
-    if (device == "GPU") {
-      #if GOOGLE_CUDA
-      deepmd::prod_virial_a_gpu_cuda(    
-          virial, atom_virial,
-          net_deriv, in_deriv, rij, nlist, nloc, nall, nnei);
-      #endif // GOOGLE_CUDA
-      
-      #if TENSORFLOW_USE_ROCM
-      deepmd::prod_virial_a_gpu_rocm(    
-          virial, atom_virial,
-          net_deriv, in_deriv, rij, nlist, nloc, nall, nnei);
-      #endif // TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::prod_virial_a_cpu(    
-          virial, atom_virial,
-          net_deriv, in_deriv, rij, nlist, nloc, nall, nnei);
-    }
+    FPTYPE* p_virial = virial_tensor->flat().data();
+    FPTYPE* p_atom_virial = atom_virial_tensor->flat().data();
+    const FPTYPE* p_net_deriv = net_deriv_tensor.flat().data();
+    const FPTYPE* p_in_deriv = in_deriv_tensor.flat().data();
+    const FPTYPE* p_rij = rij_tensor.flat().data();
+    const int* p_nlist = nlist_tensor.flat().data();
+
+    for (int_64 kk = 0; kk < nframes; ++kk) {
+      FPTYPE* virial = p_virial + kk * 9;
+      FPTYPE* atom_virial = p_atom_virial + kk * nall * 9;
+      const FPTYPE* net_deriv = p_net_deriv + kk * nloc * ndescrpt;
+      const FPTYPE* in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
+      const FPTYPE* rij = p_rij + kk * nloc * nnei * 3;
+      const int* nlist = p_nlist + kk * nloc * nnei;
+      if (device == "GPU") {
+#if GOOGLE_CUDA
+        deepmd::prod_virial_a_gpu_cuda(virial, atom_virial, net_deriv, in_deriv,
+                                       rij, nlist, nloc, nall, nnei);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+        deepmd::prod_virial_a_gpu_rocm(virial, atom_virial, net_deriv, in_deriv,
+                                       rij, nlist, nloc, nall, nnei);
+#endif  // TENSORFLOW_USE_ROCM
+      } else if (device == "CPU") {
+        deepmd::prod_virial_a_cpu(virial, atom_virial, net_deriv, in_deriv, rij,
+                                  nlist, nloc, nall, nnei);
+      }
     }
   }
+
  private:
   std::string device;
 };
 
-template
+template 
 class ProdVirialSeROp : public OpKernel {
  public:
   explicit ProdVirialSeROp(OpKernelConstruction* context) : OpKernel(context) {}
   void Compute(OpKernelContext* context) override {
-      deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& net_deriv_tensor  = context->input(context_input_index++);
-    const Tensor& in_deriv_tensor   = context->input(context_input_index++);
-    const Tensor& rij_tensor        = context->input(context_input_index++);
-    const Tensor& nlist_tensor      = context->input(context_input_index++);
-    const Tensor& natoms_tensor     = context->input(context_input_index++);
+    const Tensor& net_deriv_tensor = context->input(context_input_index++);
+    const Tensor& in_deriv_tensor = context->input(context_input_index++);
+    const Tensor& rij_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
     // set size of the sample
-    OP_REQUIRES (context, (net_deriv_tensor.shape().dims() == 2),   errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_tensor.shape().dims() == 2),    errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (rij_tensor.shape().dims() == 2),         errors::InvalidArgument ("Dim of rij should be 2"));
-    OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2),       errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),      errors::InvalidArgument ("Dim of natoms should be 1"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3), errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    const int * natoms = natoms_tensor.flat().data();
+    OP_REQUIRES(context, (net_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (rij_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of rij should be 2"));
+    OP_REQUIRES(context, (nlist_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    const int* natoms = natoms_tensor.flat().data();
     int nloc = natoms[0];
     int nall = natoms[1];
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
     // check the sizes
-    OP_REQUIRES (context, (nframes == in_deriv_tensor.shape().dim_size(0)), errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == rij_tensor.shape().dim_size(0)),      errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)),    errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (int_64(nloc) * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of descriptors should match"));
-    OP_REQUIRES (context, (int_64(nloc) * nnei * 3 == rij_tensor.shape().dim_size(1)),  errors::InvalidArgument ("dim of rij should be nnei * 3"));
+    OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == rij_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(
+        context,
+        (int_64(nloc) * ndescrpt * 3 == in_deriv_tensor.shape().dim_size(1)),
+        errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(context,
+                (int_64(nloc) * nnei * 3 == rij_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("dim of rij should be nnei * 3"));
     // Create an output tensor
-    TensorShape virial_shape ;
-    virial_shape.AddDim (nframes);
-    virial_shape.AddDim (9);
+    TensorShape virial_shape;
+    virial_shape.AddDim(nframes);
+    virial_shape.AddDim(9);
     TensorShape atom_virial_shape;
-    atom_virial_shape.AddDim (nframes);
-    atom_virial_shape.AddDim (9 * nall);
+    atom_virial_shape.AddDim(nframes);
+    atom_virial_shape.AddDim(9 * nall);
     int context_output_index = 0;
     Tensor* virial_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++, 
-        virial_shape, 
-        &virial_tensor));
+    OP_REQUIRES_OK(
+        context, context->allocate_output(context_output_index++, virial_shape,
+                                          &virial_tensor));
     Tensor* atom_virial_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-        atom_virial_shape, 
-        &atom_virial_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     atom_virial_shape,
+                                                     &atom_virial_tensor));
+    DeviceFunctor()(device, context->eigen_device());
     // flat the tensors
-    FPTYPE * p_virial = virial_tensor->flat().data();
-    FPTYPE * p_atom_virial = atom_virial_tensor->flat().data();
-    const FPTYPE * p_net_deriv = net_deriv_tensor.flat().data();
-    const FPTYPE * p_in_deriv = in_deriv_tensor.flat().data();
-    const FPTYPE * p_rij = rij_tensor.flat().data();
-    const int * p_nlist = nlist_tensor.flat().data();
-    
-    for(int_64 kk = 0; kk < nframes; ++kk){
-      FPTYPE * virial = p_virial + kk * 9;
-      FPTYPE * atom_virial = p_atom_virial + kk * nall * 9;
-      const FPTYPE * net_deriv = p_net_deriv + kk * nloc * ndescrpt;
-      const FPTYPE * in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
-      const FPTYPE * rij = p_rij + kk * nloc * nnei * 3;
-      const int * nlist = p_nlist + kk * nloc * nnei;      
-    if (device == "GPU") {
-      #if GOOGLE_CUDA
-      deepmd::prod_virial_r_gpu_cuda(    
-          virial, atom_virial,
-          net_deriv, in_deriv, rij, nlist, nloc, nall, nnei);
-      #endif // GOOGLE_CUDA
-      
-      #if TENSORFLOW_USE_ROCM
-      deepmd::prod_virial_r_gpu_rocm(    
-          virial, atom_virial,
-          net_deriv, in_deriv, rij, nlist, nloc, nall, nnei);
-      #endif // TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::prod_virial_r_cpu(    
-          virial, atom_virial,
-          net_deriv, in_deriv, rij, nlist, nloc, nall, nnei);
-    }
+    FPTYPE* p_virial = virial_tensor->flat().data();
+    FPTYPE* p_atom_virial = atom_virial_tensor->flat().data();
+    const FPTYPE* p_net_deriv = net_deriv_tensor.flat().data();
+    const FPTYPE* p_in_deriv = in_deriv_tensor.flat().data();
+    const FPTYPE* p_rij = rij_tensor.flat().data();
+    const int* p_nlist = nlist_tensor.flat().data();
+
+    for (int_64 kk = 0; kk < nframes; ++kk) {
+      FPTYPE* virial = p_virial + kk * 9;
+      FPTYPE* atom_virial = p_atom_virial + kk * nall * 9;
+      const FPTYPE* net_deriv = p_net_deriv + kk * nloc * ndescrpt;
+      const FPTYPE* in_deriv = p_in_deriv + kk * nloc * ndescrpt * 3;
+      const FPTYPE* rij = p_rij + kk * nloc * nnei * 3;
+      const int* nlist = p_nlist + kk * nloc * nnei;
+      if (device == "GPU") {
+#if GOOGLE_CUDA
+        deepmd::prod_virial_r_gpu_cuda(virial, atom_virial, net_deriv, in_deriv,
+                                       rij, nlist, nloc, nall, nnei);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+        deepmd::prod_virial_r_gpu_rocm(virial, atom_virial, net_deriv, in_deriv,
+                                       rij, nlist, nloc, nall, nnei);
+#endif  // TENSORFLOW_USE_ROCM
+      } else if (device == "CPU") {
+        deepmd::prod_virial_r_cpu(virial, atom_virial, net_deriv, in_deriv, rij,
+                                  nlist, nloc, nall, nnei);
+      }
     }
   }
+
  private:
   std::string device;
 };
 
 // Register the CPU kernels.
-#define REGISTER_CPU(T)                                                                   \
-REGISTER_KERNEL_BUILDER(                                                                  \
-    Name("ProdVirialSeA").Device(DEVICE_CPU).TypeConstraint("T"),                      \
-    ProdVirialSeAOp);                                                       \
-REGISTER_KERNEL_BUILDER(                                                                  \
-    Name("ProdVirialNorot").Device(DEVICE_CPU).TypeConstraint("T"),                      \
-    ProdVirialSeAOp);                                                       \
-REGISTER_KERNEL_BUILDER(                                                                  \
-    Name("ProdVirialSeR").Device(DEVICE_CPU).TypeConstraint("T"),                      \
-    ProdVirialSeROp);
+#define REGISTER_CPU(T)                                                  \
+  REGISTER_KERNEL_BUILDER(                                               \
+      Name("ProdVirialSeA").Device(DEVICE_CPU).TypeConstraint("T"),   \
+      ProdVirialSeAOp);                                    \
+  REGISTER_KERNEL_BUILDER(                                               \
+      Name("ProdVirialNorot").Device(DEVICE_CPU).TypeConstraint("T"), \
+      ProdVirialSeAOp);                                    \
+  REGISTER_KERNEL_BUILDER(                                               \
+      Name("ProdVirialSeR").Device(DEVICE_CPU).TypeConstraint("T"),   \
+      ProdVirialSeROp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
 // Register the GPU kernels.
-#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM 
-#define REGISTER_GPU(T)                                                                   \
-REGISTER_KERNEL_BUILDER(                                                                  \
-    Name("ProdVirialSeA").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms"), \
-    ProdVirialSeAOp);                                                       \
-REGISTER_KERNEL_BUILDER(                                                                  \
-    Name("ProdVirialNorot").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms"), \
-    ProdVirialSeAOp);                                                       \
-REGISTER_KERNEL_BUILDER(                                                                  \
-    Name("ProdVirialSeR").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("natoms"), \
-    ProdVirialSeROp);
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#define REGISTER_GPU(T)                                   \
+  REGISTER_KERNEL_BUILDER(Name("ProdVirialSeA")           \
+                              .Device(DEVICE_GPU)         \
+                              .TypeConstraint("T")     \
+                              .HostMemory("natoms"),      \
+                          ProdVirialSeAOp); \
+  REGISTER_KERNEL_BUILDER(Name("ProdVirialNorot")         \
+                              .Device(DEVICE_GPU)         \
+                              .TypeConstraint("T")     \
+                              .HostMemory("natoms"),      \
+                          ProdVirialSeAOp); \
+  REGISTER_KERNEL_BUILDER(Name("ProdVirialSeR")           \
+                              .Device(DEVICE_GPU)         \
+                              .TypeConstraint("T")     \
+                              .HostMemory("natoms"),      \
+                          ProdVirialSeROp);
 REGISTER_GPU(float);
 REGISTER_GPU(double);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/source/op/prod_virial_se_a_grad.cc b/source/op/prod_virial_se_a_grad.cc
index 00a88e0f76..aafc6662ae 100644
--- a/source/op/prod_virial_se_a_grad.cc
+++ b/source/op/prod_virial_se_a_grad.cc
@@ -2,59 +2,68 @@
 #include "prod_virial_grad.h"
 
 REGISTER_OP("ProdVirialSeAGrad")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("grad: T")
-.Input("net_deriv: T")
-.Input("in_deriv: T")
-.Input("rij: T")
-.Input("nlist: int32")
-.Input("natoms: int32")
-.Attr("n_a_sel: int")
-.Attr("n_r_sel: int")
-.Output("grad_net: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("grad: T")
+    .Input("net_deriv: T")
+    .Input("in_deriv: T")
+    .Input("rij: T")
+    .Input("nlist: int32")
+    .Input("natoms: int32")
+    .Attr("n_a_sel: int")
+    .Attr("n_r_sel: int")
+    .Output("grad_net: T");
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-template
-class ProdVirialSeAGradOp : public OpKernel 
-{
-public:
-  explicit ProdVirialSeAGradOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));    
-    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));    
+template 
+class ProdVirialSeAGradOp : public OpKernel {
+ public:
+  explicit ProdVirialSeAGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));
+    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));
     n_a_shift = n_a_sel * 4;
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& grad_tensor		= context->input(context_input_index++);
-    const Tensor& net_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& in_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& rij_tensor		= context->input(context_input_index++);
-    const Tensor& nlist_tensor		= context->input(context_input_index++);
-    const Tensor& natoms_tensor		= context->input(context_input_index++);
+    const Tensor& grad_tensor = context->input(context_input_index++);
+    const Tensor& net_deriv_tensor = context->input(context_input_index++);
+    const Tensor& in_deriv_tensor = context->input(context_input_index++);
+    const Tensor& rij_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    TensorShape grad_shape		= grad_tensor.shape();
-    TensorShape net_deriv_shape		= net_deriv_tensor.shape();
-    TensorShape in_deriv_shape		= in_deriv_tensor.shape();
-    TensorShape rij_shape		= rij_tensor.shape();
-    TensorShape nlist_shape		= nlist_tensor.shape();
-
-    OP_REQUIRES (context, (grad_shape.dims() == 2),	errors::InvalidArgument ("Dim of grad should be 2"));
-    OP_REQUIRES (context, (net_deriv_shape.dims() == 2),errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (rij_shape.dims() == 2),	errors::InvalidArgument ("Dim of rij should be 2"));
-    OP_REQUIRES (context, (nlist_shape.dims() == 2),	errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
-
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    TensorShape grad_shape = grad_tensor.shape();
+    TensorShape net_deriv_shape = net_deriv_tensor.shape();
+    TensorShape in_deriv_shape = in_deriv_tensor.shape();
+    TensorShape rij_shape = rij_tensor.shape();
+    TensorShape nlist_shape = nlist_tensor.shape();
+
+    OP_REQUIRES(context, (grad_shape.dims() == 2),
+                errors::InvalidArgument("Dim of grad should be 2"));
+    OP_REQUIRES(context, (net_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (rij_shape.dims() == 2),
+                errors::InvalidArgument("Dim of rij should be 2"));
+    OP_REQUIRES(context, (nlist_shape.dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -62,74 +71,77 @@ class ProdVirialSeAGradOp : public OpKernel
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == in_deriv_shape.dim_size(0)),	errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == rij_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    
-    OP_REQUIRES (context, (9 == grad_shape.dim_size(1)),		errors::InvalidArgument ("input grad shape should be 3 x natoms"));
-    OP_REQUIRES (context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of descriptors should match"));
-    OP_REQUIRES (context, (nloc * nnei * 3 == rij_shape.dim_size(1)),	errors::InvalidArgument ("dim of rij should be  nnei * 3"));
-    OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel),			errors::InvalidArgument ("number of neighbors should match"));
+    OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == in_deriv_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == rij_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == nlist_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+
+    OP_REQUIRES(
+        context, (9 == grad_shape.dim_size(1)),
+        errors::InvalidArgument("input grad shape should be 3 x natoms"));
+    OP_REQUIRES(context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(context, (nloc * nnei * 3 == rij_shape.dim_size(1)),
+                errors::InvalidArgument("dim of rij should be  nnei * 3"));
+    OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
+                errors::InvalidArgument("number of neighbors should match"));
 
     // Create an output tensor
-    TensorShape grad_net_shape ;
-    grad_net_shape.AddDim (nframes);
-    grad_net_shape.AddDim (nloc * ndescrpt);
+    TensorShape grad_net_shape;
+    grad_net_shape.AddDim(nframes);
+    grad_net_shape.AddDim(nloc * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
-    
+    OP_REQUIRES_OK(
+        context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
+
     // flat the tensors
-    auto grad		= grad_tensor		.flat();
-    auto net_deriv	= net_deriv_tensor	.flat();
-    auto in_deriv	= in_deriv_tensor	.flat();
-    auto rij		= rij_tensor		.flat();
-    auto nlist		= nlist_tensor		.flat();
-    auto grad_net	= grad_net_tensor	->flat();
+    auto grad = grad_tensor.flat();
+    auto net_deriv = net_deriv_tensor.flat();
+    auto in_deriv = in_deriv_tensor.flat();
+    auto rij = rij_tensor.flat();
+    auto nlist = nlist_tensor.flat();
+    auto grad_net = grad_net_tensor->flat();
 
     // loop over frames
 #pragma omp parallel for
-    for (int kk = 0; kk < nframes; ++kk){
-
-      int grad_iter	= kk * 9;
-      int in_iter	= kk * nloc * ndescrpt * 3;
-      int rij_iter	= kk * nloc * nnei * 3;
-      int nlist_iter	= kk * nloc * nnei;
-      int grad_net_iter	= kk * nloc * ndescrpt;
-
-      deepmd::prod_virial_grad_a_cpu(
-	  &grad_net(grad_net_iter),
-	  &grad(grad_iter),
-	  &in_deriv(in_iter),
-	  &rij(rij_iter),
-	  &nlist(nlist_iter),
-	  nloc,
-	  nnei);
+    for (int kk = 0; kk < nframes; ++kk) {
+      int grad_iter = kk * 9;
+      int in_iter = kk * nloc * ndescrpt * 3;
+      int rij_iter = kk * nloc * nnei * 3;
+      int nlist_iter = kk * nloc * nnei;
+      int grad_net_iter = kk * nloc * ndescrpt;
+
+      deepmd::prod_virial_grad_a_cpu(&grad_net(grad_net_iter), &grad(grad_iter),
+                                     &in_deriv(in_iter), &rij(rij_iter),
+                                     &nlist(nlist_iter), nloc, nnei);
     }
   }
-private:
+
+ private:
   int n_r_sel, n_a_sel, n_a_shift;
-  inline void 
-  make_descript_range (int & idx_start,
-		       int & idx_end,
-		       const int & nei_idx) {
+  inline void make_descript_range(int& idx_start,
+                                  int& idx_end,
+                                  const int& nei_idx) {
     if (nei_idx < n_a_sel) {
       idx_start = nei_idx * 4;
-      idx_end   = nei_idx * 4 + 4;
-    }
-    else {
+      idx_end = nei_idx * 4 + 4;
+    } else {
       idx_start = n_a_shift + (nei_idx - n_a_sel);
-      idx_end   = n_a_shift + (nei_idx - n_a_sel) + 1;
+      idx_end = n_a_shift + (nei_idx - n_a_sel) + 1;
     }
   }
 };
 
 // Register the CPU kernels.
-#define REGISTER_CPU(T)                                                                   \
-REGISTER_KERNEL_BUILDER(                                                                  \
-    Name("ProdVirialSeAGrad").Device(DEVICE_CPU).TypeConstraint("T"),                      \
-    ProdVirialSeAGradOp); 
+#define REGISTER_CPU(T)                                                    \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("ProdVirialSeAGrad").Device(DEVICE_CPU).TypeConstraint("T"), \
+      ProdVirialSeAGradOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
diff --git a/source/op/prod_virial_se_r_grad.cc b/source/op/prod_virial_se_r_grad.cc
index 7f9005abe4..92e10b3ff7 100644
--- a/source/op/prod_virial_se_r_grad.cc
+++ b/source/op/prod_virial_se_r_grad.cc
@@ -2,54 +2,62 @@
 #include "prod_virial_grad.h"
 
 REGISTER_OP("ProdVirialSeRGrad")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("grad: T")
-.Input("net_deriv: T")
-.Input("in_deriv: T")
-.Input("rij: T")
-.Input("nlist: int32")
-.Input("natoms: int32")
-.Output("grad_net: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("grad: T")
+    .Input("net_deriv: T")
+    .Input("in_deriv: T")
+    .Input("rij: T")
+    .Input("nlist: int32")
+    .Input("natoms: int32")
+    .Output("grad_net: T");
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-template<typename Device, typename FPTYPE>
-class ProdVirialSeRGradOp : public OpKernel 
-{
-public:
-  explicit ProdVirialSeRGradOp(OpKernelConstruction* context) : OpKernel(context) {
-  }
+template <typename Device, typename FPTYPE>
+class ProdVirialSeRGradOp : public OpKernel {
+ public:
+  explicit ProdVirialSeRGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& grad_tensor		= context->input(context_input_index++);
-    const Tensor& net_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& in_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& rij_tensor		= context->input(context_input_index++);
-    const Tensor& nlist_tensor		= context->input(context_input_index++);
-    const Tensor& natoms_tensor		= context->input(context_input_index++);
+    const Tensor& grad_tensor = context->input(context_input_index++);
+    const Tensor& net_deriv_tensor = context->input(context_input_index++);
+    const Tensor& in_deriv_tensor = context->input(context_input_index++);
+    const Tensor& rij_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    TensorShape grad_shape		= grad_tensor.shape();
-    TensorShape net_deriv_shape		= net_deriv_tensor.shape();
-    TensorShape in_deriv_shape		= in_deriv_tensor.shape();
-    TensorShape rij_shape		= rij_tensor.shape();
-    TensorShape nlist_shape		= nlist_tensor.shape();
-
-    OP_REQUIRES (context, (grad_shape.dims() == 2),	errors::InvalidArgument ("Dim of grad should be 2"));
-    OP_REQUIRES (context, (net_deriv_shape.dims() == 2),errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (in_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (rij_shape.dims() == 2),	errors::InvalidArgument ("Dim of rij should be 2"));
-    OP_REQUIRES (context, (nlist_shape.dims() == 2),	errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
-
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat<int>();
+    TensorShape grad_shape = grad_tensor.shape();
+    TensorShape net_deriv_shape = net_deriv_tensor.shape();
+    TensorShape in_deriv_shape = in_deriv_tensor.shape();
+    TensorShape rij_shape = rij_tensor.shape();
+    TensorShape nlist_shape = nlist_tensor.shape();
+
+    OP_REQUIRES(context, (grad_shape.dims() == 2),
+                errors::InvalidArgument("Dim of grad should be 2"));
+    OP_REQUIRES(context, (net_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (in_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (rij_shape.dims() == 2),
+                errors::InvalidArgument("Dim of rij should be 2"));
+    OP_REQUIRES(context, (nlist_shape.dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat<int>();
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -57,58 +65,61 @@ class ProdVirialSeRGradOp : public OpKernel
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == in_deriv_shape.dim_size(0)),	errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == rij_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    
-    OP_REQUIRES (context, (9 == grad_shape.dim_size(1)),		errors::InvalidArgument ("input grad shape should be 3 x natoms"));
-    OP_REQUIRES (context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of descriptors should match"));
-    OP_REQUIRES (context, (nloc * nnei * 3 == rij_shape.dim_size(1)),	errors::InvalidArgument ("dim of rij should be  nnei * 3"));
+    OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == in_deriv_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == rij_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == nlist_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+
+    OP_REQUIRES(
+        context, (9 == grad_shape.dim_size(1)),
+        errors::InvalidArgument("input grad shape should be 3 x natoms"));
+    OP_REQUIRES(context, (nloc * ndescrpt * 3 == in_deriv_shape.dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(context, (nloc * nnei * 3 == rij_shape.dim_size(1)),
+                errors::InvalidArgument("dim of rij should be  nnei * 3"));
 
     // Create an output tensor
-    TensorShape grad_net_shape ;
-    grad_net_shape.AddDim (nframes);
-    grad_net_shape.AddDim (nloc * ndescrpt);
+    TensorShape grad_net_shape;
+    grad_net_shape.AddDim(nframes);
+    grad_net_shape.AddDim(nloc * ndescrpt);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
-    
+    OP_REQUIRES_OK(
+        context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
+
     // flat the tensors
-    auto grad		= grad_tensor		.flat<FPTYPE>();
-    auto net_deriv	= net_deriv_tensor	.flat<FPTYPE>();
-    auto in_deriv	= in_deriv_tensor	.flat<FPTYPE>();
-    auto rij		= rij_tensor		.flat<FPTYPE>();
-    auto nlist		= nlist_tensor		.flat<int>();
-    auto grad_net	= grad_net_tensor	->flat<FPTYPE>();
+    auto grad = grad_tensor.flat<FPTYPE>();
+    auto net_deriv = net_deriv_tensor.flat<FPTYPE>();
+    auto in_deriv = in_deriv_tensor.flat<FPTYPE>();
+    auto rij = rij_tensor.flat<FPTYPE>();
+    auto nlist = nlist_tensor.flat<int>();
+    auto grad_net = grad_net_tensor->flat<FPTYPE>();
 
     // loop over frames
 #pragma omp parallel for
-    for (int kk = 0; kk < nframes; ++kk){
-
-      int grad_iter	= kk * 9;
-      int in_iter	= kk * nloc * ndescrpt * 3;
-      int rij_iter	= kk * nloc * nnei * 3;
-      int nlist_iter	= kk * nloc * nnei;
-      int grad_net_iter	= kk * nloc * ndescrpt;
-
-      deepmd::prod_virial_grad_r_cpu(
-	  &grad_net(grad_net_iter),
-	  &grad(grad_iter),
-	  &in_deriv(in_iter),
-	  &rij(rij_iter),
-	  &nlist(nlist_iter),
-	  nloc,
-	  nnei);
+    for (int kk = 0; kk < nframes; ++kk) {
+      int grad_iter = kk * 9;
+      int in_iter = kk * nloc * ndescrpt * 3;
+      int rij_iter = kk * nloc * nnei * 3;
+      int nlist_iter = kk * nloc * nnei;
+      int grad_net_iter = kk * nloc * ndescrpt;
+
+      deepmd::prod_virial_grad_r_cpu(&grad_net(grad_net_iter), &grad(grad_iter),
+                                     &in_deriv(in_iter), &rij(rij_iter),
+                                     &nlist(nlist_iter), nloc, nnei);
     }
   }
 };
 
 // Register the GPU kernels.
-#define REGISTER_CPU(T)                                                                   \
-REGISTER_KERNEL_BUILDER(                                                                  \
-    Name("ProdVirialSeRGrad").Device(DEVICE_CPU).TypeConstraint<T>("T"),                      \
-    ProdVirialSeRGradOp<CPUDevice, T>); 
+#define REGISTER_CPU(T)                                                    \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("ProdVirialSeRGrad").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
+      ProdVirialSeRGradOp<CPUDevice, T>);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
diff --git a/source/op/quantize_nvnmd.cc b/source/op/quantize_nvnmd.cc
index 6cfd636198..505bdb51ba 100644
--- a/source/op/quantize_nvnmd.cc
+++ b/source/op/quantize_nvnmd.cc
@@ -6,7 +6,7 @@
 
 # Function
 prec = 2**nbit
-y = quantize(x * prec) / prec 
+y = quantize(x * prec) / prec
 quantize is floor/round
 
 # Parameter
@@ -27,122 +27,112 @@ quantize is floor/round
 
 using namespace tensorflow;
 
-
 //- register the operator
 REGISTER_OP("QuantizeNvnmd")
-  .Attr("T: {float, double} = DT_DOUBLE")
-  .Input("x: T")
-  .Attr("isround: int")
-  .Attr("nbit1: int")
-  .Attr("nbit2: int")
-  .Attr("nbit3: int")
-  .Output("y: T");
-
-
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("x: T")
+    .Attr("isround: int")
+    .Attr("nbit1: int")
+    .Attr("nbit2: int")
+    .Attr("nbit3: int")
+    .Output("y: T");
 
 //- create the operator class
 //* the class must inherit the OpKernel Class
 template <typename Device, typename FPTYPE>
 class QuantizeNvnmdOp : public OpKernel {
-public:
-
-/// Constructor.
-explicit QuantizeNvnmdOp(OpKernelConstruction* context) : OpKernel(context) {
-  //- define the attribute of context
-  //* the context is the input from your tensorflow code
-  OP_REQUIRES_OK(context, context->GetAttr("nbit1", &nbit1));
-  OP_REQUIRES_OK(context, context->GetAttr("nbit2", &nbit2));
-  OP_REQUIRES_OK(context, context->GetAttr("nbit3", &nbit3));
-  OP_REQUIRES_OK(context, context->GetAttr("isround", &isround));
-}
-
-
-/// Compute the descriptor
-/// param: context
-void Compute(OpKernelContext* context) override {
-  
-  /* 
-    * Get input
-    * 1.check
-    * 2.get tensor
-    * 3.get shape and check
-    */
-
-//- 1.check
-  DCHECK_EQ(1, context->num_inputs());
-  
-  //- 2.get tensor
-  const Tensor& X = context->input(0);
-  
-  //- 3. get shape and check
-  const TensorShape& shX = X.shape();
-  
-  TensorShape shY;
-
-  int N;
-  if (shX.dims() == 1) {
-    shY.AddDim(shX.dim_size(0));
-    N = shX.dim_size(0);
-  } 
-  if (shX.dims() == 2) {
-    shY.AddDim(shX.dim_size(0));
-    shY.AddDim(shX.dim_size(1));
-    N = shX.dim_size(0) * shX.dim_size(1);
-  }
-  if (shX.dims() == 3) {
-    shY.AddDim(shX.dim_size(0));
-    shY.AddDim(shX.dim_size(1));
-    shY.AddDim(shX.dim_size(2));
-    N = shX.dim_size(0) * shX.dim_size(1) * shX.dim_size(2);
+ public:
+  /// Constructor.
+  explicit QuantizeNvnmdOp(OpKernelConstruction* context) : OpKernel(context) {
+    //- define the attribute of context
+    //* the context is the input from your tensorflow code
+    OP_REQUIRES_OK(context, context->GetAttr("nbit1", &nbit1));
+    OP_REQUIRES_OK(context, context->GetAttr("nbit2", &nbit2));
+    OP_REQUIRES_OK(context, context->GetAttr("nbit3", &nbit3));
+    OP_REQUIRES_OK(context, context->GetAttr("isround", &isround));
   }
-  
-  /*
-    * Calculate the output
-    */
-  
-  
-  Tensor* Y = NULL;
-  
-  OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
-  auto x = X.flat<FPTYPE>().data();
-  auto y = Y->flat<FPTYPE>().data();
-  FPTYPE prec;
-  
-  // calculate
-  int ii;
-
-  if (this->nbit1 < 0){
-    for(ii=0; iinum_inputs());
+
+    //- 2.get tensor
+    const Tensor& X = context->input(0);
+
+    //- 3. get shape and check
+    const TensorShape& shX = X.shape();
+
+    TensorShape shY;
+
+    int N;
+    if (shX.dims() == 1) {
+      shY.AddDim(shX.dim_size(0));
+      N = shX.dim_size(0);
+    }
+    if (shX.dims() == 2) {
+      shY.AddDim(shX.dim_size(0));
+      shY.AddDim(shX.dim_size(1));
+      N = shX.dim_size(0) * shX.dim_size(1);
+    }
+    if (shX.dims() == 3) {
+      shY.AddDim(shX.dim_size(0));
+      shY.AddDim(shX.dim_size(1));
+      shY.AddDim(shX.dim_size(2));
+      N = shX.dim_size(0) * shX.dim_size(1) * shX.dim_size(2);
     }
-  }
-  //
-  else {
-    prec = 1 << this->nbit1;
 
-    if (this->isround)
-      for(ii=0; iiallocate_output(0, shY, &Y));
+    auto x = X.flat<FPTYPE>().data();
+    auto y = Y->flat<FPTYPE>().data();
+    FPTYPE prec;
+
+    // calculate
+    int ii;
+
+    if (this->nbit1 < 0) {
+      for (ii = 0; ii < N; ii++) {
+        y[ii] = x[ii];
       }
-    else
-      for(ii=0; iinbit1;
+
+      if (this->isround)
+        for (ii = 0; ii < N; ii++) {
+          y[ii] = round(x[ii] * prec) / prec;
+        }
+      else
+        for (ii = 0; ii < N; ii++) {
           y[ii] = floor(x[ii] * prec) / prec;
-      }
-  }
-} // Compute
-  
-//- define the private variable for calculation
-private:
-int nbit1, nbit2, nbit3;
-int isround;
+        }
+    }
+  }  // Compute
+
+  //- define the private variable for calculation
+ private:
+  int nbit1, nbit2, nbit3;
+  int isround;
 };
 
-#define REGISTER_CPU(T) \
-REGISTER_KERNEL_BUILDER( \
-    Name("QuantizeNvnmd").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
-    QuantizeNvnmdOp<CPUDevice, T>);
-REGISTER_CPU(float);                  
+#define REGISTER_CPU(T)                                                \
+  REGISTER_KERNEL_BUILDER(                                             \
+      Name("QuantizeNvnmd").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
+      QuantizeNvnmdOp<CPUDevice, T>);
+REGISTER_CPU(float);
 REGISTER_CPU(double);
-
-
-
diff --git a/source/op/soft_min.cc b/source/op/soft_min.cc
index f7770ab58b..ec1233e424 100644
--- a/source/op/soft_min.cc
+++ b/source/op/soft_min.cc
@@ -1,26 +1,26 @@
-#include "custom_op.h"
 #include "ComputeDescriptor.h"
+#include "custom_op.h"
 #include "soft_min_switch.h"
 
 REGISTER_OP("SoftMinSwitch")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("type: int32")
-.Input("rij: T")
-.Input("nlist: int32")
-.Input("natoms: int32")
-.Attr("sel_a: list(int)")
-.Attr("sel_r: list(int)")
-.Attr("alpha: float")
-.Attr("rmin: float")
-.Attr("rmax: float")
-.Output("sw_value: T")
-.Output("sw_deriv: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("type: int32")
+    .Input("rij: T")
+    .Input("nlist: int32")
+    .Input("natoms: int32")
+    .Attr("sel_a: list(int)")
+    .Attr("sel_r: list(int)")
+    .Attr("alpha: float")
+    .Attr("rmin: float")
+    .Attr("rmax: float")
+    .Output("sw_value: T")
+    .Output("sw_deriv: T");
 
 using namespace tensorflow;
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-template<typename Device, typename FPTYPE>
+template <typename Device, typename FPTYPE>
 class SoftMinSwitchOp : public OpKernel {
  public:
   explicit SoftMinSwitchOp(OpKernelConstruction* context) : OpKernel(context) {
@@ -29,33 +29,40 @@ class SoftMinSwitchOp : public OpKernel {
     OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha));
     OP_REQUIRES_OK(context, context->GetAttr("rmin", &rmin));
     OP_REQUIRES_OK(context, context->GetAttr("rmax", &rmax));
-    cum_sum (sec_a, sel_a);
-    cum_sum (sec_r, sel_r);
+    cum_sum(sec_a, sel_a);
+    cum_sum(sec_r, sel_r);
     nnei_a = sec_a.back();
     nnei_r = sec_r.back();
     nnei = nnei_a + nnei_r;
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int tmp_idx = 0;
-    const Tensor& type_tensor	= context->input(tmp_idx++);
-    const Tensor& rij_tensor	= context->input(tmp_idx++);
-    const Tensor& nlist_tensor	= context->input(tmp_idx++);
-    const Tensor& natoms_tensor	= context->input(tmp_idx++);
+    const Tensor& type_tensor = context->input(tmp_idx++);
+    const Tensor& rij_tensor = context->input(tmp_idx++);
+    const Tensor& nlist_tensor = context->input(tmp_idx++);
+    const Tensor& natoms_tensor = context->input(tmp_idx++);
 
     // set size of the sample
-    OP_REQUIRES (context, (type_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of type should be 2"));
-    OP_REQUIRES (context, (rij_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of rij should be 2"));
-    OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (type_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of type should be 2"));
+    OP_REQUIRES(context, (rij_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of rij should be 2"));
+    OP_REQUIRES(context, (nlist_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
 
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat<int>();
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat<int>();
 
     int nframes = type_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -65,72 +72,72 @@ class SoftMinSwitchOp : public OpKernel {
     assert(sel_r.size() == ntypes);
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == type_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == rij_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nall == type_tensor.shape().dim_size(1)),		errors::InvalidArgument ("shape of type should be nall"));
-    OP_REQUIRES (context, (3 * nnei * nloc == rij_tensor.shape().dim_size(1)),	errors::InvalidArgument ("shape of rij should be 3 * nloc * nnei"));
-    OP_REQUIRES (context, (nnei * nloc == nlist_tensor.shape().dim_size(1)),	errors::InvalidArgument ("shape of nlist should be nloc * nnei"));
+    OP_REQUIRES(context, (nframes == type_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == rij_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nall == type_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("shape of type should be nall"));
+    OP_REQUIRES(
+        context, (3 * nnei * nloc == rij_tensor.shape().dim_size(1)),
+        errors::InvalidArgument("shape of rij should be 3 * nloc * nnei"));
+    OP_REQUIRES(
+        context, (nnei * nloc == nlist_tensor.shape().dim_size(1)),
+        errors::InvalidArgument("shape of nlist should be nloc * nnei"));
 
     // Create an output tensor
-    TensorShape sw_value_shape ;
-    sw_value_shape.AddDim (nframes);
-    sw_value_shape.AddDim (nloc);
-    TensorShape sw_deriv_shape ;
-    sw_deriv_shape.AddDim (nframes);
-    sw_deriv_shape.AddDim (3 * nnei * nloc);
+    TensorShape sw_value_shape;
+    sw_value_shape.AddDim(nframes);
+    sw_value_shape.AddDim(nloc);
+    TensorShape sw_deriv_shape;
+    sw_deriv_shape.AddDim(nframes);
+    sw_deriv_shape.AddDim(3 * nnei * nloc);
     Tensor* sw_value_tensor = NULL;
     Tensor* sw_deriv_tensor = NULL;
     tmp_idx = 0;
-    OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, sw_value_shape, &sw_value_tensor));
-    OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, sw_deriv_shape, &sw_deriv_tensor ));
-    
+    OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, sw_value_shape,
+                                                     &sw_value_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(tmp_idx++, sw_deriv_shape,
+                                                     &sw_deriv_tensor));
+
     // flat the tensors
-    auto type	= type_tensor	.matrix<int>();
-    auto rij	= rij_tensor	.matrix<FPTYPE>();
-    auto nlist	= nlist_tensor	.matrix<int>();
-    auto sw_value = sw_value_tensor	->matrix<FPTYPE>();
-    auto sw_deriv = sw_deriv_tensor	->matrix<FPTYPE>();
+    auto type = type_tensor.matrix<int>();
+    auto rij = rij_tensor.matrix<FPTYPE>();
+    auto nlist = nlist_tensor.matrix<int>();
+    auto sw_value = sw_value_tensor->matrix<FPTYPE>();
+    auto sw_deriv = sw_deriv_tensor->matrix<FPTYPE>();
 
     // loop over samples
-#pragma omp parallel for 
-    for (int kk = 0; kk < nframes; ++kk){
-      deepmd::soft_min_switch_cpu(
-	  &sw_value(kk, 0),
-	  &sw_deriv(kk, 0),
-	  &rij(kk, 0),
-	  &nlist(kk, 0),
-	  nloc,
-	  nnei,
-	  alpha,
-	  rmin,
-	  rmax);
+#pragma omp parallel for
+    for (int kk = 0; kk < nframes; ++kk) {
+      deepmd::soft_min_switch_cpu<FPTYPE>(&sw_value(kk, 0), &sw_deriv(kk, 0),
+                                          &rij(kk, 0), &nlist(kk, 0), nloc,
+                                          nnei, alpha, rmin, rmax);
     }
   }
-private:
+
+ private:
   std::vector<int32> sel_r;
   std::vector<int32> sel_a;
   std::vector<int> sec_a;
   std::vector<int> sec_r;
   float alpha, rmin, rmax;
   int nnei, nnei_a, nnei_r;
-  void
-  cum_sum (std::vector<int> & sec,
-	   const std::vector<int32> & n_sel) const {
-    sec.resize (n_sel.size() + 1);
+  void cum_sum(std::vector<int>& sec, const std::vector<int32>& n_sel) const {
+    sec.resize(n_sel.size() + 1);
     sec[0] = 0;
-    for (int ii = 1; ii < sec.size(); ++ii){
-      sec[ii] = sec[ii-1] + n_sel[ii-1];
+    for (int ii = 1; ii < sec.size(); ++ii) {
+      sec[ii] = sec[ii - 1] + n_sel[ii - 1];
     }
   }
 };
 
 // Register the CPU kernels.
-#define REGISTER_CPU(T)                                                                   \
-REGISTER_KERNEL_BUILDER(                                                                  \
-    Name("SoftMinSwitch").Device(DEVICE_CPU).TypeConstraint<T>("T"),                      \
-    SoftMinSwitchOp<CPUDevice, T>); 
+#define REGISTER_CPU(T)                                                \
+  REGISTER_KERNEL_BUILDER(                                             \
+      Name("SoftMinSwitch").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
+      SoftMinSwitchOp<CPUDevice, T>);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
-
-
diff --git a/source/op/soft_min_force.cc b/source/op/soft_min_force.cc
index f10a48dc26..14d1655171 100644
--- a/source/op/soft_min_force.cc
+++ b/source/op/soft_min_force.cc
@@ -2,20 +2,20 @@
 #include "soft_min_switch_force.h"
 
 REGISTER_OP("SoftMinForce")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("du: T")
-.Input("sw_deriv: T")
-.Input("nlist: int32")
-.Input("natoms: int32")
-.Attr("n_a_sel: int")
-.Attr("n_r_sel: int")
-.Output("force: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("du: T")
+    .Input("sw_deriv: T")
+    .Input("nlist: int32")
+    .Input("natoms: int32")
+    .Attr("n_a_sel: int")
+    .Attr("n_r_sel: int")
+    .Output("force: T");
 
 using namespace tensorflow;
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-template<typename Device, typename FPTYPE>
+template <typename Device, typename FPTYPE>
 class SoftMinForceOp : public OpKernel {
  public:
   explicit SoftMinForceOp(OpKernelConstruction* context) : OpKernel(context) {
@@ -24,24 +24,31 @@ class SoftMinForceOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
-    const Tensor& du_tensor		= context->input(0);
-    const Tensor& sw_deriv_tensor	= context->input(1);
-    const Tensor& nlist_tensor		= context->input(2);
-    const Tensor& natoms_tensor		= context->input(3);
+    const Tensor& du_tensor = context->input(0);
+    const Tensor& sw_deriv_tensor = context->input(1);
+    const Tensor& nlist_tensor = context->input(2);
+    const Tensor& natoms_tensor = context->input(3);
 
     // set size of the sample
-    OP_REQUIRES (context, (du_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of du should be 2"));
-    OP_REQUIRES (context, (sw_deriv_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of switch deriv should be 2"));
-    OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (du_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of du should be 2"));
+    OP_REQUIRES(context, (sw_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of switch deriv should be 2"));
+    OP_REQUIRES(context, (nlist_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
 
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat<int>();
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat<int>();
 
     int nframes = du_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -49,20 +56,27 @@ class SoftMinForceOp : public OpKernel {
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == sw_deriv_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
+    OP_REQUIRES(context, (nframes == sw_deriv_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
 
-    OP_REQUIRES (context, (nloc == du_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of du should match"));
-    OP_REQUIRES (context, (nloc * nnei * 3 == sw_deriv_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of switch deriv should match"));
-    OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel),				errors::InvalidArgument ("number of neighbors should match"));
+    OP_REQUIRES(context, (nloc == du_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of du should match"));
+    OP_REQUIRES(context,
+                (nloc * nnei * 3 == sw_deriv_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of switch deriv should match"));
+    OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
+                errors::InvalidArgument("number of neighbors should match"));
 
     // Create an output tensor
-    TensorShape force_shape ;
-    force_shape.AddDim (nframes);
-    force_shape.AddDim (3 * nall);
+    TensorShape force_shape;
+    force_shape.AddDim(nframes);
+    force_shape.AddDim(3 * nall);
     Tensor* force_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, force_shape, &force_tensor));
-    
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, force_shape, &force_tensor));
+
     // flat the tensors
     auto du = du_tensor.matrix<FPTYPE>();
     auto sw_deriv = sw_deriv_tensor.matrix<FPTYPE>();
@@ -70,26 +84,22 @@ class SoftMinForceOp : public OpKernel {
     auto force = force_tensor->matrix<FPTYPE>();
 
     // loop over samples
-#pragma omp parallel for 
-    for (int kk = 0; kk < nframes; ++kk){
-      deepmd::soft_min_switch_force_cpu(
-	  &force(kk,0),
-	  &du(kk,0),
-	  &sw_deriv(kk,0),
-	  &nlist(kk,0),
-	  nloc,
-	  nall,
-	  nnei);
+#pragma omp parallel for
+    for (int kk = 0; kk < nframes; ++kk) {
+      deepmd::soft_min_switch_force_cpu(&force(kk, 0), &du(kk, 0),
+                                        &sw_deriv(kk, 0), &nlist(kk, 0), nloc,
+                                        nall, nnei);
     }
   }
-private:
+
+ private:
   int n_r_sel, n_a_sel;
 };
 
 // Register the CPU kernels.
-#define REGISTER_CPU(T)                                                                   \
-REGISTER_KERNEL_BUILDER(                                                                  \
-    Name("SoftMinForce").Device(DEVICE_CPU).TypeConstraint<T>("T"),                      \
-    SoftMinForceOp<CPUDevice, T>); 
+#define REGISTER_CPU(T)                                               \
+  REGISTER_KERNEL_BUILDER(                                            \
+      Name("SoftMinForce").Device(DEVICE_CPU).TypeConstraint<T>("T"), \
+      SoftMinForceOp<CPUDevice, T>);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
diff --git a/source/op/soft_min_force_grad.cc b/source/op/soft_min_force_grad.cc
index d5095d1005..c4769d91a4 100644
--- a/source/op/soft_min_force_grad.cc
+++ b/source/op/soft_min_force_grad.cc
@@ -2,105 +2,119 @@
 #include "soft_min_switch_force_grad.h"
 
 REGISTER_OP("SoftMinForceGrad")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("grad: T")
-.Input("du: T")
-.Input("sw_deriv: T")
-.Input("nlist: int32")
-.Input("natoms: int32")
-.Attr("n_a_sel: int")
-.Attr("n_r_sel: int")
-.Output("grad_net: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("grad: T")
+    .Input("du: T")
+    .Input("sw_deriv: T")
+    .Input("nlist: int32")
+    .Input("natoms: int32")
+    .Attr("n_a_sel: int")
+    .Attr("n_r_sel: int")
+    .Output("grad_net: T");
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-template
-class SoftMinForceGradOp : public OpKernel 
-{
-public:
-  explicit SoftMinForceGradOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));    
-    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));    
+template 
+class SoftMinForceGradOp : public OpKernel {
+ public:
+  explicit SoftMinForceGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));
+    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& grad_tensor		= context->input(context_input_index++);
-    const Tensor& du_tensor		= context->input(context_input_index++);
-    const Tensor& sw_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& nlist_tensor		= context->input(context_input_index++);
-    const Tensor& natoms_tensor		= context->input(context_input_index++);
+    const Tensor& grad_tensor = context->input(context_input_index++);
+    const Tensor& du_tensor = context->input(context_input_index++);
+    const Tensor& sw_deriv_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    TensorShape grad_shape		= grad_tensor.shape();
-    TensorShape du_shape		= du_tensor.shape();
-    TensorShape sw_deriv_shape		= sw_deriv_tensor.shape();
-    TensorShape nlist_shape		= nlist_tensor.shape();
+    TensorShape grad_shape = grad_tensor.shape();
+    TensorShape du_shape = du_tensor.shape();
+    TensorShape sw_deriv_shape = sw_deriv_tensor.shape();
+    TensorShape nlist_shape = nlist_tensor.shape();
 
-    OP_REQUIRES (context, (grad_shape.dims() == 2),	errors::InvalidArgument ("Dim of grad should be 2"));
-    OP_REQUIRES (context, (du_shape.dims() == 2),	errors::InvalidArgument ("Dim of du should be 2"));
-    OP_REQUIRES (context, (sw_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of sw deriv should be 2"));
-    OP_REQUIRES (context, (nlist_shape.dims() == 2),	errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
+    OP_REQUIRES(context, (grad_shape.dims() == 2),
+                errors::InvalidArgument("Dim of grad should be 2"));
+    OP_REQUIRES(context, (du_shape.dims() == 2),
+                errors::InvalidArgument("Dim of du should be 2"));
+    OP_REQUIRES(context, (sw_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of sw deriv should be 2"));
+    OP_REQUIRES(context, (nlist_shape.dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
 
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = du_tensor.shape().dim_size(0);
     int nloc = natoms(0);
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == sw_deriv_shape.dim_size(0)),	errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    
-    OP_REQUIRES (context, (nloc == du_tensor.shape().dim_size(1)),	errors::InvalidArgument ("number of du should match"));
-    OP_REQUIRES (context, (nloc * 3 == grad_shape.dim_size(1)),		errors::InvalidArgument ("input grad shape should be 3 x natoms"));
-    OP_REQUIRES (context, (nloc * nnei * 3 == sw_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of sw deriv should match"));
-    OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel),			errors::InvalidArgument ("number of neighbors should match"));
+    OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == sw_deriv_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == nlist_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+
+    OP_REQUIRES(context, (nloc == du_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of du should match"));
+    OP_REQUIRES(
+        context, (nloc * 3 == grad_shape.dim_size(1)),
+        errors::InvalidArgument("input grad shape should be 3 x natoms"));
+    OP_REQUIRES(context, (nloc * nnei * 3 == sw_deriv_shape.dim_size(1)),
+                errors::InvalidArgument("number of sw deriv should match"));
+    OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
+                errors::InvalidArgument("number of neighbors should match"));
 
     // Create an output tensor
-    TensorShape grad_net_shape ;
-    grad_net_shape.AddDim (nframes);
-    grad_net_shape.AddDim (nloc);
+    TensorShape grad_net_shape;
+    grad_net_shape.AddDim(nframes);
+    grad_net_shape.AddDim(nloc);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
-    
+    OP_REQUIRES_OK(
+        context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
+
     // flat the tensors
-    auto grad		= grad_tensor		.matrix();
-    auto du		= du_tensor		.matrix();
-    auto sw_deriv	= sw_deriv_tensor	.matrix();
-    auto nlist		= nlist_tensor		.matrix();
-    auto grad_net	= grad_net_tensor	->matrix();
+    auto grad = grad_tensor.matrix();
+    auto du = du_tensor.matrix();
+    auto sw_deriv = sw_deriv_tensor.matrix();
+    auto nlist = nlist_tensor.matrix();
+    auto grad_net = grad_net_tensor->matrix();
 
     // loop over frames
 #pragma omp parallel for
-    for (int kk = 0; kk < nframes; ++kk){
-      deepmd::soft_min_switch_force_grad_cpu(
-	  &grad_net(kk,0),
-	  &grad(kk,0),
-	  &sw_deriv(kk,0),
-	  &nlist(kk,0),
-	  nloc,
-	  nnei);
+    for (int kk = 0; kk < nframes; ++kk) {
+      deepmd::soft_min_switch_force_grad_cpu(&grad_net(kk, 0), &grad(kk, 0),
+                                             &sw_deriv(kk, 0), &nlist(kk, 0),
+                                             nloc, nnei);
     }
   }
-private:
+
+ private:
   int n_r_sel, n_a_sel;
 };
 
 // Register the CPU kernels.
-#define REGISTER_CPU(T)                                                                   \
-REGISTER_KERNEL_BUILDER(                                                                  \
-    Name("SoftMinForceGrad").Device(DEVICE_CPU).TypeConstraint("T"),                      \
-    SoftMinForceGradOp); 
+#define REGISTER_CPU(T)                                                   \
+  REGISTER_KERNEL_BUILDER(                                                \
+      Name("SoftMinForceGrad").Device(DEVICE_CPU).TypeConstraint("T"), \
+      SoftMinForceGradOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
diff --git a/source/op/soft_min_virial.cc b/source/op/soft_min_virial.cc
index 72d4a21e55..e172b1fe26 100644
--- a/source/op/soft_min_virial.cc
+++ b/source/op/soft_min_virial.cc
@@ -2,22 +2,22 @@
 #include "soft_min_switch_virial.h"
 
 REGISTER_OP("SoftMinVirial")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("du: T")
-.Input("sw_deriv: T")
-.Input("rij: T")
-.Input("nlist: int32")
-.Input("natoms: int32")
-.Attr("n_a_sel: int")
-.Attr("n_r_sel: int")
-.Output("virial: T")
-.Output("atom_virial: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("du: T")
+    .Input("sw_deriv: T")
+    .Input("rij: T")
+    .Input("nlist: int32")
+    .Input("natoms: int32")
+    .Attr("n_a_sel: int")
+    .Attr("n_r_sel: int")
+    .Output("virial: T")
+    .Output("atom_virial: T");
 
 using namespace tensorflow;
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-template
+template 
 class SoftMinVirialOp : public OpKernel {
  public:
   explicit SoftMinVirialOp(OpKernelConstruction* context) : OpKernel(context) {
@@ -26,27 +26,35 @@ class SoftMinVirialOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& du_tensor		= context->input(context_input_index++);
-    const Tensor& sw_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& rij_tensor		= context->input(context_input_index++);
-    const Tensor& nlist_tensor		= context->input(context_input_index++);
-    const Tensor& natoms_tensor		= context->input(context_input_index++);
+    const Tensor& du_tensor = context->input(context_input_index++);
+    const Tensor& sw_deriv_tensor = context->input(context_input_index++);
+    const Tensor& rij_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    OP_REQUIRES (context, (du_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (sw_deriv_tensor.shape().dims() == 2),	errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (rij_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of rij should be 2"));
-    OP_REQUIRES (context, (nlist_tensor.shape().dims() == 2),		errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
-
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    OP_REQUIRES(context, (du_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (sw_deriv_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (rij_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of rij should be 2"));
+    OP_REQUIRES(context, (nlist_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = du_tensor.shape().dim_size(0);
     int nloc = natoms(0);
@@ -54,27 +62,37 @@ class SoftMinVirialOp : public OpKernel {
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == sw_deriv_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == rij_tensor.shape().dim_size(0)),		errors::InvalidArgument ("number of samples should match"));
-    OP_REQUIRES (context, (nframes == nlist_tensor.shape().dim_size(0)),	errors::InvalidArgument ("number of samples should match"));
-
-    OP_REQUIRES (context, (nloc == du_tensor.shape().dim_size(1)),		errors::InvalidArgument ("number of du should match"));
-    OP_REQUIRES (context, (nloc * nnei * 3 == sw_deriv_tensor.shape().dim_size(1)), errors::InvalidArgument ("number of sw_deriv should match"));
-    OP_REQUIRES (context, (nloc * nnei * 3 == rij_tensor.shape().dim_size(1)),	errors::InvalidArgument ("dim of rij should be nnei * 3"));
-    OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel),				errors::InvalidArgument ("number of neighbors should match"));
+    OP_REQUIRES(context, (nframes == sw_deriv_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == rij_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+    OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
+                errors::InvalidArgument("number of samples should match"));
+
+    OP_REQUIRES(context, (nloc == du_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of du should match"));
+    OP_REQUIRES(context,
+                (nloc * nnei * 3 == sw_deriv_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of sw_deriv should match"));
+    OP_REQUIRES(context, (nloc * nnei * 3 == rij_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("dim of rij should be nnei * 3"));
+    OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
+                errors::InvalidArgument("number of neighbors should match"));
 
     // Create an output tensor
-    TensorShape virial_shape ;
-    virial_shape.AddDim (nframes);
-    virial_shape.AddDim (9);
+    TensorShape virial_shape;
+    virial_shape.AddDim(nframes);
+    virial_shape.AddDim(9);
     Tensor* virial_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, virial_shape, &virial_tensor));
-    TensorShape atom_virial_shape ;
-    atom_virial_shape.AddDim (nframes);
-    atom_virial_shape.AddDim (9 * nall);
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, virial_shape, &virial_tensor));
+    TensorShape atom_virial_shape;
+    atom_virial_shape.AddDim(nframes);
+    atom_virial_shape.AddDim(9 * nall);
     Tensor* atom_virial_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(1, atom_virial_shape, &atom_virial_tensor));
-    
+    OP_REQUIRES_OK(context, context->allocate_output(1, atom_virial_shape,
+                                                     &atom_virial_tensor));
+
     // flat the tensors
     auto du = du_tensor.matrix();
     auto sw_deriv = sw_deriv_tensor.matrix();
@@ -85,29 +103,21 @@ class SoftMinVirialOp : public OpKernel {
 
     // loop over samples
 #pragma omp parallel for
-    for (int kk = 0; kk < nframes; ++kk){
+    for (int kk = 0; kk < nframes; ++kk) {
       deepmd::soft_min_switch_virial_cpu(
-	  &virial(kk,0),
-	  &atom_virial(kk,0),
-	  &du(kk,0),
-	  &sw_deriv(kk,0),
-	  &rij(kk,0),
-	  &nlist(kk,0),
-	  nloc,
-	  nall,
-	  nnei);
+          &virial(kk, 0), &atom_virial(kk, 0), &du(kk, 0), &sw_deriv(kk, 0),
+          &rij(kk, 0), &nlist(kk, 0), nloc, nall, nnei);
     }
   }
-private:
+
+ private:
   int n_r_sel, n_a_sel;
 };
 
 // Register the CPU kernels.
-#define REGISTER_CPU(T)                                                                   \
-REGISTER_KERNEL_BUILDER(                                                                  \
-    Name("SoftMinVirial").Device(DEVICE_CPU).TypeConstraint("T"),                      \
-    SoftMinVirialOp); 
+#define REGISTER_CPU(T)                                                \
+  REGISTER_KERNEL_BUILDER(                                             \
+      Name("SoftMinVirial").Device(DEVICE_CPU).TypeConstraint("T"), \
+      SoftMinVirialOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
-
-
diff --git a/source/op/soft_min_virial_grad.cc b/source/op/soft_min_virial_grad.cc
index f92ac2a5c9..396a3de620 100644
--- a/source/op/soft_min_virial_grad.cc
+++ b/source/op/soft_min_virial_grad.cc
@@ -2,126 +2,140 @@
 #include "soft_min_switch_virial_grad.h"
 
 REGISTER_OP("SoftMinVirialGrad")
-.Attr("T: {float, double} = DT_DOUBLE")
-.Input("grad: T")
-.Input("du: T")
-.Input("sw_deriv: T")
-.Input("rij: T")
-.Input("nlist: int32")
-.Input("natoms: int32")
-.Attr("n_a_sel: int")
-.Attr("n_r_sel: int")
-.Output("grad_net: T");
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("grad: T")
+    .Input("du: T")
+    .Input("sw_deriv: T")
+    .Input("rij: T")
+    .Input("nlist: int32")
+    .Input("natoms: int32")
+    .Attr("n_a_sel: int")
+    .Attr("n_r_sel: int")
+    .Output("grad_net: T");
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-template
-class SoftMinVirialGradOp : public OpKernel 
-{
-public:
-  explicit SoftMinVirialGradOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));    
-    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));    
+template 
+class SoftMinVirialGradOp : public OpKernel {
+ public:
+  explicit SoftMinVirialGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("n_a_sel", &n_a_sel));
+    OP_REQUIRES_OK(context, context->GetAttr("n_r_sel", &n_r_sel));
   }
 
   void Compute(OpKernelContext* context) override {
-    deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& grad_tensor		= context->input(context_input_index++);
-    const Tensor& du_tensor		= context->input(context_input_index++);
-    const Tensor& sw_deriv_tensor	= context->input(context_input_index++);
-    const Tensor& rij_tensor		= context->input(context_input_index++);
-    const Tensor& nlist_tensor		= context->input(context_input_index++);
-    const Tensor& natoms_tensor		= context->input(context_input_index++);
+    const Tensor& grad_tensor = context->input(context_input_index++);
+    const Tensor& du_tensor = context->input(context_input_index++);
+    const Tensor& sw_deriv_tensor = context->input(context_input_index++);
+    const Tensor& rij_tensor = context->input(context_input_index++);
+    const Tensor& nlist_tensor = context->input(context_input_index++);
+    const Tensor& natoms_tensor = context->input(context_input_index++);
 
     // set size of the sample
-    TensorShape grad_shape		= grad_tensor.shape();
-    TensorShape du_shape		= du_tensor.shape();
-    TensorShape sw_deriv_shape		= sw_deriv_tensor.shape();
-    TensorShape rij_shape		= rij_tensor.shape();
-    TensorShape nlist_shape		= nlist_tensor.shape();
-
-    OP_REQUIRES (context, (grad_shape.dims() == 2),	errors::InvalidArgument ("Dim of grad should be 2"));
-    OP_REQUIRES (context, (du_shape.dims() == 2),errors::InvalidArgument ("Dim of net deriv should be 2"));
-    OP_REQUIRES (context, (sw_deriv_shape.dims() == 2), errors::InvalidArgument ("Dim of input deriv should be 2"));
-    OP_REQUIRES (context, (rij_shape.dims() == 2),	errors::InvalidArgument ("Dim of rij should be 2"));
-    OP_REQUIRES (context, (nlist_shape.dims() == 2),	errors::InvalidArgument ("Dim of nlist should be 2"));
-    OP_REQUIRES (context, (natoms_tensor.shape().dims() == 1),		errors::InvalidArgument ("Dim of natoms should be 1"));
-
-    OP_REQUIRES (context, (natoms_tensor.shape().dim_size(0) >= 3),	errors::InvalidArgument ("number of atoms should be larger than (or equal to) 3"));
-    auto natoms	= natoms_tensor	.flat();
+    TensorShape grad_shape = grad_tensor.shape();
+    TensorShape du_shape = du_tensor.shape();
+    TensorShape sw_deriv_shape = sw_deriv_tensor.shape();
+    TensorShape rij_shape = rij_tensor.shape();
+    TensorShape nlist_shape = nlist_tensor.shape();
+
+    OP_REQUIRES(context, (grad_shape.dims() == 2),
+                errors::InvalidArgument("Dim of grad should be 2"));
+    OP_REQUIRES(context, (du_shape.dims() == 2),
+                errors::InvalidArgument("Dim of net deriv should be 2"));
+    OP_REQUIRES(context, (sw_deriv_shape.dims() == 2),
+                errors::InvalidArgument("Dim of input deriv should be 2"));
+    OP_REQUIRES(context, (rij_shape.dims() == 2),
+                errors::InvalidArgument("Dim of rij should be 2"));
+    OP_REQUIRES(context, (nlist_shape.dims() == 2),
+                errors::InvalidArgument("Dim of nlist should be 2"));
+    OP_REQUIRES(context, (natoms_tensor.shape().dims() == 1),
+                errors::InvalidArgument("Dim of natoms should be 1"));
+
+    OP_REQUIRES(context, (natoms_tensor.shape().dim_size(0) >= 3),
+                errors::InvalidArgument(
+                    "number of atoms should be larger than (or equal to) 3"));
+    auto natoms = natoms_tensor.flat();
 
     int nframes = du_tensor.shape().dim_size(0);
     int nloc = natoms(0);
     int nnei = nlist_tensor.shape().dim_size(1) / nloc;
 
     // check the sizes
-    OP_REQUIRES (context, (nframes == grad_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == sw_deriv_shape.dim_size(0)),	errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == rij_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    OP_REQUIRES (context, (nframes == nlist_shape.dim_size(0)),		errors::InvalidArgument ("number of frames should match"));
-    
-    OP_REQUIRES (context, (9 == grad_shape.dim_size(1)),		errors::InvalidArgument ("input grad shape should be 3 x natoms"));
-    OP_REQUIRES (context, (nloc == du_tensor.shape().dim_size(1)),	errors::InvalidArgument ("number of du should match"));
-    OP_REQUIRES (context, (nloc * nnei * 3 == sw_deriv_shape.dim_size(1)),errors::InvalidArgument ("number of descriptors should match"));
-    OP_REQUIRES (context, (nloc * nnei * 3 == rij_shape.dim_size(1)),	errors::InvalidArgument ("dim of rij should be  nnei * 3"));
-    OP_REQUIRES (context, (nnei == n_a_sel + n_r_sel),			errors::InvalidArgument ("number of neighbors should match"));
+    OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == sw_deriv_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == rij_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+    OP_REQUIRES(context, (nframes == nlist_shape.dim_size(0)),
+                errors::InvalidArgument("number of frames should match"));
+
+    OP_REQUIRES(
+        context, (9 == grad_shape.dim_size(1)),
+        errors::InvalidArgument("input grad shape should be 3 x natoms"));
+    OP_REQUIRES(context, (nloc == du_tensor.shape().dim_size(1)),
+                errors::InvalidArgument("number of du should match"));
+    OP_REQUIRES(context, (nloc * nnei * 3 == sw_deriv_shape.dim_size(1)),
+                errors::InvalidArgument("number of descriptors should match"));
+    OP_REQUIRES(context, (nloc * nnei * 3 == rij_shape.dim_size(1)),
+                errors::InvalidArgument("dim of rij should be  nnei * 3"));
+    OP_REQUIRES(context, (nnei == n_a_sel + n_r_sel),
+                errors::InvalidArgument("number of neighbors should match"));
 
     // Create an output tensor
-    TensorShape grad_net_shape ;
-    grad_net_shape.AddDim (nframes);
-    grad_net_shape.AddDim (nloc);
+    TensorShape grad_net_shape;
+    grad_net_shape.AddDim(nframes);
+    grad_net_shape.AddDim(nloc);
 
     // allocate the output tensor
     Tensor* grad_net_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
-    
+    OP_REQUIRES_OK(
+        context, context->allocate_output(0, grad_net_shape, &grad_net_tensor));
+
     // flat the tensors
-    auto grad		= grad_tensor		.matrix();
-    auto du		= du_tensor		.matrix();
-    auto sw_deriv	= sw_deriv_tensor	.matrix();
-    auto rij		= rij_tensor		.matrix();
-    auto nlist		= nlist_tensor		.matrix();
-    auto grad_net	= grad_net_tensor	->matrix();
+    auto grad = grad_tensor.matrix();
+    auto du = du_tensor.matrix();
+    auto sw_deriv = sw_deriv_tensor.matrix();
+    auto rij = rij_tensor.matrix();
+    auto nlist = nlist_tensor.matrix();
+    auto grad_net = grad_net_tensor->matrix();
 
     // loop over frames
 #pragma omp parallel for
-    for (int kk = 0; kk < nframes; ++kk){
-      deepmd::soft_min_switch_virial_grad_cpu(
-	  &grad_net(kk, 0),
-	  &grad(kk, 0),
-	  &sw_deriv(kk, 0),
-	  &rij(kk, 0),
-	  &nlist(kk, 0),
-	  nloc,
-	  nnei);
+    for (int kk = 0; kk < nframes; ++kk) {
+      deepmd::soft_min_switch_virial_grad_cpu(&grad_net(kk, 0), &grad(kk, 0),
+                                              &sw_deriv(kk, 0), &rij(kk, 0),
+                                              &nlist(kk, 0), nloc, nnei);
     }
   }
-private:
+
+ private:
   int n_r_sel, n_a_sel, n_a_shift;
-  inline void 
-  make_descript_range (int & idx_start,
-		       int & idx_end,
-		       const int & nei_idx) {
+  inline void make_descript_range(int& idx_start,
+                                  int& idx_end,
+                                  const int& nei_idx) {
     if (nei_idx < n_a_sel) {
       idx_start = nei_idx * 4;
-      idx_end   = nei_idx * 4 + 4;
-    }
-    else {
+      idx_end = nei_idx * 4 + 4;
+    } else {
       idx_start = n_a_shift + (nei_idx - n_a_sel);
-      idx_end   = n_a_shift + (nei_idx - n_a_sel) + 1;
+      idx_end = n_a_shift + (nei_idx - n_a_sel) + 1;
     }
   }
 };
 
 // Register the CPU kernels.
-#define REGISTER_CPU(T)                                                                   \
-REGISTER_KERNEL_BUILDER(                                                                  \
-    Name("SoftMinVirialGrad").Device(DEVICE_CPU).TypeConstraint("T"),                      \
-    SoftMinVirialGradOp); 
+#define REGISTER_CPU(T)                                                    \
+  REGISTER_KERNEL_BUILDER(                                                 \
+      Name("SoftMinVirialGrad").Device(DEVICE_CPU).TypeConstraint("T"), \
+      SoftMinVirialGradOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
diff --git a/source/op/tabulate_multi_device.cc b/source/op/tabulate_multi_device.cc
index 2a2640d286..2914b75c64 100644
--- a/source/op/tabulate_multi_device.cc
+++ b/source/op/tabulate_multi_device.cc
@@ -16,8 +16,8 @@ REGISTER_OP("TabulateFusionGrad")
     .Input("table_info: T")
     .Input("em_x: T")
     .Input("em: T")
-    .Input("dy: T")        
-    .Input("descriptor: T")         
+    .Input("dy: T")
+    .Input("descriptor: T")
     .Output("dy_dem_x: T")
     .Output("dy_dem: T");
 
@@ -47,8 +47,8 @@ REGISTER_OP("TabulateFusionSeAGrad")
     .Input("table_info: T")
     .Input("em_x: T")
     .Input("em: T")
-    .Input("dy: T")        
-    .Input("descriptor: T")         
+    .Input("dy: T")
+    .Input("descriptor: T")
     .Output("dy_dem_x: T")
     .Output("dy_dem: T");
 
@@ -78,8 +78,8 @@ REGISTER_OP("TabulateFusionSeTGrad")
     .Input("table_info: T")
     .Input("em_x: T")
     .Input("em: T")
-    .Input("dy: T")        
-    .Input("descriptor: T")  
+    .Input("dy: T")
+    .Input("descriptor: T")
     .Output("dy_dem_x: T")
     .Output("dy_dem: T");
 
@@ -107,8 +107,8 @@ REGISTER_OP("TabulateFusionSeRGrad")
     .Input("table: T")
     .Input("table_info: T")
     .Input("em: T")
-    .Input("dy: T")        
-    .Input("descriptor: T")         
+    .Input("dy: T")
+    .Input("descriptor: T")
     .Output("dy_dem: T");
 
 REGISTER_OP("TabulateFusionSeRGradGrad")
@@ -120,675 +120,696 @@ REGISTER_OP("TabulateFusionSeRGradGrad")
     .Input("descriptor: T")
     .Output("dz_dy: T");
 
-template
+template 
 class TabulateFusionSeAOp : public OpKernel {
  public:
-  explicit TabulateFusionSeAOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("last_layer_size", &last_layer_size));
+  explicit TabulateFusionSeAOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context,
+                   context->GetAttr("last_layer_size", &last_layer_size));
   }
   void Compute(OpKernelContext* context) override {
-      deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& table_tensor	= context->input(context_input_index++);
+    const Tensor& table_tensor = context->input(context_input_index++);
     const Tensor& table_info_tensor = context->input(context_input_index++);
-    const Tensor& em_x_tensor	= context->input(context_input_index++);
-    const Tensor& em_tensor	= context->input(context_input_index++);
+    const Tensor& em_x_tensor = context->input(context_input_index++);
+    const Tensor& em_tensor = context->input(context_input_index++);
     // set size of the sample
-    OP_REQUIRES (context, (table_tensor.shape().dims() == 2),   errors::InvalidArgument ("Dim of table should be 2"));
-    OP_REQUIRES (context, (em_x_tensor.shape().dims() == 2),    errors::InvalidArgument ("Dim of input should be 2"));
-    OP_REQUIRES (context, (em_tensor.shape().dims() == 3),      errors::InvalidArgument ("Dim of input should be 3"));
+    OP_REQUIRES(context, (table_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of table should be 2"));
+    OP_REQUIRES(context, (em_x_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (em_tensor.shape().dims() == 3),
+                errors::InvalidArgument("Dim of input should be 3"));
     TensorShape descriptor_shape;
-    descriptor_shape.AddDim (em_tensor.shape().dim_size(0));
-    descriptor_shape.AddDim (4); // TODO: be careful here;
-    descriptor_shape.AddDim (last_layer_size);
+    descriptor_shape.AddDim(em_tensor.shape().dim_size(0));
+    descriptor_shape.AddDim(4);  // TODO: be careful here;
+    descriptor_shape.AddDim(last_layer_size);
     int context_output_index = 0;
     Tensor* descriptor_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-	  		descriptor_shape,
-	  		&descriptor_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     descriptor_shape,
+                                                     &descriptor_tensor));
+    DeviceFunctor()(device, context->eigen_device());
     // flat the tensors
-    FPTYPE * descriptor = descriptor_tensor->flat().data();
-    const FPTYPE * table = table_tensor.flat().data();
-    const FPTYPE * table_info = table_info_tensor.flat().data();
-    const FPTYPE * em_x = em_x_tensor.flat().data();
-    const FPTYPE * em = em_tensor.flat().data();
+    FPTYPE* descriptor = descriptor_tensor->flat().data();
+    const FPTYPE* table = table_tensor.flat().data();
+    const FPTYPE* table_info = table_info_tensor.flat().data();
+    const FPTYPE* em_x = em_x_tensor.flat().data();
+    const FPTYPE* em = em_tensor.flat().data();
     const int nloc = em_tensor.shape().dim_size(0);
     const int nnei = em_tensor.shape().dim_size(1);
 
     if (device == "GPU") {
-      #if GOOGLE_CUDA
-      deepmd::tabulate_fusion_se_a_gpu_cuda(    
-          descriptor,
-          table, table_info, em_x, em, nloc, nnei, last_layer_size);
-      #endif // GOOGLE_CUDA
-
-      #if TENSORFLOW_USE_ROCM
-      deepmd::tabulate_fusion_se_a_gpu_rocm(    
-          descriptor,
-          table, table_info, em_x, em, nloc, nnei, last_layer_size);
-      #endif // TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::tabulate_fusion_se_a_cpu(    
-          descriptor,
-          table, table_info, em_x, em, nloc, nnei, last_layer_size);
+#if GOOGLE_CUDA
+      deepmd::tabulate_fusion_se_a_gpu_cuda(descriptor, table, table_info, em_x,
+                                            em, nloc, nnei, last_layer_size);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+      deepmd::tabulate_fusion_se_a_gpu_rocm(descriptor, table, table_info, em_x,
+                                            em, nloc, nnei, last_layer_size);
+#endif  // TENSORFLOW_USE_ROCM
+    } else if (device == "CPU") {
+      deepmd::tabulate_fusion_se_a_cpu(descriptor, table, table_info, em_x, em,
+                                       nloc, nnei, last_layer_size);
     }
   }
-private:
-    int last_layer_size;
-    std::string device;
+
+ private:
+  int last_layer_size;
+  std::string device;
 };
 
-template
+template 
 class TabulateFusionSeAGradOp : public OpKernel {
  public:
-  explicit TabulateFusionSeAGradOp(OpKernelConstruction* context) : OpKernel(context) {}
+  explicit TabulateFusionSeAGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
   void Compute(OpKernelContext* context) override {
-      deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& table_tensor	= context->input(context_input_index++);
+    const Tensor& table_tensor = context->input(context_input_index++);
     const Tensor& table_info_tensor = context->input(context_input_index++);
-    const Tensor& em_x_tensor	= context->input(context_input_index++);
-    const Tensor& em_tensor	= context->input(context_input_index++);
-    const Tensor& dy_tensor	= context->input(context_input_index++);
+    const Tensor& em_x_tensor = context->input(context_input_index++);
+    const Tensor& em_tensor = context->input(context_input_index++);
+    const Tensor& dy_tensor = context->input(context_input_index++);
     const Tensor& descriptor_tensor = context->input(context_input_index++);
     // set size of the sample
-    OP_REQUIRES (context, (dy_tensor.shape().dims() == 3), errors::InvalidArgument ("Dim of table should be 3"));
+    OP_REQUIRES(context, (dy_tensor.shape().dims() == 3),
+                errors::InvalidArgument("Dim of table should be 3"));
     int context_output_index = 0;
     Tensor* dy_dem_x_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-	  		em_x_tensor.shape(),
-        &dy_dem_x_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     em_x_tensor.shape(),
+                                                     &dy_dem_x_tensor));
     Tensor* dy_dem_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-	  		em_tensor.shape(),
-	  		&dy_dem_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++,
+                                            em_tensor.shape(), &dy_dem_tensor));
+    DeviceFunctor()(device, context->eigen_device());
 
     // flat the tensors
-    FPTYPE * dy_dem_x = dy_dem_x_tensor->flat().data();
-    FPTYPE * dy_dem = dy_dem_tensor->flat().data();
-    const FPTYPE * descriptor = descriptor_tensor.flat().data();
-    const FPTYPE * table = table_tensor.flat().data();
-    const FPTYPE * table_info = table_info_tensor.flat().data();
-    const FPTYPE * em_x = em_x_tensor.flat().data();
-    const FPTYPE * em = em_tensor.flat().data();
-    const FPTYPE * dy = dy_tensor.flat().data();
+    FPTYPE* dy_dem_x = dy_dem_x_tensor->flat().data();
+    FPTYPE* dy_dem = dy_dem_tensor->flat().data();
+    const FPTYPE* descriptor = descriptor_tensor.flat().data();
+    const FPTYPE* table = table_tensor.flat().data();
+    const FPTYPE* table_info = table_info_tensor.flat().data();
+    const FPTYPE* em_x = em_x_tensor.flat().data();
+    const FPTYPE* em = em_tensor.flat().data();
+    const FPTYPE* dy = dy_tensor.flat().data();
     const int nloc = em_tensor.shape().dim_size(0);
     const int nnei = em_tensor.shape().dim_size(1);
     const int last_layer_size = descriptor_tensor.shape().dim_size(2);
 
     if (device == "GPU") {
-      #if GOOGLE_CUDA
-      deepmd::tabulate_fusion_se_a_grad_gpu_cuda(    
-          dy_dem_x, dy_dem,
-          table, table_info, em_x, em, dy, nloc, nnei, last_layer_size);
-      #endif // GOOGLE_CUDA
-      
-      #if TENSORFLOW_USE_ROCM
-      deepmd::tabulate_fusion_se_a_grad_gpu_rocm(    
-          dy_dem_x, dy_dem,
-          table, table_info, em_x, em, dy, nloc, nnei, last_layer_size);
-      #endif // TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::tabulate_fusion_se_a_grad_cpu(    
-          dy_dem_x, dy_dem,
-          table, table_info, em_x, em, dy, nloc, nnei, last_layer_size);
+#if GOOGLE_CUDA
+      deepmd::tabulate_fusion_se_a_grad_gpu_cuda(dy_dem_x, dy_dem, table,
+                                                 table_info, em_x, em, dy, nloc,
+                                                 nnei, last_layer_size);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+      deepmd::tabulate_fusion_se_a_grad_gpu_rocm(dy_dem_x, dy_dem, table,
+                                                 table_info, em_x, em, dy, nloc,
+                                                 nnei, last_layer_size);
+#endif  // TENSORFLOW_USE_ROCM
+    } else if (device == "CPU") {
+      deepmd::tabulate_fusion_se_a_grad_cpu(dy_dem_x, dy_dem, table, table_info,
+                                            em_x, em, dy, nloc, nnei,
+                                            last_layer_size);
     }
   }
-private:
-    std::string device;
+
+ private:
+  std::string device;
 };
 
-template
+template 
 class TabulateFusionSeAGradGradOp : public OpKernel {
  public:
-  explicit TabulateFusionSeAGradGradOp(OpKernelConstruction* context) : OpKernel(context) {}
+  explicit TabulateFusionSeAGradGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
   void Compute(OpKernelContext* context) override {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& table_tensor	= context->input(context_input_index++);
+    const Tensor& table_tensor = context->input(context_input_index++);
     const Tensor& table_info_tensor = context->input(context_input_index++);
-    const Tensor& em_x_tensor	= context->input(context_input_index++);
-    const Tensor& em_tensor	= context->input(context_input_index++);
-    const Tensor& dz_dy_dem_x_tensor	= context->input(context_input_index++);
-    const Tensor& dz_dy_dem_tensor	= context->input(context_input_index++);
+    const Tensor& em_x_tensor = context->input(context_input_index++);
+    const Tensor& em_tensor = context->input(context_input_index++);
+    const Tensor& dz_dy_dem_x_tensor = context->input(context_input_index++);
+    const Tensor& dz_dy_dem_tensor = context->input(context_input_index++);
     const Tensor& descriptor_tensor = context->input(context_input_index++);
     // set size of the sample
-    OP_REQUIRES (context, (dz_dy_dem_x_tensor.shape().dims() == 2),    errors::InvalidArgument ("Dim of input should be 2"));
-    OP_REQUIRES (context, (dz_dy_dem_tensor.shape().dims() == 3),      errors::InvalidArgument ("Dim of input should be 3"));
+    OP_REQUIRES(context, (dz_dy_dem_x_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (dz_dy_dem_tensor.shape().dims() == 3),
+                errors::InvalidArgument("Dim of input should be 3"));
     int context_output_index = 0;
     Tensor* dz_dy_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-	  		descriptor_tensor.shape(),
-        &dz_dy_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     descriptor_tensor.shape(),
+                                                     &dz_dy_tensor));
+    DeviceFunctor()(device, context->eigen_device());
 
     // flat the tensors
-    FPTYPE * dz_dy = dz_dy_tensor->flat().data();
-    const FPTYPE * table = table_tensor.flat().data();
-    const FPTYPE * table_info = table_info_tensor.flat().data();
-    const FPTYPE * em_x = em_x_tensor.flat().data();
-    const FPTYPE * em = em_tensor.flat().data();
-    const FPTYPE * dz_dy_dem_x = dz_dy_dem_x_tensor.flat().data();
-    const FPTYPE * dz_dy_dem = dz_dy_dem_tensor.flat().data();
+    FPTYPE* dz_dy = dz_dy_tensor->flat().data();
+    const FPTYPE* table = table_tensor.flat().data();
+    const FPTYPE* table_info = table_info_tensor.flat().data();
+    const FPTYPE* em_x = em_x_tensor.flat().data();
+    const FPTYPE* em = em_tensor.flat().data();
+    const FPTYPE* dz_dy_dem_x = dz_dy_dem_x_tensor.flat().data();
+    const FPTYPE* dz_dy_dem = dz_dy_dem_tensor.flat().data();
     const int nloc = em_tensor.shape().dim_size(0);
     const int nnei = em_tensor.shape().dim_size(1);
     const int last_layer_size = descriptor_tensor.shape().dim_size(2);
 
     if (device == "GPU") {
-      #if GOOGLE_CUDA
+#if GOOGLE_CUDA
       deepmd::tabulate_fusion_se_a_grad_grad_gpu_cuda(
-          dz_dy,
-          table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc, nnei, last_layer_size);
-      #endif // GOOGLE_CUDA
-      #if TENSORFLOW_USE_ROCM
+          dz_dy, table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc,
+          nnei, last_layer_size);
+#endif  // GOOGLE_CUDA
+#if TENSORFLOW_USE_ROCM
       deepmd::tabulate_fusion_se_a_grad_grad_gpu_rocm(
-          dz_dy,
-          table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc, nnei, last_layer_size);
-      #endif // TENSORFLOW_USE_ROCM
-      OP_REQUIRES (context, (last_layer_size <= 1024),      errors::InvalidArgument ("In the process of model compression, the size of the last layer of embedding net must be less than 1024!"));
-    }
-    else if (device == "CPU") {
-      deepmd::tabulate_fusion_se_a_grad_grad_cpu(
-          dz_dy,
-          table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc, nnei, last_layer_size);
+          dz_dy, table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc,
+          nnei, last_layer_size);
+#endif  // TENSORFLOW_USE_ROCM
+      OP_REQUIRES(context, (last_layer_size <= 1024),
+                  errors::InvalidArgument(
+                      "In the process of model compression, the size of the "
+                      "last layer of embedding net must be less than 1024!"));
+    } else if (device == "CPU") {
+      deepmd::tabulate_fusion_se_a_grad_grad_cpu(dz_dy, table, table_info, em_x,
+                                                 em, dz_dy_dem_x, dz_dy_dem,
+                                                 nloc, nnei, last_layer_size);
     }
   }
-private:
-    std::string device;
+
+ private:
+  std::string device;
 };
 
-template
+template 
 class TabulateFusionSeTOp : public OpKernel {
  public:
-  explicit TabulateFusionSeTOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("last_layer_size", &last_layer_size));
+  explicit TabulateFusionSeTOp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context,
+                   context->GetAttr("last_layer_size", &last_layer_size));
   }
   void Compute(OpKernelContext* context) override {
-      deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& table_tensor	= context->input(context_input_index++);
+    const Tensor& table_tensor = context->input(context_input_index++);
     const Tensor& table_info_tensor = context->input(context_input_index++);
-    const Tensor& em_x_tensor	= context->input(context_input_index++);
-    const Tensor& em_tensor	= context->input(context_input_index++);
+    const Tensor& em_x_tensor = context->input(context_input_index++);
+    const Tensor& em_tensor = context->input(context_input_index++);
     // set size of the sample
-    OP_REQUIRES (context, (table_tensor.shape().dims() == 2),   errors::InvalidArgument ("Dim of table should be 2"));
-    OP_REQUIRES (context, (em_x_tensor.shape().dims() == 2),    errors::InvalidArgument ("Dim of em_x_tensor should be 2"));
-    OP_REQUIRES (context, (em_tensor.shape().dims() == 3),      errors::InvalidArgument ("Dim of em_tensor should be 3"));
+    OP_REQUIRES(context, (table_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of table should be 2"));
+    OP_REQUIRES(context, (em_x_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of em_x_tensor should be 2"));
+    OP_REQUIRES(context, (em_tensor.shape().dims() == 3),
+                errors::InvalidArgument("Dim of em_tensor should be 3"));
     TensorShape descriptor_shape;
-    descriptor_shape.AddDim (em_tensor.shape().dim_size(0));
-    descriptor_shape.AddDim (last_layer_size);
+    descriptor_shape.AddDim(em_tensor.shape().dim_size(0));
+    descriptor_shape.AddDim(last_layer_size);
     int context_output_index = 0;
     Tensor* descriptor_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-	  		descriptor_shape,
-	  		&descriptor_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     descriptor_shape,
+                                                     &descriptor_tensor));
+    DeviceFunctor()(device, context->eigen_device());
     // flat the tensors
-    FPTYPE * descriptor = descriptor_tensor->flat().data();
-    const FPTYPE * table = table_tensor.flat().data();
-    const FPTYPE * table_info = table_info_tensor.flat().data();
-    const FPTYPE * em_x = em_x_tensor.flat().data();
-    const FPTYPE * em = em_tensor.flat().data();
+    FPTYPE* descriptor = descriptor_tensor->flat().data();
+    const FPTYPE* table = table_tensor.flat().data();
+    const FPTYPE* table_info = table_info_tensor.flat().data();
+    const FPTYPE* em_x = em_x_tensor.flat().data();
+    const FPTYPE* em = em_tensor.flat().data();
     const int nloc = em_tensor.shape().dim_size(0);
     const int nnei_i = em_tensor.shape().dim_size(1);
     const int nnei_j = em_tensor.shape().dim_size(2);
 
     if (device == "GPU") {
-      #if GOOGLE_CUDA
-      deepmd::tabulate_fusion_se_t_gpu_cuda(    
-          descriptor,
-          table, table_info, em_x, em, nloc, nnei_i, nnei_j, last_layer_size);
-      #endif // GOOGLE_CUDA
-
-      #if TENSORFLOW_USE_ROCM
-      deepmd::tabulate_fusion_se_t_gpu_rocm(    
-          descriptor,
-          table, table_info, em_x, em, nloc, nnei_i, nnei_j, last_layer_size);
-      #endif // TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::tabulate_fusion_se_t_cpu(    
-          descriptor,
-          table, table_info, em_x, em, nloc, nnei_i, nnei_j, last_layer_size);
+#if GOOGLE_CUDA
+      deepmd::tabulate_fusion_se_t_gpu_cuda(descriptor, table, table_info, em_x,
+                                            em, nloc, nnei_i, nnei_j,
+                                            last_layer_size);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+      deepmd::tabulate_fusion_se_t_gpu_rocm(descriptor, table, table_info, em_x,
+                                            em, nloc, nnei_i, nnei_j,
+                                            last_layer_size);
+#endif  // TENSORFLOW_USE_ROCM
+    } else if (device == "CPU") {
+      deepmd::tabulate_fusion_se_t_cpu(descriptor, table, table_info, em_x, em,
+                                       nloc, nnei_i, nnei_j, last_layer_size);
     }
   }
-private:
-    int last_layer_size;
-    std::string device;
+
+ private:
+  int last_layer_size;
+  std::string device;
 };
 
-template
+template 
 class TabulateFusionSeTGradOp : public OpKernel {
  public:
-  explicit TabulateFusionSeTGradOp(OpKernelConstruction* context) : OpKernel(context) {}
+  explicit TabulateFusionSeTGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
   void Compute(OpKernelContext* context) override {
-      deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& table_tensor	= context->input(context_input_index++);
+    const Tensor& table_tensor = context->input(context_input_index++);
     const Tensor& table_info_tensor = context->input(context_input_index++);
-    const Tensor& em_x_tensor	= context->input(context_input_index++);
-    const Tensor& em_tensor	= context->input(context_input_index++);
-    const Tensor& dy_tensor	= context->input(context_input_index++);
+    const Tensor& em_x_tensor = context->input(context_input_index++);
+    const Tensor& em_tensor = context->input(context_input_index++);
+    const Tensor& dy_tensor = context->input(context_input_index++);
     const Tensor& descriptor_tensor = context->input(context_input_index++);
     // set size of the sample
-    OP_REQUIRES (context, (dy_tensor.shape().dims() == 2), errors::InvalidArgument ("Dim of dy_tensor should be 2"));
+    OP_REQUIRES(context, (dy_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of dy_tensor should be 2"));
     int context_output_index = 0;
     Tensor* dy_dem_x_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-	  		em_x_tensor.shape(),
-        &dy_dem_x_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     em_x_tensor.shape(),
+                                                     &dy_dem_x_tensor));
     Tensor* dy_dem_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-	  		em_tensor.shape(),
-	  		&dy_dem_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++,
+                                            em_tensor.shape(), &dy_dem_tensor));
+    DeviceFunctor()(device, context->eigen_device());
 
     // flat the tensors
-    FPTYPE * dy_dem_x = dy_dem_x_tensor->flat().data();
-    FPTYPE * dy_dem = dy_dem_tensor->flat().data();
-    const FPTYPE * descriptor = descriptor_tensor.flat().data();
-    const FPTYPE * table = table_tensor.flat().data();
-    const FPTYPE * table_info = table_info_tensor.flat().data();
-    const FPTYPE * em_x = em_x_tensor.flat().data();
-    const FPTYPE * em = em_tensor.flat().data();
-    const FPTYPE * dy = dy_tensor.flat().data();
+    FPTYPE* dy_dem_x = dy_dem_x_tensor->flat().data();
+    FPTYPE* dy_dem = dy_dem_tensor->flat().data();
+    const FPTYPE* descriptor = descriptor_tensor.flat().data();
+    const FPTYPE* table = table_tensor.flat().data();
+    const FPTYPE* table_info = table_info_tensor.flat().data();
+    const FPTYPE* em_x = em_x_tensor.flat().data();
+    const FPTYPE* em = em_tensor.flat().data();
+    const FPTYPE* dy = dy_tensor.flat().data();
     const int nloc = em_tensor.shape().dim_size(0);
     const int nnei_i = em_tensor.shape().dim_size(1);
     const int nnei_j = em_tensor.shape().dim_size(2);
     const int last_layer_size = descriptor_tensor.shape().dim_size(1);
 
     if (device == "GPU") {
-      #if GOOGLE_CUDA
-      deepmd::tabulate_fusion_se_t_grad_gpu_cuda(    
-          dy_dem_x, dy_dem,
-          table, table_info, em_x, em, dy, nloc, nnei_i, nnei_j, last_layer_size);
-      #endif // GOOGLE_CUDA
-      
-      #if TENSORFLOW_USE_ROCM
-      deepmd::tabulate_fusion_se_t_grad_gpu_rocm(    
-          dy_dem_x, dy_dem,
-          table, table_info, em_x, em, dy, nloc, nnei_i, nnei_j, last_layer_size);
-      #endif // TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::tabulate_fusion_se_t_grad_cpu(    
-          dy_dem_x, dy_dem,
-          table, table_info, em_x, em, dy, nloc, nnei_i, nnei_j, last_layer_size);
+#if GOOGLE_CUDA
+      deepmd::tabulate_fusion_se_t_grad_gpu_cuda(
+          dy_dem_x, dy_dem, table, table_info, em_x, em, dy, nloc, nnei_i,
+          nnei_j, last_layer_size);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+      deepmd::tabulate_fusion_se_t_grad_gpu_rocm(
+          dy_dem_x, dy_dem, table, table_info, em_x, em, dy, nloc, nnei_i,
+          nnei_j, last_layer_size);
+#endif  // TENSORFLOW_USE_ROCM
+    } else if (device == "CPU") {
+      deepmd::tabulate_fusion_se_t_grad_cpu(dy_dem_x, dy_dem, table, table_info,
+                                            em_x, em, dy, nloc, nnei_i, nnei_j,
+                                            last_layer_size);
     }
   }
-private:
-    std::string device;
+
+ private:
+  std::string device;
 };
 
-template
+template 
 class TabulateFusionSeTGradGradOp : public OpKernel {
  public:
-  explicit TabulateFusionSeTGradGradOp(OpKernelConstruction* context) : OpKernel(context) {}
+  explicit TabulateFusionSeTGradGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
   void Compute(OpKernelContext* context) override {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& table_tensor	= context->input(context_input_index++);
+    const Tensor& table_tensor = context->input(context_input_index++);
     const Tensor& table_info_tensor = context->input(context_input_index++);
-    const Tensor& em_x_tensor	= context->input(context_input_index++);
-    const Tensor& em_tensor	= context->input(context_input_index++);
-    const Tensor& dz_dy_dem_x_tensor	= context->input(context_input_index++);
-    const Tensor& dz_dy_dem_tensor	= context->input(context_input_index++);
+    const Tensor& em_x_tensor = context->input(context_input_index++);
+    const Tensor& em_tensor = context->input(context_input_index++);
+    const Tensor& dz_dy_dem_x_tensor = context->input(context_input_index++);
+    const Tensor& dz_dy_dem_tensor = context->input(context_input_index++);
     const Tensor& descriptor_tensor = context->input(context_input_index++);
     // set size of the sample
-    OP_REQUIRES (context, (dz_dy_dem_x_tensor.shape().dims() == 2),    errors::InvalidArgument ("Dim of input should be 2"));
-    OP_REQUIRES (context, (dz_dy_dem_tensor.shape().dims() == 3),      errors::InvalidArgument ("Dim of input should be 3"));
+    OP_REQUIRES(context, (dz_dy_dem_x_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (dz_dy_dem_tensor.shape().dims() == 3),
+                errors::InvalidArgument("Dim of input should be 3"));
     int context_output_index = 0;
     Tensor* dz_dy_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-	  		descriptor_tensor.shape(),
-        &dz_dy_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     descriptor_tensor.shape(),
+                                                     &dz_dy_tensor));
+    DeviceFunctor()(device, context->eigen_device());
 
     // flat the tensors
-    FPTYPE * dz_dy = dz_dy_tensor->flat().data();
-    const FPTYPE * table = table_tensor.flat().data();
-    const FPTYPE * table_info = table_info_tensor.flat().data();
-    const FPTYPE * em_x = em_x_tensor.flat().data();
-    const FPTYPE * em = em_tensor.flat().data();
-    const FPTYPE * dz_dy_dem_x = dz_dy_dem_x_tensor.flat().data();
-    const FPTYPE * dz_dy_dem = dz_dy_dem_tensor.flat().data();
-    const int nloc   = em_tensor.shape().dim_size(0);
+    FPTYPE* dz_dy = dz_dy_tensor->flat().data();
+    const FPTYPE* table = table_tensor.flat().data();
+    const FPTYPE* table_info = table_info_tensor.flat().data();
+    const FPTYPE* em_x = em_x_tensor.flat().data();
+    const FPTYPE* em = em_tensor.flat().data();
+    const FPTYPE* dz_dy_dem_x = dz_dy_dem_x_tensor.flat().data();
+    const FPTYPE* dz_dy_dem = dz_dy_dem_tensor.flat().data();
+    const int nloc = em_tensor.shape().dim_size(0);
     const int nnei_i = em_tensor.shape().dim_size(1);
     const int nnei_j = em_tensor.shape().dim_size(2);
     const int last_layer_size = descriptor_tensor.shape().dim_size(1);
 
     if (device == "GPU") {
-      #if GOOGLE_CUDA
+#if GOOGLE_CUDA
       deepmd::tabulate_fusion_se_t_grad_grad_gpu_cuda(
-          dz_dy,
-          table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc, nnei_i, nnei_j, last_layer_size);
-      #endif // GOOGLE_CUDA
-      #if TENSORFLOW_USE_ROCM
+          dz_dy, table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc,
+          nnei_i, nnei_j, last_layer_size);
+#endif  // GOOGLE_CUDA
+#if TENSORFLOW_USE_ROCM
       deepmd::tabulate_fusion_se_t_grad_grad_gpu_rocm(
-          dz_dy,
-          table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc, nnei_i, nnei_j, last_layer_size);
-      #endif // TENSORFLOW_USE_ROCM
-      OP_REQUIRES (context, (last_layer_size <= 1024),      errors::InvalidArgument ("In the process of model compression, the size of the last layer of embedding net must be less than 1024!"));
-    }
-    else if (device == "CPU") {
+          dz_dy, table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc,
+          nnei_i, nnei_j, last_layer_size);
+#endif  // TENSORFLOW_USE_ROCM
+      OP_REQUIRES(context, (last_layer_size <= 1024),
+                  errors::InvalidArgument(
+                      "In the process of model compression, the size of the "
+                      "last layer of embedding net must be less than 1024!"));
+    } else if (device == "CPU") {
       deepmd::tabulate_fusion_se_t_grad_grad_cpu(
-          dz_dy,
-          table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc, nnei_i, nnei_j, last_layer_size);
+          dz_dy, table, table_info, em_x, em, dz_dy_dem_x, dz_dy_dem, nloc,
+          nnei_i, nnei_j, last_layer_size);
     }
   }
-private:
-    std::string device;
+
+ private:
+  std::string device;
 };
-template
+template 
 class TabulateFusionSeROp : public OpKernel {
  public:
-  explicit TabulateFusionSeROp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("last_layer_size", &last_layer_size));
+  explicit TabulateFusionSeROp(OpKernelConstruction* context)
+      : OpKernel(context) {
+    OP_REQUIRES_OK(context,
+                   context->GetAttr("last_layer_size", &last_layer_size));
   }
   void Compute(OpKernelContext* context) override {
-      deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& table_tensor	= context->input(context_input_index++);
+    const Tensor& table_tensor = context->input(context_input_index++);
     const Tensor& table_info_tensor = context->input(context_input_index++);
-    const Tensor& em_tensor	= context->input(context_input_index++);
+    const Tensor& em_tensor = context->input(context_input_index++);
     // set size of the sample
-    OP_REQUIRES (context, (table_tensor.shape().dims() == 2),   errors::InvalidArgument ("Dim of table should be 2"));
-    OP_REQUIRES (context, (em_tensor.shape().dims() == 2),    errors::InvalidArgument ("Dim of input should be 2"));
+    OP_REQUIRES(context, (table_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of table should be 2"));
+    OP_REQUIRES(context, (em_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
     TensorShape descriptor_shape;
-    descriptor_shape.AddDim (em_tensor.shape().dim_size(0));
-    descriptor_shape.AddDim (em_tensor.shape().dim_size(1)); // TODO: be careful here;
-    descriptor_shape.AddDim (last_layer_size);
+    descriptor_shape.AddDim(em_tensor.shape().dim_size(0));
+    descriptor_shape.AddDim(
+        em_tensor.shape().dim_size(1));  // TODO: be careful here;
+    descriptor_shape.AddDim(last_layer_size);
     int context_output_index = 0;
     Tensor* descriptor_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-	  		descriptor_shape,
-	  		&descriptor_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     descriptor_shape,
+                                                     &descriptor_tensor));
+    DeviceFunctor()(device, context->eigen_device());
     // flat the tensors
-    FPTYPE * descriptor = descriptor_tensor->flat().data();
-    const FPTYPE * table = table_tensor.flat().data();
-    const FPTYPE * table_info = table_info_tensor.flat().data();
-    const FPTYPE * em = em_tensor.flat().data();
+    FPTYPE* descriptor = descriptor_tensor->flat().data();
+    const FPTYPE* table = table_tensor.flat().data();
+    const FPTYPE* table_info = table_info_tensor.flat().data();
+    const FPTYPE* em = em_tensor.flat().data();
     const int nloc = em_tensor.shape().dim_size(0);
     const int nnei = em_tensor.shape().dim_size(1);
 
     if (device == "GPU") {
-      #if GOOGLE_CUDA
-      deepmd::tabulate_fusion_se_r_gpu_cuda(    
-          descriptor,
-          table, table_info, em, nloc, nnei, last_layer_size);
-      #endif // GOOGLE_CUDA
-
-      #if TENSORFLOW_USE_ROCM
-      deepmd::tabulate_fusion_se_r_gpu_rocm(    
-          descriptor,
-          table, table_info, em, nloc, nnei, last_layer_size);
-      #endif // TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::tabulate_fusion_se_r_cpu(    
-          descriptor,
-          table, table_info, em, nloc, nnei, last_layer_size);
+#if GOOGLE_CUDA
+      deepmd::tabulate_fusion_se_r_gpu_cuda(descriptor, table, table_info, em,
+                                            nloc, nnei, last_layer_size);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+      deepmd::tabulate_fusion_se_r_gpu_rocm(descriptor, table, table_info, em,
+                                            nloc, nnei, last_layer_size);
+#endif  // TENSORFLOW_USE_ROCM
+    } else if (device == "CPU") {
+      deepmd::tabulate_fusion_se_r_cpu(descriptor, table, table_info, em, nloc,
+                                       nnei, last_layer_size);
     }
   }
-private:
-    int last_layer_size;
-    std::string device;
+
+ private:
+  int last_layer_size;
+  std::string device;
 };
 
-template
+template <typename Device, typename FPTYPE>
 class TabulateFusionSeRGradOp : public OpKernel {
  public:
-  explicit TabulateFusionSeRGradOp(OpKernelConstruction* context) : OpKernel(context) {}
+  explicit TabulateFusionSeRGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
   void Compute(OpKernelContext* context) override {
-      deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
   }
 
   void _Compute(OpKernelContext* context) {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& table_tensor	= context->input(context_input_index++);
+    const Tensor& table_tensor = context->input(context_input_index++);
     const Tensor& table_info_tensor = context->input(context_input_index++);
-    const Tensor& em_tensor	= context->input(context_input_index++);
-    const Tensor& dy_tensor	= context->input(context_input_index++);
+    const Tensor& em_tensor = context->input(context_input_index++);
+    const Tensor& dy_tensor = context->input(context_input_index++);
     const Tensor& descriptor_tensor = context->input(context_input_index++);
     // set size of the sample
-    OP_REQUIRES (context, (dy_tensor.shape().dims() == 3), errors::InvalidArgument ("Dim of table should be 3"));
+    OP_REQUIRES(context, (dy_tensor.shape().dims() == 3),
+                errors::InvalidArgument("Dim of table should be 3"));
     int context_output_index = 0;
     Tensor* dy_dem_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-	  		em_tensor.shape(),
-	  		&dy_dem_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(context_output_index++,
+                                            em_tensor.shape(), &dy_dem_tensor));
+    DeviceFunctor()(device, context->eigen_device());
 
     // flat the tensors
-    FPTYPE * dy_dem = dy_dem_tensor->flat().data();
-    const FPTYPE * descriptor = descriptor_tensor.flat().data();
-    const FPTYPE * table = table_tensor.flat().data();
-    const FPTYPE * table_info = table_info_tensor.flat().data();
-    const FPTYPE * em = em_tensor.flat().data();
-    const FPTYPE * dy = dy_tensor.flat().data();
+    FPTYPE* dy_dem = dy_dem_tensor->flat().data();
+    const FPTYPE* descriptor = descriptor_tensor.flat().data();
+    const FPTYPE* table = table_tensor.flat().data();
+    const FPTYPE* table_info = table_info_tensor.flat().data();
+    const FPTYPE* em = em_tensor.flat().data();
+    const FPTYPE* dy = dy_tensor.flat().data();
     const int nloc = em_tensor.shape().dim_size(0);
     const int nnei = em_tensor.shape().dim_size(1);
     const int last_layer_size = descriptor_tensor.shape().dim_size(2);
 
     if (device == "GPU") {
-      #if GOOGLE_CUDA
-      deepmd::tabulate_fusion_se_r_grad_gpu_cuda(    
-          dy_dem,
-          table, table_info, em, dy, nloc, nnei, last_layer_size);
-      #endif // GOOGLE_CUDA
-      
-      #if TENSORFLOW_USE_ROCM
-      deepmd::tabulate_fusion_se_r_grad_gpu_rocm(    
-          dy_dem,
-          table, table_info, em, dy, nloc, nnei, last_layer_size);
-      #endif // TENSORFLOW_USE_ROCM
-    }
-    else if (device == "CPU") {
-      deepmd::tabulate_fusion_se_r_grad_cpu(    
-          dy_dem,
-          table, table_info, em, dy, nloc, nnei, last_layer_size);
+#if GOOGLE_CUDA
+      deepmd::tabulate_fusion_se_r_grad_gpu_cuda(
+          dy_dem, table, table_info, em, dy, nloc, nnei, last_layer_size);
+#endif  // GOOGLE_CUDA
+
+#if TENSORFLOW_USE_ROCM
+      deepmd::tabulate_fusion_se_r_grad_gpu_rocm(
+          dy_dem, table, table_info, em, dy, nloc, nnei, last_layer_size);
+#endif  // TENSORFLOW_USE_ROCM
+    } else if (device == "CPU") {
+      deepmd::tabulate_fusion_se_r_grad_cpu(dy_dem, table, table_info, em, dy,
+                                            nloc, nnei, last_layer_size);
     }
   }
-private:
-    std::string device;
+
+ private:
+  std::string device;
 };
 
-template
+template <typename Device, typename FPTYPE>
 class TabulateFusionSeRGradGradOp : public OpKernel {
  public:
-  explicit TabulateFusionSeRGradGradOp(OpKernelConstruction* context) : OpKernel(context) {}
+  explicit TabulateFusionSeRGradGradOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
   void Compute(OpKernelContext* context) override {
     // Grab the input tensor
     int context_input_index = 0;
-    const Tensor& table_tensor	= context->input(context_input_index++);
+    const Tensor& table_tensor = context->input(context_input_index++);
     const Tensor& table_info_tensor = context->input(context_input_index++);
-    const Tensor& em_tensor	= context->input(context_input_index++);
-    const Tensor& dz_dy_dem_tensor	= context->input(context_input_index++);
+    const Tensor& em_tensor = context->input(context_input_index++);
+    const Tensor& dz_dy_dem_tensor = context->input(context_input_index++);
     const Tensor& descriptor_tensor = context->input(context_input_index++);
     // set size of the sample
-    OP_REQUIRES (context, (dz_dy_dem_tensor.shape().dims() == 2),      errors::InvalidArgument ("Dim of input should be 2"));
+    OP_REQUIRES(context, (dz_dy_dem_tensor.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
     int context_output_index = 0;
     Tensor* dz_dy_tensor = NULL;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        context_output_index++,
-	  		descriptor_tensor.shape(),
-        &dz_dy_tensor));
-    DeviceFunctor() (
-        device,
-        context->eigen_device()
-    );
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     descriptor_tensor.shape(),
+                                                     &dz_dy_tensor));
+    DeviceFunctor()(device, context->eigen_device());
 
     // flat the tensors
-    FPTYPE * dz_dy = dz_dy_tensor->flat().data();
-    const FPTYPE * table = table_tensor.flat().data();
-    const FPTYPE * table_info = table_info_tensor.flat().data();
-    const FPTYPE * em = em_tensor.flat().data();
-    const FPTYPE * dz_dy_dem = dz_dy_dem_tensor.flat().data();
+    FPTYPE* dz_dy = dz_dy_tensor->flat().data();
+    const FPTYPE* table = table_tensor.flat().data();
+    const FPTYPE* table_info = table_info_tensor.flat().data();
+    const FPTYPE* em = em_tensor.flat().data();
+    const FPTYPE* dz_dy_dem = dz_dy_dem_tensor.flat().data();
     const int nloc = em_tensor.shape().dim_size(0);
     const int nnei = em_tensor.shape().dim_size(1);
     const int last_layer_size = descriptor_tensor.shape().dim_size(2);
 
     if (device == "GPU") {
-      #if GOOGLE_CUDA
+#if GOOGLE_CUDA
       deepmd::tabulate_fusion_se_r_grad_grad_gpu_cuda(
-          dz_dy,
-          table, table_info, em, dz_dy_dem, nloc, nnei, last_layer_size);
-      #endif // GOOGLE_CUDA
-      #if TENSORFLOW_USE_ROCM
+          dz_dy, table, table_info, em, dz_dy_dem, nloc, nnei, last_layer_size);
+#endif  // GOOGLE_CUDA
+#if TENSORFLOW_USE_ROCM
       deepmd::tabulate_fusion_se_r_grad_grad_gpu_rocm(
-          dz_dy,
-          table, table_info, em, dz_dy_dem, nloc, nnei, last_layer_size);
-      #endif // TENSORFLOW_USE_ROCM
-      OP_REQUIRES (context, (last_layer_size <= 1024),      errors::InvalidArgument ("In the process of model compression, the size of the last layer of embedding net must be less than 1024!"));
-    }
-    else if (device == "CPU") {
+          dz_dy, table, table_info, em, dz_dy_dem, nloc, nnei, last_layer_size);
+#endif  // TENSORFLOW_USE_ROCM
+      OP_REQUIRES(context, (last_layer_size <= 1024),
+                  errors::InvalidArgument(
+                      "In the process of model compression, the size of the "
+                      "last layer of embedding net must be less than 1024!"));
+    } else if (device == "CPU") {
       deepmd::tabulate_fusion_se_r_grad_grad_cpu(
-          dz_dy,
-          table, table_info, em, dz_dy_dem, nloc, nnei, last_layer_size);
+          dz_dy, table, table_info, em, dz_dy_dem, nloc, nnei, last_layer_size);
     }
   }
-private:
-    std::string device;
+
+ private:
+  std::string device;
 };
 
-#define REGISTER_CPU(T)                                                                \
-REGISTER_KERNEL_BUILDER(                                                               \
-    Name("TabulateFusion").Device(DEVICE_CPU).TypeConstraint("T"),                  \
-    TabulateFusionSeAOp);                                                \
-REGISTER_KERNEL_BUILDER(                                                               \
-    Name("TabulateFusionGrad").Device(DEVICE_CPU).TypeConstraint("T"),              \
-    TabulateFusionSeAGradOp);                                            \
-REGISTER_KERNEL_BUILDER(                                                               \
-    Name("TabulateFusionGradGrad").Device(DEVICE_CPU).TypeConstraint("T"),          \
-    TabulateFusionSeAGradGradOp);                                        \
-REGISTER_KERNEL_BUILDER(                                                               \
-    Name("TabulateFusionSeA").Device(DEVICE_CPU).TypeConstraint("T"),               \
-    TabulateFusionSeAOp);                                                \
-REGISTER_KERNEL_BUILDER(                                                               \
-    Name("TabulateFusionSeAGrad").Device(DEVICE_CPU).TypeConstraint("T"),           \
-    TabulateFusionSeAGradOp);                                            \
-REGISTER_KERNEL_BUILDER(                                                               \
-    Name("TabulateFusionSeAGradGrad").Device(DEVICE_CPU).TypeConstraint("T"),       \
-    TabulateFusionSeAGradGradOp);                                        \
-REGISTER_KERNEL_BUILDER(                                                               \
-    Name("TabulateFusionSeT").Device(DEVICE_CPU).TypeConstraint("T"),               \
-    TabulateFusionSeTOp);                                                \
-REGISTER_KERNEL_BUILDER(                                                               \
-    Name("TabulateFusionSeTGrad").Device(DEVICE_CPU).TypeConstraint("T"),           \
-    TabulateFusionSeTGradOp);                                            \
-REGISTER_KERNEL_BUILDER(                                                               \
-    Name("TabulateFusionSeTGradGrad").Device(DEVICE_CPU).TypeConstraint("T"),       \
-    TabulateFusionSeTGradGradOp);                                        \
-REGISTER_KERNEL_BUILDER(                                                               \
-    Name("TabulateFusionSeR").Device(DEVICE_CPU).TypeConstraint("T"),               \
-    TabulateFusionSeROp);                                                \
-REGISTER_KERNEL_BUILDER(                                                               \
-    Name("TabulateFusionSeRGrad").Device(DEVICE_CPU).TypeConstraint("T"),           \
-    TabulateFusionSeRGradOp);                                            \
-REGISTER_KERNEL_BUILDER(                                                               \
-    Name("TabulateFusionSeRGradGrad").Device(DEVICE_CPU).TypeConstraint("T"),       \
-    TabulateFusionSeRGradGradOp);                                                          
+#define REGISTER_CPU(T)                                                        \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("TabulateFusion").Device(DEVICE_CPU).TypeConstraint("T"),        \
+      TabulateFusionSeAOp);                                      \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("TabulateFusionGrad").Device(DEVICE_CPU).TypeConstraint("T"),    \
+      TabulateFusionSeAGradOp);                                  \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionGradGrad")                       \
+                              .Device(DEVICE_CPU)                              \
+                              .TypeConstraint("T"),                         \
+                          TabulateFusionSeAGradGradOp);          \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("TabulateFusionSeA").Device(DEVICE_CPU).TypeConstraint("T"),     \
+      TabulateFusionSeAOp);                                      \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("TabulateFusionSeAGrad").Device(DEVICE_CPU).TypeConstraint("T"), \
+      TabulateFusionSeAGradOp);                                  \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeAGradGrad")                    \
+                              .Device(DEVICE_CPU)                              \
+                              .TypeConstraint("T"),                         \
+                          TabulateFusionSeAGradGradOp);          \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("TabulateFusionSeT").Device(DEVICE_CPU).TypeConstraint("T"),     \
+      TabulateFusionSeTOp);                                      \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("TabulateFusionSeTGrad").Device(DEVICE_CPU).TypeConstraint("T"), \
+      TabulateFusionSeTGradOp);                                  \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeTGradGrad")                    \
+                              .Device(DEVICE_CPU)                              \
+                              .TypeConstraint("T"),                         \
+                          TabulateFusionSeTGradGradOp);          \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("TabulateFusionSeR").Device(DEVICE_CPU).TypeConstraint("T"),     \
+      TabulateFusionSeROp);                                      \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("TabulateFusionSeRGrad").Device(DEVICE_CPU).TypeConstraint("T"), \
+      TabulateFusionSeRGradOp);                                  \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeRGradGrad")                    \
+                              .Device(DEVICE_CPU)                              \
+                              .TypeConstraint("T"),                         \
+                          TabulateFusionSeRGradGradOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
 
-#if  GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-#define REGISTER_GPU(T)                                                                                     \
-REGISTER_KERNEL_BUILDER(                                                                                    \
-    Name("TabulateFusion").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("table_info"),              \
-    TabulateFusionSeAOp);                                                                     \
-REGISTER_KERNEL_BUILDER(                                                                                    \
-    Name("TabulateFusionGrad").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("table_info"),          \
-    TabulateFusionSeAGradOp);                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                                    \
-    Name("TabulateFusionGradGrad").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("table_info"),      \
-    TabulateFusionSeAGradGradOp);                                                             \
-REGISTER_KERNEL_BUILDER(                                                                                    \
-    Name("TabulateFusionSeA").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("table_info"),           \
-    TabulateFusionSeAOp);                                                                     \
-REGISTER_KERNEL_BUILDER(                                                                                    \
-    Name("TabulateFusionSeAGrad").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("table_info"),       \
-    TabulateFusionSeAGradOp);                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                                    \
-    Name("TabulateFusionSeAGradGrad").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("table_info"),   \
-    TabulateFusionSeAGradGradOp);                                                             \
-REGISTER_KERNEL_BUILDER(                                                                                    \
-    Name("TabulateFusionSeT").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("table_info"),           \
-    TabulateFusionSeTOp);                                                                     \
-REGISTER_KERNEL_BUILDER(                                                                                    \
-    Name("TabulateFusionSeTGrad").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("table_info"),       \
-    TabulateFusionSeTGradOp);                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                                    \
-    Name("TabulateFusionSeTGradGrad").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("table_info"),   \
-    TabulateFusionSeTGradGradOp);                                                             \
-REGISTER_KERNEL_BUILDER(                                                                                    \
-    Name("TabulateFusionSeR").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("table_info"),           \
-    TabulateFusionSeROp);                                                                     \
-REGISTER_KERNEL_BUILDER(                                                                                    \
-    Name("TabulateFusionSeRGrad").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("table_info"),       \
-    TabulateFusionSeRGradOp);                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                                    \
-    Name("TabulateFusionSeRGradGrad").Device(DEVICE_GPU).TypeConstraint("T").HostMemory("table_info"),   \
-    TabulateFusionSeRGradGradOp);
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#define REGISTER_GPU(T)                                               \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusion")                      \
+                              .Device(DEVICE_GPU)                     \
+                              .TypeConstraint("T")                 \
+                              .HostMemory("table_info"),              \
+                          TabulateFusionSeAOp);         \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionGrad")                  \
+                              .Device(DEVICE_GPU)                     \
+                              .TypeConstraint("T")                 \
+                              .HostMemory("table_info"),              \
+                          TabulateFusionSeAGradOp);     \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionGradGrad")              \
+                              .Device(DEVICE_GPU)                     \
+                              .TypeConstraint("T")                 \
+                              .HostMemory("table_info"),              \
+                          TabulateFusionSeAGradGradOp); \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeA")                   \
+                              .Device(DEVICE_GPU)                     \
+                              .TypeConstraint("T")                 \
+                              .HostMemory("table_info"),              \
+                          TabulateFusionSeAOp);         \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeAGrad")               \
+                              .Device(DEVICE_GPU)                     \
+                              .TypeConstraint("T")                 \
+                              .HostMemory("table_info"),              \
+                          TabulateFusionSeAGradOp);     \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeAGradGrad")           \
+                              .Device(DEVICE_GPU)                     \
+                              .TypeConstraint("T")                 \
+                              .HostMemory("table_info"),              \
+                          TabulateFusionSeAGradGradOp); \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeT")                   \
+                              .Device(DEVICE_GPU)                     \
+                              .TypeConstraint("T")                 \
+                              .HostMemory("table_info"),              \
+                          TabulateFusionSeTOp);         \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeTGrad")               \
+                              .Device(DEVICE_GPU)                     \
+                              .TypeConstraint("T")                 \
+                              .HostMemory("table_info"),              \
+                          TabulateFusionSeTGradOp);     \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeTGradGrad")           \
+                              .Device(DEVICE_GPU)                     \
+                              .TypeConstraint("T")                 \
+                              .HostMemory("table_info"),              \
+                          TabulateFusionSeTGradGradOp); \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeR")                   \
+                              .Device(DEVICE_GPU)                     \
+                              .TypeConstraint("T")                 \
+                              .HostMemory("table_info"),              \
+                          TabulateFusionSeROp);         \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeRGrad")               \
+                              .Device(DEVICE_GPU)                     \
+                              .TypeConstraint("T")                 \
+                              .HostMemory("table_info"),              \
+                          TabulateFusionSeRGradOp);     \
+  REGISTER_KERNEL_BUILDER(Name("TabulateFusionSeRGradGrad")           \
+                              .Device(DEVICE_GPU)                     \
+                              .TypeConstraint("T")                 \
+                              .HostMemory("table_info"),              \
+                          TabulateFusionSeRGradGradOp);
 REGISTER_GPU(float);
 REGISTER_GPU(double);
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
diff --git a/source/op/tanh4_flt_nvnmd.cc b/source/op/tanh4_flt_nvnmd.cc
index a019dbac63..ad29af5a75 100644
--- a/source/op/tanh4_flt_nvnmd.cc
+++ b/source/op/tanh4_flt_nvnmd.cc
@@ -23,116 +23,108 @@ using namespace tensorflow;
 
 //- register the operator
 REGISTER_OP("Tanh4FltNvnmd")
-  .Attr("T: {float, double} = DT_DOUBLE")
-  .Input("x: T")
-  .Output("y: T");
-
-
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("x: T")
+    .Output("y: T");
 
 //- create the operator class
 //* the class must inherit the OpKernel Class
 template 
 class Tanh4FltNvnmdOp : public OpKernel {
-public:
-
-/// Constructor.
-explicit Tanh4FltNvnmdOp(OpKernelConstruction* context) : OpKernel(context) {
-  //- define the attribute of context
-  //* the context is the input from your tensorflow code
-}
-  
-  
-/// Compute the descriptor
-/// param: context
-void Compute(OpKernelContext* context) override {
-  
-  /* 
-    * Get input
-    * 1.check
-    * 2.get tensor
-    * 3.get shape and check
-    */
-
-  //- 1.check
-  DCHECK_EQ(1, context->num_inputs());
-  
-  //- 2.get tensor
-  const Tensor& X = context->input(0);
-  
-  //- 3. get shape and check
-  const TensorShape& shX = X.shape();
-  
-  int N = shX.dim_size(0);
-  int M = shX.dim_size(1);
-  
-  /*
-    * Calculate the output
-    * 1.create tensor
-    * 2.allocate the memory
-    * 3.calculate
-    */
-  
-  //- 1.create tensor
-  TensorShape shY;
-  shY.AddDim(N);
-  shY.AddDim(M);
-  
-  Tensor* Y = NULL;
-  
-  //- 2.allocate the memory
-  //* allocate memory for the Y tensor which is called output 0
-  OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
-  auto xs = X.matrix();
-  auto ys = Y->matrix();
-  FPTYPE prec23, prec21, prec19, prec17, prec15;
-  FPTYPE prechi, preclo;
-  FPTYPE x, xhi, xlo, xa, xx, xxhi, xxlo;
-  FPTYPE y;
-
-  
-  // calculate
-  int ii, jj, kk;
-
-  prec23 = (FPTYPE)8388608.0; // 2^23
-  prec21 = (FPTYPE)2097152.0; // 2^32
-  prec19 = (FPTYPE)524288.0; // 2^19
-  prec17 = (FPTYPE)131072.0; // 2^17
-  prec15 = (FPTYPE)32768.0; // 2^15
-
-  prechi = prec23;
-  preclo = prec19;
-
-  for(ii=0; ii("T"), \
-    Tanh4FltNvnmdOp);
-REGISTER_CPU(float);                  
+ public:
+  /// Constructor.
+  explicit Tanh4FltNvnmdOp(OpKernelConstruction* context) : OpKernel(context) {
+    //- define the attribute of context
+    //* the context is the input from your tensorflow code
+  }
+
+  /// Compute the descriptor
+  /// param: context
+  void Compute(OpKernelContext* context) override {
+    /*
+     * Get input
+     * 1.check
+     * 2.get tensor
+     * 3.get shape and check
+     */
+
+    //- 1.check
+    DCHECK_EQ(1, context->num_inputs());
+
+    //- 2.get tensor
+    const Tensor& X = context->input(0);
+
+    //- 3. get shape and check
+    const TensorShape& shX = X.shape();
+
+    int N = shX.dim_size(0);
+    int M = shX.dim_size(1);
+
+    /*
+     * Calculate the output
+     * 1.create tensor
+     * 2.allocate the memory
+     * 3.calculate
+     */
+
+    //- 1.create tensor
+    TensorShape shY;
+    shY.AddDim(N);
+    shY.AddDim(M);
+
+    Tensor* Y = NULL;
+
+    //- 2.allocate the memory
+    //* allocate memory for the Y tensor which is called output 0
+    OP_REQUIRES_OK(context, context->allocate_output(0, shY, &Y));
+    auto xs = X.matrix();
+    auto ys = Y->matrix();
+    FPTYPE prec23, prec21, prec19, prec17, prec15;
+    FPTYPE prechi, preclo;
+    FPTYPE x, xhi, xlo, xa, xx, xxhi, xxlo;
+    FPTYPE y;
+
+    // calculate
+    int ii, jj, kk;
+
+    prec23 = (FPTYPE)8388608.0;  // 2^23
+    prec21 = (FPTYPE)2097152.0;  // 2^21
+    prec19 = (FPTYPE)524288.0;   // 2^19
+    prec17 = (FPTYPE)131072.0;   // 2^17
+    prec15 = (FPTYPE)32768.0;    // 2^15
+
+    prechi = prec23;
+    preclo = prec19;
+
+    for (ii = 0; ii < N; ii++) {
+      for (jj = 0; jj < M; jj++) {
+        x = xs(ii, jj);
+        xa = (x < 0) ? (-x) : x;
+        xhi = floor(xa * prechi) / prechi;
+        xlo = floor(xa * preclo) / preclo;
+        xx = xhi * xlo;
+        xxhi = floor(xx * prechi) / prechi;
+        xxlo = floor(xx * preclo) / preclo;
+        //
+        if (xa < (FPTYPE)2.0) {
+          y = xxhi * (xxhi * (FPTYPE)0.0625 - xhi * (FPTYPE)0.25) + xhi;
+          // y = xxlo * (xxhi * (FPTYPE)0.0625 - xhi * (FPTYPE)0.25) + xhi;
+        } else {
+          y = 1;
+        }
+        //
+        y = floor(y * prechi) / prechi;
+        ys(ii, jj) = (x < 0) ? (-y) : y;
+      }  // loop jj
+    }    // loop ii
+  }      // Compute
+
+  //- define the private variable for calculation
+};  // Tanh4FltNvnmd
+
+#define REGISTER_CPU(T)                                                \
+  REGISTER_KERNEL_BUILDER(                                             \
+      Name("Tanh4FltNvnmd").Device(DEVICE_CPU).TypeConstraint("T"), \
+      Tanh4FltNvnmdOp);
+REGISTER_CPU(float);
 REGISTER_CPU(double);
-
diff --git a/source/op/unaggregated_grad.cc b/source/op/unaggregated_grad.cc
index c5f872ab42..97a298afd7 100644
--- a/source/op/unaggregated_grad.cc
+++ b/source/op/unaggregated_grad.cc
@@ -1,439 +1,502 @@
-#include "custom_op.h"
 #include "ComputeDescriptor.h"
-#include "neighbor_list.h"
+#include "custom_op.h"
 #include "device.h"
+#include "neighbor_list.h"
 
 #define GGELU 0.044715
 
 REGISTER_OP("UnaggregatedDyDxS")
-    .Attr("T: {float, double} = DT_DOUBLE") 
-    .Input("y: T")                
-    .Input("w: T")     
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("y: T")
+    .Input("w: T")
     .Input("xbar: T")
     .Input("functype: int32")
     .Output("dy_dx: T");
 
 REGISTER_OP("UnaggregatedDyDx")
     .Attr("T: {float, double} = DT_DOUBLE")
-    .Input("z: T")           
-    .Input("w: T")     
-    .Input("dy_dx: T")   
+    .Input("z: T")
+    .Input("w: T")
+    .Input("dy_dx: T")
     .Input("ybar: T")
     .Input("functype: int32")
     .Output("dz_dx: T");
 
 REGISTER_OP("UnaggregatedDy2DxS")
-    .Attr("T: {float, double} = DT_DOUBLE") 
-    .Input("y: T")                
-    .Input("dy: T")                
-    .Input("w: T")  
+    .Attr("T: {float, double} = DT_DOUBLE")
+    .Input("y: T")
+    .Input("dy: T")
+    .Input("w: T")
     .Input("xbar: T")
     .Input("functype: int32")
     .Output("dy2_dx: T");
 
 REGISTER_OP("UnaggregatedDy2Dx")
     .Attr("T: {float, double} = DT_DOUBLE")
-    .Input("z: T")           
-    .Input("w: T")       
-    .Input("dy_dx: T")     
-    .Input("dy2_dx: T")     
+    .Input("z: T")
+    .Input("w: T")
+    .Input("dy_dx: T")
+    .Input("dy2_dx: T")
     .Input("ybar: T")
     .Input("functype: int32")
     .Output("dz2_dx: T");
 template 
-FPTYPE grad(const FPTYPE xbar, const FPTYPE y, const int functype)  //functype=tanh, gelu, ..
+FPTYPE grad(const FPTYPE xbar,
+            const FPTYPE y,
+            const int functype)  // functype=tanh, gelu, ..
 {
-    switch (functype)
-    {
-        case 1:
-            return (1 - y * y);
-        case 2:
-        {
-            const FPTYPE var = tanh(SQRT_2_PI * (xbar + GGELU * xbar * xbar * xbar));
-            return 0.5 * SQRT_2_PI * xbar * (1 - var * var) * (3 * GGELU * xbar * xbar + 1) + 0.5 * var + 0.5;
-        }
-        case 3:
-        {
-            if(xbar<=0)
-            {
-                return 0;
-            }
-            else
-            {
-                return 1;
-            }
-        }
-        case 4:
-        {
-            if(xbar<=0 || xbar>=6)
-            {
-                return 0;
-            }
-            else
-            {
-                return 1;
-            }
-        }
-        case 5:
-        {
-            return 1.0-1.0/(1.0+exp(xbar));
-        }
-        case 6:
-        {
-            return y*(1-y);
-        }
-        default:
-            return -1;
+  switch (functype) {
+    case 1:
+      return (1 - y * y);
+    case 2: {
+      const FPTYPE var = tanh(SQRT_2_PI * (xbar + GGELU * xbar * xbar * xbar));
+      return 0.5 * SQRT_2_PI * xbar * (1 - var * var) *
+                 (3 * GGELU * xbar * xbar + 1) +
+             0.5 * var + 0.5;
+    }
+    case 3: {
+      if (xbar <= 0) {
+        return 0;
+      } else {
+        return 1;
+      }
+    }
+    case 4: {
+      if (xbar <= 0 || xbar >= 6) {
+        return 0;
+      } else {
+        return 1;
+      }
+    }
+    case 5: {
+      return 1.0 - 1.0 / (1.0 + exp(xbar));
     }
-    
+    case 6: {
+      return y * (1 - y);
+    }
+    default:
+      return -1;
+  }
 }
 
 template 
-FPTYPE grad_grad(const FPTYPE xbar, const FPTYPE y, const int functype)
-{
-    switch (functype)
-    {
-        case 1:
-            return -2 * y * (1 - y * y);
-        case 2:
-        {
-            const FPTYPE var1 = tanh(SQRT_2_PI * (xbar + GGELU * xbar * xbar * xbar));
-            const FPTYPE var2 = SQRT_2_PI * (1 - var1 * var1) * (3 * GGELU * xbar * xbar + 1);
-            return  3 * GGELU * SQRT_2_PI * xbar * xbar * (1 - var1 * var1) - SQRT_2_PI * xbar * var2 * (3 * GGELU * xbar * xbar + 1) * var1 + var2;
-        }
-        case 3:
-        {
-            return 0;
-        }
-        case 4:
-        {
-            return 0;
-        }
-        case 5:
-        {
-            return exp(xbar)/((1+exp(xbar))*(1+exp(xbar)));
-        }
-        case 6:
-        {
-            return y*(1-y)*(1-2*y);
-        }
-        default:
-            return -1;
+FPTYPE grad_grad(const FPTYPE xbar, const FPTYPE y, const int functype) {
+  switch (functype) {
+    case 1:
+      return -2 * y * (1 - y * y);
+    case 2: {
+      const FPTYPE var1 = tanh(SQRT_2_PI * (xbar + GGELU * xbar * xbar * xbar));
+      const FPTYPE var2 =
+          SQRT_2_PI * (1 - var1 * var1) * (3 * GGELU * xbar * xbar + 1);
+      return 3 * GGELU * SQRT_2_PI * xbar * xbar * (1 - var1 * var1) -
+             SQRT_2_PI * xbar * var2 * (3 * GGELU * xbar * xbar + 1) * var1 +
+             var2;
     }
+    case 3: {
+      return 0;
+    }
+    case 4: {
+      return 0;
+    }
+    case 5: {
+      return exp(xbar) / ((1 + exp(xbar)) * (1 + exp(xbar)));
+    }
+    case 6: {
+      return y * (1 - y) * (1 - 2 * y);
+    }
+    default:
+      return -1;
+  }
 }
-    
-
 
 template 
 struct UnaggregatedDyDxSFunctor {
-    void operator()(const CPUDevice& d, const FPTYPE * y, const FPTYPE * w, const FPTYPE* xbar, const int length, const int width, FPTYPE * dy_dx, const int functype) {
-        #pragma omp parallel for
-        for (int ii = 0; ii < length; ii++) {
-            for (int jj = 0; jj < width; jj++) {
-                dy_dx[ii * width + jj] = grad(xbar[ii * width + jj], y[ii * width + jj],functype)*w[jj];
-            }
-        }
+  void operator()(const CPUDevice& d,
+                  const FPTYPE* y,
+                  const FPTYPE* w,
+                  const FPTYPE* xbar,
+                  const int length,
+                  const int width,
+                  FPTYPE* dy_dx,
+                  const int functype) {
+#pragma omp parallel for
+    for (int ii = 0; ii < length; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        dy_dx[ii * width + jj] =
+            grad(xbar[ii * width + jj], y[ii * width + jj], functype) * w[jj];
+      }
     }
-
-    #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-    void operator()(const GPUDevice& d, const FPTYPE * y, const FPTYPE * w, const int length, const int width, FPTYPE * dy_dx) {
-        //Currently, Do nothing at all! 
-        return;
-    }
-    #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  }
+
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  void operator()(const GPUDevice& d,
+                  const FPTYPE* y,
+                  const FPTYPE* w,
+                  const int length,
+                  const int width,
+                  FPTYPE* dy_dx) {
+    // Currently, Do nothing at all!
+    return;
+  }
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 };
 
 // calculate the gradient for all variables!
 template 
 struct UnaggregatedDyDxFunctor {
-    void operator()(const CPUDevice& d, const FPTYPE * z, const FPTYPE * w, const FPTYPE * dy_dx, const FPTYPE * ybar,  const int length, const int width, const int size, FPTYPE * dz_dx, const int functype) {
-        //width=2*size
-        #pragma omp parallel for
-        for (int kk = 0; kk < length; kk++) {
-            for (int ii = 0; ii < width; ii++) {
-                //FPTYPE dz_drou = 1 - (z[kk * width + ii] - y[kk * size + ii % size]) * (z[kk * width + ii] - y[kk * size + ii % size]);
-                FPTYPE dz_drou = grad(ybar[kk*width+ii], z[kk * width + ii],functype);
-                FPTYPE accumulator = 0.0;
-                for (int jj = 0; jj < size; jj++) {
-                    accumulator += w[jj * width + ii] * dy_dx[kk * size + jj];
-                }
-                dz_drou *= accumulator;
-                dz_drou += dy_dx[kk * size + ii % size];
-                dz_dx[kk * width + ii] = dz_drou;
-            }
+  void operator()(const CPUDevice& d,
+                  const FPTYPE* z,
+                  const FPTYPE* w,
+                  const FPTYPE* dy_dx,
+                  const FPTYPE* ybar,
+                  const int length,
+                  const int width,
+                  const int size,
+                  FPTYPE* dz_dx,
+                  const int functype) {
+// width=2*size
+#pragma omp parallel for
+    for (int kk = 0; kk < length; kk++) {
+      for (int ii = 0; ii < width; ii++) {
+        // FPTYPE dz_drou = 1 - (z[kk * width + ii] - y[kk * size + ii % size])
+        // * (z[kk * width + ii] - y[kk * size + ii % size]);
+        FPTYPE dz_drou =
+            grad(ybar[kk * width + ii], z[kk * width + ii], functype);
+        FPTYPE accumulator = 0.0;
+        for (int jj = 0; jj < size; jj++) {
+          accumulator += w[jj * width + ii] * dy_dx[kk * size + jj];
         }
+        dz_drou *= accumulator;
+        dz_drou += dy_dx[kk * size + ii % size];
+        dz_dx[kk * width + ii] = dz_drou;
+      }
     }
-
-    #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-    void operator()(const GPUDevice& d, const FPTYPE * z, const FPTYPE * w, const FPTYPE * dy_dx, const int length, const int width, const int size, FPTYPE * dz_dx) {
-        //Currently, Do nothing at all! 
-        return;
-    }
-    #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  }
+
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  void operator()(const GPUDevice& d,
+                  const FPTYPE* z,
+                  const FPTYPE* w,
+                  const FPTYPE* dy_dx,
+                  const int length,
+                  const int width,
+                  const int size,
+                  FPTYPE* dz_dx) {
+    // Currently, Do nothing at all!
+    return;
+  }
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 };
 
 template 
 struct UnaggregatedDy2DxSFunctor {
-    void operator()(const CPUDevice& d, const FPTYPE * y, const FPTYPE * dy, const FPTYPE * w, const FPTYPE* xbar, const int length, const int width, FPTYPE * dy2_dx, const int functype) {
-        #pragma omp parallel for
-        for (int ii = 0; ii < length; ii++) {
-            for (int jj = 0; jj < width; jj++) {
-                dy2_dx[ii * width + jj] =  grad_grad(xbar[ii * width + jj],y[ii * width + jj],functype)*w[jj]*w[jj];
-            }
-        }
-    }
-
-    #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-    void operator()(const GPUDevice& d, const FPTYPE * y, const FPTYPE * w, const int length, const int width, FPTYPE * dy_dx) {
-        //Currently, Do nothing at all! 
-        return;
+  void operator()(const CPUDevice& d,
+                  const FPTYPE* y,
+                  const FPTYPE* dy,
+                  const FPTYPE* w,
+                  const FPTYPE* xbar,
+                  const int length,
+                  const int width,
+                  FPTYPE* dy2_dx,
+                  const int functype) {
+#pragma omp parallel for
+    for (int ii = 0; ii < length; ii++) {
+      for (int jj = 0; jj < width; jj++) {
+        dy2_dx[ii * width + jj] =
+            grad_grad(xbar[ii * width + jj], y[ii * width + jj], functype) *
+            w[jj] * w[jj];
+      }
     }
-    #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  }
+
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  void operator()(const GPUDevice& d,
+                  const FPTYPE* y,
+                  const FPTYPE* w,
+                  const int length,
+                  const int width,
+                  FPTYPE* dy_dx) {
+    // Currently, Do nothing at all!
+    return;
+  }
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 };
 
 // calculate the gradient for all variables!
 template 
 struct UnaggregatedDy2DxFunctor {
-    void operator()(const CPUDevice& d, const FPTYPE * z, const FPTYPE * w, const FPTYPE * dy_dx, const FPTYPE * dy2_dx, const FPTYPE * ybar, const int length, const int width, const int size, FPTYPE * dz2_dx, const int functype) {
-        #pragma omp parallel for
-        for (int kk = 0; kk < length; kk++) {
-            for (int ii = 0; ii < width; ii++) {
-                //FPTYPE dz_drou = 1 - (z[kk * width + ii] - y[kk * size + ii % size]) * (z[kk * width + ii] - y[kk * size + ii % size]);
-                FPTYPE dz_drou = grad(ybar[kk*width+ii], z[kk * width + ii],functype);
-                FPTYPE accumulator = 0.0;
-                for (int jj = 0; jj < size; jj++) {
-                    accumulator += w[jj * width + ii] * dy2_dx[kk * size + jj];
-                }
-                dz_drou *= accumulator;
-                accumulator = 0.0;
-                for (int jj = 0; jj < size; jj++) {
-                    accumulator += w[jj * width + ii] * dy_dx[kk * size + jj];
-                }
-                dz_drou += grad_grad(ybar[kk * width + ii], z[kk * width + ii],functype) * accumulator * accumulator;
-                dz_drou += dy2_dx[kk * size + ii % size];
-                dz2_dx[kk * width + ii] = dz_drou;
-            }
+  void operator()(const CPUDevice& d,
+                  const FPTYPE* z,
+                  const FPTYPE* w,
+                  const FPTYPE* dy_dx,
+                  const FPTYPE* dy2_dx,
+                  const FPTYPE* ybar,
+                  const int length,
+                  const int width,
+                  const int size,
+                  FPTYPE* dz2_dx,
+                  const int functype) {
+#pragma omp parallel for
+    for (int kk = 0; kk < length; kk++) {
+      for (int ii = 0; ii < width; ii++) {
+        // FPTYPE dz_drou = 1 - (z[kk * width + ii] - y[kk * size + ii % size])
+        // * (z[kk * width + ii] - y[kk * size + ii % size]);
+        FPTYPE dz_drou =
+            grad(ybar[kk * width + ii], z[kk * width + ii], functype);
+        FPTYPE accumulator = 0.0;
+        for (int jj = 0; jj < size; jj++) {
+          accumulator += w[jj * width + ii] * dy2_dx[kk * size + jj];
         }
+        dz_drou *= accumulator;
+        accumulator = 0.0;
+        for (int jj = 0; jj < size; jj++) {
+          accumulator += w[jj * width + ii] * dy_dx[kk * size + jj];
+        }
+        dz_drou +=
+            grad_grad(ybar[kk * width + ii], z[kk * width + ii], functype) *
+            accumulator * accumulator;
+        dz_drou += dy2_dx[kk * size + ii % size];
+        dz2_dx[kk * width + ii] = dz_drou;
+      }
     }
-
-    #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
-    void operator()(const GPUDevice& d, const FPTYPE * z, const FPTYPE * w, const FPTYPE * dz_dx, const FPTYPE * dy_dx, const FPTYPE * dy2_dx, const int length, const int width, const int size, FPTYPE * dz2_dx) {
-        //Currently, Do nothing at all! 
-        return;
-    }
-    #endif // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  }
+
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+  void operator()(const GPUDevice& d,
+                  const FPTYPE* z,
+                  const FPTYPE* w,
+                  const FPTYPE* dz_dx,
+                  const FPTYPE* dy_dx,
+                  const FPTYPE* dy2_dx,
+                  const int length,
+                  const int width,
+                  const int size,
+                  FPTYPE* dz2_dx) {
+    // Currently, Do nothing at all!
+    return;
+  }
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 };
 
-template
+template 
 class UnaggregatedDyDxSOp : public OpKernel {
  public:
-    explicit UnaggregatedDyDxSOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-    void Compute(OpKernelContext* context) override {
-        deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
-    }
-
-    void _Compute(OpKernelContext* context) {
-        // Grab the input tensor
-        //xbar=xw+b
-        int context_input_index = 0;
-        const Tensor& y	= context->input(context_input_index++);
-        const Tensor& w	= context->input(context_input_index++);
-        const Tensor& xbar = context->input(context_input_index++);
-        const Tensor& functype = context->input(context_input_index++);
-
-        // set size of the sample
-        OP_REQUIRES (context, (y.shape().dims() == 2),	    errors::InvalidArgument ("Dim of input should be 2"));
-        OP_REQUIRES (context, (w.shape().dims() == 2),		errors::InvalidArgument ("Dim of input should be 2"));
-        OP_REQUIRES(context, (xbar.shape().dims() == 2),    errors::InvalidArgument("Dim of input should be 2"));
-        //check functype
-
-        int context_output_index = 0;
-        Tensor* dy_dx = NULL;
-        OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
-	    					     y.shape(),
-	    					     &dy_dx));
-
-        UnaggregatedDyDxSFunctor()(
-            context->eigen_device(),            // define actually graph execution device
-            y.flat().data(),
-            w.flat().data(),
-            xbar.flat().data(),
-            y.shape().dim_size(0),
-            y.shape().dim_size(1),
-            dy_dx->flat().data(),
-            functype.flat()(0)
-        );
-    }
-private:
+  explicit UnaggregatedDyDxSOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
+  }
+
+  void _Compute(OpKernelContext* context) {
+    // Grab the input tensor
+    // xbar=xw+b
+    int context_input_index = 0;
+    const Tensor& y = context->input(context_input_index++);
+    const Tensor& w = context->input(context_input_index++);
+    const Tensor& xbar = context->input(context_input_index++);
+    const Tensor& functype = context->input(context_input_index++);
+
+    // set size of the sample
+    OP_REQUIRES(context, (y.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (w.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (xbar.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    // check functype
+
+    int context_output_index = 0;
+    Tensor* dy_dx = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     y.shape(), &dy_dx));
+
+    UnaggregatedDyDxSFunctor()(
+        context
+            ->eigen_device(),  // define actually graph execution device
+        y.flat().data(), w.flat().data(),
+        xbar.flat().data(), y.shape().dim_size(0),
+        y.shape().dim_size(1), dy_dx->flat().data(),
+        functype.flat()(0));
+  }
+
+ private:
 };
 
-template
+template 
 class UnaggregatedDy2DxSOp : public OpKernel {
  public:
-    explicit UnaggregatedDy2DxSOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-    void Compute(OpKernelContext* context) override {
-        deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
-    }
-
-    void _Compute(OpKernelContext* context) {
-        // Grab the input tensor
-        int context_input_index = 0;
-        const Tensor& y	    = context->input(context_input_index++);
-        const Tensor& dy	= context->input(context_input_index++);
-        const Tensor& w	    = context->input(context_input_index++);
-        const Tensor& xbar = context->input(context_input_index++);
-        const Tensor& functype = context->input(context_input_index++);
-
-        // set size of the sample
-        OP_REQUIRES (context, (y.shape().dims()  == 2),	    errors::InvalidArgument ("Dim of input should be 2"));
-        OP_REQUIRES (context, (dy.shape().dims() == 2),	    errors::InvalidArgument ("Dim of input should be 2"));
-        OP_REQUIRES (context, (w.shape().dims()  == 2),		errors::InvalidArgument ("Dim of input should be 2"));
-        OP_REQUIRES (context, (xbar.shape().dims()  == 2),		errors::InvalidArgument ("Dim of input should be 2"));
-    
-        int context_output_index = 0;
-        Tensor* dy2_dx = NULL;
-        OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
-	    					     y.shape(),
-	    					     &dy2_dx));
-
-        UnaggregatedDy2DxSFunctor()(
-            context->eigen_device(),            // define actually graph execution device
-            y.flat().data(),
-            dy.flat().data(),
-            w.flat().data(),
-            xbar.flat().data(),
-            y.shape().dim_size(0),
-            y.shape().dim_size(1),
-            dy2_dx->flat().data(),
-            functype.flat()(0)
-        );
-    }
-private:
+  explicit UnaggregatedDy2DxSOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
+  }
+
+  void _Compute(OpKernelContext* context) {
+    // Grab the input tensor
+    int context_input_index = 0;
+    const Tensor& y = context->input(context_input_index++);
+    const Tensor& dy = context->input(context_input_index++);
+    const Tensor& w = context->input(context_input_index++);
+    const Tensor& xbar = context->input(context_input_index++);
+    const Tensor& functype = context->input(context_input_index++);
+
+    // set size of the sample
+    OP_REQUIRES(context, (y.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (dy.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (w.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (xbar.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+
+    int context_output_index = 0;
+    Tensor* dy2_dx = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     y.shape(), &dy2_dx));
+
+    UnaggregatedDy2DxSFunctor()(
+        context
+            ->eigen_device(),  // define actually graph execution device
+        y.flat().data(), dy.flat().data(),
+        w.flat().data(), xbar.flat().data(),
+        y.shape().dim_size(0), y.shape().dim_size(1),
+        dy2_dx->flat().data(), functype.flat()(0));
+  }
+
+ private:
 };
 
-template
+template 
 class UnaggregatedDyDxOp : public OpKernel {
  public:
-    explicit UnaggregatedDyDxOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-    void Compute(OpKernelContext* context) override {
-        deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
-    }
-
-    void _Compute(OpKernelContext* context) {
-        // Grab the input tensor
-        int context_input_index = 0;
-        const Tensor& z	= context->input(context_input_index++);
-        const Tensor& w	= context->input(context_input_index++);
-        const Tensor& dy_dx	= context->input(context_input_index++);
-        const Tensor& ybar	= context->input(context_input_index++);
-        const Tensor& functype = context->input(context_input_index++);
-
-        // set size of the sample
-        OP_REQUIRES (context, (z.shape().dims() == 2),	        errors::InvalidArgument ("Dim of input should be 2"));
-        OP_REQUIRES (context, (w.shape().dims() == 2),		    errors::InvalidArgument ("Dim of input should be 2"));
-        OP_REQUIRES (context, (dy_dx.shape().dims() == 2),		errors::InvalidArgument ("Dim of input should be 2"));
-        OP_REQUIRES (context, (ybar.shape().dims() == 2),		errors::InvalidArgument ("Dim of input should be 2"));
-
-        int context_output_index = 0;
-        Tensor* dz_dx = NULL;
-        OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
-	    					     z.shape(),
-	    					     &dz_dx));
-
-        UnaggregatedDyDxFunctor()(
-            context->eigen_device(),            // define actually graph execution device
-            z.flat().data(),
-            w.flat().data(),
-            dy_dx.flat().data(),
-            ybar.flat().data(),
-            z.shape().dim_size(0),
-            z.shape().dim_size(1),        //N1
-            w.shape().dim_size(0),      //N0 , N1=2N0
-            dz_dx->flat().data(),
-            functype.flat()(0)
-        );
-    }
-private:
+  explicit UnaggregatedDyDxOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
+  }
+
+  void _Compute(OpKernelContext* context) {
+    // Grab the input tensor
+    int context_input_index = 0;
+    const Tensor& z = context->input(context_input_index++);
+    const Tensor& w = context->input(context_input_index++);
+    const Tensor& dy_dx = context->input(context_input_index++);
+    const Tensor& ybar = context->input(context_input_index++);
+    const Tensor& functype = context->input(context_input_index++);
+
+    // set size of the sample
+    OP_REQUIRES(context, (z.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (w.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (dy_dx.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (ybar.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+
+    int context_output_index = 0;
+    Tensor* dz_dx = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     z.shape(), &dz_dx));
+
+    UnaggregatedDyDxFunctor()(
+        context
+            ->eigen_device(),  // define actually graph execution device
+        z.flat().data(), w.flat().data(),
+        dy_dx.flat().data(), ybar.flat().data(),
+        z.shape().dim_size(0),
+        z.shape().dim_size(1),  // N1
+        w.shape().dim_size(0),  // N0 , N1=2N0
+        dz_dx->flat().data(), functype.flat()(0));
+  }
+
+ private:
 };
 
-template
+template 
 class UnaggregatedDy2DxOp : public OpKernel {
  public:
-    explicit UnaggregatedDy2DxOp(OpKernelConstruction* context) : OpKernel(context) {}
-
-    void Compute(OpKernelContext* context) override {
-        deepmd::safe_compute(context, [this](OpKernelContext* context) {this->_Compute(context);});
-    }
-
-    void _Compute(OpKernelContext* context) {
-        // Grab the input tensor
-        int context_input_index = 0;
-        const Tensor& z	= context->input(context_input_index++);
-        const Tensor& w	= context->input(context_input_index++);
-        const Tensor& dy_dx	= context->input(context_input_index++);
-        const Tensor& dy2_dx = context->input(context_input_index++);
-        const Tensor& ybar = context->input(context_input_index++);
-        const Tensor& functype = context->input(context_input_index++);
-
-        // set size of the sample
-        OP_REQUIRES (context, (z.shape().dims() == 2),	        errors::InvalidArgument ("Dim of input should be 2"));
-        OP_REQUIRES (context, (w.shape().dims() == 2),		    errors::InvalidArgument ("Dim of input should be 2"));
-        OP_REQUIRES (context, (dy_dx.shape().dims() == 2),		errors::InvalidArgument ("Dim of input should be 2"));
-        OP_REQUIRES (context, (dy2_dx.shape().dims() == 2),		errors::InvalidArgument ("Dim of input should be 2"));
-        OP_REQUIRES (context, (ybar.shape().dims() == 2),		errors::InvalidArgument ("Dim of input should be 2"));
-
-        int context_output_index = 0;
-        Tensor* dz2_dx = NULL;
-        OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
-	    					     z.shape(),
-	    					     &dz2_dx));
-
-        UnaggregatedDy2DxFunctor()(
-            context->eigen_device(),            // define actually graph execution device
-            z.flat().data(),
-            w.flat().data(),
-            dy_dx.flat().data(),
-            dy2_dx.flat().data(),
-            ybar.flat().data(),
-            z.shape().dim_size(0),
-            z.shape().dim_size(1),
-            w.shape().dim_size(0),
-            dz2_dx->flat().data(),
-            functype.flat()(0)
-        );
-    }
-private:
+  explicit UnaggregatedDy2DxOp(OpKernelConstruction* context)
+      : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    deepmd::safe_compute(
+        context, [this](OpKernelContext* context) { this->_Compute(context); });
+  }
+
+  void _Compute(OpKernelContext* context) {
+    // Grab the input tensor
+    int context_input_index = 0;
+    const Tensor& z = context->input(context_input_index++);
+    const Tensor& w = context->input(context_input_index++);
+    const Tensor& dy_dx = context->input(context_input_index++);
+    const Tensor& dy2_dx = context->input(context_input_index++);
+    const Tensor& ybar = context->input(context_input_index++);
+    const Tensor& functype = context->input(context_input_index++);
+
+    // set size of the sample
+    OP_REQUIRES(context, (z.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (w.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (dy_dx.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (dy2_dx.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+    OP_REQUIRES(context, (ybar.shape().dims() == 2),
+                errors::InvalidArgument("Dim of input should be 2"));
+
+    int context_output_index = 0;
+    Tensor* dz2_dx = NULL;
+    OP_REQUIRES_OK(context, context->allocate_output(context_output_index++,
+                                                     z.shape(), &dz2_dx));
+
+    UnaggregatedDy2DxFunctor()(
+        context
+            ->eigen_device(),  // define actually graph execution device
+        z.flat().data(), w.flat().data(),
+        dy_dx.flat().data(), dy2_dx.flat().data(),
+        ybar.flat().data(), z.shape().dim_size(0),
+        z.shape().dim_size(1), w.shape().dim_size(0),
+        dz2_dx->flat().data(), functype.flat()(0));
+  }
+
+ private:
 };
 
 // Register the CPU kernels.
-#define REGISTER_CPU(T)                                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("UnaggregatedDyDxS").Device(DEVICE_CPU).TypeConstraint("T"),                \
-    UnaggregatedDyDxSOp);                                                 \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("UnaggregatedDyDx").Device(DEVICE_CPU).TypeConstraint("T"),                 \
-    UnaggregatedDyDxOp);                                                  \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("UnaggregatedDy2DxS").Device(DEVICE_CPU).TypeConstraint("T"),               \
-    UnaggregatedDy2DxSOp);                                                \
-REGISTER_KERNEL_BUILDER(                                                                \
-    Name("UnaggregatedDy2Dx").Device(DEVICE_CPU).TypeConstraint("T"),                \
-    UnaggregatedDy2DxOp);             
+#define REGISTER_CPU(T)                                                     \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("UnaggregatedDyDxS").Device(DEVICE_CPU).TypeConstraint("T"),  \
+      UnaggregatedDyDxSOp);                                   \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("UnaggregatedDyDx").Device(DEVICE_CPU).TypeConstraint("T"),   \
+      UnaggregatedDyDxOp);                                    \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("UnaggregatedDy2DxS").Device(DEVICE_CPU).TypeConstraint("T"), \
+      UnaggregatedDy2DxSOp);                                  \
+  REGISTER_KERNEL_BUILDER(                                                  \
+      Name("UnaggregatedDy2Dx").Device(DEVICE_CPU).TypeConstraint("T"),  \
+      UnaggregatedDy2DxOp);
 REGISTER_CPU(float);
 REGISTER_CPU(double);
 // Not required in the current situation
 // // Register the GPU kernels.
 // #if GOOGLE_CUDA
-// #define REGISTER_GPU(T)                                                                 \
-// REGISTER_KERNEL_BUILDER(                                                                \
-//     Name("UnaggregatedDyDxS").Device(DEVICE_GPU).TypeConstraint("T"),                \
-//     UnaggregatedDyDxSOp);                                                 \
-// REGISTER_KERNEL_BUILDER(                                                                \
-//     Name("UnaggregatedDyDx").Device(DEVICE_GPU).TypeConstraint("T"),                 \
-//     UnaggregatedDyDxOp);                         
+// #define REGISTER_GPU(T) \
+// REGISTER_KERNEL_BUILDER( \
+//     Name("UnaggregatedDyDxS").Device(DEVICE_GPU).TypeConstraint("T"), \
+//     UnaggregatedDyDxSOp); \
+// REGISTER_KERNEL_BUILDER( \
+//     Name("UnaggregatedDyDx").Device(DEVICE_GPU).TypeConstraint("T"), \
+//     UnaggregatedDyDxOp);
 // REGISTER_GPU(float);
 // REGISTER_GPU(double);
 // #endif  // GOOGLE_CUDA
diff --git a/source/tests/CMakeLists.txt b/source/tests/CMakeLists.txt
index 6078cf977a..673125804f 100644
--- a/source/tests/CMakeLists.txt
+++ b/source/tests/CMakeLists.txt
@@ -1,7 +1,3 @@
 file(GLOB LIB_PY *py *json)
 
-install(
-  FILES		${LIB_PY}
-  DESTINATION	deepmd/tests
-)
-
+install(FILES ${LIB_PY} DESTINATION deepmd/tests)
diff --git a/source/tests/common.py b/source/tests/common.py
index e11f4e5700..189ad8e42e 100644
--- a/source/tests/common.py
+++ b/source/tests/common.py
@@ -1,87 +1,108 @@
-import os, sys, dpdata, shutil
-import numpy as np
-import pathlib
 import collections
 import glob
+import os
+import pathlib
+import shutil
+import sys
+
+import dpdata
+import numpy as np
 
-from deepmd.env import tf
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
 from deepmd.common import j_loader as dp_j_loader
+from deepmd.entrypoints.main import (
+    main,
+)
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    tf,
+)
 from deepmd.utils import random as dp_random
-from deepmd.entrypoints.main import main
 
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     global_default_fv_hh = 1e-2
     global_default_dw_hh = 1e-2
     global_default_places = 3
-else :
+else:
     global_default_fv_hh = 1e-5
     global_default_dw_hh = 1e-4
     global_default_places = 5
 
 tests_path = pathlib.Path(__file__).parent.absolute()
 
+
 def j_loader(filename):
-    return dp_j_loader(tests_path/filename)
+    return dp_j_loader(tests_path / filename)
+
 
 def del_data():
-    if os.path.isdir('system'):
-        shutil.rmtree('system')
-    if os.path.isdir('system_mixed_type'):
-        shutil.rmtree('system_mixed_type')
+    if os.path.isdir("system"):
+        shutil.rmtree("system")
+    if os.path.isdir("system_mixed_type"):
+        shutil.rmtree("system_mixed_type")
+
 
-def gen_data_type_specific(nframes = 1):
+def gen_data_type_specific(nframes=1):
     tmpdata = Data(rand_pert=0.1, seed=1, nframes=nframes)
     sys = dpdata.LabeledSystem()
-    sys.data['atom_names'] = ['foo', 'bar']
-    sys.data['coords'] = tmpdata.coord
-    sys.data['atom_types'] = tmpdata.atype
-    sys.data['cells'] = tmpdata.cell
+    sys.data["atom_names"] = ["foo", "bar"]
+    sys.data["coords"] = tmpdata.coord
+    sys.data["atom_types"] = tmpdata.atype
+    sys.data["cells"] = tmpdata.cell
     nframes = tmpdata.nframes
     natoms = tmpdata.natoms
-    sys.data['coords'] = sys.data['coords'].reshape([nframes, natoms, 3])
-    sys.data['cells'] = sys.data['cells'].reshape([nframes, 3, 3])
-    sys.data['energies'] = np.zeros([nframes, 1])
-    sys.data['forces'] = np.zeros([nframes, natoms, 3])
-    sys.to_deepmd_npy('system', prec=np.float64)
-    np.save('system/set.000/fparam.npy', tmpdata.fparam)
-    np.save('system/set.000/aparam.npy', tmpdata.aparam.reshape([nframes, natoms, 2]))
-
-def gen_data_mixed_type(nframes = 1):
+    sys.data["coords"] = sys.data["coords"].reshape([nframes, natoms, 3])
+    sys.data["cells"] = sys.data["cells"].reshape([nframes, 3, 3])
+    sys.data["energies"] = np.zeros([nframes, 1])
+    sys.data["forces"] = np.zeros([nframes, natoms, 3])
+    sys.to_deepmd_npy("system", prec=np.float64)
+    np.save("system/set.000/fparam.npy", tmpdata.fparam)
+    np.save("system/set.000/aparam.npy", tmpdata.aparam.reshape([nframes, natoms, 2]))
+
+
+def gen_data_mixed_type(nframes=1):
     tmpdata = Data(rand_pert=0.1, seed=1, nframes=nframes)
     sys = dpdata.LabeledSystem()
-    real_type_map = ['foo', 'bar']
-    sys.data['atom_names'] = ['X']
-    sys.data['coords'] = tmpdata.coord
-    sys.data['atom_types'] = np.zeros_like(tmpdata.atype)
-    sys.data['cells'] = tmpdata.cell
+    real_type_map = ["foo", "bar"]
+    sys.data["atom_names"] = ["X"]
+    sys.data["coords"] = tmpdata.coord
+    sys.data["atom_types"] = np.zeros_like(tmpdata.atype)
+    sys.data["cells"] = tmpdata.cell
     nframes = tmpdata.nframes
     natoms = tmpdata.natoms
-    sys.data['coords'] = sys.data['coords'].reshape([nframes, natoms, 3])
-    sys.data['cells'] = sys.data['cells'].reshape([nframes, 3, 3])
-    sys.data['energies'] = np.zeros([nframes, 1])
-    sys.data['forces'] = np.zeros([nframes, natoms, 3])
-    sys.to_deepmd_npy('system_mixed_type', prec=np.float64)
-    np.savetxt('system_mixed_type/type_map.raw', real_type_map, fmt='%s')
-    np.save('system_mixed_type/set.000/real_atom_types.npy', tmpdata.atype.reshape(1, -1).repeat(nframes, 0))
-    np.save('system_mixed_type/set.000/fparam.npy', tmpdata.fparam)
-    np.save('system_mixed_type/set.000/aparam.npy', tmpdata.aparam.reshape([nframes, natoms, 2]))
-
-def gen_data(nframes = 1, mixed_type=False) :
+    sys.data["coords"] = sys.data["coords"].reshape([nframes, natoms, 3])
+    sys.data["cells"] = sys.data["cells"].reshape([nframes, 3, 3])
+    sys.data["energies"] = np.zeros([nframes, 1])
+    sys.data["forces"] = np.zeros([nframes, natoms, 3])
+    sys.to_deepmd_npy("system_mixed_type", prec=np.float64)
+    np.savetxt("system_mixed_type/type_map.raw", real_type_map, fmt="%s")
+    np.save(
+        "system_mixed_type/set.000/real_atom_types.npy",
+        tmpdata.atype.reshape(1, -1).repeat(nframes, 0),
+    )
+    np.save("system_mixed_type/set.000/fparam.npy", tmpdata.fparam)
+    np.save(
+        "system_mixed_type/set.000/aparam.npy",
+        tmpdata.aparam.reshape([nframes, natoms, 2]),
+    )
+
+
+def gen_data(nframes=1, mixed_type=False):
     if not mixed_type:
         gen_data_type_specific(nframes)
     else:
         gen_data_mixed_type(nframes)
 
 
-class Data():
-    def __init__ (self, 
-                  rand_pert = 0.1, 
-                  seed = 1, 
-                  box_scale = 20,
-                  nframes = 1):
-        coord = [[0.0, 0.0, 0.1], [1.1, 0.0, 0.1], [0.0, 1.1, 0.1], 
-                 [4.0, 0.0, 0.0], [5.1, 0.0, 0.0], [4.0, 1.1, 0.0]]
+class Data:
+    def __init__(self, rand_pert=0.1, seed=1, box_scale=20, nframes=1):
+        coord = [
+            [0.0, 0.0, 0.1],
+            [1.1, 0.0, 0.1],
+            [0.0, 1.1, 0.1],
+            [4.0, 0.0, 0.0],
+            [5.1, 0.0, 0.0],
+            [4.0, 1.1, 0.0],
+        ]
         self.nframes = nframes
         self.coord = np.array(coord)
         self.coord = self._copy_nframes(self.coord)
@@ -91,33 +112,33 @@ def __init__ (self,
         self.aparam = np.tile(self.fparam, [1, 6])
         self.fparam = self._copy_nframes(self.fparam)
         self.aparam = self._copy_nframes(self.aparam)
-        self.atype = np.array([0, 1, 1, 0, 1, 1], dtype = int)
+        self.atype = np.array([0, 1, 1, 0, 1, 1], dtype=int)
         self.cell = box_scale * np.eye(3)
         self.cell = self._copy_nframes(self.cell)
         self.coord = self.coord.reshape([self.nframes, -1])
         self.cell = self.cell.reshape([self.nframes, -1])
-        self.natoms = len(self.atype)        
-        self.idx_map = np.lexsort ((np.arange(self.natoms), self.atype))
+        self.natoms = len(self.atype)
+        self.idx_map = np.lexsort((np.arange(self.natoms), self.atype))
         self.coord = self.coord.reshape([self.nframes, -1, 3])
-        self.coord = self.coord[:,self.idx_map,:]
-        self.coord = self.coord.reshape([self.nframes, -1])        
+        self.coord = self.coord[:, self.idx_map, :]
+        self.coord = self.coord.reshape([self.nframes, -1])
         self.efield = dp_random.random(self.coord.shape)
         self.atype = self.atype[self.idx_map]
         self.datype = self._copy_nframes(self.atype)
 
     def _copy_nframes(self, xx):
         return np.tile(xx, [self.nframes, 1])
-        
-    def get_data(self) :
+
+    def get_data(self):
         return self.coord, self.cell, self.datype
 
-    def get_natoms (self) :
+    def get_natoms(self):
         ret = [self.natoms, self.natoms]
-        for ii in range(max(self.atype) + 1) :
-            ret.append(np.sum(self.atype == ii))        
-        return np.array(ret, dtype = np.int32)
-    
-    def get_ntypes(self) :
+        for ii in range(max(self.atype) + 1):
+            ret.append(np.sum(self.atype == ii))
+        return np.array(ret, dtype=np.int32)
+
+    def get_ntypes(self):
         return max(self.atype) + 1
 
     # def get_test_box_data (self,
@@ -154,9 +175,7 @@ def get_ntypes(self) :
     #         nt = np.append (nt, [type0], axis = 0)
     #     return nc, nb, nt
 
-    def get_test_box_data (self,
-                           hh, 
-                           rand_pert = 0.1) :
+    def get_test_box_data(self, hh, rand_pert=0.1):
         coord0_, box0_, type0_ = self.get_data()
         coord = coord0_[0]
         box = box0_[0]
@@ -164,27 +183,27 @@ def get_test_box_data (self,
         atype = type0_[0]
         nframes = 1
         natoms = coord.size // 3
-        box3 = np.reshape(box, [nframes, 3,3])
+        box3 = np.reshape(box, [nframes, 3, 3])
         rbox3 = np.linalg.inv(box3)
         coord3 = np.reshape(coord, [nframes, natoms, 3])
         rcoord3 = np.matmul(coord3, rbox3)
-        
-        all_coord = [coord.reshape([nframes, natoms*3])]
-        all_box = [box.reshape([nframes,9])]
+
+        all_coord = [coord.reshape([nframes, natoms * 3])]
+        all_box = [box.reshape([nframes, 9])]
         all_atype = [atype]
         all_efield = [self.efield]
         for ii in range(3):
             for jj in range(3):
                 box3p = np.copy(box3)
                 box3m = np.copy(box3)
-                box3p[:,ii,jj] = box3[:,ii,jj] + hh
-                box3m[:,ii,jj] = box3[:,ii,jj] - hh
-                boxp = np.reshape(box3p, [-1,9])
-                boxm = np.reshape(box3m, [-1,9])
+                box3p[:, ii, jj] = box3[:, ii, jj] + hh
+                box3m[:, ii, jj] = box3[:, ii, jj] - hh
+                boxp = np.reshape(box3p, [-1, 9])
+                boxm = np.reshape(box3m, [-1, 9])
                 coord3p = np.matmul(rcoord3, box3p)
                 coord3m = np.matmul(rcoord3, box3m)
-                coordp = np.reshape(coord3p, [nframes,-1])
-                coordm = np.reshape(coord3m, [nframes,-1])
+                coordp = np.reshape(coord3p, [nframes, -1])
+                coordm = np.reshape(coord3m, [nframes, -1])
                 all_coord.append(coordp)
                 all_coord.append(coordm)
                 all_box.append(boxp)
@@ -195,226 +214,269 @@ def get_test_box_data (self,
                 all_efield.append(self.efield)
         all_coord = np.reshape(all_coord, [-1, natoms * 3])
         all_box = np.reshape(all_box, [-1, 9])
-        all_atype = np.reshape(all_atype, [-1, natoms])        
-        all_efield = np.reshape(all_efield, [-1, natoms * 3])        
+        all_atype = np.reshape(all_atype, [-1, natoms])
+        all_efield = np.reshape(all_efield, [-1, natoms * 3])
         return all_coord, all_box, all_atype, all_efield
 
 
-def force_test (inter, 
-                testCase, 
-                places = global_default_places, 
-                hh = global_default_fv_hh, 
-                suffix = '') :
+def force_test(
+    inter, testCase, places=global_default_places, hh=global_default_fv_hh, suffix=""
+):
     # set weights
-    w0 = np.ones (inter.ndescrpt)
+    w0 = np.ones(inter.ndescrpt)
     inter.net_w_i = np.copy(w0)
     # make network
-    t_energy, t_force, t_virial \
-        = inter.comp_ef (inter.coord, inter.box, inter.type, inter.tnatoms, name = "test_f" + suffix)
-    inter.sess.run (tf.global_variables_initializer())
+    t_energy, t_force, t_virial = inter.comp_ef(
+        inter.coord, inter.box, inter.type, inter.tnatoms, name="test_f" + suffix
+    )
+    inter.sess.run(tf.global_variables_initializer())
     # get data
-    dcoord, dbox, dtype = inter.data.get_data ()
+    dcoord, dbox, dtype = inter.data.get_data()
     defield = inter.data.efield
     # cmp e0, f0
-    [energy, force] = inter.sess.run ([t_energy, t_force], 
-                                     feed_dict = {
-                                         inter.coord:     dcoord,
-                                         inter.box:       dbox,
-                                         inter.type:      dtype,
-                                         inter.efield:    defield,
-                                         inter.tnatoms:   inter.natoms}
+    [energy, force] = inter.sess.run(
+        [t_energy, t_force],
+        feed_dict={
+            inter.coord: dcoord,
+            inter.box: dbox,
+            inter.type: dtype,
+            inter.efield: defield,
+            inter.tnatoms: inter.natoms,
+        },
     )
     # dim force
-    sel_idx = np.arange(inter.natoms[0])    
+    sel_idx = np.arange(inter.natoms[0])
     for idx in sel_idx:
         for dd in range(3):
             dcoordp = np.copy(dcoord)
             dcoordm = np.copy(dcoord)
-            dcoordp[0,idx*3+dd] = dcoord[0,idx*3+dd] + hh
-            dcoordm[0,idx*3+dd] = dcoord[0,idx*3+dd] - hh
-            [enerp] = inter.sess.run ([t_energy], 
-                                     feed_dict = {
-                                         inter.coord:     dcoordp,
-                                         inter.box:       dbox,
-                                         inter.type:      dtype,
-                                         inter.efield:    defield,
-                                         inter.tnatoms:   inter.natoms}
+            dcoordp[0, idx * 3 + dd] = dcoord[0, idx * 3 + dd] + hh
+            dcoordm[0, idx * 3 + dd] = dcoord[0, idx * 3 + dd] - hh
+            [enerp] = inter.sess.run(
+                [t_energy],
+                feed_dict={
+                    inter.coord: dcoordp,
+                    inter.box: dbox,
+                    inter.type: dtype,
+                    inter.efield: defield,
+                    inter.tnatoms: inter.natoms,
+                },
+            )
+            [enerm] = inter.sess.run(
+                [t_energy],
+                feed_dict={
+                    inter.coord: dcoordm,
+                    inter.box: dbox,
+                    inter.type: dtype,
+                    inter.efield: defield,
+                    inter.tnatoms: inter.natoms,
+                },
             )
-            [enerm] = inter.sess.run ([t_energy], 
-                                     feed_dict = {
-                                         inter.coord:     dcoordm,
-                                         inter.box:       dbox,
-                                         inter.type:      dtype,
-                                         inter.efield:    defield,
-                                         inter.tnatoms:   inter.natoms}
+            c_force = -(enerp[0] - enerm[0]) / (2 * hh)
+            testCase.assertAlmostEqual(
+                c_force,
+                force[0, idx * 3 + dd],
+                places=places,
+                msg="force component [%d,%d] failed" % (idx, dd),
             )
-            c_force = -(enerp[0] - enerm[0]) / (2*hh)
-            testCase.assertAlmostEqual(c_force, force[0,idx*3+dd], 
-                                       places = places,
-                                       msg = "force component [%d,%d] failed" % (idx, dd))
-
-def comp_vol (box) : 
-    return np.linalg.det (np.reshape(box, (3,3)))
-
-def virial_test (inter, 
-                 testCase, 
-                 places = global_default_places, 
-                 hh = global_default_fv_hh, 
-                 suffix = '') :
+
+
+def comp_vol(box):
+    return np.linalg.det(np.reshape(box, (3, 3)))
+
+
+def virial_test(
+    inter, testCase, places=global_default_places, hh=global_default_fv_hh, suffix=""
+):
     # set weights
-    w0 = np.ones (inter.ndescrpt)
+    w0 = np.ones(inter.ndescrpt)
     inter.net_w_i = np.copy(w0)
     # make network
-    t_energy, t_force, t_virial \
-        = inter.comp_ef (inter.coord, inter.box, inter.type, inter.tnatoms, name = "test_v" + suffix)
-    inter.sess.run (tf.global_variables_initializer())
+    t_energy, t_force, t_virial = inter.comp_ef(
+        inter.coord, inter.box, inter.type, inter.tnatoms, name="test_v" + suffix
+    )
+    inter.sess.run(tf.global_variables_initializer())
     # get data
     dcoord, dbox, dtype, defield = inter.data.get_test_box_data(hh)
     # cmp e, f, v
-    [energy, force, virial] \
-        = inter.sess.run ([t_energy, t_force, t_virial], 
-                          feed_dict = {
-                              inter.coord:     dcoord,
-                              inter.box:       dbox,
-                              inter.type:      dtype,
-                              inter.efield:    defield,
-                              inter.tnatoms:   inter.natoms}
-        )
-    ana_vir = virial[0].reshape([3,3])
-    num_vir = np.zeros([3,3])
+    [energy, force, virial] = inter.sess.run(
+        [t_energy, t_force, t_virial],
+        feed_dict={
+            inter.coord: dcoord,
+            inter.box: dbox,
+            inter.type: dtype,
+            inter.efield: defield,
+            inter.tnatoms: inter.natoms,
+        },
+    )
+    ana_vir = virial[0].reshape([3, 3])
+    num_vir = np.zeros([3, 3])
     for ii in range(3):
         for jj in range(3):
-            ep = energy[1+(ii*3+jj)*2+0]
-            em = energy[1+(ii*3+jj)*2+1]
-            num_vir[ii][jj] = -(ep - em) / (2.*hh)
-    num_vir = np.transpose(num_vir, [1,0])    
-    box3 = dbox[0].reshape([3,3])
+            ep = energy[1 + (ii * 3 + jj) * 2 + 0]
+            em = energy[1 + (ii * 3 + jj) * 2 + 1]
+            num_vir[ii][jj] = -(ep - em) / (2.0 * hh)
+    num_vir = np.transpose(num_vir, [1, 0])
+    box3 = dbox[0].reshape([3, 3])
     num_vir = np.matmul(num_vir, box3)
-    np.testing.assert_almost_equal(ana_vir, num_vir,
-                                   places, 
-                                   err_msg = 'virial component')
-    
+    np.testing.assert_almost_equal(ana_vir, num_vir, places, err_msg="virial component")
 
 
-def force_dw_test (inter, 
-                   testCase,
-                   places = global_default_places,
-                   hh = global_default_dw_hh, 
-                   suffix = '') :
+def force_dw_test(
+    inter, testCase, places=global_default_places, hh=global_default_dw_hh, suffix=""
+):
     dcoord, dbox, dtype = inter.data.get_data()
     defield = inter.data.efield
     feed_dict_test0 = {
-        inter.coord:     dcoord,
-        inter.box:       dbox,
-        inter.type:      dtype,
-        inter.efield:    defield,
-        inter.tnatoms:   inter.natoms}
-
-    w0 = np.ones (inter.ndescrpt)
+        inter.coord: dcoord,
+        inter.box: dbox,
+        inter.type: dtype,
+        inter.efield: defield,
+        inter.tnatoms: inter.natoms,
+    }
+
+    w0 = np.ones(inter.ndescrpt)
     inter.net_w_i = np.copy(w0)
-        
-    t_ll, t_dw = inter.comp_f_dw (inter.coord, inter.box, inter.type, inter.tnatoms, name = "f_dw_test_0" + suffix)
-    inter.sess.run (tf.global_variables_initializer())
-    ll_0 = inter.sess.run (t_ll, feed_dict = feed_dict_test0)
-    dw_0 = inter.sess.run (t_dw, feed_dict = feed_dict_test0)
-        
+
+    t_ll, t_dw = inter.comp_f_dw(
+        inter.coord, inter.box, inter.type, inter.tnatoms, name="f_dw_test_0" + suffix
+    )
+    inter.sess.run(tf.global_variables_initializer())
+    ll_0 = inter.sess.run(t_ll, feed_dict=feed_dict_test0)
+    dw_0 = inter.sess.run(t_dw, feed_dict=feed_dict_test0)
+
     absolut_e = []
     relativ_e = []
-    test_list = range (inter.ndescrpt) 
+    test_list = range(inter.ndescrpt)
     ntest = 3
     if inter.sel_a[0] != 0:
-        test_list = np.concatenate((np.arange(0,ntest), np.arange(inter.sel_a[0]*4, inter.sel_a[0]*4+ntest)))
-    else :
-        test_list = np.arange(0,ntest)
+        test_list = np.concatenate(
+            (
+                np.arange(0, ntest),
+                np.arange(inter.sel_a[0] * 4, inter.sel_a[0] * 4 + ntest),
+            )
+        )
+    else:
+        test_list = np.arange(0, ntest)
 
     for ii in test_list:
-        inter.net_w_i = np.copy (w0)
+        inter.net_w_i = np.copy(w0)
         inter.net_w_i[ii] += hh
-        t_ll, t_dw = inter.comp_f_dw (inter.coord, inter.box, inter.type, inter.tnatoms, name = "f_dw_test_" + str(ii*2+1) + suffix)
-        inter.sess.run (tf.global_variables_initializer())
-        ll_1 = inter.sess.run (t_ll, feed_dict = feed_dict_test0)
-        inter.net_w_i[ii] -= 2. * hh
-        t_ll, t_dw = inter.comp_f_dw (inter.coord, inter.box, inter.type, inter.tnatoms, name = "f_dw_test_" + str(ii*2+2) + suffix)
-        inter.sess.run (tf.global_variables_initializer())
-        ll_2 = inter.sess.run (t_ll, feed_dict = feed_dict_test0)
-        num_v = (ll_1 - ll_2) / (2. * hh)
+        t_ll, t_dw = inter.comp_f_dw(
+            inter.coord,
+            inter.box,
+            inter.type,
+            inter.tnatoms,
+            name="f_dw_test_" + str(ii * 2 + 1) + suffix,
+        )
+        inter.sess.run(tf.global_variables_initializer())
+        ll_1 = inter.sess.run(t_ll, feed_dict=feed_dict_test0)
+        inter.net_w_i[ii] -= 2.0 * hh
+        t_ll, t_dw = inter.comp_f_dw(
+            inter.coord,
+            inter.box,
+            inter.type,
+            inter.tnatoms,
+            name="f_dw_test_" + str(ii * 2 + 2) + suffix,
+        )
+        inter.sess.run(tf.global_variables_initializer())
+        ll_2 = inter.sess.run(t_ll, feed_dict=feed_dict_test0)
+        num_v = (ll_1 - ll_2) / (2.0 * hh)
         ana_v = dw_0[ii]
-        diff = np.abs (num_v - ana_v)
+        diff = np.abs(num_v - ana_v)
         # print(ii, num_v, ana_v)
-        testCase.assertAlmostEqual(num_v, ana_v, places = places)
+        testCase.assertAlmostEqual(num_v, ana_v, places=places)
 
 
-def virial_dw_test (inter, 
-                   testCase,
-                   places = global_default_places,
-                   hh = global_default_dw_hh, 
-                   suffix = '') :
+def virial_dw_test(
+    inter, testCase, places=global_default_places, hh=global_default_dw_hh, suffix=""
+):
     dcoord, dbox, dtype = inter.data.get_data()
     defield = inter.data.efield
     feed_dict_test0 = {
-        inter.coord:     dcoord,
-        inter.box:       dbox,
-        inter.type:      dtype,
-        inter.efield:    defield,
-        inter.tnatoms:   inter.natoms}
-
-    w0 = np.ones (inter.ndescrpt)
+        inter.coord: dcoord,
+        inter.box: dbox,
+        inter.type: dtype,
+        inter.efield: defield,
+        inter.tnatoms: inter.natoms,
+    }
+
+    w0 = np.ones(inter.ndescrpt)
     inter.net_w_i = np.copy(w0)
 
-    t_ll, t_dw = inter.comp_v_dw (inter.coord, inter.box, inter.type, inter.tnatoms, name = "v_dw_test_0" + suffix)
-    inter.sess.run (tf.global_variables_initializer())
-    ll_0 = inter.sess.run (t_ll, feed_dict = feed_dict_test0)
-    dw_0 = inter.sess.run (t_dw, feed_dict = feed_dict_test0)
-        
+    t_ll, t_dw = inter.comp_v_dw(
+        inter.coord, inter.box, inter.type, inter.tnatoms, name="v_dw_test_0" + suffix
+    )
+    inter.sess.run(tf.global_variables_initializer())
+    ll_0 = inter.sess.run(t_ll, feed_dict=feed_dict_test0)
+    dw_0 = inter.sess.run(t_dw, feed_dict=feed_dict_test0)
+
     absolut_e = []
     relativ_e = []
-    test_list = range (inter.ndescrpt) 
+    test_list = range(inter.ndescrpt)
     ntest = 3
-    if inter.sel_a[0] != 0 :
-        test_list = np.concatenate((np.arange(0,ntest), np.arange(inter.sel_a[0]*4, inter.sel_a[0]*4+ntest)))
-    else :
-        test_list = np.arange(0,ntest)
-        
+    if inter.sel_a[0] != 0:
+        test_list = np.concatenate(
+            (
+                np.arange(0, ntest),
+                np.arange(inter.sel_a[0] * 4, inter.sel_a[0] * 4 + ntest),
+            )
+        )
+    else:
+        test_list = np.arange(0, ntest)
+
     for ii in test_list:
-        inter.net_w_i = np.copy (w0)
+        inter.net_w_i = np.copy(w0)
         inter.net_w_i[ii] += hh
-        t_ll, t_dw = inter.comp_v_dw (inter.coord, inter.box, inter.type, inter.tnatoms, name = "v_dw_test_" + str(ii*2+1) + suffix)
-        inter.sess.run (tf.global_variables_initializer())
-        ll_1 = inter.sess.run (t_ll, feed_dict = feed_dict_test0)
-        inter.net_w_i[ii] -= 2. * hh
-        t_ll, t_dw = inter.comp_v_dw (inter.coord, inter.box, inter.type, inter.tnatoms, name = "v_dw_test_" + str(ii*2+2) + suffix)
-        inter.sess.run (tf.global_variables_initializer())
-        ll_2 = inter.sess.run (t_ll, feed_dict = feed_dict_test0)
-        num_v = (ll_1 - ll_2) / (2. * hh)
+        t_ll, t_dw = inter.comp_v_dw(
+            inter.coord,
+            inter.box,
+            inter.type,
+            inter.tnatoms,
+            name="v_dw_test_" + str(ii * 2 + 1) + suffix,
+        )
+        inter.sess.run(tf.global_variables_initializer())
+        ll_1 = inter.sess.run(t_ll, feed_dict=feed_dict_test0)
+        inter.net_w_i[ii] -= 2.0 * hh
+        t_ll, t_dw = inter.comp_v_dw(
+            inter.coord,
+            inter.box,
+            inter.type,
+            inter.tnatoms,
+            name="v_dw_test_" + str(ii * 2 + 2) + suffix,
+        )
+        inter.sess.run(tf.global_variables_initializer())
+        ll_2 = inter.sess.run(t_ll, feed_dict=feed_dict_test0)
+        num_v = (ll_1 - ll_2) / (2.0 * hh)
         ana_v = dw_0[ii]
-        testCase.assertAlmostEqual(num_v, ana_v, places = places)
+        testCase.assertAlmostEqual(num_v, ana_v, places=places)
 
 
 def finite_difference(f, x, delta=1e-6):
     in_shape = x.shape
     y0 = f(x)
     out_shape = y0.shape
-    res = np.empty(out_shape+in_shape)
+    res = np.empty(out_shape + in_shape)
     for idx in np.ndindex(*in_shape):
         diff = np.zeros(in_shape)
         diff[idx] += delta
-        y1p = f(x+diff)
-        y1n = f(x-diff)
+        y1p = f(x + diff)
+        y1n = f(x - diff)
         res[(Ellipsis, *idx)] = (y1p - y1n) / (2 * delta)
     return res
 
 
 def strerch_box(old_coord, old_box, new_box):
-    ocoord = old_coord.reshape(-1,3)
-    obox = old_box.reshape(3,3)
-    nbox = new_box.reshape(3,3)
+    ocoord = old_coord.reshape(-1, 3)
+    obox = old_box.reshape(3, 3)
+    nbox = new_box.reshape(3, 3)
     ncoord = ocoord @ np.linalg.inv(obox) @ nbox
     return ncoord.reshape(old_coord.shape)
 
 
 def run_dp(cmd: str) -> int:
     """Run DP directly from the entry point instead of the subprocess.
-    
+
     It is quite slow to start DeePMD-kit with subprocess.
 
     Parameters
@@ -428,138 +490,144 @@ def run_dp(cmd: str) -> int:
         Always returns 0.
     """
     cmds = cmd.split()
-    if cmds[0] == 'dp':
+    if cmds[0] == "dp":
         cmds = cmds[1:]
     else:
-        raise RuntimeError('The command is not dp')
+        raise RuntimeError("The command is not dp")
 
     main(cmds)
     return 0
 
 
 # some tests still need this class
-class DataSets (object):
+class DataSets(object):
     """
     Outdated class for one data system.
     .. deprecated:: 2.0.0
         This class is not maintained any more.
     """
-    def __init__ (self, 
-                  sys_path,
-                  set_prefix,
-                  seed = None, 
-                  shuffle_test = True) :
-        self.dirs = glob.glob (os.path.join(sys_path, set_prefix + ".*"))
+
+    def __init__(self, sys_path, set_prefix, seed=None, shuffle_test=True):
+        self.dirs = glob.glob(os.path.join(sys_path, set_prefix + ".*"))
         self.dirs.sort()
         # load atom type
-        self.atom_type, self.idx_map, self.idx3_map = self.load_type (sys_path)
+        self.atom_type, self.idx_map, self.idx3_map = self.load_type(sys_path)
         # load atom type map
         self.type_map = self.load_type_map(sys_path)
         if self.type_map is not None:
-            assert(len(self.type_map) >= max(self.atom_type)+1)
+            assert len(self.type_map) >= max(self.atom_type) + 1
         # train dirs
-        self.test_dir   = self.dirs[-1]
-        if len(self.dirs) == 1 :
+        self.test_dir = self.dirs[-1]
+        if len(self.dirs) == 1:
             self.train_dirs = self.dirs
-        else :
+        else:
             self.train_dirs = self.dirs[:-1]
         # check fparam
-        has_fparam = [ os.path.isfile(os.path.join(ii, 'fparam.npy')) for ii in self.dirs ]
-        if any(has_fparam) and (not all(has_fparam)) :
-            raise RuntimeError("system %s: if any set has frame parameter, then all sets should have frame parameter" % sys_path)
-        if all(has_fparam) :
+        has_fparam = [
+            os.path.isfile(os.path.join(ii, "fparam.npy")) for ii in self.dirs
+        ]
+        if any(has_fparam) and (not all(has_fparam)):
+            raise RuntimeError(
+                "system %s: if any set has frame parameter, then all sets should have frame parameter"
+                % sys_path
+            )
+        if all(has_fparam):
             self.has_fparam = 0
-        else :
+        else:
             self.has_fparam = -1
         # check aparam
-        has_aparam = [ os.path.isfile(os.path.join(ii, 'aparam.npy')) for ii in self.dirs ]
-        if any(has_aparam) and (not all(has_aparam)) :
-            raise RuntimeError("system %s: if any set has frame parameter, then all sets should have frame parameter" % sys_path)
-        if all(has_aparam) :
+        has_aparam = [
+            os.path.isfile(os.path.join(ii, "aparam.npy")) for ii in self.dirs
+        ]
+        if any(has_aparam) and (not all(has_aparam)):
+            raise RuntimeError(
+                "system %s: if any set has frame parameter, then all sets should have frame parameter"
+                % sys_path
+            )
+        if all(has_aparam):
             self.has_aparam = 0
-        else :
+        else:
             self.has_aparam = -1
         # energy norm
         self.eavg = self.stats_energy()
         # load sets
         self.set_count = 0
-        self.load_batch_set (self.train_dirs[self.set_count % self.get_numb_set()])
-        self.load_test_set (self.test_dir, shuffle_test)
+        self.load_batch_set(self.train_dirs[self.set_count % self.get_numb_set()])
+        self.load_test_set(self.test_dir, shuffle_test)
 
-    def check_batch_size (self, batch_size) :
-        for ii in self.train_dirs :
+    def check_batch_size(self, batch_size):
+        for ii in self.train_dirs:
             tmpe = np.load(os.path.join(ii, "coord.npy"))
-            if tmpe.shape[0] < batch_size :
+            if tmpe.shape[0] < batch_size:
                 return ii, tmpe.shape[0]
         return None
 
-    def check_test_size (self, test_size) :
+    def check_test_size(self, test_size):
         tmpe = np.load(os.path.join(self.test_dir, "coord.npy"))
-        if tmpe.shape[0] < test_size :
+        if tmpe.shape[0] < test_size:
             return self.test_dir, tmpe.shape[0]
-        else :
+        else:
             return None
 
-    def load_type (self, sys_path) :
-        atom_type = np.loadtxt (os.path.join(sys_path, "type.raw"), dtype=np.int32, ndmin=1)
+    def load_type(self, sys_path):
+        atom_type = np.loadtxt(
+            os.path.join(sys_path, "type.raw"), dtype=np.int32, ndmin=1
+        )
         natoms = atom_type.shape[0]
-        idx = np.arange (natoms)
-        idx_map = np.lexsort ((idx, atom_type))
+        idx = np.arange(natoms)
+        idx_map = np.lexsort((idx, atom_type))
         atom_type3 = np.repeat(atom_type, 3)
-        idx3 = np.arange (natoms * 3)
-        idx3_map = np.lexsort ((idx3, atom_type3))
+        idx3 = np.arange(natoms * 3)
+        idx3_map = np.lexsort((idx3, atom_type3))
         return atom_type, idx_map, idx3_map
 
-    def load_type_map(self, sys_path) :
-        fname = os.path.join(sys_path, 'type_map.raw')
-        if os.path.isfile(fname) :            
-            with open(os.path.join(sys_path, 'type_map.raw')) as fp:
-                return fp.read().split()                
-        else :
+    def load_type_map(self, sys_path):
+        fname = os.path.join(sys_path, "type_map.raw")
+        if os.path.isfile(fname):
+            with open(os.path.join(sys_path, "type_map.raw")) as fp:
+                return fp.read().split()
+        else:
             return None
 
-    def get_type_map(self) :
+    def get_type_map(self):
         return self.type_map
 
-    def get_numb_set (self) :
-        return len (self.train_dirs)
+    def get_numb_set(self):
+        return len(self.train_dirs)
 
-    def stats_energy (self) :
+    def stats_energy(self):
         eners = []
         for ii in self.train_dirs:
             ener_file = os.path.join(ii, "energy.npy")
-            if os.path.isfile(ener_file) :
+            if os.path.isfile(ener_file):
                 ei = np.load(ener_file)
                 eners.append(ei)
         eners = np.concatenate(eners)
-        if eners.size == 0 :
+        if eners.size == 0:
             return 0
-        else :
+        else:
             return np.average(eners)
 
-    def load_energy(self, 
-                    set_name,
-                    nframes,
-                    nvalues,
-                    energy_file, 
-                    atom_energy_file) :
+    def load_energy(self, set_name, nframes, nvalues, energy_file, atom_energy_file):
         """
         return : coeff_ener, ener, coeff_atom_ener, atom_ener
         """
         # load atom_energy
-        coeff_atom_ener, atom_ener = self.load_data(set_name, atom_energy_file, [nframes, nvalues], False)
+        coeff_atom_ener, atom_ener = self.load_data(
+            set_name, atom_energy_file, [nframes, nvalues], False
+        )
         # ignore energy_file
         if coeff_atom_ener == 1:
-            ener = np.sum(atom_ener, axis = 1)
+            ener = np.sum(atom_ener, axis=1)
             coeff_ener = 1
         # load energy_file
         else:
             coeff_ener, ener = self.load_data(set_name, energy_file, [nframes], False)
         return coeff_ener, ener, coeff_atom_ener, atom_ener
 
-    def load_data(self, set_name, data_name, shape, is_necessary = True):
-        path = os.path.join(set_name, data_name+".npy")
-        if os.path.isfile (path) :
+    def load_data(self, set_name, data_name, shape, is_necessary=True):
+        path = os.path.join(set_name, data_name + ".npy")
+        if os.path.isfile(path):
             data = np.load(path)
             data = np.reshape(data, shape)
             if is_necessary:
@@ -571,7 +639,7 @@ def load_data(self, set_name, data_name, shape, is_necessary = True):
             data = np.zeros(shape)
         return 0, data
 
-    def load_set(self, set_name, shuffle = True):
+    def load_set(self, set_name, shuffle=True):
         data = {}
         data["box"] = self.load_data(set_name, "box", [-1, 9])
         nframe = data["box"].shape[0]
@@ -579,31 +647,41 @@ def load_set(self, set_name, shuffle = True):
         ncoord = data["coord"].shape[1]
         if self.has_fparam >= 0:
             data["fparam"] = self.load_data(set_name, "fparam", [nframe, -1])
-            if self.has_fparam == 0 :
+            if self.has_fparam == 0:
                 self.has_fparam = data["fparam"].shape[1]
-            else :
+            else:
                 assert self.has_fparam == data["fparam"].shape[1]
         if self.has_aparam >= 0:
             data["aparam"] = self.load_data(set_name, "aparam", [nframe, -1])
-            if self.has_aparam == 0 :
-                self.has_aparam = data["aparam"].shape[1] // (ncoord//3)
-            else :
-                assert self.has_aparam == data["aparam"].shape[1] // (ncoord//3)
+            if self.has_aparam == 0:
+                self.has_aparam = data["aparam"].shape[1] // (ncoord // 3)
+            else:
+                assert self.has_aparam == data["aparam"].shape[1] // (ncoord // 3)
         data["prop_c"] = np.zeros(5)
-        data["prop_c"][0], data["energy"], data["prop_c"][3], data["atom_ener"] \
-            = self.load_energy (set_name, nframe, ncoord // 3, "energy", "atom_ener")
-        data["prop_c"][1], data["force"] = self.load_data(set_name, "force", [nframe, ncoord], False)
-        data["prop_c"][2], data["virial"] = self.load_data(set_name, "virial", [nframe, 9], False)
-        data["prop_c"][4], data["atom_pref"] = self.load_data(set_name, "atom_pref", [nframe, ncoord//3], False)
+        (
+            data["prop_c"][0],
+            data["energy"],
+            data["prop_c"][3],
+            data["atom_ener"],
+        ) = self.load_energy(set_name, nframe, ncoord // 3, "energy", "atom_ener")
+        data["prop_c"][1], data["force"] = self.load_data(
+            set_name, "force", [nframe, ncoord], False
+        )
+        data["prop_c"][2], data["virial"] = self.load_data(
+            set_name, "virial", [nframe, 9], False
+        )
+        data["prop_c"][4], data["atom_pref"] = self.load_data(
+            set_name, "atom_pref", [nframe, ncoord // 3], False
+        )
         data["atom_pref"] = np.repeat(data["atom_pref"], 3, axis=1)
         # shuffle data
         if shuffle:
-            idx = np.arange (nframe)
+            idx = np.arange(nframe)
             dp_random.shuffle(idx)
             for ii in data:
                 if ii != "prop_c":
                     data[ii] = data[ii][idx]
-        data["type"] = np.tile (self.atom_type, (nframe, 1))
+        data["type"] = np.tile(self.atom_type, (nframe, 1))
         # sort according to type
         for ii in ["type", "atom_ener"]:
             data[ii] = data[ii][:, self.idx_map]
@@ -611,21 +689,18 @@ def load_set(self, set_name, shuffle = True):
             data[ii] = data[ii][:, self.idx3_map]
         return data
 
-    def load_batch_set (self,
-                        set_name) :
+    def load_batch_set(self, set_name):
         self.batch_set = self.load_set(set_name, True)
-        self.reset_iter ()
+        self.reset_iter()
 
-    def load_test_set (self,
-                       set_name, 
-                       shuffle_test) :
+    def load_test_set(self, set_name, shuffle_test):
         self.test_set = self.load_set(set_name, shuffle_test)
 
-    def reset_iter (self) :
-        self.iterator = 0              
+    def reset_iter(self):
+        self.iterator = 0
         self.set_count += 1
 
-    def get_set(self, data, idx = None) :
+    def get_set(self, data, idx=None):
         new_data = {}
         for ii in data:
             dd = data[ii]
@@ -640,239 +715,248 @@ def get_set(self, data, idx = None) :
                     new_data[ii] = dd.astype(GLOBAL_NP_FLOAT_PRECISION)
         return new_data
 
-    def get_test (self) :
+    def get_test(self):
         """
-        returned property prefector [4] in order: 
+        returned property prefactor [4] in order:
         energy, force, virial, atom_ener
         """
         return self.get_set(self.test_set)
 
-    def get_batch (self,
-                   batch_size) :
+    def get_batch(self, batch_size):
         """
-        returned property prefector [4] in order: 
+        returned property prefactor [4] in order:
         energy, force, virial, atom_ener
         """
         set_size = self.batch_set["energy"].shape[0]
         # assert (batch_size <= set_size), "batch size should be no more than set size"
-        if self.iterator + batch_size > set_size :
-            self.load_batch_set (self.train_dirs[self.set_count % self.get_numb_set()])
+        if self.iterator + batch_size > set_size:
+            self.load_batch_set(self.train_dirs[self.set_count % self.get_numb_set()])
             set_size = self.batch_set["energy"].shape[0]
         # print ("%d %d %d" % (self.iterator, self.iterator + batch_size, set_size))
         iterator_1 = self.iterator + batch_size
-        if iterator_1 >= set_size :
+        if iterator_1 >= set_size:
             iterator_1 = set_size
-        idx = np.arange (self.iterator, iterator_1)
+        idx = np.arange(self.iterator, iterator_1)
         self.iterator += batch_size
         return self.get_set(self.batch_set, idx)
 
-    def get_natoms (self) :
+    def get_natoms(self):
         sample_type = self.batch_set["type"][0]
         natoms = len(sample_type)
         return natoms
 
-    def get_natoms_2 (self, ntypes) :
+    def get_natoms_2(self, ntypes):
         sample_type = self.batch_set["type"][0]
         natoms = len(sample_type)
-        natoms_vec = np.zeros (ntypes).astype(int)
-        for ii in range (ntypes) :
+        natoms_vec = np.zeros(ntypes).astype(int)
+        for ii in range(ntypes):
             natoms_vec[ii] = np.count_nonzero(sample_type == ii)
         return natoms, natoms_vec
 
-    def get_natoms_vec (self, ntypes) :
-        natoms, natoms_vec = self.get_natoms_2 (ntypes)
+    def get_natoms_vec(self, ntypes):
+        natoms, natoms_vec = self.get_natoms_2(ntypes)
         tmp = [natoms, natoms]
-        tmp = np.append (tmp, natoms_vec)
+        tmp = np.append(tmp, natoms_vec)
         return tmp.astype(np.int32)
 
-    def set_numb_batch (self, 
-                        batch_size) :
+    def set_numb_batch(self, batch_size):
         return self.batch_set["energy"].shape[0] // batch_size
 
-    def get_sys_numb_batch (self, batch_size) :
+    def get_sys_numb_batch(self, batch_size):
         return self.set_numb_batch(batch_size) * self.get_numb_set()
 
-    def get_ener (self) :
+    def get_ener(self):
         return self.eavg
 
-    def numb_fparam(self) :
+    def numb_fparam(self):
         return self.has_fparam
 
-    def numb_aparam(self) :
+    def numb_aparam(self):
         return self.has_aparam
 
 
-class DataSystem (object) :
+class DataSystem(object):
     """
     Outdated class for the data systems.
     .. deprecated:: 2.0.0
-        This class is not maintained any more.  
+        This class is not maintained any more.
     """
-    def __init__ (self,
-                  systems,
-                  set_prefix,
-                  batch_size,
-                  test_size,
-                  rcut, 
-                  run_opt = None) : 
+
+    def __init__(self, systems, set_prefix, batch_size, test_size, rcut, run_opt=None):
         self.system_dirs = systems
         self.nsystems = len(self.system_dirs)
         self.batch_size = batch_size
-        if isinstance(self.batch_size, int) :
+        if isinstance(self.batch_size, int):
             self.batch_size = self.batch_size * np.ones(self.nsystems, dtype=int)
-        assert(isinstance(self.batch_size, (list,np.ndarray)))
-        assert(len(self.batch_size) == self.nsystems)
+        assert isinstance(self.batch_size, (list, np.ndarray))
+        assert len(self.batch_size) == self.nsystems
         self.data_systems = []
         self.ntypes = []
         self.natoms = []
         self.natoms_vec = []
         self.nbatches = []
-        for ii in self.system_dirs :
+        for ii in self.system_dirs:
             self.data_systems.append(DataSets(ii, set_prefix))
             sys_all_types = np.loadtxt(os.path.join(ii, "type.raw")).astype(int)
             self.ntypes.append(np.max(sys_all_types) + 1)
         self.sys_ntypes = max(self.ntypes)
         type_map = []
-        for ii in range(self.nsystems) :
+        for ii in range(self.nsystems):
             self.natoms.append(self.data_systems[ii].get_natoms())
-            self.natoms_vec.append(self.data_systems[ii].get_natoms_vec(self.sys_ntypes).astype(int))
-            self.nbatches.append(self.data_systems[ii].get_sys_numb_batch(self.batch_size[ii]))
+            self.natoms_vec.append(
+                self.data_systems[ii].get_natoms_vec(self.sys_ntypes).astype(int)
+            )
+            self.nbatches.append(
+                self.data_systems[ii].get_sys_numb_batch(self.batch_size[ii])
+            )
             type_map.append(self.data_systems[ii].get_type_map())
         self.type_map = self.check_type_map_consistency(type_map)
 
         # check frame parameters
         has_fparam = [ii.numb_fparam() for ii in self.data_systems]
-        for ii in has_fparam :
-            if ii != has_fparam[0] :
-                raise RuntimeError("if any system has frame parameter, then all systems should have the same number of frame parameter")
+        for ii in has_fparam:
+            if ii != has_fparam[0]:
+                raise RuntimeError(
+                    "if any system has frame parameter, then all systems should have the same number of frame parameter"
+                )
         self.has_fparam = has_fparam[0]
 
         # check the size of data if they satisfy the requirement of batch and test
-        for ii in range(self.nsystems) :
+        for ii in range(self.nsystems):
             chk_ret = self.data_systems[ii].check_batch_size(self.batch_size[ii])
-            if chk_ret is not None :
-                raise RuntimeError ("system %s required batch size %d is larger than the size %d of the dataset %s" % \
-                                    (self.system_dirs[ii], self.batch_size[ii], chk_ret[1], chk_ret[0]))
+            if chk_ret is not None:
+                raise RuntimeError(
+                    "system %s required batch size %d is larger than the size %d of the dataset %s"
+                    % (
+                        self.system_dirs[ii],
+                        self.batch_size[ii],
+                        chk_ret[1],
+                        chk_ret[0],
+                    )
+                )
             chk_ret = self.data_systems[ii].check_test_size(test_size)
-            if chk_ret is not None :
-                print("WARNNING: system %s required test size %d is larger than the size %d of the dataset %s" % \
-                      (self.system_dirs[ii], test_size, chk_ret[1], chk_ret[0]))
+            if chk_ret is not None:
+                print(
+                    "WARNNING: system %s required test size %d is larger than the size %d of the dataset %s"
+                    % (self.system_dirs[ii], test_size, chk_ret[1], chk_ret[0])
+                )
 
         if run_opt is not None:
             self.print_summary(run_opt)
 
-        self.prob_nbatches = [ float(i) for i in self.nbatches] / np.sum(self.nbatches)
+        self.prob_nbatches = [float(i) for i in self.nbatches] / np.sum(self.nbatches)
 
         self.test_data = collections.defaultdict(list)
         self.default_mesh = []
-        for ii in range(self.nsystems) :
-            test_system_data = self.data_systems[ii].get_test ()
+        for ii in range(self.nsystems):
+            test_system_data = self.data_systems[ii].get_test()
             for nn in test_system_data:
                 self.test_data[nn].append(test_system_data[nn])
-            cell_size = np.max (rcut)
-            avg_box = np.average (test_system_data["box"], axis = 0)
-            avg_box = np.reshape (avg_box, [3,3])
-            ncell = (np.linalg.norm(avg_box, axis=1)/ cell_size).astype(np.int32)
+            cell_size = np.max(rcut)
+            avg_box = np.average(test_system_data["box"], axis=0)
+            avg_box = np.reshape(avg_box, [3, 3])
+            ncell = (np.linalg.norm(avg_box, axis=1) / cell_size).astype(np.int32)
             ncell[ncell < 2] = 2
-            default_mesh = np.zeros (6, dtype = np.int32)
+            default_mesh = np.zeros(6, dtype=np.int32)
             default_mesh[3:6] = ncell
             self.default_mesh.append(default_mesh)
         self.pick_idx = 0
 
-
     def check_type_map_consistency(self, type_map_list):
         ret = []
         for ii in type_map_list:
             if ii is not None:
                 min_len = min([len(ii), len(ret)])
-                for idx in range(min_len) :
-                    if ii[idx] != ret[idx] :
-                        raise RuntimeError('inconsistent type map: %s %s' % (str(ret), str(ii)))
-                if len(ii) > len(ret) :
+                for idx in range(min_len):
+                    if ii[idx] != ret[idx]:
+                        raise RuntimeError(
+                            "inconsistent type map: %s %s" % (str(ret), str(ii))
+                        )
+                if len(ii) > len(ret):
                     ret = ii
         return ret
 
-
     def get_type_map(self):
         return self.type_map
 
-
-    def format_name_length(self, name, width) :
+    def format_name_length(self, name, width):
         if len(name) <= width:
-            return '{: >{}}'.format(name, width)
-        else :
-            name = name[-(width-3):]
-            name = '-- ' + name
-            return name 
+            return "{: >{}}".format(name, width)
+        else:
+            name = name[-(width - 3) :]
+            name = "-- " + name
+            return name
 
-    def print_summary(self) :
+    def print_summary(self):
         tmp_msg = ""
         # width 65
         sys_width = 42
         tmp_msg += "---Summary of DataSystem-----------------------------------------\n"
         tmp_msg += "find %d system(s):\n" % self.nsystems
-        tmp_msg += "%s  " % self.format_name_length('system', sys_width)
-        tmp_msg += "%s  %s  %s\n" % ('natoms', 'bch_sz', 'n_bch')
-        for ii in range(self.nsystems) :
-            tmp_msg += ("%s  %6d  %6d  %5d\n" % 
-                        (self.format_name_length(self.system_dirs[ii], sys_width),
-                         self.natoms[ii], 
-                         self.batch_size[ii], 
-                         self.nbatches[ii]) )
+        tmp_msg += "%s  " % self.format_name_length("system", sys_width)
+        tmp_msg += "%s  %s  %s\n" % ("natoms", "bch_sz", "n_bch")
+        for ii in range(self.nsystems):
+            tmp_msg += "%s  %6d  %6d  %5d\n" % (
+                self.format_name_length(self.system_dirs[ii], sys_width),
+                self.natoms[ii],
+                self.batch_size[ii],
+                self.nbatches[ii],
+            )
         tmp_msg += "-----------------------------------------------------------------\n"
-        #log.info(tmp_msg)
+        # log.info(tmp_msg)
 
-    def compute_energy_shift(self) :
+    def compute_energy_shift(self):
         sys_ener = []
-        for ss in self.data_systems :
+        for ss in self.data_systems:
             sys_ener.append(ss.get_ener())
         sys_ener = np.array(sys_ener)
         sys_tynatom = np.array(self.natoms_vec, dtype=GLOBAL_NP_FLOAT_PRECISION)
-        sys_tynatom = np.reshape(sys_tynatom, [self.nsystems,-1])
-        sys_tynatom = sys_tynatom[:,2:]
-        energy_shift,resd,rank,s_value \
-            = np.linalg.lstsq(sys_tynatom, sys_ener, rcond = 1e-3)
+        sys_tynatom = np.reshape(sys_tynatom, [self.nsystems, -1])
+        sys_tynatom = sys_tynatom[:, 2:]
+        energy_shift, resd, rank, s_value = np.linalg.lstsq(
+            sys_tynatom, sys_ener, rcond=1e-3
+        )
         return energy_shift
 
-    def process_sys_weights(self, sys_weights) :
+    def process_sys_weights(self, sys_weights):
         sys_weights = np.array(sys_weights)
         type_filter = sys_weights >= 0
         assigned_sum_prob = np.sum(type_filter * sys_weights)
-        assert assigned_sum_prob <= 1, "the sum of assigned probability should be less than 1"
-        rest_sum_prob = 1. - assigned_sum_prob
+        assert (
+            assigned_sum_prob <= 1
+        ), "the sum of assigned probability should be less than 1"
+        rest_sum_prob = 1.0 - assigned_sum_prob
         rest_nbatch = (1 - type_filter) * self.nbatches
         rest_prob = rest_sum_prob * rest_nbatch / np.sum(rest_nbatch)
         ret_prob = rest_prob + type_filter * sys_weights
         assert np.sum(ret_prob) == 1, "sum of probs should be 1"
         return ret_prob
 
-    def get_batch (self, 
-                   sys_idx = None,
-                   sys_weights = None,
-                   style = "prob_sys_size") :
-        if sys_idx is not None :
+    def get_batch(self, sys_idx=None, sys_weights=None, style="prob_sys_size"):
+        if sys_idx is not None:
             self.pick_idx = sys_idx
-        else :
-            if sys_weights is None :
-                if style == "prob_sys_size" :
+        else:
+            if sys_weights is None:
+                if style == "prob_sys_size":
                     prob = self.prob_nbatches
-                elif style == "prob_uniform" :
+                elif style == "prob_uniform":
                     prob = None
-                else :
+                else:
                     raise RuntimeError("unknown get_batch style")
-            else :
+            else:
                 prob = self.process_sys_weights(sys_weights)
             self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=prob)
-        b_data = self.data_systems[self.pick_idx].get_batch(self.batch_size[self.pick_idx])
+        b_data = self.data_systems[self.pick_idx].get_batch(
+            self.batch_size[self.pick_idx]
+        )
         b_data["natoms_vec"] = self.natoms_vec[self.pick_idx]
         b_data["default_mesh"] = self.default_mesh[self.pick_idx]
         return b_data
 
-    def get_test (self, 
-                  sys_idx = None) :
-        if sys_idx is not None :
+    def get_test(self, sys_idx=None):
+        if sys_idx is not None:
             idx = sys_idx
-        else :
+        else:
             idx = self.pick_idx
         test_system_data = {}
         for nn in self.test_data:
@@ -881,20 +965,20 @@ def get_test (self,
         test_system_data["default_mesh"] = self.default_mesh[idx]
         return test_system_data
 
-    def get_nbatches (self) : 
+    def get_nbatches(self):
         return self.nbatches
 
-    def get_ntypes (self) :
+    def get_ntypes(self):
         return self.sys_ntypes
 
-    def get_nsystems (self) :
+    def get_nsystems(self):
         return self.nsystems
 
-    def get_sys (self, sys_idx) :
+    def get_sys(self, sys_idx):
         return self.data_systems[sys_idx]
 
-    def get_batch_size(self) :
+    def get_batch_size(self):
         return self.batch_size
 
-    def numb_fparam(self) :
+    def numb_fparam(self):
         return self.has_fparam
diff --git a/source/tests/compat_inputs/water_se_a_v0.json b/source/tests/compat_inputs/water_se_a_v0.json
index 26a0b0a0ea..e187b54eca 100644
--- a/source/tests/compat_inputs/water_se_a_v0.json
+++ b/source/tests/compat_inputs/water_se_a_v0.json
@@ -1,47 +1,61 @@
 {
-    "_comment": " model parameters",
-    "use_smooth":	true,
-    "sel_a":		[46, 92],
-    "rcut_smth":	5.80,
-    "rcut":		6.00,
-    "filter_neuron":	[25, 50, 100],
-    "filter_resnet_dt":	false,
-    "axis_neuron":	16,
-    "fitting_neuron":	[240, 240, 240],
-    "fitting_resnet_dt":true,
-    "coord_norm":	true,
-    "type_fitting_net":	false,
+  "_comment": " model parameters",
+  "use_smooth": true,
+  "sel_a": [
+    46,
+    92
+  ],
+  "rcut_smth": 5.80,
+  "rcut": 6.00,
+  "filter_neuron": [
+    25,
+    50,
+    100
+  ],
+  "filter_resnet_dt": false,
+  "axis_neuron": 16,
+  "fitting_neuron": [
+    240,
+    240,
+    240
+  ],
+  "fitting_resnet_dt": true,
+  "coord_norm": true,
+  "type_fitting_net": false,
 
-    "_comment": " traing controls",
-    "systems":		["../data/"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	[1],
-    "start_lr":		0.001,
-    "decay_steps":	5000,
-    "decay_rate":	0.95,
+  "_comment": " training controls",
+  "systems": [
+    "../data/"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": [
+    1
+  ],
+  "start_lr": 0.001,
+  "decay_steps": 5000,
+  "decay_rate": 0.95,
 
-    "start_pref_e":	0.02,
-    "limit_pref_e":	1,
-    "start_pref_f":	1000,
-    "limit_pref_f":	1,
-    "start_pref_v":	0,
-    "limit_pref_v":	0,
+  "start_pref_e": 0.02,
+  "limit_pref_e": 1,
+  "start_pref_f": 1000,
+  "limit_pref_f": 1,
+  "start_pref_v": 0,
+  "limit_pref_v": 0,
 
-    "seed":		1,
+  "seed": 1,
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	10,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
-    "profiling":	true,
-    "profiling_file":	"timeline.json",
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 10,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
+  "profiling": true,
+  "profiling_file": "timeline.json",
 
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/compat_inputs/water_se_a_v1.json b/source/tests/compat_inputs/water_se_a_v1.json
index 402da962ca..7ebec4d5a3 100644
--- a/source/tests/compat_inputs/water_se_a_v1.json
+++ b/source/tests/compat_inputs/water_se_a_v1.json
@@ -1,55 +1,69 @@
 {
-    "model": {
-	"descriptor" :{
-	    "type":		"se_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "axis_neuron":	16,
-	    "resnet_dt":	false,
-	    "seed":		1
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1
-	}
+  "model": {
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "axis_neuron": 16,
+      "resnet_dt": false,
+      "seed": 1
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1
+    }
+  },
 
-    "learning_rate" :{
-	"type":		"exp",
-	"decay_steps":	5000,
-	"decay_rate":	0.95,
-	"start_lr":	0.001
-    },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "decay_rate": 0.95,
+    "start_lr": 0.001
+  },
 
-    "loss" :{
-	"start_pref_e":	0.02,
-	"limit_pref_e":	1,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0
-    },
+  "loss": {
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0
+  },
 
-    "training" : {
-	"systems":	["../data/"],
-	"set_prefix":	"set",    
-	"stop_batch":	1000000,
-	"batch_size":	[1],
+  "training": {
+    "systems": [
+      "../data/"
+    ],
+    "set_prefix": "set",
+    "stop_batch": 1000000,
+    "batch_size": [
+      1
+    ],
 
-	"seed":		1,
+    "seed": 1,
 
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"numb_test":	10,
-	"save_freq":	1000,
-	"save_ckpt":	"model.ckpt",
-	"disp_training":true,
-	"time_training":true,
-	"profiling":	true,
-	"profiling_file":"timeline.json"
-    }
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "numb_test": 10,
+    "save_freq": 1000,
+    "save_ckpt": "model.ckpt",
+    "disp_training": true,
+    "time_training": true,
+    "profiling": true,
+    "profiling_file": "timeline.json"
+  }
 }
-
diff --git a/source/tests/compat_inputs/water_v0.json b/source/tests/compat_inputs/water_v0.json
index 70eedcf72b..1ed580a2ae 100644
--- a/source/tests/compat_inputs/water_v0.json
+++ b/source/tests/compat_inputs/water_v0.json
@@ -1,46 +1,74 @@
 {
-    "_comment": " model parameters",
-    "use_smooth":	false,
-    "sel_a":		[16, 32],
-    "sel_r":		[30, 60],
-    "rcut":		6.00,
-    "axis_rule":	[0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0],
-    "_comment":	" default rule: []",
-    "_comment":	" user defined rule: for each type provides two axes, ",
-    "_comment":	"                    for each axis: (a_or_r, type, idx)",
-    "_comment":	"                    if type < 0, exclude type -(type+1)",
-    "_comment": "                    for water (O:0, H:1) it can be",
-    "_comment": "                    [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]",
-    "fitting_neuron":	[240, 120, 60, 30, 10],
+  "_comment": " model parameters",
+  "use_smooth": false,
+  "sel_a": [
+    16,
+    32
+  ],
+  "sel_r": [
+    30,
+    60
+  ],
+  "rcut": 6.00,
+  "axis_rule": [
+    0,
+    1,
+    0,
+    0,
+    1,
+    1,
+    0,
+    0,
+    0,
+    0,
+    1,
+    0
+  ],
+  "_comment": " default rule: []",
+  "_comment": " user defined rule: for each type provides two axes, ",
+  "_comment": "                    for each axis: (a_or_r, type, idx)",
+  "_comment": "                    if type < 0, exclude type -(type+1)",
+  "_comment": "                    for water (O:0, H:1) it can be",
+  "_comment": "                    [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]",
+  "fitting_neuron": [
+    240,
+    120,
+    60,
+    30,
+    10
+  ],
 
-    "_comment": " traing controls",
-    "systems":		["../data/"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	[4],
-    "start_lr":		0.001,
-    "decay_steps":	5000,
-    "decay_rate":	0.95,
+  "_comment": " training controls",
+  "systems": [
+    "../data/"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": [
+    4
+  ],
+  "start_lr": 0.001,
+  "decay_steps": 5000,
+  "decay_rate": 0.95,
 
-    "start_pref_e":	0.02,
-    "limit_pref_e":	8,
-    "start_pref_f":	1000,
-    "limit_pref_f":	1,
-    "start_pref_v":	0,
-    "limit_pref_v":	0,
+  "start_pref_e": 0.02,
+  "limit_pref_e": 8,
+  "start_pref_f": 1000,
+  "limit_pref_f": 1,
+  "start_pref_v": 0,
+  "limit_pref_v": 0,
 
-    "seed":		1,
+  "seed": 1,
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	10,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 10,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
 
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/compat_inputs/water_v1.json b/source/tests/compat_inputs/water_v1.json
index e8b1d8a196..076a576d7b 100644
--- a/source/tests/compat_inputs/water_v1.json
+++ b/source/tests/compat_inputs/water_v1.json
@@ -1,50 +1,78 @@
 {
-    "model":{
-	"descriptor": {
-	    "type":		"loc_frame",
-	    "sel_a":		[16, 32],
-	    "sel_r":		[30, 60],
-	    "rcut":		6.00,
-	    "axis_rule":	[0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]
-	},
-	"fitting_net": {
-	    "neuron":		[240, 120, 60, 30, 10],
-	    "resnet_dt":	true,
-	    "seed":		1
-	}
-    },
-    
-    "learning_rate" :{
-	"type":		"exp",
-	"decay_steps":	5000,
-	"decay_rate":	0.95,
-	"start_lr":	0.001
+  "model": {
+    "descriptor": {
+      "type": "loc_frame",
+      "sel_a": [
+        16,
+        32
+      ],
+      "sel_r": [
+        30,
+        60
+      ],
+      "rcut": 6.00,
+      "axis_rule": [
+        0,
+        1,
+        0,
+        0,
+        1,
+        1,
+        0,
+        0,
+        0,
+        0,
+        1,
+        0
+      ]
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        120,
+        60,
+        30,
+        10
+      ],
+      "resnet_dt": true,
+      "seed": 1
+    }
+  },
 
-    "loss" : {
-	"start_pref_e":	0.02,
-	"limit_pref_e":	8,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0
-    },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "decay_rate": 0.95,
+    "start_lr": 0.001
+  },
 
-    "training": {
-	"systems":	["../data/"], 
-	"set_prefix":	"set",    
-	"stop_batch":	1000000,
-	"batch_size":	[4],
+  "loss": {
+    "start_pref_e": 0.02,
+    "limit_pref_e": 8,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0
+  },
 
-	"seed":		1,
+  "training": {
+    "systems": [
+      "../data/"
+    ],
+    "set_prefix": "set",
+    "stop_batch": 1000000,
+    "batch_size": [
+      4
+    ],
 
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"numb_test":	10,
-	"save_freq":	1000,
-	"save_ckpt":	"model.ckpt",
-	"disp_training":true,
-	"time_training":true
-    }
-}
+    "seed": 1,
 
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "numb_test": 10,
+    "save_freq": 1000,
+    "save_ckpt": "model.ckpt",
+    "disp_training": true,
+    "time_training": true
+  }
+}
diff --git a/source/tests/compat_inputs/water_v2.json b/source/tests/compat_inputs/water_v2.json
index af923b606d..af8eca7db1 100644
--- a/source/tests/compat_inputs/water_v2.json
+++ b/source/tests/compat_inputs/water_v2.json
@@ -1,50 +1,78 @@
 {
-    "model":{
-		"descriptor": {
-			"type":		"loc_frame",
-			"sel_a":		[16, 32],
-			"sel_r":		[30, 60],
-			"rcut":		6.00,
-			"axis_rule":	[0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]
-		},
-		"fitting_net": {
-			"neuron":		[240, 120, 60, 30, 10],
-			"resnet_dt":	true,
-			"seed":		1
-		}
-    },
-    
-    "learning_rate" :{
-		"type":		"exp",
-		"decay_steps":	5000,
-		"stop_lr":	3.505266624882874e-08,
-		"start_lr":	0.001
+  "model": {
+    "descriptor": {
+      "type": "loc_frame",
+      "sel_a": [
+        16,
+        32
+      ],
+      "sel_r": [
+        30,
+        60
+      ],
+      "rcut": 6.00,
+      "axis_rule": [
+        0,
+        1,
+        0,
+        0,
+        1,
+        1,
+        0,
+        0,
+        0,
+        0,
+        1,
+        0
+      ]
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        120,
+        60,
+        30,
+        10
+      ],
+      "resnet_dt": true,
+      "seed": 1
+    }
+  },
 
-    "loss" : {
-		"start_pref_e":	0.02,
-		"limit_pref_e":	8,
-		"start_pref_f":	1000,
-		"limit_pref_f":	1,
-		"start_pref_v":	0,
-		"limit_pref_v":	0
-    },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "stop_lr": 3.505266624882874e-08,
+    "start_lr": 0.001
+  },
 
-    "training": {
-		"training_data": {
-			"systems":	["../data/"],
-			"set_prefix":	"set",
-			"batch_size":	[4]
-		},
-		"stop_batch":	1000000,
-		"seed":		1,
-		"disp_file":	"lcurve.out",
-		"disp_freq":	100,
-		"numb_test":	10,
-		"save_freq":	1000,
-		"save_ckpt":	"model.ckpt",
-		"disp_training":true,
-		"time_training":true
-    }
-}
+  "loss": {
+    "start_pref_e": 0.02,
+    "limit_pref_e": 8,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0
+  },
 
+  "training": {
+    "training_data": {
+      "systems": [
+        "../data/"
+      ],
+      "set_prefix": "set",
+      "batch_size": [
+        4
+      ]
+    },
+    "stop_batch": 1000000,
+    "seed": 1,
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "numb_test": 10,
+    "save_freq": 1000,
+    "save_ckpt": "model.ckpt",
+    "disp_training": true,
+    "time_training": true
+  }
+}
diff --git a/source/tests/data_modifier/dipole.json b/source/tests/data_modifier/dipole.json
index 5bd8b505f4..262b29b60a 100644
--- a/source/tests/data_modifier/dipole.json
+++ b/source/tests/data_modifier/dipole.json
@@ -1,64 +1,81 @@
 {
-    "_comment": " model parameters",
-    "model":{
-	"type_map":		["O", "H"],
-	"descriptor" :{
-	    "type":		"se_e2_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	3.80,
-	    "rcut":		4.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	6,
-	    "seed":		1
-	},
-	"fitting_net": {
-	    "type":		"dipole",
-	    "sel_type":		[0],
-	    "neuron":		[100, 100, 100],
-	    "resnet_dt":	true,
-	    "seed":		1
-	},
-	"_comment":	" that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 3.80,
+      "rcut": 4.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 6,
+      "seed": 1
     },
-    
-    "learning_rate" :{
-	"type":		"exp",
-	"start_lr":	0.01,
-	"stop_lr":	1e-8,
-	"decay_steps":	5000,
-	"_comment":	"that's all"
+    "fitting_net": {
+      "type": "dipole",
+      "sel_type": [
+        0
+      ],
+      "neuron": [
+        100,
+        100,
+        100
+      ],
+      "resnet_dt": true,
+      "seed": 1
     },
+    "_comment": " that's all"
+  },
 
-	"loss": {
-		"type":"tensor",
-		"pref":1.0,
-		"pref_atomic":1.0,
-		"_comment": " that's all"
-	},
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 0.01,
+    "stop_lr": 1e-8,
+    "decay_steps": 5000,
+    "_comment": "that's all"
+  },
 
-    "_comment": " traing controls",
-    "training": {
-	"systems":	["data_modifier/sys_10"], 
-	"set_prefix":	"set",    
-	"stop_batch":	1000000,
-	"batch_size":	4,
+  "loss": {
+    "type": "tensor",
+    "pref": 1.0,
+    "pref_atomic": 1.0,
+    "_comment": " that's all"
+  },
 
-	"seed":		1,
+  "_comment": " training controls",
+  "training": {
+    "systems": [
+      "data_modifier/sys_10"
+    ],
+    "set_prefix": "set",
+    "stop_batch": 1000000,
+    "batch_size": 4,
 
-	"_comment": " display and restart",
-	"_comment": " frequencies counted in batch",
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"numb_test":	5,
-	"save_freq":	500,
-	"save_ckpt":	"model.ckpt",
-	"load_ckpt":	"model.ckpt",
-	"disp_training":true,
-	"time_training":true,
-	"_comment":	"that's all"
-    },
+    "seed": 1,
 
-    "_comment":		"that's all"
-}
+    "_comment": " display and restart",
+    "_comment": " frequencies counted in batch",
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "numb_test": 5,
+    "save_freq": 500,
+    "save_ckpt": "model.ckpt",
+    "load_ckpt": "model.ckpt",
+    "disp_training": true,
+    "time_training": true,
+    "_comment": "that's all"
+  },
 
+  "_comment": "that's all"
+}
diff --git a/source/tests/finetune/input_finetune.json b/source/tests/finetune/input_finetune.json
index 22d878fcf0..0b51b5c4d5 100644
--- a/source/tests/finetune/input_finetune.json
+++ b/source/tests/finetune/input_finetune.json
@@ -1,52 +1,64 @@
 {
-	"_comment": " model parameters",
-	"model": {
-"type_embedding": {"trainable":  false},
-"type_map":	["O", "H"],
-"descriptor" :{"_comment":		" that's all"},
-"fitting_net" : {"_comment":		" that's all"},
-"_comment":	" that's all"
-	},
+  "_comment": " model parameters",
+  "model": {
+    "type_embedding": {
+      "trainable": false
+    },
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "_comment": " that's all"
+    },
+    "fitting_net": {
+      "_comment": " that's all"
+    },
+    "_comment": " that's all"
+  },
 
-	"learning_rate" :{
-"type":		"exp",
-"decay_steps":	5000,
-"start_lr":	0.001,	
-"stop_lr":	3.51e-8,
-"_comment":	"that's all"
-	},
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
 
-	"loss" :{
-"type":		"ener",
-"start_pref_e":	0.02,
-"limit_pref_e":	1,
-"start_pref_f":	1000,
-"limit_pref_f":	1,
-"start_pref_v":	1,
-"limit_pref_v":	1,
-"_comment":	" that's all"
-	},
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 1,
+    "limit_pref_v": 1,
+    "_comment": " that's all"
+  },
 
-	"training" : {
-"training_data": {
-		"systems":		["finetune/data"],
-		"batch_size":	"auto",
-		"_comment":		"that's all"
-},
-"validation_data":{
-		"systems":		["finetune/data"],
-		"batch_size":	1,
-		"numb_btch":	3,
-		"_comment":		"that's all"
-},
-"numb_steps":	0,
-"seed":		10,
-"disp_file":	"lcurve.out",
-"disp_freq":	1,
-"save_freq":	1,
-"_comment":	"that's all"
-	},    
+  "training": {
+    "training_data": {
+      "systems": [
+        "finetune/data"
+      ],
+      "batch_size": "auto",
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "finetune/data"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 0,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 1,
+    "save_freq": 1,
+    "_comment": "that's all"
+  },
 
-	"_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/finetune/input_pretrain.json b/source/tests/finetune/input_pretrain.json
index c448eab43f..1b1f50752b 100644
--- a/source/tests/finetune/input_pretrain.json
+++ b/source/tests/finetune/input_pretrain.json
@@ -1,70 +1,86 @@
 {
-	"_comment": " model parameters",
-	"model": {
-"type_map":	["H", "X1", "X2", "O"],
-"descriptor" :{
-		"type":		"se_atten",
-		"sel":		120,
-		"rcut_smth":	0.50,
-		"rcut":		6.00,
-		"neuron":		[4, 8, 16],
-		"resnet_dt":	false,
-		"axis_neuron":	16,
-		"attn": 128,
-		"attn_layer": 2,
-		"attn_dotr": true,
-		"attn_mask": false,
-		"seed":		1,
-		"_comment":		" that's all"
-},
-"fitting_net" : {
-		"neuron":		[20, 20, 20],
-		"resnet_dt":	true,
-		"seed":		1,
-		"_comment":		" that's all"
-},
-"_comment":	" that's all"
-	},
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "H",
+      "X1",
+      "X2",
+      "O"
+    ],
+    "descriptor": {
+      "type": "se_atten",
+      "sel": 120,
+      "rcut_smth": 0.50,
+      "rcut": 6.00,
+      "neuron": [
+        4,
+        8,
+        16
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "attn": 128,
+      "attn_layer": 2,
+      "attn_dotr": true,
+      "attn_mask": false,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "fitting_net": {
+      "neuron": [
+        20,
+        20,
+        20
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "_comment": " that's all"
+  },
 
-	"learning_rate" :{
-"type":		"exp",
-"decay_steps":	5000,
-"start_lr":	0.001,	
-"stop_lr":	3.51e-8,
-"_comment":	"that's all"
-	},
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
 
-	"loss" :{
-"type":		"ener",
-"start_pref_e":	0.02,
-"limit_pref_e":	1,
-"start_pref_f":	1000,
-"limit_pref_f":	1,
-"start_pref_v":	1,
-"limit_pref_v":	1,
-"_comment":	" that's all"
-	},
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 1,
+    "limit_pref_v": 1,
+    "_comment": " that's all"
+  },
 
-	"training" : {
-"training_data": {
-		"systems":		["finetune/data"],
-		"batch_size":	"auto",
-		"_comment":		"that's all"
-},
-"validation_data":{
-		"systems":		["finetune/data"],
-		"batch_size":	1,
-		"numb_btch":	3,
-		"_comment":		"that's all"
-},
-"numb_steps":	1,
-"seed":		10,
-"disp_file":	"lcurve.out",
-"disp_freq":	1,
-"save_freq":	1,
-"_comment":	"that's all"
-	},    
+  "training": {
+    "training_data": {
+      "systems": [
+        "finetune/data"
+      ],
+      "batch_size": "auto",
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "finetune/data"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 1,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 1,
+    "save_freq": 1,
+    "_comment": "that's all"
+  },
 
-	"_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/infer/convert2pb.py b/source/tests/infer/convert2pb.py
index bf77800613..a3d9879a35 100644
--- a/source/tests/infer/convert2pb.py
+++ b/source/tests/infer/convert2pb.py
@@ -1,13 +1,25 @@
-from deepmd.env import tf
-from google.protobuf import text_format
-from tensorflow.python.platform import gfile
-from tensorflow.python import pywrap_tensorflow
-from tensorflow.python.framework import graph_util
+from google.protobuf import (
+    text_format,
+)
+from tensorflow.python import (
+    pywrap_tensorflow,
+)
+from tensorflow.python.framework import (
+    graph_util,
+)
+from tensorflow.python.platform import (
+    gfile,
+)
+
+from deepmd.env import (
+    tf,
+)
+
 
 def convert_pbtxt_to_pb(pbtxtfile, pbfile):
-    with tf.gfile.FastGFile(pbtxtfile, 'r') as f:
+    with tf.gfile.FastGFile(pbtxtfile, "r") as f:
         graph_def = tf.GraphDef()
         file_content = f.read()
         # Merges the human-readable string in `file_content` into `graph_def`.
         text_format.Merge(file_content, graph_def)
-        tf.train.write_graph(graph_def, './', pbfile, as_text=False)
+        tf.train.write_graph(graph_def, "./", pbfile, as_text=False)
diff --git a/source/tests/infer/in.test b/source/tests/infer/in.test
index f647c1b132..e9f0ad99f9 100644
--- a/source/tests/infer/in.test
+++ b/source/tests/infer/in.test
@@ -12,7 +12,7 @@ mass 		1 16
 mass		2 2
 
 pair_style	deepmd deep_pot.pb
-pair_coeff  * *	
+pair_coeff  * *
 
 velocity        all create 330.0 23456789
 
diff --git a/source/tests/init_frz_model/data/type.raw b/source/tests/init_frz_model/data/type.raw
index e329bb5191..4eeae61de1 100644
--- a/source/tests/init_frz_model/data/type.raw
+++ b/source/tests/init_frz_model/data/type.raw
@@ -1,6 +1,6 @@
-0 
-1 
-1 
 0
-1 
-1
\ No newline at end of file
+1
+1
+0
+1
+1
diff --git a/source/tests/init_frz_model/input.json b/source/tests/init_frz_model/input.json
index 4c4e93fb05..7461b8e7c4 100644
--- a/source/tests/init_frz_model/input.json
+++ b/source/tests/init_frz_model/input.json
@@ -1,66 +1,83 @@
 {
-	"_comment": " model parameters",
-	"model": {
-"type_map":	["O", "H"],
-"descriptor" :{
-		"type":		"se_e2_a",
-		"sel":		[46, 92],
-		"rcut_smth":	0.50,
-		"rcut":		6.00,
-		"neuron":		[4, 8, 16],
-		"resnet_dt":	false,
-		"axis_neuron":	16,
-		"seed":		1,
-		"_comment":		" that's all"
-},
-"fitting_net" : {
-		"neuron":		[20, 20, 20],
-		"resnet_dt":	true,
-		"seed":		1,
-		"_comment":		" that's all"
-},
-"_comment":	" that's all"
-	},
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 0.50,
+      "rcut": 6.00,
+      "neuron": [
+        4,
+        8,
+        16
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "fitting_net": {
+      "neuron": [
+        20,
+        20,
+        20
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "_comment": " that's all"
+  },
 
-	"learning_rate" :{
-"type":		"exp",
-"decay_steps":	5000,
-"start_lr":	0.001,	
-"stop_lr":	3.51e-8,
-"_comment":	"that's all"
-	},
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
 
-	"loss" :{
-"type":		"ener",
-"start_pref_e":	0.02,
-"limit_pref_e":	1,
-"start_pref_f":	1000,
-"limit_pref_f":	1,
-"start_pref_v":	1,
-"limit_pref_v":	1,
-"_comment":	" that's all"
-	},
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 1,
+    "limit_pref_v": 1,
+    "_comment": " that's all"
+  },
 
-	"training" : {
-"training_data": {
-		"systems":		["init_frz_model/data"],
-		"batch_size":	"auto",
-		"_comment":		"that's all"
-},
-"validation_data":{
-		"systems":		["init_frz_model/data"],
-		"batch_size":	1,
-		"numb_btch":	3,
-		"_comment":		"that's all"
-},
-"numb_steps":	1,
-"seed":		10,
-"disp_file":	"lcurve.out",
-"disp_freq":	1,
-"save_freq":	1,
-"_comment":	"that's all"
-	},    
+  "training": {
+    "training_data": {
+      "systems": [
+        "init_frz_model/data"
+      ],
+      "batch_size": "auto",
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "init_frz_model/data"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 1,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 1,
+    "save_freq": 1,
+    "_comment": "that's all"
+  },
 
-	"_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/model_compression/data/type.raw b/source/tests/model_compression/data/type.raw
index e329bb5191..4eeae61de1 100644
--- a/source/tests/model_compression/data/type.raw
+++ b/source/tests/model_compression/data/type.raw
@@ -1,6 +1,6 @@
-0 
-1 
-1 
 0
-1 
-1
\ No newline at end of file
+1
+1
+0
+1
+1
diff --git a/source/tests/model_compression/input.json b/source/tests/model_compression/input.json
index 3ba7d7daf7..2b3c2af542 100644
--- a/source/tests/model_compression/input.json
+++ b/source/tests/model_compression/input.json
@@ -1,66 +1,83 @@
 {
-	"_comment": " model parameters",
-	"model": {
-"type_map":	["O", "H"],
-"descriptor" :{
-		"type":		"se_e2_a",
-		"sel":		[46, 92],
-		"rcut_smth":	0.50,
-		"rcut":		6.00,
-		"neuron":		[4, 8, 16],
-		"resnet_dt":	false,
-		"axis_neuron":	16,
-		"seed":		1,
-		"_comment":		" that's all"
-},
-"fitting_net" : {
-		"neuron":		[20, 20, 20],
-		"resnet_dt":	true,
-		"seed":		1,
-		"_comment":		" that's all"
-},
-"_comment":	" that's all"
-	},
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_e2_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 0.50,
+      "rcut": 6.00,
+      "neuron": [
+        4,
+        8,
+        16
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "fitting_net": {
+      "neuron": [
+        20,
+        20,
+        20
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
+    },
+    "_comment": " that's all"
+  },
 
-	"learning_rate" :{
-"type":		"exp",
-"decay_steps":	5000,
-"start_lr":	0.001,	
-"stop_lr":	3.51e-8,
-"_comment":	"that's all"
-	},
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "start_lr": 0.001,
+    "stop_lr": 3.51e-8,
+    "_comment": "that's all"
+  },
 
-	"loss" :{
-"type":		"ener",
-"start_pref_e":	0.02,
-"limit_pref_e":	1,
-"start_pref_f":	1000,
-"limit_pref_f":	1,
-"start_pref_v":	0,
-"limit_pref_v":	0,
-"_comment":	" that's all"
-	},
+  "loss": {
+    "type": "ener",
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0,
+    "_comment": " that's all"
+  },
 
-	"training" : {
-"training_data": {
-		"systems":		["model_compression/data"],
-		"batch_size":	"auto",
-		"_comment":		"that's all"
-},
-"validation_data":{
-		"systems":		["model_compression/data"],
-		"batch_size":	1,
-		"numb_btch":	3,
-		"_comment":		"that's all"
-},
-"numb_steps":	1,
-"seed":		10,
-"disp_file":	"lcurve.out",
-"disp_freq":	1,
-"save_freq":	1,
-"_comment":	"that's all"
-	},    
+  "training": {
+    "training_data": {
+      "systems": [
+        "model_compression/data"
+      ],
+      "batch_size": "auto",
+      "_comment": "that's all"
+    },
+    "validation_data": {
+      "systems": [
+        "model_compression/data"
+      ],
+      "batch_size": 1,
+      "numb_btch": 3,
+      "_comment": "that's all"
+    },
+    "numb_steps": 1,
+    "seed": 10,
+    "disp_file": "lcurve.out",
+    "disp_freq": 1,
+    "save_freq": 1,
+    "_comment": "that's all"
+  },
 
-	"_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/nvnmd/train.json b/source/tests/nvnmd/train.json
index ab947028b8..511af29e93 100644
--- a/source/tests/nvnmd/train.json
+++ b/source/tests/nvnmd/train.json
@@ -1,40 +1,43 @@
 {
-    "nvnmd":{
-        "net_size": 128,
-        "sel": [60, 60],
-        "rcut": 6.0,
-        "rcut_smth": 0.5
-    },
-    "learning_rate": {
-        "type": "exp",
-        "start_lr": 1e-3,
-        "stop_lr": 3e-8,
-        "decay_steps": 5000
-    },
-    "loss": {
-        "start_pref_e": 0.02,
-        "limit_pref_e": 1,
-        "start_pref_f": 1000,
-        "limit_pref_f": 1,
-        "start_pref_v": 0,
-        "limit_pref_v": 0
-    },
-    "training": {
-        "seed": 1,
-        "stop_batch": 200000,
-        "numb_test": 1,
-        "disp_file": "lcurve.out",
-        "disp_freq": 1000,
-        "save_ckpt": "model.ckpt",
-        "save_freq": 10000,
-        "training_data": {
-            "systems": [
-                "../data"
-            ],
-            "set_prefix": "set",
-            "batch_size": [
-                1
-            ]
-        }
+  "nvnmd": {
+    "net_size": 128,
+    "sel": [
+      60,
+      60
+    ],
+    "rcut": 6.0,
+    "rcut_smth": 0.5
+  },
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 1e-3,
+    "stop_lr": 3e-8,
+    "decay_steps": 5000
+  },
+  "loss": {
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0
+  },
+  "training": {
+    "seed": 1,
+    "stop_batch": 200000,
+    "numb_test": 1,
+    "disp_file": "lcurve.out",
+    "disp_freq": 1000,
+    "save_ckpt": "model.ckpt",
+    "save_freq": 10000,
+    "training_data": {
+      "systems": [
+        "../data"
+      ],
+      "set_prefix": "set",
+      "batch_size": [
+        1
+      ]
     }
+  }
 }
diff --git a/source/tests/nvnmd/train_cnn.json b/source/tests/nvnmd/train_cnn.json
index 04aad33410..2df38e0dfd 100644
--- a/source/tests/nvnmd/train_cnn.json
+++ b/source/tests/nvnmd/train_cnn.json
@@ -1,77 +1,77 @@
 {
-    "nvnmd": {
-        "net_size": 128,
-        "config_file": "none",
-        "weight_file": "none",
-        "map_file": "none",
-        "enable": true,
-        "restore_descriptor": false,
-        "restore_fitting_net": false,
-        "quantize_descriptor": false,
-        "quantize_fitting_net": false
+  "nvnmd": {
+    "net_size": 128,
+    "config_file": "none",
+    "weight_file": "none",
+    "map_file": "none",
+    "enable": true,
+    "restore_descriptor": false,
+    "restore_fitting_net": false,
+    "quantize_descriptor": false,
+    "quantize_fitting_net": false
+  },
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 0.001,
+    "stop_lr": 3e-08,
+    "decay_steps": 5000
+  },
+  "loss": {
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0
+  },
+  "training": {
+    "seed": 1,
+    "stop_batch": 200000,
+    "numb_test": 1,
+    "disp_file": "nvnmd_cnn/lcurve.out",
+    "disp_freq": 1000,
+    "save_ckpt": "nvnmd_cnn/model.ckpt",
+    "save_freq": 10000,
+    "training_data": {
+      "systems": [
+        "../data"
+      ],
+      "set_prefix": "set",
+      "batch_size": [
+        1
+      ]
     },
-    "learning_rate": {
-        "type": "exp",
-        "start_lr": 0.001,
-        "stop_lr": 3e-08,
-        "decay_steps": 5000
+    "disp_training": true,
+    "time_training": true,
+    "profiling": false
+  },
+  "model": {
+    "descriptor": {
+      "seed": 1,
+      "type": "se_a",
+      "sel": [
+        60,
+        60
+      ],
+      "rcut": 6.0,
+      "rcut_smth": 0.5,
+      "neuron": [
+        8,
+        16,
+        32
+      ],
+      "type_one_side": true,
+      "axis_neuron": 4,
+      "resnet_dt": false
     },
-    "loss": {
-        "start_pref_e": 0.02,
-        "limit_pref_e": 1,
-        "start_pref_f": 1000,
-        "limit_pref_f": 1,
-        "start_pref_v": 0,
-        "limit_pref_v": 0
-    },
-    "training": {
-        "seed": 1,
-        "stop_batch": 200000,
-        "numb_test": 1,
-        "disp_file": "nvnmd_cnn/lcurve.out",
-        "disp_freq": 1000,
-        "save_ckpt": "nvnmd_cnn/model.ckpt",
-        "save_freq": 10000,
-        "training_data": {
-            "systems": [
-                "../data"
-            ],
-            "set_prefix": "set",
-            "batch_size": [
-                1
-            ]
-        },
-        "disp_training": true,
-        "time_training": true,
-        "profiling": false
-    },
-    "model": {
-        "descriptor": {
-            "seed": 1,
-            "type": "se_a",
-            "sel": [
-                60,
-                60
-            ],
-            "rcut": 6.0,
-            "rcut_smth": 0.5,
-            "neuron": [
-                8,
-                16,
-                32
-            ],
-            "type_one_side": true,
-            "axis_neuron": 4,
-            "resnet_dt": false
-        },
-        "fitting_net": {
-            "seed": 1,
-            "neuron": [
-                128,
-                128,
-                128
-            ],
-            "resnet_dt": false
-        }
+    "fitting_net": {
+      "seed": 1,
+      "neuron": [
+        128,
+        128,
+        128
+      ],
+      "resnet_dt": false
     }
-}
\ No newline at end of file
+  }
+}
diff --git a/source/tests/polar_se_a.json b/source/tests/polar_se_a.json
index 7b3362dbe7..9830ffd684 100644
--- a/source/tests/polar_se_a.json
+++ b/source/tests/polar_se_a.json
@@ -1,63 +1,82 @@
 {
-    "_comment": " model parameters",
-    "model":{
-	"type":			"polar",
-	"type_map":		["O", "H"],
-	"descriptor" :{
-	    "type":		"se_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	16,
-	    "seed":		1
-	},
-	"fitting_net": {
-	    "type":		"polar",
-	    "sel_type":		[0],
-	    "fit_diag":		false,
-	    "neuron":		[100, 100, 100],
-	    "resnet_dt":	true,
-	    "seed":		1
-	},
-	"_comment":	" that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type": "polar",
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1
     },
-    
-    "learning_rate" :{
-	"type":		"exp",
-	"start_lr":	0.001,
-	"decay_steps":	5000,
-	"decay_rate":	0.95,
-	"_comment":	"that's all"
+    "fitting_net": {
+      "type": "polar",
+      "sel_type": [
+        0
+      ],
+      "fit_diag": false,
+      "neuron": [
+        100,
+        100,
+        100
+      ],
+      "resnet_dt": true,
+      "seed": 1
     },
+    "_comment": " that's all"
+  },
 
-    "loss" : {
-	"type":	"tensor",
-	"pref":			1.0,
-	"pref_atomic":	1.0,
-	"_comment":	"that's all"
-    },
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 0.001,
+    "decay_steps": 5000,
+    "decay_rate": 0.95,
+    "_comment": "that's all"
+  },
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	[1],
+  "loss": {
+    "type": "tensor",
+    "pref": 1.0,
+    "pref_atomic": 1.0,
+    "_comment": "that's all"
+  },
 
-    "seed":		1,
+  "_comment": " training controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": [
+    1
+  ],
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	10,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":true,
-    "time_training":true,
+  "seed": 1,
 
-    "_comment":		"that's all"
-}
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 10,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
 
+  "_comment": "that's all"
+}
diff --git a/source/tests/polar_se_a_tebd.json b/source/tests/polar_se_a_tebd.json
index 98de4b6694..56e3ed31aa 100644
--- a/source/tests/polar_se_a_tebd.json
+++ b/source/tests/polar_se_a_tebd.json
@@ -1,68 +1,91 @@
 {
-    "_comment": " model parameters",
-    "model":{
-	"type":			"polar",
-	"type_map":		["O", "H"],
-	"type_embedding":{
-	    "neuron":		[2,4,8],
-	    "resnet_dt":	false,
-	    "seed":		1
-	},
-	"descriptor" :{
-	    "type":		"se_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	16,
-	    "seed":		1
-	},
-	"fitting_net": {
-	    "type":		"polar",
-	    "sel_type":		[0],
-	    "fit_diag":		false,
-	    "neuron":		[100, 100, 100],
-	    "resnet_dt":	true,
-	    "seed":		1
-	},
-	"_comment":	" that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type": "polar",
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "type_embedding": {
+      "neuron": [
+        2,
+        4,
+        8
+      ],
+      "resnet_dt": false,
+      "seed": 1
     },
-    
-    "learning_rate" :{
-	"type":		"exp",
-	"start_lr":	0.001,
-	"decay_steps":	5000,
-	"decay_rate":	0.95,
-	"_comment":	"that's all"
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1
     },
-
-    "loss" : {
-	"type":	"tensor",
-	"pref":			1.0,
-	"pref_atomic":	1.0,
-	"_comment":	"that's all"
+    "fitting_net": {
+      "type": "polar",
+      "sel_type": [
+        0
+      ],
+      "fit_diag": false,
+      "neuron": [
+        100,
+        100,
+        100
+      ],
+      "resnet_dt": true,
+      "seed": 1
     },
+    "_comment": " that's all"
+  },
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	[1],
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 0.001,
+    "decay_steps": 5000,
+    "decay_rate": 0.95,
+    "_comment": "that's all"
+  },
 
-    "seed":		1,
+  "loss": {
+    "type": "tensor",
+    "pref": 1.0,
+    "pref_atomic": 1.0,
+    "_comment": "that's all"
+  },
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	10,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":true,
-    "time_training":true,
+  "_comment": " training controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": [
+    1
+  ],
 
-    "_comment":		"that's all"
-}
+  "seed": 1,
 
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 10,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
+
+  "_comment": "that's all"
+}
diff --git a/source/tests/test_activation_fn_gelu.py b/source/tests/test_activation_fn_gelu.py
index cdf14072bc..2d05b51a9d 100644
--- a/source/tests/test_activation_fn_gelu.py
+++ b/source/tests/test_activation_fn_gelu.py
@@ -1,53 +1,100 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
-from deepmd.env import tf
+import numpy as np
+
+from deepmd.common import (
+    get_activation_func,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.utils.network import (
+    embedding_net,
+)
 
-from deepmd.utils.network import embedding_net
-from deepmd.common import get_activation_func
 
 class TestGelu(tf.test.TestCase):
-    def setUp (self) :
+    def setUp(self):
         self.places = 6
         self.sess = self.test_session().__enter__()
-        self.inputs = tf.reshape(tf.constant([ 0., 1., 2., 3.], dtype = tf.float64), [-1, 1])
-        self.refout = [[ 0.37703893, -0.38242253, -0.1862878,  -0.23220415,  2.28706995, -0.40754364,
-                         0.22086098, -0.2690335 ],
-                       [ 2.167494,    0.72560347,  0.99234317,  0.50832127,  5.20665818,  0.58361587,
-                         1.57217107,  0.67395218],
-                       [ 4.19655852,  2.04779208,  2.20239826,  1.69247695,  8.38305924,  1.69006845,
-                         2.97176052,  1.76098426],
-                       [ 6.21460216,  3.52613278,  3.39508271,  2.817003,   11.521799,    2.91028145,
-                         4.41870371,  2.82610791]]       
-        
+        self.inputs = tf.reshape(
+            tf.constant([0.0, 1.0, 2.0, 3.0], dtype=tf.float64), [-1, 1]
+        )
+        self.refout = [
+            [
+                0.37703893,
+                -0.38242253,
+                -0.1862878,
+                -0.23220415,
+                2.28706995,
+                -0.40754364,
+                0.22086098,
+                -0.2690335,
+            ],
+            [
+                2.167494,
+                0.72560347,
+                0.99234317,
+                0.50832127,
+                5.20665818,
+                0.58361587,
+                1.57217107,
+                0.67395218,
+            ],
+            [
+                4.19655852,
+                2.04779208,
+                2.20239826,
+                1.69247695,
+                8.38305924,
+                1.69006845,
+                2.97176052,
+                1.76098426,
+            ],
+            [
+                6.21460216,
+                3.52613278,
+                3.39508271,
+                2.817003,
+                11.521799,
+                2.91028145,
+                4.41870371,
+                2.82610791,
+            ],
+        ]
+
     def test_activation_function_gelu_custom(self):
         network_size = [2, 4, 8]
-        out = embedding_net(self.inputs, 
-                            network_size, 
-                            tf.float64,
-                            activation_fn = get_activation_func('gelu'),
-                            name_suffix = 'gelu_custom',
-                            seed = 1, 
-                            uniform_seed = True)
+        out = embedding_net(
+            self.inputs,
+            network_size,
+            tf.float64,
+            activation_fn=get_activation_func("gelu"),
+            name_suffix="gelu_custom",
+            seed=1,
+            uniform_seed=True,
+        )
         self.sess.run(tf.global_variables_initializer())
         myout = self.sess.run(out)
         np.testing.assert_almost_equal(self.refout, myout, self.places)
 
-
     def test_activation_function_gelu_tensorflow(self):
         network_size = [2, 4, 8]
-        out = embedding_net(self.inputs, 
-                            network_size, 
-                            tf.float64,
-                            activation_fn = get_activation_func('gelu_tf'),
-                            name_suffix = 'gelu_tensorflow',
-                            seed = 1, 
-                            uniform_seed = True)
+        out = embedding_net(
+            self.inputs,
+            network_size,
+            tf.float64,
+            activation_fn=get_activation_func("gelu_tf"),
+            name_suffix="gelu_tensorflow",
+            seed=1,
+            uniform_seed=True,
+        )
         self.sess.run(tf.global_variables_initializer())
         myout = self.sess.run(out)
         np.testing.assert_almost_equal(self.refout, myout, self.places)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_adjust_sel.py b/source/tests/test_adjust_sel.py
index 3f3494f040..985b6813ab 100644
--- a/source/tests/test_adjust_sel.py
+++ b/source/tests/test_adjust_sel.py
@@ -1,38 +1,52 @@
-import os, json
-import numpy as np
-import unittest
+import json
+import os
 import subprocess as sp
+import unittest
+
+import numpy as np
 
-from deepmd.infer import DeepPot
-from deepmd.env import MODEL_VERSION
 # from deepmd.entrypoints.compress import compress
-from common import j_loader, tests_path, run_dp
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    MODEL_VERSION,
+)
+from deepmd.infer import (
+    DeepPot,
+)
 
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
-else :
+else:
     default_places = 10
 
-def _file_delete(file) :
+
+def _file_delete(file):
     if os.path.isdir(file):
         os.rmdir(file)
     elif os.path.isfile(file):
         os.remove(file)
 
+
 def _subprocess_run(command):
     popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b''):
-        if hasattr(line, 'decode'):
-            line = line.decode('utf-8')
+    for line in iter(popen.stdout.readline, b""):
+        if hasattr(line, "decode"):
+            line = line.decode("utf-8")
         line = line.rstrip()
         print(line)
     popen.wait()
     return popen.returncode
 
+
 def _init_models():
     # we use the setting for model compression
-    data_file  = str(tests_path / os.path.join("model_compression", "data"))
+    data_file = str(tests_path / os.path.join("model_compression", "data"))
     frozen_model = str(tests_path / "dp-adjust-sel-original.pb")
     decreased_model = str(tests_path / "dp-adjust-sel-original-decreased.pb")
     increased_model = str(tests_path / "dp-adjust-sel-original-increased.pb")
@@ -45,80 +59,104 @@ def _init_models():
         json.dump(jdata, fp, indent=4)
 
     ret = run_dp("dp train " + INPUT + " --skip-neighbor-stat")
-    np.testing.assert_equal(ret, 0, 'DP train failed!')
+    np.testing.assert_equal(ret, 0, "DP train failed!")
     ret = run_dp("dp freeze -o " + frozen_model)
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
 
     jdata["training"]["numb_steps"] = 0
-    jdata["model"]["descriptor"]["sel"] = [2, 4] # equal to data
+    jdata["model"]["descriptor"]["sel"] = [2, 4]  # equal to data
     with open(INPUT, "w") as fp:
         json.dump(jdata, fp, indent=4)
     ret = run_dp("dp train " + INPUT + " -f " + frozen_model + " --skip-neighbor-stat")
-    np.testing.assert_equal(ret, 0, 'DP model adjust sel failed!')
+    np.testing.assert_equal(ret, 0, "DP model adjust sel failed!")
     ret = run_dp("dp freeze -o " + decreased_model)
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
 
-    jdata["model"]["descriptor"]["sel"] = [300, 300] # equal to data
+    jdata["model"]["descriptor"]["sel"] = [300, 300]  # equal to data
     with open(INPUT, "w") as fp:
         json.dump(jdata, fp, indent=4)
     ret = run_dp("dp train " + INPUT + " -f " + frozen_model + " --skip-neighbor-stat")
-    np.testing.assert_equal(ret, 0, 'DP model adjust sel failed!')
+    np.testing.assert_equal(ret, 0, "DP model adjust sel failed!")
     ret = run_dp("dp freeze -o " + increased_model)
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
     return INPUT, frozen_model, decreased_model, increased_model
 
+
 INPUT, FROZEN_MODEL, DECREASED_MODEL, INCREASED_MODEL = _init_models()
 
-class TestDeepPotAAdjustSel(unittest.TestCase) :
+
+class TestDeepPotAAdjustSel(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_decreased = DeepPot(DECREASED_MODEL)
         self.dp_increased = DeepPot(INCREASED_MODEL)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
 
     def test_attrs(self):
         self.assertEqual(self.dp_original.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_original.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp_original.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_original.get_dim_fparam(), 0)
         self.assertEqual(self.dp_original.get_dim_aparam(), 0)
 
         self.assertEqual(self.dp_decreased.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_decreased.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_decreased.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp_decreased.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp_decreased.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_decreased.get_dim_fparam(), 0)
         self.assertEqual(self.dp_decreased.get_dim_aparam(), 0)
 
         self.assertEqual(self.dp_increased.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_increased.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_increased.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp_increased.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp_increased.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_increased.get_dim_fparam(), 0)
         self.assertEqual(self.dp_increased.get_dim_aparam(), 0)
 
     def test_1frame(self):
-        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
-        ee1, ff1, vv1 = self.dp_decreased.eval(self.coords, self.box, self.atype, atomic = False)
-        ee2, ff2, vv2 = self.dp_increased.eval(self.coords, self.box, self.atype, atomic = False)
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_decreased.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee2, ff2, vv2 = self.dp_increased.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ee2.shape, (nframes,1))
-        self.assertEqual(ff2.shape, (nframes,natoms,3))
-        self.assertEqual(vv2.shape, (nframes,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ee2.shape, (nframes, 1))
+        self.assertEqual(ff2.shape, (nframes, natoms, 3))
+        self.assertEqual(vv2.shape, (nframes, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
@@ -128,27 +166,33 @@ def test_1frame(self):
         np.testing.assert_almost_equal(vv0, vv2, default_places)
 
     def test_1frame_atm(self):
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_decreased.eval(self.coords, self.box, self.atype, atomic = True)
-        ee2, ff2, vv2, ae2, av2 = self.dp_increased.eval(self.coords, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_decreased.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee2, ff2, vv2, ae2, av2 = self.dp_increased.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
-        self.assertEqual(ee2.shape, (nframes,1))
-        self.assertEqual(ff2.shape, (nframes,natoms,3))
-        self.assertEqual(vv2.shape, (nframes,9))
-        self.assertEqual(ae2.shape, (nframes,natoms,1))
-        self.assertEqual(av2.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
+        self.assertEqual(ee2.shape, (nframes, 1))
+        self.assertEqual(ff2.shape, (nframes, natoms, 3))
+        self.assertEqual(vv2.shape, (nframes, 9))
+        self.assertEqual(ae2.shape, (nframes, natoms, 1))
+        self.assertEqual(av2.shape, (nframes, natoms, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
@@ -164,27 +208,33 @@ def test_1frame_atm(self):
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(coords2, box2, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_decreased.eval(coords2, box2, self.atype, atomic = True)
-        ee2, ff2, vv2, ae2, av2 = self.dp_increased.eval(coords2, box2, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_decreased.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        ee2, ff2, vv2, ae2, av2 = self.dp_increased.eval(
+            coords2, box2, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
-        self.assertEqual(ee2.shape, (nframes,1))
-        self.assertEqual(ff2.shape, (nframes,natoms,3))
-        self.assertEqual(vv2.shape, (nframes,9))
-        self.assertEqual(ae2.shape, (nframes,natoms,1))
-        self.assertEqual(av2.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
+        self.assertEqual(ee2.shape, (nframes, 1))
+        self.assertEqual(ff2.shape, (nframes, natoms, 3))
+        self.assertEqual(vv2.shape, (nframes, 9))
+        self.assertEqual(ae2.shape, (nframes, natoms, 1))
+        self.assertEqual(av2.shape, (nframes, natoms, 9))
 
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
diff --git a/source/tests/test_argument_parser.py b/source/tests/test_argument_parser.py
index 2f3fce9aa1..65401cef4e 100644
--- a/source/tests/test_argument_parser.py
+++ b/source/tests/test_argument_parser.py
@@ -1,13 +1,29 @@
 """Unittests for argument parser."""
 
-import unittest
-from argparse import Namespace
-from typing import Any, Dict, List, Tuple, Union, TYPE_CHECKING
 import re
-from io import StringIO
-from contextlib import redirect_stderr
-
-from deepmd.entrypoints.main import parse_args, get_ll
+import unittest
+from argparse import (
+    Namespace,
+)
+from contextlib import (
+    redirect_stderr,
+)
+from io import (
+    StringIO,
+)
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    List,
+    Tuple,
+    Union,
+)
+
+from deepmd.entrypoints.main import (
+    get_ll,
+    parse_args,
+)
 
 if TYPE_CHECKING:
     try:
@@ -326,16 +342,19 @@ def test_parser_doc(self):
         }
 
         self.run_test(command="doc-train-input", mapping=ARGS)
-    
+
     def test_parser_model_devi(self):
         """Test model-devi subparser"""
         ARGS = {
-            "--models": dict(type=list, value="GRAPH.000.pb GRAPH.001.pb",
-                             expected=["GRAPH.000.pb", "GRAPH.001.pb"]),
+            "--models": dict(
+                type=list,
+                value="GRAPH.000.pb GRAPH.001.pb",
+                expected=["GRAPH.000.pb", "GRAPH.001.pb"],
+            ),
             "--system": dict(type=str, value="SYSTEM_DIR"),
             "--set-prefix": dict(type=str, value="SET_PREFIX"),
             "--output": dict(type=str, value="OUTFILE"),
-            "--frequency": dict(type=int, value=1)
+            "--frequency": dict(type=int, value=1),
         }
 
         self.run_test(command="model-devi", mapping=ARGS)
@@ -357,5 +376,5 @@ def test_get_log_level(self):
                 get_ll(input_val),
                 expected_result,
                 msg=f"Expected: {expected_result} result for input value: {input_val} "
-                f"but got {get_ll(input_val)}"
+                f"but got {get_ll(input_val)}",
             )
diff --git a/source/tests/test_auto_batch_size.py b/source/tests/test_auto_batch_size.py
index 2790c7d4d5..bd863b2633 100644
--- a/source/tests/test_auto_batch_size.py
+++ b/source/tests/test_auto_batch_size.py
@@ -1,10 +1,15 @@
-import unittest
 import os
+import unittest
 
 import numpy as np
 
-from deepmd.utils.batch_size import AutoBatchSize
-from deepmd.utils.errors import OutOfMemoryError
+from deepmd.utils.batch_size import (
+    AutoBatchSize,
+)
+from deepmd.utils.errors import (
+    OutOfMemoryError,
+)
+
 
 class TestAutoBatchSize(unittest.TestCase):
     def oom(self, batch_size, start_index):
@@ -12,11 +17,11 @@ def oom(self, batch_size, start_index):
             raise OutOfMemoryError
         return batch_size, np.zeros((batch_size, 2))
 
-    @unittest.mock.patch('tensorflow.compat.v1.test.is_gpu_available')
+    @unittest.mock.patch("tensorflow.compat.v1.test.is_gpu_available")
     def test_execute_oom_gpu(self, mock_is_gpu_available):
         mock_is_gpu_available.return_value = True
         # initial batch size 256 = 128 * 2
-        auto_batch_size = AutoBatchSize(256, 2.)
+        auto_batch_size = AutoBatchSize(256, 2.0)
         # no error - 128
         nb, result = auto_batch_size.execute(self.oom, 1, 2)
         self.assertEqual(nb, 128)
@@ -38,11 +43,11 @@ def test_execute_oom_gpu(self, mock_is_gpu_available):
         self.assertEqual(nb, 256)
         self.assertEqual(result.shape, (256, 2))
 
-    @unittest.mock.patch('tensorflow.compat.v1.test.is_gpu_available')
+    @unittest.mock.patch("tensorflow.compat.v1.test.is_gpu_available")
     def test_execute_oom_cpu(self, mock_is_gpu_available):
         mock_is_gpu_available.return_value = False
         # initial batch size 256 = 128 * 2, nb is always 128
-        auto_batch_size = AutoBatchSize(256, 2.)
+        auto_batch_size = AutoBatchSize(256, 2.0)
         nb, result = auto_batch_size.execute(self.oom, 1, 2)
         self.assertEqual(nb, 128)
         self.assertEqual(result.shape, (128, 2))
@@ -62,7 +67,7 @@ def test_execute_oom_cpu(self, mock_is_gpu_available):
     @unittest.mock.patch.dict(os.environ, {"DP_INFER_BATCH_SIZE": "256"}, clear=True)
     def test_execute_oom_environment_variables(self):
         # DP_INFER_BATCH_SIZE = 256 = 128 * 2, nb is always 128
-        auto_batch_size = AutoBatchSize(999, 2.)
+        auto_batch_size = AutoBatchSize(999, 2.0)
         nb, result = auto_batch_size.execute(self.oom, 1, 2)
         self.assertEqual(nb, 128)
         self.assertEqual(result.shape, (128, 2))
@@ -81,6 +86,6 @@ def test_execute_oom_environment_variables(self):
 
     def test_execute_all(self):
         dd1 = np.zeros((10000, 2, 1))
-        auto_batch_size = AutoBatchSize(256, 2.)
+        auto_batch_size = AutoBatchSize(256, 2.0)
         dd2 = auto_batch_size.execute_all(np.array, 10000, 2, dd1)
         np.testing.assert_equal(dd1, dd2)
diff --git a/source/tests/test_cluster.py b/source/tests/test_cluster.py
index d67b572bb1..a9f6826354 100644
--- a/source/tests/test_cluster.py
+++ b/source/tests/test_cluster.py
@@ -1,15 +1,21 @@
 import unittest
+from unittest import (
+    mock,
+)
 
-from deepmd.cluster import local, slurm
-from deepmd.env import tf
-from unittest import mock
+from deepmd.cluster import (
+    local,
+    slurm,
+)
+from deepmd.env import (
+    tf,
+)
 
-
-kHostName = 'compute-b24-1'
+kHostName = "compute-b24-1"
 
 
 class FakePopen(object):
-    def __init__(self, stdout=b'', stderr=b'', returncode=0):
+    def __init__(self, stdout=b"", stderr=b"", returncode=0):
         self._stdout = stdout
         self._stderr = stderr
         self._returncode = returncode
@@ -23,34 +29,35 @@ def returncode(self):
 
 
 class TestGPU(unittest.TestCase):
-    @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda')
-    @mock.patch('subprocess.Popen')
+    @mock.patch("tensorflow.compat.v1.test.is_built_with_cuda")
+    @mock.patch("subprocess.Popen")
     def test_none(self, mock_Popen, mock_is_built_with_cuda):
-        mock_Popen.return_value.__enter__.return_value = FakePopen(b'0', b'')
+        mock_Popen.return_value.__enter__.return_value = FakePopen(b"0", b"")
         mock_is_built_with_cuda.return_value = True
         gpus = local.get_gpus()
         self.assertIsNone(gpus)
 
-    @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda')
-    @mock.patch('subprocess.Popen')
+    @mock.patch("tensorflow.compat.v1.test.is_built_with_cuda")
+    @mock.patch("subprocess.Popen")
     def test_valid(self, mock_Popen, mock_is_built_with_cuda):
-        mock_Popen.return_value.__enter__.return_value = FakePopen(b'2', b'')
+        mock_Popen.return_value.__enter__.return_value = FakePopen(b"2", b"")
         mock_is_built_with_cuda.return_value = True
         gpus = local.get_gpus()
         self.assertEqual(gpus, [0, 1])
 
-    @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda')
-    @mock.patch('subprocess.Popen')
+    @mock.patch("tensorflow.compat.v1.test.is_built_with_cuda")
+    @mock.patch("subprocess.Popen")
     def test_error(self, mock_Popen, mock_is_built_with_cuda):
-        mock_Popen.return_value.__enter__.return_value = \
-            FakePopen(stderr=b'!', returncode=1)
+        mock_Popen.return_value.__enter__.return_value = FakePopen(
+            stderr=b"!", returncode=1
+        )
         mock_is_built_with_cuda.return_value = True
         with self.assertRaises(RuntimeError) as cm:
             _ = local.get_gpus()
-            self.assertIn('Failed to detect', str(cm.exception))
+            self.assertIn("Failed to detect", str(cm.exception))
 
-    @mock.patch('tensorflow.compat.v1.test.is_built_with_rocm', create=True)
-    @mock.patch('tensorflow.compat.v1.test.is_built_with_cuda')
+    @mock.patch("tensorflow.compat.v1.test.is_built_with_rocm", create=True)
+    @mock.patch("tensorflow.compat.v1.test.is_built_with_cuda")
     def test_cpu(self, mock_is_built_with_cuda, mock_is_built_with_rocm):
         mock_is_built_with_cuda.return_value = False
         mock_is_built_with_rocm.return_value = False
@@ -59,7 +66,7 @@ def test_cpu(self, mock_is_built_with_cuda, mock_is_built_with_rocm):
 
 
 class TestLocal(unittest.TestCase):
-    @mock.patch('socket.gethostname')
+    @mock.patch("socket.gethostname")
     def test_resource(self, mock_gethostname):
         mock_gethostname.return_value = kHostName
         nodename, nodelist, _ = local.get_resource()
@@ -68,63 +75,72 @@ def test_resource(self, mock_gethostname):
 
 
 class TestSlurm(unittest.TestCase):
-    @mock.patch.dict('os.environ', values={
-        'SLURM_JOB_NODELIST': kHostName,
-        'SLURMD_NODENAME': kHostName,
-        'SLURM_JOB_NUM_NODES': '1'
-    })
+    @mock.patch.dict(
+        "os.environ",
+        values={
+            "SLURM_JOB_NODELIST": kHostName,
+            "SLURMD_NODENAME": kHostName,
+            "SLURM_JOB_NUM_NODES": "1",
+        },
+    )
     def test_single(self):
         nodename, nodelist, _ = slurm.get_resource()
         self.assertEqual(nodename, kHostName)
         self.assertEqual(nodelist, [kHostName])
 
-    @mock.patch.dict('os.environ', values={
-        'SLURM_JOB_NODELIST': 'compute-b24-[1-3,5-9],compute-b25-[4,8]',
-        'SLURMD_NODENAME': 'compute-b24-2',
-        'SLURM_JOB_NUM_NODES': '10'
-    })
+    @mock.patch.dict(
+        "os.environ",
+        values={
+            "SLURM_JOB_NODELIST": "compute-b24-[1-3,5-9],compute-b25-[4,8]",
+            "SLURMD_NODENAME": "compute-b24-2",
+            "SLURM_JOB_NUM_NODES": "10",
+        },
+    )
     def test_multiple(self):
         nodename, nodelist, _ = slurm.get_resource()
-        self.assertEqual(nodename, 'compute-b24-2')
-        self.assertEqual(nodelist, [
-            'compute-b24-1',
-            'compute-b24-2',
-            'compute-b24-3',
-            'compute-b24-5',
-            'compute-b24-6',
-            'compute-b24-7',
-            'compute-b24-8',
-            'compute-b24-9',
-            'compute-b25-4',
-            'compute-b25-8'
-        ])
+        self.assertEqual(nodename, "compute-b24-2")
+        self.assertEqual(
+            nodelist,
+            [
+                "compute-b24-1",
+                "compute-b24-2",
+                "compute-b24-3",
+                "compute-b24-5",
+                "compute-b24-6",
+                "compute-b24-7",
+                "compute-b24-8",
+                "compute-b24-9",
+                "compute-b25-4",
+                "compute-b25-8",
+            ],
+        )
 
     def test_illegal(self):
         environ = {
-            'SLURM_JOB_NODELIST': 'compute-b24-[3-5]',
-            'SLURMD_NODENAME': 'compute-b24-4'
+            "SLURM_JOB_NODELIST": "compute-b24-[3-5]",
+            "SLURMD_NODENAME": "compute-b24-4",
         }
-        with mock.patch.dict('os.environ', environ):
+        with mock.patch.dict("os.environ", environ):
             with self.assertRaises(RuntimeError) as cm:
                 _ = slurm.get_resource()
-                self.assertIn('Could not get SLURM number', str(cm.exception))
+                self.assertIn("Could not get SLURM number", str(cm.exception))
 
         environ = {
-            'SLURM_JOB_NODELIST': 'compute-b24-1,compute-b25-2',
-            'SLURMD_NODENAME': 'compute-b25-2',
-            'SLURM_JOB_NUM_NODES': '4'
+            "SLURM_JOB_NODELIST": "compute-b24-1,compute-b25-2",
+            "SLURMD_NODENAME": "compute-b25-2",
+            "SLURM_JOB_NUM_NODES": "4",
         }
-        with mock.patch.dict('os.environ', environ):
+        with mock.patch.dict("os.environ", environ):
             with self.assertRaises(ValueError) as cm:
                 _ = slurm.get_resource()
-                self.assertIn('Number of slurm nodes 2', str(cm.exception))
+                self.assertIn("Number of slurm nodes 2", str(cm.exception))
 
         environ = {
-            'SLURM_JOB_NODELIST': 'compute-b24-1,compute-b25-3',
-            'SLURMD_NODENAME': 'compute-b25-2',
-            'SLURM_JOB_NUM_NODES': '2'
+            "SLURM_JOB_NODELIST": "compute-b24-1,compute-b25-3",
+            "SLURMD_NODENAME": "compute-b25-2",
+            "SLURM_JOB_NUM_NODES": "2",
         }
-        with mock.patch.dict('os.environ', environ):
+        with mock.patch.dict("os.environ", environ):
             with self.assertRaises(ValueError) as cm:
                 _ = slurm.get_resource()
-                self.assertIn('Nodename(compute-b25-2', str(cm.exception))
+                self.assertIn("Nodename(compute-b25-2", str(cm.exception))
diff --git a/source/tests/test_common.py b/source/tests/test_common.py
index adbde971e9..56a4a82228 100644
--- a/source/tests/test_common.py
+++ b/source/tests/test_common.py
@@ -1,10 +1,23 @@
-import os,sys,shutil,fnmatch
-import numpy as np
+import fnmatch
+import os
+import shutil
+import sys
 import unittest
-from pathlib import Path
+from pathlib import (
+    Path,
+)
+
+import numpy as np
+
+from deepmd.common import (
+    GLOBAL_TF_FLOAT_PRECISION,
+    cast_precision,
+    expand_sys_str,
+)
+from deepmd.env import (
+    tf,
+)
 
-from deepmd.common import expand_sys_str, cast_precision, GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import tf
 
 # compute relative path
 # https://stackoverflow.com/questions/38083555/using-pathlibs-relative-to-for-directories-on-the-same-level
@@ -16,47 +29,55 @@ def relpath(path_to, path_from):
             head, tail = p, path_to.relative_to(p)
     except ValueError:  # Stop when the paths diverge.
         pass
-    return Path('../' * (len(path_from.parents) - len(head.parents))).joinpath(tail)
+    return Path("../" * (len(path_from.parents) - len(head.parents))).joinpath(tail)
 
-class TestCommonExpandSysDir(unittest.TestCase) :
+
+class TestCommonExpandSysDir(unittest.TestCase):
     def setUp(self):
-        self.match_file = Path('type.raw')
-        Path('test_sys').mkdir()
-        self.dir = Path('test_sys')
-        self.dira = Path('test_sys/a')
-        self.dirb = Path('test_sys/a/b')
-        self.dirc = Path('test_sys/c')
-        self.dird = Path('test_sys/c/d')
-        self.dire = Path('test_sys/c/type.raw')
+        self.match_file = Path("type.raw")
+        Path("test_sys").mkdir()
+        self.dir = Path("test_sys")
+        self.dira = Path("test_sys/a")
+        self.dirb = Path("test_sys/a/b")
+        self.dirc = Path("test_sys/c")
+        self.dird = Path("test_sys/c/d")
+        self.dire = Path("test_sys/c/type.raw")
         self.dira.mkdir()
         self.dirb.mkdir()
         self.dirc.mkdir()
         for ii in [self.dir, self.dira, self.dirb]:
-            (ii/self.match_file).touch()
+            (ii / self.match_file).touch()
         relb = relpath(self.dirb, self.dirc)
         absb = self.dirb.resolve()
         self.dird.symlink_to(relb)
         self.dire.symlink_to(absb)
-        self.expected_out = ['test_sys', 'test_sys/a', 'test_sys/a/b', 'test_sys/c/d', 'test_sys/c/type.raw']
+        self.expected_out = [
+            "test_sys",
+            "test_sys/a",
+            "test_sys/a/b",
+            "test_sys/c/d",
+            "test_sys/c/type.raw",
+        ]
         self.expected_out.sort()
 
     def tearDown(self):
-        shutil.rmtree('test_sys')
+        shutil.rmtree("test_sys")
 
     def test_expand(self):
-        ret = expand_sys_str('test_sys')
+        ret = expand_sys_str("test_sys")
         ret.sort()
         self.assertEqual(ret, self.expected_out)
 
 
 class TestCastPrecision(unittest.TestCase):
     """This class tests `deepmd.common.cast_precision`."""
+
     @property
     def precision(self):
         if GLOBAL_TF_FLOAT_PRECISION == tf.float32:
             return tf.float64
         return tf.float32
-    
+
     def test_cast_precision(self):
         x = tf.zeros(1, dtype=GLOBAL_TF_FLOAT_PRECISION)
         y = tf.zeros(1, dtype=tf.int64)
diff --git a/source/tests/test_compat_input.py b/source/tests/test_compat_input.py
index cec42e622d..d0ab4a021c 100644
--- a/source/tests/test_compat_input.py
+++ b/source/tests/test_compat_input.py
@@ -1,27 +1,35 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
-from deepmd.utils.compat import convert_input_v0_v1, convert_input_v1_v2
-from common import j_loader
+import numpy as np
+from common import (
+    j_loader,
+)
+
+from deepmd.utils.compat import (
+    convert_input_v0_v1,
+    convert_input_v1_v2,
+)
 
-class TestConvertInput (unittest.TestCase) :
+
+class TestConvertInput(unittest.TestCase):
     def test_convert_smth(self):
-        jdata0 = j_loader(os.path.join('compat_inputs', 'water_se_a_v0.json'))
-        jdata1 = j_loader(os.path.join('compat_inputs', 'water_se_a_v1.json'))
-        jdata = convert_input_v0_v1(jdata0, warning = False, dump = None)
+        jdata0 = j_loader(os.path.join("compat_inputs", "water_se_a_v0.json"))
+        jdata1 = j_loader(os.path.join("compat_inputs", "water_se_a_v1.json"))
+        jdata = convert_input_v0_v1(jdata0, warning=False, dump=None)
         self.assertEqual(jdata, jdata1)
 
     def test_convert_nonsmth(self):
-        jdata0 = j_loader(os.path.join('compat_inputs', 'water_v0.json'))
-        jdata1 = j_loader(os.path.join('compat_inputs', 'water_v1.json'))
-        jdata = convert_input_v0_v1(jdata0, warning = False, dump = None)
+        jdata0 = j_loader(os.path.join("compat_inputs", "water_v0.json"))
+        jdata1 = j_loader(os.path.join("compat_inputs", "water_v1.json"))
+        jdata = convert_input_v0_v1(jdata0, warning=False, dump=None)
         self.assertEqual(jdata, jdata1)
 
     def test_convert_v1_v2(self):
-        jdata0 = j_loader(os.path.join('compat_inputs', 'water_v1.json'))
-        jdata1 = j_loader(os.path.join('compat_inputs', 'water_v2.json'))
-        jdata = convert_input_v1_v2(jdata0, warning = False, dump = None)
+        jdata0 = j_loader(os.path.join("compat_inputs", "water_v1.json"))
+        jdata1 = j_loader(os.path.join("compat_inputs", "water_v2.json"))
+        jdata = convert_input_v1_v2(jdata0, warning=False, dump=None)
         self.assertDictAlmostEqual(jdata, jdata1)
 
     def assertDictAlmostEqual(self, d1, d2, msg=None, places=7):
@@ -37,10 +45,9 @@ def test_json_yaml_equal(self):
         inputs = ("water_v1", "water_se_a_v1")
 
         for i in inputs:
-            jdata = j_loader(os.path.join('yaml_inputs', f'{i}.json'))
-            ydata = j_loader(os.path.join('yaml_inputs', f'{i}.yaml'))
+            jdata = j_loader(os.path.join("yaml_inputs", f"{i}.json"))
+            ydata = j_loader(os.path.join("yaml_inputs", f"{i}.yaml"))
             self.assertEqual(jdata, ydata)
 
         with self.assertRaises(TypeError):
             j_loader("path_with_wrong.extension")
-
diff --git a/source/tests/test_data_large_batch.py b/source/tests/test_data_large_batch.py
index e8522b5a70..4d72964ad5 100644
--- a/source/tests/test_data_large_batch.py
+++ b/source/tests/test_data_large_batch.py
@@ -1,151 +1,192 @@
-import dpdata, os, sys, unittest
-import numpy as np
-from deepmd.env import tf
+import os
 import pickle
-from common import Data, gen_data, j_loader
-
-from deepmd.utils.data_system import DeepmdDataSystem
-from deepmd.descriptor import DescrptSeAtten
-from deepmd.common import data_requirement
-from deepmd.fit import EnerFitting
-from deepmd.model import EnerModel
-from deepmd.utils.type_embed import TypeEmbedNet
-from deepmd.common import j_must_have
-from common import tf
+import sys
+import unittest
+
+import dpdata
+import numpy as np
+from common import (
+    Data,
+    gen_data,
+    j_loader,
+    tf,
+)
 from packaging.version import parse as parse_version
 
+from deepmd.common import (
+    data_requirement,
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeAtten,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model import (
+    EnerModel,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+from deepmd.utils.type_embed import (
+    TypeEmbedNet,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
 
-@unittest.skipIf(parse_version(tf.__version__) < parse_version("1.15"),
-    f"The current tf version {tf.__version__} is too low to run the new testing model.")
+@unittest.skipIf(
+    parse_version(tf.__version__) < parse_version("1.15"),
+    f"The current tf version {tf.__version__} is too low to run the new testing model.",
+)
 class TestDataLargeBatch(tf.test.TestCase):
     def setUp(self):
         gen_data(mixed_type=True)
 
     def test_data_mixed_type(self):
-        jfile = 'water_se_atten_mixed_type.json'
+        jfile = "water_se_atten_mixed_type.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
+        systems = j_must_have(jdata, "systems")
         batch_size = 1
         test_size = 1
-        rcut = j_must_have(jdata['model']['descriptor'], 'rcut')
-        type_map = j_must_have(jdata['model'], 'type_map')
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+        type_map = j_must_have(jdata["model"], "type_map")
 
         data = DeepmdDataSystem(systems, batch_size, test_size, rcut, type_map=type_map)
-        data_requirement = {'energy': {'ndof': 1,
-                    'atomic': False,
-                    'must': False,
-                    'high_prec': True,
-                    'type_sel': None,
-                    'repeat': 1,
-                    'default': 0.0},
-         'force': {'ndof': 3,
-                   'atomic': True,
-                   'must': False,
-                   'high_prec': False,
-                   'type_sel': None,
-                   'repeat': 1,
-                   'default': 0.0},
-         'virial': {'ndof': 9,
-                    'atomic': False,
-                    'must': False,
-                    'high_prec': False,
-                    'type_sel': None,
-                    'repeat': 1,
-                    'default': 0.0},
-         'atom_ener': {'ndof': 1,
-                       'atomic': True,
-                       'must': False,
-                       'high_prec': False,
-                       'type_sel': None,
-                       'repeat': 1,
-                       'default': 0.0},
-         'atom_pref': {'ndof': 1,
-                       'atomic': True,
-                       'must': False,
-                       'high_prec': False,
-                       'type_sel': None,
-                       'repeat': 3,
-                       'default': 0.0}}
+        data_requirement = {
+            "energy": {
+                "ndof": 1,
+                "atomic": False,
+                "must": False,
+                "high_prec": True,
+                "type_sel": None,
+                "repeat": 1,
+                "default": 0.0,
+            },
+            "force": {
+                "ndof": 3,
+                "atomic": True,
+                "must": False,
+                "high_prec": False,
+                "type_sel": None,
+                "repeat": 1,
+                "default": 0.0,
+            },
+            "virial": {
+                "ndof": 9,
+                "atomic": False,
+                "must": False,
+                "high_prec": False,
+                "type_sel": None,
+                "repeat": 1,
+                "default": 0.0,
+            },
+            "atom_ener": {
+                "ndof": 1,
+                "atomic": True,
+                "must": False,
+                "high_prec": False,
+                "type_sel": None,
+                "repeat": 1,
+                "default": 0.0,
+            },
+            "atom_pref": {
+                "ndof": 1,
+                "atomic": True,
+                "must": False,
+                "high_prec": False,
+                "type_sel": None,
+                "repeat": 3,
+                "default": 0.0,
+            },
+        }
         data.add_dict(data_requirement)
 
         test_data = data.get_test()
         numb_test = 1
 
-        jdata['model']['descriptor'].pop('type', None)
-        jdata['model']['descriptor']['ntypes'] = 2
-        descrpt = DescrptSeAtten(**jdata['model']['descriptor'], uniform_seed=True)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = EnerFitting(**jdata['model']['fitting_net'], uniform_seed=True)
-        typeebd_param = jdata['model']['type_embedding']
+        jdata["model"]["descriptor"].pop("type", None)
+        jdata["model"]["descriptor"]["ntypes"] = 2
+        descrpt = DescrptSeAtten(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
+        typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
-            neuron=typeebd_param['neuron'],
-            resnet_dt=typeebd_param['resnet_dt'],
+            neuron=typeebd_param["neuron"],
+            resnet_dt=typeebd_param["resnet_dt"],
             activation_function=None,
-            seed=typeebd_param['seed'],
+            seed=typeebd_param["seed"],
             uniform_seed=True,
-            padding=True)
+            padding=True,
+        )
         model = EnerModel(descrpt, fitting, typeebd)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord': [test_data['coord']],
-                      'box': [test_data['box']],
-                      'type': [test_data['type']],
-                      'natoms_vec': [test_data['natoms_vec']],
-                      'default_mesh': [test_data['default_mesh']],
-                      'real_natoms_vec': [test_data['real_natoms_vec']]
-                      }
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
+            "real_natoms_vec": [test_data["real_natoms_vec"]],
+        }
         model._compute_input_stat(input_data, mixed_type=True)
-        model.descrpt.bias_atom_e = np.array([0.,  0.])
-
-        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type = tf.placeholder(tf.int32, [None], name='i_type')
-        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name='i_natoms')
-        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh')
+        model.descrpt.bias_atom_e = np.array([0.0, 0.0])
+
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
         is_training = tf.placeholder(tf.bool)
         t_fparam = None
         inputs_dict = {}
 
-        model_pred \
-            = model.build(t_coord,
-                          t_type,
-                          t_natoms,
-                          t_box,
-                          t_mesh,
-                          inputs_dict,
-                          suffix="se_atten",
-                          reuse=False)
-
-        energy = model_pred['energy']
-        force = model_pred['force']
-        virial = model_pred['virial']
-        atom_ener = model_pred['atom_ener']
-
-        feed_dict_test = {t_energy: np.reshape(test_data['energy'][:numb_test], [-1]),
-                          t_force: np.reshape(test_data['force'][:numb_test, :], [-1]),
-                          t_virial: np.reshape(test_data['virial'][:numb_test, :], [-1]),
-                          t_atom_ener: np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord: np.reshape(test_data['coord'][:numb_test, :], [-1]),
-                          t_box: test_data['box'][:numb_test, :],
-                          t_type: np.reshape(test_data['type'][:numb_test, :], [-1]),
-                          t_natoms: test_data['natoms_vec'],
-                          t_mesh: test_data['default_mesh'],
-                          is_training: False}
-
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            inputs_dict,
+            suffix="se_atten",
+            reuse=False,
+        )
+
+        energy = model_pred["energy"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+        atom_ener = model_pred["atom_ener"]
+
+        feed_dict_test = {
+            t_energy: np.reshape(test_data["energy"][:numb_test], [-1]),
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [e, f, v] = sess.run([energy, force, virial],
-                             feed_dict=feed_dict_test)
+        [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
         # print(sess.run(model.type_embedding))
         # np.savetxt('tmp.out', sess.run(descrpt.dout, feed_dict = feed_dict_test), fmt='%.10e')
         # # print(sess.run(model.atype_embed, feed_dict = feed_dict_test))
@@ -156,13 +197,42 @@ def test_data_mixed_type(self):
         e = e.reshape([-1])
         f = f.reshape([-1])
         v = v.reshape([-1])
-        np.savetxt('e.out', e.reshape([1, -1]), delimiter=',')
-        np.savetxt('f.out', f.reshape([1, -1]), delimiter=',')
-        np.savetxt('v.out', v.reshape([1, -1]), delimiter=',')
-
-        refe = [6.12188445792698e+01]
-        reff = [-2.7590100298321299e-03, -2.7392865283639755e-03, 8.5672424478673337e-05,  7.3154109032780492e-03, 7.6754109031673332e-04, -1.0882393042639207e-03, 9.8633073531477645e-03,  3.6631966083397029e-03, -2.2379079261940034e-04, -4.2393697523149913e-03, 4.9491210390296492e-04,  1.6970049039709007e-04, -8.9021867696626039e-03, -4.7967452269658322e-03, 9.2569990351204447e-04, -1.2781517046160920e-03, 2.6103819527704053e-03,  1.3095727849551296e-04]
-        refv = [-1.0171833662757776e-02, -6.7981543912862021e-03, 6.1480942994810296e-04, -6.7981543912861942e-03, 3.0092645628232335e-03,  3.8060849919518031e-04, 6.1480942994810383e-04,  3.8060849919518036e-04, -5.6890657188056002e-05]
+        np.savetxt("e.out", e.reshape([1, -1]), delimiter=",")
+        np.savetxt("f.out", f.reshape([1, -1]), delimiter=",")
+        np.savetxt("v.out", v.reshape([1, -1]), delimiter=",")
+
+        refe = [6.12188445792698e01]
+        reff = [
+            -2.7590100298321299e-03,
+            -2.7392865283639755e-03,
+            8.5672424478673337e-05,
+            7.3154109032780492e-03,
+            7.6754109031673332e-04,
+            -1.0882393042639207e-03,
+            9.8633073531477645e-03,
+            3.6631966083397029e-03,
+            -2.2379079261940034e-04,
+            -4.2393697523149913e-03,
+            4.9491210390296492e-04,
+            1.6970049039709007e-04,
+            -8.9021867696626039e-03,
+            -4.7967452269658322e-03,
+            9.2569990351204447e-04,
+            -1.2781517046160920e-03,
+            2.6103819527704053e-03,
+            1.3095727849551296e-04,
+        ]
+        refv = [
+            -1.0171833662757776e-02,
+            -6.7981543912862021e-03,
+            6.1480942994810296e-04,
+            -6.7981543912861942e-03,
+            3.0092645628232335e-03,
+            3.8060849919518031e-04,
+            6.1480942994810383e-04,
+            3.8060849919518036e-04,
+            -5.6890657188056002e-05,
+        ]
 
         refe = np.reshape(refe, [-1])
         reff = np.reshape(reff, [-1])
diff --git a/source/tests/test_data_modifier.py b/source/tests/test_data_modifier.py
index d791ca2844..3950b75d58 100644
--- a/source/tests/test_data_modifier.py
+++ b/source/tests/test_data_modifier.py
@@ -1,77 +1,91 @@
-import os,sys,platform
-import numpy as np
+import os
+import platform
+import sys
 import unittest
-from deepmd.env import tf
-
-from deepmd.common import j_must_have, data_requirement
-from deepmd.train.run_options import RunOptions
-from deepmd.train.trainer import DPTrainer
-from deepmd.utils.data_system import DeepmdDataSystem
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
-from deepmd.infer.ewald_recp import EwaldRecp
-from deepmd.infer.data_modifier import DipoleChargeModifier
 
-from common import Data, j_loader, tests_path
-
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+import numpy as np
+from common import (
+    Data,
+    j_loader,
+    tests_path,
+)
+
+from deepmd.common import (
+    data_requirement,
+    j_must_have,
+)
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    tf,
+)
+from deepmd.infer.data_modifier import (
+    DipoleChargeModifier,
+)
+from deepmd.infer.ewald_recp import (
+    EwaldRecp,
+)
+from deepmd.train.run_options import (
+    RunOptions,
+)
+from deepmd.train.trainer import (
+    DPTrainer,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     global_default_fv_hh = 1e-2
     global_default_dw_hh = 1e-2
     global_default_places = 3
-else :
+else:
     global_default_fv_hh = 1e-6
     global_default_dw_hh = 1e-4
     global_default_places = 5
 
-modifier_datapath = 'data_modifier'
-INPUT = os.path.join(modifier_datapath, 'dipole.json')
+modifier_datapath = "data_modifier"
+INPUT = os.path.join(modifier_datapath, "dipole.json")
 
 
-class TestDataModifier (tf.test.TestCase) :
-
+class TestDataModifier(tf.test.TestCase):
     def setUp(self):
         # with tf.variable_scope('load', reuse = False) :
-        tf.reset_default_graph()        
+        tf.reset_default_graph()
         self._setUp()
 
     def tearDown(self):
-        tf.reset_default_graph()        
+        tf.reset_default_graph()
 
     def _setUp(self):
         run_opt = RunOptions(
-            restart=None,
-            init_model=None,
-            log_path=None,
-            log_level=30,
-            mpi_log="master"
+            restart=None, init_model=None, log_path=None, log_level=30, mpi_log="master"
         )
         jdata = j_loader(INPUT)
 
         # init model
-        model = DPTrainer (jdata, run_opt = run_opt)
+        model = DPTrainer(jdata, run_opt=run_opt)
         rcut = model.model.get_rcut()
 
         # init data system
-        systems = j_must_have(jdata['training'], 'systems')
-        #systems[0] = tests_path / systems[0]
+        systems = j_must_have(jdata["training"], "systems")
+        # systems[0] = tests_path / systems[0]
         systems = [tests_path / ii for ii in systems]
-        set_pfx = j_must_have(jdata['training'], 'set_prefix')
-        batch_size = j_must_have(jdata['training'], 'batch_size')
-        test_size = j_must_have(jdata['training'], 'numb_test')    
-        data = DeepmdDataSystem(systems, 
-                                batch_size, 
-                                test_size, 
-                                rcut, 
-                                set_prefix=set_pfx)
+        set_pfx = j_must_have(jdata["training"], "set_prefix")
+        batch_size = j_must_have(jdata["training"], "batch_size")
+        test_size = j_must_have(jdata["training"], "numb_test")
+        data = DeepmdDataSystem(
+            systems, batch_size, test_size, rcut, set_prefix=set_pfx
+        )
         data.add_dict(data_requirement)
 
         # clear the default graph
         tf.reset_default_graph()
 
         # build the model with stats from the first system
-        model.build (data)
-        
+        model.build(data)
+
         # freeze the graph
         with self.test_session() as sess:
             init_op = tf.global_variables_initializer()
@@ -80,33 +94,35 @@ def _setUp(self):
             input_graph_def = graph.as_graph_def()
             nodes = "o_dipole,o_rmat,o_rmat_deriv,o_nlist,o_rij,descrpt_attr/rcut,descrpt_attr/ntypes,descrpt_attr/sel,descrpt_attr/ndescrpt,model_attr/tmap,model_attr/sel_type,model_attr/model_type,model_attr/output_dim,model_attr/model_version"
             output_graph_def = tf.graph_util.convert_variables_to_constants(
-                sess,
-                input_graph_def,
-                nodes.split(",") 
+                sess, input_graph_def, nodes.split(",")
+            )
+            output_graph = str(
+                tests_path / os.path.join(modifier_datapath, "dipole.pb")
             )
-            output_graph = str(tests_path / os.path.join(modifier_datapath, 'dipole.pb'))
             with tf.gfile.GFile(output_graph, "wb") as f:
                 f.write(output_graph_def.SerializeToString())
 
     def test_fv(self):
         # with tf.variable_scope('load', reuse = False) :
         self._test_fv()
-            
-    def _test_fv (self):
-        dcm = DipoleChargeModifier(str(tests_path / os.path.join(modifier_datapath, "dipole.pb")),
-                                   [-8],
-                                   [6, 1],
-                                   1,
-                                   0.25)
+
+    def _test_fv(self):
+        dcm = DipoleChargeModifier(
+            str(tests_path / os.path.join(modifier_datapath, "dipole.pb")),
+            [-8],
+            [6, 1],
+            1,
+            0.25,
+        )
         data = Data()
         coord, box, atype = data.get_data()
         atype = atype[0]
         ve, vf, vv = dcm.eval(coord, box, atype)
 
         hh = global_default_fv_hh
-        hh=1e-4
+        hh = 1e-4
         places = global_default_places
-        places=1
+        places = 1
         nframes = coord.shape[0]
         ndof = coord.shape[1]
         natoms = ndof // 3
@@ -114,42 +130,44 @@ def _test_fv (self):
         for ii in range(ndof):
             coordp = np.copy(coord)
             coordm = np.copy(coord)
-            coordp[:,ii] += hh
-            coordm[:,ii] -= hh
-            ep, _, __ = dcm.eval(coordp, box, atype, eval_fv = False)
-            em, _, __ = dcm.eval(coordm, box, atype, eval_fv = False)
-            num_f = -(ep - em) / (2.*hh)
-            np.testing.assert_almost_equal(vf[:,ii].ravel(), num_f.ravel(), 
-                                           places,
-                                           err_msg = 'dof %d does not match' % (ii))
-
-        box3 = np.reshape(box, [nframes, 3,3])
+            coordp[:, ii] += hh
+            coordm[:, ii] -= hh
+            ep, _, __ = dcm.eval(coordp, box, atype, eval_fv=False)
+            em, _, __ = dcm.eval(coordm, box, atype, eval_fv=False)
+            num_f = -(ep - em) / (2.0 * hh)
+            np.testing.assert_almost_equal(
+                vf[:, ii].ravel(),
+                num_f.ravel(),
+                places,
+                err_msg="dof %d does not match" % (ii),
+            )
+
+        box3 = np.reshape(box, [nframes, 3, 3])
         rbox3 = np.linalg.inv(box3)
         coord3 = np.reshape(coord, [nframes, natoms, 3])
         rcoord3 = np.matmul(coord3, rbox3)
-        num_deriv = np.zeros([nframes,3,3])
+        num_deriv = np.zeros([nframes, 3, 3])
         for ii in range(3):
             for jj in range(3):
                 box3p = np.copy(box3)
                 box3m = np.copy(box3)
-                box3p[:,ii,jj] = box3[:,ii,jj] + hh
-                box3m[:,ii,jj] = box3[:,ii,jj] - hh
-                boxp = np.reshape(box3p, [-1,9])
-                boxm = np.reshape(box3m, [-1,9])
+                box3p[:, ii, jj] = box3[:, ii, jj] + hh
+                box3m[:, ii, jj] = box3[:, ii, jj] - hh
+                boxp = np.reshape(box3p, [-1, 9])
+                boxm = np.reshape(box3m, [-1, 9])
                 coord3p = np.matmul(rcoord3, box3p)
                 coord3m = np.matmul(rcoord3, box3m)
-                coordp = np.reshape(coord3p, [nframes,-1])
-                coordm = np.reshape(coord3m, [nframes,-1])
-                ep, _, __ = dcm.eval(coordp, boxp, atype, eval_fv = False)
-                em, _, __ = dcm.eval(coordm, boxm, atype, eval_fv = False)
-                num_deriv[:,ii,jj] = -(ep - em) / (2.*hh)
+                coordp = np.reshape(coord3p, [nframes, -1])
+                coordm = np.reshape(coord3m, [nframes, -1])
+                ep, _, __ = dcm.eval(coordp, boxp, atype, eval_fv=False)
+                em, _, __ = dcm.eval(coordm, boxm, atype, eval_fv=False)
+                num_deriv[:, ii, jj] = -(ep - em) / (2.0 * hh)
         # box3t = np.transpose(box3, [0,2,1])
         # t_esti = np.matmul(num_deriv, box3t)
-        num_deriv = np.transpose(num_deriv, [0,2,1])
+        num_deriv = np.transpose(num_deriv, [0, 2, 1])
         t_esti = np.matmul(num_deriv, box3)
 
         # print(t_esti, '\n', vv.reshape([-1, 3, 3]))
-        np.testing.assert_almost_equal(t_esti.ravel(), vv.ravel(), 
-                                       places,
-                                       err_msg = "virial component failed")
-            
+        np.testing.assert_almost_equal(
+            t_esti.ravel(), vv.ravel(), places, err_msg="virial component failed"
+        )
diff --git a/source/tests/test_data_modifier_shuffle.py b/source/tests/test_data_modifier_shuffle.py
index 194a80e9c4..52b46e4edd 100644
--- a/source/tests/test_data_modifier_shuffle.py
+++ b/source/tests/test_data_modifier_shuffle.py
@@ -1,81 +1,97 @@
-import os,sys,platform,json,shutil
-import numpy as np
+import json
+import os
+import platform
+import shutil
+import sys
 import unittest
+
 import dpdata
-from deepmd.env import tf
-
-from deepmd.common import j_must_have, data_requirement
-from deepmd.train.run_options import RunOptions
-from deepmd.train.trainer import DPTrainer
-from deepmd.utils.data_system import DeepmdDataSystem
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
-from deepmd.infer.ewald_recp import EwaldRecp
-from deepmd.infer.data_modifier import DipoleChargeModifier
-from deepmd.infer.deep_dipole import DeepDipole
-
-from common import Data
-
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+import numpy as np
+from common import (
+    Data,
+)
+
+from deepmd.common import (
+    data_requirement,
+    j_must_have,
+)
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    tf,
+)
+from deepmd.infer.data_modifier import (
+    DipoleChargeModifier,
+)
+from deepmd.infer.deep_dipole import (
+    DeepDipole,
+)
+from deepmd.infer.ewald_recp import (
+    EwaldRecp,
+)
+from deepmd.train.run_options import (
+    RunOptions,
+)
+from deepmd.train.trainer import (
+    DPTrainer,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     global_default_fv_hh = 1e-2
     global_default_dw_hh = 1e-2
     global_default_places = 3
-else :
+else:
     global_default_fv_hh = 1e-6
     global_default_dw_hh = 1e-4
     global_default_places = 5
 
-modifier_datapath = 'data_modifier'
-
+modifier_datapath = "data_modifier"
 
-class TestDataModifier (tf.test.TestCase) :
 
+class TestDataModifier(tf.test.TestCase):
     def setUp(self):
         # with tf.variable_scope('load', reuse = False) :
-        tf.reset_default_graph()        
+        tf.reset_default_graph()
         self._setUp()
 
     def tearDown(self):
-        tf.reset_default_graph()        
-        if os.path.isdir(os.path.join(modifier_datapath, 'sys_test_0')):
-            shutil.rmtree(os.path.join(modifier_datapath, 'sys_test_0'))
-        if os.path.isfile(os.path.join(modifier_datapath, 'dipole.pb')):
-            os.remove(os.path.join(modifier_datapath, 'dipole.pb'))
+        tf.reset_default_graph()
+        if os.path.isdir(os.path.join(modifier_datapath, "sys_test_0")):
+            shutil.rmtree(os.path.join(modifier_datapath, "sys_test_0"))
+        if os.path.isfile(os.path.join(modifier_datapath, "dipole.pb")):
+            os.remove(os.path.join(modifier_datapath, "dipole.pb"))
 
     def _setUp(self):
         run_opt = RunOptions(
-            restart=None,
-            init_model=None,
-            log_path=None,
-            log_level=30,
-            mpi_log="master"
+            restart=None, init_model=None, log_path=None, log_level=30, mpi_log="master"
         )
         jdata = self._setUp_jdata()
         self._setUp_data()
 
         # init model
-        model = DPTrainer (jdata, run_opt = run_opt)
+        model = DPTrainer(jdata, run_opt=run_opt)
         rcut = model.model.get_rcut()
 
         # init data system
-        systems = j_must_have(jdata['training'], 'systems')
-        set_pfx = j_must_have(jdata['training'], 'set_prefix')
-        batch_size = j_must_have(jdata['training'], 'batch_size')
-        test_size = j_must_have(jdata['training'], 'numb_test')    
-        data = DeepmdDataSystem(systems, 
-                                batch_size, 
-                                test_size, 
-                                rcut, 
-                                set_prefix=set_pfx)
+        systems = j_must_have(jdata["training"], "systems")
+        set_pfx = j_must_have(jdata["training"], "set_prefix")
+        batch_size = j_must_have(jdata["training"], "batch_size")
+        test_size = j_must_have(jdata["training"], "numb_test")
+        data = DeepmdDataSystem(
+            systems, batch_size, test_size, rcut, set_prefix=set_pfx
+        )
         data.add_dict(data_requirement)
 
         # clear the default graph
         tf.reset_default_graph()
 
         # build the model with stats from the first system
-        model.build (data)
-        
+        model.build(data)
+
         # freeze the graph
         with self.test_session() as sess:
             init_op = tf.global_variables_initializer()
@@ -84,128 +100,123 @@ def _setUp(self):
             input_graph_def = graph.as_graph_def()
             nodes = "o_dipole,o_rmat,o_rmat_deriv,o_nlist,o_rij,descrpt_attr/rcut,descrpt_attr/ntypes,descrpt_attr/sel,descrpt_attr/ndescrpt,model_attr/tmap,model_attr/sel_type,model_attr/model_type,model_attr/output_dim,model_attr/model_version"
             output_graph_def = tf.graph_util.convert_variables_to_constants(
-                sess,
-                input_graph_def,
-                nodes.split(",") 
+                sess, input_graph_def, nodes.split(",")
             )
-            output_graph = os.path.join(modifier_datapath, 'dipole.pb')
+            output_graph = os.path.join(modifier_datapath, "dipole.pb")
             with tf.gfile.GFile(output_graph, "wb") as f:
                 f.write(output_graph_def.SerializeToString())
 
-    def _setUp_data(self):        
+    def _setUp_data(self):
         jdata = self._setUp_jdata()
         # sys0
-        self.atom_types0 = np.array([0, 3, 2, 1, 3, 4, 1, 4], dtype = int)
+        self.atom_types0 = np.array([0, 3, 2, 1, 3, 4, 1, 4], dtype=int)
         self.natoms = len(self.atom_types0)
         self.nframes = 1
         scale = 10.0
-        self.sel_type = jdata['model']['fitting_net']['sel_type']
+        self.sel_type = jdata["model"]["fitting_net"]["sel_type"]
         self.nsel = 0
         for ii in self.sel_type:
             self.nsel += np.sum(self.atom_types0 == ii)
         self.coords0 = np.random.random([self.nframes, self.natoms * 3]) * scale
-        self.dipoles0 = np.random.random([self.nframes, self.nsel * 3]) 
+        self.dipoles0 = np.random.random([self.nframes, self.nsel * 3])
         self.box0 = np.reshape(np.eye(3) * scale, [-1, 9])
         self.box0 = np.tile(self.box0, [self.nframes, 1])
-        self._write_sys_data(os.path.join(modifier_datapath, 'sys_test_0'), 
-                             self.atom_types0, self.coords0, self.dipoles0, self.box0)
+        self._write_sys_data(
+            os.path.join(modifier_datapath, "sys_test_0"),
+            self.atom_types0,
+            self.coords0,
+            self.dipoles0,
+            self.box0,
+        )
         # sys1
-        self.idx_map = np.array([6, 7, 1, 0, 5, 2, 4, 3], dtype = int)
-        self.sel_idx_map = np.array([3, 0, 2, 1], dtype = int)
-        self.atom_types1 = self.atom_types0[self.idx_map]        
+        self.idx_map = np.array([6, 7, 1, 0, 5, 2, 4, 3], dtype=int)
+        self.sel_idx_map = np.array([3, 0, 2, 1], dtype=int)
+        self.atom_types1 = self.atom_types0[self.idx_map]
         self.coords1 = np.reshape(self.coords0, [self.nframes, -1, 3])
-        self.coords1 = self.coords1[:,self.idx_map,:]
-        self.coords1 = np.reshape(self.coords1, [self.nframes, self.natoms*3])
-        self.dipoles1 = self.dipoles0[:,self.sel_idx_map]
+        self.coords1 = self.coords1[:, self.idx_map, :]
+        self.coords1 = np.reshape(self.coords1, [self.nframes, self.natoms * 3])
+        self.dipoles1 = self.dipoles0[:, self.sel_idx_map]
         self.box1 = self.box0
 
     def _write_sys_data(self, dirname, atom_types, coords, dipoles, box):
-        os.makedirs(dirname, exist_ok = True)
-        os.makedirs(dirname+'/set.0', exist_ok = True)
-        np.savetxt(os.path.join(dirname, 'type.raw'), atom_types, fmt = '%d')
-        np.save(os.path.join(dirname, 'set.0', 'coord.npy'), coords)
-        np.save(os.path.join(dirname, 'set.0', 'atomic_dipole.npy'), dipoles)
-        np.save(os.path.join(dirname, 'set.0', 'box.npy'), box)
+        os.makedirs(dirname, exist_ok=True)
+        os.makedirs(dirname + "/set.0", exist_ok=True)
+        np.savetxt(os.path.join(dirname, "type.raw"), atom_types, fmt="%d")
+        np.save(os.path.join(dirname, "set.0", "coord.npy"), coords)
+        np.save(os.path.join(dirname, "set.0", "atomic_dipole.npy"), dipoles)
+        np.save(os.path.join(dirname, "set.0", "box.npy"), box)
 
     def _setUp_jdata(self):
-        aa = {"a":[1,2,3]}
+        aa = {"a": [1, 2, 3]}
         jdata = {
-            "model":{
-	        "type_map":		["A", "B", "C", "D", "E"],
-	        "descriptor" :{
-	            "type":		"se_e2_a",
-	            "sel":              [50, 50, 50, 50, 50],
-	            "rcut_smth":	3.80,
-	            "rcut":		4.00,
-	            "neuron":		[2, 4],
-	            "resnet_dt":	False,
-	            "axis_neuron":	4,
-	            "seed":		1,
-	        },
-	        "fitting_net": {
-	            "type":		"dipole",
-	            "sel_type":	[1, 3],
-	            "neuron":		[10],
-	            "resnet_dt":	True,
-	            "seed":		1,
-	        },
+            "model": {
+                "type_map": ["A", "B", "C", "D", "E"],
+                "descriptor": {
+                    "type": "se_e2_a",
+                    "sel": [50, 50, 50, 50, 50],
+                    "rcut_smth": 3.80,
+                    "rcut": 4.00,
+                    "neuron": [2, 4],
+                    "resnet_dt": False,
+                    "axis_neuron": 4,
+                    "seed": 1,
+                },
+                "fitting_net": {
+                    "type": "dipole",
+                    "sel_type": [1, 3],
+                    "neuron": [10],
+                    "resnet_dt": True,
+                    "seed": 1,
+                },
             },
             "loss": {
-                "type":"tensor",
-                "pref":1.0,
-                "pref_atomic":1.0,
-                "_comment": " that's all"
+                "type": "tensor",
+                "pref": 1.0,
+                "pref_atomic": 1.0,
+                "_comment": " that's all",
             },
-            "learning_rate" :{
-	        "type":		"exp",
-	        "start_lr":	0.01,
-	        "stop_lr":	1e-8,
-	        "decay_steps":	5000,
-	        "decay_rate":	0.95,
+            "learning_rate": {
+                "type": "exp",
+                "start_lr": 0.01,
+                "stop_lr": 1e-8,
+                "decay_steps": 5000,
+                "decay_rate": 0.95,
             },
             "training": {
-	        "systems":	["data_modifier/sys_test_0"], 
-	        "set_prefix":	"set",    
-	        "stop_batch":	1000000,
-	        "batch_size":	1,
-	        "numb_test":	2,
+                "systems": ["data_modifier/sys_test_0"],
+                "set_prefix": "set",
+                "stop_batch": 1000000,
+                "batch_size": 1,
+                "numb_test": 2,
             },
         }
         return jdata
 
-
     def test_z_dipole(self):
         dd = DeepDipole(os.path.join(modifier_datapath, "dipole.pb"))
-            
+
         dv0 = dd.eval(self.coords0, self.box0, self.atom_types0)
         dv1 = dd.eval(self.coords1, self.box1, self.atom_types1)
 
         dv01 = dv0.reshape([self.nframes, -1, 3])
-        dv01 = dv01[:,self.sel_idx_map, :]
+        dv01 = dv01[:, self.sel_idx_map, :]
         dv01 = dv01.reshape([self.nframes, -1])
         dv1 = dv1.reshape([self.nframes, -1])
 
-        np.testing.assert_almost_equal(
-                    dv01, dv1, 
-                    err_msg = "dipole dose not match")
-
+        np.testing.assert_almost_equal(dv01, dv1, err_msg="dipole does not match")
 
     def test_modify(self):
-        dcm = DipoleChargeModifier(os.path.join(modifier_datapath, "dipole.pb"),
-                                   [-1, -3],
-                                   [1, 1, 1, 1, 1],
-                                   1,
-                                   0.25)
+        dcm = DipoleChargeModifier(
+            os.path.join(modifier_datapath, "dipole.pb"),
+            [-1, -3],
+            [1, 1, 1, 1, 1],
+            1,
+            0.25,
+        )
         ve0, vf0, vv0 = dcm.eval(self.coords0, self.box0, self.atom_types0)
         ve1, vf1, vv1 = dcm.eval(self.coords1, self.box1, self.atom_types1)
-        vf01 = vf0[:,self.idx_map, :]
-
-        np.testing.assert_almost_equal(ve0, ve1, 
-                                       err_msg = 'energy should match')
-        np.testing.assert_almost_equal(vv0, vv1, 
-                                       err_msg = 'virial should match')
-        np.testing.assert_almost_equal(
-                        vf01, vf1, 
-                        err_msg = "force dose not match")
-                    
-        
+        vf01 = vf0[:, self.idx_map, :]
+
+        np.testing.assert_almost_equal(ve0, ve1, err_msg="energy should match")
+        np.testing.assert_almost_equal(vv0, vv1, err_msg="virial should match")
+        np.testing.assert_almost_equal(vf01, vf1, err_msg="force does not match")
diff --git a/source/tests/test_data_requirement.py b/source/tests/test_data_requirement.py
index 96e192af3e..a35b1d2bf6 100644
--- a/source/tests/test_data_requirement.py
+++ b/source/tests/test_data_requirement.py
@@ -1,14 +1,23 @@
-import dpdata,os,sys,json,unittest
+import json
+import os
+import sys
+import unittest
+
+import dpdata
 import numpy as np
 
-from deepmd.common import data_requirement, add_data_requirement
+from deepmd.common import (
+    add_data_requirement,
+    data_requirement,
+)
+
 
 class TestDataRequirement(unittest.TestCase):
-    def test_add(self) :
-        add_data_requirement('test', 3) 
-        self.assertEqual(data_requirement['test']['ndof'], 3)
-        self.assertEqual(data_requirement['test']['atomic'], False)
-        self.assertEqual(data_requirement['test']['must'], False)
-        self.assertEqual(data_requirement['test']['high_prec'], False)
-        self.assertEqual(data_requirement['test']['repeat'], 1)
-        self.assertEqual(data_requirement['test']['default'], 0.)
+    def test_add(self):
+        add_data_requirement("test", 3)
+        self.assertEqual(data_requirement["test"]["ndof"], 3)
+        self.assertEqual(data_requirement["test"]["atomic"], False)
+        self.assertEqual(data_requirement["test"]["must"], False)
+        self.assertEqual(data_requirement["test"]["high_prec"], False)
+        self.assertEqual(data_requirement["test"]["repeat"], 1)
+        self.assertEqual(data_requirement["test"]["default"], 0.0)
diff --git a/source/tests/test_deepdipole.py b/source/tests/test_deepdipole.py
index f90d8cab87..d75662a3a9 100644
--- a/source/tests/test_deepdipole.py
+++ b/source/tests/test_deepdipole.py
@@ -1,34 +1,78 @@
-import os,sys,platform,shutil,dpdata
-import numpy as np
+import os
+import platform
+import shutil
+import sys
 import unittest
 
-from infer.convert2pb import convert_pbtxt_to_pb
-from deepmd.infer import DeepDipole
-from common import tests_path, finite_difference, strerch_box, tf
+import dpdata
+import numpy as np
+from common import (
+    finite_difference,
+    strerch_box,
+    tests_path,
+    tf,
+)
+from infer.convert2pb import (
+    convert_pbtxt_to_pb,
+)
 from packaging.version import parse as parse_version
 
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.infer import (
+    DeepDipole,
+)
+
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
-else :
+else:
     default_places = 10
 
-class TestDeepDipolePBC(unittest.TestCase) :
+
+class TestDeepDipolePBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deepdipole.pbtxt")), "deepdipole.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deepdipole.pbtxt")), "deepdipole.pb"
+        )
         cls.dp = DeepDipole("deepdipole.pb")
 
     def setUp(self):
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
-        self.expected_d = np.array([-9.274180565967479195e-01,2.698028341272042496e+00,2.521268387140979117e-01,2.927260638453461628e+00,-8.571926301526779923e-01,1.667785136187720063e+00])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+        self.expected_d = np.array(
+            [
+                -9.274180565967479195e-01,
+                2.698028341272042496e00,
+                2.521268387140979117e-01,
+                2.927260638453461628e00,
+                -8.571926301526779923e-01,
+                1.667785136187720063e00,
+            ]
+        )
 
     @classmethod
     def tearDownClass(cls):
@@ -37,8 +81,8 @@ def tearDownClass(cls):
 
     def test_attrs(self):
         self.assertEqual(self.dp.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp.get_rcut(), 4.0, places = default_places)
-        self.assertEqual(self.dp.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp.get_rcut(), 4.0, places=default_places)
+        self.assertEqual(self.dp.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp.get_sel_type(), [0])
 
     def test_1frame_atm(self):
@@ -47,7 +91,7 @@ def test_1frame_atm(self):
         nframes = 1
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(dd.shape, (nframes,nsel,3))
+        self.assertEqual(dd.shape, (nframes, nsel, 3))
         # check values
         np.testing.assert_almost_equal(dd.ravel(), self.expected_d, default_places)
 
@@ -59,28 +103,55 @@ def test_2frame_atm(self):
         nframes = 2
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(dd.shape, (nframes,nsel,3))
+        self.assertEqual(dd.shape, (nframes, nsel, 3))
         # check values
         expected_d = np.concatenate((self.expected_d, self.expected_d))
         np.testing.assert_almost_equal(dd.ravel(), expected_d, default_places)
 
 
-class TestDeepDipoleNoPBC(unittest.TestCase) :
+class TestDeepDipoleNoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deepdipole.pbtxt")), "deepdipole.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deepdipole.pbtxt")), "deepdipole.pb"
+        )
         cls.dp = DeepDipole("deepdipole.pb")
 
     def setUp(self):
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([20., 0., 0., 0., 20., 0., 0., 0., 20.])
-        self.expected_d = np.array([-1.982092647058316e+00, 8.303361089028074e-01, 1.499962003179265e+00, 2.927112547154802e+00, -8.572096473802318e-01, 1.667798310054391e+00])
+        self.box = np.array([20.0, 0.0, 0.0, 0.0, 20.0, 0.0, 0.0, 0.0, 20.0])
+        self.expected_d = np.array(
+            [
+                -1.982092647058316e00,
+                8.303361089028074e-01,
+                1.499962003179265e00,
+                2.927112547154802e00,
+                -8.572096473802318e-01,
+                1.667798310054391e00,
+            ]
+        )
 
     @classmethod
     def tearDownClass(cls):
@@ -93,7 +164,7 @@ def test_1frame_atm(self):
         nframes = 1
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(dd.shape, (nframes,nsel,3))
+        self.assertEqual(dd.shape, (nframes, nsel, 3))
         # check values
         np.testing.assert_almost_equal(dd.ravel(), self.expected_d, default_places)
 
@@ -103,34 +174,288 @@ def test_1frame_atm_large_box(self):
         nframes = 1
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(dd.shape, (nframes,nsel,3))
+        self.assertEqual(dd.shape, (nframes, nsel, 3))
         # check values
         np.testing.assert_almost_equal(dd.ravel(), self.expected_d, default_places)
 
 
-@unittest.skipIf(parse_version(tf.__version__) < parse_version("1.15"), 
-    f"The current tf version {tf.__version__} is too low to run the new testing model.")
-class TestDeepDipoleNewPBC(unittest.TestCase) :
+@unittest.skipIf(
+    parse_version(tf.__version__) < parse_version("1.15"),
+    f"The current tf version {tf.__version__} is too low to run the new testing model.",
+)
+class TestDeepDipoleNewPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deepdipole_new.pbtxt")), "deepdipole_new.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deepdipole_new.pbtxt")),
+            "deepdipole_new.pb",
+        )
         cls.dp = DeepDipole("deepdipole_new.pb")
 
     def setUp(self):
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.nout = 3
         self.atype = np.array([0, 1, 1, 0, 1, 1])
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
-        self.expected_t = np.array([-1.128427726201255282e-01, 2.654103846999197880e-01, 2.625816377288122533e-02, 3.027556488877700680e-01, -7.475444785689989990e-02, 1.526291164572509684e-01])
-        self.expected_f = np.array([8.424897862241968738e-02, -3.823566783202275721e-02, 3.570797165027734810e-01, 6.102563129736437997e-02, -1.351209759852018133e-01, -2.438224487466488510e-01, -1.403204771681088869e-01, 1.719596545791735875e-01, -1.136584427103610045e-01, 2.761686212947551955e-02, -7.247860200915196005e-02, 6.208831127377397591e-02, -2.605870723577520809e-01, -4.504074577536486268e-02, 7.340240097998475266e-02, 2.280160774766013809e-01, 1.189163370225677641e-01, -1.350895372995223886e-01, -4.294311497114180337e-02, 1.524802094783661577e-01, 1.070451777645946290e-01, -1.259336332521076574e-01, -2.087610788959351760e-01, 9.447141346538817652e-02, 1.668125597515543457e-01, 5.487037060760904805e-02, -2.014994036104674757e-01, -7.411985441205551361e-02, 3.614456658821710300e-01, 2.901174891391154476e-01, -4.871926969937838414e-02, -1.252747945819455699e-01, -2.555459318266457558e-01, 1.249033125831290059e-01, -2.347603724902655176e-01, -3.458874493198500766e-02, 3.563990394229877290e-01, 1.052342031228763047e-01, 1.907268232932498031e-01, -2.432737821373903708e-01, 1.016781829972335099e-01, -7.707616437996064884e-02, -1.139199805053340564e-01, -2.068592154909300040e-01, -1.156337826476897951e-01, 6.583817133933017596e-02, 2.902207490750204344e-01, 9.945482314729316153e-02, 7.986986504051810098e-02, -2.549975565538568079e-01, 1.275343199697696051e-01, -1.449133131601115787e-01, -3.527636315034351350e-02, -2.250060193826620980e-01])
-        self.expected_v = np.array([3.479789535931299138e-02, 4.337414719007849292e-03, -3.647371468256610082e-03, 8.053492919528318708e-03, 1.003834811499279773e-03, -8.441338187607602033e-04, -6.695998268698949256e-03, -8.346286793845711892e-04, 7.018468440279366279e-04, -4.515896716004976635e-02, 1.891794570218296306e-02, 3.417435352652402336e-02, 9.998952222904963771e-02, -4.188750255541257711e-02, -7.566774655171297492e-02, 1.804286120725206444e-01, -7.558495911146115298e-02, -1.365405712981232755e-01, -1.002593446510361419e-01, -1.117945222697993429e-01, 7.449172735713084637e-02, 7.770237313970995707e-02, 1.313723119887387492e-01, -8.655414676270002661e-02, -4.973937467461287537e-02, -8.663006083493235421e-02, 5.703914957966123994e-02, -3.382231967662072125e-02, -4.215813217482468345e-03, 3.545115660155720612e-03, -8.247565860499378454e-03, -1.028025206407854253e-03, 8.644757417520612143e-04, 6.761330949063471332e-03, 8.427721296283078580e-04, -7.086947453692606178e-04, -1.622698090933780493e-02, 1.305372051650728060e-01, -2.082599910094798112e-01, -7.109985131471197733e-03, 2.202585658101286273e-02, -3.554509763049529952e-02, 1.436400379134906459e-02, -3.554915857551419617e-02, 5.763638171798115412e-02, 2.074946305037073946e-01, 5.016353704485233822e-02, -5.700401936915034523e-02, 1.082138666905367308e-01, 2.616159414496492877e-02, -2.972908425564194101e-02, -1.229314789425654392e-01, -2.971969820589494271e-02, 3.377238432488059716e-02, 7.622024445219390681e-03, 9.500540384976005961e-04, -7.989090778275298932e-04, -2.952148931042387209e-02, -3.679732378636401541e-03, 3.094320409307891630e-03, -9.534268115386618486e-04, -1.188407357158671420e-04, 9.993425503379762414e-05, 9.319088860655992679e-02, -3.903942630815338682e-02, -7.052283462118023871e-02, 1.544831983829924038e-01, -6.471593445773991815e-02, -1.169062041817236081e-01, -6.990884596438741438e-02, 2.928613817427033750e-02, 5.290399154061733306e-02, 7.491400658274136037e-02, 1.273824184577304897e-01, 
-8.391492311946648075e-02, 3.543872837542783732e-02, 4.324623973455964804e-02, -2.873418641045778418e-02, -8.444981234074398768e-02, -1.531171183141288306e-01, 1.007308415346981068e-01, -6.396885751015785743e-03, -7.973455327045167592e-04, 6.704951070469818575e-04, 2.915483242551994078e-02, 3.634030104030812076e-03, -3.055888951116827318e-03, 6.608747470375698129e-04, 8.237532257692081912e-05, -6.927015762150179410e-05, -6.099175331115514430e-03, 2.402310352789886402e-02, -3.861491558256636286e-02, -2.583867422346154685e-02, 6.050621302336450097e-02, -9.822840263095998503e-02, -3.827994718203701213e-02, 1.252239810257823327e-01, -2.018867305507059950e-01, 1.136620144506474833e-01, 2.747872876828840599e-02, -3.122582814578225147e-02, -2.136319389661417989e-01, -5.164728194785846160e-02, 5.869009312256637939e-02, -3.147575788810638014e-02, -7.609523885036708832e-03, 8.647186232996251914e-03, -5.990706138603461330e-03, -7.467169124604876177e-04, 6.279210400235934152e-04, -9.287887182821588476e-04, -1.157696985960763821e-04, 9.735179200124630735e-05, -2.966271471326579340e-02, -3.697335544996301071e-03, 3.109123071928715683e-03, 1.800225987816693740e-01, -7.541487246259104271e-02, -1.362333179969384966e-01, -7.524185541795300192e-02, 3.152023672914239238e-02, 5.693978247845072477e-02, 5.703636164117102669e-02, -2.389361095778780308e-02, -4.316265205277792366e-02, -4.915584336537091176e-02, -8.674240294138457763e-02, 5.709724154860432860e-02, -8.679070528401405804e-02, -1.572017650485294793e-01, 1.034201569997979520e-01, -3.557746655862283752e-02, -8.626268394893003844e-02, 5.645546718878535764e-02, 6.848075985139651621e-03, 8.535845420570665554e-04, -7.177870012752625602e-04, 8.266638576582277997e-04, 1.030402542123569647e-04, -8.664748649675494882e-05, 2.991751925173294011e-02, 3.729095884068693231e-03, -3.135830629785046203e-03, 1.523793442834292522e-02, -3.873020552543556677e-02, 6.275576045602117292e-02, -3.842536616563556329e-02, 1.249268983543572881e-01, 
-2.014296501045876875e-01, 1.288704808602599873e-02, -6.326999354443738066e-02, 1.014064886873057153e-01, -1.318711149757016143e-01, -3.188092889522457091e-02, 3.622832829002789468e-02, -3.210149046681261276e-02, -7.760799893075580151e-03, 8.819090787585878374e-03, -2.047554776382226327e-01, -4.950132426418570042e-02, 5.625150484566552450e-02])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+        self.expected_t = np.array(
+            [
+                -1.128427726201255282e-01,
+                2.654103846999197880e-01,
+                2.625816377288122533e-02,
+                3.027556488877700680e-01,
+                -7.475444785689989990e-02,
+                1.526291164572509684e-01,
+            ]
+        )
+        self.expected_f = np.array(
+            [
+                8.424897862241968738e-02,
+                -3.823566783202275721e-02,
+                3.570797165027734810e-01,
+                6.102563129736437997e-02,
+                -1.351209759852018133e-01,
+                -2.438224487466488510e-01,
+                -1.403204771681088869e-01,
+                1.719596545791735875e-01,
+                -1.136584427103610045e-01,
+                2.761686212947551955e-02,
+                -7.247860200915196005e-02,
+                6.208831127377397591e-02,
+                -2.605870723577520809e-01,
+                -4.504074577536486268e-02,
+                7.340240097998475266e-02,
+                2.280160774766013809e-01,
+                1.189163370225677641e-01,
+                -1.350895372995223886e-01,
+                -4.294311497114180337e-02,
+                1.524802094783661577e-01,
+                1.070451777645946290e-01,
+                -1.259336332521076574e-01,
+                -2.087610788959351760e-01,
+                9.447141346538817652e-02,
+                1.668125597515543457e-01,
+                5.487037060760904805e-02,
+                -2.014994036104674757e-01,
+                -7.411985441205551361e-02,
+                3.614456658821710300e-01,
+                2.901174891391154476e-01,
+                -4.871926969937838414e-02,
+                -1.252747945819455699e-01,
+                -2.555459318266457558e-01,
+                1.249033125831290059e-01,
+                -2.347603724902655176e-01,
+                -3.458874493198500766e-02,
+                3.563990394229877290e-01,
+                1.052342031228763047e-01,
+                1.907268232932498031e-01,
+                -2.432737821373903708e-01,
+                1.016781829972335099e-01,
+                -7.707616437996064884e-02,
+                -1.139199805053340564e-01,
+                -2.068592154909300040e-01,
+                -1.156337826476897951e-01,
+                6.583817133933017596e-02,
+                2.902207490750204344e-01,
+                9.945482314729316153e-02,
+                7.986986504051810098e-02,
+                -2.549975565538568079e-01,
+                1.275343199697696051e-01,
+                -1.449133131601115787e-01,
+                -3.527636315034351350e-02,
+                -2.250060193826620980e-01,
+            ]
+        )
+        self.expected_v = np.array(
+            [
+                3.479789535931299138e-02,
+                4.337414719007849292e-03,
+                -3.647371468256610082e-03,
+                8.053492919528318708e-03,
+                1.003834811499279773e-03,
+                -8.441338187607602033e-04,
+                -6.695998268698949256e-03,
+                -8.346286793845711892e-04,
+                7.018468440279366279e-04,
+                -4.515896716004976635e-02,
+                1.891794570218296306e-02,
+                3.417435352652402336e-02,
+                9.998952222904963771e-02,
+                -4.188750255541257711e-02,
+                -7.566774655171297492e-02,
+                1.804286120725206444e-01,
+                -7.558495911146115298e-02,
+                -1.365405712981232755e-01,
+                -1.002593446510361419e-01,
+                -1.117945222697993429e-01,
+                7.449172735713084637e-02,
+                7.770237313970995707e-02,
+                1.313723119887387492e-01,
+                -8.655414676270002661e-02,
+                -4.973937467461287537e-02,
+                -8.663006083493235421e-02,
+                5.703914957966123994e-02,
+                -3.382231967662072125e-02,
+                -4.215813217482468345e-03,
+                3.545115660155720612e-03,
+                -8.247565860499378454e-03,
+                -1.028025206407854253e-03,
+                8.644757417520612143e-04,
+                6.761330949063471332e-03,
+                8.427721296283078580e-04,
+                -7.086947453692606178e-04,
+                -1.622698090933780493e-02,
+                1.305372051650728060e-01,
+                -2.082599910094798112e-01,
+                -7.109985131471197733e-03,
+                2.202585658101286273e-02,
+                -3.554509763049529952e-02,
+                1.436400379134906459e-02,
+                -3.554915857551419617e-02,
+                5.763638171798115412e-02,
+                2.074946305037073946e-01,
+                5.016353704485233822e-02,
+                -5.700401936915034523e-02,
+                1.082138666905367308e-01,
+                2.616159414496492877e-02,
+                -2.972908425564194101e-02,
+                -1.229314789425654392e-01,
+                -2.971969820589494271e-02,
+                3.377238432488059716e-02,
+                7.622024445219390681e-03,
+                9.500540384976005961e-04,
+                -7.989090778275298932e-04,
+                -2.952148931042387209e-02,
+                -3.679732378636401541e-03,
+                3.094320409307891630e-03,
+                -9.534268115386618486e-04,
+                -1.188407357158671420e-04,
+                9.993425503379762414e-05,
+                9.319088860655992679e-02,
+                -3.903942630815338682e-02,
+                -7.052283462118023871e-02,
+                1.544831983829924038e-01,
+                -6.471593445773991815e-02,
+                -1.169062041817236081e-01,
+                -6.990884596438741438e-02,
+                2.928613817427033750e-02,
+                5.290399154061733306e-02,
+                7.491400658274136037e-02,
+                1.273824184577304897e-01,
+                -8.391492311946648075e-02,
+                3.543872837542783732e-02,
+                4.324623973455964804e-02,
+                -2.873418641045778418e-02,
+                -8.444981234074398768e-02,
+                -1.531171183141288306e-01,
+                1.007308415346981068e-01,
+                -6.396885751015785743e-03,
+                -7.973455327045167592e-04,
+                6.704951070469818575e-04,
+                2.915483242551994078e-02,
+                3.634030104030812076e-03,
+                -3.055888951116827318e-03,
+                6.608747470375698129e-04,
+                8.237532257692081912e-05,
+                -6.927015762150179410e-05,
+                -6.099175331115514430e-03,
+                2.402310352789886402e-02,
+                -3.861491558256636286e-02,
+                -2.583867422346154685e-02,
+                6.050621302336450097e-02,
+                -9.822840263095998503e-02,
+                -3.827994718203701213e-02,
+                1.252239810257823327e-01,
+                -2.018867305507059950e-01,
+                1.136620144506474833e-01,
+                2.747872876828840599e-02,
+                -3.122582814578225147e-02,
+                -2.136319389661417989e-01,
+                -5.164728194785846160e-02,
+                5.869009312256637939e-02,
+                -3.147575788810638014e-02,
+                -7.609523885036708832e-03,
+                8.647186232996251914e-03,
+                -5.990706138603461330e-03,
+                -7.467169124604876177e-04,
+                6.279210400235934152e-04,
+                -9.287887182821588476e-04,
+                -1.157696985960763821e-04,
+                9.735179200124630735e-05,
+                -2.966271471326579340e-02,
+                -3.697335544996301071e-03,
+                3.109123071928715683e-03,
+                1.800225987816693740e-01,
+                -7.541487246259104271e-02,
+                -1.362333179969384966e-01,
+                -7.524185541795300192e-02,
+                3.152023672914239238e-02,
+                5.693978247845072477e-02,
+                5.703636164117102669e-02,
+                -2.389361095778780308e-02,
+                -4.316265205277792366e-02,
+                -4.915584336537091176e-02,
+                -8.674240294138457763e-02,
+                5.709724154860432860e-02,
+                -8.679070528401405804e-02,
+                -1.572017650485294793e-01,
+                1.034201569997979520e-01,
+                -3.557746655862283752e-02,
+                -8.626268394893003844e-02,
+                5.645546718878535764e-02,
+                6.848075985139651621e-03,
+                8.535845420570665554e-04,
+                -7.177870012752625602e-04,
+                8.266638576582277997e-04,
+                1.030402542123569647e-04,
+                -8.664748649675494882e-05,
+                2.991751925173294011e-02,
+                3.729095884068693231e-03,
+                -3.135830629785046203e-03,
+                1.523793442834292522e-02,
+                -3.873020552543556677e-02,
+                6.275576045602117292e-02,
+                -3.842536616563556329e-02,
+                1.249268983543572881e-01,
+                -2.014296501045876875e-01,
+                1.288704808602599873e-02,
+                -6.326999354443738066e-02,
+                1.014064886873057153e-01,
+                -1.318711149757016143e-01,
+                -3.188092889522457091e-02,
+                3.622832829002789468e-02,
+                -3.210149046681261276e-02,
+                -7.760799893075580151e-03,
+                8.819090787585878374e-03,
+                -2.047554776382226327e-01,
+                -4.950132426418570042e-02,
+                5.625150484566552450e-02,
+            ]
+        )
         self.expected_gt = self.expected_t.reshape(-1, self.nout).sum(0).reshape(-1)
-        self.expected_gv = self.expected_v.reshape(1, self.nout, 6, 9).sum(-2).reshape(-1)
+        self.expected_gv = (
+            self.expected_v.reshape(1, self.nout, 6, 9).sum(-2).reshape(-1)
+        )
 
     @classmethod
     def tearDownClass(cls):
@@ -139,15 +464,15 @@ def tearDownClass(cls):
 
     def test_attrs(self):
         self.assertEqual(self.dp.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp.get_rcut(), 4.0, places = default_places)
-        self.assertEqual(self.dp.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp.get_rcut(), 4.0, places=default_places)
+        self.assertEqual(self.dp.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp.get_sel_type(), [0])
 
     def test_1frame_old(self):
         gt = self.dp.eval(self.coords, self.box, self.atype, atomic=False)
         # check shape of the returns
         nframes = 1
-        self.assertEqual(gt.shape, (nframes,self.nout))
+        self.assertEqual(gt.shape, (nframes, self.nout))
         # check values
         np.testing.assert_almost_equal(gt.ravel(), self.expected_gt, default_places)
 
@@ -157,7 +482,7 @@ def test_1frame_old_atm(self):
         nframes = 1
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(at.shape, (nframes,nsel,self.nout))
+        self.assertEqual(at.shape, (nframes, nsel, self.nout))
         # check values
         np.testing.assert_almost_equal(at.ravel(), self.expected_t, default_places)
 
@@ -169,55 +494,72 @@ def test_2frame_old_atm(self):
         nframes = 2
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(at.shape, (nframes,nsel,self.nout))
+        self.assertEqual(at.shape, (nframes, nsel, self.nout))
         # check values
         expected_d = np.concatenate((self.expected_t, self.expected_t))
         np.testing.assert_almost_equal(at.ravel(), expected_d, default_places)
 
     def test_1frame_full(self):
-        gt, ff, vv = self.dp.eval_full(self.coords, self.box, self.atype, atomic = False)
+        gt, ff, vv = self.dp.eval_full(self.coords, self.box, self.atype, atomic=False)
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(gt.shape, (nframes,self.nout))
-        self.assertEqual(ff.shape, (nframes,self.nout,natoms,3))
-        self.assertEqual(vv.shape, (nframes,self.nout,9))
+        self.assertEqual(gt.shape, (nframes, self.nout))
+        self.assertEqual(ff.shape, (nframes, self.nout, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, self.nout, 9))
         # check values
         np.testing.assert_almost_equal(ff.ravel(), self.expected_f, default_places)
         np.testing.assert_almost_equal(gt.ravel(), self.expected_gt, default_places)
         np.testing.assert_almost_equal(vv.ravel(), self.expected_gv, default_places)
 
     def test_1frame_full_atm(self):
-        gt, ff, vv, at, av = self.dp.eval_full(self.coords, self.box, self.atype, atomic = True)
+        gt, ff, vv, at, av = self.dp.eval_full(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(gt.shape, (nframes,self.nout))
-        self.assertEqual(ff.shape, (nframes,self.nout,natoms,3))
-        self.assertEqual(vv.shape, (nframes,self.nout,9))
-        self.assertEqual(at.shape, (nframes,nsel,self.nout))
-        self.assertEqual(av.shape, (nframes,self.nout,natoms,9))
+        self.assertEqual(gt.shape, (nframes, self.nout))
+        self.assertEqual(ff.shape, (nframes, self.nout, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, self.nout, 9))
+        self.assertEqual(at.shape, (nframes, nsel, self.nout))
+        self.assertEqual(av.shape, (nframes, self.nout, natoms, 9))
         # check values
-        np.testing.assert_almost_equal(ff.reshape([-1]), self.expected_f.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(at.reshape([-1]), self.expected_t.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(av.reshape([-1]), self.expected_v.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(gt.reshape([-1]), self.expected_gt.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(vv.reshape([-1]), self.expected_gv.reshape([-1]), decimal = default_places)
+        np.testing.assert_almost_equal(
+            ff.reshape([-1]), self.expected_f.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            at.reshape([-1]), self.expected_t.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            av.reshape([-1]), self.expected_v.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            gt.reshape([-1]), self.expected_gt.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            vv.reshape([-1]), self.expected_gv.reshape([-1]), decimal=default_places
+        )
 
     def test_1frame_full_atm_shuffle(self):
-        i_sf = [2,1,3,0,5,4]
-        isel_sf = [1,0]
-        gt, ff, vv, at, av = self.dp.eval_full(self.coords.reshape(-1,3)[i_sf].reshape(-1), self.box, self.atype[i_sf], atomic = True)
+        i_sf = [2, 1, 3, 0, 5, 4]
+        isel_sf = [1, 0]
+        gt, ff, vv, at, av = self.dp.eval_full(
+            self.coords.reshape(-1, 3)[i_sf].reshape(-1),
+            self.box,
+            self.atype[i_sf],
+            atomic=True,
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(gt.shape, (nframes,self.nout))
-        self.assertEqual(ff.shape, (nframes,self.nout,natoms,3))
-        self.assertEqual(vv.shape, (nframes,self.nout,9))
-        self.assertEqual(at.shape, (nframes,nsel,self.nout))
-        self.assertEqual(av.shape, (nframes,self.nout,natoms,9))
+        self.assertEqual(gt.shape, (nframes, self.nout))
+        self.assertEqual(ff.shape, (nframes, self.nout, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, self.nout, 9))
+        self.assertEqual(at.shape, (nframes, nsel, self.nout))
+        self.assertEqual(av.shape, (nframes, self.nout, natoms, 9))
         # recover the shuffled result
         nff = np.empty_like(ff)
         nav = np.empty_like(av)
@@ -226,81 +568,386 @@ def test_1frame_full_atm_shuffle(self):
         nav[:, :, i_sf] = av
         nat[:, isel_sf] = at
         # check values
-        np.testing.assert_almost_equal(nff.reshape([-1]), self.expected_f.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(nat.reshape([-1]), self.expected_t.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(nav.reshape([-1]), self.expected_v.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(gt.reshape([-1]), self.expected_gt.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(vv.reshape([-1]), self.expected_gv.reshape([-1]), decimal = default_places)
+        np.testing.assert_almost_equal(
+            nff.reshape([-1]), self.expected_f.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            nat.reshape([-1]), self.expected_t.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            nav.reshape([-1]), self.expected_v.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            gt.reshape([-1]), self.expected_gt.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            vv.reshape([-1]), self.expected_gv.reshape([-1]), decimal=default_places
+        )
 
     def test_1frame_num_deriv(self):
         # numerical force
-        num_f = - finite_difference(
-            lambda coord: self.dp.eval(coord, self.box, self.atype, atomic=False).reshape(-1),
-            self.coords
+        num_f = -finite_difference(
+            lambda coord: self.dp.eval(
+                coord, self.box, self.atype, atomic=False
+            ).reshape(-1),
+            self.coords,
         ).reshape(-1)
-        np.testing.assert_allclose(num_f.reshape([-1]), self.expected_f.reshape([-1]), atol=1e-5)
+        np.testing.assert_allclose(
+            num_f.reshape([-1]), self.expected_f.reshape([-1]), atol=1e-5
+        )
         # numerical virial
-        num_v = - (finite_difference(
-            lambda box: self.dp.eval(strerch_box(self.coords, self.box, box), box, self.atype, atomic=False).reshape(-1),
-            self.box
-        ).reshape(-1, 3, 3).transpose(0,2,1) @ self.box.reshape(3,3)).reshape(-1)
-        np.testing.assert_allclose(num_v.reshape([-1]), self.expected_gv.reshape([-1]), atol=1e-5)
+        num_v = -(
+            finite_difference(
+                lambda box: self.dp.eval(
+                    strerch_box(self.coords, self.box, box),
+                    box,
+                    self.atype,
+                    atomic=False,
+                ).reshape(-1),
+                self.box,
+            )
+            .reshape(-1, 3, 3)
+            .transpose(0, 2, 1)
+            @ self.box.reshape(3, 3)
+        ).reshape(-1)
+        np.testing.assert_allclose(
+            num_v.reshape([-1]), self.expected_gv.reshape([-1]), atol=1e-5
+        )
 
     def test_2frame_full_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        gt, ff, vv, at, av = self.dp.eval_full(coords2, box2, self.atype, atomic = True)
+        gt, ff, vv, at, av = self.dp.eval_full(coords2, box2, self.atype, atomic=True)
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(gt.shape, (nframes,self.nout))
-        self.assertEqual(ff.shape, (nframes,self.nout,natoms,3))
-        self.assertEqual(vv.shape, (nframes,self.nout,9))
-        self.assertEqual(at.shape, (nframes,nsel,self.nout))
-        self.assertEqual(av.shape, (nframes,self.nout,natoms,9))
+        self.assertEqual(gt.shape, (nframes, self.nout))
+        self.assertEqual(ff.shape, (nframes, self.nout, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, self.nout, 9))
+        self.assertEqual(at.shape, (nframes, nsel, self.nout))
+        self.assertEqual(av.shape, (nframes, self.nout, natoms, 9))
         # check values
         expected_f = np.tile(self.expected_f.reshape(-1), nframes)
         expected_t = np.tile(self.expected_t.reshape(-1), nframes)
         expected_v = np.tile(self.expected_v.reshape(-1), nframes)
         expected_gt = np.tile(self.expected_gt.reshape(-1), nframes)
         expected_gv = np.tile(self.expected_gv.reshape(-1), nframes)
-        np.testing.assert_almost_equal(ff.reshape([-1]), expected_f.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(at.reshape([-1]), expected_t.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(av.reshape([-1]), expected_v.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(gt.reshape([-1]), expected_gt.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(vv.reshape([-1]), expected_gv.reshape([-1]), decimal = default_places)
+        np.testing.assert_almost_equal(
+            ff.reshape([-1]), expected_f.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            at.reshape([-1]), expected_t.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            av.reshape([-1]), expected_v.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            gt.reshape([-1]), expected_gt.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            vv.reshape([-1]), expected_gv.reshape([-1]), decimal=default_places
+        )
 
 
-@unittest.skipIf(parse_version(tf.__version__) < parse_version("1.15"), 
-    f"The current tf version {tf.__version__} is too low to run the new testing model.")
-class TestDeepDipoleFakePBC(unittest.TestCase) :
+@unittest.skipIf(
+    parse_version(tf.__version__) < parse_version("1.15"),
+    f"The current tf version {tf.__version__} is too low to run the new testing model.",
+)
+class TestDeepDipoleFakePBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deepdipole_fake.pbtxt")), "deepdipole_fake.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deepdipole_fake.pbtxt")),
+            "deepdipole_fake.pb",
+        )
         cls.dp = DeepDipole("deepdipole_fake.pb")
 
     def setUp(self):
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.nout = 3
         self.atype = np.array([0, 1, 1, 0, 1, 1])
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
-        self.expected_t = np.array([-3.186217894664857830e-01, 1.082220317383403296e+00, 5.646623185237639730e-02, 7.426508038929955369e-01, -3.115996324658170114e-01, -5.619108089573777720e-01, -4.181578166874897473e-01, -7.579762930974662805e-01, 4.980618433125854616e-01, 1.059635561913792712e+00, -2.641989315855929332e-01, 5.307984468104405273e-01, -1.484512535335152095e-01, 4.978588497891502374e-01, -8.022467807199461509e-01, -9.165936539882671985e-01, -2.238112120606238209e-01, 2.553133145814526217e-01])
-        self.expected_f = np.array([5.041930370187270860e-01, 7.873825190365474347e-03, -4.096376607074713183e-01, -3.904160887819057568e-01, 1.651276463640535541e-01, 2.941164542146633698e-01, -1.137769482368212182e-01, -1.730014715544191672e-01, 1.155212064928080040e-01, 5.863332521864410563e-01, 8.527372103998451247e-02, -6.934420009023331555e-02, -1.225415636916203990e-02, 4.321720022314819165e-02, -7.184309080594213082e-02, -5.740790958172790059e-01, -1.284909212631327180e-01, 1.411872908961754325e-01, 1.394536521676267848e-02, 4.089695733795025712e-01, -8.790828175074971718e-02, 1.594305121314434359e-01, -7.202915091075953735e-02, -1.198685751141350120e-01, -1.733758773482060866e-01, -3.369404224687432281e-01, 2.077768568648848124e-01, 8.892382475507179529e-02, 1.801380487829997712e-01, -3.123469659869602677e-01, 5.864597608298829229e-02, -1.422803757045815187e-01, 2.644907470171818931e-01, -1.475698008380600668e-01, -3.785767307841875901e-02, 4.785621896977837464e-02, -4.108193580732780736e-01, -8.281856742888188405e-02, 3.778676259248315294e-01, 2.952252813797733855e-01, -1.246444286160888204e-01, -2.244502796339041817e-01, 1.155940766935046465e-01, 2.074629960449706489e-01, -1.534173462909272645e-01, -7.510936703550785687e-02, -3.127379668651892319e-01, 4.622598362029770591e-01, -9.621211578064041425e-02, 2.628380090727049923e-01, -4.042471768183623637e-01, 1.713214828161482572e-01, 4.989995779248418417e-02, -5.801265938461462601e-02])
-        self.expected_v = np.array([-2.222884841673062051e-01, 9.787686675884660348e-01, -4.154378405125468132e-03, -1.028716279506487613e-01, -5.106807648068932559e-02, 9.617563369584695987e-02, -6.539114125439839109e-02, 8.616465014722822502e-02, 3.804663842399232110e-02, 8.958556637777052023e-01, -3.880568178324209083e-01, -6.754602069672590581e-01, -7.079186190294968484e-02, 2.747611091693637556e-02, 5.399649908930042458e-02, -1.139876669236737639e-01, 5.825425892149401624e-02, 8.421681390884694363e-02, -4.324455921712526130e-01, -7.982113179384198176e-01, 5.178700497729279428e-01, -2.119158650865729521e-02, -5.669958244474895825e-02, 2.880008495593230911e-02, 1.025153878619989092e-02, 3.455330867235743841e-02, -1.531884121903195027e-02, 8.219378927727334361e-01, -3.289162383259068290e-01, 6.075540959886343018e-01, -4.581331025027536585e-02, -2.052131009092891811e-02, 2.750489901219354411e-02, 4.633180549151136307e-02, 2.654757883635484872e-02, -3.696756527480526966e-02, -1.440158444262530923e-01, 4.944364353401542456e-01, -7.963661150769665298e-01, -3.279405043326523786e-03, -2.129463233078606257e-02, 3.328257760760894995e-02, 5.297895300667846037e-03, 3.437606177524311912e-02, -5.372785779467447592e-02, -1.202172148995579004e+00, -2.858130614731594910e-01, 3.226510095110137200e-01, -6.135144302237673097e-02, -7.628488365516866883e-03, 5.476841872267750738e-03, 6.607427030244909794e-02, 5.340677880472323794e-03, -1.357441391258333270e-03, -8.118660176947067875e-02, -5.001362994997625433e-02, 7.779205646059993151e-02, -3.756939173800121767e-01, 9.298080515606454988e-01, 1.339730913665280465e-01, 7.808446283301898050e-02, 6.915261247137938216e-02, -7.891656263643208324e-02, -8.035264423283335067e-02, 3.669461691293440797e-02, 6.021702408564724718e-02, 7.758956893285878786e-01, -3.211906986558734078e-01, -5.879129815844135187e-01, 6.104269012391384808e-02, -2.900814613392431462e-02, -4.552568262646729258e-02, -2.925720146121059406e-02, -6.902319498684716947e-02, 
3.795994492146410881e-02, -4.884151777114849047e-01, -8.870211107633522163e-01, 5.820737769422319463e-01, 3.684187251077851444e-02, 8.060668659447538242e-02, -4.657258523345865486e-02, -5.368793987058780026e-02, -2.898503185606490784e-02, 4.002941486858704184e-02, 1.047195951770644173e+00, -2.548621413845133521e-01, 5.147188892651490821e-01, 2.224026955228448205e-02, -3.359454269630585826e-02, 5.544338676867385796e-02, -1.191273887309037081e-03, -2.572624454332552921e-02, 4.050578204667463350e-02, -1.732938335087045867e-01, 5.389208482414027390e-01, -8.697634229876662904e-01, 4.437234466680844980e-02, -8.396020718207411471e-02, 1.373643808601548444e-01, -7.061240859228964939e-02, -6.490608065647092938e-03, 2.687574399814150403e-03, -9.296946571189880215e-01, -2.226700108388965371e-01, 2.521074551855023715e-01, 1.661015709598279849e-02, -1.517347986687963592e-03, 4.175867772300452530e-03, -6.961167479355900856e-02, 8.595942434252096254e-02, 4.162461447266577186e-02, 9.626281426355881576e-02, 7.003654498037747977e-02, -9.432734078079299533e-02, -2.845586320234831934e-01, 9.840080473993093602e-01, 4.702636003956783828e-02, -1.121268620463006793e-01, 5.646007092227271762e-02, 8.300611975708871437e-02, 5.302797712834559501e-02, -2.128036013727904047e-02, -4.031107561971148529e-02, 8.271174343351145319e-01, -3.553740248929939671e-01, -6.241986194331364812e-01, 1.182134083009860406e-02, 3.695184024999947914e-02, -1.710161500383376373e-02, 3.008054412288880750e-02, 7.027591928009153943e-02, -3.889396164699072955e-02, -4.409008808247306677e-01, -8.148107923739302816e-01, 5.281887759440460073e-01, 5.876941218352332852e-02, 3.991562883248954419e-02, -5.674944832716710685e-02, 2.308380369202570059e-02, -3.268790472062921282e-02, 5.410175456271631989e-02, 1.034753757966884624e+00, -2.182612858207719775e-01, 4.555767475016349599e-01, 1.999790463725661591e-03, 4.137558459329451765e-02, -6.513656908661276390e-02, 4.414866304579422029e-02, -8.348549073500094453e-02, 
1.365906277014072301e-01, -2.146360657075572775e-01, 6.238014307983194007e-01, -1.008256906299115352e+00, 8.070152934834977365e-02, 3.543449526282398468e-03, 3.048075243036858784e-03, 1.760219621424649605e-02, -1.639238275648761956e-03, 4.474655455192242531e-03, -9.335462888220811273e-01, -2.202218134011651174e-01, 2.478280539571276475e-01])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+        self.expected_t = np.array(
+            [
+                -3.186217894664857830e-01,
+                1.082220317383403296e00,
+                5.646623185237639730e-02,
+                7.426508038929955369e-01,
+                -3.115996324658170114e-01,
+                -5.619108089573777720e-01,
+                -4.181578166874897473e-01,
+                -7.579762930974662805e-01,
+                4.980618433125854616e-01,
+                1.059635561913792712e00,
+                -2.641989315855929332e-01,
+                5.307984468104405273e-01,
+                -1.484512535335152095e-01,
+                4.978588497891502374e-01,
+                -8.022467807199461509e-01,
+                -9.165936539882671985e-01,
+                -2.238112120606238209e-01,
+                2.553133145814526217e-01,
+            ]
+        )
+        self.expected_f = np.array(
+            [
+                5.041930370187270860e-01,
+                7.873825190365474347e-03,
+                -4.096376607074713183e-01,
+                -3.904160887819057568e-01,
+                1.651276463640535541e-01,
+                2.941164542146633698e-01,
+                -1.137769482368212182e-01,
+                -1.730014715544191672e-01,
+                1.155212064928080040e-01,
+                5.863332521864410563e-01,
+                8.527372103998451247e-02,
+                -6.934420009023331555e-02,
+                -1.225415636916203990e-02,
+                4.321720022314819165e-02,
+                -7.184309080594213082e-02,
+                -5.740790958172790059e-01,
+                -1.284909212631327180e-01,
+                1.411872908961754325e-01,
+                1.394536521676267848e-02,
+                4.089695733795025712e-01,
+                -8.790828175074971718e-02,
+                1.594305121314434359e-01,
+                -7.202915091075953735e-02,
+                -1.198685751141350120e-01,
+                -1.733758773482060866e-01,
+                -3.369404224687432281e-01,
+                2.077768568648848124e-01,
+                8.892382475507179529e-02,
+                1.801380487829997712e-01,
+                -3.123469659869602677e-01,
+                5.864597608298829229e-02,
+                -1.422803757045815187e-01,
+                2.644907470171818931e-01,
+                -1.475698008380600668e-01,
+                -3.785767307841875901e-02,
+                4.785621896977837464e-02,
+                -4.108193580732780736e-01,
+                -8.281856742888188405e-02,
+                3.778676259248315294e-01,
+                2.952252813797733855e-01,
+                -1.246444286160888204e-01,
+                -2.244502796339041817e-01,
+                1.155940766935046465e-01,
+                2.074629960449706489e-01,
+                -1.534173462909272645e-01,
+                -7.510936703550785687e-02,
+                -3.127379668651892319e-01,
+                4.622598362029770591e-01,
+                -9.621211578064041425e-02,
+                2.628380090727049923e-01,
+                -4.042471768183623637e-01,
+                1.713214828161482572e-01,
+                4.989995779248418417e-02,
+                -5.801265938461462601e-02,
+            ]
+        )
+        self.expected_v = np.array(
+            [
+                -2.222884841673062051e-01,
+                9.787686675884660348e-01,
+                -4.154378405125468132e-03,
+                -1.028716279506487613e-01,
+                -5.106807648068932559e-02,
+                9.617563369584695987e-02,
+                -6.539114125439839109e-02,
+                8.616465014722822502e-02,
+                3.804663842399232110e-02,
+                8.958556637777052023e-01,
+                -3.880568178324209083e-01,
+                -6.754602069672590581e-01,
+                -7.079186190294968484e-02,
+                2.747611091693637556e-02,
+                5.399649908930042458e-02,
+                -1.139876669236737639e-01,
+                5.825425892149401624e-02,
+                8.421681390884694363e-02,
+                -4.324455921712526130e-01,
+                -7.982113179384198176e-01,
+                5.178700497729279428e-01,
+                -2.119158650865729521e-02,
+                -5.669958244474895825e-02,
+                2.880008495593230911e-02,
+                1.025153878619989092e-02,
+                3.455330867235743841e-02,
+                -1.531884121903195027e-02,
+                8.219378927727334361e-01,
+                -3.289162383259068290e-01,
+                6.075540959886343018e-01,
+                -4.581331025027536585e-02,
+                -2.052131009092891811e-02,
+                2.750489901219354411e-02,
+                4.633180549151136307e-02,
+                2.654757883635484872e-02,
+                -3.696756527480526966e-02,
+                -1.440158444262530923e-01,
+                4.944364353401542456e-01,
+                -7.963661150769665298e-01,
+                -3.279405043326523786e-03,
+                -2.129463233078606257e-02,
+                3.328257760760894995e-02,
+                5.297895300667846037e-03,
+                3.437606177524311912e-02,
+                -5.372785779467447592e-02,
+                -1.202172148995579004e00,
+                -2.858130614731594910e-01,
+                3.226510095110137200e-01,
+                -6.135144302237673097e-02,
+                -7.628488365516866883e-03,
+                5.476841872267750738e-03,
+                6.607427030244909794e-02,
+                5.340677880472323794e-03,
+                -1.357441391258333270e-03,
+                -8.118660176947067875e-02,
+                -5.001362994997625433e-02,
+                7.779205646059993151e-02,
+                -3.756939173800121767e-01,
+                9.298080515606454988e-01,
+                1.339730913665280465e-01,
+                7.808446283301898050e-02,
+                6.915261247137938216e-02,
+                -7.891656263643208324e-02,
+                -8.035264423283335067e-02,
+                3.669461691293440797e-02,
+                6.021702408564724718e-02,
+                7.758956893285878786e-01,
+                -3.211906986558734078e-01,
+                -5.879129815844135187e-01,
+                6.104269012391384808e-02,
+                -2.900814613392431462e-02,
+                -4.552568262646729258e-02,
+                -2.925720146121059406e-02,
+                -6.902319498684716947e-02,
+                3.795994492146410881e-02,
+                -4.884151777114849047e-01,
+                -8.870211107633522163e-01,
+                5.820737769422319463e-01,
+                3.684187251077851444e-02,
+                8.060668659447538242e-02,
+                -4.657258523345865486e-02,
+                -5.368793987058780026e-02,
+                -2.898503185606490784e-02,
+                4.002941486858704184e-02,
+                1.047195951770644173e00,
+                -2.548621413845133521e-01,
+                5.147188892651490821e-01,
+                2.224026955228448205e-02,
+                -3.359454269630585826e-02,
+                5.544338676867385796e-02,
+                -1.191273887309037081e-03,
+                -2.572624454332552921e-02,
+                4.050578204667463350e-02,
+                -1.732938335087045867e-01,
+                5.389208482414027390e-01,
+                -8.697634229876662904e-01,
+                4.437234466680844980e-02,
+                -8.396020718207411471e-02,
+                1.373643808601548444e-01,
+                -7.061240859228964939e-02,
+                -6.490608065647092938e-03,
+                2.687574399814150403e-03,
+                -9.296946571189880215e-01,
+                -2.226700108388965371e-01,
+                2.521074551855023715e-01,
+                1.661015709598279849e-02,
+                -1.517347986687963592e-03,
+                4.175867772300452530e-03,
+                -6.961167479355900856e-02,
+                8.595942434252096254e-02,
+                4.162461447266577186e-02,
+                9.626281426355881576e-02,
+                7.003654498037747977e-02,
+                -9.432734078079299533e-02,
+                -2.845586320234831934e-01,
+                9.840080473993093602e-01,
+                4.702636003956783828e-02,
+                -1.121268620463006793e-01,
+                5.646007092227271762e-02,
+                8.300611975708871437e-02,
+                5.302797712834559501e-02,
+                -2.128036013727904047e-02,
+                -4.031107561971148529e-02,
+                8.271174343351145319e-01,
+                -3.553740248929939671e-01,
+                -6.241986194331364812e-01,
+                1.182134083009860406e-02,
+                3.695184024999947914e-02,
+                -1.710161500383376373e-02,
+                3.008054412288880750e-02,
+                7.027591928009153943e-02,
+                -3.889396164699072955e-02,
+                -4.409008808247306677e-01,
+                -8.148107923739302816e-01,
+                5.281887759440460073e-01,
+                5.876941218352332852e-02,
+                3.991562883248954419e-02,
+                -5.674944832716710685e-02,
+                2.308380369202570059e-02,
+                -3.268790472062921282e-02,
+                5.410175456271631989e-02,
+                1.034753757966884624e00,
+                -2.182612858207719775e-01,
+                4.555767475016349599e-01,
+                1.999790463725661591e-03,
+                4.137558459329451765e-02,
+                -6.513656908661276390e-02,
+                4.414866304579422029e-02,
+                -8.348549073500094453e-02,
+                1.365906277014072301e-01,
+                -2.146360657075572775e-01,
+                6.238014307983194007e-01,
+                -1.008256906299115352e00,
+                8.070152934834977365e-02,
+                3.543449526282398468e-03,
+                3.048075243036858784e-03,
+                1.760219621424649605e-02,
+                -1.639238275648761956e-03,
+                4.474655455192242531e-03,
+                -9.335462888220811273e-01,
+                -2.202218134011651174e-01,
+                2.478280539571276475e-01,
+            ]
+        )
         self.expected_gt = self.expected_t.reshape(-1, self.nout).sum(0).reshape(-1)
-        self.expected_gv = self.expected_v.reshape(1, self.nout, 6, 9).sum(-2).reshape(-1)
-        mcoord = self.coords.reshape(2,3,3)
-        fake_target = np.stack([
-            mcoord[:, 1] + mcoord[:, 2] - 2 * mcoord[:, 0],
-            mcoord[:, 0] - mcoord[:, 1],
-            mcoord[:, 0] - mcoord[:, 2]
-        ], axis=-2)
+        self.expected_gv = (
+            self.expected_v.reshape(1, self.nout, 6, 9).sum(-2).reshape(-1)
+        )
+        mcoord = self.coords.reshape(2, 3, 3)
+        fake_target = np.stack(
+            [
+                mcoord[:, 1] + mcoord[:, 2] - 2 * mcoord[:, 0],
+                mcoord[:, 0] - mcoord[:, 1],
+                mcoord[:, 0] - mcoord[:, 2],
+            ],
+            axis=-2,
+        )
         fake_target = fake_target - 13 * np.rint(fake_target / 13)
         self.target_t = fake_target.reshape(-1)
 
@@ -311,13 +958,15 @@ def tearDownClass(cls):
 
     def test_attrs(self):
         self.assertEqual(self.dp.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp.get_rcut(), 2.0, places = default_places)
-        self.assertEqual(self.dp.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp.get_rcut(), 2.0, places=default_places)
+        self.assertEqual(self.dp.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp.get_sel_type().tolist(), [0, 1])
         np.testing.assert_allclose(self.target_t, self.expected_t, atol=3e-2)
 
     def test_1frame_full_atm(self):
-        gt, ff, vv, at, av = self.dp.eval_full(self.coords, self.box, self.atype, atomic = True)
+        gt, ff, vv, at, av = self.dp.eval_full(
+            self.coords, self.box, self.atype, atomic=True
+        )
         for dd in at, ff, av:
             print("\n\n")
             print(", ".join(f"{ii:.18e}" for ii in dd.reshape(-1)))
@@ -326,31 +975,46 @@ def test_1frame_full_atm(self):
         nframes = 1
         natoms = len(self.atype)
         nsel = natoms
-        self.assertEqual(gt.shape, (nframes,self.nout))
-        self.assertEqual(ff.shape, (nframes,self.nout,natoms,3))
-        self.assertEqual(vv.shape, (nframes,self.nout,9))
-        self.assertEqual(at.shape, (nframes,nsel,self.nout))
-        self.assertEqual(av.shape, (nframes,self.nout,natoms,9))
+        self.assertEqual(gt.shape, (nframes, self.nout))
+        self.assertEqual(ff.shape, (nframes, self.nout, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, self.nout, 9))
+        self.assertEqual(at.shape, (nframes, nsel, self.nout))
+        self.assertEqual(av.shape, (nframes, self.nout, natoms, 9))
         # check values
-        np.testing.assert_almost_equal(ff.reshape([-1]), self.expected_f.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(at.reshape([-1]), self.expected_t.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(av.reshape([-1]), self.expected_v.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(gt.reshape([-1]), self.expected_gt.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(vv.reshape([-1]), self.expected_gv.reshape([-1]), decimal = default_places)
+        np.testing.assert_almost_equal(
+            ff.reshape([-1]), self.expected_f.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            at.reshape([-1]), self.expected_t.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            av.reshape([-1]), self.expected_v.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            gt.reshape([-1]), self.expected_gt.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            vv.reshape([-1]), self.expected_gv.reshape([-1]), decimal=default_places
+        )
 
     def test_1frame_full_atm_shuffle(self):
-        i_sf = [2,1,3,0,5,4]
+        i_sf = [2, 1, 3, 0, 5, 4]
         isel_sf = i_sf
-        gt, ff, vv, at, av = self.dp.eval_full(self.coords.reshape(-1,3)[i_sf].reshape(-1), self.box, self.atype[i_sf], atomic = True)
+        gt, ff, vv, at, av = self.dp.eval_full(
+            self.coords.reshape(-1, 3)[i_sf].reshape(-1),
+            self.box,
+            self.atype[i_sf],
+            atomic=True,
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
         nsel = natoms
-        self.assertEqual(gt.shape, (nframes,self.nout))
-        self.assertEqual(ff.shape, (nframes,self.nout,natoms,3))
-        self.assertEqual(vv.shape, (nframes,self.nout,9))
-        self.assertEqual(at.shape, (nframes,nsel,self.nout))
-        self.assertEqual(av.shape, (nframes,self.nout,natoms,9))
+        self.assertEqual(gt.shape, (nframes, self.nout))
+        self.assertEqual(ff.shape, (nframes, self.nout, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, self.nout, 9))
+        self.assertEqual(at.shape, (nframes, nsel, self.nout))
+        self.assertEqual(av.shape, (nframes, self.nout, natoms, 9))
         # recover the shuffled result
         nff = np.empty_like(ff)
         nav = np.empty_like(av)
@@ -359,8 +1023,18 @@ def test_1frame_full_atm_shuffle(self):
         nav[:, :, i_sf] = av
         nat[:, isel_sf] = at
         # check values
-        np.testing.assert_almost_equal(nff.reshape([-1]), self.expected_f.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(nat.reshape([-1]), self.expected_t.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(nav.reshape([-1]), self.expected_v.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(gt.reshape([-1]), self.expected_gt.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(vv.reshape([-1]), self.expected_gv.reshape([-1]), decimal = default_places)
\ No newline at end of file
+        np.testing.assert_almost_equal(
+            nff.reshape([-1]), self.expected_f.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            nat.reshape([-1]), self.expected_t.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            nav.reshape([-1]), self.expected_v.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            gt.reshape([-1]), self.expected_gt.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            vv.reshape([-1]), self.expected_gv.reshape([-1]), decimal=default_places
+        )
diff --git a/source/tests/test_deepmd_data.py b/source/tests/test_deepmd_data.py
index a155d7df77..4bbc614a93 100644
--- a/source/tests/test_deepmd_data.py
+++ b/source/tests/test_deepmd_data.py
@@ -1,271 +1,286 @@
-import os,sys,shutil,copy
-import numpy as np
+import copy
+import os
+import shutil
+import sys
 import unittest
 
-from deepmd.utils.data import DeepmdData
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from common import tests_path
+import numpy as np
+from common import (
+    tests_path,
+)
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.utils.data import (
+    DeepmdData,
+)
 
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     places = 6
 else:
     places = 12
 
+
 class TestDataTypeSel(unittest.TestCase):
     def setUp(self):
-        self.data_name = 'test_data'
-        os.makedirs(self.data_name, exist_ok = True)
-        os.makedirs(os.path.join(self.data_name,'set.foo'), exist_ok = True)
-        np.savetxt(os.path.join(self.data_name, 'type.raw'), 
-                   np.array([0, 1, 1, 0, 1, 1]), 
-                   fmt = '%d')
+        self.data_name = "test_data"
+        os.makedirs(self.data_name, exist_ok=True)
+        os.makedirs(os.path.join(self.data_name, "set.foo"), exist_ok=True)
+        np.savetxt(
+            os.path.join(self.data_name, "type.raw"),
+            np.array([0, 1, 1, 0, 1, 1]),
+            fmt="%d",
+        )
         self.nframes = 3
         self.natoms = 6
         # coord
-        path = os.path.join(self.data_name, 'set.foo', 'coord.npy')
+        path = os.path.join(self.data_name, "set.foo", "coord.npy")
         self.coord = np.random.random([self.nframes, self.natoms, 3])
         np.save(path, np.reshape(self.coord, [self.nframes, -1]))
-        self.coord = self.coord[:,[0,3,1,2,4,5],:]
+        self.coord = self.coord[:, [0, 3, 1, 2, 4, 5], :]
         self.coord = self.coord.reshape([self.nframes, -1])
         # box
-        path = os.path.join(self.data_name, 'set.foo', 'box.npy')
+        path = os.path.join(self.data_name, "set.foo", "box.npy")
         self.box = np.random.random([self.nframes, 9])
         np.save(path, self.box)
         # value
-        path = os.path.join(self.data_name, 'set.foo', 'value_1.npy')
+        path = os.path.join(self.data_name, "set.foo", "value_1.npy")
         self.value_1 = np.arange(self.nframes * 2)
         self.value_1 = np.reshape(self.value_1, [self.nframes, 2])
         np.save(path, self.value_1)
         # value
-        path = os.path.join(self.data_name, 'set.foo', 'value_2.npy')
+        path = os.path.join(self.data_name, "set.foo", "value_2.npy")
         self.value_2 = np.arange(self.nframes * 4)
         self.value_2 = np.reshape(self.value_2, [self.nframes, 4])
         np.save(path, self.value_2)
 
-    def tearDown(self) :
+    def tearDown(self):
         shutil.rmtree(self.data_name)
 
-    def test_load_set_1(self) :
-        dd = DeepmdData(self.data_name)\
-             .add('value_1', 1, atomic=True, must=True, type_sel = [0])
-        data = dd._load_set(os.path.join(self.data_name, 'set.foo'))
-        self.assertEqual(data['value_1'].shape, (self.nframes, 2))
-        np.testing.assert_almost_equal(data['value_1'], self.value_1)
-                
+    def test_load_set_1(self):
+        dd = DeepmdData(self.data_name).add(
+            "value_1", 1, atomic=True, must=True, type_sel=[0]
+        )
+        data = dd._load_set(os.path.join(self.data_name, "set.foo"))
+        self.assertEqual(data["value_1"].shape, (self.nframes, 2))
+        np.testing.assert_almost_equal(data["value_1"], self.value_1)
 
-    def test_load_set_2(self) :
-        dd = DeepmdData(self.data_name)\
-             .add('value_2', 1, atomic=True, must=True, type_sel = [1])
-        data = dd._load_set(os.path.join(self.data_name, 'set.foo'))
-        self.assertEqual(data['value_2'].shape, (self.nframes, 4))
-        np.testing.assert_almost_equal(data['value_2'], self.value_2)          
+    def test_load_set_2(self):
+        dd = DeepmdData(self.data_name).add(
+            "value_2", 1, atomic=True, must=True, type_sel=[1]
+        )
+        data = dd._load_set(os.path.join(self.data_name, "set.foo"))
+        self.assertEqual(data["value_2"].shape, (self.nframes, 4))
+        np.testing.assert_almost_equal(data["value_2"], self.value_2)
 
 
-class TestData (unittest.TestCase) :
-    def setUp (self) :
-        self.data_name = 'test_data'
-        os.makedirs(self.data_name, exist_ok = True)
-        os.makedirs(os.path.join(self.data_name,'set.foo'), exist_ok = True)
-        os.makedirs(os.path.join(self.data_name,'set.bar'), exist_ok = True)
-        os.makedirs(os.path.join(self.data_name,'set.tar'), exist_ok = True)
-        np.savetxt(os.path.join(self.data_name, 'type.raw'), 
-                   np.array([1, 0]), 
-                   fmt = '%d')
-        np.savetxt(os.path.join(self.data_name, 'type_map.raw'), 
-                   np.array(['foo', 'bar']), 
-                   fmt = '%s')
+class TestData(unittest.TestCase):
+    def setUp(self):
+        self.data_name = "test_data"
+        os.makedirs(self.data_name, exist_ok=True)
+        os.makedirs(os.path.join(self.data_name, "set.foo"), exist_ok=True)
+        os.makedirs(os.path.join(self.data_name, "set.bar"), exist_ok=True)
+        os.makedirs(os.path.join(self.data_name, "set.tar"), exist_ok=True)
+        np.savetxt(os.path.join(self.data_name, "type.raw"), np.array([1, 0]), fmt="%d")
+        np.savetxt(
+            os.path.join(self.data_name, "type_map.raw"),
+            np.array(["foo", "bar"]),
+            fmt="%s",
+        )
         self.nframes = 5
         self.natoms = 2
         # coord
-        path = os.path.join(self.data_name, 'set.foo', 'coord.npy')
+        path = os.path.join(self.data_name, "set.foo", "coord.npy")
         self.coord = np.random.random([self.nframes, self.natoms, 3])
         np.save(path, np.reshape(self.coord, [self.nframes, -1]))
-        self.coord = self.coord[:,[1,0],:]
+        self.coord = self.coord[:, [1, 0], :]
         self.coord = self.coord.reshape([self.nframes, -1])
         # coord bar
-        path = os.path.join(self.data_name, 'set.bar', 'coord.npy')
+        path = os.path.join(self.data_name, "set.bar", "coord.npy")
         self.coord_bar = np.random.random([self.nframes, 3 * self.natoms])
         np.save(path, self.coord_bar)
         self.coord_bar = self.coord_bar.reshape([self.nframes, self.natoms, 3])
-        self.coord_bar = self.coord_bar[:,[1,0],:]
+        self.coord_bar = self.coord_bar[:, [1, 0], :]
         self.coord_bar = self.coord_bar.reshape([self.nframes, -1])
         # coord tar
-        path = os.path.join(self.data_name, 'set.tar', 'coord.npy')
+        path = os.path.join(self.data_name, "set.tar", "coord.npy")
         self.coord_tar = np.random.random([2, 3 * self.natoms])
         np.save(path, self.coord_tar)
         self.coord_tar = self.coord_tar.reshape([2, self.natoms, 3])
-        self.coord_tar = self.coord_tar[:,[1,0],:]
+        self.coord_tar = self.coord_tar[:, [1, 0], :]
         self.coord_tar = self.coord_tar.reshape([2, -1])
         # box
-        path = os.path.join(self.data_name, 'set.foo', 'box.npy')
+        path = os.path.join(self.data_name, "set.foo", "box.npy")
         self.box = np.random.random([self.nframes, 9])
         np.save(path, self.box)
         # box bar
-        path = os.path.join(self.data_name, 'set.bar', 'box.npy')
+        path = os.path.join(self.data_name, "set.bar", "box.npy")
         self.box_bar = np.random.random([self.nframes, 9])
         np.save(path, self.box_bar)
         # box tar
-        path = os.path.join(self.data_name, 'set.tar', 'box.npy')
+        path = os.path.join(self.data_name, "set.tar", "box.npy")
         self.box_tar = np.random.random([2, 9])
         np.save(path, self.box_tar)
         # t a
-        path = os.path.join(self.data_name, 'set.foo', 'test_atomic.npy')
+        path = os.path.join(self.data_name, "set.foo", "test_atomic.npy")
         self.test_atomic = np.random.random([self.nframes, self.natoms, 7])
-        self.redu_atomic = np.sum(self.test_atomic, axis = 1)
+        self.redu_atomic = np.sum(self.test_atomic, axis=1)
         np.save(path, np.reshape(self.test_atomic, [self.nframes, -1]))
-        self.test_atomic = self.test_atomic[:,[1,0],:]        
+        self.test_atomic = self.test_atomic[:, [1, 0], :]
         self.test_atomic = self.test_atomic.reshape([self.nframes, -1])
         # t f
-        path = os.path.join(self.data_name, 'set.foo', 'test_frame.npy')
+        path = os.path.join(self.data_name, "set.foo", "test_frame.npy")
         self.test_frame = np.random.random([self.nframes, 5])
         np.save(path, self.test_frame)
-        path = os.path.join(self.data_name, 'set.bar', 'test_frame.npy')
+        path = os.path.join(self.data_name, "set.bar", "test_frame.npy")
         self.test_frame_bar = np.random.random([self.nframes, 5])
         np.save(path, self.test_frame_bar)
         # t n
         self.test_null = np.zeros([self.nframes, 2 * self.natoms])
-    
-    def tearDown(self) :
+
+    def tearDown(self):
         shutil.rmtree(self.data_name)
 
-    def test_init (self) :
+    def test_init(self):
         dd = DeepmdData(self.data_name)
         self.assertEqual(dd.idx_map[0], 1)
         self.assertEqual(dd.idx_map[1], 0)
-        self.assertEqual(dd.type_map, ['foo', 'bar'])
-        self.assertEqual(dd.test_dir, 'test_data/set.tar')
-        self.assertEqual(dd.train_dirs, ['test_data/set.bar', 'test_data/set.foo'])
+        self.assertEqual(dd.type_map, ["foo", "bar"])
+        self.assertEqual(dd.test_dir, "test_data/set.tar")
+        self.assertEqual(dd.train_dirs, ["test_data/set.bar", "test_data/set.foo"])
 
-    def test_init_type_map (self) :
-        dd = DeepmdData(self.data_name, type_map = ['bar', 'foo', 'tar'])
+    def test_init_type_map(self):
+        dd = DeepmdData(self.data_name, type_map=["bar", "foo", "tar"])
         self.assertEqual(dd.idx_map[0], 0)
         self.assertEqual(dd.idx_map[1], 1)
         self.assertEqual(dd.atom_type[0], 0)
         self.assertEqual(dd.atom_type[1], 1)
-        self.assertEqual(dd.type_map, ['bar', 'foo', 'tar'])
+        self.assertEqual(dd.type_map, ["bar", "foo", "tar"])
 
-    def test_load_set(self) :
-        dd = DeepmdData(self.data_name)\
-             .add('test_atomic', 7, atomic=True, must=True)\
-             .add('test_frame', 5, atomic=False, must=True)\
-             .add('test_null', 2, atomic=True, must=False)
-        data = dd._load_set(os.path.join(self.data_name, 'set.foo'))
-        nframes = data['coord'].shape[0]
+    def test_load_set(self):
+        dd = (
+            DeepmdData(self.data_name)
+            .add("test_atomic", 7, atomic=True, must=True)
+            .add("test_frame", 5, atomic=False, must=True)
+            .add("test_null", 2, atomic=True, must=False)
+        )
+        data = dd._load_set(os.path.join(self.data_name, "set.foo"))
+        nframes = data["coord"].shape[0]
         self.assertEqual(dd.get_numb_set(), 2)
-        self.assertEqual(dd.get_type_map(), ['foo', 'bar'])
+        self.assertEqual(dd.get_type_map(), ["foo", "bar"])
         self.assertEqual(dd.get_natoms(), 2)
-        self.assertEqual(list(dd.get_natoms_vec(3)), [2,2,1,1,0])
-        for ii in range(nframes) :
-            self.assertEqual(data['type'][ii][0], 0)
-            self.assertEqual(data['type'][ii][1], 1)
-        self.assertEqual(data['find_coord'], 1)
-        self._comp_np_mat2(data['coord'], self.coord)
-        self.assertEqual(data['find_test_atomic'], 1)
-        self._comp_np_mat2(data['test_atomic'], self.test_atomic)
-        self.assertEqual(data['find_test_frame'], 1)
-        self._comp_np_mat2(data['test_frame'], self.test_frame)
-        self.assertEqual(data['find_test_null'], 0)
-        self._comp_np_mat2(data['test_null'], self.test_null)
+        self.assertEqual(list(dd.get_natoms_vec(3)), [2, 2, 1, 1, 0])
+        for ii in range(nframes):
+            self.assertEqual(data["type"][ii][0], 0)
+            self.assertEqual(data["type"][ii][1], 1)
+        self.assertEqual(data["find_coord"], 1)
+        self._comp_np_mat2(data["coord"], self.coord)
+        self.assertEqual(data["find_test_atomic"], 1)
+        self._comp_np_mat2(data["test_atomic"], self.test_atomic)
+        self.assertEqual(data["find_test_frame"], 1)
+        self._comp_np_mat2(data["test_frame"], self.test_frame)
+        self.assertEqual(data["find_test_null"], 0)
+        self._comp_np_mat2(data["test_null"], self.test_null)
 
-    def test_shuffle(self) :
-        dd = DeepmdData(self.data_name)\
-             .add('test_atomic', 7, atomic=True, must=True)\
-             .add('test_frame', 5, atomic=False, must=True)
-        data = dd._load_set(os.path.join(self.data_name, 'set.foo'))
+    def test_shuffle(self):
+        dd = (
+            DeepmdData(self.data_name)
+            .add("test_atomic", 7, atomic=True, must=True)
+            .add("test_frame", 5, atomic=False, must=True)
+        )
+        data = dd._load_set(os.path.join(self.data_name, "set.foo"))
         data_bk = copy.deepcopy(data)
         data, idx = dd._shuffle_data(data)
-        self._comp_np_mat2(data_bk['coord'][idx,:], 
-                           data['coord'])
-        self._comp_np_mat2(data_bk['test_atomic'][idx,:], 
-                           data['test_atomic'])
-        self._comp_np_mat2(data_bk['test_frame'][idx,:], 
-                           data['test_frame'])
+        self._comp_np_mat2(data_bk["coord"][idx, :], data["coord"])
+        self._comp_np_mat2(data_bk["test_atomic"][idx, :], data["test_atomic"])
+        self._comp_np_mat2(data_bk["test_frame"][idx, :], data["test_frame"])
 
     def test_shuffle_with_numb_copy(self):
-        path = os.path.join(self.data_name, 'set.foo', 'numb_copy.npy')
+        path = os.path.join(self.data_name, "set.foo", "numb_copy.npy")
         prob = np.arange(self.nframes)
         np.save(path, prob)
-        dd = DeepmdData(self.data_name)\
-             .add('test_atomic', 7, atomic=True, must=True)\
-             .add('test_frame', 5, atomic=False, must=True)
-        data = dd._load_set(os.path.join(self.data_name, 'set.foo'))
+        dd = (
+            DeepmdData(self.data_name)
+            .add("test_atomic", 7, atomic=True, must=True)
+            .add("test_frame", 5, atomic=False, must=True)
+        )
+        data = dd._load_set(os.path.join(self.data_name, "set.foo"))
         data_bk = copy.deepcopy(data)
         data, idx = dd._shuffle_data(data)
         assert idx.size == np.sum(prob)
-        self._comp_np_mat2(data_bk['coord'][idx,:], 
-                           data['coord'])
-        self._comp_np_mat2(data_bk['test_atomic'][idx,:], 
-                           data['test_atomic'])
-        self._comp_np_mat2(data_bk['test_frame'][idx,:], 
-                           data['test_frame'])
+        self._comp_np_mat2(data_bk["coord"][idx, :], data["coord"])
+        self._comp_np_mat2(data_bk["test_atomic"][idx, :], data["test_atomic"])
+        self._comp_np_mat2(data_bk["test_frame"][idx, :], data["test_frame"])
+
+    def test_reduce(self):
+        dd = DeepmdData(self.data_name).add("test_atomic", 7, atomic=True, must=True)
+        dd.reduce("redu", "test_atomic")
+        data = dd._load_set(os.path.join(self.data_name, "set.foo"))
+        self.assertEqual(data["find_test_atomic"], 1)
+        self._comp_np_mat2(data["test_atomic"], self.test_atomic)
+        self.assertEqual(data["find_redu"], 1)
+        self._comp_np_mat2(data["redu"], self.redu_atomic)
+
+    def test_reduce_null(self):
+        dd = DeepmdData(self.data_name).add("test_atomic_1", 7, atomic=True, must=False)
+        dd.reduce("redu", "test_atomic_1")
+        data = dd._load_set(os.path.join(self.data_name, "set.foo"))
+        self.assertEqual(data["find_test_atomic_1"], 0)
+        self._comp_np_mat2(
+            data["test_atomic_1"], np.zeros([self.nframes, self.natoms * 7])
+        )
+        self.assertEqual(data["find_redu"], 0)
+        self._comp_np_mat2(data["redu"], np.zeros([self.nframes, 7]))
 
-    def test_reduce(self) :
-        dd = DeepmdData(self.data_name)\
-             .add('test_atomic', 7, atomic=True, must=True)
-        dd.reduce('redu', 'test_atomic')        
-        data = dd._load_set(os.path.join(self.data_name, 'set.foo'))
-        self.assertEqual(data['find_test_atomic'], 1)
-        self._comp_np_mat2(data['test_atomic'], self.test_atomic)
-        self.assertEqual(data['find_redu'], 1)
-        self._comp_np_mat2(data['redu'], self.redu_atomic)
-        
-    def test_reduce_null(self) :
-        dd = DeepmdData(self.data_name)\
-             .add('test_atomic_1', 7, atomic=True, must=False)
-        dd.reduce('redu', 'test_atomic_1')
-        data = dd._load_set(os.path.join(self.data_name, 'set.foo'))
-        self.assertEqual(data['find_test_atomic_1'], 0)
-        self._comp_np_mat2(data['test_atomic_1'], np.zeros([self.nframes, self.natoms * 7]))
-        self.assertEqual(data['find_redu'], 0)
-        self._comp_np_mat2(data['redu'], np.zeros([self.nframes, 7]))
-    
     def test_load_null_must(self):
-        dd = DeepmdData(self.data_name)\
-             .add('test_atomic_1', 7, atomic=True, must=True)
-        with self.assertRaises(RuntimeError) :
-            data = dd._load_set(os.path.join(self.data_name, 'set.foo'))
+        dd = DeepmdData(self.data_name).add("test_atomic_1", 7, atomic=True, must=True)
+        with self.assertRaises(RuntimeError):
+            data = dd._load_set(os.path.join(self.data_name, "set.foo"))
 
-    def test_avg(self) :
-        dd = DeepmdData(self.data_name)\
-             .add('test_frame', 5, atomic=False, must=True)
-        favg = dd.avg('test_frame')
-        fcmp = np.average(np.concatenate((self.test_frame, self.test_frame_bar), axis = 0), axis = 0)
+    def test_avg(self):
+        dd = DeepmdData(self.data_name).add("test_frame", 5, atomic=False, must=True)
+        favg = dd.avg("test_frame")
+        fcmp = np.average(
+            np.concatenate((self.test_frame, self.test_frame_bar), axis=0), axis=0
+        )
         np.testing.assert_almost_equal(favg, fcmp, places)
 
-    def test_check_batch_size(self) :
+    def test_check_batch_size(self):
         dd = DeepmdData(self.data_name)
         ret = dd.check_batch_size(10)
-        self.assertEqual(ret, (os.path.join(self.data_name,'set.bar'), 5))
+        self.assertEqual(ret, (os.path.join(self.data_name, "set.bar"), 5))
         ret = dd.check_batch_size(5)
         self.assertEqual(ret, None)
 
     def test_check_test_size(self):
         dd = DeepmdData(self.data_name)
         ret = dd.check_test_size(10)
-        self.assertEqual(ret, (os.path.join(self.data_name,'set.tar'), 2))
+        self.assertEqual(ret, (os.path.join(self.data_name, "set.tar"), 2))
         ret = dd.check_test_size(2)
         self.assertEqual(ret, None)
 
-    def test_get_batch(self) :
+    def test_get_batch(self):
         dd = DeepmdData(self.data_name)
         data = dd.get_batch(5)
-        self._comp_np_mat2(np.sort(data['coord'], axis = 0), 
-                           np.sort(self.coord_bar, axis = 0))
+        self._comp_np_mat2(
+            np.sort(data["coord"], axis=0), np.sort(self.coord_bar, axis=0)
+        )
         data = dd.get_batch(5)
-        self._comp_np_mat2(np.sort(data['coord'], axis = 0), 
-                           np.sort(self.coord, axis = 0))
+        self._comp_np_mat2(np.sort(data["coord"], axis=0), np.sort(self.coord, axis=0))
         data = dd.get_batch(5)
-        self._comp_np_mat2(np.sort(data['coord'], axis = 0), 
-                           np.sort(self.coord_bar, axis = 0))
+        self._comp_np_mat2(
+            np.sort(data["coord"], axis=0), np.sort(self.coord_bar, axis=0)
+        )
         data = dd.get_batch(5)
-        self._comp_np_mat2(np.sort(data['coord'], axis = 0), 
-                           np.sort(self.coord, axis = 0))
+        self._comp_np_mat2(np.sort(data["coord"], axis=0), np.sort(self.coord, axis=0))
 
-    def test_get_test(self) :
+    def test_get_test(self):
         dd = DeepmdData(self.data_name)
         data = dd.get_test()
-        self._comp_np_mat2(np.sort(data['coord'], axis = 0), 
-                           np.sort(self.coord_tar, axis = 0))
+        self._comp_np_mat2(
+            np.sort(data["coord"], axis=0), np.sort(self.coord_tar, axis=0)
+        )
 
     def test_get_nbatch(self):
         dd = DeepmdData(self.data_name)
@@ -273,22 +288,22 @@ def test_get_nbatch(self):
         self.assertEqual(nb, 5)
         nb = dd.get_numb_batch(2, 0)
         self.assertEqual(nb, 2)
-        
-    def _comp_np_mat2(self, first, second) :
+
+    def _comp_np_mat2(self, first, second):
         np.testing.assert_almost_equal(first, second, places)
 
 
-class TestH5Data (unittest.TestCase) :
-    def setUp (self) :
-        self.data_name = str(tests_path / 'test.hdf5')
+class TestH5Data(unittest.TestCase):
+    def setUp(self):
+        self.data_name = str(tests_path / "test.hdf5")
 
-    def test_init (self) :
+    def test_init(self):
         dd = DeepmdData(self.data_name)
         self.assertEqual(dd.idx_map[0], 0)
-        self.assertEqual(dd.type_map, ['X'])
-        self.assertEqual(dd.test_dir, self.data_name + '#/set.000')
-        self.assertEqual(dd.train_dirs, [self.data_name + '#/set.000'])
+        self.assertEqual(dd.type_map, ["X"])
+        self.assertEqual(dd.test_dir, self.data_name + "#/set.000")
+        self.assertEqual(dd.train_dirs, [self.data_name + "#/set.000"])
 
-    def test_get_batch(self) :
+    def test_get_batch(self):
         dd = DeepmdData(self.data_name)
         data = dd.get_batch(5)
diff --git a/source/tests/test_deepmd_data_sys.py b/source/tests/test_deepmd_data_sys.py
index 5582bdb4c2..6ce034d655 100644
--- a/source/tests/test_deepmd_data_sys.py
+++ b/source/tests/test_deepmd_data_sys.py
@@ -1,77 +1,83 @@
-import os,sys,shutil,copy
-import numpy as np
+import copy
+import os
+import shutil
+import sys
 import unittest
 
-from deepmd.utils.data_system import DeepmdDataSystem
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
+import numpy as np
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
 
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     places = 6
 else:
     places = 12
 
-class TestDataSystem (unittest.TestCase) :
-    def setUp (self) :
+
+class TestDataSystem(unittest.TestCase):
+    def setUp(self):
         self.nsys = 4
         self.nframes = [3, 6, 5, 4]
         self.natoms = [3, 4, 6, 5]
-        self.atom_type = [[1, 0, 0], 
-                          [2, 1, 0, 2],
-                          [0, 0, 1, 1, 2, 1],
-                          [0, 2, 2, 0, 0]]
+        self.atom_type = [[1, 0, 0], [2, 1, 0, 2], [0, 0, 1, 1, 2, 1], [0, 2, 2, 0, 0]]
         self.test_ndof = 2
         self.sys_name = []
         self.nset = 3
-        for ii in range(self.nsys) :
-            sys_name = 'sys_%d' % ii
+        for ii in range(self.nsys):
+            sys_name = "sys_%d" % ii
             self.sys_name.append(sys_name)
-            os.makedirs(sys_name, exist_ok = True)
-            np.savetxt(os.path.join(sys_name, 'type.raw'), 
-                       self.atom_type[ii], 
-                       fmt = '%d')
-            for jj in range(self.nset) :
-                set_name = os.path.join(sys_name, 'set.%03d' % jj)
-                os.makedirs(set_name, exist_ok = True)
-                path = os.path.join(set_name, 'coord.npy')
-                val = np.random.random([self.nframes[ii]+jj, self.natoms[ii]*3])
+            os.makedirs(sys_name, exist_ok=True)
+            np.savetxt(os.path.join(sys_name, "type.raw"), self.atom_type[ii], fmt="%d")
+            for jj in range(self.nset):
+                set_name = os.path.join(sys_name, "set.%03d" % jj)
+                os.makedirs(set_name, exist_ok=True)
+                path = os.path.join(set_name, "coord.npy")
+                val = np.random.random([self.nframes[ii] + jj, self.natoms[ii] * 3])
                 np.save(path, val)
-                path = os.path.join(set_name, 'box.npy')
-                val = np.random.random([self.nframes[ii]+jj, 9]) * 10
+                path = os.path.join(set_name, "box.npy")
+                val = np.random.random([self.nframes[ii] + jj, 9]) * 10
                 np.save(path, val)
-                path = os.path.join(set_name, 'test.npy')
-                val = np.random.random([self.nframes[ii]+jj, self.natoms[ii]*self.test_ndof])
+                path = os.path.join(set_name, "test.npy")
+                val = np.random.random(
+                    [self.nframes[ii] + jj, self.natoms[ii] * self.test_ndof]
+                )
                 np.save(path, val)
-                
+
     def tearDown(self):
-        for ii in range(self.nsys) :
-            sys_name = 'sys_%d' % ii
-            shutil.rmtree(sys_name)            
+        for ii in range(self.nsys):
+            sys_name = "sys_%d" % ii
+            shutil.rmtree(sys_name)
 
-    def test_ntypes(self) :
+    def test_ntypes(self):
         batch_size = 3
         test_size = 2
         ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
-        ds.add('test', self.test_ndof, atomic = True, must = True)
-        ds.add('null', self.test_ndof, atomic = True, must = False)
+        ds.add("test", self.test_ndof, atomic=True, must=True)
+        ds.add("null", self.test_ndof, atomic=True, must=False)
         self.assertEqual(ds.get_ntypes(), 3)
         self.assertEqual(ds.get_nbatches(), [2, 4, 3, 2])
         self.assertEqual(ds.get_nsystems(), self.nsys)
-        self.assertEqual(list(ds.get_batch_size()), [batch_size]*4)
-        
+        self.assertEqual(list(ds.get_batch_size()), [batch_size] * 4)
+
     def test_batch_size_5(self):
-        batch_size = 'auto:5'
+        batch_size = "auto:5"
         test_size = 2
         ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
         self.assertEqual(ds.batch_size, [2, 2, 1, 1])
-        
+
     def test_batch_size_null(self):
-        batch_size = 'auto:3'
+        batch_size = "auto:3"
         test_size = 2
         ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
         self.assertEqual(ds.batch_size, [1, 1, 1, 1])
 
     def test_batch_size_raise(self):
-        batch_size = 'foo'
+        batch_size = "foo"
         test_size = 2
         with self.assertRaises(RuntimeError):
             ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
@@ -80,166 +86,225 @@ def test_get_test(self):
         batch_size = 3
         test_size = 2
         ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
-        ds.add('test', self.test_ndof, atomic = True, must = True)
-        ds.add('null', self.test_ndof, atomic = True, must = False)
-        ds.add('ones', self.test_ndof, atomic = True, must = False, default=1.)
+        ds.add("test", self.test_ndof, atomic=True, must=True)
+        ds.add("null", self.test_ndof, atomic=True, must=False)
+        ds.add("ones", self.test_ndof, atomic=True, must=False, default=1.0)
         sys_idx = 0
         data = ds.get_test(sys_idx=sys_idx)
-        self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
-        self._in_array(np.load('sys_0/set.002/coord.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       3, 
-                       data['coord'])
-        self._in_array(np.load('sys_0/set.002/test.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       self.test_ndof,
-                       data['test'])
-        self.assertAlmostEqual(np.linalg.norm(np.zeros([self.nframes[sys_idx]+2,
-                                                        self.natoms[sys_idx]*self.test_ndof])
-                                              -
-                                              data['null']
-        ), 0.0)
-        self.assertAlmostEqual(np.linalg.norm(np.ones([self.nframes[sys_idx]+2,
-                                                self.natoms[sys_idx]*self.test_ndof])
-                                        -
-                                        data['ones']
-        ), 0.0)
+        self.assertEqual(list(data["type"][0]), list(np.sort(self.atom_type[sys_idx])))
+        self._in_array(
+            np.load("sys_0/set.002/coord.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            3,
+            data["coord"],
+        )
+        self._in_array(
+            np.load("sys_0/set.002/test.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            self.test_ndof,
+            data["test"],
+        )
+        self.assertAlmostEqual(
+            np.linalg.norm(
+                np.zeros(
+                    [self.nframes[sys_idx] + 2, self.natoms[sys_idx] * self.test_ndof]
+                )
+                - data["null"]
+            ),
+            0.0,
+        )
+        self.assertAlmostEqual(
+            np.linalg.norm(
+                np.ones(
+                    [self.nframes[sys_idx] + 2, self.natoms[sys_idx] * self.test_ndof]
+                )
+                - data["ones"]
+            ),
+            0.0,
+        )
 
         sys_idx = 2
         data = ds.get_test(sys_idx=sys_idx)
-        self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
-        self._in_array(np.load('sys_2/set.002/coord.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       3, 
-                       data['coord'])
-        self._in_array(np.load('sys_2/set.002/test.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       self.test_ndof,
-                       data['test'])
-        self.assertAlmostEqual(np.linalg.norm(np.zeros([self.nframes[sys_idx]+2,
-                                                        self.natoms[sys_idx]*self.test_ndof])
-                                              -
-                                              data['null']
-        ), 0.0)
-        
+        self.assertEqual(list(data["type"][0]), list(np.sort(self.atom_type[sys_idx])))
+        self._in_array(
+            np.load("sys_2/set.002/coord.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            3,
+            data["coord"],
+        )
+        self._in_array(
+            np.load("sys_2/set.002/test.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            self.test_ndof,
+            data["test"],
+        )
+        self.assertAlmostEqual(
+            np.linalg.norm(
+                np.zeros(
+                    [self.nframes[sys_idx] + 2, self.natoms[sys_idx] * self.test_ndof]
+                )
+                - data["null"]
+            ),
+            0.0,
+        )
 
     def test_get_batch(self):
         batch_size = 3
         test_size = 2
         ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
-        ds.add('test', self.test_ndof, atomic = True, must = True)
-        ds.add('null', self.test_ndof, atomic = True, must = False)
+        ds.add("test", self.test_ndof, atomic=True, must=True)
+        ds.add("null", self.test_ndof, atomic=True, must=False)
         sys_idx = 0
         data = ds.get_batch(sys_idx=sys_idx)
-        self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
-        self._in_array(np.load('sys_0/set.000/coord.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       3, 
-                       data['coord'])
-        self._in_array(np.load('sys_0/set.000/test.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       self.test_ndof,
-                       data['test'])
-        self.assertAlmostEqual(np.linalg.norm(np.zeros([batch_size,
-                                                        self.natoms[sys_idx]*self.test_ndof])
-                                              -
-                                              data['null']
-        ), 0.0)
+        self.assertEqual(list(data["type"][0]), list(np.sort(self.atom_type[sys_idx])))
+        self._in_array(
+            np.load("sys_0/set.000/coord.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            3,
+            data["coord"],
+        )
+        self._in_array(
+            np.load("sys_0/set.000/test.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            self.test_ndof,
+            data["test"],
+        )
+        self.assertAlmostEqual(
+            np.linalg.norm(
+                np.zeros([batch_size, self.natoms[sys_idx] * self.test_ndof])
+                - data["null"]
+            ),
+            0.0,
+        )
         data = ds.get_batch(sys_idx=sys_idx)
-        self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
-        self._in_array(np.load('sys_0/set.001/coord.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       3, 
-                       data['coord'])
-        self._in_array(np.load('sys_0/set.001/test.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       self.test_ndof,
-                       data['test'])
-        self.assertAlmostEqual(np.linalg.norm(np.zeros([batch_size, 
-                                                        self.natoms[sys_idx]*self.test_ndof])
-                                              -
-                                              data['null']
-        ), 0.0)
+        self.assertEqual(list(data["type"][0]), list(np.sort(self.atom_type[sys_idx])))
+        self._in_array(
+            np.load("sys_0/set.001/coord.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            3,
+            data["coord"],
+        )
+        self._in_array(
+            np.load("sys_0/set.001/test.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            self.test_ndof,
+            data["test"],
+        )
+        self.assertAlmostEqual(
+            np.linalg.norm(
+                np.zeros([batch_size, self.natoms[sys_idx] * self.test_ndof])
+                - data["null"]
+            ),
+            0.0,
+        )
         data = ds.get_batch(sys_idx=sys_idx)
-        self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
-        self._in_array(np.load('sys_0/set.000/coord.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       3, 
-                       data['coord'])
-        self._in_array(np.load('sys_0/set.000/test.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       self.test_ndof,
-                       data['test'])
-        self.assertAlmostEqual(np.linalg.norm(np.zeros([batch_size, 
-                                                        self.natoms[sys_idx]*self.test_ndof])
-                                              -
-                                              data['null']
-        ), 0.0)
+        self.assertEqual(list(data["type"][0]), list(np.sort(self.atom_type[sys_idx])))
+        self._in_array(
+            np.load("sys_0/set.000/coord.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            3,
+            data["coord"],
+        )
+        self._in_array(
+            np.load("sys_0/set.000/test.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            self.test_ndof,
+            data["test"],
+        )
+        self.assertAlmostEqual(
+            np.linalg.norm(
+                np.zeros([batch_size, self.natoms[sys_idx] * self.test_ndof])
+                - data["null"]
+            ),
+            0.0,
+        )
         sys_idx = 2
         data = ds.get_batch(sys_idx=sys_idx)
-        self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
-        self._in_array(np.load('sys_2/set.000/coord.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       3, 
-                       data['coord'])
-        self._in_array(np.load('sys_2/set.000/test.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       self.test_ndof,
-                       data['test'])
-        self.assertAlmostEqual(np.linalg.norm(np.zeros([batch_size, 
-                                                        self.natoms[sys_idx]*self.test_ndof])
-                                              -
-                                              data['null']
-        ), 0.0)
+        self.assertEqual(list(data["type"][0]), list(np.sort(self.atom_type[sys_idx])))
+        self._in_array(
+            np.load("sys_2/set.000/coord.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            3,
+            data["coord"],
+        )
+        self._in_array(
+            np.load("sys_2/set.000/test.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            self.test_ndof,
+            data["test"],
+        )
+        self.assertAlmostEqual(
+            np.linalg.norm(
+                np.zeros([batch_size, self.natoms[sys_idx] * self.test_ndof])
+                - data["null"]
+            ),
+            0.0,
+        )
         data = ds.get_batch(sys_idx=sys_idx)
-        self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
-        self._in_array(np.load('sys_2/set.001/coord.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       3, 
-                       data['coord'])
-        self._in_array(np.load('sys_2/set.001/test.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       self.test_ndof,
-                       data['test'])
-        self.assertAlmostEqual(np.linalg.norm(np.zeros([batch_size, 
-                                                        self.natoms[sys_idx]*self.test_ndof])
-                                              -
-                                              data['null']
-        ), 0.0)
+        self.assertEqual(list(data["type"][0]), list(np.sort(self.atom_type[sys_idx])))
+        self._in_array(
+            np.load("sys_2/set.001/coord.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            3,
+            data["coord"],
+        )
+        self._in_array(
+            np.load("sys_2/set.001/test.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            self.test_ndof,
+            data["test"],
+        )
+        self.assertAlmostEqual(
+            np.linalg.norm(
+                np.zeros([batch_size, self.natoms[sys_idx] * self.test_ndof])
+                - data["null"]
+            ),
+            0.0,
+        )
         data = ds.get_batch(sys_idx=sys_idx)
-        self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
-        self._in_array(np.load('sys_2/set.001/coord.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       3, 
-                       data['coord'])
-        self._in_array(np.load('sys_2/set.001/test.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       self.test_ndof,
-                       data['test'])
-        self.assertAlmostEqual(np.linalg.norm(np.zeros([batch_size, 
-                                                        self.natoms[sys_idx]*self.test_ndof])
-                                              -
-                                              data['null']
-        ), 0.0)
+        self.assertEqual(list(data["type"][0]), list(np.sort(self.atom_type[sys_idx])))
+        self._in_array(
+            np.load("sys_2/set.001/coord.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            3,
+            data["coord"],
+        )
+        self._in_array(
+            np.load("sys_2/set.001/test.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            self.test_ndof,
+            data["test"],
+        )
+        self.assertAlmostEqual(
+            np.linalg.norm(
+                np.zeros([batch_size, self.natoms[sys_idx] * self.test_ndof])
+                - data["null"]
+            ),
+            0.0,
+        )
         data = ds.get_batch(sys_idx=sys_idx)
-        self.assertEqual(list(data['type'][0]), list(np.sort(self.atom_type[sys_idx])))
-        self._in_array(np.load('sys_2/set.000/coord.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       3, 
-                       data['coord'])
-        self._in_array(np.load('sys_2/set.000/test.npy'),
-                       ds.get_sys(sys_idx).idx_map,
-                       self.test_ndof,
-                       data['test'])
-        self.assertAlmostEqual(np.linalg.norm(np.zeros([batch_size, 
-                                                        self.natoms[sys_idx]*self.test_ndof])
-                                              -
-                                              data['null']
-        ), 0.0)
-
-
+        self.assertEqual(list(data["type"][0]), list(np.sort(self.atom_type[sys_idx])))
+        self._in_array(
+            np.load("sys_2/set.000/coord.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            3,
+            data["coord"],
+        )
+        self._in_array(
+            np.load("sys_2/set.000/test.npy"),
+            ds.get_sys(sys_idx).idx_map,
+            self.test_ndof,
+            data["test"],
+        )
+        self.assertAlmostEqual(
+            np.linalg.norm(
+                np.zeros([batch_size, self.natoms[sys_idx] * self.test_ndof])
+                - data["null"]
+            ),
+            0.0,
+        )
 
-    def test_prob_sys_size_1(self) :
+    def test_prob_sys_size_1(self):
         batch_size = 1
         test_size = 1
         ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
@@ -249,12 +314,19 @@ def test_prob_sys_size_1(self) :
         self.assertAlmostEqual(np.sum(prob[2:4]), 0.8)
         # number of training set is self.nset-1
         # shift is the total number of set size shift...
-        shift = np.sum(np.arange(self.nset-1))
-        self.assertAlmostEqual(prob[1]/prob[0], float(self.nframes[1]*(self.nset-1)+shift)/float(self.nframes[0]*(self.nset-1)+shift))
-        self.assertAlmostEqual(prob[3]/prob[2], float(self.nframes[3]*(self.nset-1)+shift)/float(self.nframes[2]*(self.nset-1)+shift))
+        shift = np.sum(np.arange(self.nset - 1))
+        self.assertAlmostEqual(
+            prob[1] / prob[0],
+            float(self.nframes[1] * (self.nset - 1) + shift)
+            / float(self.nframes[0] * (self.nset - 1) + shift),
+        )
+        self.assertAlmostEqual(
+            prob[3] / prob[2],
+            float(self.nframes[3] * (self.nset - 1) + shift)
+            / float(self.nframes[2] * (self.nset - 1) + shift),
+        )
 
-
-    def test_prob_sys_size_1(self) :
+    def test_prob_sys_size_1(self):
         batch_size = 1
         test_size = 1
         ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
@@ -264,35 +336,38 @@ def test_prob_sys_size_1(self) :
         self.assertAlmostEqual(np.sum(prob[2:4]), 0.8)
         # number of training set is self.nset-1
         # shift is the total number of set size shift...
-        shift = np.sum(np.arange(self.nset-1))
+        shift = np.sum(np.arange(self.nset - 1))
         self.assertAlmostEqual(prob[0], 0.0)
         self.assertAlmostEqual(prob[1], 0.2)
-        self.assertAlmostEqual(prob[3]/prob[2], float(self.nframes[3]*(self.nset-1)+shift)/float(self.nframes[2]*(self.nset-1)+shift))
-
+        self.assertAlmostEqual(
+            prob[3] / prob[2],
+            float(self.nframes[3] * (self.nset - 1) + shift)
+            / float(self.nframes[2] * (self.nset - 1) + shift),
+        )
 
     def _idx_map(self, target, idx_map, ndof):
         natoms = len(idx_map)
         target = target.reshape([-1, natoms, ndof])
-        target = target[:,idx_map,:]
+        target = target[:, idx_map, :]
         target = target.reshape([-1, natoms * ndof])
-        return target        
+        return target
 
     def _in_array(self, target, idx_map, ndof, array):
         target = self._idx_map(target, idx_map, ndof)
         all_find = []
-        for ii in array :
+        for ii in array:
             find = False
-            for jj in target :
-                if np.linalg.norm(ii - jj) < 1e-5 :
+            for jj in target:
+                if np.linalg.norm(ii - jj) < 1e-5:
                     find = True
             all_find.append(find)
-        for idx,ii in enumerate(all_find) :
-            self.assertTrue(ii, msg = 'does not find frame %d in array' % idx)
+        for idx, ii in enumerate(all_find):
+            self.assertTrue(ii, msg="does not find frame %d in array" % idx)
 
     def test_sys_prob_floating_point_error(self):
         # test floating point error; See #1917
         sys_probs = [
-            0.010,                                                           
+            0.010,
             0.010,
             0.010,
             0.010,
@@ -322,6 +397,6 @@ def test_sys_prob_floating_point_error(self):
             0.040,
             0.040,
             0.005,
-            ]
+        ]
         ds = DeepmdDataSystem(self.sys_name, 3, 2, 2.0, sys_probs=sys_probs)
         self.assertEqual(ds.sys_probs.size, len(sys_probs))
diff --git a/source/tests/test_deeppolar.py b/source/tests/test_deeppolar.py
index 93112c163c..a47c14bc80 100644
--- a/source/tests/test_deeppolar.py
+++ b/source/tests/test_deeppolar.py
@@ -1,34 +1,88 @@
-import os,sys,platform,shutil,dpdata
-import numpy as np
+import os
+import platform
+import shutil
+import sys
 import unittest
 
-from infer.convert2pb import convert_pbtxt_to_pb
-from deepmd.infer import DeepPolar
-from common import tests_path, tf
+import dpdata
+import numpy as np
+from common import (
+    tests_path,
+    tf,
+)
+from infer.convert2pb import (
+    convert_pbtxt_to_pb,
+)
 from packaging.version import parse as parse_version
 
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.infer import (
+    DeepPolar,
+)
+
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
-else :
+else:
     default_places = 10
 
-class TestDeepPolarPBC(unittest.TestCase) :
+
+class TestDeepPolarPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deeppolar.pbtxt")), "deeppolar.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deeppolar.pbtxt")), "deeppolar.pb"
+        )
         cls.dp = DeepPolar("deeppolar.pb")
 
     def setUp(self):
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
-        self.expected_d = np.array([1.061407927405987051e-01,-3.569013342133873778e-01,-2.862108976089940138e-02,-3.569013342133875444e-01,1.304367268874677244e+00,1.037647501453442256e-01,-2.862108976089940138e-02,1.037647501453441284e-01,8.100521520762453409e-03,1.236797829492216616e+00,-3.717307430531632262e-01,7.371515676976750919e-01,-3.717307430531630041e-01,1.127222682121889058e-01,-2.239181552775717510e-01,7.371515676976746478e-01,-2.239181552775717787e-01,4.448255365635306879e-01])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+        self.expected_d = np.array(
+            [
+                1.061407927405987051e-01,
+                -3.569013342133873778e-01,
+                -2.862108976089940138e-02,
+                -3.569013342133875444e-01,
+                1.304367268874677244e00,
+                1.037647501453442256e-01,
+                -2.862108976089940138e-02,
+                1.037647501453441284e-01,
+                8.100521520762453409e-03,
+                1.236797829492216616e00,
+                -3.717307430531632262e-01,
+                7.371515676976750919e-01,
+                -3.717307430531630041e-01,
+                1.127222682121889058e-01,
+                -2.239181552775717510e-01,
+                7.371515676976746478e-01,
+                -2.239181552775717787e-01,
+                4.448255365635306879e-01,
+            ]
+        )
 
     @classmethod
     def tearDownClass(cls):
@@ -37,8 +91,8 @@ def tearDownClass(cls):
 
     def test_attrs(self):
         self.assertEqual(self.dp.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp.get_sel_type(), [0])
 
     def test_1frame_atm(self):
@@ -47,7 +101,7 @@ def test_1frame_atm(self):
         nframes = 1
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(dd.shape, (nframes,nsel,9))
+        self.assertEqual(dd.shape, (nframes, nsel, 9))
         # check values
         np.testing.assert_almost_equal(dd.ravel(), self.expected_d, default_places)
 
@@ -59,34 +113,72 @@ def test_2frame_atm(self):
         nframes = 2
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(dd.shape, (nframes,nsel,9))
+        self.assertEqual(dd.shape, (nframes, nsel, 9))
         # check values
         expected_d = np.concatenate((self.expected_d, self.expected_d))
         np.testing.assert_almost_equal(dd.ravel(), expected_d, default_places)
 
 
-
-class TestDeepPolarNoPBC(unittest.TestCase) :
+class TestDeepPolarNoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deeppolar.pbtxt")), "deeppolar.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deeppolar.pbtxt")), "deeppolar.pb"
+        )
         cls.dp = DeepPolar("deeppolar.pb")
 
     def setUp(self):
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([20., 0., 0., 0., 20., 0., 0., 0., 20.])
-        self.expected_d = np.array([5.601785462021734e-01, -2.346693909765864e-01, -4.239188998286720e-01, -2.346693909765862e-01, 9.830744757127260e-02, 1.775876472255247e-01, -4.239188998286717e-01, 1.775876472255248e-01, 3.208034917622381e-01, 1.302526099276315e+00, -3.784198124746947e-01, 7.548241853986054e-01, -3.784198124746949e-01, 1.098824690874320e-01, -2.194150345809899e-01, 7.548241853986057e-01, -2.194150345809898e-01, 4.382376148484938e-01])
+        self.box = np.array([20.0, 0.0, 0.0, 0.0, 20.0, 0.0, 0.0, 0.0, 20.0])
+        self.expected_d = np.array(
+            [
+                5.601785462021734e-01,
+                -2.346693909765864e-01,
+                -4.239188998286720e-01,
+                -2.346693909765862e-01,
+                9.830744757127260e-02,
+                1.775876472255247e-01,
+                -4.239188998286717e-01,
+                1.775876472255248e-01,
+                3.208034917622381e-01,
+                1.302526099276315e00,
+                -3.784198124746947e-01,
+                7.548241853986054e-01,
+                -3.784198124746949e-01,
+                1.098824690874320e-01,
+                -2.194150345809899e-01,
+                7.548241853986057e-01,
+                -2.194150345809898e-01,
+                4.382376148484938e-01,
+            ]
+        )
 
     @classmethod
     def tearDownClass(cls):
         os.remove("deeppolar.pb")
-        cls.dp = None    
+        cls.dp = None
 
     def test_1frame_atm(self):
         dd = self.dp.eval(self.coords, None, self.atype)
@@ -94,7 +186,7 @@ def test_1frame_atm(self):
         nframes = 1
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(dd.shape, (nframes,nsel,9))
+        self.assertEqual(dd.shape, (nframes, nsel, 9))
         # check values
         np.testing.assert_almost_equal(dd.ravel(), self.expected_d, default_places)
 
@@ -104,51 +196,749 @@ def test_1frame_atm_large_box(self):
         nframes = 1
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(dd.shape, (nframes,nsel,9))
+        self.assertEqual(dd.shape, (nframes, nsel, 9))
         # check values
         np.testing.assert_almost_equal(dd.ravel(), self.expected_d, default_places)
 
 
-@unittest.skipIf(parse_version(tf.__version__) < parse_version("1.15"), 
-    f"The current tf version {tf.__version__} is too low to run the new testing model.")
-class TestDeepPolarNewPBC(unittest.TestCase) :
+@unittest.skipIf(
+    parse_version(tf.__version__) < parse_version("1.15"),
+    f"The current tf version {tf.__version__} is too low to run the new testing model.",
+)
+class TestDeepPolarNewPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deeppolar_new.pbtxt")), "deeppolar_new.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deeppolar_new.pbtxt")),
+            "deeppolar_new.pb",
+        )
         cls.dp = DeepPolar("deeppolar_new.pb")
 
     def setUp(self):
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.nout = 9
         self.atype = np.array([0, 1, 1, 0, 1, 1])
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
-        self.expected_t = np.array([1.936327241487292961e+00, 5.198696351735779264e-02, 3.888336625074450149e-03, 5.198696351735781346e-02, 1.764967784387830196e+00, -1.354658545697527347e-02, 3.888336625074451016e-03, -1.354658545697527000e-02, 1.939288409902199639e+00, 1.786740420980893029e+00, 4.868765294055640847e-02, -9.812132615180739481e-02, 4.868765294055640847e-02, 1.925999147066305373e+00, 2.895028407651457567e-02, -9.812132615180743644e-02, 2.895028407651457220e-02, 1.883109989034779996e+00])
-        self.expected_f = np.array([5.305178446980116092e-02, -1.127314829623577049e-02, 1.136493514861047216e-01, 5.598130220328862322e-05, -4.352126938892845326e-02, -7.700608888887500170e-02, -1.050015668789053697e-01, 5.882396336737016895e-02, -3.723875897544067642e-02, -7.850322286760008650e-02, 7.279117637753844405e-02, -6.178451060078461732e-02, 3.404361490778949895e-01, 5.447934529195214842e-02, -8.698375128815737101e-02, -2.100391251033939810e-01, -1.313000673516965255e-01, 1.493637582671529240e-01, -9.589318874236771317e-02, 6.285887854370801608e-02, -1.824395427630142175e-01, -3.264267092869802683e-02, 3.637498661083633789e-02, 1.524859582123189172e-01, 1.442484990808054202e-01, -8.957992476622803069e-02, 3.076469140583825215e-02, 4.909822745881124717e-02, -2.559151672032903835e-01, -1.522830913546814324e-01, -2.885480042033320910e-02, 7.730841025065784966e-02, 1.553301391955271560e-01, -3.595606644821771475e-02, 1.689528165643162105e-01, -3.858154695988691516e-03, 5.018843026262573281e-02, 1.756005154318779349e-02, 3.489323893614350303e-02, -4.020411124876955428e-02, 2.218648284685413238e-02, -8.086177159691650476e-03, -2.222392408702593067e-02, -3.825892777133557687e-02, -1.689393838770965675e-02, -5.465804822761769627e-02, -1.398775875506316768e-01, -1.165702490994514756e-01, 5.449067849718619572e-02, 1.588580450812354106e-01, -8.209560373418453572e-02, 1.240697480360127003e-02, -2.046806414931008622e-02, 1.887527294448937965e-01, -9.589318874236771317e-02, 6.285887854370801608e-02, -1.824395427630142175e-01, -3.264267092869802683e-02, 3.637498661083633789e-02, 1.524859582123189172e-01, 1.442484990808054202e-01, -8.957992476622803069e-02, 3.076469140583825215e-02, 4.909822745881124717e-02, -2.559151672032903835e-01, -1.522830913546814324e-01, -2.885480042033320910e-02, 7.730841025065784966e-02, 1.553301391955271560e-01, -3.595606644821771475e-02, 1.689528165643162105e-01, -3.858154695988691516e-03, 4.038746042068122599e-02, -2.549213597407858356e-01, 
-1.131801705114504619e-01, 1.489732376295762606e-01, 2.734584831542113958e-01, -1.125511889088352951e-01, -1.908551011160136424e-01, -2.400995606986339528e-02, 2.255650484976146619e-01, -2.185213968874370055e-02, 1.475333123369945709e-01, 9.584417756169674729e-02, -1.576380405016522893e-02, -5.153693137796186430e-02, -8.489897831367294867e-02, 3.911034680466508873e-02, -9.052354830259493057e-02, -1.077888832535272776e-02, -1.970229486427777510e-01, -6.538978166042377915e-02, -1.570533119125729904e-01, 1.417940206277617798e-01, -4.684714285705613573e-02, 6.070882964241105378e-02, 5.715183445260185735e-02, 1.138024049318459713e-01, 9.374622673558237473e-02, 3.096927839536914306e-02, -9.232883741117139942e-02, -6.499836527010099951e-02, 2.839980861544661936e-02, 8.097497759757724123e-03, 1.006700103228213017e-01, -6.129199344840163821e-02, 8.266585923704758421e-02, -3.307338951814068478e-02, 5.018843026262574669e-02, 1.756005154318778308e-02, 3.489323893614350997e-02, -4.020411124876957509e-02, 2.218648284685414279e-02, -8.086177159691652211e-03, -2.222392408702593067e-02, -3.825892777133557687e-02, -1.689393838770965675e-02, -5.465804822761770321e-02, -1.398775875506316491e-01, -1.165702490994514756e-01, 5.449067849718619572e-02, 1.588580450812354106e-01, -8.209560373418453572e-02, 1.240697480360125615e-02, -2.046806414931009316e-02, 1.887527294448937965e-01, -1.970229486427777510e-01, -6.538978166042375140e-02, -1.570533119125729626e-01, 1.417940206277618076e-01, -4.684714285705613573e-02, 6.070882964241105378e-02, 5.715183445260184347e-02, 1.138024049318459713e-01, 9.374622673558236086e-02, 3.096927839536912919e-02, -9.232883741117139942e-02, -6.499836527010102727e-02, 2.839980861544661589e-02, 8.097497759757731062e-03, 1.006700103228213017e-01, -6.129199344840162433e-02, 8.266585923704758421e-02, -3.307338951814066397e-02, -3.078161564779093723e-02, -8.748776750553553111e-03, -2.162930108693108394e-02, 2.135313622214399243e-02, -8.845621737097757523e-03, 
9.365293934359546560e-03, 8.562579091543631032e-03, 1.772751551871581607e-02, 1.573655414890783033e-02, -3.649820158632081230e-02, -1.904914900326310223e-01, -1.076542087674599024e-01, -5.186655049718805199e-02, 1.686765146765009937e-01, -6.620206332305828001e-02, 8.923065241761217459e-02, 2.168185832506550753e-02, 1.703837250941818704e-01])
-        self.expected_v = np.array([-2.123013313652813774e-03, -2.646248889538913257e-04, 2.225254748021367093e-04, 9.843593195853941446e-04, 1.226963457840150472e-04, -1.031764725911038809e-04, -8.467513732241481721e-04, -1.055440805151912256e-04, 8.875297679686559459e-05, 1.829118379697145316e-02, 2.302438731350108913e-03, -1.890198823577125386e-03, 3.300229266409118040e-02, -1.339230641165423293e-02, -2.445540228188634868e-02, 5.127826101331301595e-02, -2.458314752619149279e-02, -4.252530480245884925e-02, 9.733043787604266084e-02, -6.217238566516904152e-02, 3.767656091618994812e-02, 6.674680725588777973e-03, 4.245867422406505304e-02, -2.752200660186601699e-02, -8.318636634138946995e-03, -2.738884420387305285e-02, 1.785195524121836741e-02, -3.151218435289559073e-03, -3.927864338604547816e-04, 3.302976830190196104e-04, 1.387198082848713948e-06, 1.729085429046553641e-07, -1.454003656243721975e-07, -4.056191292896940703e-05, -5.055875832506090064e-06, 4.251531950061960394e-06, 7.087482338961141604e-02, -1.643445525800983908e-01, 2.668682182870234509e-01, 7.752581706917366366e-03, -2.674714571946596939e-02, 4.308263417785011123e-02, -9.385640612496094423e-03, 4.307848167667025635e-02, -6.910099104451945806e-02, -1.822493611414978121e-01, -4.510097387143227610e-02, 5.157836206906134952e-02, -1.170389534066011428e-01, -2.858136680923874240e-02, 3.256883555835647648e-02, 1.336331160725280354e-01, 3.257484898923947853e-02, -3.710113093740719653e-02, 3.343993600586595179e-03, 4.168150663620683060e-04, -3.505035785317401481e-04, -4.312491363797464269e-03, -5.375343342977005178e-04, 4.520175083867039156e-04, -5.045304632809267465e-04, -6.288764981405317546e-05, 5.288279643454484632e-05, 2.176577726533836937e-02, -1.041710664445027849e-02, -1.802940684978692962e-02, -3.097121964369356495e-02, 1.077096511204005125e-02, 2.079488766754130843e-02, -1.120464690158002596e-01, 4.736950869652114399e-02, 8.530900293808066359e-02, 3.029112757823893692e-02, 1.058529311156591879e-01, 
-6.894903720238335088e-02, -5.089618157121258979e-02, -6.973511953466600410e-02, 4.618114280030299196e-02, 1.143309394598741001e-02, 2.319568285212985151e-02, -1.522637168466081138e-02, -1.535733649675188493e-03, -1.914228911776438445e-04, 1.609692493993826663e-04, -2.603290366421702733e-03, -3.244894507721100851e-04, 2.728661290583660171e-04, 6.938458118266074663e-04, 8.648503036932213837e-05, -7.272604826511198082e-05, -2.609239945314979423e-02, 1.142603664459106681e-02, -2.051406106454568487e-02, 5.779549344910496142e-03, -3.860615030463052100e-02, 6.168332781226748551e-02, 2.068839156841529789e-02, -7.643723474881176927e-02, 1.229844977392647865e-01, -3.554667688747349674e-02, -8.262665730398828859e-03, 9.285295046969522723e-03, 1.497274901467501862e-01, 3.666859638982037511e-02, -4.181688913175674732e-02, -3.257377626487627069e-03, -8.171909213273372040e-04, 9.379633299917983094e-04, 5.408910405506226968e-04, 6.741984641424190365e-05, -5.669396175743082354e-05, 4.696290607396237790e-04, 5.853733334998140626e-05, -4.922457577157541143e-05, -5.350269144276139158e-03, -6.668890718077903363e-04, 5.607930831110977251e-04, 3.013271000130106694e-02, -1.241570117891089425e-02, -2.255430712666738058e-02, -1.643158253499693577e-02, 6.876116339617440766e-03, 1.242585434168311936e-02, 2.120265775977717496e-03, -2.988284987993197143e-03, -4.123302560925387432e-03, 3.528008965720315360e-02, -1.132921329184741026e-02, 6.435692645130823564e-03, -2.115291124444698342e-02, -2.971050496327276927e-02, 1.966236467455729359e-02, -2.194244461519655187e-02, -1.469000955331024871e-02, 1.000316933044766328e-02, -2.208576023807404254e-03, -2.752899293131040766e-04, 2.314938041951108548e-04, -5.840262773118632192e-04, -7.279647649213021596e-05, 6.121521886838239123e-05, -1.263538670848133802e-03, -1.574949051482092536e-04, 1.324388975109944740e-04, 8.955566031735841259e-03, -2.660296383100100095e-02, 4.296567375352825652e-02, 2.380373596470350059e-02, -7.784355459714024927e-02, 
1.255004729498893912e-01, -1.824501349606120690e-02, 3.948761180940744964e-02, -6.423389834199008663e-02, 1.038606825469970407e-02, 2.616819816765628484e-03, -3.006960935423359793e-03, -1.864007491704058883e-02, -4.504736174636920880e-03, 5.118497771104377897e-03, 1.680266347982039554e-01, 4.105963063126880086e-02, -4.679634408112137711e-02, 3.343993600586595179e-03, 4.168150663620683060e-04, -3.505035785317401481e-04, -4.312491363797464269e-03, -5.375343342977005178e-04, 4.520175083867039156e-04, -5.045304632809267465e-04, -6.288764981405317546e-05, 5.288279643454484632e-05, 2.176577726533836937e-02, -1.041710664445027849e-02, -1.802940684978692962e-02, -3.097121964369356495e-02, 1.077096511204005125e-02, 2.079488766754130843e-02, -1.120464690158002596e-01, 4.736950869652114399e-02, 8.530900293808066359e-02, 3.029112757823893692e-02, 1.058529311156591879e-01, -6.894903720238335088e-02, -5.089618157121258979e-02, -6.973511953466600410e-02, 4.618114280030299196e-02, 1.143309394598741001e-02, 2.319568285212985151e-02, -1.522637168466081138e-02, -1.535733649675188493e-03, -1.914228911776438445e-04, 1.609692493993826663e-04, -2.603290366421702733e-03, -3.244894507721100851e-04, 2.728661290583660171e-04, 6.938458118266074663e-04, 8.648503036932213837e-05, -7.272604826511198082e-05, -2.609239945314979423e-02, 1.142603664459106681e-02, -2.051406106454568487e-02, 5.779549344910496142e-03, -3.860615030463052100e-02, 6.168332781226748551e-02, 2.068839156841529789e-02, -7.643723474881176927e-02, 1.229844977392647865e-01, -3.554667688747349674e-02, -8.262665730398828859e-03, 9.285295046969522723e-03, 1.497274901467501862e-01, 3.666859638982037511e-02, -4.181688913175674732e-02, -3.257377626487627069e-03, -8.171909213273372040e-04, 9.379633299917983094e-04, 1.097257666720985849e-03, 1.367686610077148478e-04, -1.150100103928514269e-04, -3.252401295559594844e-03, -4.053984617694676175e-04, 3.409032519425078027e-04, -1.217154259382106555e-04, -1.517132787898375553e-05, 
1.275770753460001047e-05, -1.104423096905816498e-01, 4.615651100464009809e-02, 8.344619780982527601e-02, -1.998235369855275168e-01, 8.508819942125579738e-02, 1.528709647298205909e-01, 8.333302476347614896e-02, -3.488524142655123617e-02, -6.303339769808283255e-02, -7.468341447282240975e-02, -1.443673498458480642e-01, 9.485360739696327426e-02, -2.685004652445167612e-04, -1.702408228533323561e-02, 1.097613894113106531e-02, 9.496752299747332482e-02, 1.714581306702349373e-01, -1.128066531362114239e-01, -2.109671824413435984e-03, -2.629619271223545066e-04, 2.211270750801623281e-04, 1.011694656468142307e-02, 1.261035832424879221e-03, -1.060416495448196581e-03, 2.326027531269699879e-04, 2.899297772687444119e-05, -2.438045854305356789e-05, -9.775618976121780001e-04, 7.897148922927013995e-03, -1.259878571596698138e-02, -5.534571406250721713e-03, 2.552681480358522451e-02, -4.094434810336724379e-02, -1.258721457759937913e-02, 4.161890111720080443e-02, -6.708566706120022705e-02, 3.521744971093632853e-02, 8.557787631933998912e-03, -9.738493960065902622e-03, -8.446926488038911107e-02, -2.017604402799078392e-02, 2.285024948138817888e-02, -9.755577915095828626e-03, -2.364722966186930900e-03, 2.689144780896026744e-03, 8.392348196279006065e-05, 1.046071729847805219e-05, -8.796512273720217211e-06, -2.967282659264359589e-03, -3.698595949224694123e-04, 3.110182957302592738e-04, -1.688223115474902841e-03, -2.104300767164184042e-04, 1.769525645115341121e-04, -1.040849854787611189e-01, 4.406117175034113265e-02, 7.931633477513304331e-02, 3.539829580561168476e-02, -1.443144702217136026e-02, -2.631106338063535569e-02, -4.383990895980735547e-02, 1.895493123709470276e-02, 3.388325869579450478e-02, 1.809448338386955915e-02, 4.269882582195522885e-02, -2.795653019460052346e-02, 4.363124777259473619e-02, 8.597058258914810902e-02, -5.646456449126337207e-02, 4.431189331687027805e-02, 7.186269332716928304e-02, -4.739074421553418626e-02, 7.807665162715203382e-05, 9.731933913865978996e-06, 
-8.183671700296416994e-06, 2.525821455836478949e-03, 3.148332692827336839e-04, -2.647461582604813284e-04, 5.088778918832323993e-03, 6.342953893162101269e-04, -5.333847591977234877e-04, 1.765533347871811772e-03, -1.422682766506909793e-02, 2.269730547460076936e-02, 2.888222424864686153e-04, -4.083171371247279469e-03, 6.494062010930001794e-03, 1.594130471018519873e-02, -4.922350239779287734e-02, 7.944117864515577720e-02, -5.516443865142822006e-02, -1.340804559261108905e-02, 1.525892700429632917e-02, 7.450140187529649682e-02, 1.809617933997387934e-02, -2.059052256811338619e-02, -3.118940445306414219e-02, -7.412336287839308216e-03, 8.382871287998559101e-03, 5.408910405506207452e-04, 6.741984641424155129e-05, -5.669396175743063380e-05, 4.696290607396231285e-04, 5.853733334998132494e-05, -4.922457577157534367e-05, -5.350269144276134821e-03, -6.668890718077897942e-04, 5.607930831110975083e-04, 3.013271000130106694e-02, -1.241570117891090119e-02, -2.255430712666738752e-02, -1.643158253499694271e-02, 6.876116339617444236e-03, 1.242585434168312457e-02, 2.120265775977718363e-03, -2.988284987993198010e-03, -4.123302560925387432e-03, 3.528008965720314666e-02, -1.132921329184741026e-02, 6.435692645130823564e-03, -2.115291124444698342e-02, -2.971050496327276927e-02, 1.966236467455729012e-02, -2.194244461519655881e-02, -1.469000955331024871e-02, 1.000316933044766501e-02, -2.208576023807403820e-03, -2.752899293131040766e-04, 2.314938041951108548e-04, -5.840262773118632192e-04, -7.279647649213021596e-05, 6.121521886838239123e-05, -1.263538670848133802e-03, -1.574949051482092536e-04, 1.324388975109944740e-04, 8.955566031735841259e-03, -2.660296383100100095e-02, 4.296567375352825652e-02, 2.380373596470350059e-02, -7.784355459714024927e-02, 1.255004729498893912e-01, -1.824501349606121037e-02, 3.948761180940744964e-02, -6.423389834199008663e-02, 1.038606825469969019e-02, 2.616819816765625015e-03, -3.006960935423356324e-03, -1.864007491704059577e-02, -4.504736174636922615e-03, 
5.118497771104379632e-03, 1.680266347982039554e-01, 4.105963063126880086e-02, -4.679634408112137711e-02, 8.392348196278930170e-05, 1.046071729847797087e-05, -8.796512273720142672e-06, -2.967282659264356987e-03, -3.698595949224691413e-04, 3.110182957302590027e-04, -1.688223115474903708e-03, -2.104300767164184855e-04, 1.769525645115341934e-04, -1.040849854787611189e-01, 4.406117175034113265e-02, 7.931633477513304331e-02, 3.539829580561167782e-02, -1.443144702217136026e-02, -2.631106338063535569e-02, -4.383990895980735547e-02, 1.895493123709470276e-02, 3.388325869579450478e-02, 1.809448338386955221e-02, 4.269882582195521498e-02, -2.795653019460051653e-02, 4.363124777259472925e-02, 8.597058258914809514e-02, -5.646456449126335819e-02, 4.431189331687027111e-02, 7.186269332716926916e-02, -4.739074421553417932e-02, 7.807665162715246750e-05, 9.731933913866019654e-06, -8.183671700296457651e-06, 2.525821455836478515e-03, 3.148332692827336297e-04, -2.647461582604812742e-04, 5.088778918832324860e-03, 6.342953893162102353e-04, -5.333847591977235961e-04, 1.765533347871809603e-03, -1.422682766506909793e-02, 2.269730547460076589e-02, 2.888222424864694826e-04, -4.083171371247282938e-03, 6.494062010930008733e-03, 1.594130471018519873e-02, -4.922350239779287040e-02, 7.944117864515577720e-02, -5.516443865142821312e-02, -1.340804559261108558e-02, 1.525892700429632570e-02, 7.450140187529649682e-02, 1.809617933997387934e-02, -2.059052256811338966e-02, -3.118940445306412831e-02, -7.412336287839304746e-03, 8.382871287998553897e-03, -9.575909105642434974e-04, -1.193597735547498307e-04, 1.003707186710399045e-04, -9.520061199010912585e-05, -1.186636523389461756e-05, 9.978534401229592523e-06, -5.876800709203859434e-03, -7.325190685693192200e-04, 6.159819440242017292e-04, -1.659431774532551043e-02, 6.520628417529478540e-03, 1.204087494393247214e-02, 6.518824051016284399e-03, -2.745500204548994606e-03, -4.950724849051978994e-03, -5.340810191179472081e-03, 3.101366677982481286e-03, 
5.077959020099345744e-03, 7.727976016970144156e-03, 7.022558645366243878e-03, -4.714356496325102820e-03, 7.018017321145150929e-03, 1.341962078953426278e-02, -8.818944869050635710e-03, -2.755773236988961865e-03, 1.079245666846929096e-02, -6.886663303228377636e-03, 9.801230913130992879e-04, 1.221683173308112048e-04, -1.027324486645460452e-04, 1.233918620327190629e-04, 1.538028875195364422e-05, -1.293342463232469071e-05, 4.892751025155074075e-03, 6.098613175830685205e-04, -5.128379261493998297e-04, -7.792305682365031905e-03, 2.541307371885552502e-02, -4.097328323558844382e-02, 2.530143617608526449e-02, -8.265149730513186854e-02, 1.332544508945474881e-01, -1.184335640259520997e-02, 3.220055758982264676e-02, -5.209911236104310117e-02, 8.090761694886683397e-02, 1.959431243541279177e-02, -2.227702786419644143e-02, 1.968691296265078980e-02, 4.764576998712748319e-03, -5.415896903683155988e-03, 1.534638141861073557e-01, 3.728680895816388619e-02, -4.242975875503233324e-02])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+        self.expected_t = np.array(
+            [
+                1.936327241487292961e00,
+                5.198696351735779264e-02,
+                3.888336625074450149e-03,
+                5.198696351735781346e-02,
+                1.764967784387830196e00,
+                -1.354658545697527347e-02,
+                3.888336625074451016e-03,
+                -1.354658545697527000e-02,
+                1.939288409902199639e00,
+                1.786740420980893029e00,
+                4.868765294055640847e-02,
+                -9.812132615180739481e-02,
+                4.868765294055640847e-02,
+                1.925999147066305373e00,
+                2.895028407651457567e-02,
+                -9.812132615180743644e-02,
+                2.895028407651457220e-02,
+                1.883109989034779996e00,
+            ]
+        )
+        self.expected_f = np.array(
+            [
+                5.305178446980116092e-02,
+                -1.127314829623577049e-02,
+                1.136493514861047216e-01,
+                5.598130220328862322e-05,
+                -4.352126938892845326e-02,
+                -7.700608888887500170e-02,
+                -1.050015668789053697e-01,
+                5.882396336737016895e-02,
+                -3.723875897544067642e-02,
+                -7.850322286760008650e-02,
+                7.279117637753844405e-02,
+                -6.178451060078461732e-02,
+                3.404361490778949895e-01,
+                5.447934529195214842e-02,
+                -8.698375128815737101e-02,
+                -2.100391251033939810e-01,
+                -1.313000673516965255e-01,
+                1.493637582671529240e-01,
+                -9.589318874236771317e-02,
+                6.285887854370801608e-02,
+                -1.824395427630142175e-01,
+                -3.264267092869802683e-02,
+                3.637498661083633789e-02,
+                1.524859582123189172e-01,
+                1.442484990808054202e-01,
+                -8.957992476622803069e-02,
+                3.076469140583825215e-02,
+                4.909822745881124717e-02,
+                -2.559151672032903835e-01,
+                -1.522830913546814324e-01,
+                -2.885480042033320910e-02,
+                7.730841025065784966e-02,
+                1.553301391955271560e-01,
+                -3.595606644821771475e-02,
+                1.689528165643162105e-01,
+                -3.858154695988691516e-03,
+                5.018843026262573281e-02,
+                1.756005154318779349e-02,
+                3.489323893614350303e-02,
+                -4.020411124876955428e-02,
+                2.218648284685413238e-02,
+                -8.086177159691650476e-03,
+                -2.222392408702593067e-02,
+                -3.825892777133557687e-02,
+                -1.689393838770965675e-02,
+                -5.465804822761769627e-02,
+                -1.398775875506316768e-01,
+                -1.165702490994514756e-01,
+                5.449067849718619572e-02,
+                1.588580450812354106e-01,
+                -8.209560373418453572e-02,
+                1.240697480360127003e-02,
+                -2.046806414931008622e-02,
+                1.887527294448937965e-01,
+                -9.589318874236771317e-02,
+                6.285887854370801608e-02,
+                -1.824395427630142175e-01,
+                -3.264267092869802683e-02,
+                3.637498661083633789e-02,
+                1.524859582123189172e-01,
+                1.442484990808054202e-01,
+                -8.957992476622803069e-02,
+                3.076469140583825215e-02,
+                4.909822745881124717e-02,
+                -2.559151672032903835e-01,
+                -1.522830913546814324e-01,
+                -2.885480042033320910e-02,
+                7.730841025065784966e-02,
+                1.553301391955271560e-01,
+                -3.595606644821771475e-02,
+                1.689528165643162105e-01,
+                -3.858154695988691516e-03,
+                4.038746042068122599e-02,
+                -2.549213597407858356e-01,
+                -1.131801705114504619e-01,
+                1.489732376295762606e-01,
+                2.734584831542113958e-01,
+                -1.125511889088352951e-01,
+                -1.908551011160136424e-01,
+                -2.400995606986339528e-02,
+                2.255650484976146619e-01,
+                -2.185213968874370055e-02,
+                1.475333123369945709e-01,
+                9.584417756169674729e-02,
+                -1.576380405016522893e-02,
+                -5.153693137796186430e-02,
+                -8.489897831367294867e-02,
+                3.911034680466508873e-02,
+                -9.052354830259493057e-02,
+                -1.077888832535272776e-02,
+                -1.970229486427777510e-01,
+                -6.538978166042377915e-02,
+                -1.570533119125729904e-01,
+                1.417940206277617798e-01,
+                -4.684714285705613573e-02,
+                6.070882964241105378e-02,
+                5.715183445260185735e-02,
+                1.138024049318459713e-01,
+                9.374622673558237473e-02,
+                3.096927839536914306e-02,
+                -9.232883741117139942e-02,
+                -6.499836527010099951e-02,
+                2.839980861544661936e-02,
+                8.097497759757724123e-03,
+                1.006700103228213017e-01,
+                -6.129199344840163821e-02,
+                8.266585923704758421e-02,
+                -3.307338951814068478e-02,
+                5.018843026262574669e-02,
+                1.756005154318778308e-02,
+                3.489323893614350997e-02,
+                -4.020411124876957509e-02,
+                2.218648284685414279e-02,
+                -8.086177159691652211e-03,
+                -2.222392408702593067e-02,
+                -3.825892777133557687e-02,
+                -1.689393838770965675e-02,
+                -5.465804822761770321e-02,
+                -1.398775875506316491e-01,
+                -1.165702490994514756e-01,
+                5.449067849718619572e-02,
+                1.588580450812354106e-01,
+                -8.209560373418453572e-02,
+                1.240697480360125615e-02,
+                -2.046806414931009316e-02,
+                1.887527294448937965e-01,
+                -1.970229486427777510e-01,
+                -6.538978166042375140e-02,
+                -1.570533119125729626e-01,
+                1.417940206277618076e-01,
+                -4.684714285705613573e-02,
+                6.070882964241105378e-02,
+                5.715183445260184347e-02,
+                1.138024049318459713e-01,
+                9.374622673558236086e-02,
+                3.096927839536912919e-02,
+                -9.232883741117139942e-02,
+                -6.499836527010102727e-02,
+                2.839980861544661589e-02,
+                8.097497759757731062e-03,
+                1.006700103228213017e-01,
+                -6.129199344840162433e-02,
+                8.266585923704758421e-02,
+                -3.307338951814066397e-02,
+                -3.078161564779093723e-02,
+                -8.748776750553553111e-03,
+                -2.162930108693108394e-02,
+                2.135313622214399243e-02,
+                -8.845621737097757523e-03,
+                9.365293934359546560e-03,
+                8.562579091543631032e-03,
+                1.772751551871581607e-02,
+                1.573655414890783033e-02,
+                -3.649820158632081230e-02,
+                -1.904914900326310223e-01,
+                -1.076542087674599024e-01,
+                -5.186655049718805199e-02,
+                1.686765146765009937e-01,
+                -6.620206332305828001e-02,
+                8.923065241761217459e-02,
+                2.168185832506550753e-02,
+                1.703837250941818704e-01,
+            ]
+        )
+        self.expected_v = np.array(
+            [
+                -2.123013313652813774e-03,
+                -2.646248889538913257e-04,
+                2.225254748021367093e-04,
+                9.843593195853941446e-04,
+                1.226963457840150472e-04,
+                -1.031764725911038809e-04,
+                -8.467513732241481721e-04,
+                -1.055440805151912256e-04,
+                8.875297679686559459e-05,
+                1.829118379697145316e-02,
+                2.302438731350108913e-03,
+                -1.890198823577125386e-03,
+                3.300229266409118040e-02,
+                -1.339230641165423293e-02,
+                -2.445540228188634868e-02,
+                5.127826101331301595e-02,
+                -2.458314752619149279e-02,
+                -4.252530480245884925e-02,
+                9.733043787604266084e-02,
+                -6.217238566516904152e-02,
+                3.767656091618994812e-02,
+                6.674680725588777973e-03,
+                4.245867422406505304e-02,
+                -2.752200660186601699e-02,
+                -8.318636634138946995e-03,
+                -2.738884420387305285e-02,
+                1.785195524121836741e-02,
+                -3.151218435289559073e-03,
+                -3.927864338604547816e-04,
+                3.302976830190196104e-04,
+                1.387198082848713948e-06,
+                1.729085429046553641e-07,
+                -1.454003656243721975e-07,
+                -4.056191292896940703e-05,
+                -5.055875832506090064e-06,
+                4.251531950061960394e-06,
+                7.087482338961141604e-02,
+                -1.643445525800983908e-01,
+                2.668682182870234509e-01,
+                7.752581706917366366e-03,
+                -2.674714571946596939e-02,
+                4.308263417785011123e-02,
+                -9.385640612496094423e-03,
+                4.307848167667025635e-02,
+                -6.910099104451945806e-02,
+                -1.822493611414978121e-01,
+                -4.510097387143227610e-02,
+                5.157836206906134952e-02,
+                -1.170389534066011428e-01,
+                -2.858136680923874240e-02,
+                3.256883555835647648e-02,
+                1.336331160725280354e-01,
+                3.257484898923947853e-02,
+                -3.710113093740719653e-02,
+                3.343993600586595179e-03,
+                4.168150663620683060e-04,
+                -3.505035785317401481e-04,
+                -4.312491363797464269e-03,
+                -5.375343342977005178e-04,
+                4.520175083867039156e-04,
+                -5.045304632809267465e-04,
+                -6.288764981405317546e-05,
+                5.288279643454484632e-05,
+                2.176577726533836937e-02,
+                -1.041710664445027849e-02,
+                -1.802940684978692962e-02,
+                -3.097121964369356495e-02,
+                1.077096511204005125e-02,
+                2.079488766754130843e-02,
+                -1.120464690158002596e-01,
+                4.736950869652114399e-02,
+                8.530900293808066359e-02,
+                3.029112757823893692e-02,
+                1.058529311156591879e-01,
+                -6.894903720238335088e-02,
+                -5.089618157121258979e-02,
+                -6.973511953466600410e-02,
+                4.618114280030299196e-02,
+                1.143309394598741001e-02,
+                2.319568285212985151e-02,
+                -1.522637168466081138e-02,
+                -1.535733649675188493e-03,
+                -1.914228911776438445e-04,
+                1.609692493993826663e-04,
+                -2.603290366421702733e-03,
+                -3.244894507721100851e-04,
+                2.728661290583660171e-04,
+                6.938458118266074663e-04,
+                8.648503036932213837e-05,
+                -7.272604826511198082e-05,
+                -2.609239945314979423e-02,
+                1.142603664459106681e-02,
+                -2.051406106454568487e-02,
+                5.779549344910496142e-03,
+                -3.860615030463052100e-02,
+                6.168332781226748551e-02,
+                2.068839156841529789e-02,
+                -7.643723474881176927e-02,
+                1.229844977392647865e-01,
+                -3.554667688747349674e-02,
+                -8.262665730398828859e-03,
+                9.285295046969522723e-03,
+                1.497274901467501862e-01,
+                3.666859638982037511e-02,
+                -4.181688913175674732e-02,
+                -3.257377626487627069e-03,
+                -8.171909213273372040e-04,
+                9.379633299917983094e-04,
+                5.408910405506226968e-04,
+                6.741984641424190365e-05,
+                -5.669396175743082354e-05,
+                4.696290607396237790e-04,
+                5.853733334998140626e-05,
+                -4.922457577157541143e-05,
+                -5.350269144276139158e-03,
+                -6.668890718077903363e-04,
+                5.607930831110977251e-04,
+                3.013271000130106694e-02,
+                -1.241570117891089425e-02,
+                -2.255430712666738058e-02,
+                -1.643158253499693577e-02,
+                6.876116339617440766e-03,
+                1.242585434168311936e-02,
+                2.120265775977717496e-03,
+                -2.988284987993197143e-03,
+                -4.123302560925387432e-03,
+                3.528008965720315360e-02,
+                -1.132921329184741026e-02,
+                6.435692645130823564e-03,
+                -2.115291124444698342e-02,
+                -2.971050496327276927e-02,
+                1.966236467455729359e-02,
+                -2.194244461519655187e-02,
+                -1.469000955331024871e-02,
+                1.000316933044766328e-02,
+                -2.208576023807404254e-03,
+                -2.752899293131040766e-04,
+                2.314938041951108548e-04,
+                -5.840262773118632192e-04,
+                -7.279647649213021596e-05,
+                6.121521886838239123e-05,
+                -1.263538670848133802e-03,
+                -1.574949051482092536e-04,
+                1.324388975109944740e-04,
+                8.955566031735841259e-03,
+                -2.660296383100100095e-02,
+                4.296567375352825652e-02,
+                2.380373596470350059e-02,
+                -7.784355459714024927e-02,
+                1.255004729498893912e-01,
+                -1.824501349606120690e-02,
+                3.948761180940744964e-02,
+                -6.423389834199008663e-02,
+                1.038606825469970407e-02,
+                2.616819816765628484e-03,
+                -3.006960935423359793e-03,
+                -1.864007491704058883e-02,
+                -4.504736174636920880e-03,
+                5.118497771104377897e-03,
+                1.680266347982039554e-01,
+                4.105963063126880086e-02,
+                -4.679634408112137711e-02,
+                3.343993600586595179e-03,
+                4.168150663620683060e-04,
+                -3.505035785317401481e-04,
+                -4.312491363797464269e-03,
+                -5.375343342977005178e-04,
+                4.520175083867039156e-04,
+                -5.045304632809267465e-04,
+                -6.288764981405317546e-05,
+                5.288279643454484632e-05,
+                2.176577726533836937e-02,
+                -1.041710664445027849e-02,
+                -1.802940684978692962e-02,
+                -3.097121964369356495e-02,
+                1.077096511204005125e-02,
+                2.079488766754130843e-02,
+                -1.120464690158002596e-01,
+                4.736950869652114399e-02,
+                8.530900293808066359e-02,
+                3.029112757823893692e-02,
+                1.058529311156591879e-01,
+                -6.894903720238335088e-02,
+                -5.089618157121258979e-02,
+                -6.973511953466600410e-02,
+                4.618114280030299196e-02,
+                1.143309394598741001e-02,
+                2.319568285212985151e-02,
+                -1.522637168466081138e-02,
+                -1.535733649675188493e-03,
+                -1.914228911776438445e-04,
+                1.609692493993826663e-04,
+                -2.603290366421702733e-03,
+                -3.244894507721100851e-04,
+                2.728661290583660171e-04,
+                6.938458118266074663e-04,
+                8.648503036932213837e-05,
+                -7.272604826511198082e-05,
+                -2.609239945314979423e-02,
+                1.142603664459106681e-02,
+                -2.051406106454568487e-02,
+                5.779549344910496142e-03,
+                -3.860615030463052100e-02,
+                6.168332781226748551e-02,
+                2.068839156841529789e-02,
+                -7.643723474881176927e-02,
+                1.229844977392647865e-01,
+                -3.554667688747349674e-02,
+                -8.262665730398828859e-03,
+                9.285295046969522723e-03,
+                1.497274901467501862e-01,
+                3.666859638982037511e-02,
+                -4.181688913175674732e-02,
+                -3.257377626487627069e-03,
+                -8.171909213273372040e-04,
+                9.379633299917983094e-04,
+                1.097257666720985849e-03,
+                1.367686610077148478e-04,
+                -1.150100103928514269e-04,
+                -3.252401295559594844e-03,
+                -4.053984617694676175e-04,
+                3.409032519425078027e-04,
+                -1.217154259382106555e-04,
+                -1.517132787898375553e-05,
+                1.275770753460001047e-05,
+                -1.104423096905816498e-01,
+                4.615651100464009809e-02,
+                8.344619780982527601e-02,
+                -1.998235369855275168e-01,
+                8.508819942125579738e-02,
+                1.528709647298205909e-01,
+                8.333302476347614896e-02,
+                -3.488524142655123617e-02,
+                -6.303339769808283255e-02,
+                -7.468341447282240975e-02,
+                -1.443673498458480642e-01,
+                9.485360739696327426e-02,
+                -2.685004652445167612e-04,
+                -1.702408228533323561e-02,
+                1.097613894113106531e-02,
+                9.496752299747332482e-02,
+                1.714581306702349373e-01,
+                -1.128066531362114239e-01,
+                -2.109671824413435984e-03,
+                -2.629619271223545066e-04,
+                2.211270750801623281e-04,
+                1.011694656468142307e-02,
+                1.261035832424879221e-03,
+                -1.060416495448196581e-03,
+                2.326027531269699879e-04,
+                2.899297772687444119e-05,
+                -2.438045854305356789e-05,
+                -9.775618976121780001e-04,
+                7.897148922927013995e-03,
+                -1.259878571596698138e-02,
+                -5.534571406250721713e-03,
+                2.552681480358522451e-02,
+                -4.094434810336724379e-02,
+                -1.258721457759937913e-02,
+                4.161890111720080443e-02,
+                -6.708566706120022705e-02,
+                3.521744971093632853e-02,
+                8.557787631933998912e-03,
+                -9.738493960065902622e-03,
+                -8.446926488038911107e-02,
+                -2.017604402799078392e-02,
+                2.285024948138817888e-02,
+                -9.755577915095828626e-03,
+                -2.364722966186930900e-03,
+                2.689144780896026744e-03,
+                8.392348196279006065e-05,
+                1.046071729847805219e-05,
+                -8.796512273720217211e-06,
+                -2.967282659264359589e-03,
+                -3.698595949224694123e-04,
+                3.110182957302592738e-04,
+                -1.688223115474902841e-03,
+                -2.104300767164184042e-04,
+                1.769525645115341121e-04,
+                -1.040849854787611189e-01,
+                4.406117175034113265e-02,
+                7.931633477513304331e-02,
+                3.539829580561168476e-02,
+                -1.443144702217136026e-02,
+                -2.631106338063535569e-02,
+                -4.383990895980735547e-02,
+                1.895493123709470276e-02,
+                3.388325869579450478e-02,
+                1.809448338386955915e-02,
+                4.269882582195522885e-02,
+                -2.795653019460052346e-02,
+                4.363124777259473619e-02,
+                8.597058258914810902e-02,
+                -5.646456449126337207e-02,
+                4.431189331687027805e-02,
+                7.186269332716928304e-02,
+                -4.739074421553418626e-02,
+                7.807665162715203382e-05,
+                9.731933913865978996e-06,
+                -8.183671700296416994e-06,
+                2.525821455836478949e-03,
+                3.148332692827336839e-04,
+                -2.647461582604813284e-04,
+                5.088778918832323993e-03,
+                6.342953893162101269e-04,
+                -5.333847591977234877e-04,
+                1.765533347871811772e-03,
+                -1.422682766506909793e-02,
+                2.269730547460076936e-02,
+                2.888222424864686153e-04,
+                -4.083171371247279469e-03,
+                6.494062010930001794e-03,
+                1.594130471018519873e-02,
+                -4.922350239779287734e-02,
+                7.944117864515577720e-02,
+                -5.516443865142822006e-02,
+                -1.340804559261108905e-02,
+                1.525892700429632917e-02,
+                7.450140187529649682e-02,
+                1.809617933997387934e-02,
+                -2.059052256811338619e-02,
+                -3.118940445306414219e-02,
+                -7.412336287839308216e-03,
+                8.382871287998559101e-03,
+                5.408910405506207452e-04,
+                6.741984641424155129e-05,
+                -5.669396175743063380e-05,
+                4.696290607396231285e-04,
+                5.853733334998132494e-05,
+                -4.922457577157534367e-05,
+                -5.350269144276134821e-03,
+                -6.668890718077897942e-04,
+                5.607930831110975083e-04,
+                3.013271000130106694e-02,
+                -1.241570117891090119e-02,
+                -2.255430712666738752e-02,
+                -1.643158253499694271e-02,
+                6.876116339617444236e-03,
+                1.242585434168312457e-02,
+                2.120265775977718363e-03,
+                -2.988284987993198010e-03,
+                -4.123302560925387432e-03,
+                3.528008965720314666e-02,
+                -1.132921329184741026e-02,
+                6.435692645130823564e-03,
+                -2.115291124444698342e-02,
+                -2.971050496327276927e-02,
+                1.966236467455729012e-02,
+                -2.194244461519655881e-02,
+                -1.469000955331024871e-02,
+                1.000316933044766501e-02,
+                -2.208576023807403820e-03,
+                -2.752899293131040766e-04,
+                2.314938041951108548e-04,
+                -5.840262773118632192e-04,
+                -7.279647649213021596e-05,
+                6.121521886838239123e-05,
+                -1.263538670848133802e-03,
+                -1.574949051482092536e-04,
+                1.324388975109944740e-04,
+                8.955566031735841259e-03,
+                -2.660296383100100095e-02,
+                4.296567375352825652e-02,
+                2.380373596470350059e-02,
+                -7.784355459714024927e-02,
+                1.255004729498893912e-01,
+                -1.824501349606121037e-02,
+                3.948761180940744964e-02,
+                -6.423389834199008663e-02,
+                1.038606825469969019e-02,
+                2.616819816765625015e-03,
+                -3.006960935423356324e-03,
+                -1.864007491704059577e-02,
+                -4.504736174636922615e-03,
+                5.118497771104379632e-03,
+                1.680266347982039554e-01,
+                4.105963063126880086e-02,
+                -4.679634408112137711e-02,
+                8.392348196278930170e-05,
+                1.046071729847797087e-05,
+                -8.796512273720142672e-06,
+                -2.967282659264356987e-03,
+                -3.698595949224691413e-04,
+                3.110182957302590027e-04,
+                -1.688223115474903708e-03,
+                -2.104300767164184855e-04,
+                1.769525645115341934e-04,
+                -1.040849854787611189e-01,
+                4.406117175034113265e-02,
+                7.931633477513304331e-02,
+                3.539829580561167782e-02,
+                -1.443144702217136026e-02,
+                -2.631106338063535569e-02,
+                -4.383990895980735547e-02,
+                1.895493123709470276e-02,
+                3.388325869579450478e-02,
+                1.809448338386955221e-02,
+                4.269882582195521498e-02,
+                -2.795653019460051653e-02,
+                4.363124777259472925e-02,
+                8.597058258914809514e-02,
+                -5.646456449126335819e-02,
+                4.431189331687027111e-02,
+                7.186269332716926916e-02,
+                -4.739074421553417932e-02,
+                7.807665162715246750e-05,
+                9.731933913866019654e-06,
+                -8.183671700296457651e-06,
+                2.525821455836478515e-03,
+                3.148332692827336297e-04,
+                -2.647461582604812742e-04,
+                5.088778918832324860e-03,
+                6.342953893162102353e-04,
+                -5.333847591977235961e-04,
+                1.765533347871809603e-03,
+                -1.422682766506909793e-02,
+                2.269730547460076589e-02,
+                2.888222424864694826e-04,
+                -4.083171371247282938e-03,
+                6.494062010930008733e-03,
+                1.594130471018519873e-02,
+                -4.922350239779287040e-02,
+                7.944117864515577720e-02,
+                -5.516443865142821312e-02,
+                -1.340804559261108558e-02,
+                1.525892700429632570e-02,
+                7.450140187529649682e-02,
+                1.809617933997387934e-02,
+                -2.059052256811338966e-02,
+                -3.118940445306412831e-02,
+                -7.412336287839304746e-03,
+                8.382871287998553897e-03,
+                -9.575909105642434974e-04,
+                -1.193597735547498307e-04,
+                1.003707186710399045e-04,
+                -9.520061199010912585e-05,
+                -1.186636523389461756e-05,
+                9.978534401229592523e-06,
+                -5.876800709203859434e-03,
+                -7.325190685693192200e-04,
+                6.159819440242017292e-04,
+                -1.659431774532551043e-02,
+                6.520628417529478540e-03,
+                1.204087494393247214e-02,
+                6.518824051016284399e-03,
+                -2.745500204548994606e-03,
+                -4.950724849051978994e-03,
+                -5.340810191179472081e-03,
+                3.101366677982481286e-03,
+                5.077959020099345744e-03,
+                7.727976016970144156e-03,
+                7.022558645366243878e-03,
+                -4.714356496325102820e-03,
+                7.018017321145150929e-03,
+                1.341962078953426278e-02,
+                -8.818944869050635710e-03,
+                -2.755773236988961865e-03,
+                1.079245666846929096e-02,
+                -6.886663303228377636e-03,
+                9.801230913130992879e-04,
+                1.221683173308112048e-04,
+                -1.027324486645460452e-04,
+                1.233918620327190629e-04,
+                1.538028875195364422e-05,
+                -1.293342463232469071e-05,
+                4.892751025155074075e-03,
+                6.098613175830685205e-04,
+                -5.128379261493998297e-04,
+                -7.792305682365031905e-03,
+                2.541307371885552502e-02,
+                -4.097328323558844382e-02,
+                2.530143617608526449e-02,
+                -8.265149730513186854e-02,
+                1.332544508945474881e-01,
+                -1.184335640259520997e-02,
+                3.220055758982264676e-02,
+                -5.209911236104310117e-02,
+                8.090761694886683397e-02,
+                1.959431243541279177e-02,
+                -2.227702786419644143e-02,
+                1.968691296265078980e-02,
+                4.764576998712748319e-03,
+                -5.415896903683155988e-03,
+                1.534638141861073557e-01,
+                3.728680895816388619e-02,
+                -4.242975875503233324e-02,
+            ]
+        )
         self.expected_gt = self.expected_t.reshape(-1, self.nout).sum(0).reshape(-1)
-        self.expected_gv = self.expected_v.reshape(1, self.nout, 6, 9).sum(-2).reshape(-1)
+        self.expected_gv = (
+            self.expected_v.reshape(1, self.nout, 6, 9).sum(-2).reshape(-1)
+        )
 
     @classmethod
     def tearDownClass(cls):
-        os.remove("deeppolar_new.pb")    
+        os.remove("deeppolar_new.pb")
         cls.dp = None
 
     def test_attrs(self):
         self.assertEqual(self.dp.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp.get_sel_type(), [0])
 
     def test_1frame_old(self):
         gt = self.dp.eval(self.coords, self.box, self.atype, atomic=False)
         # check shape of the returns
         nframes = 1
-        self.assertEqual(gt.shape, (nframes,self.nout))
+        self.assertEqual(gt.shape, (nframes, self.nout))
         # check values
         np.testing.assert_almost_equal(gt.ravel(), self.expected_gt, default_places)
 
@@ -158,7 +948,7 @@ def test_1frame_old_atm(self):
         nframes = 1
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(at.shape, (nframes,nsel,self.nout))
+        self.assertEqual(at.shape, (nframes, nsel, self.nout))
         # check values
         np.testing.assert_almost_equal(at.ravel(), self.expected_t, default_places)
 
@@ -170,27 +960,29 @@ def test_2frame_old_atm(self):
         nframes = 2
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(at.shape, (nframes,nsel,self.nout))
+        self.assertEqual(at.shape, (nframes, nsel, self.nout))
         # check values
         expected_d = np.concatenate((self.expected_t, self.expected_t))
         np.testing.assert_almost_equal(at.ravel(), expected_d, default_places)
 
     def test_1frame_full(self):
-        gt, ff, vv = self.dp.eval_full(self.coords, self.box, self.atype, atomic = False)
+        gt, ff, vv = self.dp.eval_full(self.coords, self.box, self.atype, atomic=False)
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(gt.shape, (nframes,self.nout))
-        self.assertEqual(ff.shape, (nframes,self.nout,natoms,3))
-        self.assertEqual(vv.shape, (nframes,self.nout,9))
+        self.assertEqual(gt.shape, (nframes, self.nout))
+        self.assertEqual(ff.shape, (nframes, self.nout, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, self.nout, 9))
         # check values
         np.testing.assert_almost_equal(ff.ravel(), self.expected_f, default_places)
         np.testing.assert_almost_equal(gt.ravel(), self.expected_gt, default_places)
         np.testing.assert_almost_equal(vv.ravel(), self.expected_gv, default_places)
 
     def test_1frame_full_atm(self):
-        gt, ff, vv, at, av = self.dp.eval_full(self.coords, self.box, self.atype, atomic = True)
-        
+        gt, ff, vv, at, av = self.dp.eval_full(
+            self.coords, self.box, self.atype, atomic=True
+        )
+
         # print the values
         for dd in (at, ff, av):
             print("\n\n")
@@ -201,31 +993,46 @@ def test_1frame_full_atm(self):
         nframes = 1
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(gt.shape, (nframes,self.nout))
-        self.assertEqual(ff.shape, (nframes,self.nout,natoms,3))
-        self.assertEqual(vv.shape, (nframes,self.nout,9))
-        self.assertEqual(at.shape, (nframes,nsel,self.nout))
-        self.assertEqual(av.shape, (nframes,self.nout,natoms,9))
+        self.assertEqual(gt.shape, (nframes, self.nout))
+        self.assertEqual(ff.shape, (nframes, self.nout, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, self.nout, 9))
+        self.assertEqual(at.shape, (nframes, nsel, self.nout))
+        self.assertEqual(av.shape, (nframes, self.nout, natoms, 9))
         # check values
-        np.testing.assert_almost_equal(ff.reshape([-1]), self.expected_f.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(at.reshape([-1]), self.expected_t.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(av.reshape([-1]), self.expected_v.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(gt.reshape([-1]), self.expected_gt.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(vv.reshape([-1]), self.expected_gv.reshape([-1]), decimal = default_places)
+        np.testing.assert_almost_equal(
+            ff.reshape([-1]), self.expected_f.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            at.reshape([-1]), self.expected_t.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            av.reshape([-1]), self.expected_v.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            gt.reshape([-1]), self.expected_gt.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            vv.reshape([-1]), self.expected_gv.reshape([-1]), decimal=default_places
+        )
 
     def test_1frame_full_atm_shuffle(self):
-        i_sf = [2,1,3,0,5,4]
-        isel_sf = [1,0]
-        gt, ff, vv, at, av = self.dp.eval_full(self.coords.reshape(-1,3)[i_sf].reshape(-1), self.box, self.atype[i_sf], atomic = True)
+        i_sf = [2, 1, 3, 0, 5, 4]
+        isel_sf = [1, 0]
+        gt, ff, vv, at, av = self.dp.eval_full(
+            self.coords.reshape(-1, 3)[i_sf].reshape(-1),
+            self.box,
+            self.atype[i_sf],
+            atomic=True,
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(gt.shape, (nframes,self.nout))
-        self.assertEqual(ff.shape, (nframes,self.nout,natoms,3))
-        self.assertEqual(vv.shape, (nframes,self.nout,9))
-        self.assertEqual(at.shape, (nframes,nsel,self.nout))
-        self.assertEqual(av.shape, (nframes,self.nout,natoms,9))
+        self.assertEqual(gt.shape, (nframes, self.nout))
+        self.assertEqual(ff.shape, (nframes, self.nout, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, self.nout, 9))
+        self.assertEqual(at.shape, (nframes, nsel, self.nout))
+        self.assertEqual(av.shape, (nframes, self.nout, natoms, 9))
         # recover the shuffled result
         nff = np.empty_like(ff)
         nav = np.empty_like(av)
@@ -234,35 +1041,53 @@ def test_1frame_full_atm_shuffle(self):
         nav[:, :, i_sf] = av
         nat[:, isel_sf] = at
         # check values
-        np.testing.assert_almost_equal(nff.reshape([-1]), self.expected_f.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(nat.reshape([-1]), self.expected_t.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(nav.reshape([-1]), self.expected_v.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(gt.reshape([-1]), self.expected_gt.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(vv.reshape([-1]), self.expected_gv.reshape([-1]), decimal = default_places)
-
+        np.testing.assert_almost_equal(
+            nff.reshape([-1]), self.expected_f.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            nat.reshape([-1]), self.expected_t.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            nav.reshape([-1]), self.expected_v.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            gt.reshape([-1]), self.expected_gt.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            vv.reshape([-1]), self.expected_gv.reshape([-1]), decimal=default_places
+        )
 
     def test_2frame_full_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        gt, ff, vv, at, av = self.dp.eval_full(coords2, box2, self.atype, atomic = True)
+        gt, ff, vv, at, av = self.dp.eval_full(coords2, box2, self.atype, atomic=True)
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
         nsel = 2
-        self.assertEqual(gt.shape, (nframes,self.nout))
-        self.assertEqual(ff.shape, (nframes,self.nout,natoms,3))
-        self.assertEqual(vv.shape, (nframes,self.nout,9))
-        self.assertEqual(at.shape, (nframes,nsel,self.nout))
-        self.assertEqual(av.shape, (nframes,self.nout,natoms,9))
+        self.assertEqual(gt.shape, (nframes, self.nout))
+        self.assertEqual(ff.shape, (nframes, self.nout, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, self.nout, 9))
+        self.assertEqual(at.shape, (nframes, nsel, self.nout))
+        self.assertEqual(av.shape, (nframes, self.nout, natoms, 9))
         # check values
         expected_f = np.tile(self.expected_f.reshape(-1), nframes)
         expected_t = np.tile(self.expected_t.reshape(-1), nframes)
         expected_v = np.tile(self.expected_v.reshape(-1), nframes)
         expected_gt = np.tile(self.expected_gt.reshape(-1), nframes)
         expected_gv = np.tile(self.expected_gv.reshape(-1), nframes)
-        np.testing.assert_almost_equal(ff.reshape([-1]), expected_f.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(at.reshape([-1]), expected_t.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(av.reshape([-1]), expected_v.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(gt.reshape([-1]), expected_gt.reshape([-1]), decimal = default_places)
-        np.testing.assert_almost_equal(vv.reshape([-1]), expected_gv.reshape([-1]), decimal = default_places)
-
+        np.testing.assert_almost_equal(
+            ff.reshape([-1]), expected_f.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            at.reshape([-1]), expected_t.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            av.reshape([-1]), expected_v.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            gt.reshape([-1]), expected_gt.reshape([-1]), decimal=default_places
+        )
+        np.testing.assert_almost_equal(
+            vv.reshape([-1]), expected_gv.reshape([-1]), decimal=default_places
+        )
diff --git a/source/tests/test_deeppot_a.py b/source/tests/test_deeppot_a.py
index f856056472..37a92d7c0e 100644
--- a/source/tests/test_deeppot_a.py
+++ b/source/tests/test_deeppot_a.py
@@ -1,22 +1,37 @@
-import os,sys,platform,shutil,dpdata
-import numpy as np
+import os
+import platform
+import shutil
+import sys
 import unittest
 
-from infer.convert2pb import convert_pbtxt_to_pb
-from deepmd.infer import DeepPot
-from deepmd.env import MODEL_VERSION
-from common import tests_path, run_dp
-
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+import dpdata
+import numpy as np
+from common import (
+    run_dp,
+    tests_path,
+)
+from infer.convert2pb import (
+    convert_pbtxt_to_pb,
+)
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    MODEL_VERSION,
+)
+from deepmd.infer import (
+    DeepPot,
+)
+
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
-else :
+else:
     default_places = 10
 
-class TestModelMajorCompatability(unittest.TestCase) :
+
+class TestModelMajorCompatability(unittest.TestCase):
     def setUp(self):
-        model_file = str(tests_path / os.path.join("infer","deeppot.pbtxt"))
-        with open(model_file, 'r') as fp:
+        model_file = str(tests_path / os.path.join("infer", "deeppot.pbtxt"))
+        with open(model_file, "r") as fp:
             # data = fp.read().replace('\n', '')
             data = fp.read().split("\n")
             for ii in range(len(data)):
@@ -35,18 +50,18 @@ def tearDown(self):
         os.remove(self.version_pbtxt)
         os.remove(self.version_pb)
 
-    def test(self):        
+    def test(self):
         with self.assertRaises(RuntimeError) as context:
             DeepPot(str(self.version_pb))
-        self.assertTrue('incompatible' in str(context.exception))
+        self.assertTrue("incompatible" in str(context.exception))
         self.assertTrue(MODEL_VERSION in str(context.exception))
-        self.assertTrue('0.0' in str(context.exception))
+        self.assertTrue("0.0" in str(context.exception))
 
 
-class TestModelMinorCompatability(unittest.TestCase) :
+class TestModelMinorCompatability(unittest.TestCase):
     def setUp(self):
-        model_file = str(tests_path / os.path.join("infer","deeppot.pbtxt"))
-        with open(model_file, 'r') as fp:
+        model_file = str(tests_path / os.path.join("infer", "deeppot.pbtxt"))
+        with open(model_file, "r") as fp:
             # data = fp.read().replace('\n', '')
             data = fp.read().split("\n")
             for ii in range(len(data)):
@@ -65,77 +80,192 @@ def tearDown(self):
         os.remove(self.version_pbtxt)
         os.remove(self.version_pb)
 
-    def test(self):        
+    def test(self):
         with self.assertRaises(RuntimeError) as context:
             DeepPot(self.version_pb)
-        self.assertTrue('incompatible' in str(context.exception))
+        self.assertTrue("incompatible" in str(context.exception))
         self.assertTrue(MODEL_VERSION in str(context.exception))
-        self.assertTrue('0.1000000' in str(context.exception))
+        self.assertTrue("0.1000000" in str(context.exception))
 
 
-class TestDeepPotAPBC(unittest.TestCase) :
+class TestDeepPotAPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deeppot.pbtxt")), "deeppot.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deeppot.pbtxt")), "deeppot.pb"
+        )
         cls.dp = DeepPot("deeppot.pb")
 
     def setUp(self):
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
-        self.expected_e = np.array([-9.275780747115504710e+01,-1.863501786584258468e+02,-1.863392472863538103e+02,-9.279281325486221021e+01,-1.863671545232153903e+02,-1.863619822847602165e+02])
-        self.expected_f = np.array([-3.034045420701179663e-01,8.405844663871177014e-01,7.696947487118485642e-02,7.662001266663505117e-01,-1.880601391333554251e-01,-6.183333871091722944e-01,-5.036172391059643427e-01,-6.529525836149027151e-01,5.432962643022043459e-01,6.382357912332115024e-01,-1.748518296794561167e-01,3.457363524891907125e-01,1.286482986991941552e-03,3.757251165286925043e-01,-5.972588700887541124e-01,-5.987006197104716154e-01,-2.004450304880958100e-01,2.495901655353461868e-01])
-        self.expected_v = np.array([-2.912234126853306959e-01,-3.800610846612756388e-02,2.776624987489437202e-01,-5.053761003913598976e-02,-3.152373041953385746e-01,1.060894290092162379e-01,2.826389131596073745e-01,1.039129970665329250e-01,-2.584378792325942586e-01,-3.121722367954994914e-01,8.483275876786681990e-02,2.524662342344257682e-01,4.142176771106586414e-02,-3.820285230785245428e-02,-2.727311173065460545e-02,2.668859789777112135e-01,-6.448243569420382404e-02,-2.121731470426218846e-01,-8.624335220278558922e-02,-1.809695356746038597e-01,1.529875294531883312e-01,-1.283658185172031341e-01,-1.992682279795223999e-01,1.409924999632362341e-01,1.398322735274434292e-01,1.804318474574856390e-01,-1.470309318999652726e-01,-2.593983661598450730e-01,-4.236536279233147489e-02,3.386387920184946720e-02,-4.174017537818433543e-02,-1.003500282164128260e-01,1.525690815194478966e-01,3.398976109910181037e-02,1.522253908435125536e-01,-2.349125581341701963e-01,9.515545977581392825e-04,-1.643218849228543846e-02,1.993234765412972564e-02,6.027265332209678569e-04,-9.563256398907417355e-02,1.510815124001868293e-01,-7.738094816888557714e-03,1.502832772532304295e-01,-2.380965783745832010e-01,-2.309456719810296654e-01,-6.666961081213038098e-02,7.955566551234216632e-02,-8.099093777937517447e-02,-3.386641099800401927e-02,4.447884755740908608e-02,1.008593228579038742e-01,4.556718179228393811e-02,-6.078081273849572641e-02])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+        self.expected_e = np.array(
+            [
+                -9.275780747115504710e01,
+                -1.863501786584258468e02,
+                -1.863392472863538103e02,
+                -9.279281325486221021e01,
+                -1.863671545232153903e02,
+                -1.863619822847602165e02,
+            ]
+        )
+        self.expected_f = np.array(
+            [
+                -3.034045420701179663e-01,
+                8.405844663871177014e-01,
+                7.696947487118485642e-02,
+                7.662001266663505117e-01,
+                -1.880601391333554251e-01,
+                -6.183333871091722944e-01,
+                -5.036172391059643427e-01,
+                -6.529525836149027151e-01,
+                5.432962643022043459e-01,
+                6.382357912332115024e-01,
+                -1.748518296794561167e-01,
+                3.457363524891907125e-01,
+                1.286482986991941552e-03,
+                3.757251165286925043e-01,
+                -5.972588700887541124e-01,
+                -5.987006197104716154e-01,
+                -2.004450304880958100e-01,
+                2.495901655353461868e-01,
+            ]
+        )
+        self.expected_v = np.array(
+            [
+                -2.912234126853306959e-01,
+                -3.800610846612756388e-02,
+                2.776624987489437202e-01,
+                -5.053761003913598976e-02,
+                -3.152373041953385746e-01,
+                1.060894290092162379e-01,
+                2.826389131596073745e-01,
+                1.039129970665329250e-01,
+                -2.584378792325942586e-01,
+                -3.121722367954994914e-01,
+                8.483275876786681990e-02,
+                2.524662342344257682e-01,
+                4.142176771106586414e-02,
+                -3.820285230785245428e-02,
+                -2.727311173065460545e-02,
+                2.668859789777112135e-01,
+                -6.448243569420382404e-02,
+                -2.121731470426218846e-01,
+                -8.624335220278558922e-02,
+                -1.809695356746038597e-01,
+                1.529875294531883312e-01,
+                -1.283658185172031341e-01,
+                -1.992682279795223999e-01,
+                1.409924999632362341e-01,
+                1.398322735274434292e-01,
+                1.804318474574856390e-01,
+                -1.470309318999652726e-01,
+                -2.593983661598450730e-01,
+                -4.236536279233147489e-02,
+                3.386387920184946720e-02,
+                -4.174017537818433543e-02,
+                -1.003500282164128260e-01,
+                1.525690815194478966e-01,
+                3.398976109910181037e-02,
+                1.522253908435125536e-01,
+                -2.349125581341701963e-01,
+                9.515545977581392825e-04,
+                -1.643218849228543846e-02,
+                1.993234765412972564e-02,
+                6.027265332209678569e-04,
+                -9.563256398907417355e-02,
+                1.510815124001868293e-01,
+                -7.738094816888557714e-03,
+                1.502832772532304295e-01,
+                -2.380965783745832010e-01,
+                -2.309456719810296654e-01,
+                -6.666961081213038098e-02,
+                7.955566551234216632e-02,
+                -8.099093777937517447e-02,
+                -3.386641099800401927e-02,
+                4.447884755740908608e-02,
+                1.008593228579038742e-01,
+                4.556718179228393811e-02,
+                -6.078081273849572641e-02,
+            ]
+        )
 
     @classmethod
     def tearDownClass(cls):
         os.remove("deeppot.pb")
         cls.dp = None
-    
+
     def test_attrs(self):
         self.assertEqual(self.dp.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp.get_dim_fparam(), 0)
         self.assertEqual(self.dp.get_dim_aparam(), 0)
 
     def test_1frame(self):
-        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, atomic = False)
+        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, atomic=False)
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
         # check values
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
     def test_1frame_atm(self):
-        ee, ff, vv, ae, av = self.dp.eval(self.coords, self.box, self.atype, atomic = True)
+        ee, ff, vv, ae, av = self.dp.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
-        self.assertEqual(ae.shape, (nframes,natoms,1))
-        self.assertEqual(av.shape, (nframes,natoms,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
+        self.assertEqual(ae.shape, (nframes, natoms, 1))
+        self.assertEqual(av.shape, (nframes, natoms, 9))
         # check values
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        np.testing.assert_almost_equal(ae.ravel(), self.expected_e.ravel(), default_places)
-        np.testing.assert_almost_equal(av.ravel(), self.expected_v.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        np.testing.assert_almost_equal(
+            ae.ravel(), self.expected_e.ravel(), default_places
+        )
+        np.testing.assert_almost_equal(
+            av.ravel(), self.expected_v.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
     def test_descriptor(self):
@@ -146,194 +276,449 @@ def test_descriptor(self):
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        ee, ff, vv, ae, av = self.dp.eval(coords2, box2, self.atype, atomic = True)
+        ee, ff, vv, ae, av = self.dp.eval(coords2, box2, self.atype, atomic=True)
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
-        self.assertEqual(ae.shape, (nframes,natoms,1))
-        self.assertEqual(av.shape, (nframes,natoms,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
+        self.assertEqual(ae.shape, (nframes, natoms, 1))
+        self.assertEqual(av.shape, (nframes, natoms, 9))
         # check values
-        expected_f = np.concatenate((self.expected_f, self.expected_f), axis = 0)
-        expected_e = np.concatenate((self.expected_e, self.expected_e), axis = 0)
-        expected_v = np.concatenate((self.expected_v, self.expected_v), axis = 0)
+        expected_f = np.concatenate((self.expected_f, self.expected_f), axis=0)
+        expected_e = np.concatenate((self.expected_e, self.expected_e), axis=0)
+        expected_v = np.concatenate((self.expected_v, self.expected_v), axis=0)
         np.testing.assert_almost_equal(ff.ravel(), expected_f.ravel(), default_places)
         np.testing.assert_almost_equal(ae.ravel(), expected_e.ravel(), default_places)
         np.testing.assert_almost_equal(av.ravel(), expected_v.ravel(), default_places)
-        expected_se = np.sum(expected_e.reshape([nframes, -1]), axis = 1)
+        expected_se = np.sum(expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
 
-class TestDeepPotANoPBC(unittest.TestCase) :
+class TestDeepPotANoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deeppot.pbtxt")), "deeppot.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deeppot.pbtxt")), "deeppot.pb"
+        )
         cls.dp = DeepPot("deeppot.pb")
 
     def setUp(self):
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
         self.box = None
-        self.expected_e = np.array([-9.255934839310273787e+01,-1.863253376736990106e+02,-1.857237299341402945e+02,-9.279308539717486326e+01,-1.863708105823244239e+02,-1.863635196514972563e+02])
-        self.expected_f = np.array([-2.161037360255332107e+00,9.052994347015581589e-01,1.635379623977007979e+00,2.161037360255332107e+00,-9.052994347015581589e-01,-1.635379623977007979e+00,-1.167128117249453811e-02,1.371975700096064992e-03,-1.575265180249604477e-03,6.226508593971802341e-01,-1.816734122009256991e-01,3.561766019664774907e-01,-1.406075393906316626e-02,3.789140061530929526e-01,-6.018777878642909140e-01,-5.969188242856223736e-01,-1.986125696522633155e-01,2.472764510780630642e-01])
-        self.expected_v = np.array([-7.042445481792056761e-01,2.950213647777754078e-01,5.329418202437231633e-01,2.950213647777752968e-01,-1.235900311906896754e-01,-2.232594111831812944e-01,5.329418202437232743e-01,-2.232594111831813499e-01,-4.033073234276823849e-01,-8.949230984097404917e-01,3.749002169013777030e-01,6.772391014992630298e-01,3.749002169013777586e-01,-1.570527935667933583e-01,-2.837082722496912512e-01,6.772391014992631408e-01,-2.837082722496912512e-01,-5.125052659994422388e-01,4.858210330291591605e-02,-6.902596153269104431e-03,6.682612642430500391e-03,-5.612247004554610057e-03,9.767795567660207592e-04,-9.773758942738038254e-04,5.638322117219018645e-03,-9.483806049779926932e-04,8.493873281881353637e-04,-2.941738570564985666e-01,-4.482529909499673171e-02,4.091569840186781021e-02,-4.509020615859140463e-02,-1.013919988807244071e-01,1.551440772665269030e-01,4.181857726606644232e-02,1.547200233064863484e-01,-2.398213304685777592e-01,-3.218625798524068354e-02,-1.012438450438508421e-02,1.271639330380921855e-02,3.072814938490859779e-03,-9.556241797915024372e-02,1.512251983492413077e-01,-8.277872384009607454e-03,1.505412040827929787e-01,-2.386150620881526407e-01,-2.312295470054945568e-01,-6.631490213524345034e-02,7.932427266386249398e-02,-8.053754366323923053e-02,-3.294595881137418747e-02,4.342495071150231922e-02,1.004599500126941436e-01,4.450400364869536163e-02,-5.951077548033092968e-02])
+        self.expected_e = np.array(
+            [
+                -9.255934839310273787e01,
+                -1.863253376736990106e02,
+                -1.857237299341402945e02,
+                -9.279308539717486326e01,
+                -1.863708105823244239e02,
+                -1.863635196514972563e02,
+            ]
+        )
+        self.expected_f = np.array(
+            [
+                -2.161037360255332107e00,
+                9.052994347015581589e-01,
+                1.635379623977007979e00,
+                2.161037360255332107e00,
+                -9.052994347015581589e-01,
+                -1.635379623977007979e00,
+                -1.167128117249453811e-02,
+                1.371975700096064992e-03,
+                -1.575265180249604477e-03,
+                6.226508593971802341e-01,
+                -1.816734122009256991e-01,
+                3.561766019664774907e-01,
+                -1.406075393906316626e-02,
+                3.789140061530929526e-01,
+                -6.018777878642909140e-01,
+                -5.969188242856223736e-01,
+                -1.986125696522633155e-01,
+                2.472764510780630642e-01,
+            ]
+        )
+        self.expected_v = np.array(
+            [
+                -7.042445481792056761e-01,
+                2.950213647777754078e-01,
+                5.329418202437231633e-01,
+                2.950213647777752968e-01,
+                -1.235900311906896754e-01,
+                -2.232594111831812944e-01,
+                5.329418202437232743e-01,
+                -2.232594111831813499e-01,
+                -4.033073234276823849e-01,
+                -8.949230984097404917e-01,
+                3.749002169013777030e-01,
+                6.772391014992630298e-01,
+                3.749002169013777586e-01,
+                -1.570527935667933583e-01,
+                -2.837082722496912512e-01,
+                6.772391014992631408e-01,
+                -2.837082722496912512e-01,
+                -5.125052659994422388e-01,
+                4.858210330291591605e-02,
+                -6.902596153269104431e-03,
+                6.682612642430500391e-03,
+                -5.612247004554610057e-03,
+                9.767795567660207592e-04,
+                -9.773758942738038254e-04,
+                5.638322117219018645e-03,
+                -9.483806049779926932e-04,
+                8.493873281881353637e-04,
+                -2.941738570564985666e-01,
+                -4.482529909499673171e-02,
+                4.091569840186781021e-02,
+                -4.509020615859140463e-02,
+                -1.013919988807244071e-01,
+                1.551440772665269030e-01,
+                4.181857726606644232e-02,
+                1.547200233064863484e-01,
+                -2.398213304685777592e-01,
+                -3.218625798524068354e-02,
+                -1.012438450438508421e-02,
+                1.271639330380921855e-02,
+                3.072814938490859779e-03,
+                -9.556241797915024372e-02,
+                1.512251983492413077e-01,
+                -8.277872384009607454e-03,
+                1.505412040827929787e-01,
+                -2.386150620881526407e-01,
+                -2.312295470054945568e-01,
+                -6.631490213524345034e-02,
+                7.932427266386249398e-02,
+                -8.053754366323923053e-02,
+                -3.294595881137418747e-02,
+                4.342495071150231922e-02,
+                1.004599500126941436e-01,
+                4.450400364869536163e-02,
+                -5.951077548033092968e-02,
+            ]
+        )
 
     @classmethod
     def tearDownClass(cls):
         os.remove("deeppot.pb")
         cls.dp = None
-    
+
     def test_1frame(self):
-        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, atomic = False)
+        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, atomic=False)
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
         # check values
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
     def test_1frame_atm(self):
-        ee, ff, vv, ae, av = self.dp.eval(self.coords, self.box, self.atype, atomic = True)
+        ee, ff, vv, ae, av = self.dp.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
-        self.assertEqual(ae.shape, (nframes,natoms,1))
-        self.assertEqual(av.shape, (nframes,natoms,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
+        self.assertEqual(ae.shape, (nframes, natoms, 1))
+        self.assertEqual(av.shape, (nframes, natoms, 9))
         # check values
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        np.testing.assert_almost_equal(ae.ravel(), self.expected_e.ravel(), default_places)
-        np.testing.assert_almost_equal(av.ravel(), self.expected_v.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        np.testing.assert_almost_equal(
+            ae.ravel(), self.expected_e.ravel(), default_places
+        )
+        np.testing.assert_almost_equal(
+            av.ravel(), self.expected_v.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
-
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
-        ee, ff, vv, ae, av = self.dp.eval(coords2, self.box, self.atype, atomic = True)
+        ee, ff, vv, ae, av = self.dp.eval(coords2, self.box, self.atype, atomic=True)
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
-        self.assertEqual(ae.shape, (nframes,natoms,1))
-        self.assertEqual(av.shape, (nframes,natoms,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
+        self.assertEqual(ae.shape, (nframes, natoms, 1))
+        self.assertEqual(av.shape, (nframes, natoms, 9))
         # check values
-        expected_f = np.concatenate((self.expected_f, self.expected_f), axis = 0)
-        expected_e = np.concatenate((self.expected_e, self.expected_e), axis = 0)
-        expected_v = np.concatenate((self.expected_v, self.expected_v), axis = 0)
+        expected_f = np.concatenate((self.expected_f, self.expected_f), axis=0)
+        expected_e = np.concatenate((self.expected_e, self.expected_e), axis=0)
+        expected_v = np.concatenate((self.expected_v, self.expected_v), axis=0)
         np.testing.assert_almost_equal(ff.ravel(), expected_f.ravel(), default_places)
         np.testing.assert_almost_equal(ae.ravel(), expected_e.ravel(), default_places)
         np.testing.assert_almost_equal(av.ravel(), expected_v.ravel(), default_places)
-        expected_se = np.sum(expected_e.reshape([nframes, -1]), axis = 1)
+        expected_se = np.sum(expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
-    
-class TestDeepPotALargeBoxNoPBC(unittest.TestCase) :
+
+class TestDeepPotALargeBoxNoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deeppot.pbtxt")), "deeppot.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deeppot.pbtxt")), "deeppot.pb"
+        )
         cls.dp = DeepPot("deeppot.pb")
 
     def setUp(self):
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([19., 0., 0., 0., 13., 0., 0., 0., 13.])
-        self.expected_e = np.array([-9.255934839310273787e+01,-1.863253376736990106e+02,-1.857237299341402945e+02,-9.279308539717486326e+01,-1.863708105823244239e+02,-1.863635196514972563e+02])
-        self.expected_f = np.array([-2.161037360255332107e+00,9.052994347015581589e-01,1.635379623977007979e+00,2.161037360255332107e+00,-9.052994347015581589e-01,-1.635379623977007979e+00,-1.167128117249453811e-02,1.371975700096064992e-03,-1.575265180249604477e-03,6.226508593971802341e-01,-1.816734122009256991e-01,3.561766019664774907e-01,-1.406075393906316626e-02,3.789140061530929526e-01,-6.018777878642909140e-01,-5.969188242856223736e-01,-1.986125696522633155e-01,2.472764510780630642e-01])
-        self.expected_v = np.array([-7.042445481792056761e-01,2.950213647777754078e-01,5.329418202437231633e-01,2.950213647777752968e-01,-1.235900311906896754e-01,-2.232594111831812944e-01,5.329418202437232743e-01,-2.232594111831813499e-01,-4.033073234276823849e-01,-8.949230984097404917e-01,3.749002169013777030e-01,6.772391014992630298e-01,3.749002169013777586e-01,-1.570527935667933583e-01,-2.837082722496912512e-01,6.772391014992631408e-01,-2.837082722496912512e-01,-5.125052659994422388e-01,4.858210330291591605e-02,-6.902596153269104431e-03,6.682612642430500391e-03,-5.612247004554610057e-03,9.767795567660207592e-04,-9.773758942738038254e-04,5.638322117219018645e-03,-9.483806049779926932e-04,8.493873281881353637e-04,-2.941738570564985666e-01,-4.482529909499673171e-02,4.091569840186781021e-02,-4.509020615859140463e-02,-1.013919988807244071e-01,1.551440772665269030e-01,4.181857726606644232e-02,1.547200233064863484e-01,-2.398213304685777592e-01,-3.218625798524068354e-02,-1.012438450438508421e-02,1.271639330380921855e-02,3.072814938490859779e-03,-9.556241797915024372e-02,1.512251983492413077e-01,-8.277872384009607454e-03,1.505412040827929787e-01,-2.386150620881526407e-01,-2.312295470054945568e-01,-6.631490213524345034e-02,7.932427266386249398e-02,-8.053754366323923053e-02,-3.294595881137418747e-02,4.342495071150231922e-02,1.004599500126941436e-01,4.450400364869536163e-02,-5.951077548033092968e-02])
+        self.box = np.array([19.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+        self.expected_e = np.array(
+            [
+                -9.255934839310273787e01,
+                -1.863253376736990106e02,
+                -1.857237299341402945e02,
+                -9.279308539717486326e01,
+                -1.863708105823244239e02,
+                -1.863635196514972563e02,
+            ]
+        )
+        self.expected_f = np.array(
+            [
+                -2.161037360255332107e00,
+                9.052994347015581589e-01,
+                1.635379623977007979e00,
+                2.161037360255332107e00,
+                -9.052994347015581589e-01,
+                -1.635379623977007979e00,
+                -1.167128117249453811e-02,
+                1.371975700096064992e-03,
+                -1.575265180249604477e-03,
+                6.226508593971802341e-01,
+                -1.816734122009256991e-01,
+                3.561766019664774907e-01,
+                -1.406075393906316626e-02,
+                3.789140061530929526e-01,
+                -6.018777878642909140e-01,
+                -5.969188242856223736e-01,
+                -1.986125696522633155e-01,
+                2.472764510780630642e-01,
+            ]
+        )
+        self.expected_v = np.array(
+            [
+                -7.042445481792056761e-01,
+                2.950213647777754078e-01,
+                5.329418202437231633e-01,
+                2.950213647777752968e-01,
+                -1.235900311906896754e-01,
+                -2.232594111831812944e-01,
+                5.329418202437232743e-01,
+                -2.232594111831813499e-01,
+                -4.033073234276823849e-01,
+                -8.949230984097404917e-01,
+                3.749002169013777030e-01,
+                6.772391014992630298e-01,
+                3.749002169013777586e-01,
+                -1.570527935667933583e-01,
+                -2.837082722496912512e-01,
+                6.772391014992631408e-01,
+                -2.837082722496912512e-01,
+                -5.125052659994422388e-01,
+                4.858210330291591605e-02,
+                -6.902596153269104431e-03,
+                6.682612642430500391e-03,
+                -5.612247004554610057e-03,
+                9.767795567660207592e-04,
+                -9.773758942738038254e-04,
+                5.638322117219018645e-03,
+                -9.483806049779926932e-04,
+                8.493873281881353637e-04,
+                -2.941738570564985666e-01,
+                -4.482529909499673171e-02,
+                4.091569840186781021e-02,
+                -4.509020615859140463e-02,
+                -1.013919988807244071e-01,
+                1.551440772665269030e-01,
+                4.181857726606644232e-02,
+                1.547200233064863484e-01,
+                -2.398213304685777592e-01,
+                -3.218625798524068354e-02,
+                -1.012438450438508421e-02,
+                1.271639330380921855e-02,
+                3.072814938490859779e-03,
+                -9.556241797915024372e-02,
+                1.512251983492413077e-01,
+                -8.277872384009607454e-03,
+                1.505412040827929787e-01,
+                -2.386150620881526407e-01,
+                -2.312295470054945568e-01,
+                -6.631490213524345034e-02,
+                7.932427266386249398e-02,
+                -8.053754366323923053e-02,
+                -3.294595881137418747e-02,
+                4.342495071150231922e-02,
+                1.004599500126941436e-01,
+                4.450400364869536163e-02,
+                -5.951077548033092968e-02,
+            ]
+        )
 
     @classmethod
     def tearDownClass(cls):
         os.remove("deeppot.pb")
         cls.dp = None
-    
+
     def test_1frame(self):
-        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, atomic = False)
+        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, atomic=False)
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
         # check values
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
     def test_1frame_atm(self):
-        ee, ff, vv, ae, av = self.dp.eval(self.coords, self.box, self.atype, atomic = True)
+        ee, ff, vv, ae, av = self.dp.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
-        self.assertEqual(ae.shape, (nframes,natoms,1))
-        self.assertEqual(av.shape, (nframes,natoms,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
+        self.assertEqual(ae.shape, (nframes, natoms, 1))
+        self.assertEqual(av.shape, (nframes, natoms, 9))
         # check values
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        np.testing.assert_almost_equal(ae.ravel(), self.expected_e.ravel(), default_places)
-        np.testing.assert_almost_equal(av.ravel(), self.expected_v.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        np.testing.assert_almost_equal(
+            ae.ravel(), self.expected_e.ravel(), default_places
+        )
+        np.testing.assert_almost_equal(
+            av.ravel(), self.expected_v.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
     def test_ase(self):
-        from ase import Atoms
-        from deepmd.calculator import DP
-        water = Atoms('OHHOHH',
-                    positions=self.coords.reshape((-1,3)),
-                    cell=self.box.reshape((3,3)),
-                    calculator=DP("deeppot.pb"))
+        from ase import (
+            Atoms,
+        )
+
+        from deepmd.calculator import (
+            DP,
+        )
+
+        water = Atoms(
+            "OHHOHH",
+            positions=self.coords.reshape((-1, 3)),
+            cell=self.box.reshape((3, 3)),
+            calculator=DP("deeppot.pb"),
+        )
         ee = water.get_potential_energy()
         ff = water.get_forces()
         nframes = 1
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
 
 
 class TestModelConvert(unittest.TestCase):
     def setUp(self):
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
 
     def test_convert_012(self):
         old_model = "deeppot.pb"
@@ -346,19 +731,40 @@ def test_convert_012(self):
         os.remove(new_model)
 
 
-class TestTypeEmbed(unittest.TestCase) :
+class TestTypeEmbed(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer", "se_e2_a_tebd.pbtxt")), "se_e2_a_tebd.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "se_e2_a_tebd.pbtxt")),
+            "se_e2_a_tebd.pb",
+        )
         cls.dp = DeepPot("se_e2_a_tebd.pb")
 
     def test_eval_typeebd(self):
-        expected_typeebd = np.array([
-            [-0.4602908199, -0.9440795817, -0.857044451, -0.3448434537,
-            -0.6310194663, -0.9765837147, -0.3945653821, 0.8973716518],
-            [-0.7239568558, -0.9672733137, -0.420987752, -0.4542931277,
-            -0.79586188, -0.9615886543, -0.6864800369, 0.9477863254],
-            ])
+        expected_typeebd = np.array(
+            [
+                [
+                    -0.4602908199,
+                    -0.9440795817,
+                    -0.857044451,
+                    -0.3448434537,
+                    -0.6310194663,
+                    -0.9765837147,
+                    -0.3945653821,
+                    0.8973716518,
+                ],
+                [
+                    -0.7239568558,
+                    -0.9672733137,
+                    -0.420987752,
+                    -0.4542931277,
+                    -0.79586188,
+                    -0.9615886543,
+                    -0.6864800369,
+                    0.9477863254,
+                ],
+            ]
+        )
 
         eval_typeebd = self.dp.eval_typeebd()
         np.testing.assert_almost_equal(eval_typeebd, expected_typeebd, default_places)
diff --git a/source/tests/test_deeppot_r.py b/source/tests/test_deeppot_r.py
index 24f35dc47b..f1e498db3f 100644
--- a/source/tests/test_deeppot_r.py
+++ b/source/tests/test_deeppot_r.py
@@ -1,51 +1,164 @@
-import os,sys,platform,shutil,dpdata
-import numpy as np
+import os
+import platform
+import shutil
+import sys
 import unittest
 
-from infer.convert2pb import convert_pbtxt_to_pb
-from deepmd.infer import DeepPot
-from common import tests_path
+import dpdata
+import numpy as np
+from common import (
+    tests_path,
+)
+from infer.convert2pb import (
+    convert_pbtxt_to_pb,
+)
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.infer import (
+    DeepPot,
+)
 
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
-else :
+else:
     default_places = 10
 
-class TestDeepPotRPBC(unittest.TestCase) :
+
+class TestDeepPotRPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deeppot-r.pbtxt")), "deeppot.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deeppot-r.pbtxt")), "deeppot.pb"
+        )
         cls.dp = DeepPot("deeppot.pb")
 
     def setUp(self):
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
-        self.expected_e = np.array([
-            -9.320909762801588272e+01,-1.868020345400987878e+02,-1.868011172371355997e+02,-9.320868430396934912e+01,-1.868010398844378415e+02,-1.868016706555875999e+02
-        ])
-        self.expected_f = np.array([
-            6.385312846474267391e-04,-6.460452911141417731e-03,-5.652405655332678417e-04,-7.516468794343579736e-03,1.128804614240160216e-03,5.531937784564192051e-03,1.914138124904981664e-03,5.601819906021693503e-03,-5.131359585752605541e-03,-4.847104424804288617e-03,1.992071550328819614e-03,-4.028159855157302516e-03,1.236340684486603517e-03,-5.373955841338794344e-03,8.312829460571366513e-03,8.574563125108854156e-03,3.111712681889538742e-03,-4.120007238692381148e-03
-        ])
-        self.expected_v = np.array([
-            5.844056241889131371e-03,4.663973497239899614e-04,-2.268382127762904633e-03,4.663973497239897988e-04,2.349338784202595950e-03,-6.908546513234039253e-04,-2.268382127762904633e-03,-6.908546513234039253e-04,2.040499248150800561e-03,4.238130266437327605e-03,-1.539867187443782223e-04,-2.393101333240631613e-03,-1.539867187443782223e-04,4.410341945447907377e-04,9.544239698119633068e-06,-2.393101333240631613e-03,9.544239698119578858e-06,1.877785959095269654e-03,5.798992562057291543e-03,6.943392552230453693e-04,-1.180376879311998773e-03,6.943392552230453693e-04,1.686725132156275536e-03,-1.461632060145726542e-03,-1.180376879311998556e-03,-1.461632060145726325e-03,1.749543733794208444e-03,7.173915604192910439e-03,3.903218041111061569e-04,-5.747400467123527524e-04,3.903218041111061569e-04,1.208289706621179949e-03,-1.826828914132010932e-03,-5.747400467123527524e-04,-1.826828914132011148e-03,2.856960586657185906e-03,4.067553030177322240e-03,-3.267469855253819430e-05,-6.980667859103454904e-05,-3.267469855253830272e-05,1.387653029234650918e-03,-2.096820720698671855e-03,-6.980667859103444062e-05,-2.096820720698671855e-03,3.218305506720191278e-03,4.753992590355240674e-03,1.224911338353675992e-03,-1.683421934571502484e-03,1.224911338353676209e-03,7.332113564901583539e-04,-1.025577052190138451e-03,-1.683421934571502484e-03,-1.025577052190138234e-03,1.456681925652047018e-03
-        ])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+        self.expected_e = np.array(
+            [
+                -9.320909762801588272e01,
+                -1.868020345400987878e02,
+                -1.868011172371355997e02,
+                -9.320868430396934912e01,
+                -1.868010398844378415e02,
+                -1.868016706555875999e02,
+            ]
+        )
+        self.expected_f = np.array(
+            [
+                6.385312846474267391e-04,
+                -6.460452911141417731e-03,
+                -5.652405655332678417e-04,
+                -7.516468794343579736e-03,
+                1.128804614240160216e-03,
+                5.531937784564192051e-03,
+                1.914138124904981664e-03,
+                5.601819906021693503e-03,
+                -5.131359585752605541e-03,
+                -4.847104424804288617e-03,
+                1.992071550328819614e-03,
+                -4.028159855157302516e-03,
+                1.236340684486603517e-03,
+                -5.373955841338794344e-03,
+                8.312829460571366513e-03,
+                8.574563125108854156e-03,
+                3.111712681889538742e-03,
+                -4.120007238692381148e-03,
+            ]
+        )
+        self.expected_v = np.array(
+            [
+                5.844056241889131371e-03,
+                4.663973497239899614e-04,
+                -2.268382127762904633e-03,
+                4.663973497239897988e-04,
+                2.349338784202595950e-03,
+                -6.908546513234039253e-04,
+                -2.268382127762904633e-03,
+                -6.908546513234039253e-04,
+                2.040499248150800561e-03,
+                4.238130266437327605e-03,
+                -1.539867187443782223e-04,
+                -2.393101333240631613e-03,
+                -1.539867187443782223e-04,
+                4.410341945447907377e-04,
+                9.544239698119633068e-06,
+                -2.393101333240631613e-03,
+                9.544239698119578858e-06,
+                1.877785959095269654e-03,
+                5.798992562057291543e-03,
+                6.943392552230453693e-04,
+                -1.180376879311998773e-03,
+                6.943392552230453693e-04,
+                1.686725132156275536e-03,
+                -1.461632060145726542e-03,
+                -1.180376879311998556e-03,
+                -1.461632060145726325e-03,
+                1.749543733794208444e-03,
+                7.173915604192910439e-03,
+                3.903218041111061569e-04,
+                -5.747400467123527524e-04,
+                3.903218041111061569e-04,
+                1.208289706621179949e-03,
+                -1.826828914132010932e-03,
+                -5.747400467123527524e-04,
+                -1.826828914132011148e-03,
+                2.856960586657185906e-03,
+                4.067553030177322240e-03,
+                -3.267469855253819430e-05,
+                -6.980667859103454904e-05,
+                -3.267469855253830272e-05,
+                1.387653029234650918e-03,
+                -2.096820720698671855e-03,
+                -6.980667859103444062e-05,
+                -2.096820720698671855e-03,
+                3.218305506720191278e-03,
+                4.753992590355240674e-03,
+                1.224911338353675992e-03,
+                -1.683421934571502484e-03,
+                1.224911338353676209e-03,
+                7.332113564901583539e-04,
+                -1.025577052190138451e-03,
+                -1.683421934571502484e-03,
+                -1.025577052190138234e-03,
+                1.456681925652047018e-03,
+            ]
+        )
 
     @classmethod
     def tearDownClass(cls):
         os.remove("deeppot.pb")
         cls.dp = None
-    
+
     def test_attrs(self):
         self.assertEqual(self.dp.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp.get_dim_fparam(), 0)
         self.assertEqual(self.dp.get_dim_aparam(), 0)
 
@@ -56,216 +169,440 @@ def test_attrs(self):
     #     np.savetxt('vv.out', av.reshape([1, -1]), delimiter=',')
 
     def test_1frame(self):
-        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, atomic = False)
+        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, atomic=False)
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
         # check values
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
     def test_1frame_atm(self):
-        ee, ff, vv, ae, av = self.dp.eval(self.coords, self.box, self.atype, atomic = True)
+        ee, ff, vv, ae, av = self.dp.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
-        self.assertEqual(ae.shape, (nframes,natoms,1))
-        self.assertEqual(av.shape, (nframes,natoms,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
+        self.assertEqual(ae.shape, (nframes, natoms, 1))
+        self.assertEqual(av.shape, (nframes, natoms, 9))
         # check values
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        np.testing.assert_almost_equal(ae.ravel(), self.expected_e.ravel(), default_places)
-        np.testing.assert_almost_equal(av.ravel(), self.expected_v.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        np.testing.assert_almost_equal(
+            ae.ravel(), self.expected_e.ravel(), default_places
+        )
+        np.testing.assert_almost_equal(
+            av.ravel(), self.expected_v.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
-
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        ee, ff, vv, ae, av = self.dp.eval(coords2, box2, self.atype, atomic = True)
+        ee, ff, vv, ae, av = self.dp.eval(coords2, box2, self.atype, atomic=True)
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
-        self.assertEqual(ae.shape, (nframes,natoms,1))
-        self.assertEqual(av.shape, (nframes,natoms,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
+        self.assertEqual(ae.shape, (nframes, natoms, 1))
+        self.assertEqual(av.shape, (nframes, natoms, 9))
         # check values
-        expected_f = np.concatenate((self.expected_f, self.expected_f), axis = 0)
-        expected_e = np.concatenate((self.expected_e, self.expected_e), axis = 0)
-        expected_v = np.concatenate((self.expected_v, self.expected_v), axis = 0)
+        expected_f = np.concatenate((self.expected_f, self.expected_f), axis=0)
+        expected_e = np.concatenate((self.expected_e, self.expected_e), axis=0)
+        expected_v = np.concatenate((self.expected_v, self.expected_v), axis=0)
         np.testing.assert_almost_equal(ff.ravel(), expected_f.ravel(), default_places)
         np.testing.assert_almost_equal(ae.ravel(), expected_e.ravel(), default_places)
         np.testing.assert_almost_equal(av.ravel(), expected_v.ravel(), default_places)
-        expected_se = np.sum(expected_e.reshape([nframes, -1]), axis = 1)
+        expected_se = np.sum(expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
 
-class TestDeepPotRNoPBC(unittest.TestCase) :
+class TestDeepPotRNoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deeppot-r.pbtxt")), "deeppot.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deeppot-r.pbtxt")), "deeppot.pb"
+        )
         cls.dp = DeepPot("deeppot.pb")
 
     def setUp(self):
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
         self.box = None
-        self.expected_e = np.array([
-            -9.321213823508108476e+01,-1.868044102481340758e+02,-1.868067983858651075e+02,-9.320899631301440991e+01,-1.868014559732615112e+02,-1.868017660713088617e+02
-        ])
-        self.expected_f = np.array([            
-            4.578151103701261042e-03,-1.917874111009987628e-03,-3.464546781179331930e-03,-4.578151103701261042e-03,1.917874111009987628e-03,3.464546781179331930e-03,-2.624402581721222913e-03,3.566275128489623933e-04,-2.859315986763691776e-04,-5.767787273464367384e-03,1.907053583551196647e-03,-3.889064429673861831e-03,1.786820066350549132e-04,-5.327197473636275694e-03,8.236236182834734409e-03,8.213507848550535492e-03,3.063516377236116545e-03,-4.061240154484504865e-03
-        ])
-        self.expected_v = np.array([        
-            1.984979026299632174e-03,-8.315452677741701822e-04,-1.502146290172694243e-03,-8.315452677741700738e-04,3.483500446080982317e-04,6.292774999372096039e-04,-1.502146290172694243e-03,6.292774999372097123e-04,1.136759354725281907e-03,1.402852790439301908e-03,-5.876815743732210226e-04,-1.061618327900012114e-03,-5.876815743732211311e-04,2.461909298049979960e-04,4.447320022283834766e-04,-1.061618327900012331e-03,4.447320022283834766e-04,8.033868427351443728e-04,4.143606961846296385e-03,-5.511382161123719835e-04,4.465413399437045397e-04,-5.511382161123719835e-04,1.082271054025323839e-04,-1.097918001262628728e-04,4.465413399437046481e-04,-1.097918001262628728e-04,1.220966982358671871e-04,5.263952004497593831e-03,2.395243710938091842e-04,-2.830378939414603329e-04,2.395243710938094010e-04,1.189969706598244898e-03,-1.805627331015851201e-03,-2.830378939414602245e-04,-1.805627331015851635e-03,2.801996513751836820e-03,2.208413501170402270e-03,5.331756287635716889e-05,-1.664423506603235218e-04,5.331756287635695205e-05,1.379626072862918072e-03,-2.094132943741625064e-03,-1.664423506603234133e-04,-2.094132943741625064e-03,3.199787996743366607e-03,4.047014004814953811e-03,1.137904999421357000e-03,-1.568106936614101698e-03,1.137904999421357217e-03,7.205982843216952307e-04,-1.011174600268313238e-03,-1.568106936614101698e-03,-1.011174600268313238e-03,1.435226522157425754e-03            
-        ])
+        self.expected_e = np.array(
+            [
+                -9.321213823508108476e01,
+                -1.868044102481340758e02,
+                -1.868067983858651075e02,
+                -9.320899631301440991e01,
+                -1.868014559732615112e02,
+                -1.868017660713088617e02,
+            ]
+        )
+        self.expected_f = np.array(
+            [
+                4.578151103701261042e-03,
+                -1.917874111009987628e-03,
+                -3.464546781179331930e-03,
+                -4.578151103701261042e-03,
+                1.917874111009987628e-03,
+                3.464546781179331930e-03,
+                -2.624402581721222913e-03,
+                3.566275128489623933e-04,
+                -2.859315986763691776e-04,
+                -5.767787273464367384e-03,
+                1.907053583551196647e-03,
+                -3.889064429673861831e-03,
+                1.786820066350549132e-04,
+                -5.327197473636275694e-03,
+                8.236236182834734409e-03,
+                8.213507848550535492e-03,
+                3.063516377236116545e-03,
+                -4.061240154484504865e-03,
+            ]
+        )
+        self.expected_v = np.array(
+            [
+                1.984979026299632174e-03,
+                -8.315452677741701822e-04,
+                -1.502146290172694243e-03,
+                -8.315452677741700738e-04,
+                3.483500446080982317e-04,
+                6.292774999372096039e-04,
+                -1.502146290172694243e-03,
+                6.292774999372097123e-04,
+                1.136759354725281907e-03,
+                1.402852790439301908e-03,
+                -5.876815743732210226e-04,
+                -1.061618327900012114e-03,
+                -5.876815743732211311e-04,
+                2.461909298049979960e-04,
+                4.447320022283834766e-04,
+                -1.061618327900012331e-03,
+                4.447320022283834766e-04,
+                8.033868427351443728e-04,
+                4.143606961846296385e-03,
+                -5.511382161123719835e-04,
+                4.465413399437045397e-04,
+                -5.511382161123719835e-04,
+                1.082271054025323839e-04,
+                -1.097918001262628728e-04,
+                4.465413399437046481e-04,
+                -1.097918001262628728e-04,
+                1.220966982358671871e-04,
+                5.263952004497593831e-03,
+                2.395243710938091842e-04,
+                -2.830378939414603329e-04,
+                2.395243710938094010e-04,
+                1.189969706598244898e-03,
+                -1.805627331015851201e-03,
+                -2.830378939414602245e-04,
+                -1.805627331015851635e-03,
+                2.801996513751836820e-03,
+                2.208413501170402270e-03,
+                5.331756287635716889e-05,
+                -1.664423506603235218e-04,
+                5.331756287635695205e-05,
+                1.379626072862918072e-03,
+                -2.094132943741625064e-03,
+                -1.664423506603234133e-04,
+                -2.094132943741625064e-03,
+                3.199787996743366607e-03,
+                4.047014004814953811e-03,
+                1.137904999421357000e-03,
+                -1.568106936614101698e-03,
+                1.137904999421357217e-03,
+                7.205982843216952307e-04,
+                -1.011174600268313238e-03,
+                -1.568106936614101698e-03,
+                -1.011174600268313238e-03,
+                1.435226522157425754e-03,
+            ]
+        )
 
     @classmethod
     def tearDownClass(cls):
         os.remove("deeppot.pb")
         cls.dp = None
-    
+
     def test_1frame(self):
-        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, atomic = False)
+        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, atomic=False)
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
         # check values
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
     def test_1frame_atm(self):
-        ee, ff, vv, ae, av = self.dp.eval(self.coords, self.box, self.atype, atomic = True)
+        ee, ff, vv, ae, av = self.dp.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
-        self.assertEqual(ae.shape, (nframes,natoms,1))
-        self.assertEqual(av.shape, (nframes,natoms,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
+        self.assertEqual(ae.shape, (nframes, natoms, 1))
+        self.assertEqual(av.shape, (nframes, natoms, 9))
         # check values
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        np.testing.assert_almost_equal(ae.ravel(), self.expected_e.ravel(), default_places)
-        np.testing.assert_almost_equal(av.ravel(), self.expected_v.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        np.testing.assert_almost_equal(
+            ae.ravel(), self.expected_e.ravel(), default_places
+        )
+        np.testing.assert_almost_equal(
+            av.ravel(), self.expected_v.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
-
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
-        ee, ff, vv, ae, av = self.dp.eval(coords2, self.box, self.atype, atomic = True)
+        ee, ff, vv, ae, av = self.dp.eval(coords2, self.box, self.atype, atomic=True)
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
-        self.assertEqual(ae.shape, (nframes,natoms,1))
-        self.assertEqual(av.shape, (nframes,natoms,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
+        self.assertEqual(ae.shape, (nframes, natoms, 1))
+        self.assertEqual(av.shape, (nframes, natoms, 9))
         # check values
-        expected_f = np.concatenate((self.expected_f, self.expected_f), axis = 0)
-        expected_e = np.concatenate((self.expected_e, self.expected_e), axis = 0)
-        expected_v = np.concatenate((self.expected_v, self.expected_v), axis = 0)
+        expected_f = np.concatenate((self.expected_f, self.expected_f), axis=0)
+        expected_e = np.concatenate((self.expected_e, self.expected_e), axis=0)
+        expected_v = np.concatenate((self.expected_v, self.expected_v), axis=0)
         np.testing.assert_almost_equal(ff.ravel(), expected_f.ravel(), default_places)
         np.testing.assert_almost_equal(ae.ravel(), expected_e.ravel(), default_places)
         np.testing.assert_almost_equal(av.ravel(), expected_v.ravel(), default_places)
-        expected_se = np.sum(expected_e.reshape([nframes, -1]), axis = 1)
+        expected_se = np.sum(expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
-    
-class TestDeepPotRLargeBoxNoPBC(unittest.TestCase) :
+
+class TestDeepPotRLargeBoxNoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deeppot-r.pbtxt")), "deeppot.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deeppot-r.pbtxt")), "deeppot.pb"
+        )
         cls.dp = DeepPot("deeppot.pb")
 
     def setUp(self):
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([19., 0., 0., 0., 13., 0., 0., 0., 13.])
-        self.expected_e = np.array([
-            -9.321213823508108476e+01,-1.868044102481340758e+02,-1.868067983858651075e+02,-9.320899631301440991e+01,-1.868014559732615112e+02,-1.868017660713088617e+02
-        ])
-        self.expected_f = np.array([            
-            4.578151103701261042e-03,-1.917874111009987628e-03,-3.464546781179331930e-03,-4.578151103701261042e-03,1.917874111009987628e-03,3.464546781179331930e-03,-2.624402581721222913e-03,3.566275128489623933e-04,-2.859315986763691776e-04,-5.767787273464367384e-03,1.907053583551196647e-03,-3.889064429673861831e-03,1.786820066350549132e-04,-5.327197473636275694e-03,8.236236182834734409e-03,8.213507848550535492e-03,3.063516377236116545e-03,-4.061240154484504865e-03            
-        ])
-        self.expected_v = np.array([        
-            1.984979026299632174e-03,-8.315452677741701822e-04,-1.502146290172694243e-03,-8.315452677741700738e-04,3.483500446080982317e-04,6.292774999372096039e-04,-1.502146290172694243e-03,6.292774999372097123e-04,1.136759354725281907e-03,1.402852790439301908e-03,-5.876815743732210226e-04,-1.061618327900012114e-03,-5.876815743732211311e-04,2.461909298049979960e-04,4.447320022283834766e-04,-1.061618327900012331e-03,4.447320022283834766e-04,8.033868427351443728e-04,4.143606961846296385e-03,-5.511382161123719835e-04,4.465413399437045397e-04,-5.511382161123719835e-04,1.082271054025323839e-04,-1.097918001262628728e-04,4.465413399437046481e-04,-1.097918001262628728e-04,1.220966982358671871e-04,5.263952004497593831e-03,2.395243710938091842e-04,-2.830378939414603329e-04,2.395243710938094010e-04,1.189969706598244898e-03,-1.805627331015851201e-03,-2.830378939414602245e-04,-1.805627331015851635e-03,2.801996513751836820e-03,2.208413501170402270e-03,5.331756287635716889e-05,-1.664423506603235218e-04,5.331756287635695205e-05,1.379626072862918072e-03,-2.094132943741625064e-03,-1.664423506603234133e-04,-2.094132943741625064e-03,3.199787996743366607e-03,4.047014004814953811e-03,1.137904999421357000e-03,-1.568106936614101698e-03,1.137904999421357217e-03,7.205982843216952307e-04,-1.011174600268313238e-03,-1.568106936614101698e-03,-1.011174600268313238e-03,1.435226522157425754e-03            
-        ])
+        self.box = np.array([19.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+        self.expected_e = np.array(
+            [
+                -9.321213823508108476e01,
+                -1.868044102481340758e02,
+                -1.868067983858651075e02,
+                -9.320899631301440991e01,
+                -1.868014559732615112e02,
+                -1.868017660713088617e02,
+            ]
+        )
+        self.expected_f = np.array(
+            [
+                4.578151103701261042e-03,
+                -1.917874111009987628e-03,
+                -3.464546781179331930e-03,
+                -4.578151103701261042e-03,
+                1.917874111009987628e-03,
+                3.464546781179331930e-03,
+                -2.624402581721222913e-03,
+                3.566275128489623933e-04,
+                -2.859315986763691776e-04,
+                -5.767787273464367384e-03,
+                1.907053583551196647e-03,
+                -3.889064429673861831e-03,
+                1.786820066350549132e-04,
+                -5.327197473636275694e-03,
+                8.236236182834734409e-03,
+                8.213507848550535492e-03,
+                3.063516377236116545e-03,
+                -4.061240154484504865e-03,
+            ]
+        )
+        self.expected_v = np.array(
+            [
+                1.984979026299632174e-03,
+                -8.315452677741701822e-04,
+                -1.502146290172694243e-03,
+                -8.315452677741700738e-04,
+                3.483500446080982317e-04,
+                6.292774999372096039e-04,
+                -1.502146290172694243e-03,
+                6.292774999372097123e-04,
+                1.136759354725281907e-03,
+                1.402852790439301908e-03,
+                -5.876815743732210226e-04,
+                -1.061618327900012114e-03,
+                -5.876815743732211311e-04,
+                2.461909298049979960e-04,
+                4.447320022283834766e-04,
+                -1.061618327900012331e-03,
+                4.447320022283834766e-04,
+                8.033868427351443728e-04,
+                4.143606961846296385e-03,
+                -5.511382161123719835e-04,
+                4.465413399437045397e-04,
+                -5.511382161123719835e-04,
+                1.082271054025323839e-04,
+                -1.097918001262628728e-04,
+                4.465413399437046481e-04,
+                -1.097918001262628728e-04,
+                1.220966982358671871e-04,
+                5.263952004497593831e-03,
+                2.395243710938091842e-04,
+                -2.830378939414603329e-04,
+                2.395243710938094010e-04,
+                1.189969706598244898e-03,
+                -1.805627331015851201e-03,
+                -2.830378939414602245e-04,
+                -1.805627331015851635e-03,
+                2.801996513751836820e-03,
+                2.208413501170402270e-03,
+                5.331756287635716889e-05,
+                -1.664423506603235218e-04,
+                5.331756287635695205e-05,
+                1.379626072862918072e-03,
+                -2.094132943741625064e-03,
+                -1.664423506603234133e-04,
+                -2.094132943741625064e-03,
+                3.199787996743366607e-03,
+                4.047014004814953811e-03,
+                1.137904999421357000e-03,
+                -1.568106936614101698e-03,
+                1.137904999421357217e-03,
+                7.205982843216952307e-04,
+                -1.011174600268313238e-03,
+                -1.568106936614101698e-03,
+                -1.011174600268313238e-03,
+                1.435226522157425754e-03,
+            ]
+        )
 
     @classmethod
     def tearDownClass(cls):
         os.remove("deeppot.pb")
         cls.dp = None
-    
+
     def test_1frame(self):
-        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, atomic = False)
+        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, atomic=False)
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
         # check values
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
     def test_1frame_atm(self):
-        ee, ff, vv, ae, av = self.dp.eval(self.coords, self.box, self.atype, atomic = True)
+        ee, ff, vv, ae, av = self.dp.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
-        self.assertEqual(ae.shape, (nframes,natoms,1))
-        self.assertEqual(av.shape, (nframes,natoms,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
+        self.assertEqual(ae.shape, (nframes, natoms, 1))
+        self.assertEqual(av.shape, (nframes, natoms, 9))
         # check values
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        np.testing.assert_almost_equal(ae.ravel(), self.expected_e.ravel(), default_places)
-        np.testing.assert_almost_equal(av.ravel(), self.expected_v.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        np.testing.assert_almost_equal(
+            ae.ravel(), self.expected_e.ravel(), default_places
+        )
+        np.testing.assert_almost_equal(
+            av.ravel(), self.expected_v.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
-
-
diff --git a/source/tests/test_descrpt_nonsmth.py b/source/tests/test_descrpt_nonsmth.py
index 01fdbcfe85..a1062d8099 100644
--- a/source/tests/test_descrpt_nonsmth.py
+++ b/source/tests/test_descrpt_nonsmth.py
@@ -1,37 +1,38 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
-from deepmd.env import tf
-from tensorflow.python.framework import ops
+import numpy as np
+from common import (
+    Data,
+    force_dw_test,
+    force_test,
+    virial_dw_test,
+    virial_test,
+)
+from tensorflow.python.framework import (
+    ops,
+)
 
 # load grad of force module
 import deepmd.op
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    op_module,
+    tf,
+)
 
-from common import force_test
-from common import virial_test
-from common import force_dw_test
-from common import virial_dw_test
-from common import Data
-
-from deepmd.env import op_module
 
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
-
-class Inter():
-    def setUp (self,
-               data,
-               comp = 0, 
-               pbc = True,
-               sess = None) :
+class Inter:
+    def setUp(self, data, comp=0, pbc=True, sess=None):
         self.sess = sess
         self.data = data
         self.natoms = self.data.get_natoms()
         self.ntypes = self.data.get_ntypes()
-        self.sel_a = [12,24]
-        self.sel_r = [12,24]
+        self.sel_a = [12, 24]
+        self.sel_r = [12, 24]
         self.rcut_a = -1
         self.rcut_r = 10.0
         self.axis_rule = [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]
@@ -41,121 +42,119 @@ def setUp (self,
         self.ndescrpt_a = self.nnei_a * 4
         self.ndescrpt_r = self.nnei_r * 1
         self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r
-        davg = np.zeros ([self.ntypes, self.ndescrpt])
-        dstd = np.ones  ([self.ntypes, self.ndescrpt])
+        davg = np.zeros([self.ntypes, self.ndescrpt])
+        dstd = np.ones([self.ntypes, self.ndescrpt])
         self.t_avg = tf.constant(davg.astype(GLOBAL_NP_FLOAT_PRECISION))
         self.t_std = tf.constant(dstd.astype(GLOBAL_NP_FLOAT_PRECISION))
         if pbc:
-            self.default_mesh = np.zeros (6, dtype = np.int32)
+            self.default_mesh = np.zeros(6, dtype=np.int32)
             self.default_mesh[3] = 2
             self.default_mesh[4] = 2
             self.default_mesh[5] = 2
-        else :
-            self.default_mesh = np.array([], dtype = np.int32)
+        else:
+            self.default_mesh = np.array([], dtype=np.int32)
         # make place holder
-        self.coord      = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name='t_coord')
-        self.box        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='t_box')
-        self.type       = tf.placeholder(tf.int32,   [None, self.natoms[0]], name = "t_type")
-        self.tnatoms    = tf.placeholder(tf.int32,   [None], name = "t_natoms")
-        self.efield     = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name='t_efield')
-
-        
-    def _net (self,
-             inputs, 
-             name,
-             reuse = False) :
+        self.coord = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name="t_coord"
+        )
+        self.box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="t_box")
+        self.type = tf.placeholder(tf.int32, [None, self.natoms[0]], name="t_type")
+        self.tnatoms = tf.placeholder(tf.int32, [None], name="t_natoms")
+        self.efield = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name="t_efield"
+        )
+
+    def _net(self, inputs, name, reuse=False):
         with tf.variable_scope(name, reuse=reuse):
-            net_w = tf.get_variable ('net_w', 
-                                     [self.ndescrpt], 
-                                     GLOBAL_TF_FLOAT_PRECISION,
-                                     tf.constant_initializer (self.net_w_i))
-        dot_v = tf.matmul (tf.reshape (inputs, [-1, self.ndescrpt]),
-                           tf.reshape (net_w, [self.ndescrpt, 1]))
-        return tf.reshape (dot_v, [-1])
-        
-    def comp_ef (self, 
-                 dcoord, 
-                 dbox, 
-                 dtype,
-                 tnatoms,
-                 name,
-                 reuse = None) :
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        dot_v = tf.matmul(
+            tf.reshape(inputs, [-1, self.ndescrpt]),
+            tf.reshape(net_w, [self.ndescrpt, 1]),
+        )
+        return tf.reshape(dot_v, [-1])
+
+    def comp_ef(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
         t_default_mesh = tf.constant(self.default_mesh)
-        descrpt, descrpt_deriv, rij, nlist, axis, rot_mat \
-            = op_module.descrpt (dcoord, 
-                                 dtype,
-                                 tnatoms,
-                                 dbox, 
-                                 t_default_mesh,
-                                 self.t_avg,
-                                 self.t_std,
-                                 rcut_a = self.rcut_a, 
-                                 rcut_r = self.rcut_r, 
-                                 sel_a = self.sel_a, 
-                                 sel_r = self.sel_r, 
-                                 axis_rule = self.axis_rule)
+        descrpt, descrpt_deriv, rij, nlist, axis, rot_mat = op_module.descrpt(
+            dcoord,
+            dtype,
+            tnatoms,
+            dbox,
+            t_default_mesh,
+            self.t_avg,
+            self.t_std,
+            rcut_a=self.rcut_a,
+            rcut_r=self.rcut_r,
+            sel_a=self.sel_a,
+            sel_r=self.sel_r,
+            axis_rule=self.axis_rule,
+        )
         self.axis = axis
         self.nlist = nlist
         self.descrpt = descrpt
-        inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt])
-        atom_ener = self._net (inputs_reshape, name, reuse = reuse)
-        atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])        
-        energy = tf.reduce_sum (atom_ener_reshape, axis = 1)        
-        net_deriv_ = tf.gradients (atom_ener, inputs_reshape)
+        inputs_reshape = tf.reshape(descrpt, [-1, self.ndescrpt])
+        atom_ener = self._net(inputs_reshape, name, reuse=reuse)
+        atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])
+        energy = tf.reduce_sum(atom_ener_reshape, axis=1)
+        net_deriv_ = tf.gradients(atom_ener, inputs_reshape)
         net_deriv = net_deriv_[0]
-        net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) 
-
-        force = op_module.prod_force (net_deriv_reshape, 
-                                      descrpt_deriv, 
-                                      nlist, 
-                                      axis, 
-                                      tnatoms,
-                                      n_a_sel = self.nnei_a, 
-                                      n_r_sel = self.nnei_r)
-        virial, atom_vir = op_module.prod_virial (net_deriv_reshape, 
-                                                  descrpt_deriv, 
-                                                  rij,
-                                                  nlist, 
-                                                  axis, 
-                                                  tnatoms,
-                                                  n_a_sel = self.nnei_a, 
-                                                  n_r_sel = self.nnei_r)
+        net_deriv_reshape = tf.reshape(net_deriv, [-1, self.natoms[0] * self.ndescrpt])
+
+        force = op_module.prod_force(
+            net_deriv_reshape,
+            descrpt_deriv,
+            nlist,
+            axis,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
+        virial, atom_vir = op_module.prod_virial(
+            net_deriv_reshape,
+            descrpt_deriv,
+            rij,
+            nlist,
+            axis,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
         return energy, force, virial
 
-
-    def comp_f_dw (self, 
-                   dcoord, 
-                   dbox, 
-                   dtype,                 
-                   tnatoms,
-                   name,
-                   reuse = None) :
-        energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse)
+    def comp_f_dw(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        energy, force, virial = self.comp_ef(dcoord, dbox, dtype, tnatoms, name, reuse)
         with tf.variable_scope(name, reuse=True):
-            net_w = tf.get_variable ('net_w', [self.ndescrpt], GLOBAL_TF_FLOAT_PRECISION, tf.constant_initializer (self.net_w_i))
-        f_mag = tf.reduce_sum (tf.nn.tanh(force))
-        f_mag_dw = tf.gradients (f_mag, net_w)
-        assert (len(f_mag_dw) == 1), "length of dw is wrong"        
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        f_mag = tf.reduce_sum(tf.nn.tanh(force))
+        f_mag_dw = tf.gradients(f_mag, net_w)
+        assert len(f_mag_dw) == 1, "length of dw is wrong"
         return f_mag, f_mag_dw[0]
 
-
-    def comp_v_dw (self, 
-                   dcoord, 
-                   dbox, 
-                   dtype,                 
-                   tnatoms,
-                   name,
-                   reuse = None) :
-        energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse)
+    def comp_v_dw(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        energy, force, virial = self.comp_ef(dcoord, dbox, dtype, tnatoms, name, reuse)
         with tf.variable_scope(name, reuse=True):
-            net_w = tf.get_variable ('net_w', [self.ndescrpt], GLOBAL_TF_FLOAT_PRECISION, tf.constant_initializer (self.net_w_i))
-        v_mag = tf.reduce_sum (virial)
-        v_mag_dw = tf.gradients (v_mag, net_w)
-        assert (len(v_mag_dw) == 1), "length of dw is wrong"        
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        v_mag = tf.reduce_sum(virial)
+        v_mag_dw = tf.gradients(v_mag, net_w)
+        assert len(v_mag_dw) == 1, "length of dw is wrong"
         return v_mag, v_mag_dw[0]
 
 
-
 class TestNonSmooth(Inter, tf.test.TestCase):
     # def __init__ (self, *args, **kwargs):
     #     self.places = 5
@@ -169,17 +168,17 @@ def setUp(self):
         data = Data()
         Inter.setUp(self, data, sess=self.test_session().__enter__())
 
-    def test_force (self) :
-        force_test(self, self, suffix = '_se')
+    def test_force(self):
+        force_test(self, self, suffix="_se")
 
-    def test_virial (self) :
-        virial_test(self, self, suffix = '_se')
+    def test_virial(self):
+        virial_test(self, self, suffix="_se")
 
-    def test_force_dw (self) :
-        force_dw_test(self, self, suffix = '_se')
+    def test_force_dw(self):
+        force_dw_test(self, self, suffix="_se")
 
-    def test_virial_dw (self) :
-        virial_dw_test(self, self, suffix = '_se')
+    def test_virial_dw(self):
+        virial_dw_test(self, self, suffix="_se")
 
 
 class TestLFPbc(tf.test.TestCase):
@@ -187,75 +186,107 @@ def test_pbc(self):
         data = Data()
         inter0 = Inter()
         inter1 = Inter()
-        inter0.setUp(data, pbc = True, sess=self.test_session().__enter__())
-        inter1.setUp(data, pbc = False, sess=self.test_session().__enter__())
+        inter0.setUp(data, pbc=True, sess=self.test_session().__enter__())
+        inter1.setUp(data, pbc=False, sess=self.test_session().__enter__())
         inter0.net_w_i = np.copy(np.ones(inter0.ndescrpt))
         inter1.net_w_i = np.copy(np.ones(inter1.ndescrpt))
 
-        t_energy0, t_force0, t_virial0 \
-            = inter0.comp_ef (inter0.coord, inter0.box, inter0.type, inter0.tnatoms, name = "test_lf_pbc_true")
-        t_energy1, t_force1, t_virial1 \
-            = inter1.comp_ef (inter1.coord, inter1.box, inter1.type, inter1.tnatoms, name = "test_lf_pbc_false")
+        t_energy0, t_force0, t_virial0 = inter0.comp_ef(
+            inter0.coord,
+            inter0.box,
+            inter0.type,
+            inter0.tnatoms,
+            name="test_lf_pbc_true",
+        )
+        t_energy1, t_force1, t_virial1 = inter1.comp_ef(
+            inter1.coord,
+            inter1.box,
+            inter1.type,
+            inter1.tnatoms,
+            name="test_lf_pbc_false",
+        )
+
+        inter0.sess.run(tf.global_variables_initializer())
+        inter1.sess.run(tf.global_variables_initializer())
+
+        dcoord, dbox, dtype = data.get_data()
+
+        [e0, f0, v0] = inter0.sess.run(
+            [t_energy0, t_force0, t_virial0],
+            feed_dict={
+                inter0.coord: dcoord,
+                inter0.box: dbox,
+                inter0.type: dtype,
+                inter0.tnatoms: inter0.natoms,
+            },
+        )
+        [e1, f1, v1] = inter1.sess.run(
+            [t_energy1, t_force1, t_virial1],
+            feed_dict={
+                inter1.coord: dcoord,
+                inter1.box: dbox,
+                inter1.type: dtype,
+                inter1.tnatoms: inter1.natoms,
+            },
+        )
 
-        inter0.sess.run (tf.global_variables_initializer())
-        inter1.sess.run (tf.global_variables_initializer())
-
-        dcoord, dbox, dtype = data.get_data ()
-
-        [e0, f0, v0] = inter0.sess.run ([t_energy0, t_force0, t_virial0], 
-                                        feed_dict = {
-                                            inter0.coord:     dcoord,
-                                            inter0.box:       dbox,
-                                            inter0.type:      dtype,
-                                            inter0.tnatoms:   inter0.natoms})
-        [e1, f1, v1] = inter1.sess.run ([t_energy1, t_force1, t_virial1], 
-                                        feed_dict = {
-                                            inter1.coord:     dcoord,
-                                            inter1.box:       dbox,
-                                            inter1.type:      dtype,
-                                            inter1.tnatoms:   inter1.natoms})
-        
         self.assertAlmostEqual(e0[0], e1[0])
         np.testing.assert_almost_equal(f0[0], f1[0])
         np.testing.assert_almost_equal(v0[0], v1[0])
 
     def test_pbc_small_box(self):
         data0 = Data()
-        data1 = Data(box_scale = 2)
+        data1 = Data(box_scale=2)
         inter0 = Inter()
         inter1 = Inter()
-        inter0.setUp(data0, pbc = True, sess=self.test_session().__enter__())
-        inter1.setUp(data1, pbc = False, sess=self.test_session().__enter__())
+        inter0.setUp(data0, pbc=True, sess=self.test_session().__enter__())
+        inter1.setUp(data1, pbc=False, sess=self.test_session().__enter__())
         inter0.net_w_i = np.copy(np.ones(inter0.ndescrpt))
         inter1.net_w_i = np.copy(np.ones(inter1.ndescrpt))
 
-        t_energy0, t_force0, t_virial0 \
-            = inter0.comp_ef (inter0.coord, inter0.box, inter0.type, inter0.tnatoms, name = "test_lf_pbc_sbox_true")
-        t_energy1, t_force1, t_virial1 \
-            = inter1.comp_ef (inter1.coord, inter1.box, inter1.type, inter1.tnatoms, name = "test_lf_pbc_sbox_false")
-
-        inter0.sess.run (tf.global_variables_initializer())
-        inter1.sess.run (tf.global_variables_initializer())
+        t_energy0, t_force0, t_virial0 = inter0.comp_ef(
+            inter0.coord,
+            inter0.box,
+            inter0.type,
+            inter0.tnatoms,
+            name="test_lf_pbc_sbox_true",
+        )
+        t_energy1, t_force1, t_virial1 = inter1.comp_ef(
+            inter1.coord,
+            inter1.box,
+            inter1.type,
+            inter1.tnatoms,
+            name="test_lf_pbc_sbox_false",
+        )
+
+        inter0.sess.run(tf.global_variables_initializer())
+        inter1.sess.run(tf.global_variables_initializer())
+
+        dcoord, dbox, dtype = data0.get_data()
+        [e0, f0, v0] = inter0.sess.run(
+            [t_energy0, t_force0, t_virial0],
+            feed_dict={
+                inter0.coord: dcoord,
+                inter0.box: dbox,
+                inter0.type: dtype,
+                inter0.tnatoms: inter0.natoms,
+            },
+        )
+        dcoord, dbox, dtype = data1.get_data()
+        [e1, f1, v1] = inter1.sess.run(
+            [t_energy1, t_force1, t_virial1],
+            feed_dict={
+                inter1.coord: dcoord,
+                inter1.box: dbox,
+                inter1.type: dtype,
+                inter1.tnatoms: inter1.natoms,
+            },
+        )
 
-        dcoord, dbox, dtype = data0.get_data ()
-        [e0, f0, v0] = inter0.sess.run ([t_energy0, t_force0, t_virial0], 
-                                        feed_dict = {
-                                            inter0.coord:     dcoord,
-                                            inter0.box:       dbox,
-                                            inter0.type:      dtype,
-                                            inter0.tnatoms:   inter0.natoms})
-        dcoord, dbox, dtype = data1.get_data ()
-        [e1, f1, v1] = inter1.sess.run ([t_energy1, t_force1, t_virial1], 
-                                        feed_dict = {
-                                            inter1.coord:     dcoord,
-                                            inter1.box:       dbox,
-                                            inter1.type:      dtype,
-                                            inter1.tnatoms:   inter1.natoms})
-        
         self.assertAlmostEqual(e0[0], e1[0])
         np.testing.assert_almost_equal(f0[0], f1[0])
         np.testing.assert_almost_equal(v0[0], v1[0])
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_descrpt_se_a_type.py b/source/tests/test_descrpt_se_a_type.py
index f61b25f8c6..0c234d89a5 100644
--- a/source/tests/test_descrpt_se_a_type.py
+++ b/source/tests/test_descrpt_se_a_type.py
@@ -1,240 +1,369 @@
+import os
+import pickle
+import sys
+import unittest
 
-import dpdata,os,sys,unittest
+import dpdata
 import numpy as np
-from deepmd.env import tf
-import pickle
-from common import Data,gen_data, j_loader
+from common import (
+    Data,
+    DataSystem,
+    gen_data,
+    j_loader,
+)
 
-from common import DataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import EnerFitting
-from deepmd.model import EnerModel
-from deepmd.common import j_must_have
-from deepmd.utils.type_embed import embed_atom_type, TypeEmbedNet
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model import (
+    EnerModel,
+)
+from deepmd.utils.type_embed import (
+    TypeEmbedNet,
+    embed_atom_type,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
+
 class TestModel(tf.test.TestCase):
     def setUp(self):
         gen_data(nframes=2)
 
     def test_descriptor_two_sides(self):
-        jfile = 'water_se_a_type.json'
+        jfile = "water_se_a_type.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 2
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        sel = j_must_have (jdata['model']['descriptor'], 'sel')
-        ntypes=len(sel)
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
-        
-        test_data = data.get_test ()
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+        sel = j_must_have(jdata["model"]["descriptor"], "sel")
+        ntypes = len(sel)
+
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+
+        test_data = data.get_test()
         numb_test = 1
 
         # set parameters
-        jdata['model']['descriptor']['neuron'] = [5, 5, 5]
-        jdata['model']['descriptor']['axis_neuron'] = 2
-        typeebd_param = {'neuron' : [5, 5, 5], 
-                         'resnet_dt': False,
-                         'seed': 1,
+        jdata["model"]["descriptor"]["neuron"] = [5, 5, 5]
+        jdata["model"]["descriptor"]["axis_neuron"] = 2
+        typeebd_param = {
+            "neuron": [5, 5, 5],
+            "resnet_dt": False,
+            "seed": 1,
         }
 
         # init models
         typeebd = TypeEmbedNet(
-            neuron = typeebd_param['neuron'],
-            resnet_dt = typeebd_param['resnet_dt'],
-            seed = typeebd_param['seed'], 
-            uniform_seed = True
+            neuron=typeebd_param["neuron"],
+            resnet_dt=typeebd_param["resnet_dt"],
+            seed=typeebd_param["seed"],
+            uniform_seed=True,
         )
 
-        jdata['model']['descriptor'].pop('type', None)        
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed=True)
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']]
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
         }
         descrpt.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        type_embedding = typeebd.build(
-            ntypes, 
-            suffix = "_se_a_type_des_ebd_2sdies"
+        type_embedding = typeebd.build(ntypes, suffix="_se_a_type_des_ebd_2sdies")
+
+        dout = descrpt.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            {"type_embedding": type_embedding},
+            reuse=False,
+            suffix="_se_a_type_des_2sides",
         )
 
-        dout \
-            = descrpt.build(
-                t_coord,
-                t_type,
-                t_natoms,
-                t_box,
-                t_mesh,
-                {'type_embedding' : type_embedding},
-                reuse = False,
-                suffix = "_se_a_type_des_2sides"
-            )
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_energy:        test_data['energy']              [:numb_test],
-                          t_force:         np.reshape(test_data['force']    [:numb_test, :], [-1]),
-                          t_virial:        np.reshape(test_data['virial']   [:numb_test, :], [-1]),
-                          t_atom_ener:     np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False}
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [model_dout] = sess.run([dout], 
-                             feed_dict = feed_dict_test)
+        [model_dout] = sess.run([dout], feed_dict=feed_dict_test)
         model_dout = model_dout.reshape([-1])
 
-        ref_dout = [0.0005722682145569174,-0.00020202686217742682,-0.00020202686217742682,7.13250554992363e-05,-0.0014770058171250015,0.000521468690207748,-0.001143865186937176,0.0004038453384193948,0.0005617335409639567,-0.00019831394075147532,
-                    0.00048086740718842236,-0.0001693584775806112,-0.0001693584775806112,5.966987137476082e-05,-0.0012342029581315136,0.00043492340851472783,-0.0009566016612537016,0.00033706767041080107,0.00047065988464132244,-0.0001657950398095401,
-                    0.0003647849239740657,-0.00013744939018250384,-0.00013744939018250384,5.1825826955234744e-05,-0.00096004206555711,0.00036185565262332876,-0.0007267433909643961,0.0002738914365542745,0.00038019365906978136,-0.00014322754331896057,
-                    0.0004675256930823109,-0.00017634410399626168,-0.00017634410399626168,6.652672908755666e-05,-0.0012328062885292486,0.00046500213384094614,-0.0009328887521346069,0.0003518668613172834,0.0004877847509912577,-0.00018396318824508986,
-                    0.0005154794374703516,-0.00019422534512034776,-0.00019422534512034776,7.318151797939947e-05,-0.0013576642997136488,0.0005115548790018505,-0.0010275333676074971,0.00038716440070070385,0.0005376426714609369,-0.00020257810468163985,
-                    0.0004482204892297628,-0.00016887749501640607,-0.00016887749501640607,6.364643102775375e-05,-0.001181345877677835,0.0004452029242063362,-0.0008941636427724908,0.0003369586197174627,0.0004677878512312651,-0.00017625260641095753]
-    
+        ref_dout = [
+            0.0005722682145569174,
+            -0.00020202686217742682,
+            -0.00020202686217742682,
+            7.13250554992363e-05,
+            -0.0014770058171250015,
+            0.000521468690207748,
+            -0.001143865186937176,
+            0.0004038453384193948,
+            0.0005617335409639567,
+            -0.00019831394075147532,
+            0.00048086740718842236,
+            -0.0001693584775806112,
+            -0.0001693584775806112,
+            5.966987137476082e-05,
+            -0.0012342029581315136,
+            0.00043492340851472783,
+            -0.0009566016612537016,
+            0.00033706767041080107,
+            0.00047065988464132244,
+            -0.0001657950398095401,
+            0.0003647849239740657,
+            -0.00013744939018250384,
+            -0.00013744939018250384,
+            5.1825826955234744e-05,
+            -0.00096004206555711,
+            0.00036185565262332876,
+            -0.0007267433909643961,
+            0.0002738914365542745,
+            0.00038019365906978136,
+            -0.00014322754331896057,
+            0.0004675256930823109,
+            -0.00017634410399626168,
+            -0.00017634410399626168,
+            6.652672908755666e-05,
+            -0.0012328062885292486,
+            0.00046500213384094614,
+            -0.0009328887521346069,
+            0.0003518668613172834,
+            0.0004877847509912577,
+            -0.00018396318824508986,
+            0.0005154794374703516,
+            -0.00019422534512034776,
+            -0.00019422534512034776,
+            7.318151797939947e-05,
+            -0.0013576642997136488,
+            0.0005115548790018505,
+            -0.0010275333676074971,
+            0.00038716440070070385,
+            0.0005376426714609369,
+            -0.00020257810468163985,
+            0.0004482204892297628,
+            -0.00016887749501640607,
+            -0.00016887749501640607,
+            6.364643102775375e-05,
+            -0.001181345877677835,
+            0.0004452029242063362,
+            -0.0008941636427724908,
+            0.0003369586197174627,
+            0.0004677878512312651,
+            -0.00017625260641095753,
+        ]
+
         places = 10
         np.testing.assert_almost_equal(model_dout, ref_dout, places)
 
-
     def test_descriptor_one_side(self):
-        jfile = 'water_se_a_type.json'
+        jfile = "water_se_a_type.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        sel = j_must_have (jdata['model']['descriptor'], 'sel')
-        ntypes=len(sel)
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
-        
-        test_data = data.get_test ()
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+        sel = j_must_have(jdata["model"]["descriptor"], "sel")
+        ntypes = len(sel)
+
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+
+        test_data = data.get_test()
         numb_test = 1
 
         # set parameters
-        jdata['model']['descriptor']['neuron'] = [5, 5, 5]
-        jdata['model']['descriptor']['axis_neuron'] = 2
-        jdata['model']['descriptor']['type_one_side'] = True
-        typeebd_param = {'neuron' : [5, 5, 5], 
-                         'resnet_dt': False,
-                         'seed': 1,
+        jdata["model"]["descriptor"]["neuron"] = [5, 5, 5]
+        jdata["model"]["descriptor"]["axis_neuron"] = 2
+        jdata["model"]["descriptor"]["type_one_side"] = True
+        typeebd_param = {
+            "neuron": [5, 5, 5],
+            "resnet_dt": False,
+            "seed": 1,
         }
 
         # init models
         typeebd = TypeEmbedNet(
-            neuron = typeebd_param['neuron'],
-            resnet_dt = typeebd_param['resnet_dt'],
-            seed = typeebd_param['seed'], 
-            uniform_seed = True
+            neuron=typeebd_param["neuron"],
+            resnet_dt=typeebd_param["resnet_dt"],
+            seed=typeebd_param["seed"],
+            uniform_seed=True,
         )
 
-        jdata['model']['descriptor'].pop('type', None)        
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed = True)
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']]
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
         }
         descrpt.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        type_embedding = typeebd.build(
-            ntypes, 
-            suffix = "_se_a_type_des_ebd_1side"
+        type_embedding = typeebd.build(ntypes, suffix="_se_a_type_des_ebd_1side")
+
+        dout = descrpt.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            {"type_embedding": type_embedding},
+            reuse=False,
+            suffix="_se_a_type_des_1side",
         )
 
-        dout \
-            = descrpt.build(
-                t_coord,
-                t_type,
-                t_natoms,
-                t_box,
-                t_mesh,
-                {'type_embedding' : type_embedding},
-                reuse = False,
-                suffix = "_se_a_type_des_1side"
-            )
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_energy:        test_data['energy']              [:numb_test],
-                          t_force:         np.reshape(test_data['force']    [:numb_test, :], [-1]),
-                          t_virial:        np.reshape(test_data['virial']   [:numb_test, :], [-1]),
-                          t_atom_ener:     np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False}
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [model_dout] = sess.run([dout], 
-                             feed_dict = feed_dict_test)
+        [model_dout] = sess.run([dout], feed_dict=feed_dict_test)
         model_dout = model_dout.reshape([-1])
 
-        ref_dout = [0.0009704469114440277,0.0007136310372560243,0.0007136310372560243,0.000524968274824758,-0.0019790100690810016,-0.0014556100390424947,-0.001318691223889266,-0.0009698525512440269,0.001937780602605409,
-                    0.0014251755182315322,0.0008158935519461114,0.0005943870925895051,0.0005943870925895051,0.0004340263490412088,-0.0016539827195947239,-0.0012066241021841376,-0.0011042186455562336,-0.0008051343572505189,
-                    0.0016229491738044255,0.0011833923257801077,0.0006020440527161554,0.00047526899287409847,0.00047526899287409847,0.00037538142786805136,-0.0012811397377036637,-0.0010116898098710776,-0.0008465095301785942,
-                    -0.0006683577463042215,0.0012459039620461505,0.0009836962283627838,0.00077088529431722,0.0006105807630364827,0.0006105807630364827,0.00048361458700877996,-0.0016444700616024337,-0.001302510079662288,
-                    -0.0010856603485807576,-0.0008598975276238373,0.00159730642327918,0.001265146946434076,0.0008495806081447204,0.000671787466824433,0.000671787466824433,0.0005312928157964384,-0.0018105890543181475,
-                    -0.001431844407277983,-0.0011956722392735362,-0.000945544277375045,0.0017590147511761475,0.0013910348287283414,0.0007393644735054756,0.0005850536182149991,0.0005850536182149991,0.0004631887654949332,
-                    -0.0015760302086346792,-0.0012475134925387294,-0.001041074331192672,-0.0008239586048523492,0.0015319673563669856,0.0012124704278707746]
-    
+        ref_dout = [
+            0.0009704469114440277,
+            0.0007136310372560243,
+            0.0007136310372560243,
+            0.000524968274824758,
+            -0.0019790100690810016,
+            -0.0014556100390424947,
+            -0.001318691223889266,
+            -0.0009698525512440269,
+            0.001937780602605409,
+            0.0014251755182315322,
+            0.0008158935519461114,
+            0.0005943870925895051,
+            0.0005943870925895051,
+            0.0004340263490412088,
+            -0.0016539827195947239,
+            -0.0012066241021841376,
+            -0.0011042186455562336,
+            -0.0008051343572505189,
+            0.0016229491738044255,
+            0.0011833923257801077,
+            0.0006020440527161554,
+            0.00047526899287409847,
+            0.00047526899287409847,
+            0.00037538142786805136,
+            -0.0012811397377036637,
+            -0.0010116898098710776,
+            -0.0008465095301785942,
+            -0.0006683577463042215,
+            0.0012459039620461505,
+            0.0009836962283627838,
+            0.00077088529431722,
+            0.0006105807630364827,
+            0.0006105807630364827,
+            0.00048361458700877996,
+            -0.0016444700616024337,
+            -0.001302510079662288,
+            -0.0010856603485807576,
+            -0.0008598975276238373,
+            0.00159730642327918,
+            0.001265146946434076,
+            0.0008495806081447204,
+            0.000671787466824433,
+            0.000671787466824433,
+            0.0005312928157964384,
+            -0.0018105890543181475,
+            -0.001431844407277983,
+            -0.0011956722392735362,
+            -0.000945544277375045,
+            0.0017590147511761475,
+            0.0013910348287283414,
+            0.0007393644735054756,
+            0.0005850536182149991,
+            0.0005850536182149991,
+            0.0004631887654949332,
+            -0.0015760302086346792,
+            -0.0012475134925387294,
+            -0.001041074331192672,
+            -0.0008239586048523492,
+            0.0015319673563669856,
+            0.0012124704278707746,
+        ]
+
         places = 10
         np.testing.assert_almost_equal(model_dout, ref_dout, places)
-
-        
-    
-        
diff --git a/source/tests/test_descrpt_se_atten.py b/source/tests/test_descrpt_se_atten.py
index b60b5de1c8..fb6526a9e1 100644
--- a/source/tests/test_descrpt_se_atten.py
+++ b/source/tests/test_descrpt_se_atten.py
@@ -1,43 +1,66 @@
-import dpdata, os, sys, unittest
-import numpy as np
-from deepmd.env import tf
+import os
 import pickle
-from common import Data, gen_data, j_loader
-
-from common import DataSystem
-from deepmd.descriptor import DescrptSeAtten
-from deepmd.fit import EnerFitting
-from deepmd.model import EnerModel
-from deepmd.common import j_must_have
-from deepmd.utils.type_embed import embed_atom_type, TypeEmbedNet
-from common import tf
+import sys
+import unittest
+
+import dpdata
+import numpy as np
+from common import (
+    Data,
+    DataSystem,
+    gen_data,
+    j_loader,
+    tf,
+)
 from packaging.version import parse as parse_version
 
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeAtten,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model import (
+    EnerModel,
+)
+from deepmd.utils.type_embed import (
+    TypeEmbedNet,
+    embed_atom_type,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
 
-@unittest.skipIf(parse_version(tf.__version__) < parse_version("1.15"),
-    f"The current tf version {tf.__version__} is too low to run the new testing model.")
+@unittest.skipIf(
+    parse_version(tf.__version__) < parse_version("1.15"),
+    f"The current tf version {tf.__version__} is too low to run the new testing model.",
+)
 class TestModel(tf.test.TestCase):
     def setUp(self):
         gen_data(nframes=2)
 
     def test_descriptor_two_sides(self):
-        jfile = 'water_se_atten.json'
+        jfile = "water_se_atten.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 2
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have(jdata['model']['descriptor'], 'rcut')
-        sel = j_must_have(jdata['model']['descriptor'], 'sel')
-        ntypes = len(jdata['model']['type_map'])
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+        sel = j_must_have(jdata["model"]["descriptor"], "sel")
+        ntypes = len(jdata["model"]["type_map"])
 
         data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
 
@@ -45,110 +68,165 @@ def test_descriptor_two_sides(self):
         numb_test = 1
 
         # set parameters
-        jdata['model']['descriptor']['neuron'] = [5, 5, 5]
-        jdata['model']['descriptor']['axis_neuron'] = 2
-        typeebd_param = {'neuron': [5],
-                         'resnet_dt': False,
-                         'seed': 1,
-                         }
+        jdata["model"]["descriptor"]["neuron"] = [5, 5, 5]
+        jdata["model"]["descriptor"]["axis_neuron"] = 2
+        typeebd_param = {
+            "neuron": [5],
+            "resnet_dt": False,
+            "seed": 1,
+        }
 
         # init models
         typeebd = TypeEmbedNet(
-            neuron=typeebd_param['neuron'],
+            neuron=typeebd_param["neuron"],
             activation_function=None,
-            resnet_dt=typeebd_param['resnet_dt'],
-            seed=typeebd_param['seed'],
+            resnet_dt=typeebd_param["resnet_dt"],
+            seed=typeebd_param["seed"],
             uniform_seed=True,
-            padding=True
+            padding=True,
         )
 
-        jdata['model']['descriptor'].pop('type', None)
-        jdata['model']['descriptor']['ntypes'] = ntypes
-        descrpt = DescrptSeAtten(**jdata['model']['descriptor'], uniform_seed=True)
+        jdata["model"]["descriptor"].pop("type", None)
+        jdata["model"]["descriptor"]["ntypes"] = ntypes
+        descrpt = DescrptSeAtten(**jdata["model"]["descriptor"], uniform_seed=True)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord': [test_data['coord']],
-                      'box': [test_data['box']],
-                      'type': [test_data['type']],
-                      'natoms_vec': [test_data['natoms_vec']],
-                      'default_mesh': [test_data['default_mesh']]
-                      }
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
+        }
         descrpt.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c = tf.placeholder(tf.float32, [5], name='t_prop_c')
-        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type = tf.placeholder(tf.int32, [None], name='i_type')
-        t_natoms = tf.placeholder(tf.int32, [ntypes + 2], name='i_natoms')
-        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh')
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
         is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        type_embedding = typeebd.build(
-            ntypes,
-            suffix="_se_atten_type_des_ebd_2sdies"
-        )
+        type_embedding = typeebd.build(ntypes, suffix="_se_atten_type_des_ebd_2sdies")
 
-        dout \
-            = descrpt.build(
+        dout = descrpt.build(
             t_coord,
             t_type,
             t_natoms,
             t_box,
             t_mesh,
-            {'type_embedding': type_embedding},
+            {"type_embedding": type_embedding},
             reuse=False,
-            suffix="_se_atten_type_des_2sides"
+            suffix="_se_atten_type_des_2sides",
         )
 
-        feed_dict_test = {t_prop_c: test_data['prop_c'],
-                          t_energy: test_data['energy'][:numb_test],
-                          t_force: np.reshape(test_data['force'][:numb_test, :], [-1]),
-                          t_virial: np.reshape(test_data['virial'][:numb_test, :], [-1]),
-                          t_atom_ener: np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord: np.reshape(test_data['coord'][:numb_test, :], [-1]),
-                          t_box: test_data['box'][:numb_test, :],
-                          t_type: np.reshape(test_data['type'][:numb_test, :], [-1]),
-                          t_natoms: test_data['natoms_vec'],
-                          t_mesh: test_data['default_mesh'],
-                          is_training: False}
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [model_dout] = sess.run([dout],
-                                feed_dict=feed_dict_test)
+        [model_dout] = sess.run([dout], feed_dict=feed_dict_test)
         model_dout = model_dout.reshape([-1])
 
-        ref_dout = [1.3503570575883254e-04, -9.3606804794552518e-05, -9.3606804794552518e-05,  6.4931435609575354e-05, -3.4432462227712845e-04,  2.3883309310633266e-04, -2.1612770334269806e-04,  1.4980041766865035e-04,
-        5.1902342465554648e-04, -3.5995814159000579e-04, 1.0061650355705337e-04, -7.5148260042556979e-05, -7.5148260042556979e-05,  5.6249549384058458e-05, -2.7820514647114664e-04,  2.0819618461713165e-04,
-       -1.5698895407951743e-04,  1.1721016363267746e-04, 4.0972585703616773e-04, -3.0650763759131061e-04, 7.5599650998659526e-05, -5.8808888720672558e-05, -5.8808888720672558e-05,  4.5766209906762655e-05,
-       -2.1712714013251668e-04,  1.6899894453623564e-04, -1.2167120597162636e-04,  9.4648599144861605e-05, 3.2200758382615601e-04, -2.5060486486718734e-04, 1.1293831101452813e-04, -7.9512063028041913e-05,
-       -7.9512063028041913e-05,  5.5979262682797850e-05, -2.9058515610909440e-04,  2.0457554106366365e-04, -1.8732839505532627e-04,  1.3188376232775540e-04, 4.4448730317793450e-04, -3.1292650304617497e-04,
-        1.3015885894252541e-04, -8.8816609587789126e-05, -8.8816609587789126e-05,  6.0613949400496957e-05, -3.2308121544925519e-04,  2.2046786823295058e-04, -2.1781481424814687e-04,  1.4862599684199924e-04,
-        4.9955378034266583e-04, -3.4089120488765758e-04, 1.0160496779809329e-04, -7.4538471222199861e-05, -7.4538471222199861e-05,  5.4703671679263269e-05, -2.7394267959121653e-04,  2.0103409637607701e-04,
-       -1.6657135958432620e-04,  1.2219321453198225e-04, 4.1344754259964935e-04, -3.0339251136512270e-04]
+        ref_dout = [
+            1.3503570575883254e-04,
+            -9.3606804794552518e-05,
+            -9.3606804794552518e-05,
+            6.4931435609575354e-05,
+            -3.4432462227712845e-04,
+            2.3883309310633266e-04,
+            -2.1612770334269806e-04,
+            1.4980041766865035e-04,
+            5.1902342465554648e-04,
+            -3.5995814159000579e-04,
+            1.0061650355705337e-04,
+            -7.5148260042556979e-05,
+            -7.5148260042556979e-05,
+            5.6249549384058458e-05,
+            -2.7820514647114664e-04,
+            2.0819618461713165e-04,
+            -1.5698895407951743e-04,
+            1.1721016363267746e-04,
+            4.0972585703616773e-04,
+            -3.0650763759131061e-04,
+            7.5599650998659526e-05,
+            -5.8808888720672558e-05,
+            -5.8808888720672558e-05,
+            4.5766209906762655e-05,
+            -2.1712714013251668e-04,
+            1.6899894453623564e-04,
+            -1.2167120597162636e-04,
+            9.4648599144861605e-05,
+            3.2200758382615601e-04,
+            -2.5060486486718734e-04,
+            1.1293831101452813e-04,
+            -7.9512063028041913e-05,
+            -7.9512063028041913e-05,
+            5.5979262682797850e-05,
+            -2.9058515610909440e-04,
+            2.0457554106366365e-04,
+            -1.8732839505532627e-04,
+            1.3188376232775540e-04,
+            4.4448730317793450e-04,
+            -3.1292650304617497e-04,
+            1.3015885894252541e-04,
+            -8.8816609587789126e-05,
+            -8.8816609587789126e-05,
+            6.0613949400496957e-05,
+            -3.2308121544925519e-04,
+            2.2046786823295058e-04,
+            -2.1781481424814687e-04,
+            1.4862599684199924e-04,
+            4.9955378034266583e-04,
+            -3.4089120488765758e-04,
+            1.0160496779809329e-04,
+            -7.4538471222199861e-05,
+            -7.4538471222199861e-05,
+            5.4703671679263269e-05,
+            -2.7394267959121653e-04,
+            2.0103409637607701e-04,
+            -1.6657135958432620e-04,
+            1.2219321453198225e-04,
+            4.1344754259964935e-04,
+            -3.0339251136512270e-04,
+        ]
 
         places = 10
         np.testing.assert_almost_equal(model_dout, ref_dout, places)
 
     def test_descriptor_one_side(self):
-        jfile = 'water_se_atten.json'
+        jfile = "water_se_atten.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have(jdata['model']['descriptor'], 'rcut')
-        sel = j_must_have(jdata['model']['descriptor'], 'sel')
-        ntypes = len(jdata['model']['type_map'])
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+        sel = j_must_have(jdata["model"]["descriptor"], "sel")
+        ntypes = len(jdata["model"]["type_map"])
 
         data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
 
@@ -156,97 +234,148 @@ def test_descriptor_one_side(self):
         numb_test = 1
 
         # set parameters
-        jdata['model']['descriptor']['neuron'] = [5, 5, 5]
-        jdata['model']['descriptor']['axis_neuron'] = 2
-        jdata['model']['descriptor']['type_one_side'] = True
-        typeebd_param = {'neuron': [5],
-                         'resnet_dt': False,
-                         'seed': 1,
-                         }
+        jdata["model"]["descriptor"]["neuron"] = [5, 5, 5]
+        jdata["model"]["descriptor"]["axis_neuron"] = 2
+        jdata["model"]["descriptor"]["type_one_side"] = True
+        typeebd_param = {
+            "neuron": [5],
+            "resnet_dt": False,
+            "seed": 1,
+        }
 
         # init models
         typeebd = TypeEmbedNet(
-            neuron=typeebd_param['neuron'],
+            neuron=typeebd_param["neuron"],
             activation_function=None,
-            resnet_dt=typeebd_param['resnet_dt'],
-            seed=typeebd_param['seed'],
+            resnet_dt=typeebd_param["resnet_dt"],
+            seed=typeebd_param["seed"],
             uniform_seed=True,
-            padding=True
+            padding=True,
         )
 
-        jdata['model']['descriptor'].pop('type', None)
-        jdata['model']['descriptor']['ntypes'] = ntypes
-        descrpt = DescrptSeAtten(**jdata['model']['descriptor'], uniform_seed=True)
+        jdata["model"]["descriptor"].pop("type", None)
+        jdata["model"]["descriptor"]["ntypes"] = ntypes
+        descrpt = DescrptSeAtten(**jdata["model"]["descriptor"], uniform_seed=True)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord': [test_data['coord']],
-                      'box': [test_data['box']],
-                      'type': [test_data['type']],
-                      'natoms_vec': [test_data['natoms_vec']],
-                      'default_mesh': [test_data['default_mesh']]
-                      }
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
+        }
         descrpt.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c = tf.placeholder(tf.float32, [5], name='t_prop_c')
-        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type = tf.placeholder(tf.int32, [None], name='i_type')
-        t_natoms = tf.placeholder(tf.int32, [ntypes + 2], name='i_natoms')
-        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh')
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
         is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        type_embedding = typeebd.build(
-            ntypes,
-            suffix="_se_atten_type_des_ebd_1side"
-        )
+        type_embedding = typeebd.build(ntypes, suffix="_se_atten_type_des_ebd_1side")
 
-        dout \
-            = descrpt.build(
+        dout = descrpt.build(
             t_coord,
             t_type,
             t_natoms,
             t_box,
             t_mesh,
-            {'type_embedding': type_embedding},
+            {"type_embedding": type_embedding},
             reuse=False,
-            suffix="_se_atten_type_des_1side"
+            suffix="_se_atten_type_des_1side",
         )
 
-        feed_dict_test = {t_prop_c: test_data['prop_c'],
-                          t_energy: test_data['energy'][:numb_test],
-                          t_force: np.reshape(test_data['force'][:numb_test, :], [-1]),
-                          t_virial: np.reshape(test_data['virial'][:numb_test, :], [-1]),
-                          t_atom_ener: np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord: np.reshape(test_data['coord'][:numb_test, :], [-1]),
-                          t_box: test_data['box'][:numb_test, :],
-                          t_type: np.reshape(test_data['type'][:numb_test, :], [-1]),
-                          t_natoms: test_data['natoms_vec'],
-                          t_mesh: test_data['default_mesh'],
-                          is_training: False}
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [model_dout] = sess.run([dout],
-                                feed_dict=feed_dict_test)
+        [model_dout] = sess.run([dout], feed_dict=feed_dict_test)
         model_dout = model_dout.reshape([-1])
 
-        ref_dout = [8.9336098555659429e-05, -3.8921422089719007e-05, -3.8921422089719007e-05,  1.6975109833017758e-05, -2.9184951813034413e-04,  1.2724836941382651e-04, -1.8062533253590169e-04,  7.8681048972093648e-05,
-        4.2206017420030542e-04, -1.8398310612921889e-04, 6.4996467281506633e-05, -3.0812041327073575e-05, -3.0812041327073575e-05,  1.4663988013438402e-05, -2.3274950984084172e-04,  1.1059587214865573e-04,
-       -1.3043761448464089e-04,  6.1788865409826698e-05, 3.2900269837104958e-04, -1.5623668424484728e-04, 5.0697927477465942e-05, -2.3511768544350768e-05, -2.3511768544350768e-05,  1.0919808814040025e-05,
-       -1.8622373494960208e-04,  8.6439275444049409e-05, -1.0326450661269683e-04,  4.7880797898768150e-05, 2.6230208262918372e-04, -1.2172811361250681e-04, 7.8240863239649707e-05, -3.2501260967978116e-05,
-       -3.2501260967978116e-05,  1.3502267073810926e-05, -2.5360559687597850e-04,  1.0535336854834091e-04, -1.6047265448841568e-04,  6.6660202062744658e-05, 3.6833864909272261e-04, -1.5301457671691837e-04,
-        9.1148582997925288e-05, -3.6614945467066073e-05, -3.6614945467066073e-05,  1.4709958908948206e-05, -2.8364168092837332e-04,  1.1394466218003484e-04, -1.8721615730559043e-04,  7.5203967811613109e-05,
-        4.1632420070310456e-04, -1.6724364343353009e-04, 6.9506193268190631e-05, -3.0228106532898472e-05, -3.0228106532898472e-05,  1.3156705594652870e-05, -2.3740975974826574e-04,  1.0328972070195332e-04,
-       -1.4218547815143072e-04,  6.1827596642872941e-05, 3.4031715116440432e-04, -1.4804591640658066e-04]
+        ref_dout = [
+            8.9336098555659429e-05,
+            -3.8921422089719007e-05,
+            -3.8921422089719007e-05,
+            1.6975109833017758e-05,
+            -2.9184951813034413e-04,
+            1.2724836941382651e-04,
+            -1.8062533253590169e-04,
+            7.8681048972093648e-05,
+            4.2206017420030542e-04,
+            -1.8398310612921889e-04,
+            6.4996467281506633e-05,
+            -3.0812041327073575e-05,
+            -3.0812041327073575e-05,
+            1.4663988013438402e-05,
+            -2.3274950984084172e-04,
+            1.1059587214865573e-04,
+            -1.3043761448464089e-04,
+            6.1788865409826698e-05,
+            3.2900269837104958e-04,
+            -1.5623668424484728e-04,
+            5.0697927477465942e-05,
+            -2.3511768544350768e-05,
+            -2.3511768544350768e-05,
+            1.0919808814040025e-05,
+            -1.8622373494960208e-04,
+            8.6439275444049409e-05,
+            -1.0326450661269683e-04,
+            4.7880797898768150e-05,
+            2.6230208262918372e-04,
+            -1.2172811361250681e-04,
+            7.8240863239649707e-05,
+            -3.2501260967978116e-05,
+            -3.2501260967978116e-05,
+            1.3502267073810926e-05,
+            -2.5360559687597850e-04,
+            1.0535336854834091e-04,
+            -1.6047265448841568e-04,
+            6.6660202062744658e-05,
+            3.6833864909272261e-04,
+            -1.5301457671691837e-04,
+            9.1148582997925288e-05,
+            -3.6614945467066073e-05,
+            -3.6614945467066073e-05,
+            1.4709958908948206e-05,
+            -2.8364168092837332e-04,
+            1.1394466218003484e-04,
+            -1.8721615730559043e-04,
+            7.5203967811613109e-05,
+            4.1632420070310456e-04,
+            -1.6724364343353009e-04,
+            6.9506193268190631e-05,
+            -3.0228106532898472e-05,
+            -3.0228106532898472e-05,
+            1.3156705594652870e-05,
+            -2.3740975974826574e-04,
+            1.0328972070195332e-04,
+            -1.4218547815143072e-04,
+            6.1827596642872941e-05,
+            3.4031715116440432e-04,
+            -1.4804591640658066e-04,
+        ]
 
         places = 10
         np.testing.assert_almost_equal(model_dout, ref_dout, places)
-
-
-
-
diff --git a/source/tests/test_descrpt_se_r.py b/source/tests/test_descrpt_se_r.py
index e2cc6360be..b835b2298c 100644
--- a/source/tests/test_descrpt_se_r.py
+++ b/source/tests/test_descrpt_se_r.py
@@ -1,142 +1,136 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
-from deepmd.env import tf
-from tensorflow.python.framework import ops
+import numpy as np
+from common import (
+    Data,
+    force_dw_test,
+    force_test,
+    virial_dw_test,
+    virial_test,
+)
+from tensorflow.python.framework import (
+    ops,
+)
 
 # load grad of force module
 import deepmd.op
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    op_module,
+    tf,
+)
 
-from common import force_test
-from common import virial_test
-from common import force_dw_test
-from common import virial_dw_test
-from common import Data
-
-from deepmd.env import op_module
-
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
 
-class Inter():
-    def setUp (self, 
-               data, 
-               pbc = True,
-               sess = None) :
+class Inter:
+    def setUp(self, data, pbc=True, sess=None):
         self.sess = sess
         self.data = data
         self.natoms = self.data.get_natoms()
         self.ntypes = self.data.get_ntypes()
-        self.sel = [12,24]
-        self.sel_a = [0,0]
+        self.sel = [12, 24]
+        self.sel_a = [0, 0]
         self.rcut_smth = 2.45
         self.rcut = 10.0
         self.nnei = np.cumsum(self.sel)[-1]
         self.ndescrpt = self.nnei * 1
-        davg = np.zeros ([self.ntypes, self.ndescrpt])
-        dstd = np.ones  ([self.ntypes, self.ndescrpt])
+        davg = np.zeros([self.ntypes, self.ndescrpt])
+        dstd = np.ones([self.ntypes, self.ndescrpt])
         self.t_avg = tf.constant(davg.astype(GLOBAL_NP_FLOAT_PRECISION))
         self.t_std = tf.constant(dstd.astype(GLOBAL_NP_FLOAT_PRECISION))
         if pbc:
-            self.default_mesh = np.zeros (6, dtype = np.int32)
+            self.default_mesh = np.zeros(6, dtype=np.int32)
             self.default_mesh[3] = 2
             self.default_mesh[4] = 2
             self.default_mesh[5] = 2
         else:
-            self.default_mesh = np.array([], dtype = np.int32)            
+            self.default_mesh = np.array([], dtype=np.int32)
         # make place holder
-        self.coord      = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name='t_coord')
-        self.box        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='t_box')
-        self.type       = tf.placeholder(tf.int32,   [None, self.natoms[0]], name = "t_type")
-        self.tnatoms    = tf.placeholder(tf.int32,   [None], name = "t_natoms")
-        self.efield     = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name='t_efield')
-        
-    def _net (self,
-             inputs, 
-             name,
-              reuse = False) :
+        self.coord = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name="t_coord"
+        )
+        self.box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="t_box")
+        self.type = tf.placeholder(tf.int32, [None, self.natoms[0]], name="t_type")
+        self.tnatoms = tf.placeholder(tf.int32, [None], name="t_natoms")
+        self.efield = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name="t_efield"
+        )
+
+    def _net(self, inputs, name, reuse=False):
         with tf.variable_scope(name, reuse=reuse):
-            net_w = tf.get_variable ('net_w', 
-                                     [self.ndescrpt], 
-                                     GLOBAL_TF_FLOAT_PRECISION,
-                                     tf.constant_initializer (self.net_w_i))
-        dot_v = tf.matmul (tf.reshape (inputs, [-1, self.ndescrpt]),
-                           tf.reshape (net_w, [self.ndescrpt, 1]))
-        return tf.reshape (dot_v, [-1])
-        
-    def comp_ef (self, 
-                 dcoord, 
-                 dbox, 
-                 dtype,
-                 tnatoms,
-                 name,
-                 reuse = None) :
-        descrpt, descrpt_deriv, rij, nlist \
-            = op_module.prod_env_mat_r(dcoord, 
-                                      dtype,
-                                      tnatoms,
-                                      dbox, 
-                                      tf.constant(self.default_mesh),
-                                      self.t_avg,
-                                      self.t_std,
-                                      rcut = self.rcut, 
-                                      rcut_smth = self.rcut_smth,
-                                      sel = self.sel)
-        inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt])
-        atom_ener = self._net (inputs_reshape, name, reuse = reuse)
-        atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])        
-        energy = tf.reduce_sum (atom_ener_reshape, axis = 1)        
-        net_deriv_ = tf.gradients (atom_ener, inputs_reshape)
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        dot_v = tf.matmul(
+            tf.reshape(inputs, [-1, self.ndescrpt]),
+            tf.reshape(net_w, [self.ndescrpt, 1]),
+        )
+        return tf.reshape(dot_v, [-1])
+
+    def comp_ef(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        descrpt, descrpt_deriv, rij, nlist = op_module.prod_env_mat_r(
+            dcoord,
+            dtype,
+            tnatoms,
+            dbox,
+            tf.constant(self.default_mesh),
+            self.t_avg,
+            self.t_std,
+            rcut=self.rcut,
+            rcut_smth=self.rcut_smth,
+            sel=self.sel,
+        )
+        inputs_reshape = tf.reshape(descrpt, [-1, self.ndescrpt])
+        atom_ener = self._net(inputs_reshape, name, reuse=reuse)
+        atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])
+        energy = tf.reduce_sum(atom_ener_reshape, axis=1)
+        net_deriv_ = tf.gradients(atom_ener, inputs_reshape)
         net_deriv = net_deriv_[0]
-        net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) 
-
-        force = op_module.prod_force_se_r (net_deriv_reshape, 
-                                            descrpt_deriv, 
-                                            nlist, 
-                                            tnatoms)
-        virial, atom_vir = op_module.prod_virial_se_r (net_deriv_reshape, 
-                                                        descrpt_deriv, 
-                                                        rij,
-                                                        nlist, 
-                                                        tnatoms)
+        net_deriv_reshape = tf.reshape(net_deriv, [-1, self.natoms[0] * self.ndescrpt])
+
+        force = op_module.prod_force_se_r(
+            net_deriv_reshape, descrpt_deriv, nlist, tnatoms
+        )
+        virial, atom_vir = op_module.prod_virial_se_r(
+            net_deriv_reshape, descrpt_deriv, rij, nlist, tnatoms
+        )
         return energy, force, virial
 
-
-    def comp_f_dw (self, 
-                   dcoord, 
-                   dbox, 
-                   dtype,                 
-                   tnatoms,
-                   name,
-                   reuse = None) :
-        energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse)
+    def comp_f_dw(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        energy, force, virial = self.comp_ef(dcoord, dbox, dtype, tnatoms, name, reuse)
         with tf.variable_scope(name, reuse=True):
-            net_w = tf.get_variable ('net_w', [self.ndescrpt], GLOBAL_TF_FLOAT_PRECISION, tf.constant_initializer (self.net_w_i))
-        f_mag = tf.reduce_sum (tf.nn.tanh(force))
-        f_mag_dw = tf.gradients (f_mag, net_w)
-        assert (len(f_mag_dw) == 1), "length of dw is wrong"        
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        f_mag = tf.reduce_sum(tf.nn.tanh(force))
+        f_mag_dw = tf.gradients(f_mag, net_w)
+        assert len(f_mag_dw) == 1, "length of dw is wrong"
         return f_mag, f_mag_dw[0]
 
-
-    def comp_v_dw (self, 
-                   dcoord, 
-                   dbox, 
-                   dtype,                 
-                   tnatoms,
-                   name,
-                   reuse = None) :
-        energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse)
+    def comp_v_dw(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        energy, force, virial = self.comp_ef(dcoord, dbox, dtype, tnatoms, name, reuse)
         with tf.variable_scope(name, reuse=True):
-            net_w = tf.get_variable ('net_w', [self.ndescrpt], GLOBAL_TF_FLOAT_PRECISION, tf.constant_initializer (self.net_w_i))
-        v_mag = tf.reduce_sum (virial)
-        v_mag_dw = tf.gradients (v_mag, net_w)
-        assert (len(v_mag_dw) == 1), "length of dw is wrong"        
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        v_mag = tf.reduce_sum(virial)
+        v_mag_dw = tf.gradients(v_mag, net_w)
+        assert len(v_mag_dw) == 1, "length of dw is wrong"
         return v_mag, v_mag_dw[0]
 
 
-
 class TestSmooth(Inter, tf.test.TestCase):
     # def __init__ (self, *args, **kwargs):
     #     data = Data()
@@ -149,17 +143,17 @@ def setUp(self):
         data = Data()
         Inter.setUp(self, data, sess=self.test_session().__enter__())
 
-    def test_force (self) :
-        force_test(self, self, suffix = '_se_r')
+    def test_force(self):
+        force_test(self, self, suffix="_se_r")
 
-    def test_virial (self) :
-        virial_test(self, self, suffix = '_se_r')
+    def test_virial(self):
+        virial_test(self, self, suffix="_se_r")
 
-    def test_force_dw (self) :
-        force_dw_test(self, self, suffix = '_se_r')
+    def test_force_dw(self):
+        force_dw_test(self, self, suffix="_se_r")
 
-    def test_virial_dw (self) :
-        virial_dw_test(self, self, suffix = '_se_r')
+    def test_virial_dw(self):
+        virial_dw_test(self, self, suffix="_se_r")
 
 
 class TestSeRPbc(tf.test.TestCase):
@@ -167,76 +161,107 @@ def test_pbc(self):
         data = Data()
         inter0 = Inter()
         inter1 = Inter()
-        inter0.setUp(data, pbc = True, sess=self.test_session().__enter__())
-        inter1.setUp(data, pbc = False, sess=self.test_session().__enter__())
+        inter0.setUp(data, pbc=True, sess=self.test_session().__enter__())
+        inter1.setUp(data, pbc=False, sess=self.test_session().__enter__())
         inter0.net_w_i = np.copy(np.ones(inter0.ndescrpt))
         inter1.net_w_i = np.copy(np.ones(inter1.ndescrpt))
 
-        t_energy0, t_force0, t_virial0 \
-            = inter0.comp_ef (inter0.coord, inter0.box, inter0.type, inter0.tnatoms, name = "test_ser_pbc_true")
-        t_energy1, t_force1, t_virial1 \
-            = inter1.comp_ef (inter1.coord, inter1.box, inter1.type, inter1.tnatoms, name = "test_ser_pbc_false")
-
-        inter0.sess.run (tf.global_variables_initializer())
-        inter1.sess.run (tf.global_variables_initializer())
-
-        dcoord, dbox, dtype = data.get_data ()
-
-        [e0, f0, v0] = inter0.sess.run ([t_energy0, t_force0, t_virial0], 
-                                        feed_dict = {
-                                            inter0.coord:     dcoord,
-                                            inter0.box:       dbox,
-                                            inter0.type:      dtype,
-                                            inter0.tnatoms:   inter0.natoms})
-        [e1, f1, v1] = inter1.sess.run ([t_energy1, t_force1, t_virial1], 
-                                        feed_dict = {
-                                            inter1.coord:     dcoord,
-                                            inter1.box:       dbox,
-                                            inter1.type:      dtype,
-                                            inter1.tnatoms:   inter1.natoms})
+        t_energy0, t_force0, t_virial0 = inter0.comp_ef(
+            inter0.coord,
+            inter0.box,
+            inter0.type,
+            inter0.tnatoms,
+            name="test_ser_pbc_true",
+        )
+        t_energy1, t_force1, t_virial1 = inter1.comp_ef(
+            inter1.coord,
+            inter1.box,
+            inter1.type,
+            inter1.tnatoms,
+            name="test_ser_pbc_false",
+        )
+
+        inter0.sess.run(tf.global_variables_initializer())
+        inter1.sess.run(tf.global_variables_initializer())
+
+        dcoord, dbox, dtype = data.get_data()
+
+        [e0, f0, v0] = inter0.sess.run(
+            [t_energy0, t_force0, t_virial0],
+            feed_dict={
+                inter0.coord: dcoord,
+                inter0.box: dbox,
+                inter0.type: dtype,
+                inter0.tnatoms: inter0.natoms,
+            },
+        )
+        [e1, f1, v1] = inter1.sess.run(
+            [t_energy1, t_force1, t_virial1],
+            feed_dict={
+                inter1.coord: dcoord,
+                inter1.box: dbox,
+                inter1.type: dtype,
+                inter1.tnatoms: inter1.natoms,
+            },
+        )
 
         self.assertAlmostEqual(e0[0], e1[0])
         np.testing.assert_almost_equal(f0[0], f1[0])
         np.testing.assert_almost_equal(v0[0], v1[0])
 
-
     def test_pbc_small_box(self):
         data0 = Data()
-        data1 = Data(box_scale = 2)
+        data1 = Data(box_scale=2)
         inter0 = Inter()
         inter1 = Inter()
-        inter0.setUp(data0, pbc = True, sess=self.test_session().__enter__())
-        inter1.setUp(data1, pbc = False, sess=self.test_session().__enter__())
+        inter0.setUp(data0, pbc=True, sess=self.test_session().__enter__())
+        inter1.setUp(data1, pbc=False, sess=self.test_session().__enter__())
         inter0.net_w_i = np.copy(np.ones(inter0.ndescrpt))
         inter1.net_w_i = np.copy(np.ones(inter1.ndescrpt))
 
-        t_energy0, t_force0, t_virial0 \
-            = inter0.comp_ef (inter0.coord, inter0.box, inter0.type, inter0.tnatoms, name = "test_ser_pbc_sbox_true")
-        t_energy1, t_force1, t_virial1 \
-            = inter1.comp_ef (inter1.coord, inter1.box, inter1.type, inter1.tnatoms, name = "test_ser_pbc_sbox_false")
-
-        inter0.sess.run (tf.global_variables_initializer())
-        inter1.sess.run (tf.global_variables_initializer())
-
-        dcoord, dbox, dtype = data0.get_data ()
-        [e0, f0, v0] = inter0.sess.run ([t_energy0, t_force0, t_virial0], 
-                                        feed_dict = {
-                                            inter0.coord:     dcoord,
-                                            inter0.box:       dbox,
-                                            inter0.type:      dtype,
-                                            inter0.tnatoms:   inter0.natoms})
-        dcoord, dbox, dtype = data1.get_data ()
-        [e1, f1, v1] = inter1.sess.run ([t_energy1, t_force1, t_virial1], 
-                                        feed_dict = {
-                                            inter1.coord:     dcoord,
-                                            inter1.box:       dbox,
-                                            inter1.type:      dtype,
-                                            inter1.tnatoms:   inter1.natoms})
+        t_energy0, t_force0, t_virial0 = inter0.comp_ef(
+            inter0.coord,
+            inter0.box,
+            inter0.type,
+            inter0.tnatoms,
+            name="test_ser_pbc_sbox_true",
+        )
+        t_energy1, t_force1, t_virial1 = inter1.comp_ef(
+            inter1.coord,
+            inter1.box,
+            inter1.type,
+            inter1.tnatoms,
+            name="test_ser_pbc_sbox_false",
+        )
+
+        inter0.sess.run(tf.global_variables_initializer())
+        inter1.sess.run(tf.global_variables_initializer())
+
+        dcoord, dbox, dtype = data0.get_data()
+        [e0, f0, v0] = inter0.sess.run(
+            [t_energy0, t_force0, t_virial0],
+            feed_dict={
+                inter0.coord: dcoord,
+                inter0.box: dbox,
+                inter0.type: dtype,
+                inter0.tnatoms: inter0.natoms,
+            },
+        )
+        dcoord, dbox, dtype = data1.get_data()
+        [e1, f1, v1] = inter1.sess.run(
+            [t_energy1, t_force1, t_virial1],
+            feed_dict={
+                inter1.coord: dcoord,
+                inter1.box: dbox,
+                inter1.type: dtype,
+                inter1.tnatoms: inter1.natoms,
+            },
+        )
 
         self.assertAlmostEqual(e0[0], e1[0])
         np.testing.assert_almost_equal(f0[0], f1[0])
         np.testing.assert_almost_equal(v0[0], v1[0])
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_descrpt_sea_ef.py b/source/tests/test_descrpt_sea_ef.py
index b15aaaa7b0..03a591730c 100644
--- a/source/tests/test_descrpt_sea_ef.py
+++ b/source/tests/test_descrpt_sea_ef.py
@@ -1,36 +1,38 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
-from deepmd.env import tf
-from tensorflow.python.framework import ops
+import numpy as np
+from common import (
+    Data,
+    force_dw_test,
+    force_test,
+    virial_dw_test,
+    virial_test,
+)
+from tensorflow.python.framework import (
+    ops,
+)
 
 # load grad of force module
 import deepmd.op
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    op_module,
+    tf,
+)
 
-from common import force_test
-from common import virial_test
-from common import force_dw_test
-from common import virial_dw_test
-from common import Data
-
-from deepmd.env import op_module
 
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
-
-class Inter():
-    def setUp (self, 
-               data, 
-               pbc = True,
-               sess = None) :
+class Inter:
+    def setUp(self, data, pbc=True, sess=None):
         self.sess = sess
         self.data = data
         self.natoms = self.data.get_natoms()
         self.ntypes = self.data.get_ntypes()
-        self.sel_a = [12,24]
-        self.sel_r = [0,0]
+        self.sel_a = [12, 24]
+        self.sel_r = [0, 0]
         self.rcut_a = -1
         self.rcut_r_smth = 2.45
         self.rcut_r = 10.0
@@ -40,115 +42,114 @@ def setUp (self,
         self.ndescrpt_a = self.nnei_a * 4
         self.ndescrpt_r = self.nnei_r * 1
         self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r
-        davg = np.zeros ([self.ntypes, self.ndescrpt])
-        dstd = np.ones  ([self.ntypes, self.ndescrpt])
+        davg = np.zeros([self.ntypes, self.ndescrpt])
+        dstd = np.ones([self.ntypes, self.ndescrpt])
         self.t_avg = tf.constant(davg.astype(GLOBAL_NP_FLOAT_PRECISION))
         self.t_std = tf.constant(dstd.astype(GLOBAL_NP_FLOAT_PRECISION))
         if pbc:
-            self.default_mesh = np.zeros (6, dtype = np.int32)
+            self.default_mesh = np.zeros(6, dtype=np.int32)
             self.default_mesh[3] = 2
             self.default_mesh[4] = 2
             self.default_mesh[5] = 2
         else:
-            self.default_mesh = np.array([], dtype = np.int32)
+            self.default_mesh = np.array([], dtype=np.int32)
         # make place holder
-        self.coord      = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name='t_coord')
-        self.efield     = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name='t_efield')
-        self.box        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='t_box')
-        self.type       = tf.placeholder(tf.int32,   [None, self.natoms[0]], name = "t_type")
-        self.tnatoms    = tf.placeholder(tf.int32,   [None], name = "t_natoms")
-        
-    def _net (self,
-             inputs, 
-             name,
-              reuse = False) :
+        self.coord = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name="t_coord"
+        )
+        self.efield = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name="t_efield"
+        )
+        self.box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="t_box")
+        self.type = tf.placeholder(tf.int32, [None, self.natoms[0]], name="t_type")
+        self.tnatoms = tf.placeholder(tf.int32, [None], name="t_natoms")
+
+    def _net(self, inputs, name, reuse=False):
         with tf.variable_scope(name, reuse=reuse):
-            net_w = tf.get_variable ('net_w', 
-                                     [self.ndescrpt], 
-                                     GLOBAL_TF_FLOAT_PRECISION,
-                                     tf.constant_initializer (self.net_w_i))
-        dot_v = tf.matmul (tf.reshape (inputs, [-1, self.ndescrpt]),
-                           tf.reshape (net_w, [self.ndescrpt, 1]))
-        return tf.reshape (dot_v, [-1])
-        
-    def comp_ef (self, 
-                 dcoord, 
-                 dbox, 
-                 dtype,
-                 tnatoms,
-                 name,
-                 reuse = None) :
-        descrpt, descrpt_deriv, rij, nlist \
-            = op_module.descrpt_se_a_ef (dcoord, 
-                                         dtype,
-                                         tnatoms,
-                                         dbox, 
-                                         tf.constant(self.default_mesh),
-                                         self.efield,
-                                         self.t_avg,
-                                         self.t_std,
-                                         rcut_a = self.rcut_a, 
-                                         rcut_r = self.rcut_r, 
-                                         rcut_r_smth = self.rcut_r_smth,
-                                         sel_a = self.sel_a, 
-                                         sel_r = self.sel_r)
-        inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt])
-        atom_ener = self._net (inputs_reshape, name, reuse = reuse)
-        atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])        
-        energy = tf.reduce_sum (atom_ener_reshape, axis = 1)        
-        net_deriv_ = tf.gradients (atom_ener, inputs_reshape)
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        dot_v = tf.matmul(
+            tf.reshape(inputs, [-1, self.ndescrpt]),
+            tf.reshape(net_w, [self.ndescrpt, 1]),
+        )
+        return tf.reshape(dot_v, [-1])
+
+    def comp_ef(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        descrpt, descrpt_deriv, rij, nlist = op_module.descrpt_se_a_ef(
+            dcoord,
+            dtype,
+            tnatoms,
+            dbox,
+            tf.constant(self.default_mesh),
+            self.efield,
+            self.t_avg,
+            self.t_std,
+            rcut_a=self.rcut_a,
+            rcut_r=self.rcut_r,
+            rcut_r_smth=self.rcut_r_smth,
+            sel_a=self.sel_a,
+            sel_r=self.sel_r,
+        )
+        inputs_reshape = tf.reshape(descrpt, [-1, self.ndescrpt])
+        atom_ener = self._net(inputs_reshape, name, reuse=reuse)
+        atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])
+        energy = tf.reduce_sum(atom_ener_reshape, axis=1)
+        net_deriv_ = tf.gradients(atom_ener, inputs_reshape)
         net_deriv = net_deriv_[0]
-        net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) 
-
-        force = op_module.prod_force_se_a (net_deriv_reshape, 
-                                            descrpt_deriv, 
-                                            nlist, 
-                                            tnatoms,
-                                            n_a_sel = self.nnei_a, 
-                                            n_r_sel = self.nnei_r)
-        virial, atom_vir = op_module.prod_virial_se_a (net_deriv_reshape, 
-                                                        descrpt_deriv, 
-                                                        rij,
-                                                        nlist, 
-                                                        tnatoms,
-                                                        n_a_sel = self.nnei_a, 
-                                                        n_r_sel = self.nnei_r)
+        net_deriv_reshape = tf.reshape(net_deriv, [-1, self.natoms[0] * self.ndescrpt])
+
+        force = op_module.prod_force_se_a(
+            net_deriv_reshape,
+            descrpt_deriv,
+            nlist,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
+        virial, atom_vir = op_module.prod_virial_se_a(
+            net_deriv_reshape,
+            descrpt_deriv,
+            rij,
+            nlist,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
         return energy, force, virial
 
-
-    def comp_f_dw (self, 
-                   dcoord, 
-                   dbox, 
-                   dtype,                 
-                   tnatoms,
-                   name,
-                   reuse = None) :
-        energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse)
+    def comp_f_dw(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        energy, force, virial = self.comp_ef(dcoord, dbox, dtype, tnatoms, name, reuse)
         with tf.variable_scope(name, reuse=True):
-            net_w = tf.get_variable ('net_w', [self.ndescrpt], GLOBAL_TF_FLOAT_PRECISION, tf.constant_initializer (self.net_w_i))
-        f_mag = tf.reduce_sum (tf.nn.tanh(force))
-        f_mag_dw = tf.gradients (f_mag, net_w)
-        assert (len(f_mag_dw) == 1), "length of dw is wrong"        
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        f_mag = tf.reduce_sum(tf.nn.tanh(force))
+        f_mag_dw = tf.gradients(f_mag, net_w)
+        assert len(f_mag_dw) == 1, "length of dw is wrong"
         return f_mag, f_mag_dw[0]
 
-
-    def comp_v_dw (self, 
-                   dcoord, 
-                   dbox, 
-                   dtype,                 
-                   tnatoms,
-                   name,
-                   reuse = None) :
-        energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse)
+    def comp_v_dw(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        energy, force, virial = self.comp_ef(dcoord, dbox, dtype, tnatoms, name, reuse)
         with tf.variable_scope(name, reuse=True):
-            net_w = tf.get_variable ('net_w', [self.ndescrpt], GLOBAL_TF_FLOAT_PRECISION, tf.constant_initializer (self.net_w_i))
-        v_mag = tf.reduce_sum (virial)
-        v_mag_dw = tf.gradients (v_mag, net_w)
-        assert (len(v_mag_dw) == 1), "length of dw is wrong"        
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        v_mag = tf.reduce_sum(virial)
+        v_mag_dw = tf.gradients(v_mag, net_w)
+        assert len(v_mag_dw) == 1, "length of dw is wrong"
         return v_mag, v_mag_dw[0]
 
 
-
 class TestSmooth(Inter, tf.test.TestCase):
     # def __init__ (self, *args, **kwargs):
     #     data = Data()
@@ -161,20 +162,18 @@ def setUp(self):
         data = Data()
         Inter.setUp(self, data, sess=self.test_session().__enter__())
 
-    def test_force (self) :
-        force_test(self, self, suffix = '_sea_ef')
-
-    def test_virial (self) :
-        virial_test(self, self, suffix = '_sea_ef')
-
-    def test_force_dw (self) :
-        force_dw_test(self, self, suffix = '_sea_ef')
+    def test_force(self):
+        force_test(self, self, suffix="_sea_ef")
 
-    def test_virial_dw (self) :
-        virial_dw_test(self, self, suffix = '_sea_ef')
+    def test_virial(self):
+        virial_test(self, self, suffix="_sea_ef")
 
+    def test_force_dw(self):
+        force_dw_test(self, self, suffix="_sea_ef")
 
+    def test_virial_dw(self):
+        virial_dw_test(self, self, suffix="_sea_ef")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_descrpt_sea_ef_para.py b/source/tests/test_descrpt_sea_ef_para.py
index 213fc17930..43fd192e29 100644
--- a/source/tests/test_descrpt_sea_ef_para.py
+++ b/source/tests/test_descrpt_sea_ef_para.py
@@ -1,36 +1,38 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
-from deepmd.env import tf
-from tensorflow.python.framework import ops
+import numpy as np
+from common import (
+    Data,
+    force_dw_test,
+    force_test,
+    virial_dw_test,
+    virial_test,
+)
+from tensorflow.python.framework import (
+    ops,
+)
 
 # load grad of force module
 import deepmd.op
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    op_module,
+    tf,
+)
 
-from common import force_test
-from common import virial_test
-from common import force_dw_test
-from common import virial_dw_test
-from common import Data
-
-from deepmd.env import op_module
 
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
-
-class Inter():
-    def setUp (self, 
-               data, 
-               pbc = True,
-               sess = None) :
+class Inter:
+    def setUp(self, data, pbc=True, sess=None):
         self.sess = sess
         self.data = data
         self.natoms = self.data.get_natoms()
         self.ntypes = self.data.get_ntypes()
-        self.sel_a = [12,24]
-        self.sel_r = [0,0]
+        self.sel_a = [12, 24]
+        self.sel_r = [0, 0]
         self.rcut_a = -1
         self.rcut_r_smth = 2.45
         self.rcut_r = 10.0
@@ -40,115 +42,114 @@ def setUp (self,
         self.ndescrpt_a = self.nnei_a * 4
         self.ndescrpt_r = self.nnei_r * 1
         self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r
-        davg = np.zeros ([self.ntypes, self.ndescrpt])
-        dstd = np.ones  ([self.ntypes, self.ndescrpt])
+        davg = np.zeros([self.ntypes, self.ndescrpt])
+        dstd = np.ones([self.ntypes, self.ndescrpt])
         self.t_avg = tf.constant(davg.astype(GLOBAL_NP_FLOAT_PRECISION))
         self.t_std = tf.constant(dstd.astype(GLOBAL_NP_FLOAT_PRECISION))
         if pbc:
-            self.default_mesh = np.zeros (6, dtype = np.int32)
+            self.default_mesh = np.zeros(6, dtype=np.int32)
             self.default_mesh[3] = 2
             self.default_mesh[4] = 2
             self.default_mesh[5] = 2
         else:
-            self.default_mesh = np.array([], dtype = np.int32)
+            self.default_mesh = np.array([], dtype=np.int32)
         # make place holder
-        self.coord      = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name='t_coord')
-        self.efield     = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name='t_efield')
-        self.box        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='t_box')
-        self.type       = tf.placeholder(tf.int32,   [None, self.natoms[0]], name = "t_type")
-        self.tnatoms    = tf.placeholder(tf.int32,   [None], name = "t_natoms")
-        
-    def _net (self,
-             inputs, 
-             name,
-              reuse = False) :
+        self.coord = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name="t_coord"
+        )
+        self.efield = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name="t_efield"
+        )
+        self.box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="t_box")
+        self.type = tf.placeholder(tf.int32, [None, self.natoms[0]], name="t_type")
+        self.tnatoms = tf.placeholder(tf.int32, [None], name="t_natoms")
+
+    def _net(self, inputs, name, reuse=False):
         with tf.variable_scope(name, reuse=reuse):
-            net_w = tf.get_variable ('net_w', 
-                                     [self.ndescrpt], 
-                                     GLOBAL_TF_FLOAT_PRECISION,
-                                     tf.constant_initializer (self.net_w_i))
-        dot_v = tf.matmul (tf.reshape (inputs, [-1, self.ndescrpt]),
-                           tf.reshape (net_w, [self.ndescrpt, 1]))
-        return tf.reshape (dot_v, [-1])
-        
-    def comp_ef (self, 
-                 dcoord, 
-                 dbox, 
-                 dtype,
-                 tnatoms,
-                 name,
-                 reuse = None) :
-        descrpt, descrpt_deriv, rij, nlist \
-            = op_module.descrpt_se_a_ef_para (dcoord, 
-                                              dtype,
-                                              tnatoms,
-                                              dbox, 
-                                              tf.constant(self.default_mesh),
-                                              self.efield,
-                                              self.t_avg,
-                                              self.t_std,
-                                              rcut_a = self.rcut_a, 
-                                              rcut_r = self.rcut_r, 
-                                              rcut_r_smth = self.rcut_r_smth,
-                                              sel_a = self.sel_a, 
-                                              sel_r = self.sel_r)
-        inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt])
-        atom_ener = self._net (inputs_reshape, name, reuse = reuse)
-        atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])        
-        energy = tf.reduce_sum (atom_ener_reshape, axis = 1)        
-        net_deriv_ = tf.gradients (atom_ener, inputs_reshape)
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        dot_v = tf.matmul(
+            tf.reshape(inputs, [-1, self.ndescrpt]),
+            tf.reshape(net_w, [self.ndescrpt, 1]),
+        )
+        return tf.reshape(dot_v, [-1])
+
+    def comp_ef(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        descrpt, descrpt_deriv, rij, nlist = op_module.descrpt_se_a_ef_para(
+            dcoord,
+            dtype,
+            tnatoms,
+            dbox,
+            tf.constant(self.default_mesh),
+            self.efield,
+            self.t_avg,
+            self.t_std,
+            rcut_a=self.rcut_a,
+            rcut_r=self.rcut_r,
+            rcut_r_smth=self.rcut_r_smth,
+            sel_a=self.sel_a,
+            sel_r=self.sel_r,
+        )
+        inputs_reshape = tf.reshape(descrpt, [-1, self.ndescrpt])
+        atom_ener = self._net(inputs_reshape, name, reuse=reuse)
+        atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])
+        energy = tf.reduce_sum(atom_ener_reshape, axis=1)
+        net_deriv_ = tf.gradients(atom_ener, inputs_reshape)
         net_deriv = net_deriv_[0]
-        net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) 
-
-        force = op_module.prod_force_se_a (net_deriv_reshape, 
-                                            descrpt_deriv, 
-                                            nlist, 
-                                            tnatoms,
-                                            n_a_sel = self.nnei_a, 
-                                            n_r_sel = self.nnei_r)
-        virial, atom_vir = op_module.prod_virial_se_a (net_deriv_reshape, 
-                                                        descrpt_deriv, 
-                                                        rij,
-                                                        nlist, 
-                                                        tnatoms,
-                                                        n_a_sel = self.nnei_a, 
-                                                        n_r_sel = self.nnei_r)
+        net_deriv_reshape = tf.reshape(net_deriv, [-1, self.natoms[0] * self.ndescrpt])
+
+        force = op_module.prod_force_se_a(
+            net_deriv_reshape,
+            descrpt_deriv,
+            nlist,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
+        virial, atom_vir = op_module.prod_virial_se_a(
+            net_deriv_reshape,
+            descrpt_deriv,
+            rij,
+            nlist,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
         return energy, force, virial
 
-
-    def comp_f_dw (self, 
-                   dcoord, 
-                   dbox, 
-                   dtype,                 
-                   tnatoms,
-                   name,
-                   reuse = None) :
-        energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse)
+    def comp_f_dw(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        energy, force, virial = self.comp_ef(dcoord, dbox, dtype, tnatoms, name, reuse)
         with tf.variable_scope(name, reuse=True):
-            net_w = tf.get_variable ('net_w', [self.ndescrpt], GLOBAL_TF_FLOAT_PRECISION, tf.constant_initializer (self.net_w_i))
-        f_mag = tf.reduce_sum (tf.nn.tanh(force))
-        f_mag_dw = tf.gradients (f_mag, net_w)
-        assert (len(f_mag_dw) == 1), "length of dw is wrong"        
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        f_mag = tf.reduce_sum(tf.nn.tanh(force))
+        f_mag_dw = tf.gradients(f_mag, net_w)
+        assert len(f_mag_dw) == 1, "length of dw is wrong"
         return f_mag, f_mag_dw[0]
 
-
-    def comp_v_dw (self, 
-                   dcoord, 
-                   dbox, 
-                   dtype,                 
-                   tnatoms,
-                   name,
-                   reuse = None) :
-        energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse)
+    def comp_v_dw(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        energy, force, virial = self.comp_ef(dcoord, dbox, dtype, tnatoms, name, reuse)
         with tf.variable_scope(name, reuse=True):
-            net_w = tf.get_variable ('net_w', [self.ndescrpt], GLOBAL_TF_FLOAT_PRECISION, tf.constant_initializer (self.net_w_i))
-        v_mag = tf.reduce_sum (virial)
-        v_mag_dw = tf.gradients (v_mag, net_w)
-        assert (len(v_mag_dw) == 1), "length of dw is wrong"        
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        v_mag = tf.reduce_sum(virial)
+        v_mag_dw = tf.gradients(v_mag, net_w)
+        assert len(v_mag_dw) == 1, "length of dw is wrong"
         return v_mag, v_mag_dw[0]
 
 
-
 class TestSmooth(Inter, tf.test.TestCase):
     # def __init__ (self, *args, **kwargs):
     #     data = Data()
@@ -161,20 +162,18 @@ def setUp(self):
         data = Data()
         Inter.setUp(self, data, sess=self.test_session().__enter__())
 
-    def test_force (self) :
-        force_test(self, self, suffix = '_sea_ef_para')
-
-    def test_virial (self) :
-        virial_test(self, self, suffix = '_sea_ef_para')
-
-    def test_force_dw (self) :
-        force_dw_test(self, self, suffix = '_sea_ef_para')
+    def test_force(self):
+        force_test(self, self, suffix="_sea_ef_para")
 
-    def test_virial_dw (self) :
-        virial_dw_test(self, self, suffix = '_sea_ef_para')
+    def test_virial(self):
+        virial_test(self, self, suffix="_sea_ef_para")
 
+    def test_force_dw(self):
+        force_dw_test(self, self, suffix="_sea_ef_para")
 
+    def test_virial_dw(self):
+        virial_dw_test(self, self, suffix="_sea_ef_para")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_descrpt_sea_ef_rot.py b/source/tests/test_descrpt_sea_ef_rot.py
index e96951b275..4569cb1afe 100644
--- a/source/tests/test_descrpt_sea_ef_rot.py
+++ b/source/tests/test_descrpt_sea_ef_rot.py
@@ -1,24 +1,32 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
-from deepmd.env import tf
-from tensorflow.python.framework import ops
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
+import numpy as np
+from tensorflow.python.framework import (
+    ops,
+)
+
+from deepmd.descriptor import (
+    DescrptSeA,
+    DescrptSeAEfLower,
+)
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    op_module,
+    tf,
+)
 
-from deepmd.env import op_module
-from deepmd.descriptor import DescrptSeA
-from deepmd.descriptor import DescrptSeAEfLower
 
 class TestEfRot(tf.test.TestCase):
     def setUp(self):
         self.sess = self.test_session().__enter__()
         self.natoms = [5, 5, 2, 3]
         self.ntypes = 2
-        self.sel_a = [12,24]
-        self.sel_r = [0,0]
+        self.sel_a = [12, 24]
+        self.sel_r = [0, 0]
         self.rcut_a = -1
         self.rcut_r_smth = 2.45
         self.rcut_r = 10.0
@@ -28,175 +36,244 @@ def setUp(self):
         self.ndescrpt_a = self.nnei_a * 4
         self.ndescrpt_r = self.nnei_r * 1
         self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r
-        davg = np.zeros ([self.ntypes, self.ndescrpt])
-        dstd = np.ones  ([self.ntypes, self.ndescrpt])
+        davg = np.zeros([self.ntypes, self.ndescrpt])
+        dstd = np.ones([self.ntypes, self.ndescrpt])
         self.t_avg = tf.constant(davg.astype(GLOBAL_NP_FLOAT_PRECISION))
         self.t_std = tf.constant(dstd.astype(GLOBAL_NP_FLOAT_PRECISION))
         # no pbc
-        self.default_mesh = np.array([], dtype = np.int32)
+        self.default_mesh = np.array([], dtype=np.int32)
         # make place holder
-        self.coord      = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name='t_coord')
-        self.efield     = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name='t_efield')
-        self.box        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='t_box')
-        self.type       = tf.placeholder(tf.int32,   [None, self.natoms[0]], name = "t_type")
-        self.tnatoms    = tf.placeholder(tf.int32,   [None], name = "t_natoms")
-
+        self.coord = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name="t_coord"
+        )
+        self.efield = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name="t_efield"
+        )
+        self.box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="t_box")
+        self.type = tf.placeholder(tf.int32, [None, self.natoms[0]], name="t_type")
+        self.tnatoms = tf.placeholder(tf.int32, [None], name="t_natoms")
 
     def _normalize_3d(self, a):
-        na = tf.norm(a, axis = 1)
-        na = tf.tile(tf.reshape(na, [-1,1]), tf.constant([1, 3]))
+        na = tf.norm(a, axis=1)
+        na = tf.tile(tf.reshape(na, [-1, 1]), tf.constant([1, 3]))
         b = tf.divide(a, na)
         return b
 
-    def build_efv(self,
-                  dcoord,
-                  dbox,
-                  dtype,
-                  tnatoms,
-                  name, 
-                  op,
-                  reuse = None):
+    def build_efv(self, dcoord, dbox, dtype, tnatoms, name, op, reuse=None):
         efield = tf.reshape(self.efield, [-1, 3])
         efield = self._normalize_3d(efield)
         efield = tf.reshape(efield, [-1, tnatoms[0] * 3])
-        if op != op_module.prod_env_mat_a :            
-            descrpt = DescrptSeAEfLower(op, **{'sel':self.sel_a, 'rcut': 6, 'rcut_smth' : 5.5, 'seed': 1, 'uniform_seed': True})
+        if op != op_module.prod_env_mat_a:
+            descrpt = DescrptSeAEfLower(
+                op,
+                **{
+                    "sel": self.sel_a,
+                    "rcut": 6,
+                    "rcut_smth": 5.5,
+                    "seed": 1,
+                    "uniform_seed": True,
+                }
+            )
         else:
-            descrpt = DescrptSeA(**{'sel':self.sel_a, 'rcut': 6, 'rcut_smth' : 0.5, 'seed': 1, 'uniform_seed': True})
-        dout = descrpt.build(dcoord,
-                             dtype,
-                             tnatoms,
-                             dbox,
-                             tf.constant(self.default_mesh),
-                             {'efield': efield},
-                             suffix = name,
-                             reuse = reuse)
+            descrpt = DescrptSeA(
+                **{
+                    "sel": self.sel_a,
+                    "rcut": 6,
+                    "rcut_smth": 0.5,
+                    "seed": 1,
+                    "uniform_seed": True,
+                }
+            )
+        dout = descrpt.build(
+            dcoord,
+            dtype,
+            tnatoms,
+            dbox,
+            tf.constant(self.default_mesh),
+            {"efield": efield},
+            suffix=name,
+            reuse=reuse,
+        )
         dout = tf.reshape(dout, [-1, descrpt.get_dim_out()])
-        atom_ener = tf.reduce_sum(dout, axis = 1)
-        atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])        
-        energy = tf.reduce_sum (atom_ener_reshape, axis = 1)        
-        force, virial, atom_vir \
-            = descrpt.prod_force_virial (atom_ener, tnatoms)
-        return energy, force, virial, atom_ener, atom_vir        
+        atom_ener = tf.reduce_sum(dout, axis=1)
+        atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])
+        energy = tf.reduce_sum(atom_ener_reshape, axis=1)
+        force, virial, atom_vir = descrpt.prod_force_virial(atom_ener, tnatoms)
+        return energy, force, virial, atom_ener, atom_vir
 
     def make_test_data(self, nframes):
         dcoord = np.random.random([nframes, self.natoms[0], 3])
         for ii in range(nframes):
-            dcoord[ii, :, :] = dcoord[ii, :, :] - np.tile(dcoord[ii, 0, :], [self.natoms[0], 1])
+            dcoord[ii, :, :] = dcoord[ii, :, :] - np.tile(
+                dcoord[ii, 0, :], [self.natoms[0], 1]
+            )
         dcoord = dcoord.reshape([nframes, -1])
         one_box = np.eye(3).reshape([1, 9])
         dbox = np.tile(one_box, [nframes, 1])
         one_type = []
-        for ii in range(2, 2+self.ntypes):
-            one_type = one_type + [ii-2 for jj in range(self.natoms[ii])]
+        for ii in range(2, 2 + self.ntypes):
+            one_type = one_type + [ii - 2 for jj in range(self.natoms[ii])]
         np.random.shuffle(one_type)
-        one_type = np.array(one_type, dtype = int).reshape([1,-1])
+        one_type = np.array(one_type, dtype=int).reshape([1, -1])
         dtype = np.tile(one_type, [nframes, 1])
         defield = np.random.random(dcoord.shape)
         return dcoord, dbox, dtype, defield
 
     def rotate_mat(self, axis_, theta):
         axis = axis_ / np.linalg.norm(axis_)
-        onemcc = (1. - np.cos(theta))
+        onemcc = 1.0 - np.cos(theta)
         cc = np.cos(theta)
         ss = np.sin(theta)
         r = [
-            cc + axis[0]**2 * onemcc,
+            cc + axis[0] ** 2 * onemcc,
             axis[0] * axis[1] * onemcc - axis[2] * ss,
             axis[0] * axis[2] * onemcc + axis[1] * ss,
             axis[0] * axis[1] * onemcc + axis[2] * ss,
-            cc + axis[1]**2 * onemcc,
+            cc + axis[1] ** 2 * onemcc,
             axis[1] * axis[2] * onemcc - axis[0] * ss,
             axis[0] * axis[2] * onemcc - axis[1] * ss,
             axis[1] * axis[2] * onemcc + axis[0] * ss,
-            cc + axis[2]**2 * onemcc]
+            cc + axis[2] ** 2 * onemcc,
+        ]
         return np.array(r).reshape(3, 3)
-            
-    def test_rot_axis(self, suffix=''):
-        suffix = '_axis'
-        t_p_e, t_p_f, t_p_f, t_p_ae, t_p_av \
-            = self.build_efv (self.coord, self.box, self.type, self.tnatoms, name = "test_rot" + suffix, op = op_module.descrpt_se_a_ef_para)
-        t_v_e, t_v_f, t_v_f, t_v_ae, t_v_av \
-            = self.build_efv (self.coord, self.box, self.type, self.tnatoms, name = "test_rot" + suffix, op = op_module.descrpt_se_a_ef_vert, reuse = True)
-        t_e, t_f, t_f, t_ae, t_av \
-            = self.build_efv (self.coord, self.box, self.type, self.tnatoms, name = "test_rot" + suffix, op = op_module.prod_env_mat_a, reuse = True)
-        self.sess.run (tf.global_variables_initializer())
 
-        np.random.seed(0)        
+    def test_rot_axis(self, suffix=""):
+        suffix = "_axis"
+        t_p_e, t_p_f, t_p_f, t_p_ae, t_p_av = self.build_efv(
+            self.coord,
+            self.box,
+            self.type,
+            self.tnatoms,
+            name="test_rot" + suffix,
+            op=op_module.descrpt_se_a_ef_para,
+        )
+        t_v_e, t_v_f, t_v_f, t_v_ae, t_v_av = self.build_efv(
+            self.coord,
+            self.box,
+            self.type,
+            self.tnatoms,
+            name="test_rot" + suffix,
+            op=op_module.descrpt_se_a_ef_vert,
+            reuse=True,
+        )
+        t_e, t_f, t_f, t_ae, t_av = self.build_efv(
+            self.coord,
+            self.box,
+            self.type,
+            self.tnatoms,
+            name="test_rot" + suffix,
+            op=op_module.prod_env_mat_a,
+            reuse=True,
+        )
+        self.sess.run(tf.global_variables_initializer())
+
+        np.random.seed(0)
         # make test data
-        nframes = 2      
+        nframes = 2
         dcoord, dbox, dtype, defield = self.make_test_data(nframes)
-        [p_ae0] = self.sess.run ([t_p_ae], 
-                                feed_dict = {
-                                    self.coord:     dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    defield,
-                                    self.tnatoms:   self.natoms})
-        [v_ae0] = self.sess.run ([t_v_ae], 
-                                feed_dict = {
-                                    self.coord:     dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    defield,
-                                    self.tnatoms:   self.natoms})
-        [ae0] = self.sess.run ([t_ae], 
-                                feed_dict = {
-                                    self.coord:     dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    defield,
-                                    self.tnatoms:   self.natoms})
+        [p_ae0] = self.sess.run(
+            [t_p_ae],
+            feed_dict={
+                self.coord: dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: defield,
+                self.tnatoms: self.natoms,
+            },
+        )
+        [v_ae0] = self.sess.run(
+            [t_v_ae],
+            feed_dict={
+                self.coord: dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: defield,
+                self.tnatoms: self.natoms,
+            },
+        )
+        [ae0] = self.sess.run(
+            [t_ae],
+            feed_dict={
+                self.coord: dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: defield,
+                self.tnatoms: self.natoms,
+            },
+        )
         # print(p_ae0)
         # print(v_ae0)
         # print(ae0)
 
         for kk in range(0, self.natoms[0]):
             # print(f0)
-            theta = 45. / 180. * np.pi
-            rr0 = self.rotate_mat(defield[0][kk*3:kk*3+3], theta)
+            theta = 45.0 / 180.0 * np.pi
+            rr0 = self.rotate_mat(defield[0][kk * 3 : kk * 3 + 3], theta)
             # rr0 = self.rotate_mat([0, 0, 1], theta)
-            rr1 = self.rotate_mat(defield[1][kk*3:kk*3+3], theta)
+            rr1 = self.rotate_mat(defield[1][kk * 3 : kk * 3 + 3], theta)
             # print(rr0, np.matmul(rr0, rr0.T), np.matmul(rr0.T, rr0))
             # print(rr1)
             dcoord_ = np.copy(dcoord).reshape([nframes, -1, 3])
             dcoord0 = np.matmul(dcoord_[0], rr0)
             dcoord1 = np.matmul(dcoord_[1], rr1)
-            new_dcoord = np.concatenate([dcoord0, dcoord1], axis = 0).reshape([nframes, -1])
+            new_dcoord = np.concatenate([dcoord0, dcoord1], axis=0).reshape(
+                [nframes, -1]
+            )
             defield_ = np.copy(defield).reshape([nframes, -1, 3])
             defield0 = np.matmul(defield_[0], rr0)
             defield1 = np.matmul(defield_[1], rr1)
-            new_defield = np.concatenate([defield0, defield1], axis = 0).reshape([nframes, -1])
+            new_defield = np.concatenate([defield0, defield1], axis=0).reshape(
+                [nframes, -1]
+            )
 
-            [p_ae1] = self.sess.run ([t_p_ae], 
-                                    feed_dict = {
-                                        self.coord:     new_dcoord,
-                                        self.box:       dbox,
-                                        self.type:      dtype,
-                                        self.efield:    defield,
-                                        self.tnatoms:   self.natoms})
-            [v_ae1] = self.sess.run ([t_v_ae], 
-                                    feed_dict = {
-                                        self.coord:     new_dcoord,
-                                        self.box:       dbox,
-                                        self.type:      dtype,
-                                        self.efield:    defield,
-                                        self.tnatoms:   self.natoms})
-            [ae1] = self.sess.run ([t_ae], 
-                                    feed_dict = {
-                                        self.coord:     new_dcoord,
-                                        self.box:       dbox,
-                                        self.type:      dtype,
-                                        self.efield:    defield,
-                                        self.tnatoms:   self.natoms})
+            [p_ae1] = self.sess.run(
+                [t_p_ae],
+                feed_dict={
+                    self.coord: new_dcoord,
+                    self.box: dbox,
+                    self.type: dtype,
+                    self.efield: defield,
+                    self.tnatoms: self.natoms,
+                },
+            )
+            [v_ae1] = self.sess.run(
+                [t_v_ae],
+                feed_dict={
+                    self.coord: new_dcoord,
+                    self.box: dbox,
+                    self.type: dtype,
+                    self.efield: defield,
+                    self.tnatoms: self.natoms,
+                },
+            )
+            [ae1] = self.sess.run(
+                [t_ae],
+                feed_dict={
+                    self.coord: new_dcoord,
+                    self.box: dbox,
+                    self.type: dtype,
+                    self.efield: defield,
+                    self.tnatoms: self.natoms,
+                },
+            )
             for ii in range(0, self.natoms[0]):
                 for jj in range(0, self.natoms[0]):
-                    diff = dcoord[0][3*jj:3*jj+3] - dcoord[0][3*ii:3*ii+3]
-                    dot = np.dot(dcoord[0][3*jj:3*jj+3] , dcoord[0][3*ii:3*ii+3])
-                    diff1 = new_dcoord[0][3*jj:3*jj+3] - new_dcoord[0][3*ii:3*ii+3]
-                    dot1 = np.dot(new_dcoord[0][3*jj:3*jj+3] , new_dcoord[0][3*ii:3*ii+3])
-                    assert(np.abs(np.linalg.norm(diff) - np.linalg.norm(diff1)) < 1e-15)
-                    assert(np.abs(dot - dot1) < 1e-15)
+                    diff = (
+                        dcoord[0][3 * jj : 3 * jj + 3] - dcoord[0][3 * ii : 3 * ii + 3]
+                    )
+                    dot = np.dot(
+                        dcoord[0][3 * jj : 3 * jj + 3], dcoord[0][3 * ii : 3 * ii + 3]
+                    )
+                    diff1 = (
+                        new_dcoord[0][3 * jj : 3 * jj + 3]
+                        - new_dcoord[0][3 * ii : 3 * ii + 3]
+                    )
+                    dot1 = np.dot(
+                        new_dcoord[0][3 * jj : 3 * jj + 3],
+                        new_dcoord[0][3 * ii : 3 * ii + 3],
+                    )
+                    assert np.abs(np.linalg.norm(diff) - np.linalg.norm(diff1)) < 1e-15
+                    assert np.abs(dot - dot1) < 1e-15
 
             for ii in range(1, self.natoms[0]):
                 if ii == kk:
@@ -206,158 +283,236 @@ def test_rot_axis(self, suffix=''):
                     self.assertNotAlmostEqual(p_ae0[ii], p_ae1[ii])
                     self.assertNotAlmostEqual(v_ae0[ii], v_ae1[ii])
 
+    def test_rot_diff_axis(self, suffix=""):
+        suffix = "_diff_axis"
+        t_p_e, t_p_f, t_p_f, t_p_ae, t_p_av = self.build_efv(
+            self.coord,
+            self.box,
+            self.type,
+            self.tnatoms,
+            name="test_rot" + suffix,
+            op=op_module.descrpt_se_a_ef_para,
+        )
+        t_v_e, t_v_f, t_v_f, t_v_ae, t_v_av = self.build_efv(
+            self.coord,
+            self.box,
+            self.type,
+            self.tnatoms,
+            name="test_rot" + suffix,
+            op=op_module.descrpt_se_a_ef_vert,
+            reuse=True,
+        )
+        t_e, t_f, t_f, t_ae, t_av = self.build_efv(
+            self.coord,
+            self.box,
+            self.type,
+            self.tnatoms,
+            name="test_rot" + suffix,
+            op=op_module.prod_env_mat_a,
+            reuse=True,
+        )
+        self.sess.run(tf.global_variables_initializer())
 
-    def test_rot_diff_axis(self, suffix=''):
-        suffix = '_diff_axis'
-        t_p_e, t_p_f, t_p_f, t_p_ae, t_p_av \
-            = self.build_efv (self.coord, self.box, self.type, self.tnatoms, name = "test_rot" + suffix, op = op_module.descrpt_se_a_ef_para)
-        t_v_e, t_v_f, t_v_f, t_v_ae, t_v_av \
-            = self.build_efv (self.coord, self.box, self.type, self.tnatoms, name = "test_rot" + suffix, op = op_module.descrpt_se_a_ef_vert, reuse = True)
-        t_e, t_f, t_f, t_ae, t_av \
-            = self.build_efv (self.coord, self.box, self.type, self.tnatoms, name = "test_rot" + suffix, op = op_module.prod_env_mat_a, reuse = True)
-        self.sess.run (tf.global_variables_initializer())
-
-        np.random.seed(0)        
+        np.random.seed(0)
         # make test data
-        nframes = 2      
+        nframes = 2
         dcoord, dbox, dtype, defield = self.make_test_data(nframes)
-        [p_ae0] = self.sess.run ([t_p_ae], 
-                                feed_dict = {
-                                    self.coord:     dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    defield,
-                                    self.tnatoms:   self.natoms})
-        [v_ae0] = self.sess.run ([t_v_ae], 
-                                feed_dict = {
-                                    self.coord:     dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    defield,
-                                    self.tnatoms:   self.natoms})
-        [ae0] = self.sess.run ([t_ae], 
-                                feed_dict = {
-                                    self.coord:     dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    defield,
-                                    self.tnatoms:   self.natoms})
+        [p_ae0] = self.sess.run(
+            [t_p_ae],
+            feed_dict={
+                self.coord: dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: defield,
+                self.tnatoms: self.natoms,
+            },
+        )
+        [v_ae0] = self.sess.run(
+            [t_v_ae],
+            feed_dict={
+                self.coord: dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: defield,
+                self.tnatoms: self.natoms,
+            },
+        )
+        [ae0] = self.sess.run(
+            [t_ae],
+            feed_dict={
+                self.coord: dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: defield,
+                self.tnatoms: self.natoms,
+            },
+        )
 
         # print(f0)
-        theta = 45. / 180. * np.pi
+        theta = 45.0 / 180.0 * np.pi
         rr0 = self.rotate_mat([0, 0, 1], theta)
         rr1 = self.rotate_mat([1, 0, 0], theta)
         dcoord_ = np.copy(dcoord).reshape([nframes, -1, 3])
         dcoord0 = np.matmul(dcoord_[0], rr0)
         dcoord1 = np.matmul(dcoord_[1], rr1)
-        new_dcoord = np.concatenate([dcoord0, dcoord1], axis = 0).reshape([nframes, -1])
+        new_dcoord = np.concatenate([dcoord0, dcoord1], axis=0).reshape([nframes, -1])
         defield_ = np.copy(defield).reshape([nframes, -1, 3])
         defield0 = np.matmul(defield_[0], rr0)
         defield1 = np.matmul(defield_[1], rr1)
-        new_defield = np.concatenate([defield0, defield1], axis = 0).reshape([nframes, -1])
+        new_defield = np.concatenate([defield0, defield1], axis=0).reshape(
+            [nframes, -1]
+        )
 
-        [p_ae1] = self.sess.run ([t_p_ae], 
-                                feed_dict = {
-                                    self.coord:     new_dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    defield,
-                                    self.tnatoms:   self.natoms})
-        [v_ae1] = self.sess.run ([t_v_ae], 
-                                feed_dict = {
-                                    self.coord:     new_dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    defield,
-                                    self.tnatoms:   self.natoms})
-        [ae1] = self.sess.run ([t_ae], 
-                                feed_dict = {
-                                    self.coord:     new_dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    defield,
-                                    self.tnatoms:   self.natoms})
+        [p_ae1] = self.sess.run(
+            [t_p_ae],
+            feed_dict={
+                self.coord: new_dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: defield,
+                self.tnatoms: self.natoms,
+            },
+        )
+        [v_ae1] = self.sess.run(
+            [t_v_ae],
+            feed_dict={
+                self.coord: new_dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: defield,
+                self.tnatoms: self.natoms,
+            },
+        )
+        [ae1] = self.sess.run(
+            [t_ae],
+            feed_dict={
+                self.coord: new_dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: defield,
+                self.tnatoms: self.natoms,
+            },
+        )
 
         for ii in range(0, self.natoms[0]):
             self.assertNotAlmostEqual(p_ae0[ii], p_ae1[ii])
             self.assertNotAlmostEqual(v_ae0[ii], v_ae1[ii])
 
-    def test_rot_field_corot(self, suffix=''):
-        suffix = '_field_corot'
-        t_p_e, t_p_f, t_p_f, t_p_ae, t_p_av \
-            = self.build_efv (self.coord, self.box, self.type, self.tnatoms, name = "test_rot" + suffix, op = op_module.descrpt_se_a_ef_para)
-        t_v_e, t_v_f, t_v_f, t_v_ae, t_v_av \
-            = self.build_efv (self.coord, self.box, self.type, self.tnatoms, name = "test_rot" + suffix, op = op_module.descrpt_se_a_ef_vert, reuse = True)
-        t_e, t_f, t_f, t_ae, t_av \
-            = self.build_efv (self.coord, self.box, self.type, self.tnatoms, name = "test_rot" + suffix, op = op_module.prod_env_mat_a, reuse = True)
-        self.sess.run (tf.global_variables_initializer())
+    def test_rot_field_corot(self, suffix=""):
+        suffix = "_field_corot"
+        t_p_e, t_p_f, t_p_f, t_p_ae, t_p_av = self.build_efv(
+            self.coord,
+            self.box,
+            self.type,
+            self.tnatoms,
+            name="test_rot" + suffix,
+            op=op_module.descrpt_se_a_ef_para,
+        )
+        t_v_e, t_v_f, t_v_f, t_v_ae, t_v_av = self.build_efv(
+            self.coord,
+            self.box,
+            self.type,
+            self.tnatoms,
+            name="test_rot" + suffix,
+            op=op_module.descrpt_se_a_ef_vert,
+            reuse=True,
+        )
+        t_e, t_f, t_f, t_ae, t_av = self.build_efv(
+            self.coord,
+            self.box,
+            self.type,
+            self.tnatoms,
+            name="test_rot" + suffix,
+            op=op_module.prod_env_mat_a,
+            reuse=True,
+        )
+        self.sess.run(tf.global_variables_initializer())
 
-        np.random.seed(0)        
+        np.random.seed(0)
         # make test data
-        nframes = 2      
+        nframes = 2
         dcoord, dbox, dtype, defield = self.make_test_data(nframes)
-        [p_ae0] = self.sess.run ([t_p_ae], 
-                                feed_dict = {
-                                    self.coord:     dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    defield,
-                                    self.tnatoms:   self.natoms})
-        [v_ae0] = self.sess.run ([t_v_ae], 
-                                feed_dict = {
-                                    self.coord:     dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    defield,
-                                    self.tnatoms:   self.natoms})
-        [ae0] = self.sess.run ([t_ae], 
-                                feed_dict = {
-                                    self.coord:     dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    defield,
-                                    self.tnatoms:   self.natoms})
+        [p_ae0] = self.sess.run(
+            [t_p_ae],
+            feed_dict={
+                self.coord: dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: defield,
+                self.tnatoms: self.natoms,
+            },
+        )
+        [v_ae0] = self.sess.run(
+            [t_v_ae],
+            feed_dict={
+                self.coord: dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: defield,
+                self.tnatoms: self.natoms,
+            },
+        )
+        [ae0] = self.sess.run(
+            [t_ae],
+            feed_dict={
+                self.coord: dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: defield,
+                self.tnatoms: self.natoms,
+            },
+        )
 
         # print(f0)
-        theta = 45. / 180. * np.pi
+        theta = 45.0 / 180.0 * np.pi
         rr0 = self.rotate_mat(defield[0][0:3], theta)
         rr1 = self.rotate_mat(defield[1][0:3], theta)
         dcoord_ = np.copy(dcoord).reshape([nframes, -1, 3])
         dcoord0 = np.matmul(dcoord_[0], rr0)
         dcoord1 = np.matmul(dcoord_[1], rr1)
-        new_dcoord = np.concatenate([dcoord0, dcoord1], axis = 0).reshape([nframes, -1])
+        new_dcoord = np.concatenate([dcoord0, dcoord1], axis=0).reshape([nframes, -1])
         defield_ = np.copy(defield).reshape([nframes, -1, 3])
         defield0 = np.matmul(defield_[0], rr0)
         defield1 = np.matmul(defield_[1], rr1)
-        new_defield = np.concatenate([defield0, defield1], axis = 0).reshape([nframes, -1])
+        new_defield = np.concatenate([defield0, defield1], axis=0).reshape(
+            [nframes, -1]
+        )
 
-        [p_ae1] = self.sess.run ([t_p_ae], 
-                                feed_dict = {
-                                    self.coord:     new_dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    new_defield,
-                                    self.tnatoms:   self.natoms})
-        [v_ae1] = self.sess.run ([t_v_ae], 
-                                feed_dict = {
-                                    self.coord:     new_dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    new_defield,
-                                    self.tnatoms:   self.natoms})
-        [ae1] = self.sess.run ([t_ae], 
-                                feed_dict = {
-                                    self.coord:     new_dcoord,
-                                    self.box:       dbox,
-                                    self.type:      dtype,
-                                    self.efield:    new_defield,
-                                    self.tnatoms:   self.natoms})
+        [p_ae1] = self.sess.run(
+            [t_p_ae],
+            feed_dict={
+                self.coord: new_dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: new_defield,
+                self.tnatoms: self.natoms,
+            },
+        )
+        [v_ae1] = self.sess.run(
+            [t_v_ae],
+            feed_dict={
+                self.coord: new_dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: new_defield,
+                self.tnatoms: self.natoms,
+            },
+        )
+        [ae1] = self.sess.run(
+            [t_ae],
+            feed_dict={
+                self.coord: new_dcoord,
+                self.box: dbox,
+                self.type: dtype,
+                self.efield: new_defield,
+                self.tnatoms: self.natoms,
+            },
+        )
 
         np.testing.assert_almost_equal(p_ae0, p_ae1)
         np.testing.assert_almost_equal(v_ae0, v_ae1)
         np.testing.assert_almost_equal(ae0, ae1)
-        
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_descrpt_sea_ef_vert.py b/source/tests/test_descrpt_sea_ef_vert.py
index 8ba82709e3..2d25392f43 100644
--- a/source/tests/test_descrpt_sea_ef_vert.py
+++ b/source/tests/test_descrpt_sea_ef_vert.py
@@ -1,36 +1,38 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
-from deepmd.env import tf
-from tensorflow.python.framework import ops
+import numpy as np
+from common import (
+    Data,
+    force_dw_test,
+    force_test,
+    virial_dw_test,
+    virial_test,
+)
+from tensorflow.python.framework import (
+    ops,
+)
 
 # load grad of force module
 import deepmd.op
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    op_module,
+    tf,
+)
 
-from common import force_test
-from common import virial_test
-from common import force_dw_test
-from common import virial_dw_test
-from common import Data
-
-from deepmd.env import op_module
 
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
-
-class Inter():
-    def setUp (self, 
-               data, 
-               pbc = True,
-               sess = None) :
+class Inter:
+    def setUp(self, data, pbc=True, sess=None):
         self.sess = sess
         self.data = data
         self.natoms = self.data.get_natoms()
         self.ntypes = self.data.get_ntypes()
-        self.sel_a = [12,24]
-        self.sel_r = [0,0]
+        self.sel_a = [12, 24]
+        self.sel_r = [0, 0]
         self.rcut_a = -1
         self.rcut_r_smth = 2.45
         self.rcut_r = 10.0
@@ -40,115 +42,114 @@ def setUp (self,
         self.ndescrpt_a = self.nnei_a * 4
         self.ndescrpt_r = self.nnei_r * 1
         self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r
-        davg = np.zeros ([self.ntypes, self.ndescrpt])
-        dstd = np.ones  ([self.ntypes, self.ndescrpt])
+        davg = np.zeros([self.ntypes, self.ndescrpt])
+        dstd = np.ones([self.ntypes, self.ndescrpt])
         self.t_avg = tf.constant(davg.astype(GLOBAL_NP_FLOAT_PRECISION))
         self.t_std = tf.constant(dstd.astype(GLOBAL_NP_FLOAT_PRECISION))
         if pbc:
-            self.default_mesh = np.zeros (6, dtype = np.int32)
+            self.default_mesh = np.zeros(6, dtype=np.int32)
             self.default_mesh[3] = 2
             self.default_mesh[4] = 2
             self.default_mesh[5] = 2
         else:
-            self.default_mesh = np.array([], dtype = np.int32)
+            self.default_mesh = np.array([], dtype=np.int32)
         # make place holder
-        self.coord      = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name='t_coord')
-        self.efield     = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name='t_efield')
-        self.box        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='t_box')
-        self.type       = tf.placeholder(tf.int32,   [None, self.natoms[0]], name = "t_type")
-        self.tnatoms    = tf.placeholder(tf.int32,   [None], name = "t_natoms")
-        
-    def _net (self,
-             inputs, 
-             name,
-              reuse = False) :
+        self.coord = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name="t_coord"
+        )
+        self.efield = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name="t_efield"
+        )
+        self.box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="t_box")
+        self.type = tf.placeholder(tf.int32, [None, self.natoms[0]], name="t_type")
+        self.tnatoms = tf.placeholder(tf.int32, [None], name="t_natoms")
+
+    def _net(self, inputs, name, reuse=False):
         with tf.variable_scope(name, reuse=reuse):
-            net_w = tf.get_variable ('net_w', 
-                                     [self.ndescrpt], 
-                                     GLOBAL_TF_FLOAT_PRECISION,
-                                     tf.constant_initializer (self.net_w_i))
-        dot_v = tf.matmul (tf.reshape (inputs, [-1, self.ndescrpt]),
-                           tf.reshape (net_w, [self.ndescrpt, 1]))
-        return tf.reshape (dot_v, [-1])
-        
-    def comp_ef (self, 
-                 dcoord, 
-                 dbox, 
-                 dtype,
-                 tnatoms,
-                 name,
-                 reuse = None) :
-        descrpt, descrpt_deriv, rij, nlist \
-            = op_module.descrpt_se_a_ef_vert (dcoord, 
-                                              dtype,
-                                              tnatoms,
-                                              dbox, 
-                                              tf.constant(self.default_mesh),
-                                              self.efield,
-                                              self.t_avg,
-                                              self.t_std,
-                                              rcut_a = self.rcut_a, 
-                                              rcut_r = self.rcut_r, 
-                                              rcut_r_smth = self.rcut_r_smth,
-                                              sel_a = self.sel_a, 
-                                              sel_r = self.sel_r)
-        inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt])
-        atom_ener = self._net (inputs_reshape, name, reuse = reuse)
-        atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])        
-        energy = tf.reduce_sum (atom_ener_reshape, axis = 1)        
-        net_deriv_ = tf.gradients (atom_ener, inputs_reshape)
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        dot_v = tf.matmul(
+            tf.reshape(inputs, [-1, self.ndescrpt]),
+            tf.reshape(net_w, [self.ndescrpt, 1]),
+        )
+        return tf.reshape(dot_v, [-1])
+
+    def comp_ef(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        descrpt, descrpt_deriv, rij, nlist = op_module.descrpt_se_a_ef_vert(
+            dcoord,
+            dtype,
+            tnatoms,
+            dbox,
+            tf.constant(self.default_mesh),
+            self.efield,
+            self.t_avg,
+            self.t_std,
+            rcut_a=self.rcut_a,
+            rcut_r=self.rcut_r,
+            rcut_r_smth=self.rcut_r_smth,
+            sel_a=self.sel_a,
+            sel_r=self.sel_r,
+        )
+        inputs_reshape = tf.reshape(descrpt, [-1, self.ndescrpt])
+        atom_ener = self._net(inputs_reshape, name, reuse=reuse)
+        atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])
+        energy = tf.reduce_sum(atom_ener_reshape, axis=1)
+        net_deriv_ = tf.gradients(atom_ener, inputs_reshape)
         net_deriv = net_deriv_[0]
-        net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) 
-
-        force = op_module.prod_force_se_a (net_deriv_reshape, 
-                                            descrpt_deriv, 
-                                            nlist, 
-                                            tnatoms,
-                                            n_a_sel = self.nnei_a, 
-                                            n_r_sel = self.nnei_r)
-        virial, atom_vir = op_module.prod_virial_se_a (net_deriv_reshape, 
-                                                        descrpt_deriv, 
-                                                        rij,
-                                                        nlist, 
-                                                        tnatoms,
-                                                        n_a_sel = self.nnei_a, 
-                                                        n_r_sel = self.nnei_r)
+        net_deriv_reshape = tf.reshape(net_deriv, [-1, self.natoms[0] * self.ndescrpt])
+
+        force = op_module.prod_force_se_a(
+            net_deriv_reshape,
+            descrpt_deriv,
+            nlist,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
+        virial, atom_vir = op_module.prod_virial_se_a(
+            net_deriv_reshape,
+            descrpt_deriv,
+            rij,
+            nlist,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
         return energy, force, virial
 
-
-    def comp_f_dw (self, 
-                   dcoord, 
-                   dbox, 
-                   dtype,                 
-                   tnatoms,
-                   name,
-                   reuse = None) :
-        energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse)
+    def comp_f_dw(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        energy, force, virial = self.comp_ef(dcoord, dbox, dtype, tnatoms, name, reuse)
         with tf.variable_scope(name, reuse=True):
-            net_w = tf.get_variable ('net_w', [self.ndescrpt], GLOBAL_TF_FLOAT_PRECISION, tf.constant_initializer (self.net_w_i))
-        f_mag = tf.reduce_sum (tf.nn.tanh(force))
-        f_mag_dw = tf.gradients (f_mag, net_w)
-        assert (len(f_mag_dw) == 1), "length of dw is wrong"        
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        f_mag = tf.reduce_sum(tf.nn.tanh(force))
+        f_mag_dw = tf.gradients(f_mag, net_w)
+        assert len(f_mag_dw) == 1, "length of dw is wrong"
         return f_mag, f_mag_dw[0]
 
-
-    def comp_v_dw (self, 
-                   dcoord, 
-                   dbox, 
-                   dtype,                 
-                   tnatoms,
-                   name,
-                   reuse = None) :
-        energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse)
+    def comp_v_dw(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        energy, force, virial = self.comp_ef(dcoord, dbox, dtype, tnatoms, name, reuse)
         with tf.variable_scope(name, reuse=True):
-            net_w = tf.get_variable ('net_w', [self.ndescrpt], GLOBAL_TF_FLOAT_PRECISION, tf.constant_initializer (self.net_w_i))
-        v_mag = tf.reduce_sum (virial)
-        v_mag_dw = tf.gradients (v_mag, net_w)
-        assert (len(v_mag_dw) == 1), "length of dw is wrong"        
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        v_mag = tf.reduce_sum(virial)
+        v_mag_dw = tf.gradients(v_mag, net_w)
+        assert len(v_mag_dw) == 1, "length of dw is wrong"
         return v_mag, v_mag_dw[0]
 
 
-
 class TestSmooth(Inter, tf.test.TestCase):
     # def __init__ (self, *args, **kwargs):
     #     data = Data()
@@ -161,20 +162,18 @@ def setUp(self):
         data = Data()
         Inter.setUp(self, data, sess=self.test_session().__enter__())
 
-    def test_force (self) :
-        force_test(self, self, suffix = '_sea_ef_vert')
-
-    def test_virial (self) :
-        virial_test(self, self, suffix = '_sea_ef_vert')
-
-    def test_force_dw (self) :
-        force_dw_test(self, self, suffix = '_sea_ef_vert')
+    def test_force(self):
+        force_test(self, self, suffix="_sea_ef_vert")
 
-    def test_virial_dw (self) :
-        virial_dw_test(self, self, suffix = '_sea_ef_vert')
+    def test_virial(self):
+        virial_test(self, self, suffix="_sea_ef_vert")
 
+    def test_force_dw(self):
+        force_dw_test(self, self, suffix="_sea_ef_vert")
 
+    def test_virial_dw(self):
+        virial_dw_test(self, self, suffix="_sea_ef_vert")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_descrpt_smooth.py b/source/tests/test_descrpt_smooth.py
index 00dbbd61d0..57fc8055f2 100644
--- a/source/tests/test_descrpt_smooth.py
+++ b/source/tests/test_descrpt_smooth.py
@@ -1,36 +1,38 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
-from deepmd.env import tf
-from tensorflow.python.framework import ops
+import numpy as np
+from common import (
+    Data,
+    force_dw_test,
+    force_test,
+    virial_dw_test,
+    virial_test,
+)
+from tensorflow.python.framework import (
+    ops,
+)
 
 # load grad of force module
 import deepmd.op
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    op_module,
+    tf,
+)
 
-from common import force_test
-from common import virial_test
-from common import force_dw_test
-from common import virial_dw_test
-from common import Data
-
-from deepmd.env import op_module
-
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
 
-class Inter():
-    def setUp (self, 
-               data, 
-               pbc = True,
-               sess = None) :
+class Inter:
+    def setUp(self, data, pbc=True, sess=None):
         self.sess = sess
         self.data = data
         self.natoms = self.data.get_natoms()
         self.ntypes = self.data.get_ntypes()
-        self.sel_a = [12,24]
-        self.sel_r = [0,0]
+        self.sel_a = [12, 24]
+        self.sel_r = [0, 0]
         self.rcut_a = -1
         self.rcut_r_smth = 2.45
         self.rcut_r = 10.0
@@ -40,114 +42,113 @@ def setUp (self,
         self.ndescrpt_a = self.nnei_a * 4
         self.ndescrpt_r = self.nnei_r * 1
         self.ndescrpt = self.ndescrpt_a + self.ndescrpt_r
-        davg = np.zeros ([self.ntypes, self.ndescrpt])
-        dstd = np.ones  ([self.ntypes, self.ndescrpt])
+        davg = np.zeros([self.ntypes, self.ndescrpt])
+        dstd = np.ones([self.ntypes, self.ndescrpt])
         self.t_avg = tf.constant(davg.astype(GLOBAL_NP_FLOAT_PRECISION))
         self.t_std = tf.constant(dstd.astype(GLOBAL_NP_FLOAT_PRECISION))
         if pbc:
-            self.default_mesh = np.zeros (6, dtype = np.int32)
+            self.default_mesh = np.zeros(6, dtype=np.int32)
             self.default_mesh[3] = 2
             self.default_mesh[4] = 2
             self.default_mesh[5] = 2
         else:
-            self.default_mesh = np.array([], dtype = np.int32)
+            self.default_mesh = np.array([], dtype=np.int32)
         # make place holder
-        self.coord      = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name='t_coord')
-        self.box        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='t_box')
-        self.type       = tf.placeholder(tf.int32,   [None, self.natoms[0]], name = "t_type")
-        self.tnatoms    = tf.placeholder(tf.int32,   [None], name = "t_natoms")
-        self.efield     = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name='t_efield')
-        
-    def _net (self,
-             inputs, 
-             name,
-              reuse = False) :
+        self.coord = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name="t_coord"
+        )
+        self.box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="t_box")
+        self.type = tf.placeholder(tf.int32, [None, self.natoms[0]], name="t_type")
+        self.tnatoms = tf.placeholder(tf.int32, [None], name="t_natoms")
+        self.efield = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.natoms[0] * 3], name="t_efield"
+        )
+
+    def _net(self, inputs, name, reuse=False):
         with tf.variable_scope(name, reuse=reuse):
-            net_w = tf.get_variable ('net_w', 
-                                     [self.ndescrpt], 
-                                     GLOBAL_TF_FLOAT_PRECISION,
-                                     tf.constant_initializer (self.net_w_i))
-        dot_v = tf.matmul (tf.reshape (inputs, [-1, self.ndescrpt]),
-                           tf.reshape (net_w, [self.ndescrpt, 1]))
-        return tf.reshape (dot_v, [-1])
-        
-    def comp_ef (self, 
-                 dcoord, 
-                 dbox, 
-                 dtype,
-                 tnatoms,
-                 name,
-                 reuse = None) :
-        descrpt, descrpt_deriv, rij, nlist \
-            = op_module.prod_env_mat_a (dcoord, 
-                                       dtype,
-                                       tnatoms,
-                                       dbox, 
-                                       tf.constant(self.default_mesh),
-                                       self.t_avg,
-                                       self.t_std,
-                                       rcut_a = self.rcut_a, 
-                                       rcut_r = self.rcut_r, 
-                                       rcut_r_smth = self.rcut_r_smth,
-                                       sel_a = self.sel_a, 
-                                       sel_r = self.sel_r)
-        inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt])
-        atom_ener = self._net (inputs_reshape, name, reuse = reuse)
-        atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])        
-        energy = tf.reduce_sum (atom_ener_reshape, axis = 1)        
-        net_deriv_ = tf.gradients (atom_ener, inputs_reshape)
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        dot_v = tf.matmul(
+            tf.reshape(inputs, [-1, self.ndescrpt]),
+            tf.reshape(net_w, [self.ndescrpt, 1]),
+        )
+        return tf.reshape(dot_v, [-1])
+
+    def comp_ef(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        descrpt, descrpt_deriv, rij, nlist = op_module.prod_env_mat_a(
+            dcoord,
+            dtype,
+            tnatoms,
+            dbox,
+            tf.constant(self.default_mesh),
+            self.t_avg,
+            self.t_std,
+            rcut_a=self.rcut_a,
+            rcut_r=self.rcut_r,
+            rcut_r_smth=self.rcut_r_smth,
+            sel_a=self.sel_a,
+            sel_r=self.sel_r,
+        )
+        inputs_reshape = tf.reshape(descrpt, [-1, self.ndescrpt])
+        atom_ener = self._net(inputs_reshape, name, reuse=reuse)
+        atom_ener_reshape = tf.reshape(atom_ener, [-1, self.natoms[0]])
+        energy = tf.reduce_sum(atom_ener_reshape, axis=1)
+        net_deriv_ = tf.gradients(atom_ener, inputs_reshape)
         net_deriv = net_deriv_[0]
-        net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) 
-
-        force = op_module.prod_force_se_a (net_deriv_reshape, 
-                                            descrpt_deriv, 
-                                            nlist, 
-                                            tnatoms,
-                                            n_a_sel = self.nnei_a, 
-                                            n_r_sel = self.nnei_r)
-        virial, atom_vir = op_module.prod_virial_se_a (net_deriv_reshape, 
-                                                        descrpt_deriv, 
-                                                        rij,
-                                                        nlist, 
-                                                        tnatoms,
-                                                        n_a_sel = self.nnei_a, 
-                                                        n_r_sel = self.nnei_r)
+        net_deriv_reshape = tf.reshape(net_deriv, [-1, self.natoms[0] * self.ndescrpt])
+
+        force = op_module.prod_force_se_a(
+            net_deriv_reshape,
+            descrpt_deriv,
+            nlist,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
+        virial, atom_vir = op_module.prod_virial_se_a(
+            net_deriv_reshape,
+            descrpt_deriv,
+            rij,
+            nlist,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
         return energy, force, virial
 
-
-    def comp_f_dw (self, 
-                   dcoord, 
-                   dbox, 
-                   dtype,                 
-                   tnatoms,
-                   name,
-                   reuse = None) :
-        energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse)
+    def comp_f_dw(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        energy, force, virial = self.comp_ef(dcoord, dbox, dtype, tnatoms, name, reuse)
         with tf.variable_scope(name, reuse=True):
-            net_w = tf.get_variable ('net_w', [self.ndescrpt], GLOBAL_TF_FLOAT_PRECISION, tf.constant_initializer (self.net_w_i))
-        f_mag = tf.reduce_sum (tf.nn.tanh(force))
-        f_mag_dw = tf.gradients (f_mag, net_w)
-        assert (len(f_mag_dw) == 1), "length of dw is wrong"        
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        f_mag = tf.reduce_sum(tf.nn.tanh(force))
+        f_mag_dw = tf.gradients(f_mag, net_w)
+        assert len(f_mag_dw) == 1, "length of dw is wrong"
         return f_mag, f_mag_dw[0]
 
-
-    def comp_v_dw (self, 
-                   dcoord, 
-                   dbox, 
-                   dtype,                 
-                   tnatoms,
-                   name,
-                   reuse = None) :
-        energy, force, virial = self.comp_ef (dcoord, dbox, dtype, tnatoms, name, reuse)
+    def comp_v_dw(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        energy, force, virial = self.comp_ef(dcoord, dbox, dtype, tnatoms, name, reuse)
         with tf.variable_scope(name, reuse=True):
-            net_w = tf.get_variable ('net_w', [self.ndescrpt], GLOBAL_TF_FLOAT_PRECISION, tf.constant_initializer (self.net_w_i))
-        v_mag = tf.reduce_sum (virial)
-        v_mag_dw = tf.gradients (v_mag, net_w)
-        assert (len(v_mag_dw) == 1), "length of dw is wrong"        
+            net_w = tf.get_variable(
+                "net_w",
+                [self.ndescrpt],
+                GLOBAL_TF_FLOAT_PRECISION,
+                tf.constant_initializer(self.net_w_i),
+            )
+        v_mag = tf.reduce_sum(virial)
+        v_mag_dw = tf.gradients(v_mag, net_w)
+        assert len(v_mag_dw) == 1, "length of dw is wrong"
         return v_mag, v_mag_dw[0]
 
 
-
 class TestSmooth(Inter, tf.test.TestCase):
     # def __init__ (self, *args, **kwargs):
     #     data = Data()
@@ -160,17 +161,17 @@ def setUp(self):
         data = Data()
         Inter.setUp(self, data, sess=self.test_session().__enter__())
 
-    def test_force (self) :
-        force_test(self, self, suffix = '_smth')
+    def test_force(self):
+        force_test(self, self, suffix="_smth")
 
-    def test_virial (self) :
-        virial_test(self, self, suffix = '_smth')
+    def test_virial(self):
+        virial_test(self, self, suffix="_smth")
 
-    def test_force_dw (self) :
-        force_dw_test(self, self, suffix = '_smth')
+    def test_force_dw(self):
+        force_dw_test(self, self, suffix="_smth")
 
-    def test_virial_dw (self) :
-        virial_dw_test(self, self, suffix = '_smth')
+    def test_virial_dw(self):
+        virial_dw_test(self, self, suffix="_smth")
 
 
 class TestSeAPbc(tf.test.TestCase):
@@ -178,33 +179,49 @@ def test_pbc(self):
         data = Data()
         inter0 = Inter()
         inter1 = Inter()
-        inter0.setUp(data, pbc = True, sess=self.test_session().__enter__())
-        inter1.setUp(data, pbc = False, sess=self.test_session().__enter__())
+        inter0.setUp(data, pbc=True, sess=self.test_session().__enter__())
+        inter1.setUp(data, pbc=False, sess=self.test_session().__enter__())
         inter0.net_w_i = np.copy(np.ones(inter0.ndescrpt))
         inter1.net_w_i = np.copy(np.ones(inter1.ndescrpt))
 
-        t_energy0, t_force0, t_virial0 \
-            = inter0.comp_ef (inter0.coord, inter0.box, inter0.type, inter0.tnatoms, name = "test_sea_pbc_true")
-        t_energy1, t_force1, t_virial1 \
-            = inter1.comp_ef (inter1.coord, inter1.box, inter1.type, inter1.tnatoms, name = "test_sea_pbc_false")
-
-        inter0.sess.run (tf.global_variables_initializer())
-        inter1.sess.run (tf.global_variables_initializer())
-
-        dcoord, dbox, dtype = data.get_data ()
-
-        [e0, f0, v0] = inter0.sess.run ([t_energy0, t_force0, t_virial0], 
-                                        feed_dict = {
-                                            inter0.coord:     dcoord,
-                                            inter0.box:       dbox,
-                                            inter0.type:      dtype,
-                                            inter0.tnatoms:   inter0.natoms})
-        [e1, f1, v1] = inter1.sess.run ([t_energy1, t_force1, t_virial1], 
-                                        feed_dict = {
-                                            inter1.coord:     dcoord,
-                                            inter1.box:       dbox,
-                                            inter1.type:      dtype,
-                                            inter1.tnatoms:   inter1.natoms})
+        t_energy0, t_force0, t_virial0 = inter0.comp_ef(
+            inter0.coord,
+            inter0.box,
+            inter0.type,
+            inter0.tnatoms,
+            name="test_sea_pbc_true",
+        )
+        t_energy1, t_force1, t_virial1 = inter1.comp_ef(
+            inter1.coord,
+            inter1.box,
+            inter1.type,
+            inter1.tnatoms,
+            name="test_sea_pbc_false",
+        )
+
+        inter0.sess.run(tf.global_variables_initializer())
+        inter1.sess.run(tf.global_variables_initializer())
+
+        dcoord, dbox, dtype = data.get_data()
+
+        [e0, f0, v0] = inter0.sess.run(
+            [t_energy0, t_force0, t_virial0],
+            feed_dict={
+                inter0.coord: dcoord,
+                inter0.box: dbox,
+                inter0.type: dtype,
+                inter0.tnatoms: inter0.natoms,
+            },
+        )
+        [e1, f1, v1] = inter1.sess.run(
+            [t_energy1, t_force1, t_virial1],
+            feed_dict={
+                inter1.coord: dcoord,
+                inter1.box: dbox,
+                inter1.type: dtype,
+                inter1.tnatoms: inter1.natoms,
+            },
+        )
 
         self.assertAlmostEqual(e0[0], e1[0])
         np.testing.assert_almost_equal(f0[0], f1[0])
@@ -212,41 +229,57 @@ def test_pbc(self):
 
     def test_pbc_small_box(self):
         data0 = Data()
-        data1 = Data(box_scale = 2)
+        data1 = Data(box_scale=2)
         inter0 = Inter()
         inter1 = Inter()
-        inter0.setUp(data0, pbc = True, sess=self.test_session().__enter__())
-        inter1.setUp(data1, pbc = False, sess=self.test_session().__enter__())
+        inter0.setUp(data0, pbc=True, sess=self.test_session().__enter__())
+        inter1.setUp(data1, pbc=False, sess=self.test_session().__enter__())
         inter0.net_w_i = np.copy(np.ones(inter0.ndescrpt))
         inter1.net_w_i = np.copy(np.ones(inter1.ndescrpt))
 
-        t_energy0, t_force0, t_virial0 \
-            = inter0.comp_ef (inter0.coord, inter0.box, inter0.type, inter0.tnatoms, name = "test_sea_pbc_sbox_true")
-        t_energy1, t_force1, t_virial1 \
-            = inter1.comp_ef (inter1.coord, inter1.box, inter1.type, inter1.tnatoms, name = "test_sea_pbc_sbox_false")
-
-        inter0.sess.run (tf.global_variables_initializer())
-        inter1.sess.run (tf.global_variables_initializer())
-
-        dcoord, dbox, dtype = data0.get_data ()
-        [e0, f0, v0] = inter0.sess.run ([t_energy0, t_force0, t_virial0], 
-                                        feed_dict = {
-                                            inter0.coord:     dcoord,
-                                            inter0.box:       dbox,
-                                            inter0.type:      dtype,
-                                            inter0.tnatoms:   inter0.natoms})
-        dcoord, dbox, dtype = data1.get_data ()
-        [e1, f1, v1] = inter1.sess.run ([t_energy1, t_force1, t_virial1], 
-                                        feed_dict = {
-                                            inter1.coord:     dcoord,
-                                            inter1.box:       dbox,
-                                            inter1.type:      dtype,
-                                            inter1.tnatoms:   inter1.natoms})
+        t_energy0, t_force0, t_virial0 = inter0.comp_ef(
+            inter0.coord,
+            inter0.box,
+            inter0.type,
+            inter0.tnatoms,
+            name="test_sea_pbc_sbox_true",
+        )
+        t_energy1, t_force1, t_virial1 = inter1.comp_ef(
+            inter1.coord,
+            inter1.box,
+            inter1.type,
+            inter1.tnatoms,
+            name="test_sea_pbc_sbox_false",
+        )
+
+        inter0.sess.run(tf.global_variables_initializer())
+        inter1.sess.run(tf.global_variables_initializer())
+
+        dcoord, dbox, dtype = data0.get_data()
+        [e0, f0, v0] = inter0.sess.run(
+            [t_energy0, t_force0, t_virial0],
+            feed_dict={
+                inter0.coord: dcoord,
+                inter0.box: dbox,
+                inter0.type: dtype,
+                inter0.tnatoms: inter0.natoms,
+            },
+        )
+        dcoord, dbox, dtype = data1.get_data()
+        [e1, f1, v1] = inter1.sess.run(
+            [t_energy1, t_force1, t_virial1],
+            feed_dict={
+                inter1.coord: dcoord,
+                inter1.box: dbox,
+                inter1.type: dtype,
+                inter1.tnatoms: inter1.natoms,
+            },
+        )
 
         self.assertAlmostEqual(e0[0], e1[0])
         np.testing.assert_almost_equal(f0[0], f1[0])
         np.testing.assert_almost_equal(v0[0], v1[0])
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_dipole_se_a.py b/source/tests/test_dipole_se_a.py
index 8f06efdfeb..f20bd7668e 100644
--- a/source/tests/test_dipole_se_a.py
+++ b/source/tests/test_dipole_se_a.py
@@ -1,101 +1,134 @@
-import dpdata,os,sys,unittest
-import numpy as np
-from deepmd.env import tf
-from common import Data,gen_data, j_loader
-from common import finite_difference, strerch_box
+import os
+import sys
+import unittest
 
-from common import DataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import DipoleFittingSeA
-from deepmd.model import DipoleModel
-from deepmd.common import j_must_have
+import dpdata
+import numpy as np
+from common import (
+    Data,
+    DataSystem,
+    finite_difference,
+    gen_data,
+    j_loader,
+    strerch_box,
+)
+
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    DipoleFittingSeA,
+)
+from deepmd.model import (
+    DipoleModel,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
+
 class TestModel(tf.test.TestCase):
-    def setUp(self) :
+    def setUp(self):
         gen_data()
 
     def test_model(self):
-        jfile = 'polar_se_a.json'
+        jfile = "polar_se_a.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
-        
-        test_data = data.get_test ()
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+
+        test_data = data.get_test()
         numb_test = 1
-        
-        jdata['model']['descriptor'].pop('type', None)
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed = True)
-        jdata['model']['fitting_net'].pop('type', None)
-        jdata['model']['fitting_net'].pop('fit_diag', None)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = DipoleFittingSeA(**jdata['model']['fitting_net'], uniform_seed = True)
+
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"].pop("type", None)
+        jdata["model"]["fitting_net"].pop("fit_diag", None)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = DipoleFittingSeA(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = DipoleModel(descrpt, fitting)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']],
-                      'fparam': [test_data['fparam']],
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
+            "fparam": [test_data["fparam"]],
         }
         model._compute_input_stat(input_data)
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [model.ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        model_pred \
-            = model.build (t_coord, 
-                           t_type, 
-                           t_natoms, 
-                           t_box, 
-                           t_mesh,
-                           t_fparam,
-                           suffix = "dipole_se_a", 
-                           reuse = False)
-        dipole = model_pred['dipole']
-        gdipole = model_pred['global_dipole']
-        force = model_pred['force']
-        virial = model_pred['virial']
-        atom_virial = model_pred['atom_virial']
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False}
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            t_fparam,
+            suffix="dipole_se_a",
+            reuse=False,
+        )
+        dipole = model_pred["dipole"]
+        gdipole = model_pred["global_dipole"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+        atom_virial = model_pred["atom_virial"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [p, gp] = sess.run([dipole, gdipole], feed_dict = feed_dict_test)
+        [p, gp] = sess.run([dipole, gdipole], feed_dict=feed_dict_test)
 
         p = p.reshape([-1])
-        refp = [1.616802262298876514e+01,9.809535439521079425e+00,3.572312180768947854e-01,1.336308874095981203e+00,1.057908563208963848e+01,-5.999602350098874881e-01]
+        refp = [
+            1.616802262298876514e01,
+            9.809535439521079425e00,
+            3.572312180768947854e-01,
+            1.336308874095981203e00,
+            1.057908563208963848e01,
+            -5.999602350098874881e-01,
+        ]
 
         places = 10
         np.testing.assert_almost_equal(p, refp, places)
@@ -107,15 +140,19 @@ def test_model(self):
         np.testing.assert_almost_equal(gp, refgp, places)
 
         # make sure only one frame is used
-        feed_dict_single = {t_prop_c:        test_data['prop_c'],
-                            t_coord:         np.reshape(test_data['coord']    [:1, :], [-1]),
-                            t_box:           test_data['box']                 [:1, :],
-                            t_type:          np.reshape(test_data['type']     [:1, :], [-1]),
-                            t_natoms:        test_data['natoms_vec'],
-                            t_mesh:          test_data['default_mesh'],
-                            is_training:     False}
-
-        [pf, pv, pav] = sess.run([force, virial, atom_virial], feed_dict = feed_dict_single)
+        feed_dict_single = {
+            t_prop_c: test_data["prop_c"],
+            t_coord: np.reshape(test_data["coord"][:1, :], [-1]),
+            t_box: test_data["box"][:1, :],
+            t_type: np.reshape(test_data["type"][:1, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
+
+        [pf, pv, pav] = sess.run(
+            [force, virial, atom_virial], feed_dict=feed_dict_single
+        )
         pf, pv = pf.reshape(-1), pv.reshape(-1)
         spv = pav.reshape(1, 3, -1, 9).sum(2).reshape(-1)
 
@@ -123,23 +160,32 @@ def test_model(self):
         coord0 = base_dict.pop(t_coord)
         box0 = base_dict.pop(t_box)
 
-        fdf = - finite_difference(
-                    lambda coord: sess.run(gdipole, 
-                        feed_dict={**base_dict, 
-                                t_coord:coord, 
-                                t_box:box0}).reshape(-1),
-                    test_data['coord'][:numb_test, :].reshape([-1])).reshape(-1)
-        fdv = - (finite_difference(
-                    lambda box: sess.run(gdipole, 
-                        feed_dict={**base_dict, 
-                                t_coord:strerch_box(coord0, box0, box), 
-                                t_box:box}).reshape(-1),
-                    test_data['box'][:numb_test, :]).reshape([-1,3,3]).transpose(0,2,1)
-                @ box0.reshape(3,3)).reshape(-1)
+        fdf = -finite_difference(
+            lambda coord: sess.run(
+                gdipole, feed_dict={**base_dict, t_coord: coord, t_box: box0}
+            ).reshape(-1),
+            test_data["coord"][:numb_test, :].reshape([-1]),
+        ).reshape(-1)
+        fdv = -(
+            finite_difference(
+                lambda box: sess.run(
+                    gdipole,
+                    feed_dict={
+                        **base_dict,
+                        t_coord: strerch_box(coord0, box0, box),
+                        t_box: box,
+                    },
+                ).reshape(-1),
+                test_data["box"][:numb_test, :],
+            )
+            .reshape([-1, 3, 3])
+            .transpose(0, 2, 1)
+            @ box0.reshape(3, 3)
+        ).reshape(-1)
 
         delta = 1e-5
         np.testing.assert_allclose(pf, fdf, delta)
         np.testing.assert_allclose(pv, fdv, delta)
         # make sure atomic virial sum to virial
         places = 10
-        np.testing.assert_almost_equal(pv, spv, places)
\ No newline at end of file
+        np.testing.assert_almost_equal(pv, spv, places)
diff --git a/source/tests/test_dipole_se_a_tebd.py b/source/tests/test_dipole_se_a_tebd.py
index 830528d007..1f15246b26 100644
--- a/source/tests/test_dipole_se_a_tebd.py
+++ b/source/tests/test_dipole_se_a_tebd.py
@@ -1,114 +1,150 @@
-import dpdata, os, sys, unittest
+import os
+import sys
+import unittest
+
+import dpdata
 import numpy as np
-from deepmd.env import tf
-from common import Data, gen_data, j_loader
-from common import finite_difference, strerch_box
-
-from common import DataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import DipoleFittingSeA
-from deepmd.utils.type_embed import TypeEmbedNet
-from deepmd.model import DipoleModel
-from deepmd.common import j_must_have
+from common import (
+    Data,
+    DataSystem,
+    finite_difference,
+    gen_data,
+    j_loader,
+    strerch_box,
+)
 from packaging.version import parse as parse_version
 
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    DipoleFittingSeA,
+)
+from deepmd.model import (
+    DipoleModel,
+)
+from deepmd.utils.type_embed import (
+    TypeEmbedNet,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
 
-@unittest.skipIf(parse_version(tf.__version__) < parse_version("1.15"),
-    f"The current tf version {tf.__version__} is too low to run the new testing model.")
+@unittest.skipIf(
+    parse_version(tf.__version__) < parse_version("1.15"),
+    f"The current tf version {tf.__version__} is too low to run the new testing model.",
+)
 class TestModel(tf.test.TestCase):
     def setUp(self):
         gen_data()
 
     def test_model(self):
-        jfile = 'polar_se_a_tebd.json'
+        jfile = "polar_se_a_tebd.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have(jdata['model']['descriptor'], 'rcut')
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
 
         data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
 
         test_data = data.get_test()
         numb_test = 1
 
-        jdata['model']['descriptor'].pop('type', None)
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed=True)
-        jdata['model']['fitting_net'].pop('type', None)
-        jdata['model']['fitting_net'].pop('fit_diag', None)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = DipoleFittingSeA(**jdata['model']['fitting_net'], uniform_seed=True)
-        typeebd_param = jdata['model']['type_embedding']
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"].pop("type", None)
+        jdata["model"]["fitting_net"].pop("fit_diag", None)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = DipoleFittingSeA(**jdata["model"]["fitting_net"], uniform_seed=True)
+        typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
-            neuron=typeebd_param['neuron'],
-            resnet_dt=typeebd_param['resnet_dt'],
-            seed=typeebd_param['seed'],
-            uniform_seed=True)
+            neuron=typeebd_param["neuron"],
+            resnet_dt=typeebd_param["resnet_dt"],
+            seed=typeebd_param["seed"],
+            uniform_seed=True,
+        )
         model = DipoleModel(descrpt, fitting, typeebd)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord': [test_data['coord']],
-                      'box': [test_data['box']],
-                      'type': [test_data['type']],
-                      'natoms_vec': [test_data['natoms_vec']],
-                      'default_mesh': [test_data['default_mesh']],
-                      'fparam': [test_data['fparam']],
-                      }
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
+            "fparam": [test_data["fparam"]],
+        }
         model._compute_input_stat(input_data)
 
-        t_prop_c = tf.placeholder(tf.float32, [5], name='t_prop_c')
-        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type = tf.placeholder(tf.int32, [None], name='i_type')
-        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name='i_natoms')
-        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh')
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
         is_training = tf.placeholder(tf.bool)
         t_fparam = None
         inputs_dict = {}
 
-        model_pred \
-            = model.build(t_coord,
-                          t_type,
-                          t_natoms,
-                          t_box,
-                          t_mesh,
-                          inputs_dict,
-                          suffix="dipole_se_a_tebd",
-                          reuse=False)
-        dipole = model_pred['dipole']
-        gdipole = model_pred['global_dipole']
-        force = model_pred['force']
-        virial = model_pred['virial']
-        atom_virial = model_pred['atom_virial']
-
-        feed_dict_test = {t_prop_c: test_data['prop_c'],
-                          t_coord: np.reshape(test_data['coord'][:numb_test, :], [-1]),
-                          t_box: test_data['box'][:numb_test, :],
-                          t_type: np.reshape(test_data['type'][:numb_test, :], [-1]),
-                          t_natoms: test_data['natoms_vec'],
-                          t_mesh: test_data['default_mesh'],
-                          is_training: False}
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            inputs_dict,
+            suffix="dipole_se_a_tebd",
+            reuse=False,
+        )
+        dipole = model_pred["dipole"]
+        gdipole = model_pred["global_dipole"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+        atom_virial = model_pred["atom_virial"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
         [p, gp] = sess.run([dipole, gdipole], feed_dict=feed_dict_test)
 
         p = p.reshape([-1])
-        refp = [15.759189570481473,  9.56848733368029,  0.3494387894045414,
-                1.3280752117673629, 10.285935424492124, -0.5847081785394377]
+        refp = [
+            15.759189570481473,
+            9.56848733368029,
+            0.3494387894045414,
+            1.3280752117673629,
+            10.285935424492124,
+            -0.5847081785394377,
+        ]
 
         places = 10
         np.testing.assert_almost_equal(p, refp, places)
@@ -120,15 +156,19 @@ def test_model(self):
         np.testing.assert_almost_equal(gp, refgp, places)
 
         # make sure only one frame is used
-        feed_dict_single = {t_prop_c: test_data['prop_c'],
-                            t_coord: np.reshape(test_data['coord'][:1, :], [-1]),
-                            t_box: test_data['box'][:1, :],
-                            t_type: np.reshape(test_data['type'][:1, :], [-1]),
-                            t_natoms: test_data['natoms_vec'],
-                            t_mesh: test_data['default_mesh'],
-                            is_training: False}
-
-        [pf, pv, pav] = sess.run([force, virial, atom_virial], feed_dict=feed_dict_single)
+        feed_dict_single = {
+            t_prop_c: test_data["prop_c"],
+            t_coord: np.reshape(test_data["coord"][:1, :], [-1]),
+            t_box: test_data["box"][:1, :],
+            t_type: np.reshape(test_data["type"][:1, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
+
+        [pf, pv, pav] = sess.run(
+            [force, virial, atom_virial], feed_dict=feed_dict_single
+        )
         pf, pv = pf.reshape(-1), pv.reshape(-1)
         spv = pav.reshape(1, 3, -1, 9).sum(2).reshape(-1)
 
@@ -136,19 +176,28 @@ def test_model(self):
         coord0 = base_dict.pop(t_coord)
         box0 = base_dict.pop(t_box)
 
-        fdf = - finite_difference(
-            lambda coord: sess.run(gdipole,
-                                   feed_dict={**base_dict,
-                                              t_coord: coord,
-                                              t_box: box0}).reshape(-1),
-            test_data['coord'][:numb_test, :].reshape([-1])).reshape(-1)
-        fdv = - (finite_difference(
-            lambda box: sess.run(gdipole,
-                                 feed_dict={**base_dict,
-                                            t_coord: strerch_box(coord0, box0, box),
-                                            t_box: box}).reshape(-1),
-            test_data['box'][:numb_test, :]).reshape([-1, 3, 3]).transpose(0, 2, 1)
-                 @ box0.reshape(3, 3)).reshape(-1)
+        fdf = -finite_difference(
+            lambda coord: sess.run(
+                gdipole, feed_dict={**base_dict, t_coord: coord, t_box: box0}
+            ).reshape(-1),
+            test_data["coord"][:numb_test, :].reshape([-1]),
+        ).reshape(-1)
+        fdv = -(
+            finite_difference(
+                lambda box: sess.run(
+                    gdipole,
+                    feed_dict={
+                        **base_dict,
+                        t_coord: strerch_box(coord0, box0, box),
+                        t_box: box,
+                    },
+                ).reshape(-1),
+                test_data["box"][:numb_test, :],
+            )
+            .reshape([-1, 3, 3])
+            .transpose(0, 2, 1)
+            @ box0.reshape(3, 3)
+        ).reshape(-1)
 
         delta = 1e-5
         np.testing.assert_allclose(pf, fdf, delta)
diff --git a/source/tests/test_dipolecharge.py b/source/tests/test_dipolecharge.py
index c277a3823c..daf00ef095 100644
--- a/source/tests/test_dipolecharge.py
+++ b/source/tests/test_dipolecharge.py
@@ -1,55 +1,120 @@
-import os,sys,platform,shutil,dpdata
-import numpy as np
+import os
+import platform
+import shutil
+import sys
 import unittest
 
-from infer.convert2pb import convert_pbtxt_to_pb
-from deepmd.infer import DipoleChargeModifier
-from common import tests_path
+import dpdata
+import numpy as np
+from common import (
+    tests_path,
+)
+from infer.convert2pb import (
+    convert_pbtxt_to_pb,
+)
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+)
+from deepmd.infer import (
+    DipoleChargeModifier,
+)
 
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
-else :
+else:
     default_places = 10
 
-class TestDipoleCharge(unittest.TestCase) :
+
+class TestDipoleCharge(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","dipolecharge_d.pbtxt")), "dipolecharge_d.pb")
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "dipolecharge_d.pbtxt")),
+            "dipolecharge_d.pb",
+        )
         cls.dp = DipoleChargeModifier(
-            "dipolecharge_d.pb", 
-            [-1.0, -3.0],
-            [1.0, 1.0, 1.0, 1.0, 1.0],
-            4.0,
-            0.2
+            "dipolecharge_d.pb", [-1.0, -3.0], [1.0, 1.0, 1.0, 1.0, 1.0], 4.0, 0.2
         )
 
     def setUp(self):
-        self.coords = np.array([
-            4.6067455554,    8.8719311819,    6.3886531197,
-            4.0044515745,    4.2449530507,    7.7902855220,
-            2.6453069446,    0.8772647726,    1.2804446790,
-            1.1445332290,    0.0067366438,    1.8606485070,
-            7.1002867706,    5.0325506787,    3.1805888348,
-            4.5352891138,    7.7389683929,    9.4260970128,
-            2.1833238914,    9.0916071034,    7.2299906064,
-            4.1040157820,    1.0496745045,    5.4748315591,
-        ], dtype = np.float64)
-            # 1.1445332290,    0.0067366438,    1.8606485070,
-            # 2.1833238914,    9.0916071034,    7.2299906064,
-            # 4.0044515745,    4.2449530507,    7.7902855220,
-            # 7.1002867706,    5.0325506787,    3.1805888348,
-        self.atype = np.array([0,3,2,1,3,4,1,4], dtype=int)
-        self.box = np.array([10., 0., 0., 0., 10., 0., 0., 0., 10.])
-        self.expected_e = np.array([
-            3.671081837126222158e+00
-        ])
-        self.expected_f = np.array([
-            8.786854427753210128e-01,-1.590752486903602159e-01,-2.709225006303785932e-01,-4.449513960033193438e-01,-1.564291540964127813e-01,2.139031741772115178e-02,1.219699614140521193e+00,-5.580358618499958734e-02,-3.878662478349682585e-01,-1.286685244990778854e+00,1.886475802950296488e-01,3.904450515493615437e-01,1.605017382138404849e-02,2.138016869742287995e-01,-2.617514921203008965e-02,2.877081057057793712e-01,-3.846449683844421763e-01,3.048855616906603894e-02,-9.075632811311897807e-01,-6.509653472431625731e-03,2.302010972126376787e-01,2.370565856822822726e-01,3.600133435593881881e-01,1.243887532859055609e-02
-        ])
-        self.expected_v = np.array([
-            3.714071471995848417e-01,6.957130186032146613e-01,-1.158289779017217302e+00,6.957130186032139951e-01,-1.400130091653774933e+01,-3.631620234653316626e-01,-1.158289779017217302e+00,-3.631620234653316626e-01,3.805077486043773050e+00
-        ])
+        self.coords = np.array(
+            [
+                4.6067455554,
+                8.8719311819,
+                6.3886531197,
+                4.0044515745,
+                4.2449530507,
+                7.7902855220,
+                2.6453069446,
+                0.8772647726,
+                1.2804446790,
+                1.1445332290,
+                0.0067366438,
+                1.8606485070,
+                7.1002867706,
+                5.0325506787,
+                3.1805888348,
+                4.5352891138,
+                7.7389683929,
+                9.4260970128,
+                2.1833238914,
+                9.0916071034,
+                7.2299906064,
+                4.1040157820,
+                1.0496745045,
+                5.4748315591,
+            ],
+            dtype=np.float64,
+        )
+        # 1.1445332290,    0.0067366438,    1.8606485070,
+        # 2.1833238914,    9.0916071034,    7.2299906064,
+        # 4.0044515745,    4.2449530507,    7.7902855220,
+        # 7.1002867706,    5.0325506787,    3.1805888348,
+        self.atype = np.array([0, 3, 2, 1, 3, 4, 1, 4], dtype=int)
+        self.box = np.array([10.0, 0.0, 0.0, 0.0, 10.0, 0.0, 0.0, 0.0, 10.0])
+        self.expected_e = np.array([3.671081837126222158e00])
+        self.expected_f = np.array(
+            [
+                8.786854427753210128e-01,
+                -1.590752486903602159e-01,
+                -2.709225006303785932e-01,
+                -4.449513960033193438e-01,
+                -1.564291540964127813e-01,
+                2.139031741772115178e-02,
+                1.219699614140521193e00,
+                -5.580358618499958734e-02,
+                -3.878662478349682585e-01,
+                -1.286685244990778854e00,
+                1.886475802950296488e-01,
+                3.904450515493615437e-01,
+                1.605017382138404849e-02,
+                2.138016869742287995e-01,
+                -2.617514921203008965e-02,
+                2.877081057057793712e-01,
+                -3.846449683844421763e-01,
+                3.048855616906603894e-02,
+                -9.075632811311897807e-01,
+                -6.509653472431625731e-03,
+                2.302010972126376787e-01,
+                2.370565856822822726e-01,
+                3.600133435593881881e-01,
+                1.243887532859055609e-02,
+            ]
+        )
+        self.expected_v = np.array(
+            [
+                3.714071471995848417e-01,
+                6.957130186032146613e-01,
+                -1.158289779017217302e00,
+                6.957130186032139951e-01,
+                -1.400130091653774933e01,
+                -3.631620234653316626e-01,
+                -1.158289779017217302e00,
+                -3.631620234653316626e-01,
+                3.805077486043773050e00,
+            ]
+        )
         self.natoms = self.atype.size
         self.coords = self.coords.reshape([-1, self.natoms, 3])
 
@@ -60,27 +125,26 @@ def tearDownClass(cls):
 
     def test_attrs(self):
         self.assertEqual(self.dp.get_ntypes(), 5)
-        self.assertAlmostEqual(self.dp.get_rcut(), 4.0, places = default_places)
-        self.assertEqual(self.dp.get_type_map(), ['A', 'B', 'C', 'D', 'E'])
+        self.assertAlmostEqual(self.dp.get_rcut(), 4.0, places=default_places)
+        self.assertEqual(self.dp.get_type_map(), ["A", "B", "C", "D", "E"])
 
     def test_1frame(self):
-        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, eval_fv = True
-)
+        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, eval_fv=True)
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
         self.assertEqual(ee.shape, (nframes,))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
         self.assertEqual(self.expected_e.shape, (nframes,))
-        self.assertEqual(self.expected_f.shape, (nframes*natoms*3,))
-        self.assertEqual(self.expected_v.shape, (nframes*9,))
+        self.assertEqual(self.expected_f.shape, (nframes * natoms * 3,))
+        self.assertEqual(self.expected_v.shape, (nframes * 9,))
         # np.savetxt('ee.out', ee.reshape([1, -1]), delimiter=',')
         # np.savetxt('ff.out', ff.reshape([1, -1]), delimiter=',')
         # np.savetxt('vv.out', vv.reshape([1, -1]), delimiter=',')
         ee = ee.reshape([-1])
         ff = ff.reshape([-1])
-        vv = vv.reshape([-1])        
+        vv = vv.reshape([-1])
         np.testing.assert_almost_equal(ee, self.expected_e)
         np.testing.assert_almost_equal(ff, self.expected_f)
         np.testing.assert_almost_equal(vv, self.expected_v)
@@ -89,23 +153,21 @@ def test_2frame(self):
         nframes = 2
         self.coords = np.tile(self.coords, [nframes, 1, 1])
         self.box = np.tile(self.box, [nframes, 1])
-        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, eval_fv = True
-)
+        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, eval_fv=True)
         # check shape of the returns
         natoms = len(self.atype)
         self.assertEqual(ee.shape, (nframes,))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
         self.expected_e = np.tile(self.expected_e, [nframes])
         self.expected_f = np.tile(self.expected_f, [nframes])
         self.expected_v = np.tile(self.expected_v, [nframes])
         self.assertEqual(self.expected_e.shape, (nframes,))
-        self.assertEqual(self.expected_f.shape, (nframes*natoms*3,))
-        self.assertEqual(self.expected_v.shape, (nframes*9,))
+        self.assertEqual(self.expected_f.shape, (nframes * natoms * 3,))
+        self.assertEqual(self.expected_v.shape, (nframes * 9,))
         ee = ee.reshape([-1])
         ff = ff.reshape([-1])
         vv = vv.reshape([-1])
         np.testing.assert_almost_equal(ee, self.expected_e)
         np.testing.assert_almost_equal(ff, self.expected_f)
         np.testing.assert_almost_equal(vv, self.expected_v)
-
diff --git a/source/tests/test_embedding_net.py b/source/tests/test_embedding_net.py
index d1930524f7..3f25389278 100644
--- a/source/tests/test_embedding_net.py
+++ b/source/tests/test_embedding_net.py
@@ -1,105 +1,128 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
-from deepmd.env import tf
-from tensorflow.python.framework import ops
+import numpy as np
+from tensorflow.python.framework import (
+    ops,
+)
 
-from deepmd.utils.network import embedding_net
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    tf,
+)
+from deepmd.utils.network import (
+    embedding_net,
+)
 
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
 
 class Inter(tf.test.TestCase):
-    def setUp (self) :
+    def setUp(self):
         self.sess = self.test_session().__enter__()
-        self.inputs = tf.constant([ 0., 1., 2.], dtype = tf.float64)
+        self.inputs = tf.constant([0.0, 1.0, 2.0], dtype=tf.float64)
         self.ndata = 3
         self.inputs = tf.reshape(self.inputs, [-1, 1])
         self.places = 6
-        
+
     def test_enlarger_net(self):
         network_size = [3, 4]
-        out = embedding_net(self.inputs, 
-                            network_size, 
-                            tf.float64,
-                            name_suffix = 'enlarger_net',
-                            seed = 1, 
-                            uniform_seed = True)
+        out = embedding_net(
+            self.inputs,
+            network_size,
+            tf.float64,
+            name_suffix="enlarger_net",
+            seed=1,
+            uniform_seed=True,
+        )
         self.sess.run(tf.global_variables_initializer())
         myout = self.sess.run(out)
-        refout = [[-0.1482171,  -0.14177827, -0.76181204,  0.21266767],
-                  [-0.27800543, -0.08974353, -0.78784335,  0.3485518 ],
-                  [-0.36744368, -0.06285603, -0.80749876,  0.4347974 ]]
+        refout = [
+            [-0.1482171, -0.14177827, -0.76181204, 0.21266767],
+            [-0.27800543, -0.08974353, -0.78784335, 0.3485518],
+            [-0.36744368, -0.06285603, -0.80749876, 0.4347974],
+        ]
         np.testing.assert_almost_equal(refout, myout, self.places)
 
-
     def test_enlarger_net_1(self):
         network_size = [4, 4]
-        out = embedding_net(self.inputs, 
-                            network_size, 
-                            tf.float64,
-                            name_suffix = 'enlarger_net_1',
-                            seed = 1, 
-                            uniform_seed = True)
+        out = embedding_net(
+            self.inputs,
+            network_size,
+            tf.float64,
+            name_suffix="enlarger_net_1",
+            seed=1,
+            uniform_seed=True,
+        )
         self.sess.run(tf.global_variables_initializer())
         myout = self.sess.run(out)
-        refout = [[ 0.10842905, -0.61623145, -1.46738788, -0.01921788],
-                  [ 0.09376136, -0.75526936, -1.64995884,  0.01076112],
-                  [ 0.1033177,  -0.8911794,  -1.75530172,  0.00653156]]
+        refout = [
+            [0.10842905, -0.61623145, -1.46738788, -0.01921788],
+            [0.09376136, -0.75526936, -1.64995884, 0.01076112],
+            [0.1033177, -0.8911794, -1.75530172, 0.00653156],
+        ]
         np.testing.assert_almost_equal(refout, myout, self.places)
 
     def test_enlarger_net_1_idt(self):
         network_size = [4, 4]
-        out = embedding_net(self.inputs, 
-                            network_size, 
-                            tf.float64,
-                            name_suffix = 'enlarger_net_1_idt',
-                            resnet_dt = True,
-                            seed = 1, 
-                            uniform_seed = True)
+        out = embedding_net(
+            self.inputs,
+            network_size,
+            tf.float64,
+            name_suffix="enlarger_net_1_idt",
+            resnet_dt=True,
+            seed=1,
+            uniform_seed=True,
+        )
         self.sess.run(tf.global_variables_initializer())
         myout = self.sess.run(out)
-        refout = [[ 0.10839754, -0.6161336,  -1.46673253, -0.01927138],
-                  [ 0.09370214, -0.75516888, -1.64927868,  0.01067603],
-                  [ 0.10323835, -0.89107102, -1.75460243,  0.00642493]]
+        refout = [
+            [0.10839754, -0.6161336, -1.46673253, -0.01927138],
+            [0.09370214, -0.75516888, -1.64927868, 0.01067603],
+            [0.10323835, -0.89107102, -1.75460243, 0.00642493],
+        ]
         np.testing.assert_almost_equal(refout, myout, self.places)
 
     def test_enlarger_net_2(self):
         network_size = [2, 4]
-        out = embedding_net(self.inputs, 
-                            network_size, 
-                            tf.float64,
-                            name_suffix = 'enlarger_net_2',
-                            seed = 1, 
-                            uniform_seed = True)
+        out = embedding_net(
+            self.inputs,
+            network_size,
+            tf.float64,
+            name_suffix="enlarger_net_2",
+            seed=1,
+            uniform_seed=True,
+        )
         self.sess.run(tf.global_variables_initializer())
         myout = self.sess.run(out)
-        refout = [[ 0.24023149, -0.66311811, -0.50951819, -0.36873654],
-                  [ 2.00858313, -0.05971232,  0.52272395, -0.12604478],
-                  [ 3.39365063,  0.63492697,  1.5780069,   0.46445682]]
+        refout = [
+            [0.24023149, -0.66311811, -0.50951819, -0.36873654],
+            [2.00858313, -0.05971232, 0.52272395, -0.12604478],
+            [3.39365063, 0.63492697, 1.5780069, 0.46445682],
+        ]
         np.testing.assert_almost_equal(refout, myout, self.places)
 
-
     def test_enlarger_net_2(self):
         network_size = [2, 4]
-        out = embedding_net(self.inputs, 
-                            network_size, 
-                            tf.float64,
-                            name_suffix = 'enlarger_net_2_idt',
-                            resnet_dt = True,
-                            seed = 1, 
-                            uniform_seed = True)
+        out = embedding_net(
+            self.inputs,
+            network_size,
+            tf.float64,
+            name_suffix="enlarger_net_2_idt",
+            resnet_dt=True,
+            seed=1,
+            uniform_seed=True,
+        )
         self.sess.run(tf.global_variables_initializer())
         myout = self.sess.run(out)
-        refout = [[ 0.2403889,  -0.66290763, -0.50883586, -0.36869913],
-                  [ 2.00891479, -0.05936574,  0.52351633, -0.12579749],
-                  [ 3.3940202,   0.63538459,  1.57887697,  0.46486689]]
+        refout = [
+            [0.2403889, -0.66290763, -0.50883586, -0.36869913],
+            [2.00891479, -0.05936574, 0.52351633, -0.12579749],
+            [3.3940202, 0.63538459, 1.57887697, 0.46486689],
+        ]
         np.testing.assert_almost_equal(refout, myout, self.places)
 
 
-
-
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_env.py b/source/tests/test_env.py
index ea886046f4..e1864306c3 100644
--- a/source/tests/test_env.py
+++ b/source/tests/test_env.py
@@ -1,20 +1,27 @@
 import unittest
+from unittest import (
+    mock,
+)
 
-from deepmd import env
-from unittest import mock
+from deepmd import (
+    env,
+)
 
 
 class TestTFThreadCount(unittest.TestCase):
-    @mock.patch.dict('os.environ', values={})
+    @mock.patch.dict("os.environ", values={})
     def test_empty(self):
         intra, inter = env.get_tf_default_nthreads()
         self.assertEqual(intra, 0)
         self.assertEqual(inter, 0)
 
-    @mock.patch.dict('os.environ', values={
-        'TF_INTRA_OP_PARALLELISM_THREADS': '5',
-        'TF_INTER_OP_PARALLELISM_THREADS': '3'
-    })
+    @mock.patch.dict(
+        "os.environ",
+        values={
+            "TF_INTRA_OP_PARALLELISM_THREADS": "5",
+            "TF_INTER_OP_PARALLELISM_THREADS": "3",
+        },
+    )
     def test_given(self):
         intra, inter = env.get_tf_default_nthreads()
         self.assertEqual(intra, 5)
@@ -27,7 +34,7 @@ def test_default(self):
         new = env.get_tf_session_config()
         self.assertNotEqual(id(shared), id(new))
 
-    @mock.patch('deepmd.env.get_tf_default_nthreads')
+    @mock.patch("deepmd.env.get_tf_default_nthreads")
     def test_get(self, mock_method):
         mock_method.return_value = (5, 3)
         config = env.get_tf_session_config()
@@ -37,6 +44,6 @@ def test_get(self, mock_method):
     def test_reset(self):
         shared = env.default_tf_session_config
         env.reset_default_tf_session_config(True)
-        self.assertEqual(shared.device_count['GPU'], 0)
+        self.assertEqual(shared.device_count["GPU"], 0)
         env.reset_default_tf_session_config(False)
         self.assertEqual(len(shared.device_count), 0)
diff --git a/source/tests/test_ewald.py b/source/tests/test_ewald.py
index 236c295b91..024a7ec289 100644
--- a/source/tests/test_ewald.py
+++ b/source/tests/test_ewald.py
@@ -1,27 +1,34 @@
-import os,sys,platform
-import numpy as np
+import os
+import platform
+import sys
 import unittest
-from deepmd.env import tf
 
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
-from deepmd.infer.ewald_recp import op_module
-from deepmd.infer.ewald_recp import EwaldRecp
+import numpy as np
 
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    tf,
+)
+from deepmd.infer.ewald_recp import (
+    EwaldRecp,
+    op_module,
+)
+
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     global_default_fv_hh = 1e-2
     global_default_dw_hh = 1e-2
     global_default_places = 3
-else :
+else:
     global_default_fv_hh = 1e-6
     global_default_dw_hh = 1e-4
     global_default_places = 5
 
 
-class TestEwaldRecp (tf.test.TestCase) :
+class TestEwaldRecp(tf.test.TestCase):
     def setUp(self):
-        boxl = 4.5 # NOTICE grid should not change before and after box pert...
+        boxl = 4.5  # NOTICE grid should not change before and after box pert...
         box_pert = 0.2
         self.natoms = 16
         self.nframes = 2
@@ -36,10 +43,10 @@ def setUp(self):
             box = np.eye(3) * boxl
             box[1][1] += 1
             box[2][2] += 2
-            box += np.random.random([3,3]) * box_pert
+            box += np.random.random([3, 3]) * box_pert
             box = 0.5 * (box + box.T)
             self.dbox.append(box)
-            # scaled 
+            # scaled
             coord = np.random.random([self.natoms, 3])
             self.rcoord.append(coord)
             # real coords
@@ -47,109 +54,125 @@ def setUp(self):
             # charge
             dcharge = np.random.random([self.natoms])
             dcharge -= np.average(dcharge)
-            assert(np.abs(np.sum(self.dcharge) - 0) < 1e-12)
+            assert np.abs(np.sum(self.dcharge) - 0) < 1e-12
             self.dcharge.append(dcharge)
         self.dbox = np.array(self.dbox).reshape([self.nframes, 9])
-        self.rcoord = np.array(self.rcoord).reshape([self.nframes, 3*self.natoms])
-        self.dcoord = np.array(self.dcoord).reshape([self.nframes, 3*self.natoms])
+        self.rcoord = np.array(self.rcoord).reshape([self.nframes, 3 * self.natoms])
+        self.dcoord = np.array(self.dcoord).reshape([self.nframes, 3 * self.natoms])
         self.dcharge = np.array(self.dcharge).reshape([self.nframes, self.natoms])
         # place holders
-        self.coord      = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_coord')
-        self.charge     = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_charge')
-        self.box        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_box')
-        self.nloc    = tf.placeholder(tf.int32, [1], name = "t_nloc")        
+        self.coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_coord")
+        self.charge = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_charge")
+        self.box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_box")
+        self.nloc = tf.placeholder(tf.int32, [1], name="t_nloc")
 
     def test_py_interface(self):
         hh = 1e-4
         places = 4
         sess = self.test_session().__enter__()
-        t_energy, t_force, t_virial \
-            = op_module.ewald_recp(self.coord, self.charge, self.nloc, self.box, 
-                                   ewald_h = self.ewald_h,
-                                   ewald_beta = self.ewald_beta)
-        [e, f, v] = sess.run([t_energy, t_force, t_virial], 
-                           feed_dict = {
-                               self.coord:  self.dcoord.reshape([-1]),
-                               self.charge: self.dcharge.reshape([-1]),
-                               self.box:    self.dbox.reshape([-1]),
-                               self.nloc:   [self.natoms],
-                           })
+        t_energy, t_force, t_virial = op_module.ewald_recp(
+            self.coord,
+            self.charge,
+            self.nloc,
+            self.box,
+            ewald_h=self.ewald_h,
+            ewald_beta=self.ewald_beta,
+        )
+        [e, f, v] = sess.run(
+            [t_energy, t_force, t_virial],
+            feed_dict={
+                self.coord: self.dcoord.reshape([-1]),
+                self.charge: self.dcharge.reshape([-1]),
+                self.box: self.dbox.reshape([-1]),
+                self.nloc: [self.natoms],
+            },
+        )
         er = EwaldRecp(self.ewald_h, self.ewald_beta)
-        e1, f1, v1 = er.eval(self.dcoord, self.dcharge, self.dbox)        
-        np.testing.assert_almost_equal(e, e1, 
-                                       places,
-                                       err_msg = "energy failed")
-        np.testing.assert_almost_equal(f, f1, 
-                                       places,
-                                       err_msg = "force component failed")
-        np.testing.assert_almost_equal(v, v, 
-                                       places,
-                                       err_msg = "virial component failed")
-
-
+        e1, f1, v1 = er.eval(self.dcoord, self.dcharge, self.dbox)
+        np.testing.assert_almost_equal(e, e1, places, err_msg="energy failed")
+        np.testing.assert_almost_equal(f, f1, places, err_msg="force component failed")
+        np.testing.assert_almost_equal(v, v, places, err_msg="virial component failed")
 
     def test_force(self):
         hh = 1e-4
         places = 6
         sess = self.test_session().__enter__()
-        t_energy, t_force, t_virial \
-            = op_module.ewald_recp(self.coord, self.charge, self.nloc, self.box, 
-                                   ewald_h = self.ewald_h,
-                                   ewald_beta = self.ewald_beta)
-        [force] = sess.run([t_force], 
-                           feed_dict = {
-                               self.coord:  self.dcoord.reshape([-1]),
-                               self.charge: self.dcharge.reshape([-1]),
-                               self.box:    self.dbox.reshape([-1]),
-                               self.nloc:   [self.natoms],
-                           })
+        t_energy, t_force, t_virial = op_module.ewald_recp(
+            self.coord,
+            self.charge,
+            self.nloc,
+            self.box,
+            ewald_h=self.ewald_h,
+            ewald_beta=self.ewald_beta,
+        )
+        [force] = sess.run(
+            [t_force],
+            feed_dict={
+                self.coord: self.dcoord.reshape([-1]),
+                self.charge: self.dcharge.reshape([-1]),
+                self.box: self.dbox.reshape([-1]),
+                self.nloc: [self.natoms],
+            },
+        )
         for idx in range(self.natoms):
             for dd in range(3):
                 dcoordp = np.copy(self.dcoord)
                 dcoordm = np.copy(self.dcoord)
-                dcoordp[:,idx*3+dd] = self.dcoord[:,idx*3+dd] + hh
-                dcoordm[:,idx*3+dd] = self.dcoord[:,idx*3+dd] - hh
-                energyp = sess.run([t_energy], 
-                                   feed_dict = {
-                                       self.coord:  dcoordp.reshape([-1]),
-                                       self.charge: self.dcharge.reshape([-1]),
-                                       self.box:    self.dbox.reshape([-1]),
-                                       self.nloc:   [self.natoms],
-                                   })                                
-                energym = sess.run([t_energy], 
-                                   feed_dict = {
-                                       self.coord:  dcoordm.reshape([-1]),
-                                       self.charge: self.dcharge.reshape([-1]),
-                                       self.box:    self.dbox.reshape([-1]),
-                                       self.nloc:   [self.natoms],
-                                   })
-                c_force = -(energyp[0] - energym[0]) / (2*hh)
-                np.testing.assert_almost_equal(c_force, force[:,idx*3+dd], 
-                                           places,
-                                           err_msg = "force component [%d,%d] failed" % (idx, dd))
-
+                dcoordp[:, idx * 3 + dd] = self.dcoord[:, idx * 3 + dd] + hh
+                dcoordm[:, idx * 3 + dd] = self.dcoord[:, idx * 3 + dd] - hh
+                energyp = sess.run(
+                    [t_energy],
+                    feed_dict={
+                        self.coord: dcoordp.reshape([-1]),
+                        self.charge: self.dcharge.reshape([-1]),
+                        self.box: self.dbox.reshape([-1]),
+                        self.nloc: [self.natoms],
+                    },
+                )
+                energym = sess.run(
+                    [t_energy],
+                    feed_dict={
+                        self.coord: dcoordm.reshape([-1]),
+                        self.charge: self.dcharge.reshape([-1]),
+                        self.box: self.dbox.reshape([-1]),
+                        self.nloc: [self.natoms],
+                    },
+                )
+                c_force = -(energyp[0] - energym[0]) / (2 * hh)
+                np.testing.assert_almost_equal(
+                    c_force,
+                    force[:, idx * 3 + dd],
+                    places,
+                    err_msg="force component [%d,%d] failed" % (idx, dd),
+                )
 
     def test_virial(self):
         hh = 1e-4
         places = 6
         sess = self.test_session().__enter__()
-        t_energy, t_force, t_virial \
-            = op_module.ewald_recp(self.coord, self.charge, self.nloc, self.box, 
-                                   ewald_h = self.ewald_h,
-                                   ewald_beta = self.ewald_beta)
-        [virial] = sess.run([t_virial], 
-                           feed_dict = {
-                               self.coord:  self.dcoord.reshape([-1]),
-                               self.charge: self.dcharge.reshape([-1]),
-                               self.box:    self.dbox.reshape([-1]),
-                               self.nloc:   [self.natoms],
-                           })
-
-        from scipy.stats import ortho_group
-
-        
-
-        self.dbox3 = np.reshape(self.dbox, [self.nframes, 3,3])
+        t_energy, t_force, t_virial = op_module.ewald_recp(
+            self.coord,
+            self.charge,
+            self.nloc,
+            self.box,
+            ewald_h=self.ewald_h,
+            ewald_beta=self.ewald_beta,
+        )
+        [virial] = sess.run(
+            [t_virial],
+            feed_dict={
+                self.coord: self.dcoord.reshape([-1]),
+                self.charge: self.dcharge.reshape([-1]),
+                self.box: self.dbox.reshape([-1]),
+                self.nloc: [self.natoms],
+            },
+        )
+
+        from scipy.stats import (
+            ortho_group,
+        )
+
+        self.dbox3 = np.reshape(self.dbox, [self.nframes, 3, 3])
         self.drbox3 = np.linalg.inv(self.dbox3)
         # print(np.matmul(self.dbox3, self.drbox3))
         # print(np.matmul(self.drbox3, self.dbox3))
@@ -159,36 +182,40 @@ def test_virial(self):
         # print(np.matmul(self.dcoord3, self.drbox3))
         # print('check rcoord ', np.linalg.norm(self.rcoord3 - self.rcoord.reshape([self.nframes, self.natoms, 3])))
 
-        num_deriv = np.zeros([self.nframes,3,3])
+        num_deriv = np.zeros([self.nframes, 3, 3])
         for ii in range(3):
             for jj in range(3):
                 dbox3p = np.copy(self.dbox3)
                 dbox3m = np.copy(self.dbox3)
-                dbox3p[:,ii,jj] = self.dbox3[:,ii,jj] + hh
-                dbox3m[:,ii,jj] = self.dbox3[:,ii,jj] - hh
-                dboxp = np.reshape(dbox3p, [-1,9])
-                dboxm = np.reshape(dbox3m, [-1,9])
+                dbox3p[:, ii, jj] = self.dbox3[:, ii, jj] + hh
+                dbox3m[:, ii, jj] = self.dbox3[:, ii, jj] - hh
+                dboxp = np.reshape(dbox3p, [-1, 9])
+                dboxm = np.reshape(dbox3m, [-1, 9])
                 dcoord = self.dcoord
                 dcoord3p = np.matmul(self.rcoord3, dbox3p)
                 dcoord3m = np.matmul(self.rcoord3, dbox3m)
-                dcoordp = np.reshape(dcoord3p, [self.nframes,-1])
-                dcoordm = np.reshape(dcoord3m, [self.nframes,-1])
-                energyp = sess.run([t_energy],
-                                   feed_dict = {
-                                       self.coord:  dcoordp.reshape([-1]),
-                                       self.charge: self.dcharge.reshape([-1]),
-                                       self.box:    dboxp.reshape([-1]),
-                                       self.nloc:   [self.natoms],
-                                   })
-                energym = sess.run([t_energy], 
-                                   feed_dict = {
-                                       self.coord:  dcoordm.reshape([-1]),
-                                       self.charge: self.dcharge.reshape([-1]),
-                                       self.box:    dboxm.reshape([-1]),
-                                       self.nloc:   [self.natoms],
-                                   })
-                num_deriv[:,ii,jj] = -(energyp[0] - energym[0]) / (2.*hh)
-        num_deriv_t = np.transpose(num_deriv, [0,2,1])
+                dcoordp = np.reshape(dcoord3p, [self.nframes, -1])
+                dcoordm = np.reshape(dcoord3m, [self.nframes, -1])
+                energyp = sess.run(
+                    [t_energy],
+                    feed_dict={
+                        self.coord: dcoordp.reshape([-1]),
+                        self.charge: self.dcharge.reshape([-1]),
+                        self.box: dboxp.reshape([-1]),
+                        self.nloc: [self.natoms],
+                    },
+                )
+                energym = sess.run(
+                    [t_energy],
+                    feed_dict={
+                        self.coord: dcoordm.reshape([-1]),
+                        self.charge: self.dcharge.reshape([-1]),
+                        self.box: dboxm.reshape([-1]),
+                        self.nloc: [self.natoms],
+                    },
+                )
+                num_deriv[:, ii, jj] = -(energyp[0] - energym[0]) / (2.0 * hh)
+        num_deriv_t = np.transpose(num_deriv, [0, 2, 1])
         t_esti = np.matmul(num_deriv_t, self.dbox3)
         # # t_esti = np.matmul(num_deriv, self.dbox3)
         # print(num_deriv[0])
@@ -197,12 +224,7 @@ def test_virial(self):
         # print(virial[0].reshape([3,3]))
         # # print(0.5 * (t_esti[0] + t_esti[0].T) - virial[0].reshape([3,3]))
         # print(0.5 * (t_esti[0] + t_esti[0]) - virial[0].reshape([3,3]))
-        # print(0.5 * (t_esti[0] + t_esti[0].T) - virial[0].reshape([3,3]))        
-        np.testing.assert_almost_equal(t_esti.ravel(), virial.ravel(), 
-                                           places,
-                                           err_msg = "virial component failed")
-            
-                
-
-
-
+        # print(0.5 * (t_esti[0] + t_esti[0].T) - virial[0].reshape([3,3]))
+        np.testing.assert_almost_equal(
+            t_esti.ravel(), virial.ravel(), places, err_msg="virial component failed"
+        )
diff --git a/source/tests/test_examples.py b/source/tests/test_examples.py
index babcd56992..0a5a33c0e1 100644
--- a/source/tests/test_examples.py
+++ b/source/tests/test_examples.py
@@ -1,13 +1,18 @@
 """This module ensures input in the examples directory
 could pass the argument checking.
 """
-import unittest
 import json
-from pathlib import Path
-
-from deepmd.common import j_loader
-from deepmd.utils.argcheck import normalize
+import unittest
+from pathlib import (
+    Path,
+)
 
+from deepmd.common import (
+    j_loader,
+)
+from deepmd.utils.argcheck import (
+    normalize,
+)
 
 p_examples = Path(__file__).parent.parent.parent / "examples"
 
diff --git a/source/tests/test_finetune_se_atten.py b/source/tests/test_finetune_se_atten.py
index a3124c726c..eaf41624b9 100644
--- a/source/tests/test_finetune_se_atten.py
+++ b/source/tests/test_finetune_se_atten.py
@@ -1,17 +1,40 @@
-import os, sys, platform, shutil, dpdata, json
-import numpy as np
-import unittest
+import json
+import os
+import platform
+import shutil
 import subprocess as sp
-from common import j_loader, tests_path, run_dp
-from deepmd.utils.graph import get_tensor_by_name
-from deepmd.utils.argcheck import normalize
-from deepmd.utils.compat import update_deepmd_input
-from deepmd.utils.data_system import DeepmdDataSystem
-from deepmd.infer import DeepPotential
-
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION, tf
+import sys
+import unittest
+
+import dpdata
+import numpy as np
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
 from packaging.version import parse as parse_version
 
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    tf,
+)
+from deepmd.infer import (
+    DeepPotential,
+)
+from deepmd.utils.argcheck import (
+    normalize,
+)
+from deepmd.utils.compat import (
+    update_deepmd_input,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+from deepmd.utils.graph import (
+    get_tensor_by_name,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -27,9 +50,9 @@ def _file_delete(file):
 
 def _subprocess_run(command):
     popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b''):
-        if hasattr(line, 'decode'):
-            line = line.decode('utf-8')
+    for line in iter(popen.stdout.readline, b""):
+        if hasattr(line, "decode"):
+            line = line.decode("utf-8")
         line = line.rstrip()
         print(line)
     popen.wait()
@@ -41,12 +64,18 @@ def _init_models():
     data_file_mixed_type = str(tests_path / os.path.join("finetune", "data_mixed_type"))
     pretrained_model = str(tests_path / "pretrained_model_se_atten.pb")
     finetuned_model = str(tests_path / "finetuned_model_se_atten.pb")
-    finetuned_model_mixed_type = str(tests_path / "finetuned_model_se_atten_mixed_type.pb")
+    finetuned_model_mixed_type = str(
+        tests_path / "finetuned_model_se_atten_mixed_type.pb"
+    )
     INPUT_PRE = str(tests_path / "input_pretrain_se_atten.json")
     INPUT_FINETUNE = str(tests_path / "input_finetune_se_atten.json")
     INPUT_FINETUNE_MIX = str(tests_path / "input_finetune_se_atten_mixed_type.json")
-    jdata_pre = j_loader(str(tests_path / os.path.join("finetune", "input_pretrain.json")))
-    jdata_finetune = j_loader(str(tests_path / os.path.join("finetune", "input_finetune.json")))
+    jdata_pre = j_loader(
+        str(tests_path / os.path.join("finetune", "input_pretrain.json"))
+    )
+    jdata_finetune = j_loader(
+        str(tests_path / os.path.join("finetune", "input_finetune.json"))
+    )
     jdata_pre["training"]["training_data"]["systems"] = data_file
     jdata_pre["training"]["validation_data"]["systems"] = data_file
     jdata_finetune["training"]["training_data"]["systems"] = data_file
@@ -63,49 +92,75 @@ def _init_models():
         json.dump(jdata_finetune, fp, indent=4)
 
     ret = run_dp("dp train " + INPUT_PRE)
-    np.testing.assert_equal(ret, 0, 'DP train failed!')
+    np.testing.assert_equal(ret, 0, "DP train failed!")
     ret = run_dp("dp freeze -o " + pretrained_model)
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
     ret = run_dp("dp train " + INPUT_FINETUNE + " -t " + pretrained_model)
-    np.testing.assert_equal(ret, 0, 'DP finetune failed!')
+    np.testing.assert_equal(ret, 0, "DP finetune failed!")
     ret = run_dp("dp freeze -o " + finetuned_model)
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
     ret = run_dp("dp train " + INPUT_FINETUNE_MIX + " -t " + pretrained_model)
-    np.testing.assert_equal(ret, 0, 'DP finetune failed!')
+    np.testing.assert_equal(ret, 0, "DP finetune failed!")
     ret = run_dp("dp freeze -o " + finetuned_model_mixed_type)
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
 
-    jdata_pre = update_deepmd_input(jdata_pre, warning=True, dump="input_v2_compat.json")
+    jdata_pre = update_deepmd_input(
+        jdata_pre, warning=True, dump="input_v2_compat.json"
+    )
     jdata_pre = normalize(jdata_pre)
-    rcut = jdata_pre['model']['descriptor']['rcut']
-    type_map = jdata_pre['model']['type_map']
+    rcut = jdata_pre["model"]["descriptor"]["rcut"]
+    type_map = jdata_pre["model"]["type_map"]
     data = DeepmdDataSystem(
         systems=[data_file],
         batch_size=1,
         test_size=1,
         rcut=rcut,
         type_map=type_map,
-        trn_all_set=True
+        trn_all_set=True,
     )
-    data_requirement = {'energy': {'ndof': 1,
-                                   'atomic': False,
-                                   'must': False,
-                                   'high_prec': True,
-                                   'type_sel': None,
-                                   'repeat': 1,
-                                   'default': 0.0}}
+    data_requirement = {
+        "energy": {
+            "ndof": 1,
+            "atomic": False,
+            "must": False,
+            "high_prec": True,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        }
+    }
     data.add_dict(data_requirement)
-    return INPUT_PRE, INPUT_FINETUNE, INPUT_FINETUNE_MIX, \
-        pretrained_model, finetuned_model, finetuned_model_mixed_type, type_map_pre, type_map_finetune, data
+    return (
+        INPUT_PRE,
+        INPUT_FINETUNE,
+        INPUT_FINETUNE_MIX,
+        pretrained_model,
+        finetuned_model,
+        finetuned_model_mixed_type,
+        type_map_pre,
+        type_map_finetune,
+        data,
+    )
 
 
 if not parse_version(tf.__version__) < parse_version("1.15"):
-    INPUT_PRE, INPUT_FINETUNE, INPUT_FINETUNE_MIX, \
-        PRE_MODEL, FINETUNED_MODEL, FINETUNED_MODEL_MIX, PRE_MAP, FINETUNED_MAP, VALID_DATA = _init_models()
+    (
+        INPUT_PRE,
+        INPUT_FINETUNE,
+        INPUT_FINETUNE_MIX,
+        PRE_MODEL,
+        FINETUNED_MODEL,
+        FINETUNED_MODEL_MIX,
+        PRE_MAP,
+        FINETUNED_MAP,
+        VALID_DATA,
+    ) = _init_models()
 
 
-@unittest.skipIf(parse_version(tf.__version__) < parse_version("1.15"),
-                 f"The current tf version {tf.__version__} is too low to run the new testing model.")
+@unittest.skipIf(
+    parse_version(tf.__version__) < parse_version("1.15"),
+    f"The current tf version {tf.__version__} is too low to run the new testing model.",
+)
 class TestFinetuneSeAtten(unittest.TestCase):
     @classmethod
     def setUpClass(self):
@@ -134,8 +189,10 @@ def tearDownClass(self):
         _file_delete("lcurve.out")
 
     def test_finetune_standard(self):
-        pretrained_bias = get_tensor_by_name(PRE_MODEL, 'fitting_attr/t_bias_atom_e')
-        finetuned_bias = get_tensor_by_name(FINETUNED_MODEL, 'fitting_attr/t_bias_atom_e')
+        pretrained_bias = get_tensor_by_name(PRE_MODEL, "fitting_attr/t_bias_atom_e")
+        finetuned_bias = get_tensor_by_name(
+            FINETUNED_MODEL, "fitting_attr/t_bias_atom_e"
+        )
         sorter = np.argsort(PRE_MAP)
         idx_type_map = sorter[np.searchsorted(PRE_MAP, FINETUNED_MAP, sorter=sorter)]
         test_data = self.valid_data.get_test()
@@ -145,20 +202,30 @@ def test_finetune_standard(self):
         energy = dp.eval(test_data["coord"], test_data["box"], test_data["type"][0])[0]
         energy_diff = test_data["energy"] - energy
         finetune_shift = finetuned_bias[idx_type_map] - pretrained_bias[idx_type_map]
-        ground_truth_shift = np.linalg.lstsq(atom_nums, energy_diff, rcond=None)[0].reshape(-1)
+        ground_truth_shift = np.linalg.lstsq(atom_nums, energy_diff, rcond=None)[
+            0
+        ].reshape(-1)
 
         dp_finetuned = DeepPotential(FINETUNED_MODEL)
-        energy_finetuned = dp_finetuned.eval(test_data["coord"], test_data["box"], test_data["type"][0])[0]
+        energy_finetuned = dp_finetuned.eval(
+            test_data["coord"], test_data["box"], test_data["type"][0]
+        )[0]
         energy_diff_finetuned = test_data["energy"] - energy_finetuned
-        finetune_results = np.linalg.lstsq(atom_nums, energy_diff_finetuned, rcond=None)[0].reshape(-1)
+        finetune_results = np.linalg.lstsq(
+            atom_nums, energy_diff_finetuned, rcond=None
+        )[0].reshape(-1)
 
         # check values
-        np.testing.assert_almost_equal(finetune_shift, ground_truth_shift, default_places)
+        np.testing.assert_almost_equal(
+            finetune_shift, ground_truth_shift, default_places
+        )
         np.testing.assert_almost_equal(finetune_results, 0.0, default_places)
 
     def test_finetune_mixed_type(self):
-        pretrained_bias = get_tensor_by_name(PRE_MODEL, 'fitting_attr/t_bias_atom_e')
-        finetuned_bias_mixed_type = get_tensor_by_name(FINETUNED_MODEL_MIX, 'fitting_attr/t_bias_atom_e')
+        pretrained_bias = get_tensor_by_name(PRE_MODEL, "fitting_attr/t_bias_atom_e")
+        finetuned_bias_mixed_type = get_tensor_by_name(
+            FINETUNED_MODEL_MIX, "fitting_attr/t_bias_atom_e"
+        )
         sorter = np.argsort(PRE_MAP)
         idx_type_map = sorter[np.searchsorted(PRE_MAP, FINETUNED_MAP, sorter=sorter)]
         test_data = self.valid_data.get_test()
@@ -167,14 +234,24 @@ def test_finetune_mixed_type(self):
         dp = DeepPotential(PRE_MODEL)
         energy = dp.eval(test_data["coord"], test_data["box"], test_data["type"][0])[0]
         energy_diff = test_data["energy"] - energy
-        finetune_shift = finetuned_bias_mixed_type[idx_type_map] - pretrained_bias[idx_type_map]
-        ground_truth_shift = np.linalg.lstsq(atom_nums, energy_diff, rcond=None)[0].reshape(-1)
+        finetune_shift = (
+            finetuned_bias_mixed_type[idx_type_map] - pretrained_bias[idx_type_map]
+        )
+        ground_truth_shift = np.linalg.lstsq(atom_nums, energy_diff, rcond=None)[
+            0
+        ].reshape(-1)
 
         dp_finetuned_mixed_type = DeepPotential(FINETUNED_MODEL_MIX)
-        energy_finetuned = dp_finetuned_mixed_type.eval(test_data["coord"], test_data["box"], test_data["type"][0])[0]
+        energy_finetuned = dp_finetuned_mixed_type.eval(
+            test_data["coord"], test_data["box"], test_data["type"][0]
+        )[0]
         energy_diff_finetuned = test_data["energy"] - energy_finetuned
-        finetune_results = np.linalg.lstsq(atom_nums, energy_diff_finetuned, rcond=None)[0].reshape(-1)
+        finetune_results = np.linalg.lstsq(
+            atom_nums, energy_diff_finetuned, rcond=None
+        )[0].reshape(-1)
 
         # check values
-        np.testing.assert_almost_equal(finetune_shift, ground_truth_shift, default_places)
+        np.testing.assert_almost_equal(
+            finetune_shift, ground_truth_shift, default_places
+        )
         np.testing.assert_almost_equal(finetune_results, 0.0, default_places)
diff --git a/source/tests/test_fitting_ener_type.py b/source/tests/test_fitting_ener_type.py
index 0451460917..02902f5d3e 100644
--- a/source/tests/test_fitting_ener_type.py
+++ b/source/tests/test_fitting_ener_type.py
@@ -1,117 +1,217 @@
+import os
+import pickle
+import sys
+import unittest
 
-import dpdata,os,sys,unittest
+import dpdata
 import numpy as np
-from deepmd.env import tf
-import pickle
-from common import Data,gen_data, j_loader
+from common import (
+    Data,
+    DataSystem,
+    gen_data,
+    j_loader,
+)
 
-from common import DataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import EnerFitting
-from deepmd.model import EnerModel
-from deepmd.common import j_must_have
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model import (
+    EnerModel,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
+
 class TestModel(tf.test.TestCase):
-    def setUp(self) :
+    def setUp(self):
         gen_data()
 
     def test_fitting(self):
-        jfile = 'water_se_a_type.json'
+        jfile = "water_se_a_type.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        sel = j_must_have (jdata['model']['descriptor'], 'sel')
-        ntypes=len(sel)
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
-        
-        test_data = data.get_test ()
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+        sel = j_must_have(jdata["model"]["descriptor"], "sel")
+        ntypes = len(sel)
+
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+
+        test_data = data.get_test()
         numb_test = 1
 
-        jdata['model']['descriptor']['neuron'] = [5, 5, 5]
-        jdata['model']['descriptor']['axis_neuron'] = 2
-        
-        jdata['model']['descriptor'].pop('type', None)        
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed = True)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = EnerFitting(**jdata['model']['fitting_net'], uniform_seed = True)
-        
+        jdata["model"]["descriptor"]["neuron"] = [5, 5, 5]
+        jdata["model"]["descriptor"]["axis_neuron"] = 2
+
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
+
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']]
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
         }
-        
-
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
         t_fparam = None
-        
-        dout = np.array([0.0005722682145569174,-0.00020202686217742682,-0.00020202686217742682,7.13250554992363e-05,-0.0014770058171250015,0.000521468690207748,-0.001143865186937176,0.0004038453384193948,0.0005617335409639567,-0.00019831394075147532,
-                    0.00048086740718842236,-0.0001693584775806112,-0.0001693584775806112,5.966987137476082e-05,-0.0012342029581315136,0.00043492340851472783,-0.0009566016612537016,0.00033706767041080107,0.00047065988464132244,-0.0001657950398095401,
-                    0.0003647849239740657,-0.00013744939018250384,-0.00013744939018250384,5.1825826955234744e-05,-0.00096004206555711,0.00036185565262332876,-0.0007267433909643961,0.0002738914365542745,0.00038019365906978136,-0.00014322754331896057,
-                    0.0004675256930823109,-0.00017634410399626168,-0.00017634410399626168,6.652672908755666e-05,-0.0012328062885292486,0.00046500213384094614,-0.0009328887521346069,0.0003518668613172834,0.0004877847509912577,-0.00018396318824508986,
-                    0.0005154794374703516,-0.00019422534512034776,-0.00019422534512034776,7.318151797939947e-05,-0.0013576642997136488,0.0005115548790018505,-0.0010275333676074971,0.00038716440070070385,0.0005376426714609369,-0.00020257810468163985,
-                    0.0004482204892297628,-0.00016887749501640607,-0.00016887749501640607,6.364643102775375e-05,-0.001181345877677835,0.0004452029242063362,-0.0008941636427724908,0.0003369586197174627,0.0004677878512312651,-0.00017625260641095753])
-        type_embedding = np.array([1.4916816460764615,0.2720153234707013,-2.4385153754181985,-1.8454294510880027,2.874575701113528,1.1225116575801295,0.4204818970813372,-2.3784087249787587,-1.5053748251050598,2.769329403073084])
+
+        dout = np.array(
+            [
+                0.0005722682145569174,
+                -0.00020202686217742682,
+                -0.00020202686217742682,
+                7.13250554992363e-05,
+                -0.0014770058171250015,
+                0.000521468690207748,
+                -0.001143865186937176,
+                0.0004038453384193948,
+                0.0005617335409639567,
+                -0.00019831394075147532,
+                0.00048086740718842236,
+                -0.0001693584775806112,
+                -0.0001693584775806112,
+                5.966987137476082e-05,
+                -0.0012342029581315136,
+                0.00043492340851472783,
+                -0.0009566016612537016,
+                0.00033706767041080107,
+                0.00047065988464132244,
+                -0.0001657950398095401,
+                0.0003647849239740657,
+                -0.00013744939018250384,
+                -0.00013744939018250384,
+                5.1825826955234744e-05,
+                -0.00096004206555711,
+                0.00036185565262332876,
+                -0.0007267433909643961,
+                0.0002738914365542745,
+                0.00038019365906978136,
+                -0.00014322754331896057,
+                0.0004675256930823109,
+                -0.00017634410399626168,
+                -0.00017634410399626168,
+                6.652672908755666e-05,
+                -0.0012328062885292486,
+                0.00046500213384094614,
+                -0.0009328887521346069,
+                0.0003518668613172834,
+                0.0004877847509912577,
+                -0.00018396318824508986,
+                0.0005154794374703516,
+                -0.00019422534512034776,
+                -0.00019422534512034776,
+                7.318151797939947e-05,
+                -0.0013576642997136488,
+                0.0005115548790018505,
+                -0.0010275333676074971,
+                0.00038716440070070385,
+                0.0005376426714609369,
+                -0.00020257810468163985,
+                0.0004482204892297628,
+                -0.00016887749501640607,
+                -0.00016887749501640607,
+                6.364643102775375e-05,
+                -0.001181345877677835,
+                0.0004452029242063362,
+                -0.0008941636427724908,
+                0.0003369586197174627,
+                0.0004677878512312651,
+                -0.00017625260641095753,
+            ]
+        )
+        type_embedding = np.array(
+            [
+                1.4916816460764615,
+                0.2720153234707013,
+                -2.4385153754181985,
+                -1.8454294510880027,
+                2.874575701113528,
+                1.1225116575801295,
+                0.4204818970813372,
+                -2.3784087249787587,
+                -1.5053748251050598,
+                2.769329403073084,
+            ]
+        )
         atype = np.array([0, 0, 1, 1, 1, 1], dtype=np.int32)
 
-        dout= dout.reshape([-1,10])
-        type_embedding = type_embedding.reshape([ntypes,-1])
+        dout = dout.reshape([-1, 10])
+        type_embedding = type_embedding.reshape([ntypes, -1])
         atype = atype.reshape([-1])
-        atom_ener = fitting.build(tf.convert_to_tensor(dout),
-                                  t_natoms, 
-                                  {'type_embedding':tf.convert_to_tensor(type_embedding),
-                                   'atype':tf.convert_to_tensor(atype)},
-                                  reuse=False,
-                                  suffix="se_a_type_fit_")
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_energy:        test_data['energy']              [:numb_test],
-                          t_force:         np.reshape(test_data['force']    [:numb_test, :], [-1]),
-                          t_virial:        np.reshape(test_data['virial']   [:numb_test, :], [-1]),
-                          t_atom_ener:     np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False}
+        atom_ener = fitting.build(
+            tf.convert_to_tensor(dout),
+            t_natoms,
+            {
+                "type_embedding": tf.convert_to_tensor(type_embedding),
+                "atype": tf.convert_to_tensor(atype),
+            },
+            reuse=False,
+            suffix="se_a_type_fit_",
+        )
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [pred_atom_ener] = sess.run([atom_ener], 
-                             feed_dict = feed_dict_test)
+        [pred_atom_ener] = sess.run([atom_ener], feed_dict=feed_dict_test)
 
-        pred_atom_ener=pred_atom_ener.reshape([-1])
+        pred_atom_ener = pred_atom_ener.reshape([-1])
 
-        ref_atom_ener =[10.121733946849165,10.121712105320634,10.143275419743475,10.143299785396826,10.143311150431957,10.143295201182019]
+        ref_atom_ener = [
+            10.121733946849165,
+            10.121712105320634,
+            10.143275419743475,
+            10.143299785396826,
+            10.143311150431957,
+            10.143295201182019,
+        ]
 
         places = 10
-                
-        
+
         np.testing.assert_almost_equal(pred_atom_ener, ref_atom_ener, places)
-        
-        
diff --git a/source/tests/test_fitting_stat.py b/source/tests/test_fitting_stat.py
index 970e30da4b..ff0cbb6d21 100644
--- a/source/tests/test_fitting_stat.py
+++ b/source/tests/test_fitting_stat.py
@@ -1,13 +1,24 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
+from collections import (
+    defaultdict,
+)
+
+import numpy as np
+from common import (
+    j_loader,
+)
 
-from collections import defaultdict
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import EnerFitting
-from common import j_loader
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+
+input_json = "water_se_a_afparam.json"
 
-input_json = 'water_se_a_afparam.json'
 
 def _make_fake_data(sys_natoms, sys_nframes, avgs, stds):
     all_stat = defaultdict(list)
@@ -16,62 +27,66 @@ def _make_fake_data(sys_natoms, sys_nframes, avgs, stds):
     for ii in range(nsys):
         tmp_data_f = []
         tmp_data_a = []
-        for jj in range(ndof) :
-            tmp_data_f.append(np.random.normal(loc = avgs[jj], 
-                                               scale = stds[jj],
-                                               size = (sys_nframes[ii],1)))
-            tmp_data_a.append(np.random.normal(loc = avgs[jj], 
-                                               scale = stds[jj],
-                                               size = (sys_nframes[ii], sys_natoms[ii])))
-        tmp_data_f = np.transpose(tmp_data_f, (1,2,0))
-        tmp_data_a = np.transpose(tmp_data_a, (1,2,0))
-        all_stat['fparam'].append(tmp_data_f)
-        all_stat['aparam'].append(tmp_data_a)
+        for jj in range(ndof):
+            tmp_data_f.append(
+                np.random.normal(
+                    loc=avgs[jj], scale=stds[jj], size=(sys_nframes[ii], 1)
+                )
+            )
+            tmp_data_a.append(
+                np.random.normal(
+                    loc=avgs[jj], scale=stds[jj], size=(sys_nframes[ii], sys_natoms[ii])
+                )
+            )
+        tmp_data_f = np.transpose(tmp_data_f, (1, 2, 0))
+        tmp_data_a = np.transpose(tmp_data_a, (1, 2, 0))
+        all_stat["fparam"].append(tmp_data_f)
+        all_stat["aparam"].append(tmp_data_a)
     return all_stat
 
+
 def _brute_fparam(data, ndim):
-    adata = data['fparam']
+    adata = data["fparam"]
     all_data = []
     for ii in adata:
         tmp = np.reshape(ii, [-1, ndim])
         if len(all_data) == 0:
             all_data = np.array(tmp)
         else:
-            all_data = np.concatenate((all_data, tmp), axis = 0)
-    avg = np.average(all_data, axis = 0)
-    std = np.std(all_data, axis = 0)
+            all_data = np.concatenate((all_data, tmp), axis=0)
+    avg = np.average(all_data, axis=0)
+    std = np.std(all_data, axis=0)
     return avg, std
 
+
 def _brute_aparam(data, ndim):
-    adata = data['aparam']
+    adata = data["aparam"]
     all_data = []
     for ii in adata:
         tmp = np.reshape(ii, [-1, ndim])
         if len(all_data) == 0:
             all_data = np.array(tmp)
         else:
-            all_data = np.concatenate((all_data, tmp), axis = 0)
-    avg = np.average(all_data, axis = 0)
-    std = np.std(all_data, axis = 0)
+            all_data = np.concatenate((all_data, tmp), axis=0)
+    avg = np.average(all_data, axis=0)
+    std = np.std(all_data, axis=0)
     return avg, std
 
 
-class TestEnerFittingStat (unittest.TestCase) :
-    def test (self) :
+class TestEnerFittingStat(unittest.TestCase):
+    def test(self):
         jdata = j_loader(input_json)
-        jdata = jdata['model']
+        jdata = jdata["model"]
         # descrpt = DescrptSeA(jdata['descriptor'])
         # fitting = EnerFitting(jdata['fitting_net'], descrpt)
-        descrpt = DescrptSeA(6.0, 
-                             5.8,
-                             [46, 92],
-                             neuron = [25, 50, 100], 
-                             axis_neuron = 16)
-        fitting = EnerFitting(descrpt,
-                              neuron = [240, 240, 240],
-                              resnet_dt = True,
-                              numb_fparam = 2,
-                              numb_aparam = 2)
+        descrpt = DescrptSeA(6.0, 5.8, [46, 92], neuron=[25, 50, 100], axis_neuron=16)
+        fitting = EnerFitting(
+            descrpt,
+            neuron=[240, 240, 240],
+            resnet_dt=True,
+            numb_fparam=2,
+            numb_aparam=2,
+        )
         avgs = [0, 10]
         stds = [2, 0.4]
         sys_natoms = [10, 100]
@@ -79,7 +94,7 @@ def test (self) :
         all_data = _make_fake_data(sys_natoms, sys_nframes, avgs, stds)
         frefa, frefs = _brute_fparam(all_data, len(avgs))
         arefa, arefs = _brute_aparam(all_data, len(avgs))
-        fitting.compute_input_stats(all_data, protection = 1e-2)
+        fitting.compute_input_stats(all_data, protection=1e-2)
         # print(frefa, frefs)
         np.testing.assert_almost_equal(frefa, fitting.fparam_avg)
         np.testing.assert_almost_equal(frefs, fitting.fparam_std)
diff --git a/source/tests/test_gen_stat_data.py b/source/tests/test_gen_stat_data.py
index 83e4ff2f82..eac279a3a4 100644
--- a/source/tests/test_gen_stat_data.py
+++ b/source/tests/test_gen_stat_data.py
@@ -1,32 +1,45 @@
 import shutil
-import numpy as np
 import unittest
+
 import dpdata
+import numpy as np
 
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model.model_stat import (
+    _make_all_stat_ref,
+    make_stat_input,
+    merge_sys_stat,
+)
 from deepmd.utils import random as dp_random
-from deepmd.utils.data_system import DeepmdDataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import EnerFitting
-from deepmd.model.model_stat import make_stat_input, merge_sys_stat, _make_all_stat_ref
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+
 
 def gen_sys(nframes, atom_types):
     natoms = len(atom_types)
     data = {}
-    data['coords'] = np.random.random([nframes, natoms, 3])
-    data['forces'] = np.random.random([nframes, natoms, 3])
-    data['cells'] = np.random.random([nframes, 9])
-    data['energies'] = np.random.random([nframes, 1])
+    data["coords"] = np.random.random([nframes, natoms, 3])
+    data["forces"] = np.random.random([nframes, natoms, 3])
+    data["cells"] = np.random.random([nframes, 9])
+    data["energies"] = np.random.random([nframes, 1])
     types = list(set(list(atom_types)))
     types.sort()
-    data['atom_names'] = []
-    data['atom_numbs'] = []
+    data["atom_names"] = []
+    data["atom_numbs"] = []
     for ii in range(len(types)):
-        data['atom_names'] .append( 'TYPE_%d' % ii )
-        data['atom_numbs'] .append(np.sum(atom_types == ii))
-    data['atom_types'] = np.array(atom_types, dtype = int)
+        data["atom_names"].append("TYPE_%d" % ii)
+        data["atom_numbs"].append(np.sum(atom_types == ii))
+    data["atom_types"] = np.array(atom_types, dtype=int)
     return data
 
-class TestGenStatData(unittest.TestCase) :
+
+class TestGenStatData(unittest.TestCase):
     def setUp(self):
         data0 = gen_sys(20, [0, 1, 0, 2, 1])
         data1 = gen_sys(30, [0, 1, 0, 0])
@@ -34,59 +47,50 @@ def setUp(self):
         sys1 = dpdata.LabeledSystem()
         sys0.data = data0
         sys1.data = data1
-        sys0.to_deepmd_npy('system_0', set_size = 10)
-        sys1.to_deepmd_npy('system_1', set_size = 10)
-        
+        sys0.to_deepmd_npy("system_0", set_size=10)
+        sys1.to_deepmd_npy("system_1", set_size=10)
+
     def tearDown(self):
-        shutil.rmtree('system_0')
-        shutil.rmtree('system_1')
+        shutil.rmtree("system_0")
+        shutil.rmtree("system_1")
 
-    def _comp_data(self, d0, d1) :
+    def _comp_data(self, d0, d1):
         np.testing.assert_almost_equal(d0, d1)
 
     def test_merge_all_stat(self):
         dp_random.seed(0)
-        data0 = DeepmdDataSystem(['system_0', 'system_1'], 
-                                5, 
-                                10, 
-                                1.0)
-        data0.add('energy', 1, must = True)
+        data0 = DeepmdDataSystem(["system_0", "system_1"], 5, 10, 1.0)
+        data0.add("energy", 1, must=True)
         dp_random.seed(0)
-        data1 = DeepmdDataSystem(['system_0', 'system_1'], 
-                                5, 
-                                10, 
-                                1.0)
-        data1.add('energy', 1, must = True)
+        data1 = DeepmdDataSystem(["system_0", "system_1"], 5, 10, 1.0)
+        data1.add("energy", 1, must=True)
         dp_random.seed(0)
-        data2 = DeepmdDataSystem(['system_0', 'system_1'], 
-                                5, 
-                                10, 
-                                1.0)
-        data2.add('energy', 1, must = True)
-        
+        data2 = DeepmdDataSystem(["system_0", "system_1"], 5, 10, 1.0)
+        data2.add("energy", 1, must=True)
+
         dp_random.seed(0)
-        all_stat_0 = make_stat_input(data0, 10, merge_sys = False)
+        all_stat_0 = make_stat_input(data0, 10, merge_sys=False)
         dp_random.seed(0)
-        all_stat_1 = make_stat_input(data1, 10, merge_sys = True)
+        all_stat_1 = make_stat_input(data1, 10, merge_sys=True)
         all_stat_2 = merge_sys_stat(all_stat_0)
         dp_random.seed(0)
         all_stat_3 = _make_all_stat_ref(data2, 10)
-        
+
         ####################################
         # only check if the energy is concatenated correctly
         ####################################
-        dd = 'energy'
-            # if 'find_' in dd: continue
-            # if 'natoms_vec' in dd: continue
-            # if 'default_mesh' in dd: continue
-            # print(all_stat_2[dd])
-            # print(dd, all_stat_1[dd])
+        dd = "energy"
+        # if 'find_' in dd: continue
+        # if 'natoms_vec' in dd: continue
+        # if 'default_mesh' in dd: continue
+        # print(all_stat_2[dd])
+        # print(dd, all_stat_1[dd])
         d1 = np.array(all_stat_1[dd])
         d2 = np.array(all_stat_2[dd])
         d3 = np.array(all_stat_3[dd])
         # print(dd)
         # print(d1.shape)
-        # print(d2.shape)            
+        # print(d2.shape)
         # self.assertEqual(all_stat_2[dd], all_stat_1[dd])
         self._comp_data(d1, d2)
         self._comp_data(d1, d3)
@@ -95,61 +99,44 @@ def test_merge_all_stat(self):
 class TestEnerShift(unittest.TestCase):
     def setUp(self):
         data0 = gen_sys(30, [0, 1, 0, 2, 1])
-        data1 = gen_sys(30, [0, 1, 0, 0])    
+        data1 = gen_sys(30, [0, 1, 0, 0])
         sys0 = dpdata.LabeledSystem()
         sys1 = dpdata.LabeledSystem()
         sys0.data = data0
         sys1.data = data1
-        sys0.to_deepmd_npy('system_0', set_size = 10)
-        sys1.to_deepmd_npy('system_1', set_size = 10)
-        
+        sys0.to_deepmd_npy("system_0", set_size=10)
+        sys1.to_deepmd_npy("system_1", set_size=10)
+
     def tearDown(self):
-        shutil.rmtree('system_0')
-        shutil.rmtree('system_1')
+        shutil.rmtree("system_0")
+        shutil.rmtree("system_1")
 
     def test_ener_shift(self):
         dp_random.seed(0)
-        data = DeepmdDataSystem(['system_0', 'system_1'], 
-                                5, 
-                                10, 
-                                1.0)
-        data.add('energy', 1, must = True)
-        ener_shift0 = data.compute_energy_shift(rcond = 1)
-        all_stat = make_stat_input(data, 4, merge_sys = False)
-        descrpt = DescrptSeA(6.0, 
-                             5.8,
-                             [46, 92],
-                             neuron = [25, 50, 100], 
-                             axis_neuron = 16)
-        fitting = EnerFitting(descrpt,
-                              neuron = [240, 240, 240],
-                              resnet_dt = True)
-        ener_shift1 = fitting._compute_output_stats(all_stat, rcond = 1)        
+        data = DeepmdDataSystem(["system_0", "system_1"], 5, 10, 1.0)
+        data.add("energy", 1, must=True)
+        ener_shift0 = data.compute_energy_shift(rcond=1)
+        all_stat = make_stat_input(data, 4, merge_sys=False)
+        descrpt = DescrptSeA(6.0, 5.8, [46, 92], neuron=[25, 50, 100], axis_neuron=16)
+        fitting = EnerFitting(descrpt, neuron=[240, 240, 240], resnet_dt=True)
+        ener_shift1 = fitting._compute_output_stats(all_stat, rcond=1)
         np.testing.assert_almost_equal(ener_shift0, ener_shift1)
 
     def test_ener_shift_assigned(self):
         dp_random.seed(0)
         ae0 = dp_random.random()
-        data = DeepmdDataSystem(['system_0'],
-                                5,
-                                10,
-                                1.0)
-        data.add('energy', 1, must = True)
-        all_stat = make_stat_input(data, 4, merge_sys = False)
-        descrpt = DescrptSeA(6.0,
-                             5.8,
-                             [46, 92],
-                             neuron = [25, 50, 100],
-                             axis_neuron = 16)
-        fitting = EnerFitting(descrpt,
-                              neuron = [240, 240, 240],
-                              resnet_dt = True,
-                              atom_ener=[ae0, None, None])
-        ener_shift1 = fitting._compute_output_stats(all_stat, rcond = 1)
+        data = DeepmdDataSystem(["system_0"], 5, 10, 1.0)
+        data.add("energy", 1, must=True)
+        all_stat = make_stat_input(data, 4, merge_sys=False)
+        descrpt = DescrptSeA(6.0, 5.8, [46, 92], neuron=[25, 50, 100], axis_neuron=16)
+        fitting = EnerFitting(
+            descrpt, neuron=[240, 240, 240], resnet_dt=True, atom_ener=[ae0, None, None]
+        )
+        ener_shift1 = fitting._compute_output_stats(all_stat, rcond=1)
         # check assigned energy
         np.testing.assert_almost_equal(ae0, ener_shift1[0])
         # check if total energy are the same
         natoms = data.natoms_vec[0][2:]
-        tot0 = np.dot(data.compute_energy_shift(rcond = 1), natoms)
+        tot0 = np.dot(data.compute_energy_shift(rcond=1), natoms)
         tot1 = np.dot(ener_shift1, natoms)
         np.testing.assert_almost_equal(tot0, tot1)
diff --git a/source/tests/test_get_potential.py b/source/tests/test_get_potential.py
index 8637b7ff66..72b53cb688 100644
--- a/source/tests/test_get_potential.py
+++ b/source/tests/test_get_potential.py
@@ -1,32 +1,39 @@
 """Test if `DeepPotential` facto function returns the right type of potential."""
 
 import unittest
-from pathlib import Path
+from pathlib import (
+    Path,
+)
 
-from deepmd.infer import (DeepDipole, DeepGlobalPolar, DeepPolar, DeepPot,
-                          DeepPotential, DeepWFC)
+from infer.convert2pb import (
+    convert_pbtxt_to_pb,
+)
 
-from infer.convert2pb import convert_pbtxt_to_pb
+from deepmd.infer import (
+    DeepDipole,
+    DeepGlobalPolar,
+    DeepPolar,
+    DeepPot,
+    DeepPotential,
+    DeepWFC,
+)
 
 
 class TestGetPotential(unittest.TestCase):
-
     def setUp(self):
         self.work_dir = Path(__file__).parent / "infer"
 
         convert_pbtxt_to_pb(
-            str(self.work_dir / "deeppot.pbtxt"),
-            str(self.work_dir / "deep_pot.pb")
+            str(self.work_dir / "deeppot.pbtxt"), str(self.work_dir / "deep_pot.pb")
         )
 
         convert_pbtxt_to_pb(
             str(self.work_dir / "deepdipole.pbtxt"),
-            str(self.work_dir / "deep_dipole.pb")
+            str(self.work_dir / "deep_dipole.pb"),
         )
 
         convert_pbtxt_to_pb(
-            str(self.work_dir / "deeppolar.pbtxt"),
-            str(self.work_dir / "deep_polar.pb")
+            str(self.work_dir / "deeppolar.pbtxt"), str(self.work_dir / "deep_polar.pb")
         )
 
         # TODO add model files for globalpolar and WFC
diff --git a/source/tests/test_init_frz_model_multi.py b/source/tests/test_init_frz_model_multi.py
index ffc4c41b45..3405ab1544 100644
--- a/source/tests/test_init_frz_model_multi.py
+++ b/source/tests/test_init_frz_model_multi.py
@@ -1,18 +1,43 @@
-import os, sys, platform, shutil, dpdata, json
-import numpy as np
-import unittest
+import json
+import os
+import platform
+import shutil
 import subprocess as sp
-from common import j_loader, tests_path, run_dp
-from deepmd.train.trainer import DPTrainer
-from deepmd.train.run_options import RunOptions
-from deepmd.utils.argcheck import normalize
-from deepmd.utils.compat import update_deepmd_input
-from deepmd.utils.data_system import DeepmdDataSystem
-from deepmd.utils.multi_init import replace_model_params_with_frz_multi_model
-
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION, tf
+import sys
+import unittest
+
+import dpdata
+import numpy as np
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
 from packaging.version import parse as parse_version
 
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    tf,
+)
+from deepmd.train.run_options import (
+    RunOptions,
+)
+from deepmd.train.trainer import (
+    DPTrainer,
+)
+from deepmd.utils.argcheck import (
+    normalize,
+)
+from deepmd.utils.compat import (
+    update_deepmd_input,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+from deepmd.utils.multi_init import (
+    replace_model_params_with_frz_multi_model,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -42,8 +67,12 @@ def _init_models():
     jdata["training"]["data_dict"]["water_ener"] = {}
     jdata["training"]["data_dict"]["water_ener"]["training_data"] = training_data_config
     jdata["training"]["data_dict"]["water_ener"]["training_data"]["systems"] = data_file
-    jdata["training"]["data_dict"]["water_ener"]["validation_data"] = validation_data_config
-    jdata["training"]["data_dict"]["water_ener"]["validation_data"]["systems"] = data_file
+    jdata["training"]["data_dict"]["water_ener"][
+        "validation_data"
+    ] = validation_data_config
+    jdata["training"]["data_dict"]["water_ener"]["validation_data"][
+        "systems"
+    ] = data_file
     jdata["training"]["save_ckpt"] = ckpt
     jdata["model"]["fitting_net_dict"] = {}
     jdata["model"]["fitting_net_dict"]["water_ener"] = fitting_config
@@ -52,9 +81,11 @@ def _init_models():
     with open(INPUT, "w") as fp:
         json.dump(jdata, fp, indent=4)
     ret = run_dp("dp train " + INPUT)
-    np.testing.assert_equal(ret, 0, 'DP train failed!')
-    ret = run_dp("dp freeze -c " + str(tests_path) + " -o " + frozen_model + " --united-model")
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP train failed!")
+    ret = run_dp(
+        "dp freeze -c " + str(tests_path) + " -o " + frozen_model + " --united-model"
+    )
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
     jdata = update_deepmd_input(jdata, warning=True, dump="input_v2_compat.json")
     jdata = normalize(jdata)
     model_ckpt = DPTrainer(jdata, run_opt=run_opt_ckpt)
@@ -66,10 +97,18 @@ def _init_models():
     jdata["loss_dict"]["water_ener_new"] = loss_config
     jdata["training"]["data_dict"] = {}
     jdata["training"]["data_dict"]["water_ener_new"] = {}
-    jdata["training"]["data_dict"]["water_ener_new"]["training_data"] = training_data_config
-    jdata["training"]["data_dict"]["water_ener_new"]["training_data"]["systems"] = data_file
-    jdata["training"]["data_dict"]["water_ener_new"]["validation_data"] = validation_data_config
-    jdata["training"]["data_dict"]["water_ener_new"]["validation_data"]["systems"] = data_file
+    jdata["training"]["data_dict"]["water_ener_new"][
+        "training_data"
+    ] = training_data_config
+    jdata["training"]["data_dict"]["water_ener_new"]["training_data"][
+        "systems"
+    ] = data_file
+    jdata["training"]["data_dict"]["water_ener_new"][
+        "validation_data"
+    ] = validation_data_config
+    jdata["training"]["data_dict"]["water_ener_new"]["validation_data"][
+        "systems"
+    ] = data_file
     jdata["training"].pop("fitting_weight")
 
     jdata = replace_model_params_with_frz_multi_model(jdata, frozen_model)
@@ -86,50 +125,70 @@ def _init_models():
         test_size=1,
         rcut=rcut,
         type_map=type_map,
-        trn_all_set=True
+        trn_all_set=True,
     )
-    data_requirement = {'energy': {'ndof': 1,
-                                   'atomic': False,
-                                   'must': False,
-                                   'high_prec': True,
-                                   'type_sel': None,
-                                   'repeat': 1,
-                                   'default': 0.0},
-                        'force': {'ndof': 3,
-                                  'atomic': True,
-                                  'must': False,
-                                  'high_prec': False,
-                                  'type_sel': None,
-                                  'repeat': 1,
-                                  'default': 0.0},
-                        'virial': {'ndof': 9,
-                                   'atomic': False,
-                                   'must': False,
-                                   'high_prec': False,
-                                   'type_sel': None,
-                                   'repeat': 1,
-                                   'default': 0.0},
-                        'atom_ener': {'ndof': 1,
-                                      'atomic': True,
-                                      'must': False,
-                                      'high_prec': False,
-                                      'type_sel': None,
-                                      'repeat': 1,
-                                      'default': 0.0},
-                        'atom_pref': {'ndof': 1,
-                                      'atomic': True,
-                                      'must': False,
-                                      'high_prec': False,
-                                      'type_sel': None,
-                                      'repeat': 3,
-                                      'default': 0.0}}
+    data_requirement = {
+        "energy": {
+            "ndof": 1,
+            "atomic": False,
+            "must": False,
+            "high_prec": True,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "force": {
+            "ndof": 3,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "virial": {
+            "ndof": 9,
+            "atomic": False,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "atom_ener": {
+            "ndof": 1,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "atom_pref": {
+            "ndof": 1,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 3,
+            "default": 0.0,
+        },
+    }
     data.add_dict(data_requirement)
     stop_batch = jdata["training"]["numb_steps"]
 
     return INPUT, ckpt, frozen_model, model_ckpt, model_frz, data, stop_batch
 
 
-INPUT, CKPT, FROZEN_MODEL, CKPT_TRAINER, FRZ_TRAINER, VALID_DATA, STOP_BATCH = _init_models()
+(
+    INPUT,
+    CKPT,
+    FROZEN_MODEL,
+    CKPT_TRAINER,
+    FRZ_TRAINER,
+    VALID_DATA,
+    STOP_BATCH,
+) = _init_models()
 
 
 class TestInitFrzModelMulti(unittest.TestCase):
@@ -138,7 +197,10 @@ def setUpClass(cls):
         cls.dp_ckpt = CKPT_TRAINER
         cls.dp_frz = FRZ_TRAINER
         cls.valid_data_dict = {"water_ener": VALID_DATA}
-        cls.valid_data_dict_new = {"water_ener": VALID_DATA, "water_ener_new": VALID_DATA}
+        cls.valid_data_dict_new = {
+            "water_ener": VALID_DATA,
+            "water_ener_new": VALID_DATA,
+        }
         cls.stop_batch = STOP_BATCH
 
     @classmethod
@@ -147,38 +209,46 @@ def tearDownClass(cls):
         _file_delete(FROZEN_MODEL)
         _file_delete("out.json")
         _file_delete(str(tests_path / "checkpoint"))
-        _file_delete(CKPT+".meta")
-        _file_delete(CKPT+".index")
-        _file_delete(CKPT+".data-00000-of-00001")
-        _file_delete(CKPT+"-0.meta")
-        _file_delete(CKPT+"-0.index")
-        _file_delete(CKPT+"-0.data-00000-of-00001")
-        _file_delete(CKPT+"-1.meta")
-        _file_delete(CKPT+"-1.index")
-        _file_delete(CKPT+"-1.data-00000-of-00001")
+        _file_delete(CKPT + ".meta")
+        _file_delete(CKPT + ".index")
+        _file_delete(CKPT + ".data-00000-of-00001")
+        _file_delete(CKPT + "-0.meta")
+        _file_delete(CKPT + "-0.index")
+        _file_delete(CKPT + "-0.data-00000-of-00001")
+        _file_delete(CKPT + "-1.meta")
+        _file_delete(CKPT + "-1.index")
+        _file_delete(CKPT + "-1.data-00000-of-00001")
         _file_delete("input_v2_compat.json")
         _file_delete("lcurve.out")
 
     def test_single_frame(self):
         test_sys_name = "water_ener"
         valid_batch = self.valid_data_dict[test_sys_name].get_batch()
-        natoms = valid_batch['natoms_vec']
+        natoms = valid_batch["natoms_vec"]
         tf.reset_default_graph()
         self.dp_ckpt.build(self.valid_data_dict, self.stop_batch)
         self.dp_ckpt._init_session()
         feed_dict_ckpt = self.dp_ckpt.get_feed_dict(valid_batch, is_training=False)
-        ckpt_rmse_ckpt = self.dp_ckpt.loss_dict[test_sys_name].eval(self.dp_ckpt.sess, feed_dict_ckpt, natoms)
+        ckpt_rmse_ckpt = self.dp_ckpt.loss_dict[test_sys_name].eval(
+            self.dp_ckpt.sess, feed_dict_ckpt, natoms
+        )
         tf.reset_default_graph()
 
         self.dp_frz.build(self.valid_data_dict_new, self.stop_batch)
         self.dp_frz._init_session()
         feed_dict_frz = self.dp_frz.get_feed_dict(valid_batch, is_training=False)
-        ckpt_rmse_frz = self.dp_frz.loss_dict[test_sys_name].eval(self.dp_frz.sess, feed_dict_frz, natoms)
+        ckpt_rmse_frz = self.dp_frz.loss_dict[test_sys_name].eval(
+            self.dp_frz.sess, feed_dict_frz, natoms
+        )
         tf.reset_default_graph()
 
         # check values
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_e'], ckpt_rmse_frz['rmse_e'], default_places)
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_f'], ckpt_rmse_frz['rmse_f'], default_places)
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_v'], ckpt_rmse_frz['rmse_v'], default_places)
-
-
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_e"], ckpt_rmse_frz["rmse_e"], default_places
+        )
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_f"], ckpt_rmse_frz["rmse_f"], default_places
+        )
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_v"], ckpt_rmse_frz["rmse_v"], default_places
+        )
diff --git a/source/tests/test_init_frz_model_se_a.py b/source/tests/test_init_frz_model_se_a.py
index b95ffb59a0..b392472dc6 100644
--- a/source/tests/test_init_frz_model_se_a.py
+++ b/source/tests/test_init_frz_model_se_a.py
@@ -1,15 +1,38 @@
-import os, sys, platform, shutil, dpdata, json
-import numpy as np
-import unittest
+import json
+import os
+import platform
+import shutil
 import subprocess as sp
-from common import j_loader, tests_path, run_dp
-from deepmd.train.trainer import DPTrainer
-from deepmd.train.run_options import RunOptions
-from deepmd.utils.argcheck import normalize
-from deepmd.utils.compat import update_deepmd_input
-from deepmd.utils.data_system import DeepmdDataSystem
+import sys
+import unittest
 
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION, tf
+import dpdata
+import numpy as np
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    tf,
+)
+from deepmd.train.run_options import (
+    RunOptions,
+)
+from deepmd.train.trainer import (
+    DPTrainer,
+)
+from deepmd.utils.argcheck import (
+    normalize,
+)
+from deepmd.utils.compat import (
+    update_deepmd_input,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
 
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
@@ -38,9 +61,9 @@ def _init_models():
     with open(INPUT, "w") as fp:
         json.dump(jdata, fp, indent=4)
     ret = run_dp("dp train " + INPUT)
-    np.testing.assert_equal(ret, 0, 'DP train failed!')
+    np.testing.assert_equal(ret, 0, "DP train failed!")
     ret = run_dp("dp freeze -c " + str(tests_path) + " -o " + frozen_model)
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
 
     jdata = update_deepmd_input(jdata, warning=True, dump="input_v2_compat.json")
     jdata = normalize(jdata)
@@ -54,50 +77,70 @@ def _init_models():
         test_size=1,
         rcut=rcut,
         type_map=type_map,
-        trn_all_set=True
+        trn_all_set=True,
     )
-    data_requirement = {'energy': {'ndof': 1,
-                                   'atomic': False,
-                                   'must': False,
-                                   'high_prec': True,
-                                   'type_sel': None,
-                                   'repeat': 1,
-                                   'default': 0.0},
-                        'force': {'ndof': 3,
-                                  'atomic': True,
-                                  'must': False,
-                                  'high_prec': False,
-                                  'type_sel': None,
-                                  'repeat': 1,
-                                  'default': 0.0},
-                        'virial': {'ndof': 9,
-                                   'atomic': False,
-                                   'must': False,
-                                   'high_prec': False,
-                                   'type_sel': None,
-                                   'repeat': 1,
-                                   'default': 0.0},
-                        'atom_ener': {'ndof': 1,
-                                      'atomic': True,
-                                      'must': False,
-                                      'high_prec': False,
-                                      'type_sel': None,
-                                      'repeat': 1,
-                                      'default': 0.0},
-                        'atom_pref': {'ndof': 1,
-                                      'atomic': True,
-                                      'must': False,
-                                      'high_prec': False,
-                                      'type_sel': None,
-                                      'repeat': 3,
-                                      'default': 0.0}}
+    data_requirement = {
+        "energy": {
+            "ndof": 1,
+            "atomic": False,
+            "must": False,
+            "high_prec": True,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "force": {
+            "ndof": 3,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "virial": {
+            "ndof": 9,
+            "atomic": False,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "atom_ener": {
+            "ndof": 1,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "atom_pref": {
+            "ndof": 1,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 3,
+            "default": 0.0,
+        },
+    }
     data.add_dict(data_requirement)
     stop_batch = jdata["training"]["numb_steps"]
 
     return INPUT, ckpt, frozen_model, model_ckpt, model_frz, data, stop_batch
 
 
-INPUT, CKPT, FROZEN_MODEL, CKPT_TRAINER, FRZ_TRAINER, VALID_DATA, STOP_BATCH = _init_models()
+(
+    INPUT,
+    CKPT,
+    FROZEN_MODEL,
+    CKPT_TRAINER,
+    FRZ_TRAINER,
+    VALID_DATA,
+    STOP_BATCH,
+) = _init_models()
 
 
 class TestInitFrzModelA(unittest.TestCase):
@@ -114,26 +157,28 @@ def tearDownClass(cls):
         _file_delete(FROZEN_MODEL)
         _file_delete("out.json")
         _file_delete(str(tests_path / "checkpoint"))
-        _file_delete(CKPT+".meta")
-        _file_delete(CKPT+".index")
-        _file_delete(CKPT+".data-00000-of-00001")
-        _file_delete(CKPT+"-0.meta")
-        _file_delete(CKPT+"-0.index")
-        _file_delete(CKPT+"-0.data-00000-of-00001")
-        _file_delete(CKPT+"-1.meta")
-        _file_delete(CKPT+"-1.index")
-        _file_delete(CKPT+"-1.data-00000-of-00001")
+        _file_delete(CKPT + ".meta")
+        _file_delete(CKPT + ".index")
+        _file_delete(CKPT + ".data-00000-of-00001")
+        _file_delete(CKPT + "-0.meta")
+        _file_delete(CKPT + "-0.index")
+        _file_delete(CKPT + "-0.data-00000-of-00001")
+        _file_delete(CKPT + "-1.meta")
+        _file_delete(CKPT + "-1.index")
+        _file_delete(CKPT + "-1.data-00000-of-00001")
         _file_delete("input_v2_compat.json")
         _file_delete("lcurve.out")
 
     def test_single_frame(self):
         valid_batch = self.valid_data.get_batch()
-        natoms = valid_batch['natoms_vec']
+        natoms = valid_batch["natoms_vec"]
         tf.reset_default_graph()
         self.dp_ckpt.build(self.valid_data, self.stop_batch)
         self.dp_ckpt._init_session()
         feed_dict_ckpt = self.dp_ckpt.get_feed_dict(valid_batch, is_training=False)
-        ckpt_rmse_ckpt = self.dp_ckpt.loss.eval(self.dp_ckpt.sess, feed_dict_ckpt, natoms)
+        ckpt_rmse_ckpt = self.dp_ckpt.loss.eval(
+            self.dp_ckpt.sess, feed_dict_ckpt, natoms
+        )
         tf.reset_default_graph()
 
         self.dp_frz.build(self.valid_data, self.stop_batch)
@@ -143,8 +188,12 @@ def test_single_frame(self):
         tf.reset_default_graph()
 
         # check values
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_e'], ckpt_rmse_frz['rmse_e'], default_places)
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_f'], ckpt_rmse_frz['rmse_f'], default_places)
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_v'], ckpt_rmse_frz['rmse_v'], default_places)
-
-
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_e"], ckpt_rmse_frz["rmse_e"], default_places
+        )
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_f"], ckpt_rmse_frz["rmse_f"], default_places
+        )
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_v"], ckpt_rmse_frz["rmse_v"], default_places
+        )
diff --git a/source/tests/test_init_frz_model_se_a_type.py b/source/tests/test_init_frz_model_se_a_type.py
index 5ec8c65cb4..19dddce7da 100644
--- a/source/tests/test_init_frz_model_se_a_type.py
+++ b/source/tests/test_init_frz_model_se_a_type.py
@@ -1,15 +1,38 @@
-import os, sys, platform, shutil, dpdata, json
-import numpy as np
-import unittest
+import json
+import os
+import platform
+import shutil
 import subprocess as sp
-from common import j_loader, tests_path, run_dp
-from deepmd.train.trainer import DPTrainer
-from deepmd.train.run_options import RunOptions
-from deepmd.utils.argcheck import normalize
-from deepmd.utils.compat import update_deepmd_input
-from deepmd.utils.data_system import DeepmdDataSystem
+import sys
+import unittest
 
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION, tf
+import dpdata
+import numpy as np
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    tf,
+)
+from deepmd.train.run_options import (
+    RunOptions,
+)
+from deepmd.train.trainer import (
+    DPTrainer,
+)
+from deepmd.utils.argcheck import (
+    normalize,
+)
+from deepmd.utils.compat import (
+    update_deepmd_input,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
 
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
@@ -36,15 +59,15 @@ def _init_models():
     jdata["training"]["validation_data"]["systems"] = data_file
     jdata["training"]["save_ckpt"] = ckpt
     type_embed = {}
-    type_embed['neuron'] = [2, 4, 8]
-    type_embed['resnet_dt'] = False
-    jdata['model']["type_embedding"] = type_embed
+    type_embed["neuron"] = [2, 4, 8]
+    type_embed["resnet_dt"] = False
+    jdata["model"]["type_embedding"] = type_embed
     with open(INPUT, "w") as fp:
         json.dump(jdata, fp, indent=4)
     ret = run_dp("dp train " + INPUT)
-    np.testing.assert_equal(ret, 0, 'DP train failed!')
+    np.testing.assert_equal(ret, 0, "DP train failed!")
     ret = run_dp("dp freeze -c " + str(tests_path) + " -o " + frozen_model)
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
 
     jdata = update_deepmd_input(jdata, warning=True, dump="input_v2_compat.json")
     jdata = normalize(jdata)
@@ -58,50 +81,70 @@ def _init_models():
         test_size=1,
         rcut=rcut,
         type_map=type_map,
-        trn_all_set=True
+        trn_all_set=True,
     )
-    data_requirement = {'energy': {'ndof': 1,
-                                   'atomic': False,
-                                   'must': False,
-                                   'high_prec': True,
-                                   'type_sel': None,
-                                   'repeat': 1,
-                                   'default': 0.0},
-                        'force': {'ndof': 3,
-                                  'atomic': True,
-                                  'must': False,
-                                  'high_prec': False,
-                                  'type_sel': None,
-                                  'repeat': 1,
-                                  'default': 0.0},
-                        'virial': {'ndof': 9,
-                                   'atomic': False,
-                                   'must': False,
-                                   'high_prec': False,
-                                   'type_sel': None,
-                                   'repeat': 1,
-                                   'default': 0.0},
-                        'atom_ener': {'ndof': 1,
-                                      'atomic': True,
-                                      'must': False,
-                                      'high_prec': False,
-                                      'type_sel': None,
-                                      'repeat': 1,
-                                      'default': 0.0},
-                        'atom_pref': {'ndof': 1,
-                                      'atomic': True,
-                                      'must': False,
-                                      'high_prec': False,
-                                      'type_sel': None,
-                                      'repeat': 3,
-                                      'default': 0.0}}
+    data_requirement = {
+        "energy": {
+            "ndof": 1,
+            "atomic": False,
+            "must": False,
+            "high_prec": True,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "force": {
+            "ndof": 3,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "virial": {
+            "ndof": 9,
+            "atomic": False,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "atom_ener": {
+            "ndof": 1,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "atom_pref": {
+            "ndof": 1,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 3,
+            "default": 0.0,
+        },
+    }
     data.add_dict(data_requirement)
     stop_batch = jdata["training"]["numb_steps"]
 
     return INPUT, ckpt, frozen_model, model_ckpt, model_frz, data, stop_batch
 
 
-INPUT, CKPT, FROZEN_MODEL, CKPT_TRAINER, FRZ_TRAINER, VALID_DATA, STOP_BATCH = _init_models()
+(
+    INPUT,
+    CKPT,
+    FROZEN_MODEL,
+    CKPT_TRAINER,
+    FRZ_TRAINER,
+    VALID_DATA,
+    STOP_BATCH,
+) = _init_models()
 
 
 class TestInitFrzModelAType(unittest.TestCase):
@@ -118,26 +161,28 @@ def tearDownClass(cls):
         _file_delete(FROZEN_MODEL)
         _file_delete("out.json")
         _file_delete(str(tests_path / "checkpoint"))
-        _file_delete(CKPT+".meta")
-        _file_delete(CKPT+".index")
-        _file_delete(CKPT+".data-00000-of-00001")
-        _file_delete(CKPT+"-0.meta")
-        _file_delete(CKPT+"-0.index")
-        _file_delete(CKPT+"-0.data-00000-of-00001")
-        _file_delete(CKPT+"-1.meta")
-        _file_delete(CKPT+"-1.index")
-        _file_delete(CKPT+"-1.data-00000-of-00001")
+        _file_delete(CKPT + ".meta")
+        _file_delete(CKPT + ".index")
+        _file_delete(CKPT + ".data-00000-of-00001")
+        _file_delete(CKPT + "-0.meta")
+        _file_delete(CKPT + "-0.index")
+        _file_delete(CKPT + "-0.data-00000-of-00001")
+        _file_delete(CKPT + "-1.meta")
+        _file_delete(CKPT + "-1.index")
+        _file_delete(CKPT + "-1.data-00000-of-00001")
         _file_delete("input_v2_compat.json")
         _file_delete("lcurve.out")
 
     def test_single_frame(self):
         valid_batch = self.valid_data.get_batch()
-        natoms = valid_batch['natoms_vec']
+        natoms = valid_batch["natoms_vec"]
         tf.reset_default_graph()
         self.dp_ckpt.build(self.valid_data, self.stop_batch)
         self.dp_ckpt._init_session()
         feed_dict_ckpt = self.dp_ckpt.get_feed_dict(valid_batch, is_training=False)
-        ckpt_rmse_ckpt = self.dp_ckpt.loss.eval(self.dp_ckpt.sess, feed_dict_ckpt, natoms)
+        ckpt_rmse_ckpt = self.dp_ckpt.loss.eval(
+            self.dp_ckpt.sess, feed_dict_ckpt, natoms
+        )
         tf.reset_default_graph()
 
         self.dp_frz.build(self.valid_data, self.stop_batch)
@@ -147,8 +192,12 @@ def test_single_frame(self):
         tf.reset_default_graph()
 
         # check values
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_e'], ckpt_rmse_frz['rmse_e'], default_places)
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_f'], ckpt_rmse_frz['rmse_f'], default_places)
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_v'], ckpt_rmse_frz['rmse_v'], default_places)
-
-
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_e"], ckpt_rmse_frz["rmse_e"], default_places
+        )
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_f"], ckpt_rmse_frz["rmse_f"], default_places
+        )
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_v"], ckpt_rmse_frz["rmse_v"], default_places
+        )
diff --git a/source/tests/test_init_frz_model_se_atten.py b/source/tests/test_init_frz_model_se_atten.py
index fffa4785ec..a379ad453c 100644
--- a/source/tests/test_init_frz_model_se_atten.py
+++ b/source/tests/test_init_frz_model_se_atten.py
@@ -1,17 +1,40 @@
-import os, sys, platform, shutil, dpdata, json
-import numpy as np
-import unittest
+import json
+import os
+import platform
+import shutil
 import subprocess as sp
-from common import j_loader, tests_path, run_dp
-from deepmd.train.trainer import DPTrainer
-from deepmd.train.run_options import RunOptions
-from deepmd.utils.argcheck import normalize
-from deepmd.utils.compat import update_deepmd_input
-from deepmd.utils.data_system import DeepmdDataSystem
-
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION, tf
+import sys
+import unittest
+
+import dpdata
+import numpy as np
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
 from packaging.version import parse as parse_version
 
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    tf,
+)
+from deepmd.train.run_options import (
+    RunOptions,
+)
+from deepmd.train.trainer import (
+    DPTrainer,
+)
+from deepmd.utils.argcheck import (
+    normalize,
+)
+from deepmd.utils.compat import (
+    update_deepmd_input,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
+
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
 else:
@@ -36,14 +59,14 @@ def _init_models():
     jdata["training"]["training_data"]["systems"] = data_file
     jdata["training"]["validation_data"]["systems"] = data_file
     jdata["training"]["save_ckpt"] = ckpt
-    jdata['model']["descriptor"]['type'] = 'se_atten'
-    jdata['model']["descriptor"]['sel'] = 120
+    jdata["model"]["descriptor"]["type"] = "se_atten"
+    jdata["model"]["descriptor"]["sel"] = 120
     with open(INPUT, "w") as fp:
         json.dump(jdata, fp, indent=4)
     ret = run_dp("dp train " + INPUT)
-    np.testing.assert_equal(ret, 0, 'DP train failed!')
+    np.testing.assert_equal(ret, 0, "DP train failed!")
     ret = run_dp("dp freeze -c " + str(tests_path) + " -o " + frozen_model)
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
 
     jdata = update_deepmd_input(jdata, warning=True, dump="input_v2_compat.json")
     jdata = normalize(jdata)
@@ -57,43 +80,55 @@ def _init_models():
         test_size=1,
         rcut=rcut,
         type_map=type_map,
-        trn_all_set=True
+        trn_all_set=True,
     )
-    data_requirement = {'energy': {'ndof': 1,
-                                   'atomic': False,
-                                   'must': False,
-                                   'high_prec': True,
-                                   'type_sel': None,
-                                   'repeat': 1,
-                                   'default': 0.0},
-                        'force': {'ndof': 3,
-                                  'atomic': True,
-                                  'must': False,
-                                  'high_prec': False,
-                                  'type_sel': None,
-                                  'repeat': 1,
-                                  'default': 0.0},
-                        'virial': {'ndof': 9,
-                                   'atomic': False,
-                                   'must': False,
-                                   'high_prec': False,
-                                   'type_sel': None,
-                                   'repeat': 1,
-                                   'default': 0.0},
-                        'atom_ener': {'ndof': 1,
-                                      'atomic': True,
-                                      'must': False,
-                                      'high_prec': False,
-                                      'type_sel': None,
-                                      'repeat': 1,
-                                      'default': 0.0},
-                        'atom_pref': {'ndof': 1,
-                                      'atomic': True,
-                                      'must': False,
-                                      'high_prec': False,
-                                      'type_sel': None,
-                                      'repeat': 3,
-                                      'default': 0.0}}
+    data_requirement = {
+        "energy": {
+            "ndof": 1,
+            "atomic": False,
+            "must": False,
+            "high_prec": True,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "force": {
+            "ndof": 3,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "virial": {
+            "ndof": 9,
+            "atomic": False,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "atom_ener": {
+            "ndof": 1,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "atom_pref": {
+            "ndof": 1,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 3,
+            "default": 0.0,
+        },
+    }
     data.add_dict(data_requirement)
     stop_batch = jdata["training"]["numb_steps"]
 
@@ -101,11 +136,21 @@ def _init_models():
 
 
 if not parse_version(tf.__version__) < parse_version("1.15"):
-    INPUT, CKPT, FROZEN_MODEL, CKPT_TRAINER, FRZ_TRAINER, VALID_DATA, STOP_BATCH = _init_models()
-
-
-@unittest.skipIf(parse_version(tf.__version__) < parse_version("1.15"),
-    f"The current tf version {tf.__version__} is too low to run the new testing model.")
+    (
+        INPUT,
+        CKPT,
+        FROZEN_MODEL,
+        CKPT_TRAINER,
+        FRZ_TRAINER,
+        VALID_DATA,
+        STOP_BATCH,
+    ) = _init_models()
+
+
+@unittest.skipIf(
+    parse_version(tf.__version__) < parse_version("1.15"),
+    f"The current tf version {tf.__version__} is too low to run the new testing model.",
+)
 class TestInitFrzModelAtten(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
@@ -120,26 +165,28 @@ def tearDownClass(cls):
         _file_delete(FROZEN_MODEL)
         _file_delete("out.json")
         _file_delete(str(tests_path / "checkpoint"))
-        _file_delete(CKPT+".meta")
-        _file_delete(CKPT+".index")
-        _file_delete(CKPT+".data-00000-of-00001")
-        _file_delete(CKPT+"-0.meta")
-        _file_delete(CKPT+"-0.index")
-        _file_delete(CKPT+"-0.data-00000-of-00001")
-        _file_delete(CKPT+"-1.meta")
-        _file_delete(CKPT+"-1.index")
-        _file_delete(CKPT+"-1.data-00000-of-00001")
+        _file_delete(CKPT + ".meta")
+        _file_delete(CKPT + ".index")
+        _file_delete(CKPT + ".data-00000-of-00001")
+        _file_delete(CKPT + "-0.meta")
+        _file_delete(CKPT + "-0.index")
+        _file_delete(CKPT + "-0.data-00000-of-00001")
+        _file_delete(CKPT + "-1.meta")
+        _file_delete(CKPT + "-1.index")
+        _file_delete(CKPT + "-1.data-00000-of-00001")
         _file_delete("input_v2_compat.json")
         _file_delete("lcurve.out")
 
     def test_single_frame(self):
         valid_batch = self.valid_data.get_batch()
-        natoms = valid_batch['natoms_vec']
+        natoms = valid_batch["natoms_vec"]
         tf.reset_default_graph()
         self.dp_ckpt.build(self.valid_data, self.stop_batch)
         self.dp_ckpt._init_session()
         feed_dict_ckpt = self.dp_ckpt.get_feed_dict(valid_batch, is_training=False)
-        ckpt_rmse_ckpt = self.dp_ckpt.loss.eval(self.dp_ckpt.sess, feed_dict_ckpt, natoms)
+        ckpt_rmse_ckpt = self.dp_ckpt.loss.eval(
+            self.dp_ckpt.sess, feed_dict_ckpt, natoms
+        )
         tf.reset_default_graph()
 
         self.dp_frz.build(self.valid_data, self.stop_batch)
@@ -149,8 +196,12 @@ def test_single_frame(self):
         tf.reset_default_graph()
 
         # check values
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_e'], ckpt_rmse_frz['rmse_e'], default_places)
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_f'], ckpt_rmse_frz['rmse_f'], default_places)
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_v'], ckpt_rmse_frz['rmse_v'], default_places)
-
-
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_e"], ckpt_rmse_frz["rmse_e"], default_places
+        )
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_f"], ckpt_rmse_frz["rmse_f"], default_places
+        )
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_v"], ckpt_rmse_frz["rmse_v"], default_places
+        )
diff --git a/source/tests/test_init_frz_model_se_r.py b/source/tests/test_init_frz_model_se_r.py
index 51aa379efa..ae63253a0c 100644
--- a/source/tests/test_init_frz_model_se_r.py
+++ b/source/tests/test_init_frz_model_se_r.py
@@ -1,15 +1,38 @@
-import os, sys, platform, shutil, dpdata, json
-import numpy as np
-import unittest
+import json
+import os
+import platform
+import shutil
 import subprocess as sp
-from common import j_loader, tests_path, run_dp
-from deepmd.train.trainer import DPTrainer
-from deepmd.train.run_options import RunOptions
-from deepmd.utils.argcheck import normalize
-from deepmd.utils.compat import update_deepmd_input
-from deepmd.utils.data_system import DeepmdDataSystem
+import sys
+import unittest
 
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION, tf
+import dpdata
+import numpy as np
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    tf,
+)
+from deepmd.train.run_options import (
+    RunOptions,
+)
+from deepmd.train.trainer import (
+    DPTrainer,
+)
+from deepmd.utils.argcheck import (
+    normalize,
+)
+from deepmd.utils.compat import (
+    update_deepmd_input,
+)
+from deepmd.utils.data_system import (
+    DeepmdDataSystem,
+)
 
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
@@ -46,9 +69,9 @@ def _init_models():
     with open(INPUT, "w") as fp:
         json.dump(jdata, fp, indent=4)
     ret = run_dp("dp train " + INPUT)
-    np.testing.assert_equal(ret, 0, 'DP train failed!')
+    np.testing.assert_equal(ret, 0, "DP train failed!")
     ret = run_dp("dp freeze -c " + str(tests_path) + " -o " + frozen_model)
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
 
     jdata = update_deepmd_input(jdata, warning=True, dump="input_v2_compat.json")
     jdata = normalize(jdata)
@@ -62,50 +85,70 @@ def _init_models():
         test_size=1,
         rcut=rcut,
         type_map=type_map,
-        trn_all_set=True
+        trn_all_set=True,
     )
-    data_requirement = {'energy': {'ndof': 1,
-                                   'atomic': False,
-                                   'must': False,
-                                   'high_prec': True,
-                                   'type_sel': None,
-                                   'repeat': 1,
-                                   'default': 0.0},
-                        'force': {'ndof': 3,
-                                  'atomic': True,
-                                  'must': False,
-                                  'high_prec': False,
-                                  'type_sel': None,
-                                  'repeat': 1,
-                                  'default': 0.0},
-                        'virial': {'ndof': 9,
-                                   'atomic': False,
-                                   'must': False,
-                                   'high_prec': False,
-                                   'type_sel': None,
-                                   'repeat': 1,
-                                   'default': 0.0},
-                        'atom_ener': {'ndof': 1,
-                                      'atomic': True,
-                                      'must': False,
-                                      'high_prec': False,
-                                      'type_sel': None,
-                                      'repeat': 1,
-                                      'default': 0.0},
-                        'atom_pref': {'ndof': 1,
-                                      'atomic': True,
-                                      'must': False,
-                                      'high_prec': False,
-                                      'type_sel': None,
-                                      'repeat': 3,
-                                      'default': 0.0}}
+    data_requirement = {
+        "energy": {
+            "ndof": 1,
+            "atomic": False,
+            "must": False,
+            "high_prec": True,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "force": {
+            "ndof": 3,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "virial": {
+            "ndof": 9,
+            "atomic": False,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "atom_ener": {
+            "ndof": 1,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 1,
+            "default": 0.0,
+        },
+        "atom_pref": {
+            "ndof": 1,
+            "atomic": True,
+            "must": False,
+            "high_prec": False,
+            "type_sel": None,
+            "repeat": 3,
+            "default": 0.0,
+        },
+    }
     data.add_dict(data_requirement)
     stop_batch = jdata["training"]["numb_steps"]
 
     return INPUT, ckpt, frozen_model, model_ckpt, model_frz, data, stop_batch
 
 
-INPUT, CKPT, FROZEN_MODEL, CKPT_TRAINER, FRZ_TRAINER, VALID_DATA, STOP_BATCH = _init_models()
+(
+    INPUT,
+    CKPT,
+    FROZEN_MODEL,
+    CKPT_TRAINER,
+    FRZ_TRAINER,
+    VALID_DATA,
+    STOP_BATCH,
+) = _init_models()
 
 
 class TestInitFrzModelR(unittest.TestCase):
@@ -122,26 +165,28 @@ def tearDownClass(cls):
         _file_delete(FROZEN_MODEL)
         _file_delete("out.json")
         _file_delete(str(tests_path / "checkpoint"))
-        _file_delete(CKPT+".meta")
-        _file_delete(CKPT+".index")
-        _file_delete(CKPT+".data-00000-of-00001")
-        _file_delete(CKPT+"-0.meta")
-        _file_delete(CKPT+"-0.index")
-        _file_delete(CKPT+"-0.data-00000-of-00001")
-        _file_delete(CKPT+"-1.meta")
-        _file_delete(CKPT+"-1.index")
-        _file_delete(CKPT+"-1.data-00000-of-00001")
+        _file_delete(CKPT + ".meta")
+        _file_delete(CKPT + ".index")
+        _file_delete(CKPT + ".data-00000-of-00001")
+        _file_delete(CKPT + "-0.meta")
+        _file_delete(CKPT + "-0.index")
+        _file_delete(CKPT + "-0.data-00000-of-00001")
+        _file_delete(CKPT + "-1.meta")
+        _file_delete(CKPT + "-1.index")
+        _file_delete(CKPT + "-1.data-00000-of-00001")
         _file_delete("input_v2_compat.json")
         _file_delete("lcurve.out")
 
     def test_single_frame(self):
         valid_batch = self.valid_data.get_batch()
-        natoms = valid_batch['natoms_vec']
+        natoms = valid_batch["natoms_vec"]
         tf.reset_default_graph()
         self.dp_ckpt.build(self.valid_data, self.stop_batch)
         self.dp_ckpt._init_session()
         feed_dict_ckpt = self.dp_ckpt.get_feed_dict(valid_batch, is_training=False)
-        ckpt_rmse_ckpt = self.dp_ckpt.loss.eval(self.dp_ckpt.sess, feed_dict_ckpt, natoms)
+        ckpt_rmse_ckpt = self.dp_ckpt.loss.eval(
+            self.dp_ckpt.sess, feed_dict_ckpt, natoms
+        )
         tf.reset_default_graph()
 
         self.dp_frz.build(self.valid_data, self.stop_batch)
@@ -151,6 +196,12 @@ def test_single_frame(self):
         tf.reset_default_graph()
 
         # check values
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_e'], ckpt_rmse_frz['rmse_e'], default_places)
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_f'], ckpt_rmse_frz['rmse_f'], default_places)
-        np.testing.assert_almost_equal(ckpt_rmse_ckpt['rmse_v'], ckpt_rmse_frz['rmse_v'], default_places)
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_e"], ckpt_rmse_frz["rmse_e"], default_places
+        )
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_f"], ckpt_rmse_frz["rmse_f"], default_places
+        )
+        np.testing.assert_almost_equal(
+            ckpt_rmse_ckpt["rmse_v"], ckpt_rmse_frz["rmse_v"], default_places
+        )
diff --git a/source/tests/test_lammps.py b/source/tests/test_lammps.py
index 098ab55d5d..c1a8036a7f 100644
--- a/source/tests/test_lammps.py
+++ b/source/tests/test_lammps.py
@@ -1,20 +1,28 @@
-import unittest
 import os
 import subprocess
+import unittest
+from pathlib import (
+    Path,
+)
 
-from pathlib import Path
-from deepmd.utils.convert import convert_pbtxt_to_pb
+from deepmd.utils.convert import (
+    convert_pbtxt_to_pb,
+)
 
-@unittest.skipIf(os.environ.get("CIBUILDWHEEL", "0") != "1", "Only test under cibuildwheel environment")
+
+@unittest.skipIf(
+    os.environ.get("CIBUILDWHEEL", "0") != "1",
+    "Only test under cibuildwheel environment",
+)
 class TestLAMMPS(unittest.TestCase):
     """Test LAMMPS in cibuildwheel environment."""
+
     @classmethod
     def setUpClass(cls):
         cls.work_dir = (Path(__file__).parent / "infer").absolute()
 
         convert_pbtxt_to_pb(
-            str(cls.work_dir / "deeppot.pbtxt"),
-            str(cls.work_dir / "deep_pot.pb")
+            str(cls.work_dir / "deeppot.pbtxt"), str(cls.work_dir / "deep_pot.pb")
         )
 
     def test_lmp(self):
diff --git a/source/tests/test_layer_name.py b/source/tests/test_layer_name.py
index dec4513669..5e3355c6ef 100644
--- a/source/tests/test_layer_name.py
+++ b/source/tests/test_layer_name.py
@@ -1,12 +1,27 @@
 import numpy as np
-
-from deepmd.env import tf
-from common import gen_data, del_data, j_loader
-from common import DataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import EnerFitting, DipoleFittingSeA
-from deepmd.model import MultiModel
-from deepmd.common import j_must_have
+from common import (
+    DataSystem,
+    del_data,
+    gen_data,
+    j_loader,
+)
+
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    DipoleFittingSeA,
+    EnerFitting,
+)
+from deepmd.model import (
+    MultiModel,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
@@ -22,100 +37,111 @@ def tearDown(self):
 
     def test_model(self):
         """Two fittings which share the same parameters should give the same result"""
-        jfile = 'water_layer_name.json'
+        jfile = "water_layer_name.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        rcut = j_must_have(jdata['model']['descriptor'], 'rcut')
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
 
         data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
 
         test_data = data.get_test()
         numb_test = 1
 
-        jdata['model']['descriptor'].pop('type', None)
-        jdata['model']['descriptor']['multi_task'] = True
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed=True)
+        jdata["model"]["descriptor"].pop("type", None)
+        jdata["model"]["descriptor"]["multi_task"] = True
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
         fitting_dict = {}
         fitting_type_dict = {}
-        for fitting_key in jdata['model']['fitting_net_dict']:
-            item_fitting_param = jdata['model']['fitting_net_dict'][fitting_key]
-            item_fitting_type = item_fitting_param.get('type', 'ener')
+        for fitting_key in jdata["model"]["fitting_net_dict"]:
+            item_fitting_param = jdata["model"]["fitting_net_dict"][fitting_key]
+            item_fitting_type = item_fitting_param.get("type", "ener")
             fitting_type_dict[fitting_key] = item_fitting_type
-            item_fitting_param.pop('type', None)
-            item_fitting_param.pop('fit_diag', None)
-            item_fitting_param['descrpt'] = descrpt
-            if item_fitting_type == 'ener':
-                fitting_dict[fitting_key] = EnerFitting(**item_fitting_param, uniform_seed=True)
-            elif item_fitting_type == 'dipole':
-                fitting_dict[fitting_key] = DipoleFittingSeA(**item_fitting_param, uniform_seed=True)
+            item_fitting_param.pop("type", None)
+            item_fitting_param.pop("fit_diag", None)
+            item_fitting_param["descrpt"] = descrpt
+            if item_fitting_type == "ener":
+                fitting_dict[fitting_key] = EnerFitting(
+                    **item_fitting_param, uniform_seed=True
+                )
+            elif item_fitting_type == "dipole":
+                fitting_dict[fitting_key] = DipoleFittingSeA(
+                    **item_fitting_param, uniform_seed=True
+                )
             else:
-                raise RuntimeError('Test should not be here!')
+                raise RuntimeError("Test should not be here!")
         model = MultiModel(descrpt, fitting_dict, fitting_type_dict)
 
-        input_data = {'coord': [test_data['coord']],
-                      'box': [test_data['box']],
-                      'type': [test_data['type']],
-                      'natoms_vec': [test_data['natoms_vec']],
-                      'default_mesh': [test_data['default_mesh']]
-                      }
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
+        }
 
-        for fitting_key in jdata['model']['fitting_net_dict']:
+        for fitting_key in jdata["model"]["fitting_net_dict"]:
             model._compute_input_stat(input_data, fitting_key=fitting_key)
         model.descrpt.merge_input_stats(model.descrpt.stat_dict)
         model.descrpt.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c = tf.placeholder(tf.float32, [5], name='t_prop_c')
-        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type = tf.placeholder(tf.int32, [None], name='i_type')
-        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name='i_natoms')
-        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh')
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
         is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        model_pred \
-            = model.build(t_coord,
-                        t_type,
-                        t_natoms,
-                        t_box,
-                        t_mesh,
-                        t_fparam,
-                        suffix="_layer_name",
-                        reuse=False)
-
-        e_energy1 = model_pred['water_ener']['energy']
-        e_force1 = model_pred['water_ener']['force']
-        e_virial1 = model_pred['water_ener']['virial']
-        e_energy2 = model_pred['water_ener2']['energy']
-        e_force2 = model_pred['water_ener2']['force']
-        e_virial2 = model_pred['water_ener2']['virial']
-        feed_dict_test = {t_prop_c: test_data['prop_c'],
-                        t_energy: test_data['energy'][:numb_test],
-                        t_force: np.reshape(test_data['force'][:numb_test, :], [-1]),
-                        t_virial: np.reshape(test_data['virial'][:numb_test, :], [-1]),
-                        t_atom_ener: np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                        t_coord: np.reshape(test_data['coord'][:numb_test, :], [-1]),
-                        t_box: test_data['box'][:numb_test, :],
-                        t_type: np.reshape(test_data['type'][:numb_test, :], [-1]),
-                        t_natoms: test_data['natoms_vec'],
-                        t_mesh: test_data['default_mesh'],
-                        is_training: False}
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            t_fparam,
+            suffix="_layer_name",
+            reuse=False,
+        )
+
+        e_energy1 = model_pred["water_ener"]["energy"]
+        e_force1 = model_pred["water_ener"]["force"]
+        e_virial1 = model_pred["water_ener"]["virial"]
+        e_energy2 = model_pred["water_ener2"]["energy"]
+        e_force2 = model_pred["water_ener2"]["force"]
+        e_virial2 = model_pred["water_ener2"]["virial"]
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         with self.test_session() as sess:
             sess.run(tf.global_variables_initializer())
             [e1, f1, v1, e2, f2, v2] = sess.run(
-                            [e_energy1, e_force1, e_virial1, e_energy2, e_force2, e_virial2],
-                            feed_dict=feed_dict_test)
+                [e_energy1, e_force1, e_virial1, e_energy2, e_force2, e_virial2],
+                feed_dict=feed_dict_test,
+            )
         np.testing.assert_allclose(e1, e2, rtol=1e-5, atol=1e-5)
         np.testing.assert_allclose(f1, f2, rtol=1e-5, atol=1e-5)
         np.testing.assert_allclose(v1, v2, rtol=1e-5, atol=1e-5)
diff --git a/source/tests/test_mixed_prec_training.py b/source/tests/test_mixed_prec_training.py
index e51c64aaaf..9ee4f8287b 100644
--- a/source/tests/test_mixed_prec_training.py
+++ b/source/tests/test_mixed_prec_training.py
@@ -1,36 +1,53 @@
-import os,json
-import numpy as np
-import unittest
+import json
+import os
 import subprocess as sp
-from packaging.version import Version
+import unittest
+
+import numpy as np
 
-from deepmd.infer import DeepPot
 # from deepmd.entrypoints.compress import compress
-from common import j_loader, tests_path, run_dp
-from deepmd.env import TF_VERSION
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+from packaging.version import (
+    Version,
+)
+
+from deepmd.env import (
+    TF_VERSION,
+)
+from deepmd.infer import (
+    DeepPot,
+)
 
 
-def _file_delete(file) :
+def _file_delete(file):
     if os.path.isdir(file):
         os.rmdir(file)
     elif os.path.isfile(file):
         os.remove(file)
 
+
 def _subprocess_run(command):
     popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b''):
-        if hasattr(line, 'decode'):
-            line = line.decode('utf-8')
+    for line in iter(popen.stdout.readline, b""):
+        if hasattr(line, "decode"):
+            line = line.decode("utf-8")
         line = line.rstrip()
         print(line)
     popen.wait()
     return popen.returncode
 
+
 class TestMixedPrecTraining(unittest.TestCase):
     def setUp(self):
-        data_file  = str(tests_path / os.path.join("model_compression", "data"))
+        data_file = str(tests_path / os.path.join("model_compression", "data"))
         self.INPUT = str(tests_path / "input.json")
-        jdata = j_loader(str(tests_path / os.path.join("model_compression", "input.json")))
+        jdata = j_loader(
+            str(tests_path / os.path.join("model_compression", "input.json"))
+        )
         jdata["training"]["training_data"]["systems"] = data_file
         jdata["training"]["validation_data"]["systems"] = data_file
         jdata["training"]["mixed_precision"] = {}
@@ -41,10 +58,10 @@ def setUp(self):
 
     def test_training(self):
         _TF_VERSION = Version(TF_VERSION)
-        # check the TF_VERSION, when TF < 1.12, mixed precision is not allowed 
-        if _TF_VERSION >= Version('1.14.0'):
+        # check the TF_VERSION, when TF < 1.12, mixed precision is not allowed
+        if _TF_VERSION >= Version("1.14.0"):
             ret = run_dp("dp train " + self.INPUT)
-            np.testing.assert_equal(ret, 0, 'DP train failed!')
+            np.testing.assert_equal(ret, 0, "DP train failed!")
 
     def tearDown(self):
         _file_delete(self.INPUT)
diff --git a/source/tests/test_model_compression_se_a.py b/source/tests/test_model_compression_se_a.py
index 4db6e8a12f..f33beb7a51 100644
--- a/source/tests/test_model_compression_se_a.py
+++ b/source/tests/test_model_compression_se_a.py
@@ -1,37 +1,55 @@
-import os,sys,platform,shutil,dpdata,json
-import numpy as np
-import unittest
+import json
+import os
+import platform
+import shutil
 import subprocess as sp
+import sys
+import unittest
 
-from deepmd.infer import DeepPot
-from deepmd.env import MODEL_VERSION
-# from deepmd.entrypoints.compress import compress
-from common import j_loader, tests_path, run_dp
+import dpdata
+import numpy as np
 
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+# from deepmd.entrypoints.compress import compress
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    MODEL_VERSION,
+)
+from deepmd.infer import (
+    DeepPot,
+)
+
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
-else :
+else:
     default_places = 10
 
-def _file_delete(file) :
+
+def _file_delete(file):
     if os.path.isdir(file):
         os.rmdir(file)
     elif os.path.isfile(file):
         os.remove(file)
 
+
 def _subprocess_run(command):
     popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b''):
-        if hasattr(line, 'decode'):
-            line = line.decode('utf-8')
+    for line in iter(popen.stdout.readline, b""):
+        if hasattr(line, "decode"):
+            line = line.decode("utf-8")
         line = line.rstrip()
         print(line)
     popen.wait()
     return popen.returncode
 
+
 def _init_models():
-    data_file  = str(tests_path / os.path.join("model_compression", "data"))
+    data_file = str(tests_path / os.path.join("model_compression", "data"))
     frozen_model = str(tests_path / "dp-original.pb")
     compressed_model = str(tests_path / "dp-compressed.pb")
     INPUT = str(tests_path / "input.json")
@@ -42,75 +60,103 @@ def _init_models():
         json.dump(jdata, fp, indent=4)
 
     ret = run_dp("dp train " + INPUT)
-    np.testing.assert_equal(ret, 0, 'DP train failed!')
+    np.testing.assert_equal(ret, 0, "DP train failed!")
     ret = run_dp("dp freeze -o " + frozen_model)
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
     ret = run_dp("dp compress " + " -i " + frozen_model + " -o " + compressed_model)
-    np.testing.assert_equal(ret, 0, 'DP model compression failed!')
+    np.testing.assert_equal(ret, 0, "DP model compression failed!")
     return INPUT, frozen_model, compressed_model
 
+
 INPUT, FROZEN_MODEL, COMPRESSED_MODEL = _init_models()
 
-class TestDeepPotAPBC(unittest.TestCase) :
+
+class TestDeepPotAPBC(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_compressed = DeepPot(COMPRESSED_MODEL)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
 
     def test_attrs(self):
         self.assertEqual(self.dp_original.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_original.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp_original.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_original.get_dim_fparam(), 0)
         self.assertEqual(self.dp_original.get_dim_aparam(), 0)
 
         self.assertEqual(self.dp_compressed.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_compressed.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_compressed.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(
+            self.dp_compressed.get_rcut(), 6.0, places=default_places
+        )
+        self.assertEqual(self.dp_compressed.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_compressed.get_dim_fparam(), 0)
         self.assertEqual(self.dp_compressed.get_dim_aparam(), 0)
 
     def test_1frame(self):
-        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
-        ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False)
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_1frame_atm(self):
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
@@ -121,21 +167,25 @@ def test_1frame_atm(self):
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(coords2, box2, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(coords2, box2, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            coords2, box2, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
 
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
@@ -145,53 +195,77 @@ def test_2frame_atm(self):
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
 
-class TestDeepPotANoPBC(unittest.TestCase) :
+class TestDeepPotANoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_compressed = DeepPot(COMPRESSED_MODEL)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
         self.box = None
 
     def test_1frame(self):
-        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
-        ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False)
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_1frame_atm(self):
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
@@ -201,21 +275,25 @@ def test_1frame_atm(self):
 
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(coords2, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(coords2, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            coords2, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            coords2, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
 
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
@@ -224,54 +302,78 @@ def test_2frame_atm(self):
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
-    
-class TestDeepPotALargeBoxNoPBC(unittest.TestCase) :
+
+class TestDeepPotALargeBoxNoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_compressed = DeepPot(COMPRESSED_MODEL)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([19., 0., 0., 0., 13., 0., 0., 0., 13.])
+        self.box = np.array([19.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
 
     def test_1frame(self):
-        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
-        ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False)
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_1frame_atm(self):
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
@@ -280,16 +382,26 @@ def test_1frame_atm(self):
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_ase(self):
-        from ase import Atoms
-        from deepmd.calculator import DP
-        water0 = Atoms('OHHOHH',
-                    positions=self.coords.reshape((-1,3)),
-                    cell=self.box.reshape((3,3)),
-                    calculator=DP(FROZEN_MODEL))
-        water1 = Atoms('OHHOHH',
-                    positions=self.coords.reshape((-1,3)),
-                    cell=self.box.reshape((3,3)),
-                    calculator=DP(COMPRESSED_MODEL))
+        from ase import (
+            Atoms,
+        )
+
+        from deepmd.calculator import (
+            DP,
+        )
+
+        water0 = Atoms(
+            "OHHOHH",
+            positions=self.coords.reshape((-1, 3)),
+            cell=self.box.reshape((3, 3)),
+            calculator=DP(FROZEN_MODEL),
+        )
+        water1 = Atoms(
+            "OHHOHH",
+            positions=self.coords.reshape((-1, 3)),
+            cell=self.box.reshape((3, 3)),
+            calculator=DP(COMPRESSED_MODEL),
+        )
         ee0 = water0.get_potential_energy()
         ff0 = water0.get_forces()
         ee1 = water1.get_potential_energy()
@@ -298,19 +410,36 @@ def test_ase(self):
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
 
-class TestDeepPotAPBCExcludeTypes(unittest.TestCase) :
+
+class TestDeepPotAPBCExcludeTypes(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_compressed = DeepPot(COMPRESSED_MODEL)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
 
     @classmethod
     def tearDownClass(self):
@@ -336,50 +465,60 @@ def tearDownClass(self):
 
     def test_attrs(self):
         self.assertEqual(self.dp_original.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_original.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp_original.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_original.get_dim_fparam(), 0)
         self.assertEqual(self.dp_original.get_dim_aparam(), 0)
 
         self.assertEqual(self.dp_compressed.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_compressed.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_compressed.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(
+            self.dp_compressed.get_rcut(), 6.0, places=default_places
+        )
+        self.assertEqual(self.dp_compressed.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_compressed.get_dim_fparam(), 0)
         self.assertEqual(self.dp_compressed.get_dim_aparam(), 0)
 
     def test_1frame(self):
-        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
-        ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False)
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_1frame_atm(self):
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
@@ -390,25 +529,29 @@ def test_1frame_atm(self):
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(coords2, box2, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(coords2, box2, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            coords2, box2, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
 
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
         np.testing.assert_almost_equal(av0, av1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
-        np.testing.assert_almost_equal(vv0, vv1, default_places)
\ No newline at end of file
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
diff --git a/source/tests/test_model_compression_se_a_type_one_side_exclude_types.py b/source/tests/test_model_compression_se_a_type_one_side_exclude_types.py
index ab575283ab..ab85128dc5 100644
--- a/source/tests/test_model_compression_se_a_type_one_side_exclude_types.py
+++ b/source/tests/test_model_compression_se_a_type_one_side_exclude_types.py
@@ -1,37 +1,55 @@
-import os,sys,platform,shutil,dpdata,json
-import numpy as np
-import unittest
+import json
+import os
+import platform
+import shutil
 import subprocess as sp
+import sys
+import unittest
+
+import dpdata
+import numpy as np
 
-from deepmd.infer import DeepPot
-from deepmd.env import MODEL_VERSION
 # from deepmd.entrypoints.compress import compress
-from common import j_loader, tests_path, run_dp
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    MODEL_VERSION,
+)
+from deepmd.infer import (
+    DeepPot,
+)
 
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
-else :
+else:
     default_places = 10
 
-def _file_delete(file) :
+
+def _file_delete(file):
     if os.path.isdir(file):
         os.rmdir(file)
     elif os.path.isfile(file):
         os.remove(file)
 
+
 def _subprocess_run(command):
     popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b''):
-        if hasattr(line, 'decode'):
-            line = line.decode('utf-8')
+    for line in iter(popen.stdout.readline, b""):
+        if hasattr(line, "decode"):
+            line = line.decode("utf-8")
         line = line.rstrip()
         print(line)
     popen.wait()
     return popen.returncode
 
+
 def _init_models():
-    data_file  = str(tests_path / os.path.join("model_compression", "data"))
+    data_file = str(tests_path / os.path.join("model_compression", "data"))
     frozen_model = str(tests_path / "dp-original-type-one-side-exclude-types.pb")
     compressed_model = str(tests_path / "dp-compressed-type-one-side-exclude-types.pb")
     INPUT = str(tests_path / "input.json")
@@ -44,75 +62,103 @@ def _init_models():
         json.dump(jdata, fp, indent=4)
 
     ret = run_dp("dp train " + INPUT)
-    np.testing.assert_equal(ret, 0, 'DP train failed!')
+    np.testing.assert_equal(ret, 0, "DP train failed!")
     ret = run_dp("dp freeze -o " + frozen_model)
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
     ret = run_dp("dp compress " + " -i " + frozen_model + " -o " + compressed_model)
-    np.testing.assert_equal(ret, 0, 'DP model compression failed!')
+    np.testing.assert_equal(ret, 0, "DP model compression failed!")
     return INPUT, frozen_model, compressed_model
 
+
 INPUT, FROZEN_MODEL, COMPRESSED_MODEL = _init_models()
 
-class TestDeepPotAPBCTypeOneSideExcludeTypes(unittest.TestCase) :
+
+class TestDeepPotAPBCTypeOneSideExcludeTypes(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_compressed = DeepPot(COMPRESSED_MODEL)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
 
     def test_attrs(self):
         self.assertEqual(self.dp_original.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_original.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp_original.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_original.get_dim_fparam(), 0)
         self.assertEqual(self.dp_original.get_dim_aparam(), 0)
 
         self.assertEqual(self.dp_compressed.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_compressed.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_compressed.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(
+            self.dp_compressed.get_rcut(), 6.0, places=default_places
+        )
+        self.assertEqual(self.dp_compressed.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_compressed.get_dim_fparam(), 0)
         self.assertEqual(self.dp_compressed.get_dim_aparam(), 0)
 
     def test_1frame(self):
-        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
-        ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False)
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_1frame_atm(self):
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
@@ -123,21 +169,25 @@ def test_1frame_atm(self):
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(coords2, box2, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(coords2, box2, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            coords2, box2, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
 
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
diff --git a/source/tests/test_model_compression_se_r.py b/source/tests/test_model_compression_se_r.py
index 9433c93017..a8ef379b23 100644
--- a/source/tests/test_model_compression_se_r.py
+++ b/source/tests/test_model_compression_se_r.py
@@ -1,47 +1,65 @@
-import os,sys,platform,shutil,dpdata,json
-import numpy as np
-import unittest
+import json
+import os
+import platform
+import shutil
 import subprocess as sp
+import sys
+import unittest
 
-from deepmd.infer import DeepPot
-from deepmd.env import MODEL_VERSION
-# from deepmd.entrypoints.compress import compress
-from common import j_loader, tests_path, run_dp
+import dpdata
+import numpy as np
 
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+# from deepmd.entrypoints.compress import compress
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    MODEL_VERSION,
+)
+from deepmd.infer import (
+    DeepPot,
+)
+
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
-else :
+else:
     default_places = 10
 
-def _file_delete(file) :
+
+def _file_delete(file):
     if os.path.isdir(file):
         os.rmdir(file)
     elif os.path.isfile(file):
         os.remove(file)
 
+
 def _subprocess_run(command):
     popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b''):
-        if hasattr(line, 'decode'):
-            line = line.decode('utf-8')
+    for line in iter(popen.stdout.readline, b""):
+        if hasattr(line, "decode"):
+            line = line.decode("utf-8")
         line = line.rstrip()
         print(line)
     popen.wait()
     return popen.returncode
 
+
 def _init_models():
-    data_file  = str(tests_path / os.path.join("model_compression", "data"))
+    data_file = str(tests_path / os.path.join("model_compression", "data"))
     frozen_model = str(tests_path / "dp-original-se-r.pb")
     compressed_model = str(tests_path / "dp-compressed-se-r.pb")
     INPUT = str(tests_path / "input.json")
     jdata = j_loader(str(tests_path / os.path.join("model_compression", "input.json")))
     jdata["model"]["descriptor"] = {}
     jdata["model"]["descriptor"]["type"] = "se_e2_r"
-    jdata["model"]["descriptor"]["sel"]  = [46, 92]
+    jdata["model"]["descriptor"]["sel"] = [46, 92]
     jdata["model"]["descriptor"]["rcut_smth"] = 0.5
     jdata["model"]["descriptor"]["rcut"] = 6.0
-    jdata["model"]["descriptor"]["neuron"] = [5,10,20]
+    jdata["model"]["descriptor"]["neuron"] = [5, 10, 20]
     jdata["model"]["descriptor"]["resnet_dt"] = False
     jdata["model"]["descriptor"]["seed"] = 1
     jdata["training"]["training_data"]["systems"] = data_file
@@ -50,75 +68,103 @@ def _init_models():
         json.dump(jdata, fp, indent=4)
 
     ret = run_dp("dp train " + INPUT)
-    np.testing.assert_equal(ret, 0, 'DP train failed!')
+    np.testing.assert_equal(ret, 0, "DP train failed!")
     ret = run_dp("dp freeze -o " + frozen_model)
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
     ret = run_dp("dp compress " + " -i " + frozen_model + " -o " + compressed_model)
-    np.testing.assert_equal(ret, 0, 'DP model compression failed!')
+    np.testing.assert_equal(ret, 0, "DP model compression failed!")
     return INPUT, frozen_model, compressed_model
 
+
 INPUT, FROZEN_MODEL, COMPRESSED_MODEL = _init_models()
 
-class TestDeepPotAPBC(unittest.TestCase) :
+
+class TestDeepPotAPBC(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_compressed = DeepPot(COMPRESSED_MODEL)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
 
     def test_attrs(self):
         self.assertEqual(self.dp_original.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_original.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp_original.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_original.get_dim_fparam(), 0)
         self.assertEqual(self.dp_original.get_dim_aparam(), 0)
 
         self.assertEqual(self.dp_compressed.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_compressed.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_compressed.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(
+            self.dp_compressed.get_rcut(), 6.0, places=default_places
+        )
+        self.assertEqual(self.dp_compressed.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_compressed.get_dim_fparam(), 0)
         self.assertEqual(self.dp_compressed.get_dim_aparam(), 0)
 
     def test_1frame(self):
-        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
-        ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False)
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_1frame_atm(self):
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
@@ -129,21 +175,25 @@ def test_1frame_atm(self):
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(coords2, box2, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(coords2, box2, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            coords2, box2, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
 
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
@@ -153,53 +203,77 @@ def test_2frame_atm(self):
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
 
-class TestDeepPotANoPBC(unittest.TestCase) :
+class TestDeepPotANoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_compressed = DeepPot(COMPRESSED_MODEL)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
         self.box = None
 
     def test_1frame(self):
-        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
-        ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False)
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_1frame_atm(self):
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
@@ -209,21 +283,25 @@ def test_1frame_atm(self):
 
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(coords2, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(coords2, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            coords2, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            coords2, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
 
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
@@ -232,54 +310,78 @@ def test_2frame_atm(self):
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
-    
-class TestDeepPotALargeBoxNoPBC(unittest.TestCase) :
+
+class TestDeepPotALargeBoxNoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_compressed = DeepPot(COMPRESSED_MODEL)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([19., 0., 0., 0., 13., 0., 0., 0., 13.])
+        self.box = np.array([19.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
 
     def test_1frame(self):
-        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
-        ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False)
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_1frame_atm(self):
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
@@ -288,16 +390,26 @@ def test_1frame_atm(self):
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_ase(self):
-        from ase import Atoms
-        from deepmd.calculator import DP
-        water0 = Atoms('OHHOHH',
-                    positions=self.coords.reshape((-1,3)),
-                    cell=self.box.reshape((3,3)),
-                    calculator=DP(FROZEN_MODEL))
-        water1 = Atoms('OHHOHH',
-                    positions=self.coords.reshape((-1,3)),
-                    cell=self.box.reshape((3,3)),
-                    calculator=DP(COMPRESSED_MODEL))
+        from ase import (
+            Atoms,
+        )
+
+        from deepmd.calculator import (
+            DP,
+        )
+
+        water0 = Atoms(
+            "OHHOHH",
+            positions=self.coords.reshape((-1, 3)),
+            cell=self.box.reshape((3, 3)),
+            calculator=DP(FROZEN_MODEL),
+        )
+        water1 = Atoms(
+            "OHHOHH",
+            positions=self.coords.reshape((-1, 3)),
+            cell=self.box.reshape((3, 3)),
+            calculator=DP(COMPRESSED_MODEL),
+        )
         ee0 = water0.get_potential_energy()
         ff0 = water0.get_forces()
         ee1 = water1.get_potential_energy()
@@ -306,19 +418,36 @@ def test_ase(self):
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
 
-class TestDeepPotAPBCExcludeTypes(unittest.TestCase) :
+
+class TestDeepPotAPBCExcludeTypes(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_compressed = DeepPot(COMPRESSED_MODEL)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
 
     @classmethod
     def tearDownClass(self):
@@ -344,50 +473,60 @@ def tearDownClass(self):
 
     def test_attrs(self):
         self.assertEqual(self.dp_original.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_original.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp_original.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_original.get_dim_fparam(), 0)
         self.assertEqual(self.dp_original.get_dim_aparam(), 0)
 
         self.assertEqual(self.dp_compressed.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_compressed.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_compressed.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(
+            self.dp_compressed.get_rcut(), 6.0, places=default_places
+        )
+        self.assertEqual(self.dp_compressed.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_compressed.get_dim_fparam(), 0)
         self.assertEqual(self.dp_compressed.get_dim_aparam(), 0)
 
     def test_1frame(self):
-        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
-        ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False)
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_1frame_atm(self):
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
@@ -398,25 +537,29 @@ def test_1frame_atm(self):
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(coords2, box2, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(coords2, box2, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            coords2, box2, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
 
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
         np.testing.assert_almost_equal(av0, av1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
-        np.testing.assert_almost_equal(vv0, vv1, default_places)
\ No newline at end of file
+        np.testing.assert_almost_equal(vv0, vv1, default_places)
diff --git a/source/tests/test_model_compression_se_t.py b/source/tests/test_model_compression_se_t.py
index 71e58fd212..17d3e7d7a6 100644
--- a/source/tests/test_model_compression_se_t.py
+++ b/source/tests/test_model_compression_se_t.py
@@ -1,47 +1,65 @@
-import os,sys,platform,shutil,dpdata,json
-import numpy as np
-import unittest
+import json
+import os
+import platform
+import shutil
 import subprocess as sp
+import sys
+import unittest
 
-from deepmd.infer import DeepPot
-from deepmd.env import MODEL_VERSION
-# from deepmd.entrypoints.compress import compress
-from common import j_loader, tests_path, run_dp
+import dpdata
+import numpy as np
 
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+# from deepmd.entrypoints.compress import compress
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    MODEL_VERSION,
+)
+from deepmd.infer import (
+    DeepPot,
+)
+
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
-else :
+else:
     default_places = 10
 
-def _file_delete(file) :
+
+def _file_delete(file):
     if os.path.isdir(file):
         os.rmdir(file)
     elif os.path.isfile(file):
         os.remove(file)
 
+
 def _subprocess_run(command):
     popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b''):
-        if hasattr(line, 'decode'):
-            line = line.decode('utf-8')
+    for line in iter(popen.stdout.readline, b""):
+        if hasattr(line, "decode"):
+            line = line.decode("utf-8")
         line = line.rstrip()
         print(line)
     popen.wait()
     return popen.returncode
 
+
 def _init_models():
-    data_file  = str(tests_path / os.path.join("model_compression", "data"))
+    data_file = str(tests_path / os.path.join("model_compression", "data"))
     frozen_model = str(tests_path / "dp-original-se-t.pb")
     compressed_model = str(tests_path / "dp-compressed-se-t.pb")
     INPUT = str(tests_path / "input.json")
     jdata = j_loader(str(tests_path / os.path.join("model_compression", "input.json")))
     jdata["model"]["descriptor"] = {}
     jdata["model"]["descriptor"]["type"] = "se_e3"
-    jdata["model"]["descriptor"]["sel"]  = [46, 92]
+    jdata["model"]["descriptor"]["sel"] = [46, 92]
     jdata["model"]["descriptor"]["rcut_smth"] = 0.5
     jdata["model"]["descriptor"]["rcut"] = 6.0
-    jdata["model"]["descriptor"]["neuron"] = [4,8,16]
+    jdata["model"]["descriptor"]["neuron"] = [4, 8, 16]
     jdata["model"]["descriptor"]["resnet_dt"] = False
     jdata["model"]["descriptor"]["seed"] = 1
     jdata["training"]["training_data"]["systems"] = data_file
@@ -50,75 +68,103 @@ def _init_models():
         json.dump(jdata, fp, indent=4)
 
     ret = run_dp("dp train " + INPUT)
-    np.testing.assert_equal(ret, 0, 'DP train failed!')
+    np.testing.assert_equal(ret, 0, "DP train failed!")
     ret = run_dp("dp freeze -o " + frozen_model)
-    np.testing.assert_equal(ret, 0, 'DP freeze failed!')
+    np.testing.assert_equal(ret, 0, "DP freeze failed!")
     ret = run_dp("dp compress " + " -i " + frozen_model + " -o " + compressed_model)
-    np.testing.assert_equal(ret, 0, 'DP model compression failed!')
+    np.testing.assert_equal(ret, 0, "DP model compression failed!")
     return INPUT, frozen_model, compressed_model
 
+
 INPUT, FROZEN_MODEL, COMPRESSED_MODEL = _init_models()
 
-class TestDeepPotAPBC(unittest.TestCase) :
+
+class TestDeepPotAPBC(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_compressed = DeepPot(COMPRESSED_MODEL)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
 
     def test_attrs(self):
         self.assertEqual(self.dp_original.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_original.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp_original.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_original.get_dim_fparam(), 0)
         self.assertEqual(self.dp_original.get_dim_aparam(), 0)
 
         self.assertEqual(self.dp_compressed.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_compressed.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_compressed.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(
+            self.dp_compressed.get_rcut(), 6.0, places=default_places
+        )
+        self.assertEqual(self.dp_compressed.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_compressed.get_dim_fparam(), 0)
         self.assertEqual(self.dp_compressed.get_dim_aparam(), 0)
 
     def test_1frame(self):
-        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
-        ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False)
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_1frame_atm(self):
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
@@ -129,21 +175,25 @@ def test_1frame_atm(self):
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(coords2, box2, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(coords2, box2, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            coords2, box2, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
 
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
@@ -153,53 +203,77 @@ def test_2frame_atm(self):
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
 
-class TestDeepPotANoPBC(unittest.TestCase) :
+class TestDeepPotANoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_compressed = DeepPot(COMPRESSED_MODEL)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
         self.box = None
 
     def test_1frame(self):
-        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
-        ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False)
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_1frame_atm(self):
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
@@ -209,21 +283,25 @@ def test_1frame_atm(self):
 
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(coords2, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(coords2, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            coords2, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            coords2, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
 
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
@@ -232,54 +310,78 @@ def test_2frame_atm(self):
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
-    
-class TestDeepPotALargeBoxNoPBC(unittest.TestCase) :
+
+class TestDeepPotALargeBoxNoPBC(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_compressed = DeepPot(COMPRESSED_MODEL)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([19., 0., 0., 0., 13., 0., 0., 0., 13.])
+        self.box = np.array([19.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
 
     def test_1frame(self):
-        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
-        ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False)
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_1frame_atm(self):
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
@@ -288,16 +390,26 @@ def test_1frame_atm(self):
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_ase(self):
-        from ase import Atoms
-        from deepmd.calculator import DP
-        water0 = Atoms('OHHOHH',
-                    positions=self.coords.reshape((-1,3)),
-                    cell=self.box.reshape((3,3)),
-                    calculator=DP(FROZEN_MODEL))
-        water1 = Atoms('OHHOHH',
-                    positions=self.coords.reshape((-1,3)),
-                    cell=self.box.reshape((3,3)),
-                    calculator=DP(COMPRESSED_MODEL))
+        from ase import (
+            Atoms,
+        )
+
+        from deepmd.calculator import (
+            DP,
+        )
+
+        water0 = Atoms(
+            "OHHOHH",
+            positions=self.coords.reshape((-1, 3)),
+            cell=self.box.reshape((3, 3)),
+            calculator=DP(FROZEN_MODEL),
+        )
+        water1 = Atoms(
+            "OHHOHH",
+            positions=self.coords.reshape((-1, 3)),
+            cell=self.box.reshape((3, 3)),
+            calculator=DP(COMPRESSED_MODEL),
+        )
         ee0 = water0.get_potential_energy()
         ff0 = water0.get_forces()
         ee1 = water1.get_potential_energy()
@@ -306,19 +418,36 @@ def test_ase(self):
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
 
-class TestDeepPotAPBCExcludeTypes(unittest.TestCase) :
+
+class TestDeepPotAPBCExcludeTypes(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.dp_original = DeepPot(FROZEN_MODEL)
         self.dp_compressed = DeepPot(COMPRESSED_MODEL)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
 
     @classmethod
     def tearDownClass(self):
@@ -344,50 +473,60 @@ def tearDownClass(self):
 
     def test_attrs(self):
         self.assertEqual(self.dp_original.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_original.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp_original.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp_original.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_original.get_dim_fparam(), 0)
         self.assertEqual(self.dp_original.get_dim_aparam(), 0)
 
         self.assertEqual(self.dp_compressed.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp_compressed.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp_compressed.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(
+            self.dp_compressed.get_rcut(), 6.0, places=default_places
+        )
+        self.assertEqual(self.dp_compressed.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp_compressed.get_dim_fparam(), 0)
         self.assertEqual(self.dp_compressed.get_dim_aparam(), 0)
 
     def test_1frame(self):
-        ee0, ff0, vv0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = False)
-        ee1, ff1, vv1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = False)
+        ee0, ff0, vv0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
+        ee1, ff1, vv1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=False
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ee0, ee1, default_places)
         np.testing.assert_almost_equal(vv0, vv1, default_places)
 
     def test_1frame_atm(self):
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(self.coords, self.box, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(self.coords, self.box, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
         np.testing.assert_almost_equal(ae0, ae1, default_places)
@@ -398,21 +537,25 @@ def test_1frame_atm(self):
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(coords2, box2, self.atype, atomic = True)
-        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(coords2, box2, self.atype, atomic = True)
+        ee0, ff0, vv0, ae0, av0 = self.dp_original.eval(
+            coords2, box2, self.atype, atomic=True
+        )
+        ee1, ff1, vv1, ae1, av1 = self.dp_compressed.eval(
+            coords2, box2, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee0.shape, (nframes,1))
-        self.assertEqual(ff0.shape, (nframes,natoms,3))
-        self.assertEqual(vv0.shape, (nframes,9))
-        self.assertEqual(ae0.shape, (nframes,natoms,1))
-        self.assertEqual(av0.shape, (nframes,natoms,9))
-        self.assertEqual(ee1.shape, (nframes,1))
-        self.assertEqual(ff1.shape, (nframes,natoms,3))
-        self.assertEqual(vv1.shape, (nframes,9))
-        self.assertEqual(ae1.shape, (nframes,natoms,1))
-        self.assertEqual(av1.shape, (nframes,natoms,9))
+        self.assertEqual(ee0.shape, (nframes, 1))
+        self.assertEqual(ff0.shape, (nframes, natoms, 3))
+        self.assertEqual(vv0.shape, (nframes, 9))
+        self.assertEqual(ae0.shape, (nframes, natoms, 1))
+        self.assertEqual(av0.shape, (nframes, natoms, 9))
+        self.assertEqual(ee1.shape, (nframes, 1))
+        self.assertEqual(ff1.shape, (nframes, natoms, 3))
+        self.assertEqual(vv1.shape, (nframes, 9))
+        self.assertEqual(ae1.shape, (nframes, natoms, 1))
+        self.assertEqual(av1.shape, (nframes, natoms, 9))
 
         # check values
         np.testing.assert_almost_equal(ff0, ff1, default_places)
diff --git a/source/tests/test_model_devi.py b/source/tests/test_model_devi.py
index 07fa69014a..63f3f31ee7 100644
--- a/source/tests/test_model_devi.py
+++ b/source/tests/test_model_devi.py
@@ -1,11 +1,24 @@
-from deepmd.infer import DeepPotential
+import os
+import shutil
+import sys
 import unittest
-import os, sys, shutil
+
 import numpy as np
-from deepmd.infer import calc_model_devi
+
+from deepmd.infer import (
+    DeepPotential,
+    calc_model_devi,
+)
+
 sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))
-from infer.convert2pb import convert_pbtxt_to_pb
-from common import gen_data, tests_path, del_data
+from common import (
+    del_data,
+    gen_data,
+    tests_path,
+)
+from infer.convert2pb import (
+    convert_pbtxt_to_pb,
+)
 
 
 class TestMakeModelDevi(unittest.TestCase):
@@ -19,29 +32,42 @@ def setUp(self):
         self.box = np.vstack([box, box])
         self.freq = 10
 
-        self.pbtxts = [os.path.join(tests_path, "infer/deeppot.pbtxt"),
-                       os.path.join(tests_path, "infer/deeppot-1.pbtxt")]
+        self.pbtxts = [
+            os.path.join(tests_path, "infer/deeppot.pbtxt"),
+            os.path.join(tests_path, "infer/deeppot-1.pbtxt"),
+        ]
         self.graph_dirs = [pbtxt.replace("pbtxt", "pb") for pbtxt in self.pbtxts]
         for pbtxt, pb in zip(self.pbtxts, self.graph_dirs):
             convert_pbtxt_to_pb(pbtxt, pb)
         self.graphs = [DeepPotential(pb) for pb in self.graph_dirs]
         self.output = os.path.join(tests_path, "model_devi.out")
-        self.expect = np.array([0, 1.670048e-01, 4.182279e-04, 8.048649e-02, 5.095047e-01, 4.584241e-01, 4.819783e-01])
-    
+        self.expect = np.array(
+            [
+                0,
+                1.670048e-01,
+                4.182279e-04,
+                8.048649e-02,
+                5.095047e-01,
+                4.584241e-01,
+                4.819783e-01,
+            ]
+        )
+
     def test_calc_model_devi(self):
-        model_devi = calc_model_devi(self.coord,
-                                     None, 
-                                     self.atype, 
-                                     self.graphs,
-                                     frequency=self.freq,
-                                     fname=self.output,
-                                     )
+        model_devi = calc_model_devi(
+            self.coord,
+            None,
+            self.atype,
+            self.graphs,
+            frequency=self.freq,
+            fname=self.output,
+        )
         self.assertAlmostEqual(model_devi[0][0], 0)
         self.assertAlmostEqual(model_devi[1][0], self.freq)
         np.testing.assert_almost_equal(model_devi[0][1:7], self.expect[1:7], 6)
         np.testing.assert_almost_equal(model_devi[0][1:7], model_devi[1][1:7], 6)
         self.assertTrue(os.path.isfile(self.output))
-    
+
     def tearDown(self):
         for pb in self.graph_dirs:
             os.remove(pb)
diff --git a/source/tests/test_model_loc_frame.py b/source/tests/test_model_loc_frame.py
index e37354778d..2f460d9437 100644
--- a/source/tests/test_model_loc_frame.py
+++ b/source/tests/test_model_loc_frame.py
@@ -1,13 +1,31 @@
-import dpdata,os,sys,unittest
+import os
+import sys
+import unittest
+
+import dpdata
 import numpy as np
-from deepmd.env import tf
-from common import Data,gen_data, j_loader
+from common import (
+    Data,
+    DataSystem,
+    gen_data,
+    j_loader,
+)
 
-from common import DataSystem
-from deepmd.descriptor import DescrptLocFrame
-from deepmd.fit import EnerFitting
-from deepmd.model import EnerModel
-from deepmd.common import j_must_have
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptLocFrame,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model import (
+    EnerModel,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
@@ -15,98 +33,131 @@
 
 
 class TestModel(tf.test.TestCase):
-    def setUp(self) :
+    def setUp(self):
         gen_data()
 
     def test_model(self):
-        jfile = 'water.json'
+        jfile = "water.json"
         jdata = j_loader(jfile)
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
-        
-        test_data = data.get_test ()
-        numb_test = 1        
-
-        jdata['model']['descriptor'].pop('type', None)        
-        jdata['model']['descriptor'].pop('_comment', None)        
-        descrpt = DescrptLocFrame(**jdata['model']['descriptor'])
-        fitting = EnerFitting(descrpt, 
-                              neuron = [240, 120, 60, 30, 10], 
-                              seed = 1, 
-                              uniform_seed = True)
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+
+        test_data = data.get_test()
+        numb_test = 1
+
+        jdata["model"]["descriptor"].pop("type", None)
+        jdata["model"]["descriptor"].pop("_comment", None)
+        descrpt = DescrptLocFrame(**jdata["model"]["descriptor"])
+        fitting = EnerFitting(
+            descrpt, neuron=[240, 120, 60, 30, 10], seed=1, uniform_seed=True
+        )
         model = EnerModel(
-            descrpt, 
-            fitting, 
+            descrpt,
+            fitting,
         )
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']]
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
         }
         model._compute_input_stat(input_data)
         model.fitting.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [model.ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        model_pred \
-            = model.build (t_coord, 
-                           t_type, 
-                           t_natoms, 
-                           t_box, 
-                           t_mesh,
-                           t_fparam,
-                           suffix = "loc_frame", 
-                           reuse = False)
-        energy = model_pred['energy']
-        force  = model_pred['force']
-        virial = model_pred['virial']
-        atom_ener =  model_pred['atom_ener']
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_energy:        test_data['energy']              [:numb_test],
-                          t_force:         np.reshape(test_data['force']    [:numb_test, :], [-1]),
-                          t_virial:        np.reshape(test_data['virial']   [:numb_test, :], [-1]),
-                          t_atom_ener:     np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False}
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            t_fparam,
+            suffix="loc_frame",
+            reuse=False,
+        )
+        energy = model_pred["energy"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+        atom_ener = model_pred["atom_ener"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [e, f, v] = sess.run([energy, force, virial], 
-                             feed_dict = feed_dict_test)
+        [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
 
         e = e.reshape([-1])
         f = f.reshape([-1])
         v = v.reshape([-1])
-        refe = [1.165945032784766511e+01]
-        reff = [2.356319331246305437e-01,1.772322096063349284e-01,1.455439548950788684e-02,1.968599426000810226e-01,2.648214484898352983e-01,7.595232354012236564e-02,-2.121321856338151401e-01,-2.463886119018566037e-03,-2.075636300914874069e-02,-9.360310077571798101e-03,-1.751965198776750943e-01,-2.046405309983102827e-02,-1.990194093283037535e-01,-1.828347741191920298e-02,-6.916374506995154325e-02,-1.197997068502068031e-02,-2.461097746875573200e-01,1.987744214930105627e-02]
-        refv = [-4.998509978510510265e-01,-1.966169437179327711e-02,1.136130543869883977e-02,-1.966169437179334650e-02,-4.575353297894450555e-01,-2.668666556859019493e-03,1.136130543869887100e-02,-2.668666556859039876e-03,2.455466940358383508e-03]
+        refe = [1.165945032784766511e01]
+        reff = [
+            2.356319331246305437e-01,
+            1.772322096063349284e-01,
+            1.455439548950788684e-02,
+            1.968599426000810226e-01,
+            2.648214484898352983e-01,
+            7.595232354012236564e-02,
+            -2.121321856338151401e-01,
+            -2.463886119018566037e-03,
+            -2.075636300914874069e-02,
+            -9.360310077571798101e-03,
+            -1.751965198776750943e-01,
+            -2.046405309983102827e-02,
+            -1.990194093283037535e-01,
+            -1.828347741191920298e-02,
+            -6.916374506995154325e-02,
+            -1.197997068502068031e-02,
+            -2.461097746875573200e-01,
+            1.987744214930105627e-02,
+        ]
+        refv = [
+            -4.998509978510510265e-01,
+            -1.966169437179327711e-02,
+            1.136130543869883977e-02,
+            -1.966169437179334650e-02,
+            -4.575353297894450555e-01,
+            -2.668666556859019493e-03,
+            1.136130543869887100e-02,
+            -2.668666556859039876e-03,
+            2.455466940358383508e-03,
+        ]
         refe = np.reshape(refe, [-1])
         reff = np.reshape(reff, [-1])
         refv = np.reshape(refv, [-1])
@@ -115,4 +166,3 @@ def test_model(self):
         np.testing.assert_almost_equal(e, refe, places)
         np.testing.assert_almost_equal(f, reff, places)
         np.testing.assert_almost_equal(v, refv, places)
-
diff --git a/source/tests/test_model_multi.py b/source/tests/test_model_multi.py
index a9a043d934..04fb75f09d 100644
--- a/source/tests/test_model_multi.py
+++ b/source/tests/test_model_multi.py
@@ -1,13 +1,35 @@
-import dpdata, os, sys, unittest
+import os
+import sys
+import unittest
+
+import dpdata
 import numpy as np
-from deepmd.env import tf
-from common import Data, gen_data, del_data, j_loader, finite_difference, strerch_box
+from common import (
+    Data,
+    DataSystem,
+    del_data,
+    finite_difference,
+    gen_data,
+    j_loader,
+    strerch_box,
+)
 
-from common import DataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import EnerFitting, DipoleFittingSeA
-from deepmd.model import MultiModel
-from deepmd.common import j_must_have
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    DipoleFittingSeA,
+    EnerFitting,
+)
+from deepmd.model import (
+    MultiModel,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
@@ -22,117 +44,148 @@ def tearDown(self):
         del_data()
 
     def test_model(self):
-        jfile = 'water_multi.json'
+        jfile = "water_multi.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have(jdata['model']['descriptor'], 'rcut')
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
 
         data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
 
         test_data = data.get_test()
         numb_test = 1
 
-        jdata['model']['descriptor'].pop('type', None)
-        jdata['model']['descriptor']['multi_task'] = True
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed=True)
+        jdata["model"]["descriptor"].pop("type", None)
+        jdata["model"]["descriptor"]["multi_task"] = True
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
         fitting_dict = {}
         fitting_type_dict = {}
-        for fitting_key in jdata['model']['fitting_net_dict']:
-            item_fitting_param = jdata['model']['fitting_net_dict'][fitting_key]
-            item_fitting_type = item_fitting_param.get('type', 'ener')
+        for fitting_key in jdata["model"]["fitting_net_dict"]:
+            item_fitting_param = jdata["model"]["fitting_net_dict"][fitting_key]
+            item_fitting_type = item_fitting_param.get("type", "ener")
             fitting_type_dict[fitting_key] = item_fitting_type
-            item_fitting_param.pop('type', None)
-            item_fitting_param.pop('fit_diag', None)
-            item_fitting_param['descrpt'] = descrpt
-            if item_fitting_type == 'ener':
-                fitting_dict[fitting_key] = EnerFitting(**item_fitting_param, uniform_seed=True)
-            elif item_fitting_type == 'dipole':
-                fitting_dict[fitting_key] = DipoleFittingSeA(**item_fitting_param, uniform_seed=True)
+            item_fitting_param.pop("type", None)
+            item_fitting_param.pop("fit_diag", None)
+            item_fitting_param["descrpt"] = descrpt
+            if item_fitting_type == "ener":
+                fitting_dict[fitting_key] = EnerFitting(
+                    **item_fitting_param, uniform_seed=True
+                )
+            elif item_fitting_type == "dipole":
+                fitting_dict[fitting_key] = DipoleFittingSeA(
+                    **item_fitting_param, uniform_seed=True
+                )
             else:
-                RuntimeError('Test should not be here!')
+                RuntimeError("Test should not be here!")
         model = MultiModel(descrpt, fitting_dict, fitting_type_dict)
 
-        input_data = {'coord': [test_data['coord']],
-                      'box': [test_data['box']],
-                      'type': [test_data['type']],
-                      'natoms_vec': [test_data['natoms_vec']],
-                      'default_mesh': [test_data['default_mesh']]
-                      }
-        for fitting_key in jdata['model']['fitting_net_dict']:
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
+        }
+        for fitting_key in jdata["model"]["fitting_net_dict"]:
             model._compute_input_stat(input_data, fitting_key=fitting_key)
         model.descrpt.merge_input_stats(model.descrpt.stat_dict)
         model.descrpt.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c = tf.placeholder(tf.float32, [5], name='t_prop_c')
-        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type = tf.placeholder(tf.int32, [None], name='i_type')
-        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name='i_natoms')
-        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh')
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
         is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        model_pred \
-            = model.build(t_coord,
-                          t_type,
-                          t_natoms,
-                          t_box,
-                          t_mesh,
-                          t_fparam,
-                          suffix="multi",
-                          reuse=False)
-        e_energy = model_pred['water_ener']['energy']
-        e_force = model_pred['water_ener']['force']
-        e_virial = model_pred['water_ener']['virial']
-        e_atom_ener = model_pred['water_ener']['atom_ener']
-
-        d_dipole = model_pred['water_dipole']['dipole']
-        d_gdipole = model_pred['water_dipole']['global_dipole']
-        d_force = model_pred['water_dipole']['force']
-        d_virial = model_pred['water_dipole']['virial']
-        d_atom_virial = model_pred['water_dipole']['atom_virial']
-
-        feed_dict_test = {t_prop_c: test_data['prop_c'],
-                          t_energy: test_data['energy'][:numb_test],
-                          t_force: np.reshape(test_data['force'][:numb_test, :], [-1]),
-                          t_virial: np.reshape(test_data['virial'][:numb_test, :], [-1]),
-                          t_atom_ener: np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord: np.reshape(test_data['coord'][:numb_test, :], [-1]),
-                          t_box: test_data['box'][:numb_test, :],
-                          t_type: np.reshape(test_data['type'][:numb_test, :], [-1]),
-                          t_natoms: test_data['natoms_vec'],
-                          t_mesh: test_data['default_mesh'],
-                          is_training: False}
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            t_fparam,
+            suffix="multi",
+            reuse=False,
+        )
+        e_energy = model_pred["water_ener"]["energy"]
+        e_force = model_pred["water_ener"]["force"]
+        e_virial = model_pred["water_ener"]["virial"]
+        e_atom_ener = model_pred["water_ener"]["atom_ener"]
+
+        d_dipole = model_pred["water_dipole"]["dipole"]
+        d_gdipole = model_pred["water_dipole"]["global_dipole"]
+        d_force = model_pred["water_dipole"]["force"]
+        d_virial = model_pred["water_dipole"]["virial"]
+        d_atom_virial = model_pred["water_dipole"]["atom_virial"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
         sess = self.test_session().__enter__()
 
         # test water energy
         sess.run(tf.global_variables_initializer())
-        [e, f, v] = sess.run([e_energy, e_force, e_virial],
-                             feed_dict=feed_dict_test)
+        [e, f, v] = sess.run([e_energy, e_force, e_virial], feed_dict=feed_dict_test)
         e = e.reshape([-1])
         f = f.reshape([-1])
         v = v.reshape([-1])
-        refe = [6.135449167779321300e+01]
-        reff = [7.799691562262310585e-02, 9.423098804815030483e-02, 3.790560997388224204e-03, 1.432522403799846578e-01,
-                1.148392791403983204e-01, -1.321871172563671148e-02, -7.318966526325138000e-02,
-                6.516069212737778116e-02, 5.406418483320515412e-04, 5.870713761026503247e-02, -1.605402669549013672e-01,
-                -5.089516979826595386e-03, -2.554593467731766654e-01, 3.092063507347833987e-02,
-                1.510355029451411479e-02, 4.869271842355533952e-02, -1.446113274345035005e-01,
-                -1.126524434771078789e-03]
-        refv = [-6.076776685178300053e-01, 1.103174323630009418e-01, 1.984250991380156690e-02, 1.103174323630009557e-01,
-                -3.319759402259439551e-01, -6.007404107650986258e-03, 1.984250991380157036e-02,
-                -6.007404107650981921e-03, -1.200076017439753642e-03]
+        refe = [6.135449167779321300e01]
+        reff = [
+            7.799691562262310585e-02,
+            9.423098804815030483e-02,
+            3.790560997388224204e-03,
+            1.432522403799846578e-01,
+            1.148392791403983204e-01,
+            -1.321871172563671148e-02,
+            -7.318966526325138000e-02,
+            6.516069212737778116e-02,
+            5.406418483320515412e-04,
+            5.870713761026503247e-02,
+            -1.605402669549013672e-01,
+            -5.089516979826595386e-03,
+            -2.554593467731766654e-01,
+            3.092063507347833987e-02,
+            1.510355029451411479e-02,
+            4.869271842355533952e-02,
+            -1.446113274345035005e-01,
+            -1.126524434771078789e-03,
+        ]
+        refv = [
+            -6.076776685178300053e-01,
+            1.103174323630009418e-01,
+            1.984250991380156690e-02,
+            1.103174323630009557e-01,
+            -3.319759402259439551e-01,
+            -6.007404107650986258e-03,
+            1.984250991380157036e-02,
+            -6.007404107650981921e-03,
+            -1.200076017439753642e-03,
+        ]
         refe = np.reshape(refe, [-1])
         reff = np.reshape(reff, [-1])
         refv = np.reshape(refv, [-1])
@@ -145,8 +198,14 @@ def test_model(self):
         # test water dipole
         [p, gp] = sess.run([d_dipole, d_gdipole], feed_dict=feed_dict_test)
         p = p.reshape([-1])
-        refp = [1.616802262298876514e+01, 9.809535439521079425e+00, 3.572312180768947854e-01, 1.336308874095981203e+00,
-                1.057908563208963848e+01, -5.999602350098874881e-01]
+        refp = [
+            1.616802262298876514e01,
+            9.809535439521079425e00,
+            3.572312180768947854e-01,
+            1.336308874095981203e00,
+            1.057908563208963848e01,
+            -5.999602350098874881e-01,
+        ]
         places = 10
         np.testing.assert_almost_equal(p, refp, places)
         gp = gp.reshape([-1])
@@ -155,15 +214,19 @@ def test_model(self):
         np.testing.assert_almost_equal(gp, refgp, places)
 
         # test water dipole : make sure only one frame is used
-        feed_dict_single = {t_prop_c: test_data['prop_c'],
-                            t_coord: np.reshape(test_data['coord'][:1, :], [-1]),
-                            t_box: test_data['box'][:1, :],
-                            t_type: np.reshape(test_data['type'][:1, :], [-1]),
-                            t_natoms: test_data['natoms_vec'],
-                            t_mesh: test_data['default_mesh'],
-                            is_training: False}
-
-        [pf, pv, pav] = sess.run([d_force, d_virial, d_atom_virial], feed_dict=feed_dict_single)
+        feed_dict_single = {
+            t_prop_c: test_data["prop_c"],
+            t_coord: np.reshape(test_data["coord"][:1, :], [-1]),
+            t_box: test_data["box"][:1, :],
+            t_type: np.reshape(test_data["type"][:1, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
+
+        [pf, pv, pav] = sess.run(
+            [d_force, d_virial, d_atom_virial], feed_dict=feed_dict_single
+        )
         pf, pv = pf.reshape(-1), pv.reshape(-1)
         spv = pav.reshape(1, 3, -1, 9).sum(2).reshape(-1)
 
@@ -171,19 +234,28 @@ def test_model(self):
         coord0 = base_dict.pop(t_coord)
         box0 = base_dict.pop(t_box)
 
-        fdf = - finite_difference(
-            lambda coord: sess.run(d_gdipole,
-                                   feed_dict={**base_dict,
-                                              t_coord: coord,
-                                              t_box: box0}).reshape(-1),
-            test_data['coord'][:numb_test, :].reshape([-1])).reshape(-1)
-        fdv = - (finite_difference(
-            lambda box: sess.run(d_gdipole,
-                                 feed_dict={**base_dict,
-                                            t_coord: strerch_box(coord0, box0, box),
-                                            t_box: box}).reshape(-1),
-            test_data['box'][:numb_test, :]).reshape([-1, 3, 3]).transpose(0, 2, 1)
-                 @ box0.reshape(3, 3)).reshape(-1)
+        fdf = -finite_difference(
+            lambda coord: sess.run(
+                d_gdipole, feed_dict={**base_dict, t_coord: coord, t_box: box0}
+            ).reshape(-1),
+            test_data["coord"][:numb_test, :].reshape([-1]),
+        ).reshape(-1)
+        fdv = -(
+            finite_difference(
+                lambda box: sess.run(
+                    d_gdipole,
+                    feed_dict={
+                        **base_dict,
+                        t_coord: strerch_box(coord0, box0, box),
+                        t_box: box,
+                    },
+                ).reshape(-1),
+                test_data["box"][:numb_test, :],
+            )
+            .reshape([-1, 3, 3])
+            .transpose(0, 2, 1)
+            @ box0.reshape(3, 3)
+        ).reshape(-1)
 
         delta = 1e-5
         np.testing.assert_allclose(pf, fdf, delta)
diff --git a/source/tests/test_model_se_a.py b/source/tests/test_model_se_a.py
index 28caed4fd9..7559eac737 100644
--- a/source/tests/test_model_se_a.py
+++ b/source/tests/test_model_se_a.py
@@ -1,206 +1,260 @@
+import os
+import sys
+import unittest
 
-import dpdata,os,sys,unittest
+import dpdata
 import numpy as np
-from deepmd.env import tf
-from common import Data, gen_data, del_data, j_loader
-
-from common import DataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import EnerFitting
-from deepmd.model import EnerModel
-from deepmd.common import j_must_have
-from deepmd.utils.type_embed import TypeEmbedNet
+from common import (
+    Data,
+    DataSystem,
+    del_data,
+    gen_data,
+    j_loader,
+)
+
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model import (
+    EnerModel,
+)
+from deepmd.utils.type_embed import (
+    TypeEmbedNet,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
+
 class TestModel(tf.test.TestCase):
-    def setUp(self) :
+    def setUp(self):
         gen_data()
 
     def tearDown(self):
         del_data()
 
-    def test_model_atom_ener(self):        
-        jfile = 'water_se_a.json'
+    def test_model_atom_ener(self):
+        jfile = "water_se_a.json"
         jdata = j_loader(jfile)
-        set_atom_ener = [0.02, 0.01]        
-        jdata['model']['fitting_net']['atom_ener'] = set_atom_ener
+        set_atom_ener = [0.02, 0.01]
+        jdata["model"]["fitting_net"]["atom_ener"] = set_atom_ener
 
         sys = dpdata.LabeledSystem()
-        sys.data['atom_names'] = ['foo', 'bar']
-        sys.data['coords'] = np.array([0, 0, 0, 0, 0, 0])
-        sys.data['atom_types'] = [0]
-        sys.data['cells'] = np.array([np.eye(3) * 30, np.eye(3) * 30])
+        sys.data["atom_names"] = ["foo", "bar"]
+        sys.data["coords"] = np.array([0, 0, 0, 0, 0, 0])
+        sys.data["atom_types"] = [0]
+        sys.data["cells"] = np.array([np.eye(3) * 30, np.eye(3) * 30])
         nframes = 2
         natoms = 1
-        sys.data['coords'] = sys.data['coords'].reshape([nframes,natoms,3])
-        sys.data['cells'] = sys.data['cells'].reshape([nframes,3,3])
-        sys.data['energies'] = np.zeros([nframes,1])
-        sys.data['forces'] = np.zeros([nframes,natoms,3])
-        sys.to_deepmd_npy('system', prec=np.float64)    
-
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        sys.data["coords"] = sys.data["coords"].reshape([nframes, natoms, 3])
+        sys.data["cells"] = sys.data["cells"].reshape([nframes, 3, 3])
+        sys.data["energies"] = np.zeros([nframes, 1])
+        sys.data["forces"] = np.zeros([nframes, natoms, 3])
+        sys.to_deepmd_npy("system", prec=np.float64)
+
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
 
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)        
-        test_data = data.get_test ()
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+        test_data = data.get_test()
         numb_test = 1
 
-        jdata['model']['descriptor'].pop('type', None)        
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed=True)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = EnerFitting(**jdata['model']['fitting_net'], uniform_seed=True)
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = EnerModel(descrpt, fitting)
 
-        test_data['natoms_vec'] = [1, 1, 1, 0]
+        test_data["natoms_vec"] = [1, 1, 1, 0]
 
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']]
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
         }
         model._compute_input_stat(input_data)
         model.fitting.bias_atom_e = np.array(set_atom_ener)
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [model.ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        model_pred \
-            = model.build (t_coord, 
-                           t_type, 
-                           t_natoms, 
-                           t_box, 
-                           t_mesh,
-                           t_fparam,
-                           suffix = "se_a_atom_ener_0", 
-                           reuse = False)
-        energy = model_pred['energy']
-        force  = model_pred['force']
-        virial = model_pred['virial']
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_energy:        test_data['energy']              [:numb_test],
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape([0], [-1]),
-                          t_natoms:        [1, 1, 1, 0],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            t_fparam,
+            suffix="se_a_atom_ener_0",
+            reuse=False,
+        )
+        energy = model_pred["energy"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape([0], [-1]),
+            t_natoms: [1, 1, 1, 0],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
         }
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [e, f, v] = sess.run([energy, force, virial], 
-                             feed_dict = feed_dict_test)
-        self.assertAlmostEqual(e[0], set_atom_ener[0], places = 10)
+        [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
+        self.assertAlmostEqual(e[0], set_atom_ener[0], places=10)
 
         feed_dict_test[t_type] = np.reshape([1], [-1])
         feed_dict_test[t_natoms] = [1, 1, 0, 1]
-        [e, f, v] = sess.run([energy, force, virial], 
-                             feed_dict = feed_dict_test)
-        self.assertAlmostEqual(e[0], set_atom_ener[1], places = 10)
-
-
+        [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
+        self.assertAlmostEqual(e[0], set_atom_ener[1], places=10)
 
     def test_model(self):
-        jfile = 'water_se_a.json'
+        jfile = "water_se_a.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
-        
-        test_data = data.get_test ()
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+
+        test_data = data.get_test()
         numb_test = 1
 
-        jdata['model']['descriptor'].pop('type', None)        
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed=True)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = EnerFitting(**jdata['model']['fitting_net'], uniform_seed=True)
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = EnerModel(descrpt, fitting)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']]
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
         }
         model._compute_input_stat(input_data)
         model.descrpt.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [model.ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        model_pred \
-            = model.build (t_coord, 
-                           t_type, 
-                           t_natoms, 
-                           t_box, 
-                           t_mesh,
-                           t_fparam,
-                           suffix = "se_a", 
-                           reuse = False)
-        energy = model_pred['energy']
-        force  = model_pred['force']
-        virial = model_pred['virial']
-        atom_ener =  model_pred['atom_ener']
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_energy:        test_data['energy']              [:numb_test],
-                          t_force:         np.reshape(test_data['force']    [:numb_test, :], [-1]),
-                          t_virial:        np.reshape(test_data['virial']   [:numb_test, :], [-1]),
-                          t_atom_ener:     np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False}
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            t_fparam,
+            suffix="se_a",
+            reuse=False,
+        )
+        energy = model_pred["energy"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+        atom_ener = model_pred["atom_ener"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [e, f, v] = sess.run([energy, force, virial], 
-                             feed_dict = feed_dict_test)
+        [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
 
         e = e.reshape([-1])
         f = f.reshape([-1])
         v = v.reshape([-1])
-        refe = [6.135449167779321300e+01]
-        reff = [7.799691562262310585e-02,9.423098804815030483e-02,3.790560997388224204e-03,1.432522403799846578e-01,1.148392791403983204e-01,-1.321871172563671148e-02,-7.318966526325138000e-02,6.516069212737778116e-02,5.406418483320515412e-04,5.870713761026503247e-02,-1.605402669549013672e-01,-5.089516979826595386e-03,-2.554593467731766654e-01,3.092063507347833987e-02,1.510355029451411479e-02,4.869271842355533952e-02,-1.446113274345035005e-01,-1.126524434771078789e-03]
-        refv = [-6.076776685178300053e-01,1.103174323630009418e-01,1.984250991380156690e-02,1.103174323630009557e-01,-3.319759402259439551e-01,-6.007404107650986258e-03,1.984250991380157036e-02,-6.007404107650981921e-03,-1.200076017439753642e-03]
+        refe = [6.135449167779321300e01]
+        reff = [
+            7.799691562262310585e-02,
+            9.423098804815030483e-02,
+            3.790560997388224204e-03,
+            1.432522403799846578e-01,
+            1.148392791403983204e-01,
+            -1.321871172563671148e-02,
+            -7.318966526325138000e-02,
+            6.516069212737778116e-02,
+            5.406418483320515412e-04,
+            5.870713761026503247e-02,
+            -1.605402669549013672e-01,
+            -5.089516979826595386e-03,
+            -2.554593467731766654e-01,
+            3.092063507347833987e-02,
+            1.510355029451411479e-02,
+            4.869271842355533952e-02,
+            -1.446113274345035005e-01,
+            -1.126524434771078789e-03,
+        ]
+        refv = [
+            -6.076776685178300053e-01,
+            1.103174323630009418e-01,
+            1.984250991380156690e-02,
+            1.103174323630009557e-01,
+            -3.319759402259439551e-01,
+            -6.007404107650986258e-03,
+            1.984250991380157036e-02,
+            -6.007404107650981921e-03,
+            -1.200076017439753642e-03,
+        ]
         refe = np.reshape(refe, [-1])
         reff = np.reshape(reff, [-1])
         refv = np.reshape(refv, [-1])
@@ -210,101 +264,99 @@ def test_model(self):
         np.testing.assert_almost_equal(f, reff, places)
         np.testing.assert_almost_equal(v, refv, places)
 
-    def test_model_atom_ener_type_embedding(self):        
+    def test_model_atom_ener_type_embedding(self):
         """Test atom ener with type embedding"""
-        jfile = 'water_se_a.json'
+        jfile = "water_se_a.json"
         jdata = j_loader(jfile)
-        set_atom_ener = [0.02, 0.01]        
-        jdata['model']['fitting_net']['atom_ener'] = set_atom_ener
-        jdata['model']['type_embeding'] = {"neuron": [2]}
+        set_atom_ener = [0.02, 0.01]
+        jdata["model"]["fitting_net"]["atom_ener"] = set_atom_ener
+        jdata["model"]["type_embeding"] = {"neuron": [2]}
 
         sys = dpdata.LabeledSystem()
-        sys.data['atom_names'] = ['foo', 'bar']
-        sys.data['coords'] = np.array([0, 0, 0, 0, 0, 0])
-        sys.data['atom_types'] = [0]
-        sys.data['cells'] = np.array([np.eye(3) * 30, np.eye(3) * 30])
+        sys.data["atom_names"] = ["foo", "bar"]
+        sys.data["coords"] = np.array([0, 0, 0, 0, 0, 0])
+        sys.data["atom_types"] = [0]
+        sys.data["cells"] = np.array([np.eye(3) * 30, np.eye(3) * 30])
         nframes = 2
         natoms = 1
-        sys.data['coords'] = sys.data['coords'].reshape([nframes,natoms,3])
-        sys.data['cells'] = sys.data['cells'].reshape([nframes,3,3])
-        sys.data['energies'] = np.zeros([nframes,1])
-        sys.data['forces'] = np.zeros([nframes,natoms,3])
-        sys.to_deepmd_npy('system', prec=np.float64)    
-
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        sys.data["coords"] = sys.data["coords"].reshape([nframes, natoms, 3])
+        sys.data["cells"] = sys.data["cells"].reshape([nframes, 3, 3])
+        sys.data["energies"] = np.zeros([nframes, 1])
+        sys.data["forces"] = np.zeros([nframes, natoms, 3])
+        sys.to_deepmd_npy("system", prec=np.float64)
+
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
 
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)        
-        test_data = data.get_test ()
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+        test_data = data.get_test()
         numb_test = 1
 
-        typeebd = TypeEmbedNet(**jdata['model']['type_embeding'])
-        jdata['model']['descriptor'].pop('type', None)        
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed=True)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = EnerFitting(**jdata['model']['fitting_net'], uniform_seed=True)
+        typeebd = TypeEmbedNet(**jdata["model"]["type_embeding"])
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = EnerModel(descrpt, fitting, typeebd=typeebd)
 
-        test_data['natoms_vec'] = [1, 1, 1, 0]
+        test_data["natoms_vec"] = [1, 1, 1, 0]
 
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']]
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
         }
         model._compute_input_stat(input_data)
         model.fitting.bias_atom_e = np.array(set_atom_ener)
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [model.ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        model_pred \
-            = model.build (t_coord, 
-                           t_type, 
-                           t_natoms, 
-                           t_box, 
-                           t_mesh,
-                           t_fparam,
-                           suffix = "se_a_atom_ener_type_embbed_0", 
-                           reuse = False)
-        energy = model_pred['energy']
-        force  = model_pred['force']
-        virial = model_pred['virial']
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_energy:        test_data['energy']              [:numb_test],
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape([0], [-1]),
-                          t_natoms:        [1, 1, 1, 0],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            t_fparam,
+            suffix="se_a_atom_ener_type_embbed_0",
+            reuse=False,
+        )
+        energy = model_pred["energy"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape([0], [-1]),
+            t_natoms: [1, 1, 1, 0],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
         }
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [e, f, v] = sess.run([energy, force, virial], 
-                             feed_dict = feed_dict_test)
-        self.assertAlmostEqual(e[0], set_atom_ener[0], places = 10)
+        [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
+        self.assertAlmostEqual(e[0], set_atom_ener[0], places=10)
 
         feed_dict_test[t_type] = np.reshape([1], [-1])
         feed_dict_test[t_natoms] = [1, 1, 0, 1]
-        [e, f, v] = sess.run([energy, force, virial], 
-                             feed_dict = feed_dict_test)
-        self.assertAlmostEqual(e[0], set_atom_ener[1], places = 10)
-
-
-
+        [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
+        self.assertAlmostEqual(e[0], set_atom_ener[1], places=10)
diff --git a/source/tests/test_model_se_a_aparam.py b/source/tests/test_model_se_a_aparam.py
index d2269ac642..784754fd32 100644
--- a/source/tests/test_model_se_a_aparam.py
+++ b/source/tests/test_model_se_a_aparam.py
@@ -1,111 +1,164 @@
-import dpdata,os,sys,unittest
+import os
+import sys
+import unittest
+
+import dpdata
 import numpy as np
-from deepmd.env import tf
-from common import Data,gen_data, j_loader
+from common import (
+    Data,
+    DataSystem,
+    gen_data,
+    j_loader,
+)
 
-from common import DataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import EnerFitting
-from deepmd.model import EnerModel
-from deepmd.common import j_must_have
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model import (
+    EnerModel,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
+
 class TestModel(tf.test.TestCase):
-    def setUp(self) :
+    def setUp(self):
         gen_data()
 
     def test_model(self):
-        jfile = 'water_se_a_aparam.json'
+        jfile = "water_se_a_aparam.json"
         jdata = j_loader(jfile)
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)        
-        
-        test_data = data.get_test ()
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+
+        test_data = data.get_test()
         # manually set aparam
-        test_data['aparam'] = np.load('system/set.000/aparam.npy')
+        test_data["aparam"] = np.load("system/set.000/aparam.npy")
         numb_test = 1
-        
-        jdata['model']['descriptor'].pop('type', None)        
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed = True)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = EnerFitting(**jdata['model']['fitting_net'], uniform_seed = True)
+
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = EnerModel(descrpt, fitting)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']],
-                      'aparam': [test_data['aparam']],
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
+            "aparam": [test_data["aparam"]],
         }
         model._compute_input_stat(input_data)
         model.descrpt.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [model.ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        t_aparam           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_aparam')
-        is_training        = tf.placeholder(tf.bool)
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        t_aparam = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_aparam")
+        is_training = tf.placeholder(tf.bool)
         input_dict = {}
-        input_dict['aparam'] = t_aparam
-
-        model_pred\
-            = model.build (t_coord, 
-                           t_type, 
-                           t_natoms, 
-                           t_box, 
-                           t_mesh,
-                           input_dict,
-                           suffix = "se_a_aparam", 
-                           reuse = False)
-        energy = model_pred['energy']
-        force  = model_pred['force']
-        virial = model_pred['virial']
-        atom_ener =  model_pred['atom_ener']
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_energy:        test_data['energy']              [:numb_test],
-                          t_force:         np.reshape(test_data['force']    [:numb_test, :], [-1]),
-                          t_virial:        np.reshape(test_data['virial']   [:numb_test, :], [-1]),
-                          t_atom_ener:     np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          t_aparam:        np.reshape(test_data['aparam']   [:numb_test, :], [-1]),
-                          is_training:     False}
+        input_dict["aparam"] = t_aparam
+
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            input_dict,
+            suffix="se_a_aparam",
+            reuse=False,
+        )
+        energy = model_pred["energy"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+        atom_ener = model_pred["atom_ener"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            t_aparam: np.reshape(test_data["aparam"][:numb_test, :], [-1]),
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [e, f, v] = sess.run([energy, force, virial], 
-                             feed_dict = feed_dict_test)
+        [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
 
         e = e.reshape([-1])
         f = f.reshape([-1])
         v = v.reshape([-1])
         refe = [61.35473702079649]
-        reff = [7.789591210641927388e-02,9.411176646369459609e-02,3.785806413688173194e-03,1.430830954178063386e-01,1.146964190520970150e-01,-1.320340288927138173e-02,-7.308720494747594776e-02,6.508269338140809657e-02,5.398739145542804643e-04,5.863268336973800898e-02,-1.603409523950408699e-01,-5.083084610994957619e-03,-2.551569799443983988e-01,3.087934885732580501e-02,1.508590526622844222e-02,4.863249399791078065e-02,-1.444292753594846324e-01,-1.125098094204559241e-03]
-        refv = [-6.069498397488943819e-01,1.101778888191114192e-01,1.981907430646132409e-02,1.101778888191114608e-01,-3.315612988100872793e-01,-5.999739184898976799e-03,1.981907430646132756e-02,-5.999739184898974197e-03,-1.198656608172396325e-03]
+        reff = [
+            7.789591210641927388e-02,
+            9.411176646369459609e-02,
+            3.785806413688173194e-03,
+            1.430830954178063386e-01,
+            1.146964190520970150e-01,
+            -1.320340288927138173e-02,
+            -7.308720494747594776e-02,
+            6.508269338140809657e-02,
+            5.398739145542804643e-04,
+            5.863268336973800898e-02,
+            -1.603409523950408699e-01,
+            -5.083084610994957619e-03,
+            -2.551569799443983988e-01,
+            3.087934885732580501e-02,
+            1.508590526622844222e-02,
+            4.863249399791078065e-02,
+            -1.444292753594846324e-01,
+            -1.125098094204559241e-03,
+        ]
+        refv = [
+            -6.069498397488943819e-01,
+            1.101778888191114192e-01,
+            1.981907430646132409e-02,
+            1.101778888191114608e-01,
+            -3.315612988100872793e-01,
+            -5.999739184898976799e-03,
+            1.981907430646132756e-02,
+            -5.999739184898974197e-03,
+            -1.198656608172396325e-03,
+        ]
         refe = np.reshape(refe, [-1])
         reff = np.reshape(reff, [-1])
         refv = np.reshape(refv, [-1])
diff --git a/source/tests/test_model_se_a_fparam.py b/source/tests/test_model_se_a_fparam.py
index b940671b87..129303ddbe 100644
--- a/source/tests/test_model_se_a_fparam.py
+++ b/source/tests/test_model_se_a_fparam.py
@@ -1,112 +1,165 @@
-import dpdata,os,sys,unittest
+import os
+import sys
+import unittest
+
+import dpdata
 import numpy as np
-from deepmd.env import tf
-from common import Data,gen_data, j_loader
+from common import (
+    Data,
+    DataSystem,
+    gen_data,
+    j_loader,
+)
 
-from common import DataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import EnerFitting
-from deepmd.model import EnerModel
-from deepmd.common import j_must_have
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model import (
+    EnerModel,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
+
 class TestModel(tf.test.TestCase):
-    def setUp(self) :
+    def setUp(self):
         gen_data()
 
     def test_model(self):
-        jfile = 'water_se_a_fparam.json'
+        jfile = "water_se_a_fparam.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
-        
-        test_data = data.get_test ()
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+
+        test_data = data.get_test()
         numb_test = 1
-        
-        jdata['model']['descriptor'].pop('type', None)        
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed = True)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = EnerFitting(**jdata['model']['fitting_net'], uniform_seed = True)
+
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         # descrpt = DescrptSeA(jdata['model']['descriptor'])
         # fitting = EnerFitting(jdata['model']['fitting_net'], descrpt)
         model = EnerModel(descrpt, fitting)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']],
-                      'fparam': [test_data['fparam']],
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
+            "fparam": [test_data["fparam"]],
         }
         model._compute_input_stat(input_data)
         model.descrpt.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [model.ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        t_fparam           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_fparam')
-        is_training        = tf.placeholder(tf.bool)
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        t_fparam = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_fparam")
+        is_training = tf.placeholder(tf.bool)
         input_dict = {}
-        input_dict['fparam'] = t_fparam
-
-        model_pred\
-            = model.build (t_coord, 
-                           t_type, 
-                           t_natoms, 
-                           t_box, 
-                           t_mesh,
-                           input_dict,
-                           suffix = "se_a_fparam", 
-                           reuse = False)
-        energy = model_pred['energy']
-        force  = model_pred['force']
-        virial = model_pred['virial']
-        atom_ener =  model_pred['atom_ener']
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_energy:        test_data['energy']              [:numb_test],
-                          t_force:         np.reshape(test_data['force']    [:numb_test, :], [-1]),
-                          t_virial:        np.reshape(test_data['virial']   [:numb_test, :], [-1]),
-                          t_atom_ener:     np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          t_fparam:        np.reshape(test_data['fparam']   [:numb_test, :], [-1]),
-                          is_training:     False}
+        input_dict["fparam"] = t_fparam
+
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            input_dict,
+            suffix="se_a_fparam",
+            reuse=False,
+        )
+        energy = model_pred["energy"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+        atom_ener = model_pred["atom_ener"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            t_fparam: np.reshape(test_data["fparam"][:numb_test, :], [-1]),
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [e, f, v] = sess.run([energy, force, virial], 
-                             feed_dict = feed_dict_test)
+        [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
 
         e = e.reshape([-1])
         f = f.reshape([-1])
         v = v.reshape([-1])
         refe = [61.35473702079649]
-        reff = [7.789591210641927388e-02,9.411176646369459609e-02,3.785806413688173194e-03,1.430830954178063386e-01,1.146964190520970150e-01,-1.320340288927138173e-02,-7.308720494747594776e-02,6.508269338140809657e-02,5.398739145542804643e-04,5.863268336973800898e-02,-1.603409523950408699e-01,-5.083084610994957619e-03,-2.551569799443983988e-01,3.087934885732580501e-02,1.508590526622844222e-02,4.863249399791078065e-02,-1.444292753594846324e-01,-1.125098094204559241e-03]
-        refv = [-6.069498397488943819e-01,1.101778888191114192e-01,1.981907430646132409e-02,1.101778888191114608e-01,-3.315612988100872793e-01,-5.999739184898976799e-03,1.981907430646132756e-02,-5.999739184898974197e-03,-1.198656608172396325e-03]
+        reff = [
+            7.789591210641927388e-02,
+            9.411176646369459609e-02,
+            3.785806413688173194e-03,
+            1.430830954178063386e-01,
+            1.146964190520970150e-01,
+            -1.320340288927138173e-02,
+            -7.308720494747594776e-02,
+            6.508269338140809657e-02,
+            5.398739145542804643e-04,
+            5.863268336973800898e-02,
+            -1.603409523950408699e-01,
+            -5.083084610994957619e-03,
+            -2.551569799443983988e-01,
+            3.087934885732580501e-02,
+            1.508590526622844222e-02,
+            4.863249399791078065e-02,
+            -1.444292753594846324e-01,
+            -1.125098094204559241e-03,
+        ]
+        refv = [
+            -6.069498397488943819e-01,
+            1.101778888191114192e-01,
+            1.981907430646132409e-02,
+            1.101778888191114608e-01,
+            -3.315612988100872793e-01,
+            -5.999739184898976799e-03,
+            1.981907430646132756e-02,
+            -5.999739184898974197e-03,
+            -1.198656608172396325e-03,
+        ]
         refe = np.reshape(refe, [-1])
         reff = np.reshape(reff, [-1])
         refv = np.reshape(refv, [-1])
diff --git a/source/tests/test_model_se_a_srtab.py b/source/tests/test_model_se_a_srtab.py
index 855503691d..60f4514cbc 100644
--- a/source/tests/test_model_se_a_srtab.py
+++ b/source/tests/test_model_se_a_srtab.py
@@ -1,134 +1,188 @@
-import dpdata,os,sys,unittest
+import os
+import sys
+import unittest
+
+import dpdata
 import numpy as np
-from deepmd.env import tf
-from common import Data,gen_data, j_loader
+from common import (
+    Data,
+    DataSystem,
+    gen_data,
+    j_loader,
+)
 
-from common import DataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import EnerFitting
-from deepmd.model import EnerModel
-from deepmd.common import j_must_have
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model import (
+    EnerModel,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
-def _make_tab(ntype) :
-    xx = np.arange(0,9,0.001)
-    yy = 1000/(xx+.5)**6
+
+def _make_tab(ntype):
+    xx = np.arange(0, 9, 0.001)
+    yy = 1000 / (xx + 0.5) ** 6
     prt = xx
     ninter = ntype * (ntype + 1) // 2
-    for ii in range(ninter) :
+    for ii in range(ninter):
         prt = np.append(prt, yy)
-    prt = np.reshape(prt, [ninter+1, -1])
-    np.savetxt('tab.xvg', prt.T)
+    prt = np.reshape(prt, [ninter + 1, -1])
+    np.savetxt("tab.xvg", prt.T)
+
 
 class TestModel(tf.test.TestCase):
-    def setUp(self) :
+    def setUp(self):
         gen_data()
         _make_tab(2)
 
     def tearDown(self):
-        os.remove('tab.xvg')
+        os.remove("tab.xvg")
 
     def test_model(self):
-        jfile = 'water_se_a_srtab.json'
+        jfile = "water_se_a_srtab.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
-        
-        test_data = data.get_test ()
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+
+        test_data = data.get_test()
         numb_test = 1
-        
-        jdata['model']['descriptor'].pop('type', None)        
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed = True)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = EnerFitting(**jdata['model']['fitting_net'], uniform_seed = True)
+
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         # descrpt = DescrptSeA(jdata['model']['descriptor'])
         # fitting = EnerFitting(jdata['model']['fitting_net'], descrpt)
         model = EnerModel(
-            descrpt, 
-            fitting, 
+            descrpt,
+            fitting,
             None,
-            jdata['model'].get('type_map'),
-            jdata['model'].get('data_stat_nbatch'),
-            jdata['model'].get('data_stat_protect'),
-            jdata['model'].get('use_srtab'),
-            jdata['model'].get('smin_alpha'),
-            jdata['model'].get('sw_rmin'),
-            jdata['model'].get('sw_rmax')
+            jdata["model"].get("type_map"),
+            jdata["model"].get("data_stat_nbatch"),
+            jdata["model"].get("data_stat_protect"),
+            jdata["model"].get("use_srtab"),
+            jdata["model"].get("smin_alpha"),
+            jdata["model"].get("sw_rmin"),
+            jdata["model"].get("sw_rmax"),
         )
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']]
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
         }
         model._compute_input_stat(input_data)
         model.descrpt.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [model.ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        model_pred\
-            = model.build (t_coord, 
-                           t_type, 
-                           t_natoms, 
-                           t_box, 
-                           t_mesh,
-                           t_fparam,
-                           suffix = "se_a_srtab", 
-                           reuse = False)
-        energy = model_pred['energy']
-        force  = model_pred['force']
-        virial = model_pred['virial']
-        atom_ener =  model_pred['atom_ener']
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_energy:        test_data['energy']              [:numb_test],
-                          t_force:         np.reshape(test_data['force']    [:numb_test, :], [-1]),
-                          t_virial:        np.reshape(test_data['virial']   [:numb_test, :], [-1]),
-                          t_atom_ener:     np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False}
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            t_fparam,
+            suffix="se_a_srtab",
+            reuse=False,
+        )
+        energy = model_pred["energy"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+        atom_ener = model_pred["atom_ener"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [e, f, v] = sess.run([energy, force, virial], 
-                             feed_dict = feed_dict_test)
+        [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
 
         e = e.reshape([-1])
         f = f.reshape([-1])
         v = v.reshape([-1])
 
-        refe = [1.141610882066236599e+02]
-        reff = [-1.493121233165248043e+02,-1.831419491743885715e+02,-8.439542992300344437e+00,-1.811987095947552859e+02,-1.476380826187439084e+02,1.264271856742560018e+01,1.544377958934875323e+02,-7.816520233903435866e+00,1.287925245463442225e+00,-4.000393268449002449e+00,1.910748885843098890e+02,7.134789955349889468e+00,1.826908441979261113e+02,3.677156386479059513e+00,-1.122312112141401741e+01,-2.617413911684622008e+00,1.438445070562470391e+02,-1.402769654524568033e+00]
-        refv = [3.585047655925112622e+02,-7.569252978336677984e+00,-1.068382043878426124e+01,-7.569252978336677096e+00,3.618439481685132932e+02,5.448668500896081568e+00,-1.068382043878426302e+01,5.448668500896082456e+00,1.050393462151727686e+00]
+        refe = [1.141610882066236599e02]
+        reff = [
+            -1.493121233165248043e02,
+            -1.831419491743885715e02,
+            -8.439542992300344437e00,
+            -1.811987095947552859e02,
+            -1.476380826187439084e02,
+            1.264271856742560018e01,
+            1.544377958934875323e02,
+            -7.816520233903435866e00,
+            1.287925245463442225e00,
+            -4.000393268449002449e00,
+            1.910748885843098890e02,
+            7.134789955349889468e00,
+            1.826908441979261113e02,
+            3.677156386479059513e00,
+            -1.122312112141401741e01,
+            -2.617413911684622008e00,
+            1.438445070562470391e02,
+            -1.402769654524568033e00,
+        ]
+        refv = [
+            3.585047655925112622e02,
+            -7.569252978336677984e00,
+            -1.068382043878426124e01,
+            -7.569252978336677096e00,
+            3.618439481685132932e02,
+            5.448668500896081568e00,
+            -1.068382043878426302e01,
+            5.448668500896082456e00,
+            1.050393462151727686e00,
+        ]
         refe = np.reshape(refe, [-1])
         reff = np.reshape(reff, [-1])
         refv = np.reshape(refv, [-1])
diff --git a/source/tests/test_model_se_a_type.py b/source/tests/test_model_se_a_type.py
index 18f40eaa16..9995f9007a 100644
--- a/source/tests/test_model_se_a_type.py
+++ b/source/tests/test_model_se_a_type.py
@@ -1,108 +1,135 @@
-import dpdata,os,sys,unittest
-import numpy as np
-from deepmd.env import tf
+import os
 import pickle
-from common import Data,gen_data, j_loader
+import sys
+import unittest
+
+import dpdata
+import numpy as np
+from common import (
+    Data,
+    DataSystem,
+    gen_data,
+    j_loader,
+)
 
-from common import DataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import EnerFitting
-from deepmd.model import EnerModel
-from deepmd.utils.type_embed import TypeEmbedNet
-from deepmd.common import j_must_have
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model import (
+    EnerModel,
+)
+from deepmd.utils.type_embed import (
+    TypeEmbedNet,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
+
 class TestModel(tf.test.TestCase):
-    def setUp(self) :
+    def setUp(self):
         gen_data()
 
     def test_model(self):
-        jfile = 'water_se_a_type.json'
+        jfile = "water_se_a_type.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
-        
-        test_data = data.get_test ()
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+
+        test_data = data.get_test()
         numb_test = 1
 
-        jdata['model']['descriptor'].pop('type', None)        
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed = True)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = EnerFitting(**jdata['model']['fitting_net'], uniform_seed = True)
-        typeebd_param = jdata['model']['type_embedding']
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
+        typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
-            neuron = typeebd_param['neuron'],
-            resnet_dt = typeebd_param['resnet_dt'],
-            seed = typeebd_param['seed'], 
-            uniform_seed = True)
+            neuron=typeebd_param["neuron"],
+            resnet_dt=typeebd_param["resnet_dt"],
+            seed=typeebd_param["seed"],
+            uniform_seed=True,
+        )
         model = EnerModel(descrpt, fitting, typeebd)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']]
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
         }
         model._compute_input_stat(input_data)
         model.descrpt.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [model.ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
         t_fparam = None
         inputs_dict = {}
 
-        model_pred \
-            = model.build (t_coord, 
-                           t_type, 
-                           t_natoms, 
-                           t_box, 
-                           t_mesh,
-                           inputs_dict,
-                           suffix = "se_a_type", 
-                           reuse = False)
-        energy = model_pred['energy']
-        force  = model_pred['force']
-        virial = model_pred['virial']
-        atom_ener =  model_pred['atom_ener']
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_energy:        test_data['energy']              [:numb_test],
-                          t_force:         np.reshape(test_data['force']    [:numb_test, :], [-1]),
-                          t_virial:        np.reshape(test_data['virial']   [:numb_test, :], [-1]),
-                          t_atom_ener:     np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False}
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            inputs_dict,
+            suffix="se_a_type",
+            reuse=False,
+        )
+        energy = model_pred["energy"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+        atom_ener = model_pred["atom_ener"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [e, f, v] = sess.run([energy, force, virial], 
-                             feed_dict = feed_dict_test)
+        [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
         # print(sess.run(model.type_embedding))
         # np.savetxt('tmp.out', sess.run(descrpt.dout, feed_dict = feed_dict_test), fmt='%.10e')
         # # print(sess.run(model.atype_embed, feed_dict = feed_dict_test))
@@ -113,13 +140,42 @@ def test_model(self):
         e = e.reshape([-1])
         f = f.reshape([-1])
         v = v.reshape([-1])
-        np.savetxt('e.out', e.reshape([1, -1]), delimiter=',')
-        np.savetxt('f.out', f.reshape([1, -1]), delimiter=',')
-        np.savetxt('v.out', v.reshape([1, -1]), delimiter=',')
+        np.savetxt("e.out", e.reshape([1, -1]), delimiter=",")
+        np.savetxt("f.out", f.reshape([1, -1]), delimiter=",")
+        np.savetxt("v.out", v.reshape([1, -1]), delimiter=",")
 
-        refe = [6.049065170680415804e+01]
-        reff = [1.021832439441947293e-01,1.122650466359011306e-01,3.927874278714531091e-03,1.407089812207832635e-01,1.312473824343091400e-01,-1.228371057389851181e-02,-1.109672154547165501e-01,6.582735820731049070e-02,1.251568633647655391e-03,7.933758749748777428e-02,-1.831777072317984367e-01,-6.173090134630876760e-03,-2.703597126460742794e-01,4.817856571062521104e-02,1.491963457594796399e-02,5.909711543832503466e-02,-1.743406457563475287e-01,-1.642276779780762769e-03]
-        refv = [-6.932736357193732823e-01,1.453756052949563837e-01,2.138263139115256783e-02,1.453756052949564392e-01,-3.880901656480436612e-01,-7.782259726407755700e-03,2.138263139115256437e-02,-7.782259726407749628e-03,-1.225285973678705374e-03]
+        refe = [6.049065170680415804e01]
+        reff = [
+            1.021832439441947293e-01,
+            1.122650466359011306e-01,
+            3.927874278714531091e-03,
+            1.407089812207832635e-01,
+            1.312473824343091400e-01,
+            -1.228371057389851181e-02,
+            -1.109672154547165501e-01,
+            6.582735820731049070e-02,
+            1.251568633647655391e-03,
+            7.933758749748777428e-02,
+            -1.831777072317984367e-01,
+            -6.173090134630876760e-03,
+            -2.703597126460742794e-01,
+            4.817856571062521104e-02,
+            1.491963457594796399e-02,
+            5.909711543832503466e-02,
+            -1.743406457563475287e-01,
+            -1.642276779780762769e-03,
+        ]
+        refv = [
+            -6.932736357193732823e-01,
+            1.453756052949563837e-01,
+            2.138263139115256783e-02,
+            1.453756052949564392e-01,
+            -3.880901656480436612e-01,
+            -7.782259726407755700e-03,
+            2.138263139115256437e-02,
+            -7.782259726407749628e-03,
+            -1.225285973678705374e-03,
+        ]
 
         refe = np.reshape(refe, [-1])
         reff = np.reshape(reff, [-1])
diff --git a/source/tests/test_model_se_atten.py b/source/tests/test_model_se_atten.py
index e284186855..14016db4e2 100644
--- a/source/tests/test_model_se_atten.py
+++ b/source/tests/test_model_se_atten.py
@@ -1,115 +1,143 @@
-import dpdata, os, sys, unittest
-import numpy as np
-from deepmd.env import tf
+import os
 import pickle
-from common import Data, gen_data, j_loader
-
-from common import DataSystem
-from deepmd.descriptor import DescrptSeAtten
-from deepmd.fit import EnerFitting
-from deepmd.model import EnerModel
-from deepmd.utils.type_embed import TypeEmbedNet
-from deepmd.common import j_must_have
-from common import tf
+import sys
+import unittest
+
+import dpdata
+import numpy as np
+from common import (
+    Data,
+    DataSystem,
+    gen_data,
+    j_loader,
+    tf,
+)
 from packaging.version import parse as parse_version
 
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeAtten,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model import (
+    EnerModel,
+)
+from deepmd.utils.type_embed import (
+    TypeEmbedNet,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
 
-@unittest.skipIf(parse_version(tf.__version__) < parse_version("1.15"),
-    f"The current tf version {tf.__version__} is too low to run the new testing model.")
+@unittest.skipIf(
+    parse_version(tf.__version__) < parse_version("1.15"),
+    f"The current tf version {tf.__version__} is too low to run the new testing model.",
+)
 class TestModel(tf.test.TestCase):
     def setUp(self):
         gen_data()
 
     def test_model(self):
-        jfile = 'water_se_atten.json'
+        jfile = "water_se_atten.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have(jdata['model']['descriptor'], 'rcut')
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
 
         data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
 
         test_data = data.get_test()
         numb_test = 1
 
-        jdata['model']['descriptor'].pop('type', None)
-        jdata['model']['descriptor']['ntypes'] = 2
-        descrpt = DescrptSeAtten(**jdata['model']['descriptor'], uniform_seed=True)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = EnerFitting(**jdata['model']['fitting_net'], uniform_seed=True)
-        typeebd_param = jdata['model']['type_embedding']
+        jdata["model"]["descriptor"].pop("type", None)
+        jdata["model"]["descriptor"]["ntypes"] = 2
+        descrpt = DescrptSeAtten(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
+        typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
-            neuron=typeebd_param['neuron'],
+            neuron=typeebd_param["neuron"],
             activation_function=None,
-            resnet_dt=typeebd_param['resnet_dt'],
-            seed=typeebd_param['seed'],
+            resnet_dt=typeebd_param["resnet_dt"],
+            seed=typeebd_param["seed"],
             uniform_seed=True,
-            padding=True)
+            padding=True,
+        )
         model = EnerModel(descrpt, fitting, typeebd)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord': [test_data['coord']],
-                      'box': [test_data['box']],
-                      'type': [test_data['type']],
-                      'natoms_vec': [test_data['natoms_vec']],
-                      'default_mesh': [test_data['default_mesh']]
-                      }
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
+        }
         model._compute_input_stat(input_data)
         model.descrpt.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c = tf.placeholder(tf.float32, [5], name='t_prop_c')
-        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type = tf.placeholder(tf.int32, [None], name='i_type')
-        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name='i_natoms')
-        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh')
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
         is_training = tf.placeholder(tf.bool)
         t_fparam = None
         inputs_dict = {}
 
-        model_pred \
-            = model.build(t_coord,
-                          t_type,
-                          t_natoms,
-                          t_box,
-                          t_mesh,
-                          inputs_dict,
-                          suffix="se_atten",
-                          reuse=False)
-        energy = model_pred['energy']
-        force = model_pred['force']
-        virial = model_pred['virial']
-        atom_ener = model_pred['atom_ener']
-
-        feed_dict_test = {t_prop_c: test_data['prop_c'],
-                          t_energy: test_data['energy'][:numb_test],
-                          t_force: np.reshape(test_data['force'][:numb_test, :], [-1]),
-                          t_virial: np.reshape(test_data['virial'][:numb_test, :], [-1]),
-                          t_atom_ener: np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord: np.reshape(test_data['coord'][:numb_test, :], [-1]),
-                          t_box: test_data['box'][:numb_test, :],
-                          t_type: np.reshape(test_data['type'][:numb_test, :], [-1]),
-                          t_natoms: test_data['natoms_vec'],
-                          t_mesh: test_data['default_mesh'],
-                          is_training: False}
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            inputs_dict,
+            suffix="se_atten",
+            reuse=False,
+        )
+        energy = model_pred["energy"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+        atom_ener = model_pred["atom_ener"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [e, f, v] = sess.run([energy, force, virial],
-                             feed_dict=feed_dict_test)
+        [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
         # print(sess.run(model.type_embedding))
         # np.savetxt('tmp.out', sess.run(descrpt.dout, feed_dict = feed_dict_test), fmt='%.10e')
         # # print(sess.run(model.atype_embed, feed_dict = feed_dict_test))
@@ -120,13 +148,42 @@ def test_model(self):
         e = e.reshape([-1])
         f = f.reshape([-1])
         v = v.reshape([-1])
-        np.savetxt('e.out', e.reshape([1, -1]), delimiter=',')
-        np.savetxt('f.out', f.reshape([1, -1]), delimiter=',')
-        np.savetxt('v.out', v.reshape([1, -1]), delimiter=',')
-
-        refe = [6.12188445792698e+01]
-        reff = [-2.7590100298321299e-03, -2.7392865283639755e-03, 8.5672424478673337e-05,  7.3154109032780492e-03, 7.6754109031673332e-04, -1.0882393042639207e-03, 9.8633073531477645e-03,  3.6631966083397029e-03, -2.2379079261940034e-04, -4.2393697523149913e-03, 4.9491210390296492e-04,  1.6970049039709007e-04, -8.9021867696626039e-03, -4.7967452269658322e-03, 9.2569990351204447e-04, -1.2781517046160920e-03, 2.6103819527704053e-03,  1.3095727849551296e-04]
-        refv = [-1.0171833662757776e-02, -6.7981543912862021e-03, 6.1480942994810296e-04, -6.7981543912861942e-03, 3.0092645628232335e-03,  3.8060849919518031e-04, 6.1480942994810383e-04,  3.8060849919518036e-04, -5.6890657188056002e-05]
+        np.savetxt("e.out", e.reshape([1, -1]), delimiter=",")
+        np.savetxt("f.out", f.reshape([1, -1]), delimiter=",")
+        np.savetxt("v.out", v.reshape([1, -1]), delimiter=",")
+
+        refe = [6.12188445792698e01]
+        reff = [
+            -2.7590100298321299e-03,
+            -2.7392865283639755e-03,
+            8.5672424478673337e-05,
+            7.3154109032780492e-03,
+            7.6754109031673332e-04,
+            -1.0882393042639207e-03,
+            9.8633073531477645e-03,
+            3.6631966083397029e-03,
+            -2.2379079261940034e-04,
+            -4.2393697523149913e-03,
+            4.9491210390296492e-04,
+            1.6970049039709007e-04,
+            -8.9021867696626039e-03,
+            -4.7967452269658322e-03,
+            9.2569990351204447e-04,
+            -1.2781517046160920e-03,
+            2.6103819527704053e-03,
+            1.3095727849551296e-04,
+        ]
+        refv = [
+            -1.0171833662757776e-02,
+            -6.7981543912862021e-03,
+            6.1480942994810296e-04,
+            -6.7981543912861942e-03,
+            3.0092645628232335e-03,
+            3.8060849919518031e-04,
+            6.1480942994810383e-04,
+            3.8060849919518036e-04,
+            -5.6890657188056002e-05,
+        ]
 
         refe = np.reshape(refe, [-1])
         reff = np.reshape(reff, [-1])
diff --git a/source/tests/test_model_se_r.py b/source/tests/test_model_se_r.py
index 52aad18fc5..dfb8d85c88 100644
--- a/source/tests/test_model_se_r.py
+++ b/source/tests/test_model_se_r.py
@@ -1,115 +1,168 @@
-import dpdata,os,sys,unittest
+import os
+import sys
+import unittest
+
+import dpdata
 import numpy as np
-from deepmd.env import tf
-from common import Data,gen_data, j_loader
+from common import (
+    Data,
+    DataSystem,
+    gen_data,
+    j_loader,
+)
 
-from common import DataSystem
-from deepmd.descriptor import DescrptSeR
-from deepmd.fit import EnerFitting
-from deepmd.model import EnerModel
-from deepmd.common import j_must_have
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeR,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model import (
+    EnerModel,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
+
 class TestModel(tf.test.TestCase):
-    def setUp(self) :
+    def setUp(self):
         gen_data()
 
     def test_model(self):
-        jfile = 'water_se_r.json'
+        jfile = "water_se_r.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
-        
-        test_data = data.get_test ()
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+
+        test_data = data.get_test()
         numb_test = 1
-        
-        jdata['model']['descriptor'].pop('type', None)        
-        descrpt = DescrptSeR(**jdata['model']['descriptor'], uniform_seed = True)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = EnerFitting(**jdata['model']['fitting_net'], uniform_seed = True)
+
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeR(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         # fitting = EnerFitting(jdata['model']['fitting_net'], descrpt)
         model = EnerModel(descrpt, fitting)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']]
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
         }
         model._compute_input_stat(input_data)
         model.descrpt.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [model.ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        model_pred\
-            = model.build (t_coord, 
-                           t_type, 
-                           t_natoms, 
-                           t_box, 
-                           t_mesh,
-                           t_fparam,
-                           suffix = "se_r", 
-                           reuse = False)
-        energy = model_pred['energy']
-        force  = model_pred['force']
-        virial = model_pred['virial']
-        atom_ener =  model_pred['atom_ener']
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_energy:        test_data['energy']              [:numb_test],
-                          t_force:         np.reshape(test_data['force']    [:numb_test, :], [-1]),
-                          t_virial:        np.reshape(test_data['virial']   [:numb_test, :], [-1]),
-                          t_atom_ener:     np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False}
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            t_fparam,
+            suffix="se_r",
+            reuse=False,
+        )
+        energy = model_pred["energy"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+        atom_ener = model_pred["atom_ener"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [e, f, v] = sess.run([energy, force, virial], 
-                             feed_dict = feed_dict_test)
+        [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
 
         e = e.reshape([-1])
         f = f.reshape([-1])
         v = v.reshape([-1])
-        refe = [6.152085988309423925e+01]
-        reff = [-1.714443151616400110e-04,-1.315836609370952051e-04,-5.584120460897444674e-06,-7.197863450669731334e-05,-1.384609799994930676e-04,8.856091902774708468e-06,1.120578238869146797e-04,-7.428703645877488470e-05,9.370560731488587317e-07,-1.048347129617610465e-04,1.977876923815685781e-04,7.522050342771599598e-06,2.361772659657814205e-04,-5.774651813388292487e-05,-1.233143271630744828e-05,2.257277740226381951e-08,2.042905031476775584e-04,6.003548585097267914e-07]
-        refv = [1.035180911513190792e-03,-1.118982949050497126e-04,-2.383287813436022850e-05,-1.118982949050497126e-04,4.362023915782403281e-04,8.119543218224559240e-06,-2.383287813436022850e-05,8.119543218224559240e-06,1.201142938802945237e-06]
+        refe = [6.152085988309423925e01]
+        reff = [
+            -1.714443151616400110e-04,
+            -1.315836609370952051e-04,
+            -5.584120460897444674e-06,
+            -7.197863450669731334e-05,
+            -1.384609799994930676e-04,
+            8.856091902774708468e-06,
+            1.120578238869146797e-04,
+            -7.428703645877488470e-05,
+            9.370560731488587317e-07,
+            -1.048347129617610465e-04,
+            1.977876923815685781e-04,
+            7.522050342771599598e-06,
+            2.361772659657814205e-04,
+            -5.774651813388292487e-05,
+            -1.233143271630744828e-05,
+            2.257277740226381951e-08,
+            2.042905031476775584e-04,
+            6.003548585097267914e-07,
+        ]
+        refv = [
+            1.035180911513190792e-03,
+            -1.118982949050497126e-04,
+            -2.383287813436022850e-05,
+            -1.118982949050497126e-04,
+            4.362023915782403281e-04,
+            8.119543218224559240e-06,
+            -2.383287813436022850e-05,
+            8.119543218224559240e-06,
+            1.201142938802945237e-06,
+        ]
         refe = np.reshape(refe, [-1])
         reff = np.reshape(reff, [-1])
         refv = np.reshape(refv, [-1])
 
         places = 6
-        for ii in range(e.size) :
-            self.assertAlmostEqual(e[ii], refe[ii], places = places)
-        for ii in range(f.size) :
-            self.assertAlmostEqual(f[ii], reff[ii], places = places)
-        for ii in range(v.size) :
-            self.assertAlmostEqual(v[ii], refv[ii], places = places)
+        for ii in range(e.size):
+            self.assertAlmostEqual(e[ii], refe[ii], places=places)
+        for ii in range(f.size):
+            self.assertAlmostEqual(f[ii], reff[ii], places=places)
+        for ii in range(v.size):
+            self.assertAlmostEqual(v[ii], refv[ii], places=places)
diff --git a/source/tests/test_model_se_t.py b/source/tests/test_model_se_t.py
index d33b9a6658..dbb1e56d5e 100644
--- a/source/tests/test_model_se_t.py
+++ b/source/tests/test_model_se_t.py
@@ -1,108 +1,161 @@
-import dpdata,os,sys,unittest
+import os
+import sys
+import unittest
+
+import dpdata
 import numpy as np
-from deepmd.env import tf
-from common import Data,gen_data, j_loader
+from common import (
+    Data,
+    DataSystem,
+    gen_data,
+    j_loader,
+)
 
-from common import DataSystem
-from deepmd.descriptor import DescrptSeT
-from deepmd.fit import EnerFitting
-from deepmd.model import EnerModel
-from deepmd.common import j_must_have
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeT,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model import (
+    EnerModel,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
+
 class TestModel(tf.test.TestCase):
-    def setUp(self) :
+    def setUp(self):
         gen_data()
 
     def test_model(self):
-        jfile = 'water_se_t.json'
+        jfile = "water_se_t.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
-        
-        test_data = data.get_test ()
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+
+        test_data = data.get_test()
         numb_test = 1
-        
-        jdata['model']['descriptor'].pop('type', None)        
-        descrpt = DescrptSeT(**jdata['model']['descriptor'], uniform_seed = True)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = EnerFitting(**jdata['model']['fitting_net'], uniform_seed = True)
+
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeT(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = EnerFitting(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = EnerModel(descrpt, fitting)
 
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']]
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
         }
         model._compute_input_stat(input_data)
         model.descrpt.bias_atom_e = data.compute_energy_shift()
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [model.ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        model_pred\
-            = model.build (t_coord, 
-                           t_type, 
-                           t_natoms, 
-                           t_box, 
-                           t_mesh,
-                           t_fparam,
-                           suffix = "se_t", 
-                           reuse = False)
-        energy = model_pred['energy']
-        force  = model_pred['force']
-        virial = model_pred['virial']
-        atom_ener =  model_pred['atom_ener']
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_energy:        test_data['energy']              [:numb_test],
-                          t_force:         np.reshape(test_data['force']    [:numb_test, :], [-1]),
-                          t_virial:        np.reshape(test_data['virial']   [:numb_test, :], [-1]),
-                          t_atom_ener:     np.reshape(test_data['atom_ener'][:numb_test, :], [-1]),
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False}
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            t_fparam,
+            suffix="se_t",
+            reuse=False,
+        )
+        energy = model_pred["energy"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+        atom_ener = model_pred["atom_ener"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_energy: test_data["energy"][:numb_test],
+            t_force: np.reshape(test_data["force"][:numb_test, :], [-1]),
+            t_virial: np.reshape(test_data["virial"][:numb_test, :], [-1]),
+            t_atom_ener: np.reshape(test_data["atom_ener"][:numb_test, :], [-1]),
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [e, f, v] = sess.run([energy, force, virial], 
-                             feed_dict = feed_dict_test)
+        [e, f, v] = sess.run([energy, force, virial], feed_dict=feed_dict_test)
 
         e = e.reshape([-1])
         f = f.reshape([-1])
         v = v.reshape([-1])
-        np.savetxt('e.out', e.reshape([1, -1]))
-        np.savetxt('f.out', f.reshape([1, -1]), delimiter = ',')
-        np.savetxt('v.out', v.reshape([1, -1]), delimiter = ',')
-        refe = [4.8436558582194039e+01]
-        reff = [5.2896335066946598e+00,5.5778402259211131e+00,2.6839994229557251e-01,5.3528786387686784e+00,5.2477755362164968e+00,-4.0486366542657343e-01,-5.1297084055340498e+00,3.4607112287117253e-01,-5.1800783428369482e-02,1.5557068351407846e-01,-5.9071343228741506e+00,-2.2012359669589748e-01,-5.9156735320857488e+00,8.8397615509389127e-02,3.6701215949753935e-01,2.4729910864238122e-01,-5.3529501776440211e+00,4.1375943757728552e-02]
-        refv = [-1.3159448660141607e+01,4.6952048725161544e-01,3.5482003698976106e-01,4.6952048725161577e-01,-1.2178990983673918e+01,-1.6867277410496895e-01,3.5482003698976106e-01,-1.6867277410496900e-01,-3.3986741457321945e-02]
+        np.savetxt("e.out", e.reshape([1, -1]))
+        np.savetxt("f.out", f.reshape([1, -1]), delimiter=",")
+        np.savetxt("v.out", v.reshape([1, -1]), delimiter=",")
+        refe = [4.8436558582194039e01]
+        reff = [
+            5.2896335066946598e00,
+            5.5778402259211131e00,
+            2.6839994229557251e-01,
+            5.3528786387686784e00,
+            5.2477755362164968e00,
+            -4.0486366542657343e-01,
+            -5.1297084055340498e00,
+            3.4607112287117253e-01,
+            -5.1800783428369482e-02,
+            1.5557068351407846e-01,
+            -5.9071343228741506e00,
+            -2.2012359669589748e-01,
+            -5.9156735320857488e00,
+            8.8397615509389127e-02,
+            3.6701215949753935e-01,
+            2.4729910864238122e-01,
+            -5.3529501776440211e00,
+            4.1375943757728552e-02,
+        ]
+        refv = [
+            -1.3159448660141607e01,
+            4.6952048725161544e-01,
+            3.5482003698976106e-01,
+            4.6952048725161577e-01,
+            -1.2178990983673918e01,
+            -1.6867277410496895e-01,
+            3.5482003698976106e-01,
+            -1.6867277410496900e-01,
+            -3.3986741457321945e-02,
+        ]
         refe = np.reshape(refe, [-1])
         reff = np.reshape(reff, [-1])
         refv = np.reshape(refv, [-1])
diff --git a/source/tests/test_neighbor_stat.py b/source/tests/test_neighbor_stat.py
index 1b3ad9a650..64f314759f 100644
--- a/source/tests/test_neighbor_stat.py
+++ b/source/tests/test_neighbor_stat.py
@@ -1,22 +1,28 @@
 import shutil
-import numpy as np
 import unittest
+
 import dpdata
+import numpy as np
+
+from deepmd.entrypoints.neighbor_stat import (
+    neighbor_stat,
+)
 
-from deepmd.entrypoints.neighbor_stat import neighbor_stat
 
 def gen_sys(nframes):
     natoms = 1000
     data = {}
     X, Y, Z = np.mgrid[0:2:3j, 0:2:3j, 0:2:3j]
-    positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T #+ 0.1
-    data['coords'] = np.repeat(positions[np.newaxis, :, :], nframes, axis=0)
-    data['forces'] = np.random.random([nframes, natoms, 3])
-    data['cells'] = np.array([3., 0., 0., 0., 3., 0., 0., 0., 3.]).reshape(1,3,3)
-    data['energies'] = np.random.random([nframes, 1])
-    data['atom_names'] = ['TYPE']
-    data['atom_numbs'] = [27]
-    data['atom_types'] = np.repeat(0, 27)
+    positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T  # + 0.1
+    data["coords"] = np.repeat(positions[np.newaxis, :, :], nframes, axis=0)
+    data["forces"] = np.random.random([nframes, natoms, 3])
+    data["cells"] = np.array([3.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 3.0]).reshape(
+        1, 3, 3
+    )
+    data["energies"] = np.random.random([nframes, 1])
+    data["atom_names"] = ["TYPE"]
+    data["atom_numbs"] = [27]
+    data["atom_types"] = np.repeat(0, 27)
     return data
 
 
@@ -25,23 +31,27 @@ def setUp(self):
         data0 = gen_sys(1)
         sys0 = dpdata.LabeledSystem()
         sys0.data = data0
-        sys0.to_deepmd_npy('system_0', set_size = 1)
-        
+        sys0.to_deepmd_npy("system_0", set_size=1)
+
     def tearDown(self):
-        shutil.rmtree('system_0')
+        shutil.rmtree("system_0")
 
     def test_neighbor_stat(self):
         # set rcut to 0. will cause a core dumped
         # TODO: check what is wrong
-        for rcut in (1., 2., 4.):
+        for rcut in (1.0, 2.0, 4.0):
             with self.subTest():
-                rcut += 1e-3 # prevent numerical errors
-                min_nbor_dist, max_nbor_size = neighbor_stat(system="system_0", rcut=rcut, type_map=["TYPE"])
-                upper = np.ceil(rcut)+1
+                rcut += 1e-3  # prevent numerical errors
+                min_nbor_dist, max_nbor_size = neighbor_stat(
+                    system="system_0", rcut=rcut, type_map=["TYPE"]
+                )
+                upper = np.ceil(rcut) + 1
                 X, Y, Z = np.mgrid[-upper:upper, -upper:upper, -upper:upper]
                 positions = np.vstack([X.ravel(), Y.ravel(), Z.ravel()]).T
                 # distance to (0,0,0)
                 distance = np.linalg.norm(positions, axis=1)
-                expected_neighbors = np.count_nonzero(np.logical_and(distance > 0, distance <= rcut))
+                expected_neighbors = np.count_nonzero(
+                    np.logical_and(distance > 0, distance <= rcut)
+                )
                 self.assertAlmostEqual(min_nbor_dist, 1.0, 6)
                 self.assertEqual(max_nbor_size, [expected_neighbors])
diff --git a/source/tests/test_nvnmd_entrypoints.py b/source/tests/test_nvnmd_entrypoints.py
index 890d2ce886..9070cad478 100644
--- a/source/tests/test_nvnmd_entrypoints.py
+++ b/source/tests/test_nvnmd_entrypoints.py
@@ -1,17 +1,38 @@
 import os
-import numpy as np
 import unittest
 
-from common import tests_path
+import numpy as np
+from common import (
+    tests_path,
+)
 
-from deepmd.env import tf
-from deepmd.nvnmd.utils.fio import FioNpyDic, FioJsonDic, FioBin
-from deepmd.nvnmd.entrypoints.freeze import save_weight
-from deepmd.nvnmd.entrypoints.mapt import mapt
-from deepmd.nvnmd.entrypoints.train import normalized_input, normalized_input_qnn
-from deepmd.nvnmd.entrypoints.wrap import wrap
-from deepmd.nvnmd.utils.config import nvnmd_cfg
-from deepmd.nvnmd.data.data import jdata_deepmd_input
+from deepmd.env import (
+    tf,
+)
+from deepmd.nvnmd.data.data import (
+    jdata_deepmd_input,
+)
+from deepmd.nvnmd.entrypoints.freeze import (
+    save_weight,
+)
+from deepmd.nvnmd.entrypoints.mapt import (
+    mapt,
+)
+from deepmd.nvnmd.entrypoints.train import (
+    normalized_input,
+    normalized_input_qnn,
+)
+from deepmd.nvnmd.entrypoints.wrap import (
+    wrap,
+)
+from deepmd.nvnmd.utils.config import (
+    nvnmd_cfg,
+)
+from deepmd.nvnmd.utils.fio import (
+    FioBin,
+    FioJsonDic,
+    FioNpyDic,
+)
 
 
 class TestNvnmdFreeze(tf.test.TestCase):
@@ -19,7 +40,9 @@ def setUp(self):
         tf.reset_default_graph()
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
 
     def test_freeze(self):
@@ -34,16 +57,14 @@ def test_freeze(self):
             "final_layer_type_0/matrix",
             "final_layer_type_0/bias",
         ]
-        namelist2 = [
-            "train_attr/min_nbor_dist"
-        ]
+        namelist2 = ["train_attr/min_nbor_dist"]
         namelist = namelist1 + namelist2
         # crete variable according to namelist
         tvlist = []
         save_path = str(tests_path / "nvnmd" / "out" / "weight.npy")
         vinit = tf.random_normal_initializer(stddev=1.0, seed=0)
         for sname in namelist:
-            scope, name = sname.split('/')[0:2]
+            scope, name = sname.split("/")[0:2]
             with tf.variable_scope(scope, reuse=False):
                 if sname in namelist1:
                     # create variable
@@ -51,14 +72,12 @@ def test_freeze(self):
                     tvlist.append(tv)
                 elif sname in namelist2:
                     # create constant tensor
-                    ts = tf.constant(2.0,
-                        name = name,
-                        dtype = tf.float64)
+                    ts = tf.constant(2.0, name=name, dtype=tf.float64)
         # save variable and test
         self.sess.run(tf.global_variables_initializer())
         save_weight(self.sess, save_path)
         weight = FioNpyDic().load(save_path)
-        namelist = [sname.replace('/', '.') for sname in namelist]
+        namelist = [sname.replace("/", ".") for sname in namelist]
         np.testing.assert_equal(namelist, list(weight.keys()))
         tf.reset_default_graph()
 
@@ -67,7 +86,9 @@ class TestNvnmdMapt(tf.test.TestCase):
     def setUp(self):
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
 
     def test_mapt(self):
@@ -75,9 +96,9 @@ def test_mapt(self):
         nvnmd_weight = str(tests_path / "nvnmd" / "weight.npy")
         nvnmd_map = str(tests_path / "nvnmd" / "out" / "map.npy")
         jdata = {
-            'nvnmd_config': nvnmd_config,
-            'nvnmd_weight': nvnmd_weight,
-            'nvnmd_map': nvnmd_map,
+            "nvnmd_config": nvnmd_config,
+            "nvnmd_weight": nvnmd_weight,
+            "nvnmd_map": nvnmd_map,
         }
         mapt(**jdata)
         #
@@ -85,31 +106,30 @@ def test_mapt(self):
         #
         nvnmd_map2 = str(tests_path / "nvnmd" / "map.npy")
         data2 = FioNpyDic().load(nvnmd_map2)
-        keys = [
-            'cfg_u2s', 'cfg_s2g', 's', 's_grad', 'h', 'h_grad', 'g', 'g_grad'
-        ]
-        s1 = np.reshape(np.array(data1['s']), [-1, 4])
-        s2 = np.reshape(np.array(data2['s']), [-1, 4])
-        g1 = np.reshape(np.array(data1['g']), [-1, 4])
-        g2 = np.reshape(np.array(data2['g']), [-1, 4])
+        keys = ["cfg_u2s", "cfg_s2g", "s", "s_grad", "h", "h_grad", "g", "g_grad"]
+        s1 = np.reshape(np.array(data1["s"]), [-1, 4])
+        s2 = np.reshape(np.array(data2["s"]), [-1, 4])
+        g1 = np.reshape(np.array(data1["g"]), [-1, 4])
+        g2 = np.reshape(np.array(data2["g"]), [-1, 4])
         np.testing.assert_equal(keys, list(data1.keys()))
         np.testing.assert_almost_equal(s1, s2, 5)
         np.testing.assert_almost_equal(g1, g2, 5)
         tf.reset_default_graph()
         # close NVNMD
-        jdata = jdata_deepmd_input['nvnmd']
-        jdata['config_file'] = "none"
-        jdata['weight_file'] = "none"
-        jdata['map_file'] = "none"
-        jdata['enable'] = False
+        jdata = jdata_deepmd_input["nvnmd"]
+        jdata["config_file"] = "none"
+        jdata["weight_file"] = "none"
+        jdata["map_file"] = "none"
+        jdata["enable"] = False
         nvnmd_cfg.init_from_jdata(jdata)
 
+
 class TestNvnmdTrain(tf.test.TestCase):
     def test_train_input(self):
         # test1: train cnn
         INPUT = str(tests_path / "nvnmd" / "train.json")
         PATH_CNN = "nvnmd_cnn"
-        jdata = normalized_input(INPUT, PATH_CNN, 'none')
+        jdata = normalized_input(INPUT, PATH_CNN, "none")
         fn_ref = str(tests_path / "nvnmd" / "out" / "train_cnn.json")
         FioJsonDic().save(fn_ref, jdata)
         # test2: train qnn
@@ -122,13 +142,14 @@ def test_train_input(self):
         FioJsonDic().save(fn_ref, jdata)
         tf.reset_default_graph()
         # close NVNMD
-        jdata = jdata_deepmd_input['nvnmd']
-        jdata['config_file'] = "none"
-        jdata['weight_file'] = "none"
-        jdata['map_file'] = "none"
-        jdata['enable'] = False
+        jdata = jdata_deepmd_input["nvnmd"]
+        jdata["config_file"] = "none"
+        jdata["weight_file"] = "none"
+        jdata["map_file"] = "none"
+        jdata["enable"] = False
         nvnmd_cfg.init_from_jdata(jdata)
 
+
 class TestNvnmdWrap(tf.test.TestCase):
     def test_wrap(self):
         nvnmd_config = str(tests_path / "nvnmd" / "config.npy")
@@ -136,10 +157,10 @@ def test_wrap(self):
         nvnmd_map = str(tests_path / "nvnmd" / "map.npy")
         nvnmd_model = str(tests_path / "nvnmd" / "out" / "model.pb")
         jdata = {
-            'nvnmd_config': nvnmd_config,
-            'nvnmd_weight': nvnmd_weight,
-            'nvnmd_map': nvnmd_map,
-            'nvnmd_model': nvnmd_model,
+            "nvnmd_config": nvnmd_config,
+            "nvnmd_weight": nvnmd_weight,
+            "nvnmd_map": nvnmd_map,
+            "nvnmd_model": nvnmd_model,
         }
         wrap(**jdata)
         # test
@@ -147,18 +168,64 @@ def test_wrap(self):
         idx = [1, 11, 111, 1111, 11111]
         idxx = []
         for ii in range(1, 10):
-            idxx.extend([ii*i for i in idx])
-        dat  = [data[i] for i in idxx]
-        dat2 = [0, 0, 0, 0, 48, 0, 0, 0, 0, 4, 0, 0, 100, 5, 150, 0, 29, 41, 29, 171, 196, 0, 0, 94, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0, 223, 17, 9, 196, 0, 211, 130, 24]
+            idxx.extend([ii * i for i in idx])
+        dat = [data[i] for i in idxx]
+        dat2 = [
+            0,
+            0,
+            0,
+            0,
+            48,
+            0,
+            0,
+            0,
+            0,
+            4,
+            0,
+            0,
+            100,
+            5,
+            150,
+            0,
+            29,
+            41,
+            29,
+            171,
+            196,
+            0,
+            0,
+            94,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            0,
+            44,
+            0,
+            0,
+            0,
+            223,
+            17,
+            9,
+            196,
+            0,
+            211,
+            130,
+            24,
+        ]
         np.testing.assert_equal(dat, dat2)
         # # close NVNMD
-        jdata = jdata_deepmd_input['nvnmd']
-        jdata['config_file'] = "none"
-        jdata['weight_file'] = "none"
-        jdata['map_file'] = "none"
-        jdata['enable'] = False
+        jdata = jdata_deepmd_input["nvnmd"]
+        jdata["config_file"] = "none"
+        jdata["weight_file"] = "none"
+        jdata["map_file"] = "none"
+        jdata["enable"] = False
         nvnmd_cfg.init_from_jdata(jdata)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_nvnmd_op.py b/source/tests/test_nvnmd_op.py
index e0e9b5c2df..12927f50f8 100644
--- a/source/tests/test_nvnmd_op.py
+++ b/source/tests/test_nvnmd_op.py
@@ -1,50 +1,109 @@
 import os
 import sys
-import numpy as np
 import unittest
 
-from common import tests_path
+import numpy as np
+from common import (
+    tests_path,
+)
 
 import deepmd.op
-from deepmd.env import tf
-from deepmd.env import op_module
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    op_module,
+    tf,
+)
 
 
 class TestOpAddFltNvnmd(tf.test.TestCase):
     def setUp(self):
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
-    
+
     def test_op(self):
         # graph
-        t_x = tf.placeholder(tf.float64, [None, 1], 't_x')
+        t_x = tf.placeholder(tf.float64, [None, 1], "t_x")
         t_y = op_module.add_flt_nvnmd(t_x, t_x)
         # feed_dic
         x = np.reshape(np.arange(0, 8**2) / 3.0, [-1, 1])
-        feed_dict = {
-            t_x : x
-        }
+        feed_dict = {t_x: x}
         # get value and test
         self.sess.run(tf.global_variables_initializer())
         y_pred = self.sess.run(t_y, feed_dict=feed_dict)
-        y_test = np.array([
-             0.,  0.66666651,  1.33333302,  2.,   2.66666603,  3.33333206,
-             4.,  4.66666412,  5.33333206,  6.,   6.66666412,  7.33333206,
-             8.,  8.66666412,  9.33332825, 10.,  10.66666412, 11.33332825,
-            12., 12.66666412, 13.33332825, 14.,  14.66666412, 15.33332825,
-            16., 16.66665649, 17.33332825, 18.,  18.66665649, 19.33332825,
-            20., 20.66665649, 21.33332825, 22.,  22.66665649, 23.33332825,
-            24., 24.66665649, 25.33332825, 26.,  26.66665649, 27.33332825,
-            28., 28.66665649, 29.33332825, 30.,  30.66665649, 31.33332825,
-            32., 32.66665649, 33.33331299, 34.,  34.66665649, 35.33331299,
-            36., 36.66665649, 37.33331299, 38.,  38.66665649, 39.33331299,
-            40., 40.66665649, 41.33331299, 42.
-        ])
+        y_test = np.array(
+            [
+                0.0,
+                0.66666651,
+                1.33333302,
+                2.0,
+                2.66666603,
+                3.33333206,
+                4.0,
+                4.66666412,
+                5.33333206,
+                6.0,
+                6.66666412,
+                7.33333206,
+                8.0,
+                8.66666412,
+                9.33332825,
+                10.0,
+                10.66666412,
+                11.33332825,
+                12.0,
+                12.66666412,
+                13.33332825,
+                14.0,
+                14.66666412,
+                15.33332825,
+                16.0,
+                16.66665649,
+                17.33332825,
+                18.0,
+                18.66665649,
+                19.33332825,
+                20.0,
+                20.66665649,
+                21.33332825,
+                22.0,
+                22.66665649,
+                23.33332825,
+                24.0,
+                24.66665649,
+                25.33332825,
+                26.0,
+                26.66665649,
+                27.33332825,
+                28.0,
+                28.66665649,
+                29.33332825,
+                30.0,
+                30.66665649,
+                31.33332825,
+                32.0,
+                32.66665649,
+                33.33331299,
+                34.0,
+                34.66665649,
+                35.33331299,
+                36.0,
+                36.66665649,
+                37.33331299,
+                38.0,
+                38.66665649,
+                39.33331299,
+                40.0,
+                40.66665649,
+                41.33331299,
+                42.0,
+            ]
+        )
         y_pred = np.reshape(y_pred, [-1])
         y_test = np.reshape(y_test, [-1])
         np.testing.assert_almost_equal(y_test, y_pred, 5)
@@ -55,25 +114,24 @@ class TestOpCopyFltNvnmd(tf.test.TestCase):
     def setUp(self):
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
-    
+
     def test_op(self):
         # graph
-        t_x = tf.placeholder(tf.float64, [None, 1], 't_x')
+        t_x = tf.placeholder(tf.float64, [None, 1], "t_x")
         t_y, t_y2 = op_module.copy_flt_nvnmd(t_x)
         # feed_dic
         x = np.reshape(np.arange(0, 8) / 3.0, [-1, 1])
-        feed_dict = {
-            t_x : x
-        }
+        feed_dict = {t_x: x}
         # get value and test
         self.sess.run(tf.global_variables_initializer())
-        y_pred, y_pred2= self.sess.run([t_y, t_y2], feed_dict=feed_dict)
-        y_test = np.array([
-            0., 0.33333325, 0.66666651, 1.,
-            1.33333302, 1.66666603, 2., 2.33333206
-        ])
+        y_pred, y_pred2 = self.sess.run([t_y, t_y2], feed_dict=feed_dict)
+        y_test = np.array(
+            [0.0, 0.33333325, 0.66666651, 1.0, 1.33333302, 1.66666603, 2.0, 2.33333206]
+        )
         y_pred = np.reshape(y_pred, [-1])
         y_pred2 = np.reshape(y_pred2, [-1])
         y_test = np.reshape(y_test, [-1])
@@ -86,24 +144,22 @@ class TestOpDotmulFltNvnmd(tf.test.TestCase):
     def setUp(self):
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
-    
+
     def test_op(self):
         # graph
-        t_x = tf.placeholder(tf.float64, [None, 4], 't_x')
+        t_x = tf.placeholder(tf.float64, [None, 4], "t_x")
         t_y = op_module.dotmul_flt_nvnmd(t_x, t_x)
         # feed_dic
         x = np.reshape(np.arange(0, 8) / 3.0, [-1, 4])
-        feed_dict = {
-            t_x : x
-        }
+        feed_dict = {t_x: x}
         # get value and test
         self.sess.run(tf.global_variables_initializer())
         y_pred = self.sess.run(t_y, feed_dict=feed_dict)
-        y_test = np.array([
-            1.55555, 13.99998
-        ])
+        y_test = np.array([1.55555, 13.99998])
         y_pred = np.reshape(y_pred, [-1])
         y_test = np.reshape(y_test, [-1])
         np.testing.assert_almost_equal(y_test, y_pred, 5)
@@ -114,24 +170,22 @@ class TestOpFltNvnmd(tf.test.TestCase):
     def setUp(self):
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
-    
+
     def test_op(self):
         # graph
-        t_x = tf.placeholder(tf.float64, [None, 4], 't_x')
+        t_x = tf.placeholder(tf.float64, [None, 4], "t_x")
         t_y = op_module.flt_nvnmd(t_x)
         # feed_dic
         x = np.reshape(np.arange(0, 8) / 3.0, [-1, 4])
-        feed_dict = {
-            t_x : x
-        }
+        feed_dict = {t_x: x}
         # get value and test
         self.sess.run(tf.global_variables_initializer())
         y_pred = self.sess.run(t_y, feed_dict=feed_dict)
-        y_test = np.array([
-            0., 0.33333, 0.66667, 1., 1.33333, 1.66667, 2., 2.33333
-        ])
+        y_test = np.array([0.0, 0.33333, 0.66667, 1.0, 1.33333, 1.66667, 2.0, 2.33333])
         y_pred = np.reshape(y_pred, [-1])
         y_test = np.reshape(y_test, [-1])
         np.testing.assert_almost_equal(y_test, y_pred, 5)
@@ -142,68 +196,172 @@ class TestOpMapFltNvnmd(tf.test.TestCase):
     def setUp(self):
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
 
     def test_op(self):
         map_path = str(tests_path / os.path.join("nvnmd", "map.npy"))
         mapt = np.load(map_path, allow_pickle=True)[0]
-        table = GLOBAL_NP_FLOAT_PRECISION(np.concatenate([mapt['s'][0], mapt['h'][0]], axis=1))
-        table_grad = GLOBAL_NP_FLOAT_PRECISION(np.concatenate([mapt['s_grad'][0], mapt['h_grad'][0]], axis=1))
-        table_info = mapt['cfg_u2s']
+        table = GLOBAL_NP_FLOAT_PRECISION(
+            np.concatenate([mapt["s"][0], mapt["h"][0]], axis=1)
+        )
+        table_grad = GLOBAL_NP_FLOAT_PRECISION(
+            np.concatenate([mapt["s_grad"][0], mapt["h_grad"][0]], axis=1)
+        )
+        table_info = mapt["cfg_u2s"]
         table_info = np.array([np.float64(v) for vs in table_info for v in vs])
         table_info = GLOBAL_NP_FLOAT_PRECISION(table_info)
         # graph
-        t_x = tf.placeholder(tf.float64, [None, 1], 't_x')
-        t_table = tf.placeholder(tf.float64, [None, None], 't_table')
-        t_table_grad = tf.placeholder(tf.float64, [None, None], 't_table_grad')
-        t_table_info = tf.placeholder(tf.float64, [None], 't_table_info')
+        t_x = tf.placeholder(tf.float64, [None, 1], "t_x")
+        t_table = tf.placeholder(tf.float64, [None, None], "t_table")
+        t_table_grad = tf.placeholder(tf.float64, [None, None], "t_table_grad")
+        t_table_info = tf.placeholder(tf.float64, [None], "t_table_info")
         t_y = op_module.map_flt_nvnmd(t_x, t_table, t_table_grad, t_table_info)
         # feed_dic
         x = np.reshape(np.arange(0, 8**2), [-1, 1])
         feed_dict = {
-            t_x : x,
-            t_table : table,
-            t_table_grad : table_grad * 0.0,
-            t_table_info : np.reshape(np.array(table_info), [-1])
+            t_x: x,
+            t_table: table,
+            t_table_grad: table_grad * 0.0,
+            t_table_info: np.reshape(np.array(table_info), [-1]),
         }
         # get value and test
         self.sess.run(tf.global_variables_initializer())
         y_pred = self.sess.run(t_y, feed_dict=feed_dict)
-        y_test = np.array([
-            -4.02932405e-01,  0.00000000e+00,  1.27062531e+01,  2.10604095e+01,
-             8.86666107e+00,  1.05302048e+01,  7.16565704e+00,  7.02013779e+00,
-             6.15166092e+00,  5.26510239e+00,  5.45392990e+00,  4.20795822e+00,
-             4.91504288e+00,  3.48788261e+00,  4.46474457e+00,  2.95572662e+00,
-             4.06997681e+00,  2.54059982e+00,  3.71370125e+00,  2.20451164e+00,
-             3.38652611e+00,  1.92516804e+00,  3.08297348e+00,  1.68853760e+00,
-             2.79967117e+00,  1.48526478e+00,  2.53443527e+00,  1.30881405e+00,
-             2.28576660e+00,  1.15443516e+00,  2.05257607e+00,  1.01856136e+00,
-             1.83401871e+00,  8.98437500e-01,  1.62939548e+00,  7.91882038e-01,
-             1.43810177e+00,  6.97134972e-01,  1.25958920e+00,  6.12747669e-01,
-             1.09334564e+00,  5.37512302e-01,  9.38878059e-01,  4.70406055e-01,
-             7.95710087e-01,  4.10553455e-01,  6.63372517e-01,  3.57197762e-01,
-             5.41402817e-01,  3.09679031e-01,  4.29343462e-01,  2.67416716e-01,
-             3.26739788e-01,  2.29896545e-01,  2.33141899e-01,  1.96660519e-01,
-             1.48102522e-01,  1.67298198e-01,  7.11788535e-02,  1.41440034e-01,
-             1.93022378e-03,  1.18751287e-01, -6.00755513e-02,  9.89289284e-02,
-            -1.15272462e-01,  8.16950202e-02, -1.64083123e-01,  6.67971969e-02,
-            -2.06929684e-01,  5.40025234e-02, -2.44227648e-01,  4.30970192e-02,
-            -2.76385307e-01,  3.38837802e-02, -3.03807735e-01,  2.61801332e-02,
-            -3.26895952e-01,  1.98162049e-02, -3.46041203e-01,  1.46353990e-02,
-            -3.61629009e-01,  1.04917213e-02, -3.74043703e-01,  7.24812597e-03,
-            -3.83658171e-01,  4.77796420e-03, -3.90845537e-01,  2.96119228e-03,
-            -3.95965099e-01,  1.68743543e-03, -3.99380922e-01,  8.50534532e-04,
-            -4.01440382e-01,  3.53393843e-04, -4.02492762e-01,  1.03041355e-04,
-            -4.02877808e-01,  1.26575978e-05, -4.02932405e-01,  0.00000000e+00,
-            -4.02932405e-01,  0.00000000e+00, -4.02932405e-01,  0.00000000e+00,
-            -4.02932405e-01,  0.00000000e+00, -4.02932405e-01,  0.00000000e+00,
-            -4.02932405e-01,  0.00000000e+00, -4.02932405e-01,  0.00000000e+00,
-            -4.02932405e-01,  0.00000000e+00, -4.02932405e-01,  0.00000000e+00,
-            -4.02932405e-01,  0.00000000e+00, -4.02932405e-01,  0.00000000e+00,
-            -4.02932405e-01,  0.00000000e+00, -4.02932405e-01,  0.00000000e+00,
-            -4.02932405e-01,  0.00000000e+00, -4.02932405e-01,  0.00000000e+00
-        ])
+        y_test = np.array(
+            [
+                -4.02932405e-01,
+                0.00000000e00,
+                1.27062531e01,
+                2.10604095e01,
+                8.86666107e00,
+                1.05302048e01,
+                7.16565704e00,
+                7.02013779e00,
+                6.15166092e00,
+                5.26510239e00,
+                5.45392990e00,
+                4.20795822e00,
+                4.91504288e00,
+                3.48788261e00,
+                4.46474457e00,
+                2.95572662e00,
+                4.06997681e00,
+                2.54059982e00,
+                3.71370125e00,
+                2.20451164e00,
+                3.38652611e00,
+                1.92516804e00,
+                3.08297348e00,
+                1.68853760e00,
+                2.79967117e00,
+                1.48526478e00,
+                2.53443527e00,
+                1.30881405e00,
+                2.28576660e00,
+                1.15443516e00,
+                2.05257607e00,
+                1.01856136e00,
+                1.83401871e00,
+                8.98437500e-01,
+                1.62939548e00,
+                7.91882038e-01,
+                1.43810177e00,
+                6.97134972e-01,
+                1.25958920e00,
+                6.12747669e-01,
+                1.09334564e00,
+                5.37512302e-01,
+                9.38878059e-01,
+                4.70406055e-01,
+                7.95710087e-01,
+                4.10553455e-01,
+                6.63372517e-01,
+                3.57197762e-01,
+                5.41402817e-01,
+                3.09679031e-01,
+                4.29343462e-01,
+                2.67416716e-01,
+                3.26739788e-01,
+                2.29896545e-01,
+                2.33141899e-01,
+                1.96660519e-01,
+                1.48102522e-01,
+                1.67298198e-01,
+                7.11788535e-02,
+                1.41440034e-01,
+                1.93022378e-03,
+                1.18751287e-01,
+                -6.00755513e-02,
+                9.89289284e-02,
+                -1.15272462e-01,
+                8.16950202e-02,
+                -1.64083123e-01,
+                6.67971969e-02,
+                -2.06929684e-01,
+                5.40025234e-02,
+                -2.44227648e-01,
+                4.30970192e-02,
+                -2.76385307e-01,
+                3.38837802e-02,
+                -3.03807735e-01,
+                2.61801332e-02,
+                -3.26895952e-01,
+                1.98162049e-02,
+                -3.46041203e-01,
+                1.46353990e-02,
+                -3.61629009e-01,
+                1.04917213e-02,
+                -3.74043703e-01,
+                7.24812597e-03,
+                -3.83658171e-01,
+                4.77796420e-03,
+                -3.90845537e-01,
+                2.96119228e-03,
+                -3.95965099e-01,
+                1.68743543e-03,
+                -3.99380922e-01,
+                8.50534532e-04,
+                -4.01440382e-01,
+                3.53393843e-04,
+                -4.02492762e-01,
+                1.03041355e-04,
+                -4.02877808e-01,
+                1.26575978e-05,
+                -4.02932405e-01,
+                0.00000000e00,
+                -4.02932405e-01,
+                0.00000000e00,
+                -4.02932405e-01,
+                0.00000000e00,
+                -4.02932405e-01,
+                0.00000000e00,
+                -4.02932405e-01,
+                0.00000000e00,
+                -4.02932405e-01,
+                0.00000000e00,
+                -4.02932405e-01,
+                0.00000000e00,
+                -4.02932405e-01,
+                0.00000000e00,
+                -4.02932405e-01,
+                0.00000000e00,
+                -4.02932405e-01,
+                0.00000000e00,
+                -4.02932405e-01,
+                0.00000000e00,
+                -4.02932405e-01,
+                0.00000000e00,
+                -4.02932405e-01,
+                0.00000000e00,
+                -4.02932405e-01,
+                0.00000000e00,
+                -4.02932405e-01,
+                0.00000000e00,
+            ]
+        )
         y_pred = np.reshape(y_pred, [-1])
         y_test = np.reshape(y_test, [-1])
         np.testing.assert_almost_equal(y_test, y_pred, 5)
@@ -214,29 +372,42 @@ class TestOpMatmulFitnetNvnmd(tf.test.TestCase):
     def setUp(self):
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
-    
+
     def test_op(self):
         # graph
-        t_x = tf.placeholder(tf.float64, [None, 4], 't_x')
-        t_w = tf.placeholder(tf.float64, [4, 4], 't_w')
+        t_x = tf.placeholder(tf.float64, [None, 4], "t_x")
+        t_w = tf.placeholder(tf.float64, [4, 4], "t_w")
         t_y = op_module.matmul_fitnet_nvnmd(t_x, t_w, 23, 19, 1)
         # feed_dic
         x = np.reshape(np.arange(0, 16) / 3.0, [-1, 4])
-        feed_dict = {
-            t_x : x,
-            t_w : x
-        }
+        feed_dict = {t_x: x, t_w: x}
         # get value and test
         self.sess.run(tf.global_variables_initializer())
         y_pred = self.sess.run(t_y, feed_dict=feed_dict)
-        y_test = np.array([
-            6.22222,  6.88888,  7.55555,  8.22222,
-            16.88887, 19.33331, 21.77776, 24.22221,
-            27.55553, 31.77774, 35.99997, 40.2222 ,
-            38.22219, 44.22217, 50.22218, 56.22219
-        ])
+        y_test = np.array(
+            [
+                6.22222,
+                6.88888,
+                7.55555,
+                8.22222,
+                16.88887,
+                19.33331,
+                21.77776,
+                24.22221,
+                27.55553,
+                31.77774,
+                35.99997,
+                40.2222,
+                38.22219,
+                44.22217,
+                50.22218,
+                56.22219,
+            ]
+        )
         y_pred = np.reshape(y_pred, [-1])
         y_test = np.reshape(y_test, [-1])
         np.testing.assert_almost_equal(y_test, y_pred, 5)
@@ -247,29 +418,42 @@ class TestOpMatmulFltNvnmd(tf.test.TestCase):
     def setUp(self):
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
-    
+
     def test_op(self):
         # graph
-        t_x = tf.placeholder(tf.float64, [None, 4], 't_x')
-        t_w = tf.placeholder(tf.float64, [4, 4], 't_w')
+        t_x = tf.placeholder(tf.float64, [None, 4], "t_x")
+        t_w = tf.placeholder(tf.float64, [4, 4], "t_w")
         t_y = op_module.matmul_flt_nvnmd(t_x, t_w, 0, 0)
         # feed_dic
         x = np.reshape(np.arange(0, 16) / 3.0, [-1, 4])
-        feed_dict = {
-            t_x : x,
-            t_w : x
-        }
+        feed_dict = {t_x: x, t_w: x}
         # get value and test
         self.sess.run(tf.global_variables_initializer())
         y_pred = self.sess.run(t_y, feed_dict=feed_dict)
-        y_test = np.array([
-            6.22222,  6.88888,  7.55555,  8.22221, 
-            16.88887, 19.33331, 21.77776, 24.2222 , 
-            27.55553, 31.77774, 35.99997, 40.2222 ,
-            38.22217, 44.22217, 50.22217, 56.22217
-        ])
+        y_test = np.array(
+            [
+                6.22222,
+                6.88888,
+                7.55555,
+                8.22221,
+                16.88887,
+                19.33331,
+                21.77776,
+                24.2222,
+                27.55553,
+                31.77774,
+                35.99997,
+                40.2222,
+                38.22217,
+                44.22217,
+                50.22217,
+                56.22217,
+            ]
+        )
         y_pred = np.reshape(y_pred, [-1])
         y_test = np.reshape(y_test, [-1])
         np.testing.assert_almost_equal(y_test, y_pred, 5)
@@ -280,29 +464,42 @@ class TestOpMatmulFltNvnmd(tf.test.TestCase):
     def setUp(self):
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
-    
+
     def test_op(self):
         # graph
-        t_x = tf.placeholder(tf.float64, [None, 4], 't_x')
-        t_w = tf.placeholder(tf.float64, [4, 4], 't_w')
+        t_x = tf.placeholder(tf.float64, [None, 4], "t_x")
+        t_w = tf.placeholder(tf.float64, [4, 4], "t_w")
         t_y = op_module.matmul_flt_nvnmd(t_x, t_w, 0, 0)
         # feed_dic
         x = np.reshape(np.arange(0, 16) / 3.0, [-1, 4])
-        feed_dict = {
-            t_x : x,
-            t_w : x
-        }
+        feed_dict = {t_x: x, t_w: x}
         # get value and test
         self.sess.run(tf.global_variables_initializer())
         y_pred = self.sess.run(t_y, feed_dict=feed_dict)
-        y_test = np.array([
-            6.22222,  6.88888,  7.55555,  8.22221, 
-            16.88887, 19.33331, 21.77776, 24.2222 , 
-            27.55553, 31.77774, 35.99997, 40.2222 ,
-            38.22217, 44.22217, 50.22217, 56.22217
-        ])
+        y_test = np.array(
+            [
+                6.22222,
+                6.88888,
+                7.55555,
+                8.22221,
+                16.88887,
+                19.33331,
+                21.77776,
+                24.2222,
+                27.55553,
+                31.77774,
+                35.99997,
+                40.2222,
+                38.22217,
+                44.22217,
+                50.22217,
+                56.22217,
+            ]
+        )
         y_pred = np.reshape(y_pred, [-1])
         y_test = np.reshape(y_test, [-1])
         np.testing.assert_almost_equal(y_test, y_pred, 5)
@@ -313,61 +510,88 @@ class TestOpMatmulFlt2fixNvnmd(tf.test.TestCase):
     def setUp(self):
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
-    
+
     def test_op(self):
         # graph
-        t_x = tf.placeholder(tf.float64, [None, 4], 't_x')
-        t_w = tf.placeholder(tf.float64, [4, 4], 't_w')
+        t_x = tf.placeholder(tf.float64, [None, 4], "t_x")
+        t_w = tf.placeholder(tf.float64, [4, 4], "t_w")
         t_y = op_module.matmul_flt2fix_nvnmd(t_x, t_w, 23)
         # feed_dic
         x = np.reshape(np.arange(0, 16) / 3.0, [-1, 4])
-        feed_dict = {
-            t_x : x,
-            t_w : x
-        }
+        feed_dict = {t_x: x, t_w: x}
         # get value and test
         self.sess.run(tf.global_variables_initializer())
         y_pred = self.sess.run(t_y, feed_dict=feed_dict)
-        y_test = np.array([
-            6.22222,  6.88888,  7.55555,  8.22221, 
-            16.88887, 19.33331, 21.77776, 24.2222 , 
-            27.55554, 31.77776, 35.99997, 40.2222 ,
-            38.2222 , 44.2222 , 50.22217, 56.2222 
-        ])
+        y_test = np.array(
+            [
+                6.22222,
+                6.88888,
+                7.55555,
+                8.22221,
+                16.88887,
+                19.33331,
+                21.77776,
+                24.2222,
+                27.55554,
+                31.77776,
+                35.99997,
+                40.2222,
+                38.2222,
+                44.2222,
+                50.22217,
+                56.2222,
+            ]
+        )
         y_pred = np.reshape(y_pred, [-1])
         y_test = np.reshape(y_test, [-1])
         np.testing.assert_almost_equal(y_test, y_pred, 5)
         tf.reset_default_graph()
 
+
 class TestOpMulFltNvnmd(tf.test.TestCase):
     def setUp(self):
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
-    
+
     def test_op(self):
         # graph
-        t_x = tf.placeholder(tf.float64, [None, 4], 't_x')
-        t_w = tf.placeholder(tf.float64, [4, 4], 't_w')
+        t_x = tf.placeholder(tf.float64, [None, 4], "t_x")
+        t_w = tf.placeholder(tf.float64, [4, 4], "t_w")
         t_y = op_module.mul_flt_nvnmd(t_x, t_w)
         # feed_dic
         x = np.reshape(np.arange(0, 16) / 3.0, [-1, 4])
-        feed_dict = {
-            t_x : x,
-            t_w : x
-        }
+        feed_dict = {t_x: x, t_w: x}
         # get value and test
         self.sess.run(tf.global_variables_initializer())
         y_pred = self.sess.run(t_y, feed_dict=feed_dict)
-        y_test = np.array([
-            0.     ,  0.11111,  0.44444,  1.     ,
-            1.77778,  2.77777, 4.     ,  5.44444,
-            7.11111,  9.     , 11.1111 , 13.44444,
-            16.     , 18.77776, 21.77774, 25.
-        ])
+        y_test = np.array(
+            [
+                0.0,
+                0.11111,
+                0.44444,
+                1.0,
+                1.77778,
+                2.77777,
+                4.0,
+                5.44444,
+                7.11111,
+                9.0,
+                11.1111,
+                13.44444,
+                16.0,
+                18.77776,
+                21.77774,
+                25.0,
+            ]
+        )
         y_pred = np.reshape(y_pred, [-1])
         y_test = np.reshape(y_test, [-1])
         np.testing.assert_almost_equal(y_test, y_pred, 5)
@@ -378,24 +602,22 @@ class TestOpQuantizeNvnmd(tf.test.TestCase):
     def setUp(self):
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
-    
+
     def test_op(self):
         # graph
-        t_x = tf.placeholder(tf.float64, [None, 4], 't_x')
+        t_x = tf.placeholder(tf.float64, [None, 4], "t_x")
         t_y = op_module.quantize_nvnmd(t_x, 0, 23, 23, -1)
         # feed_dic
         x = np.reshape(np.arange(0, 8) / 3.0, [-1, 4])
-        feed_dict = {
-            t_x : x
-        }
+        feed_dict = {t_x: x}
         # get value and test
         self.sess.run(tf.global_variables_initializer())
         y_pred = self.sess.run(t_y, feed_dict=feed_dict)
-        y_test = np.array([
-            0., 0.33333, 0.66667, 1., 1.33333, 1.66667, 2., 2.33333
-        ])
+        y_test = np.array([0.0, 0.33333, 0.66667, 1.0, 1.33333, 1.66667, 2.0, 2.33333])
         y_pred = np.reshape(y_pred, [-1])
         y_test = np.reshape(y_test, [-1])
         np.testing.assert_almost_equal(y_test, y_pred, 5)
@@ -406,29 +628,27 @@ class TestOpTanh4FltNvnmd(tf.test.TestCase):
     def setUp(self):
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
-    
+
     def test_op(self):
         # graph
-        t_x = tf.placeholder(tf.float64, [None, 4], 't_x')
+        t_x = tf.placeholder(tf.float64, [None, 4], "t_x")
         t_y = op_module.tanh4_flt_nvnmd(t_x)
         # feed_dic
         x = np.reshape(np.arange(0, 8) / 3.0, [-1, 4])
-        feed_dict = {
-            t_x : x
-        }
+        feed_dict = {t_x: x}
         # get value and test
         self.sess.run(tf.global_variables_initializer())
         y_pred = self.sess.run(t_y, feed_dict=feed_dict)
-        y_test = np.array([
-            0., 0.32485, 0.60494, 0.8125, 0.93827, 0.99151, 1., 1.     
-        ])
+        y_test = np.array([0.0, 0.32485, 0.60494, 0.8125, 0.93827, 0.99151, 1.0, 1.0])
         y_pred = np.reshape(y_pred, [-1])
         y_test = np.reshape(y_test, [-1])
         np.testing.assert_almost_equal(y_test, y_pred, 5)
         tf.reset_default_graph()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_nvnmd_se_a.py b/source/tests/test_nvnmd_se_a.py
index 1b2571342c..b2cca07c40 100644
--- a/source/tests/test_nvnmd_se_a.py
+++ b/source/tests/test_nvnmd_se_a.py
@@ -1,25 +1,52 @@
+import os
+import pickle
+import sys
+import unittest
 
-import dpdata,os,sys,unittest
+import dpdata
 import numpy as np
-from deepmd.env import tf
-import pickle
-from common import Data, gen_data, j_loader
+from common import (
+    Data,
+    DataSystem,
+    gen_data,
+    j_loader,
+)
 
-from common import DataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import EnerFitting
-from deepmd.model import EnerModel
-from deepmd.common import j_must_have
-from deepmd.utils.type_embed import embed_atom_type, TypeEmbedNet
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    EnerFitting,
+)
+from deepmd.model import (
+    EnerModel,
+)
+from deepmd.utils.type_embed import (
+    TypeEmbedNet,
+    embed_atom_type,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
 #
-from common import tests_path
-from deepmd.nvnmd.utils.config import nvnmd_cfg
-from deepmd.nvnmd.data.data import jdata_deepmd_input
+from common import (
+    tests_path,
+)
+
+from deepmd.nvnmd.data.data import (
+    jdata_deepmd_input,
+)
+from deepmd.nvnmd.utils.config import (
+    nvnmd_cfg,
+)
 
 
 class TestModel(tf.test.TestCase):
@@ -42,12 +69,12 @@ def setUp(self):
             for iy in range(4):
                 for iz in range(4):
                     if (ix + iy + iz) % 2 == 0:
-                        coord[ct] = np.array([ix*a, iy*a, iz*a])
+                        coord[ct] = np.array([ix * a, iy * a, iz * a])
                         ct += 1
                     else:
-                        coord[ct2] = np.array([ix*a, iy*a, iz*a])
+                        coord[ct2] = np.array([ix * a, iy * a, iz * a])
                         ct2 += 1
-        coord = coord.reshape([1, natom*3])
+        coord = coord.reshape([1, natom * 3])
         natoms = np.array([64, 64, 32, 32])
         mesh = np.array([0, 0, 0, 2, 2, 2])
         #
@@ -62,74 +89,82 @@ def test_descriptor_one_side_qnn(self):
 
         Reference:
             test_descrpt_se_a_type.py
-            
-        Note: 
+
+        Note:
             The test_nvnmd_se_a.py must be run after test_nvnmd_entrypoints.py.
             Because the data file map.npy ia generated in running test_nvnmd_entrypoints.py.
         """
         tf.reset_default_graph()
         # open NVNMD
-        jdata_cf = jdata_deepmd_input['nvnmd']
-        jdata_cf['config_file'] = str(tests_path / os.path.join("nvnmd", "config.npy"))
-        jdata_cf['weight_file'] = str(tests_path / os.path.join("nvnmd", "weight.npy"))
-        jdata_cf['map_file'] = str(tests_path / os.path.join("nvnmd", "map.npy"))
-        jdata_cf['enable'] = True
+        jdata_cf = jdata_deepmd_input["nvnmd"]
+        jdata_cf["config_file"] = str(tests_path / os.path.join("nvnmd", "config.npy"))
+        jdata_cf["weight_file"] = str(tests_path / os.path.join("nvnmd", "weight.npy"))
+        jdata_cf["map_file"] = str(tests_path / os.path.join("nvnmd", "map.npy"))
+        jdata_cf["enable"] = True
         nvnmd_cfg.init_from_jdata(jdata_cf)
         nvnmd_cfg.quantize_descriptor = True
         nvnmd_cfg.restore_descriptor = True
         # load input
         jfile = str(tests_path / os.path.join("nvnmd", "train_cnn.json"))
         jdata = j_loader(jfile)
-        ntypes = nvnmd_cfg.dscp['ntype']
+        ntypes = nvnmd_cfg.dscp["ntype"]
 
         # build descriptor
-        jdata['model']['descriptor'].pop('type', None)
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed = True)
+        jdata["model"]["descriptor"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
 
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None, None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        t_coord = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, None], name="i_coord"
+        )
+        t_type = tf.placeholder(tf.int32, [None, None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
 
-        dout \
-            = descrpt.build(
-                t_coord,
-                t_type,
-                t_natoms,
-                t_box,
-                t_mesh,
-                {},
-                reuse = False,
-                suffix = "_se_a_nvnmd"
-            )
+        dout = descrpt.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            {},
+            reuse=False,
+            suffix="_se_a_nvnmd",
+        )
         # data
         feed_dict_test = {
-                          t_coord:         self.coord,
-                          t_box:           self.box,
-                          t_type:          self.types,
-                          t_natoms:        self.natoms,
-                          t_mesh:          self.mesh,
-                          is_training:     False}
+            t_coord: self.coord,
+            t_box: self.box,
+            t_type: self.types,
+            t_natoms: self.natoms,
+            t_mesh: self.mesh,
+            is_training: False,
+        }
         # run
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [model_dout] = sess.run([dout], feed_dict = feed_dict_test)
+        [model_dout] = sess.run([dout], feed_dict=feed_dict_test)
         model_dout = model_dout.reshape([-1])
         # compare
         ref_dout = [
-            0.0136348009,  0.0083287954, -0.0639076233,  0.0129181147,
-            0.0050876141, -0.0390379429,  0.0078909397,  0.0022796392,
-            0.2995386124, -0.0605480671
+            0.0136348009,
+            0.0083287954,
+            -0.0639076233,
+            0.0129181147,
+            0.0050876141,
+            -0.0390379429,
+            0.0078909397,
+            0.0022796392,
+            0.2995386124,
+            -0.0605480671,
         ]
         places = 10
         np.testing.assert_almost_equal(model_dout[0:10], ref_dout, places)
         # close NVNMD
-        jdata_cf['enable'] = False
+        jdata_cf["enable"] = False
         nvnmd_cfg.init_from_jdata(jdata_cf)
 
-        
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     unittest.main()
-        
diff --git a/source/tests/test_nvnmd_utils.py b/source/tests/test_nvnmd_utils.py
index a7cf6d46ed..eafd1b4d6c 100644
--- a/source/tests/test_nvnmd_utils.py
+++ b/source/tests/test_nvnmd_utils.py
@@ -1,11 +1,20 @@
 import os
-import numpy as np
 import unittest
 
-from deepmd.env import tf
-from deepmd.nvnmd.utils.network import one_layer
-from deepmd.nvnmd.utils.config import nvnmd_cfg
-from deepmd.nvnmd.data.data import jdata_deepmd_input
+import numpy as np
+
+from deepmd.env import (
+    tf,
+)
+from deepmd.nvnmd.data.data import (
+    jdata_deepmd_input,
+)
+from deepmd.nvnmd.utils.config import (
+    nvnmd_cfg,
+)
+from deepmd.nvnmd.utils.network import (
+    one_layer,
+)
 
 
 class TestNvnmdNetwork(tf.test.TestCase):
@@ -13,45 +22,51 @@ def setUp(self):
         tf.reset_default_graph()
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
-    
+
     def test_onelayer(self):
         # open NVNMD
-        jdata = jdata_deepmd_input['nvnmd']
-        jdata['config_file'] = "none"
-        jdata['weight_file'] = "none"
-        jdata['map_file'] = "none"
-        jdata['enable'] = True
+        jdata = jdata_deepmd_input["nvnmd"]
+        jdata["config_file"] = "none"
+        jdata["weight_file"] = "none"
+        jdata["map_file"] = "none"
+        jdata["enable"] = True
         nvnmd_cfg.init_from_jdata(jdata)
         w = np.array([-0.313429, 0.783452, -0.423276, 0.832279]).reshape(4, 1)
         b = np.array([0.3482787]).reshape([1, 1])
-        nvnmd_cfg.weight = {
-            "nvnmd.matrix": w,
-            "nvnmd.bias": b
-        }
+        nvnmd_cfg.weight = {"nvnmd.matrix": w, "nvnmd.bias": b}
         nvnmd_cfg.quantize_fitting_net = True
         nvnmd_cfg.restore_fitting_net = True
         # build
-        x = np.array([-0.313429, 1.436861, 0.324769, -1.4823674, 
-        0.783452, -0.171208, -0.033421, -1.324673]).reshape([2, 4])
+        x = np.array(
+            [
+                -0.313429,
+                1.436861,
+                0.324769,
+                -1.4823674,
+                0.783452,
+                -0.171208,
+                -0.033421,
+                -1.324673,
+            ]
+        ).reshape([2, 4])
         y = np.array([0.19909, -0.86702]).reshape([-1])
-        ty = one_layer(
-            tf.constant(x),
-            1,
-            name="nvnmd"
-        )
+        ty = one_layer(tf.constant(x), 1, name="nvnmd")
         # run
         self.sess.run(tf.global_variables_initializer())
         typ = self.sess.run(ty)
         typ = typ.reshape([-1])
         np.testing.assert_almost_equal(typ, y, 5)
         # close NVNMD
-        jdata['enable'] = False
+        jdata["enable"] = False
         nvnmd_cfg.init_from_jdata(jdata)
         nvnmd_cfg.weight = {}
         nvnmd_cfg.quantize_fitting_net = False
         nvnmd_cfg.restore_fitting_net = False
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_parallel_training.py b/source/tests/test_parallel_training.py
index 320d8ca22c..096f2a0186 100644
--- a/source/tests/test_parallel_training.py
+++ b/source/tests/test_parallel_training.py
@@ -2,9 +2,13 @@
 import subprocess as sp
 import unittest
 
-from deepmd.cluster.local import get_gpus
+from common import (
+    tests_path,
+)
 
-from common import tests_path
+from deepmd.cluster.local import (
+    get_gpus,
+)
 
 
 class TestSingleMachine(unittest.TestCase):
@@ -12,26 +16,37 @@ def setUp(self):
         try:
             import horovod
         except ImportError:
-            raise unittest.SkipTest("Package horovod is required for parallel-training tests.")
+            raise unittest.SkipTest(
+                "Package horovod is required for parallel-training tests."
+            )
         self.input_file = str(tests_path / "model_compression" / "input.json")
 
     def test_two_workers(self):
-        command = 'horovodrun -np 2 dp train -m workers ' + self.input_file
+        command = "horovodrun -np 2 dp train -m workers " + self.input_file
         penv = os.environ.copy()
         num_gpus = len(get_gpus() or [])
         if num_gpus > 1:
-            penv['CUDA_VISIBLE_DEVICES'] = '0,1'
+            penv["CUDA_VISIBLE_DEVICES"] = "0,1"
         elif num_gpus == 1:
-            raise unittest.SkipTest("At least 2 GPU cards are needed for parallel-training tests.")
-        popen = sp.Popen(command, shell=True, cwd=str(tests_path), env=penv, stdout=sp.PIPE, stderr=sp.STDOUT)
-        for line in iter(popen.stdout.readline, b''):
-            if hasattr(line, 'decode'):
-                line = line.decode('utf-8')
+            raise unittest.SkipTest(
+                "At least 2 GPU cards are needed for parallel-training tests."
+            )
+        popen = sp.Popen(
+            command,
+            shell=True,
+            cwd=str(tests_path),
+            env=penv,
+            stdout=sp.PIPE,
+            stderr=sp.STDOUT,
+        )
+        for line in iter(popen.stdout.readline, b""):
+            if hasattr(line, "decode"):
+                line = line.decode("utf-8")
             line = line.rstrip()
             print(line)
         popen.wait()
-        self.assertEqual(0, popen.returncode, 'Parallel training failed!')
+        self.assertEqual(0, popen.returncode, "Parallel training failed!")
 
 
-if __name__ == '__main__':
-    unittest.main()
\ No newline at end of file
+if __name__ == "__main__":
+    unittest.main()
diff --git a/source/tests/test_polar_se_a.py b/source/tests/test_polar_se_a.py
index fe7a5f2288..79fcbd9360 100644
--- a/source/tests/test_polar_se_a.py
+++ b/source/tests/test_polar_se_a.py
@@ -1,120 +1,169 @@
-import dpdata,os,sys,unittest
-import numpy as np
-from deepmd.env import tf
-from common import Data,gen_data, j_loader
-from common import finite_difference, strerch_box
+import os
+import sys
+import unittest
 
-from common import DataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import PolarFittingSeA
-from deepmd.model import PolarModel
-from deepmd.common import j_must_have
+import dpdata
+import numpy as np
+from common import (
+    Data,
+    DataSystem,
+    finite_difference,
+    gen_data,
+    j_loader,
+    strerch_box,
+)
+
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    PolarFittingSeA,
+)
+from deepmd.model import (
+    PolarModel,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
+
 class TestModel(tf.test.TestCase):
-    def setUp(self) :
+    def setUp(self):
         gen_data()
 
     def test_model(self):
-        jfile = 'polar_se_a.json'
+        jfile = "polar_se_a.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
-        
-        test_data = data.get_test ()
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
+
+        test_data = data.get_test()
         numb_test = 1
-        
-        jdata['model']['descriptor'].pop('type', None)
-        jdata['model']['fitting_net'].pop('type', None)
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed = True)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = PolarFittingSeA(**jdata['model']['fitting_net'], uniform_seed = True)
+
+        jdata["model"]["descriptor"].pop("type", None)
+        jdata["model"]["fitting_net"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = PolarFittingSeA(**jdata["model"]["fitting_net"], uniform_seed=True)
         model = PolarModel(descrpt, fitting)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord' : [test_data['coord']], 
-                      'box': [test_data['box']], 
-                      'type': [test_data['type']],
-                      'natoms_vec' : [test_data['natoms_vec']],
-                      'default_mesh' : [test_data['default_mesh']],
-                      'fparam': [test_data['fparam']],
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
+            "fparam": [test_data["fparam"]],
         }
         model._compute_input_stat(input_data)
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_energy           = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial           = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener        = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [model.ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
         t_fparam = None
 
-        model_pred \
-            = model.build (t_coord, 
-                           t_type, 
-                           t_natoms, 
-                           t_box, 
-                           t_mesh,
-                           t_fparam,
-                           suffix = "polar_se_a", 
-                           reuse = False)
-        polar = model_pred['polar']
-        gpolar = model_pred['global_polar']
-        force = model_pred['force']
-        virial = model_pred['virial']
-        atom_virial = model_pred['atom_virial']
-
-        feed_dict_test = {t_prop_c:        test_data['prop_c'],
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False}
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            t_fparam,
+            suffix="polar_se_a",
+            reuse=False,
+        )
+        polar = model_pred["polar"]
+        gpolar = model_pred["global_polar"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+        atom_virial = model_pred["atom_virial"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
-        [p, gp] = sess.run([polar, gpolar], feed_dict = feed_dict_test)
+        [p, gp] = sess.run([polar, gpolar], feed_dict=feed_dict_test)
 
         p = p.reshape([-1])
-        refp = [3.39695248e+01,  2.16564043e+01,  8.18501479e-01,  2.16564043e+01,  1.38211789e+01,  5.22775159e-01,  8.18501479e-01,  5.22775159e-01, 1.97847218e-02, 8.08467431e-01,  3.42081126e+00, -2.01072261e-01,  3.42081126e+00, 1.54924596e+01, -9.06153697e-01, -2.01072261e-01, -9.06153697e-01,  5.30193262e-02]
+        refp = [
+            3.39695248e01,
+            2.16564043e01,
+            8.18501479e-01,
+            2.16564043e01,
+            1.38211789e01,
+            5.22775159e-01,
+            8.18501479e-01,
+            5.22775159e-01,
+            1.97847218e-02,
+            8.08467431e-01,
+            3.42081126e00,
+            -2.01072261e-01,
+            3.42081126e00,
+            1.54924596e01,
+            -9.06153697e-01,
+            -2.01072261e-01,
+            -9.06153697e-01,
+            5.30193262e-02,
+        ]
 
         places = 6
         np.testing.assert_almost_equal(p, refp, places)
 
         gp = gp.reshape([-1])
         refgp = np.array(refp).reshape(-1, 9).sum(0)
-        
+
         places = 5
         np.testing.assert_almost_equal(gp, refgp, places)
 
         # make sure only one frame is used
-        feed_dict_single = {t_prop_c:        test_data['prop_c'],
-                            t_coord:         np.reshape(test_data['coord']    [:1, :], [-1]),
-                            t_box:           test_data['box']                 [:1, :],
-                            t_type:          np.reshape(test_data['type']     [:1, :], [-1]),
-                            t_natoms:        test_data['natoms_vec'],
-                            t_mesh:          test_data['default_mesh'],
-                            is_training:     False}
-
-        [pf, pv, pav] = sess.run([force, virial, atom_virial], feed_dict = feed_dict_single)
+        feed_dict_single = {
+            t_prop_c: test_data["prop_c"],
+            t_coord: np.reshape(test_data["coord"][:1, :], [-1]),
+            t_box: test_data["box"][:1, :],
+            t_type: np.reshape(test_data["type"][:1, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
+
+        [pf, pv, pav] = sess.run(
+            [force, virial, atom_virial], feed_dict=feed_dict_single
+        )
         pf, pv = pf.reshape(-1), pv.reshape(-1)
         spv = pav.reshape(1, 9, -1, 9).sum(2).reshape(-1)
 
@@ -122,23 +171,32 @@ def test_model(self):
         coord0 = base_dict.pop(t_coord)
         box0 = base_dict.pop(t_box)
 
-        fdf = - finite_difference(
-                    lambda coord: sess.run(gpolar, 
-                        feed_dict={**base_dict, 
-                                t_coord:coord, 
-                                t_box:box0}).reshape(-1),
-                    test_data['coord'][:numb_test, :].reshape([-1])).reshape(-1)
-        fdv = - (finite_difference(
-                    lambda box: sess.run(gpolar, 
-                        feed_dict={**base_dict, 
-                                t_coord:strerch_box(coord0, box0, box), 
-                                t_box:box}).reshape(-1),
-                    test_data['box'][:numb_test, :]).reshape([-1,3,3]).transpose(0,2,1)
-                @ box0.reshape(3,3)).reshape(-1)
+        fdf = -finite_difference(
+            lambda coord: sess.run(
+                gpolar, feed_dict={**base_dict, t_coord: coord, t_box: box0}
+            ).reshape(-1),
+            test_data["coord"][:numb_test, :].reshape([-1]),
+        ).reshape(-1)
+        fdv = -(
+            finite_difference(
+                lambda box: sess.run(
+                    gpolar,
+                    feed_dict={
+                        **base_dict,
+                        t_coord: strerch_box(coord0, box0, box),
+                        t_box: box,
+                    },
+                ).reshape(-1),
+                test_data["box"][:numb_test, :],
+            )
+            .reshape([-1, 3, 3])
+            .transpose(0, 2, 1)
+            @ box0.reshape(3, 3)
+        ).reshape(-1)
 
         delta = 1e-4
         np.testing.assert_allclose(pf, fdf, delta)
         np.testing.assert_allclose(pv, fdv, delta)
-        # make sure atomic virial sum to virial        
+        # make sure atomic virial sum to virial
         places = 10
         np.testing.assert_almost_equal(pv, spv, places)
diff --git a/source/tests/test_polar_se_a_tebd.py b/source/tests/test_polar_se_a_tebd.py
index a8348a8795..1cd3a58493 100644
--- a/source/tests/test_polar_se_a_tebd.py
+++ b/source/tests/test_polar_se_a_tebd.py
@@ -1,120 +1,161 @@
-import dpdata, os, sys, unittest
+import os
+import sys
+import unittest
+
+import dpdata
 import numpy as np
-from deepmd.env import tf
-from common import Data, gen_data, j_loader
-from common import finite_difference, strerch_box
-
-from common import DataSystem
-from deepmd.descriptor import DescrptSeA
-from deepmd.fit import PolarFittingSeA
-from deepmd.utils.type_embed import TypeEmbedNet
-from deepmd.model import PolarModel
-from deepmd.common import j_must_have
+from common import (
+    Data,
+    DataSystem,
+    finite_difference,
+    gen_data,
+    j_loader,
+    strerch_box,
+)
 from packaging.version import parse as parse_version
 
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    DescrptSeA,
+)
+from deepmd.env import (
+    tf,
+)
+from deepmd.fit import (
+    PolarFittingSeA,
+)
+from deepmd.model import (
+    PolarModel,
+)
+from deepmd.utils.type_embed import (
+    TypeEmbedNet,
+)
+
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
 
-@unittest.skipIf(parse_version(tf.__version__) < parse_version("1.15"),
-    f"The current tf version {tf.__version__} is too low to run the new testing model.")
+@unittest.skipIf(
+    parse_version(tf.__version__) < parse_version("1.15"),
+    f"The current tf version {tf.__version__} is too low to run the new testing model.",
+)
 class TestModel(tf.test.TestCase):
     def setUp(self):
         gen_data()
 
     def test_model(self):
-        jfile = 'polar_se_a_tebd.json'
+        jfile = "polar_se_a_tebd.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        stop_batch = j_must_have(jdata, 'stop_batch')
-        rcut = j_must_have(jdata['model']['descriptor'], 'rcut')
+        stop_batch = j_must_have(jdata, "stop_batch")
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
 
         data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
 
         test_data = data.get_test()
         numb_test = 1
 
-        jdata['model']['descriptor'].pop('type', None)
-        jdata['model']['fitting_net'].pop('type', None)
-        descrpt = DescrptSeA(**jdata['model']['descriptor'], uniform_seed=True)
-        jdata['model']['fitting_net']['descrpt'] = descrpt
-        fitting = PolarFittingSeA(**jdata['model']['fitting_net'], uniform_seed=True)
-        typeebd_param = jdata['model']['type_embedding']
+        jdata["model"]["descriptor"].pop("type", None)
+        jdata["model"]["fitting_net"].pop("type", None)
+        descrpt = DescrptSeA(**jdata["model"]["descriptor"], uniform_seed=True)
+        jdata["model"]["fitting_net"]["descrpt"] = descrpt
+        fitting = PolarFittingSeA(**jdata["model"]["fitting_net"], uniform_seed=True)
+        typeebd_param = jdata["model"]["type_embedding"]
         typeebd = TypeEmbedNet(
-            neuron=typeebd_param['neuron'],
-            resnet_dt=typeebd_param['resnet_dt'],
-            seed=typeebd_param['seed'],
-            uniform_seed=True)
+            neuron=typeebd_param["neuron"],
+            resnet_dt=typeebd_param["resnet_dt"],
+            seed=typeebd_param["seed"],
+            uniform_seed=True,
+        )
         model = PolarModel(descrpt, fitting, typeebd)
 
         # model._compute_dstats([test_data['coord']], [test_data['box']], [test_data['type']], [test_data['natoms_vec']], [test_data['default_mesh']])
-        input_data = {'coord': [test_data['coord']],
-                      'box': [test_data['box']],
-                      'type': [test_data['type']],
-                      'natoms_vec': [test_data['natoms_vec']],
-                      'default_mesh': [test_data['default_mesh']],
-                      'fparam': [test_data['fparam']],
-                      }
+        input_data = {
+            "coord": [test_data["coord"]],
+            "box": [test_data["box"]],
+            "type": [test_data["type"]],
+            "natoms_vec": [test_data["natoms_vec"]],
+            "default_mesh": [test_data["default_mesh"]],
+            "fparam": [test_data["fparam"]],
+        }
         model._compute_input_stat(input_data)
 
-        t_prop_c = tf.placeholder(tf.float32, [5], name='t_prop_c')
-        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name='t_energy')
-        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_force')
-        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_virial')
-        t_atom_ener = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='t_atom_ener')
-        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type = tf.placeholder(tf.int32, [None], name='i_type')
-        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name='i_natoms')
-        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh = tf.placeholder(tf.int32, [None], name='i_mesh')
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_energy = tf.placeholder(GLOBAL_ENER_FLOAT_PRECISION, [None], name="t_energy")
+        t_force = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_force")
+        t_virial = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="t_virial")
+        t_atom_ener = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None], name="t_atom_ener"
+        )
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [model.ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
         is_training = tf.placeholder(tf.bool)
         t_fparam = None
         inputs_dict = {}
 
-        model_pred \
-            = model.build(t_coord,
-                          t_type,
-                          t_natoms,
-                          t_box,
-                          t_mesh,
-                          inputs_dict,
-                          suffix="polar_se_a_tebd",
-                          reuse=False)
-        polar = model_pred['polar']
-        gpolar = model_pred['global_polar']
-        force = model_pred['force']
-        virial = model_pred['virial']
-        atom_virial = model_pred['atom_virial']
-
-        feed_dict_test = {t_prop_c: test_data['prop_c'],
-                          t_coord: np.reshape(test_data['coord'][:numb_test, :], [-1]),
-                          t_box: test_data['box'][:numb_test, :],
-                          t_type: np.reshape(test_data['type'][:numb_test, :], [-1]),
-                          t_natoms: test_data['natoms_vec'],
-                          t_mesh: test_data['default_mesh'],
-                          is_training: False}
+        model_pred = model.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            inputs_dict,
+            suffix="polar_se_a_tebd",
+            reuse=False,
+        )
+        polar = model_pred["polar"]
+        gpolar = model_pred["global_polar"]
+        force = model_pred["force"]
+        virial = model_pred["virial"]
+        atom_virial = model_pred["atom_virial"]
+
+        feed_dict_test = {
+            t_prop_c: test_data["prop_c"],
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
 
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
         [p, gp] = sess.run([polar, gpolar], feed_dict=feed_dict_test)
 
         p = p.reshape([-1])
-        refp = [2.9133718771953156e+01, 1.8731769782664504e+01,
-                7.1105887944162027e-01, 1.8731769782664514e+01,
-                1.2112482356986591e+01, 4.6145727469215880e-01,
-                7.1105887944162083e-01, 4.6145727469215880e-01,
-                1.7621311697004052e-02, 1.0344561330095343e+00,
-                3.3008794894866171e+00, -1.9826409346695134e-01,
-                3.3008794894866158e+00, 1.3546741456649618e+01,
-                -7.9755689451196410e-01, -1.9826409346695140e-01,
-                -7.9755689451196443e-01, 4.7023623761814869e-02]
+        refp = [
+            2.9133718771953156e01,
+            1.8731769782664504e01,
+            7.1105887944162027e-01,
+            1.8731769782664514e01,
+            1.2112482356986591e01,
+            4.6145727469215880e-01,
+            7.1105887944162083e-01,
+            4.6145727469215880e-01,
+            1.7621311697004052e-02,
+            1.0344561330095343e00,
+            3.3008794894866171e00,
+            -1.9826409346695134e-01,
+            3.3008794894866158e00,
+            1.3546741456649618e01,
+            -7.9755689451196410e-01,
+            -1.9826409346695140e-01,
+            -7.9755689451196443e-01,
+            4.7023623761814869e-02,
+        ]
         places = 6
         np.testing.assert_almost_equal(p, refp, places)
 
@@ -125,15 +166,19 @@ def test_model(self):
         np.testing.assert_almost_equal(gp, refgp, places)
 
         # make sure only one frame is used
-        feed_dict_single = {t_prop_c: test_data['prop_c'],
-                            t_coord: np.reshape(test_data['coord'][:1, :], [-1]),
-                            t_box: test_data['box'][:1, :],
-                            t_type: np.reshape(test_data['type'][:1, :], [-1]),
-                            t_natoms: test_data['natoms_vec'],
-                            t_mesh: test_data['default_mesh'],
-                            is_training: False}
-
-        [pf, pv, pav] = sess.run([force, virial, atom_virial], feed_dict=feed_dict_single)
+        feed_dict_single = {
+            t_prop_c: test_data["prop_c"],
+            t_coord: np.reshape(test_data["coord"][:1, :], [-1]),
+            t_box: test_data["box"][:1, :],
+            t_type: np.reshape(test_data["type"][:1, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
+
+        [pf, pv, pav] = sess.run(
+            [force, virial, atom_virial], feed_dict=feed_dict_single
+        )
         pf, pv = pf.reshape(-1), pv.reshape(-1)
         spv = pav.reshape(1, 9, -1, 9).sum(2).reshape(-1)
 
@@ -141,23 +186,32 @@ def test_model(self):
         coord0 = base_dict.pop(t_coord)
         box0 = base_dict.pop(t_box)
 
-        fdf = - finite_difference(
-            lambda coord: sess.run(gpolar,
-                                   feed_dict={**base_dict,
-                                              t_coord: coord,
-                                              t_box: box0}).reshape(-1),
-            test_data['coord'][:numb_test, :].reshape([-1])).reshape(-1)
-        fdv = - (finite_difference(
-            lambda box: sess.run(gpolar,
-                                 feed_dict={**base_dict,
-                                            t_coord: strerch_box(coord0, box0, box),
-                                            t_box: box}).reshape(-1),
-            test_data['box'][:numb_test, :]).reshape([-1, 3, 3]).transpose(0, 2, 1)
-                 @ box0.reshape(3, 3)).reshape(-1)
+        fdf = -finite_difference(
+            lambda coord: sess.run(
+                gpolar, feed_dict={**base_dict, t_coord: coord, t_box: box0}
+            ).reshape(-1),
+            test_data["coord"][:numb_test, :].reshape([-1]),
+        ).reshape(-1)
+        fdv = -(
+            finite_difference(
+                lambda box: sess.run(
+                    gpolar,
+                    feed_dict={
+                        **base_dict,
+                        t_coord: strerch_box(coord0, box0, box),
+                        t_box: box,
+                    },
+                ).reshape(-1),
+                test_data["box"][:numb_test, :],
+            )
+            .reshape([-1, 3, 3])
+            .transpose(0, 2, 1)
+            @ box0.reshape(3, 3)
+        ).reshape(-1)
 
         delta = 1e-4
         np.testing.assert_allclose(pf, fdf, delta)
         np.testing.assert_allclose(pv, fdv, delta)
-        # make sure atomic virial sum to virial        
+        # make sure atomic virial sum to virial
         places = 10
         np.testing.assert_almost_equal(pv, spv, places)
diff --git a/source/tests/test_prod_env_mat.py b/source/tests/test_prod_env_mat.py
index 82fe3e18eb..1ef9a14a9e 100644
--- a/source/tests/test_prod_env_mat.py
+++ b/source/tests/test_prod_env_mat.py
@@ -1,212 +1,1187 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
+import numpy as np
+
 import deepmd.op
-from deepmd.env import tf
-from deepmd.env import op_module
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    op_module,
+    tf,
+)
+
 
 class TestProdEnvMat(tf.test.TestCase):
     def setUp(self):
         self.sess = self.test_session().__enter__()
         self.nframes = 2
         self.dcoord = [
-            12.83, 2.56, 2.18,
-            12.09, 2.87, 2.74,
-            00.25, 3.32, 1.68,
-            3.36, 3.00, 1.81,
-            3.51, 2.51, 2.60,
-            4.27, 3.22, 1.56]
+            12.83,
+            2.56,
+            2.18,
+            12.09,
+            2.87,
+            2.74,
+            00.25,
+            3.32,
+            1.68,
+            3.36,
+            3.00,
+            1.81,
+            3.51,
+            2.51,
+            2.60,
+            4.27,
+            3.22,
+            1.56,
+        ]
         self.dtype = [0, 1, 1, 0, 1, 1]
-        self.dbox = [13., 0., 0., 0., 13., 0., 0., 0., 13.]
+        self.dbox = [13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0]
         self.dcoord = np.reshape(self.dcoord, [1, -1])
         self.dtype = np.reshape(self.dtype, [1, -1])
         self.dbox = np.reshape(self.dbox, [1, -1])
         self.dcoord = np.tile(self.dcoord, [self.nframes, 1])
         self.dtype = np.tile(self.dtype, [self.nframes, 1])
-        self.dbox = np.tile(self.dbox, [self.nframes, 1])        
+        self.dbox = np.tile(self.dbox, [self.nframes, 1])
         self.pbc_expected_output = [
-            0.12206, 0.12047, 0.01502, -0.01263, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 1.02167, -0.77271, 0.32370, 0.58475, 0.99745, 0.41810, 0.75655, -0.49773, 0.10564, 0.10495, -0.00143, 0.01198, 0.03103, 0.03041, 0.00452, -0.00425, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-            1.02167, 0.77271, -0.32370, -0.58475, 0.04135, 0.04039, 0.00123, -0.00880, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.59220, 0.42028, 0.16304, -0.38405, 0.03694, 0.03680, -0.00300, -0.00117, 0.00336, 0.00327, 0.00022, -0.00074, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-            0.99745, -0.41810, -0.75655, 0.49773, 0.19078, 0.18961, -0.01951, 0.00793, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.59220, -0.42028, -0.16304, 0.38405, 0.13499, 0.12636, -0.03140, 0.03566, 0.07054, 0.07049, -0.00175, -0.00210, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-            0.12206, -0.12047, -0.01502, 0.01263, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 1.06176, 0.16913, -0.55250, 0.89077, 1.03163, 0.96880, 0.23422, -0.26615, 0.19078, -0.18961, 0.01951, -0.00793, 0.04135, -0.04039, -0.00123, 0.00880, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-            1.06176, -0.16913, 0.55250, -0.89077, 0.10564, -0.10495, 0.00143, -0.01198, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.66798, 0.34516, 0.32245, -0.47232, 0.13499, -0.12636, 0.03140, -0.03566, 0.03694, -0.03680, 0.00300, 0.00117, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 
-            1.03163, -0.96880, -0.23422, 0.26615, 0.03103, -0.03041, -0.00452, 0.00425, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.66798, -0.34516, -0.32245, 0.47232, 0.07054, -0.07049, 0.00175, 0.00210, 0.00336, -0.00327, -0.00022, 0.00074, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000, 0.00000]
+            0.12206,
+            0.12047,
+            0.01502,
+            -0.01263,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.02167,
+            -0.77271,
+            0.32370,
+            0.58475,
+            0.99745,
+            0.41810,
+            0.75655,
+            -0.49773,
+            0.10564,
+            0.10495,
+            -0.00143,
+            0.01198,
+            0.03103,
+            0.03041,
+            0.00452,
+            -0.00425,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.02167,
+            0.77271,
+            -0.32370,
+            -0.58475,
+            0.04135,
+            0.04039,
+            0.00123,
+            -0.00880,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.59220,
+            0.42028,
+            0.16304,
+            -0.38405,
+            0.03694,
+            0.03680,
+            -0.00300,
+            -0.00117,
+            0.00336,
+            0.00327,
+            0.00022,
+            -0.00074,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.99745,
+            -0.41810,
+            -0.75655,
+            0.49773,
+            0.19078,
+            0.18961,
+            -0.01951,
+            0.00793,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.59220,
+            -0.42028,
+            -0.16304,
+            0.38405,
+            0.13499,
+            0.12636,
+            -0.03140,
+            0.03566,
+            0.07054,
+            0.07049,
+            -0.00175,
+            -0.00210,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.12206,
+            -0.12047,
+            -0.01502,
+            0.01263,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.06176,
+            0.16913,
+            -0.55250,
+            0.89077,
+            1.03163,
+            0.96880,
+            0.23422,
+            -0.26615,
+            0.19078,
+            -0.18961,
+            0.01951,
+            -0.00793,
+            0.04135,
+            -0.04039,
+            -0.00123,
+            0.00880,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.06176,
+            -0.16913,
+            0.55250,
+            -0.89077,
+            0.10564,
+            -0.10495,
+            0.00143,
+            -0.01198,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.66798,
+            0.34516,
+            0.32245,
+            -0.47232,
+            0.13499,
+            -0.12636,
+            0.03140,
+            -0.03566,
+            0.03694,
+            -0.03680,
+            0.00300,
+            0.00117,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.03163,
+            -0.96880,
+            -0.23422,
+            0.26615,
+            0.03103,
+            -0.03041,
+            -0.00452,
+            0.00425,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.66798,
+            -0.34516,
+            -0.32245,
+            0.47232,
+            0.07054,
+            -0.07049,
+            0.00175,
+            0.00210,
+            0.00336,
+            -0.00327,
+            -0.00022,
+            0.00074,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+        ]
         self.nopbc_expected_output = [
-            0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.02167,-0.77271,0.32370,0.58475,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,
-            1.02167,0.77271,-0.32370,-0.58475,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,
-            0.19078,0.18961,-0.01951,0.00793,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.13499,0.12636,-0.03140,0.03566,0.07054,0.07049,-0.00175,-0.00210,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,
-            0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.06176,0.16913,-0.55250,0.89077,1.03163,0.96880,0.23422,-0.26615,0.19078,-0.18961,0.01951,-0.00793,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,
-            1.06176,-0.16913,0.55250,-0.89077,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.66798,0.34516,0.32245,-0.47232,0.13499,-0.12636,0.03140,-0.03566,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,
-1.03163,-0.96880,-0.23422,0.26615,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.66798,-0.34516,-0.32245,0.47232,0.07054,-0.07049,0.00175,0.00210,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000]
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.02167,
+            -0.77271,
+            0.32370,
+            0.58475,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.02167,
+            0.77271,
+            -0.32370,
+            -0.58475,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.19078,
+            0.18961,
+            -0.01951,
+            0.00793,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.13499,
+            0.12636,
+            -0.03140,
+            0.03566,
+            0.07054,
+            0.07049,
+            -0.00175,
+            -0.00210,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.06176,
+            0.16913,
+            -0.55250,
+            0.89077,
+            1.03163,
+            0.96880,
+            0.23422,
+            -0.26615,
+            0.19078,
+            -0.18961,
+            0.01951,
+            -0.00793,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.06176,
+            -0.16913,
+            0.55250,
+            -0.89077,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.66798,
+            0.34516,
+            0.32245,
+            -0.47232,
+            0.13499,
+            -0.12636,
+            0.03140,
+            -0.03566,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.03163,
+            -0.96880,
+            -0.23422,
+            0.26615,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.66798,
+            -0.34516,
+            -0.32245,
+            0.47232,
+            0.07054,
+            -0.07049,
+            0.00175,
+            0.00210,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+        ]
         self.sel = [10, 10]
-        self.sec = np.array([0, 0, 0], dtype = int)
+        self.sec = np.array([0, 0, 0], dtype=int)
         self.sec[1:3] = np.cumsum(self.sel)
-        self.rcut = 6.
+        self.rcut = 6.0
         self.rcut_smth = 0.8
         self.dnatoms = [6, 6, 2, 4]
-        self.tcoord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * 3], name='t_coord')
-        self.tbox = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='t_box')
-        self.ttype = tf.placeholder(tf.int32, [None, self.dnatoms[0]], name = "t_type")
-        self.tnatoms = tf.placeholder(tf.int32, [None], name = "t_natoms")
+        self.tcoord = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * 3], name="t_coord"
+        )
+        self.tbox = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="t_box")
+        self.ttype = tf.placeholder(tf.int32, [None, self.dnatoms[0]], name="t_type")
+        self.tnatoms = tf.placeholder(tf.int32, [None], name="t_natoms")
         self.nloc = self.dnatoms[0]
         self.nall = self.dnatoms[1]
         self.nnei = self.sec[-1]
         self.ndescrpt = 4 * self.nnei
         self.ntypes = np.max(self.dtype) + 1
-        davg = np.zeros ([self.ntypes, self.ndescrpt])
-        dstd = np.ones  ([self.ntypes, self.ndescrpt])
+        davg = np.zeros([self.ntypes, self.ndescrpt])
+        dstd = np.ones([self.ntypes, self.ndescrpt])
         self.t_avg = tf.constant(davg.astype(GLOBAL_NP_FLOAT_PRECISION))
         self.t_std = tf.constant(dstd.astype(GLOBAL_NP_FLOAT_PRECISION))
- 
+
     def test_pbc_self_built_nlist(self):
-        tem, tem_deriv, trij, tnlist \
-            = op_module.prod_env_mat_a (
-                self.tcoord,
-                self.ttype,
-                self.tnatoms,
-                self.tbox, 
-                tf.constant(np.zeros(6, dtype = np.int32)),
-                self.t_avg,
-                self.t_std,
-                rcut_a = -1, 
-                rcut_r = self.rcut, 
-                rcut_r_smth = self.rcut_smth,
-                sel_a = self.sel, 
-                sel_r = [0, 0])
-        self.sess.run (tf.global_variables_initializer())
+        tem, tem_deriv, trij, tnlist = op_module.prod_env_mat_a(
+            self.tcoord,
+            self.ttype,
+            self.tnatoms,
+            self.tbox,
+            tf.constant(np.zeros(6, dtype=np.int32)),
+            self.t_avg,
+            self.t_std,
+            rcut_a=-1,
+            rcut_r=self.rcut,
+            rcut_r_smth=self.rcut_smth,
+            sel_a=self.sel,
+            sel_r=[0, 0],
+        )
+        self.sess.run(tf.global_variables_initializer())
         dem, dem_deriv, drij, dnlist = self.sess.run(
             [tem, tem_deriv, trij, tnlist],
-            feed_dict = {
+            feed_dict={
                 self.tcoord: self.dcoord,
                 self.ttype: self.dtype,
                 self.tbox: self.dbox,
-                self.tnatoms: self.dnatoms}
+                self.tnatoms: self.dnatoms,
+            },
         )
-        self.assertEqual(dem.shape, (self.nframes, self.nloc*self.ndescrpt))
-        self.assertEqual(dem_deriv.shape, (self.nframes, self.nloc*self.ndescrpt*3))
-        self.assertEqual(drij.shape, (self.nframes, self.nloc*self.nnei*3))
-        self.assertEqual(dnlist.shape, (self.nframes, self.nloc*self.nnei))
+        self.assertEqual(dem.shape, (self.nframes, self.nloc * self.ndescrpt))
+        self.assertEqual(dem_deriv.shape, (self.nframes, self.nloc * self.ndescrpt * 3))
+        self.assertEqual(drij.shape, (self.nframes, self.nloc * self.nnei * 3))
+        self.assertEqual(dnlist.shape, (self.nframes, self.nloc * self.nnei))
         for ff in range(self.nframes):
             np.testing.assert_almost_equal(dem[ff], self.pbc_expected_output, 5)
 
     def test_pbc_self_built_nlist_deriv(self):
         hh = 1e-4
-        tem, tem_deriv, trij, tnlist \
-            = op_module.prod_env_mat_a (
-                self.tcoord,
-                self.ttype,
-                self.tnatoms,
-                self.tbox, 
-                tf.constant(np.zeros(6, dtype = np.int32)),
-                self.t_avg,
-                self.t_std,
-                rcut_a = -1, 
-                rcut_r = self.rcut, 
-                rcut_r_smth = self.rcut_smth,
-                sel_a = self.sel, 
-                sel_r = [0, 0])
-        self.sess.run (tf.global_variables_initializer())
+        tem, tem_deriv, trij, tnlist = op_module.prod_env_mat_a(
+            self.tcoord,
+            self.ttype,
+            self.tnatoms,
+            self.tbox,
+            tf.constant(np.zeros(6, dtype=np.int32)),
+            self.t_avg,
+            self.t_std,
+            rcut_a=-1,
+            rcut_r=self.rcut,
+            rcut_r_smth=self.rcut_smth,
+            sel_a=self.sel,
+            sel_r=[0, 0],
+        )
+        self.sess.run(tf.global_variables_initializer())
         self.check_deriv_numerical_deriv(hh, tem, tem_deriv, trij, tnlist)
 
     def test_nopbc_self_built_nlist(self):
-        tem, tem_deriv, trij, tnlist \
-            = op_module.prod_env_mat_a (
-                self.tcoord,
-                self.ttype,
-                self.tnatoms,
-                self.tbox, 
-                tf.constant(np.zeros(0, dtype = np.int32)),
-                self.t_avg,
-                self.t_std,
-                rcut_a = -1, 
-                rcut_r = self.rcut, 
-                rcut_r_smth = self.rcut_smth,
-                sel_a = self.sel, 
-                sel_r = [0, 0])
-        self.sess.run (tf.global_variables_initializer())
+        tem, tem_deriv, trij, tnlist = op_module.prod_env_mat_a(
+            self.tcoord,
+            self.ttype,
+            self.tnatoms,
+            self.tbox,
+            tf.constant(np.zeros(0, dtype=np.int32)),
+            self.t_avg,
+            self.t_std,
+            rcut_a=-1,
+            rcut_r=self.rcut,
+            rcut_r_smth=self.rcut_smth,
+            sel_a=self.sel,
+            sel_r=[0, 0],
+        )
+        self.sess.run(tf.global_variables_initializer())
         dem, dem_deriv, drij, dnlist = self.sess.run(
             [tem, tem_deriv, trij, tnlist],
-            feed_dict = {
+            feed_dict={
                 self.tcoord: self.dcoord,
                 self.ttype: self.dtype,
                 self.tbox: self.dbox,
-                self.tnatoms: self.dnatoms}
+                self.tnatoms: self.dnatoms,
+            },
         )
-        self.assertEqual(dem.shape, (self.nframes, self.nloc*self.ndescrpt))
-        self.assertEqual(dem_deriv.shape, (self.nframes, self.nloc*self.ndescrpt*3))
-        self.assertEqual(drij.shape, (self.nframes, self.nloc*self.nnei*3))
-        self.assertEqual(dnlist.shape, (self.nframes, self.nloc*self.nnei))
+        self.assertEqual(dem.shape, (self.nframes, self.nloc * self.ndescrpt))
+        self.assertEqual(dem_deriv.shape, (self.nframes, self.nloc * self.ndescrpt * 3))
+        self.assertEqual(drij.shape, (self.nframes, self.nloc * self.nnei * 3))
+        self.assertEqual(dnlist.shape, (self.nframes, self.nloc * self.nnei))
         for ff in range(self.nframes):
             np.testing.assert_almost_equal(dem[ff], self.nopbc_expected_output, 5)
 
-
     def test_nopbc_self_built_nlist_deriv(self):
         hh = 1e-4
-        tem, tem_deriv, trij, tnlist \
-            = op_module.prod_env_mat_a (
-                self.tcoord,
-                self.ttype,
-                self.tnatoms,
-                self.tbox, 
-                tf.constant(np.zeros(0, dtype = np.int32)),
-                self.t_avg,
-                self.t_std,
-                rcut_a = -1, 
-                rcut_r = self.rcut, 
-                rcut_r_smth = self.rcut_smth,
-                sel_a = self.sel, 
-                sel_r = [0, 0])
-        self.sess.run (tf.global_variables_initializer())
+        tem, tem_deriv, trij, tnlist = op_module.prod_env_mat_a(
+            self.tcoord,
+            self.ttype,
+            self.tnatoms,
+            self.tbox,
+            tf.constant(np.zeros(0, dtype=np.int32)),
+            self.t_avg,
+            self.t_std,
+            rcut_a=-1,
+            rcut_r=self.rcut,
+            rcut_r_smth=self.rcut_smth,
+            sel_a=self.sel,
+            sel_r=[0, 0],
+        )
+        self.sess.run(tf.global_variables_initializer())
         self.check_deriv_numerical_deriv(hh, tem, tem_deriv, trij, tnlist)
 
-
-    def check_deriv_numerical_deriv(self, 
-                                    hh,
-                                    tem, tem_deriv, trij, tnlist):
+    def check_deriv_numerical_deriv(self, hh, tem, tem_deriv, trij, tnlist):
         dem_, dem_deriv_, drij_, dnlist_ = self.sess.run(
             [tem, tem_deriv, trij, tnlist],
-            feed_dict = {
+            feed_dict={
                 self.tcoord: self.dcoord,
                 self.ttype: self.dtype,
                 self.tbox: self.dbox,
-                self.tnatoms: self.dnatoms}
+                self.tnatoms: self.dnatoms,
+            },
         )
         ff = 0
         dem = dem_[ff]
         dem_deriv = dem_deriv_[ff]
         dnlist = dnlist_[ff]
-        for ii in range(self.dnatoms[0]):            
+        for ii in range(self.dnatoms[0]):
             for jj in range(self.nnei):
-                j_idx = dnlist[ii*self.nnei+jj]
+                j_idx = dnlist[ii * self.nnei + jj]
                 if j_idx < 0:
                     continue
                 for kk in range(4):
                     for dd in range(3):
                         dcoord_0 = np.copy(self.dcoord)
                         dcoord_1 = np.copy(self.dcoord)
-                        dcoord_0[ff][j_idx*3+dd] -= hh
-                        dcoord_1[ff][j_idx*3+dd] += hh
+                        dcoord_0[ff][j_idx * 3 + dd] -= hh
+                        dcoord_1[ff][j_idx * 3 + dd] += hh
                         dem_0, dem_deriv_0, drij_0, dnlist_0 = self.sess.run(
                             [tem, tem_deriv, trij, tnlist],
-                            feed_dict = {
+                            feed_dict={
                                 self.tcoord: dcoord_0,
                                 self.ttype: self.dtype,
                                 self.tbox: self.dbox,
-                                self.tnatoms: self.dnatoms}
+                                self.tnatoms: self.dnatoms,
+                            },
                         )
                         dem_1, dem_deriv_1, drij_1, dnlist_1 = self.sess.run(
                             [tem, tem_deriv, trij, tnlist],
-                            feed_dict = {
+                            feed_dict={
                                 self.tcoord: dcoord_1,
                                 self.ttype: self.dtype,
                                 self.tbox: self.dbox,
-                                self.tnatoms: self.dnatoms}
+                                self.tnatoms: self.dnatoms,
+                            },
                         )
-                        num_deriv = (dem_1[0][ii*self.nnei*4+jj*4+kk] - dem_0[0][ii*self.ndescrpt+jj*4+kk]) / (2.*hh)
-                        ana_deriv = -dem_deriv[ii*self.nnei*4*3+jj*4*3+kk*3+dd]
-                        self.assertAlmostEqual(num_deriv, ana_deriv, places = 5)
-
+                        num_deriv = (
+                            dem_1[0][ii * self.nnei * 4 + jj * 4 + kk]
+                            - dem_0[0][ii * self.ndescrpt + jj * 4 + kk]
+                        ) / (2.0 * hh)
+                        ana_deriv = -dem_deriv[
+                            ii * self.nnei * 4 * 3 + jj * 4 * 3 + kk * 3 + dd
+                        ]
+                        self.assertAlmostEqual(num_deriv, ana_deriv, places=5)
diff --git a/source/tests/test_prod_force.py b/source/tests/test_prod_force.py
index e6703b6614..b44178cd9e 100644
--- a/source/tests/test_prod_force.py
+++ b/source/tests/test_prod_force.py
@@ -1,45 +1,834 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
+import numpy as np
+
 import deepmd.op
-from deepmd.env import tf
-from deepmd.env import op_module
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    op_module,
+    tf,
+)
+
 
 class TestProdForce(tf.test.TestCase):
     def setUp(self):
         config = tf.ConfigProto()
         if int(os.environ.get("DP_AUTO_PARALLELIZATION", 0)):
-            config.graph_options.rewrite_options.custom_optimizers.add().name = "dpparallel"
+            config.graph_options.rewrite_options.custom_optimizers.add().name = (
+                "dpparallel"
+            )
         self.sess = self.test_session(config=config).__enter__()
         self.nframes = 2
         self.dcoord = [
-            12.83, 2.56, 2.18,
-            12.09, 2.87, 2.74,
-            00.25, 3.32, 1.68,
-            3.36, 3.00, 1.81,
-            3.51, 2.51, 2.60,
-            4.27, 3.22, 1.56]
+            12.83,
+            2.56,
+            2.18,
+            12.09,
+            2.87,
+            2.74,
+            00.25,
+            3.32,
+            1.68,
+            3.36,
+            3.00,
+            1.81,
+            3.51,
+            2.51,
+            2.60,
+            4.27,
+            3.22,
+            1.56,
+        ]
         self.dtype = [0, 1, 1, 0, 1, 1]
-        self.dbox = [13., 0., 0., 0., 13., 0., 0., 0., 13.]
-        self.dnlist = [33, -1, -1, -1, -1, 1, 32, 34, 35, -1, 
-                       0, 33, -1, -1, -1, 32, 34, 35, -1, -1, 
-                       6, 3, -1, -1, -1, 7, 4, 5, -1, -1, 
-                       6, -1, -1, -1, -1, 4, 5, 2, 7, -1, 
-                       3, 6, -1, -1, -1, 5, 2, 7, -1, -1, 
-                       3, 6, -1, -1, -1, 4, 2, 7, -1, -1]
-        self.dem_deriv = [0.13227682739491875, 0.01648776318803519, -0.013864709953575083, 0.12967498112414713, 0.0204174282700489, -0.017169201045268437, 0.0204174282700489, -0.031583528930688706, -0.0021400703852459233, -0.01716920104526844, -0.0021400703852459233, -0.03232887285478848, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.7946522798827726, 0.33289487400494444, 0.6013584820734476, 0.15412158847174678, -0.502001299580599, -0.9068410573068878, -0.502001299580599, -0.833906252681877, 0.3798928753582899, -0.9068410573068878, 0.3798928753582899, -0.3579459969766471, 0.4206262499369199, 0.761133214171572, -0.5007455356391932, -0.6442543005863454, 0.635525177045359, -0.4181086691087898, 0.6355251770453592, 0.15453235677768898,
-                          -0.75657759172067, -0.4181086691087898, -0.75657759172067, -0.49771716703202185, 0.12240657396947655, -0.0016631327984983461, 0.013970315507385892, 0.12123416269111335, -0.0020346719145638054, 0.017091244082335703, -0.002034671914563806, -0.028490045221941415, -0.00023221799024912971, 0.017091244082335703, -0.00023221799024912971, -0.026567059102687942, 0.057945707686107975, 0.008613551142529565, -0.008091517739952026, 0.056503423854730866, 0.009417127630974357, -0.008846392623036528, 0.009417127630974357, -0.005448318729873151, -0.0013150043088297543, -0.008846392623036528, -0.0013150043088297541, -0.005612854948377751, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7946522798827726, -0.33289487400494444, -0.6013584820734476, 0.15412158847174678, -0.502001299580599, -0.9068410573068878, -0.502001299580599, -0.833906252681877, 0.3798928753582899, -0.9068410573068878, 0.3798928753582899, -0.3579459969766471, 0.06884320605436924, 0.002095928989945659, -0.01499395354345747, 0.0668001797461137, 0.0023216922720068383, -0.016609029330510533, 0.0023216922720068383, -0.009387797963986713, -0.0005056613145120282, -0.016609029330510533, -0.0005056613145120282, -0.005841058553679004, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3025931001933299, 0.11738525438534331, -0.2765074881076981, 0.034913562192579815, 0.15409432322878, -0.3629777391611269, 0.15409432322878003, -0.30252938969021487, -0.14081032984698866, -0.3629777391611269, -0.14081032984698866, -0.030620805157591004, 0.06555082496658332, -0.005338981218997747, -0.002076270474054677, 0.06523884623439505, -0.00599162877720186, -0.0023300778578007205, -0.00599162877720186, -0.007837034455273667, 0.00018978009701544363, -0.0023300778578007205, 0.00018978009701544363, -0.008251237047966105, 0.014091999096200191, 0.0009521621010946066, -0.00321014651226182, 0.013676554858123476, 0.0009667394698497006, -0.0032592930697789946, 0.0009667394698497006, -0.0005658690612028018, -0.00022022250471479668, -0.0032592930697789937, -0.00022022250471479666, 0.00011127514881492382, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          -0.4206262499369199, -0.761133214171572, 0.5007455356391932, -0.6442543005863454, 0.635525177045359, -0.4181086691087898, 0.6355251770453592, 0.15453235677768898, -0.75657759172067, -0.4181086691087898, -0.75657759172067, -0.49771716703202185, 0.17265177804411166, -0.01776481317495682, 0.007216955352326217, 0.1708538944675734, -0.023853120077098278, 0.009690330031321191, -0.02385312007709828, -0.05851427595224925, -0.0009970757588497682, 0.00969033003132119, -0.0009970757588497682, -0.06056355425469288, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.3025931001933299, -0.11738525438534331, 0.2765074881076981, 0.034913562192579815, 0.15409432322878, -0.3629777391611269, 0.15409432322878003, -0.30252938969021487, -0.14081032984698866, -0.3629777391611269, -0.14081032984698866, -0.030620805157591004, 0.13298898711407747, -0.03304327593938735, 0.03753063440029181, 0.11967949867634801, -0.0393666881596552, 0.044712781613435545, -0.0393666881596552, -0.02897797727002851,
-                          -0.01110961751744871, 0.044712781613435545, -0.011109617517448708, -0.026140939946396612, 0.09709214772325653, -0.00241522755530488, -0.0028982730663658636, 0.09699249715361474, -0.0028489422636695603, -0.0034187307164034813, -0.00284894226366956, -0.017464112635362926, 8.504305264685245e-05, -0.003418730716403481, 8.504305264685245e-05, -0.017432930182725747, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1322768273949186, -0.016487763188035173, 0.013864709953575069, 0.12967498112414702, 0.020417428270048884, -0.017169201045268423, 0.02041742827004888, -0.03158352893068868, -0.002140070385245921, -0.017169201045268423, -0.002140070385245921, -0.03232887285478844, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1802999914938216, -0.5889799722131493, 0.9495799552007915, -1.070225697321266, -0.18728687322613707, 0.30195230581356786, -0.18728687322613707, -0.5157546277429348, -0.9863775323243197, 0.30195230581356786, -0.9863775323243197, 0.4627237303364723, 1.0053013143052718, 0.24303987818369216, -0.2761816797541954, 0.8183357773897718, 0.45521877564245394, -0.517294063230061, 0.45521877564245394, -0.9545617219529918, -0.1250601031984763, -0.517294063230061, -0.1250601031984763, -0.922500859133019, -0.17265177804411166, 0.01776481317495682, -0.007216955352326217, 0.1708538944675734, -0.023853120077098278, 0.009690330031321191, -0.02385312007709828, -0.05851427595224925, -0.0009970757588497682, 0.00969033003132119, -0.0009970757588497682, -0.06056355425469288, -0.06884320605436924, -0.002095928989945659, 0.01499395354345747, 0.0668001797461137, 0.0023216922720068383, -0.016609029330510533, 0.0023216922720068383, -0.009387797963986713, -0.0005056613145120282, -0.016609029330510533, -0.0005056613145120282, -0.005841058553679004, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          -0.1802999914938216, 0.5889799722131493, -0.9495799552007915, -1.070225697321266, -0.18728687322613707, 0.30195230581356786, -0.18728687322613707, -0.5157546277429348, -0.9863775323243197, 0.30195230581356786, -0.9863775323243197, 0.4627237303364723, -0.12240657396947667, 0.0016631327984983487, -0.013970315507385913, 0.12123416269111348, -0.002034671914563809, 0.01709124408233573, -0.002034671914563809, -0.028490045221941467, -0.00023221799024913015, 0.01709124408233573, -0.00023221799024913015, -0.026567059102687987, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2602591506940697, 0.24313683814840728, -0.3561441009497795, -0.19841405298242495, 0.23891499072173572, -0.3499599864093028, 0.23891499072173572, -0.23095714382387694, -0.32693630309290145, -0.34995998640930287, -0.32693630309290145, 0.02473856993038946, -0.13298898711407747, 0.03304327593938735, -0.03753063440029181, 0.11967949867634801, -0.0393666881596552, 0.044712781613435545, -0.0393666881596552, -0.02897797727002851,
-                          -0.01110961751744871, 0.044712781613435545, -0.011109617517448708, -0.026140939946396612, -0.0655508249665835, 0.005338981218997763, 0.002076270474054683, 0.0652388462343952, -0.005991628777201879, -0.0023300778578007283, -0.005991628777201879, -0.007837034455273709, 0.0001897800970154443, -0.002330077857800728, 0.0001897800970154443, -0.008251237047966148, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0053013143052718, -0.24303987818369216, 0.2761816797541954, 0.8183357773897718, 0.45521877564245394, -0.517294063230061, 0.45521877564245394, -0.9545617219529918, -0.1250601031984763, -0.517294063230061, -0.1250601031984763, -0.922500859133019, -0.057945707686107864, -0.008613551142529548, 0.00809151773995201, 0.05650342385473076, 0.009417127630974336, -0.00884639262303651, 0.009417127630974336, -0.005448318729873148, -0.0013150043088297515, -0.00884639262303651, -0.0013150043088297513, -0.005612854948377747, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.2602591506940697, -0.24313683814840728, 0.3561441009497795, -0.19841405298242495, 0.23891499072173572, -0.3499599864093028, 0.23891499072173572, -0.23095714382387694, -0.32693630309290145, -0.34995998640930287, -0.32693630309290145, 0.02473856993038946, -0.09709214772325653, 0.00241522755530488, 0.0028982730663658636, 0.09699249715361474, -0.0028489422636695603, -0.0034187307164034813, -0.00284894226366956, -0.017464112635362926, 8.504305264685245e-05, -0.003418730716403481, 8.504305264685245e-05, -0.017432930182725747, -0.014091999096200191, -0.0009521621010946064, 0.0032101465122618194, 0.013676554858123474, 0.0009667394698497003, -0.0032592930697789933, 0.0009667394698497003, -0.0005658690612028016, -0.0002202225047147966, -0.0032592930697789933, -0.0002202225047147966, 0.00011127514881492362, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+        self.dbox = [13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0]
+        self.dnlist = [
+            33,
+            -1,
+            -1,
+            -1,
+            -1,
+            1,
+            32,
+            34,
+            35,
+            -1,
+            0,
+            33,
+            -1,
+            -1,
+            -1,
+            32,
+            34,
+            35,
+            -1,
+            -1,
+            6,
+            3,
+            -1,
+            -1,
+            -1,
+            7,
+            4,
+            5,
+            -1,
+            -1,
+            6,
+            -1,
+            -1,
+            -1,
+            -1,
+            4,
+            5,
+            2,
+            7,
+            -1,
+            3,
+            6,
+            -1,
+            -1,
+            -1,
+            5,
+            2,
+            7,
+            -1,
+            -1,
+            3,
+            6,
+            -1,
+            -1,
+            -1,
+            4,
+            2,
+            7,
+            -1,
+            -1,
+        ]
+        self.dem_deriv = [
+            0.13227682739491875,
+            0.01648776318803519,
+            -0.013864709953575083,
+            0.12967498112414713,
+            0.0204174282700489,
+            -0.017169201045268437,
+            0.0204174282700489,
+            -0.031583528930688706,
+            -0.0021400703852459233,
+            -0.01716920104526844,
+            -0.0021400703852459233,
+            -0.03232887285478848,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.7946522798827726,
+            0.33289487400494444,
+            0.6013584820734476,
+            0.15412158847174678,
+            -0.502001299580599,
+            -0.9068410573068878,
+            -0.502001299580599,
+            -0.833906252681877,
+            0.3798928753582899,
+            -0.9068410573068878,
+            0.3798928753582899,
+            -0.3579459969766471,
+            0.4206262499369199,
+            0.761133214171572,
+            -0.5007455356391932,
+            -0.6442543005863454,
+            0.635525177045359,
+            -0.4181086691087898,
+            0.6355251770453592,
+            0.15453235677768898,
+            -0.75657759172067,
+            -0.4181086691087898,
+            -0.75657759172067,
+            -0.49771716703202185,
+            0.12240657396947655,
+            -0.0016631327984983461,
+            0.013970315507385892,
+            0.12123416269111335,
+            -0.0020346719145638054,
+            0.017091244082335703,
+            -0.002034671914563806,
+            -0.028490045221941415,
+            -0.00023221799024912971,
+            0.017091244082335703,
+            -0.00023221799024912971,
+            -0.026567059102687942,
+            0.057945707686107975,
+            0.008613551142529565,
+            -0.008091517739952026,
+            0.056503423854730866,
+            0.009417127630974357,
+            -0.008846392623036528,
+            0.009417127630974357,
+            -0.005448318729873151,
+            -0.0013150043088297543,
+            -0.008846392623036528,
+            -0.0013150043088297541,
+            -0.005612854948377751,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.7946522798827726,
+            -0.33289487400494444,
+            -0.6013584820734476,
+            0.15412158847174678,
+            -0.502001299580599,
+            -0.9068410573068878,
+            -0.502001299580599,
+            -0.833906252681877,
+            0.3798928753582899,
+            -0.9068410573068878,
+            0.3798928753582899,
+            -0.3579459969766471,
+            0.06884320605436924,
+            0.002095928989945659,
+            -0.01499395354345747,
+            0.0668001797461137,
+            0.0023216922720068383,
+            -0.016609029330510533,
+            0.0023216922720068383,
+            -0.009387797963986713,
+            -0.0005056613145120282,
+            -0.016609029330510533,
+            -0.0005056613145120282,
+            -0.005841058553679004,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.3025931001933299,
+            0.11738525438534331,
+            -0.2765074881076981,
+            0.034913562192579815,
+            0.15409432322878,
+            -0.3629777391611269,
+            0.15409432322878003,
+            -0.30252938969021487,
+            -0.14081032984698866,
+            -0.3629777391611269,
+            -0.14081032984698866,
+            -0.030620805157591004,
+            0.06555082496658332,
+            -0.005338981218997747,
+            -0.002076270474054677,
+            0.06523884623439505,
+            -0.00599162877720186,
+            -0.0023300778578007205,
+            -0.00599162877720186,
+            -0.007837034455273667,
+            0.00018978009701544363,
+            -0.0023300778578007205,
+            0.00018978009701544363,
+            -0.008251237047966105,
+            0.014091999096200191,
+            0.0009521621010946066,
+            -0.00321014651226182,
+            0.013676554858123476,
+            0.0009667394698497006,
+            -0.0032592930697789946,
+            0.0009667394698497006,
+            -0.0005658690612028018,
+            -0.00022022250471479668,
+            -0.0032592930697789937,
+            -0.00022022250471479666,
+            0.00011127514881492382,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.4206262499369199,
+            -0.761133214171572,
+            0.5007455356391932,
+            -0.6442543005863454,
+            0.635525177045359,
+            -0.4181086691087898,
+            0.6355251770453592,
+            0.15453235677768898,
+            -0.75657759172067,
+            -0.4181086691087898,
+            -0.75657759172067,
+            -0.49771716703202185,
+            0.17265177804411166,
+            -0.01776481317495682,
+            0.007216955352326217,
+            0.1708538944675734,
+            -0.023853120077098278,
+            0.009690330031321191,
+            -0.02385312007709828,
+            -0.05851427595224925,
+            -0.0009970757588497682,
+            0.00969033003132119,
+            -0.0009970757588497682,
+            -0.06056355425469288,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.3025931001933299,
+            -0.11738525438534331,
+            0.2765074881076981,
+            0.034913562192579815,
+            0.15409432322878,
+            -0.3629777391611269,
+            0.15409432322878003,
+            -0.30252938969021487,
+            -0.14081032984698866,
+            -0.3629777391611269,
+            -0.14081032984698866,
+            -0.030620805157591004,
+            0.13298898711407747,
+            -0.03304327593938735,
+            0.03753063440029181,
+            0.11967949867634801,
+            -0.0393666881596552,
+            0.044712781613435545,
+            -0.0393666881596552,
+            -0.02897797727002851,
+            -0.01110961751744871,
+            0.044712781613435545,
+            -0.011109617517448708,
+            -0.026140939946396612,
+            0.09709214772325653,
+            -0.00241522755530488,
+            -0.0028982730663658636,
+            0.09699249715361474,
+            -0.0028489422636695603,
+            -0.0034187307164034813,
+            -0.00284894226366956,
+            -0.017464112635362926,
+            8.504305264685245e-05,
+            -0.003418730716403481,
+            8.504305264685245e-05,
+            -0.017432930182725747,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.1322768273949186,
+            -0.016487763188035173,
+            0.013864709953575069,
+            0.12967498112414702,
+            0.020417428270048884,
+            -0.017169201045268423,
+            0.02041742827004888,
+            -0.03158352893068868,
+            -0.002140070385245921,
+            -0.017169201045268423,
+            -0.002140070385245921,
+            -0.03232887285478844,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.1802999914938216,
+            -0.5889799722131493,
+            0.9495799552007915,
+            -1.070225697321266,
+            -0.18728687322613707,
+            0.30195230581356786,
+            -0.18728687322613707,
+            -0.5157546277429348,
+            -0.9863775323243197,
+            0.30195230581356786,
+            -0.9863775323243197,
+            0.4627237303364723,
+            1.0053013143052718,
+            0.24303987818369216,
+            -0.2761816797541954,
+            0.8183357773897718,
+            0.45521877564245394,
+            -0.517294063230061,
+            0.45521877564245394,
+            -0.9545617219529918,
+            -0.1250601031984763,
+            -0.517294063230061,
+            -0.1250601031984763,
+            -0.922500859133019,
+            -0.17265177804411166,
+            0.01776481317495682,
+            -0.007216955352326217,
+            0.1708538944675734,
+            -0.023853120077098278,
+            0.009690330031321191,
+            -0.02385312007709828,
+            -0.05851427595224925,
+            -0.0009970757588497682,
+            0.00969033003132119,
+            -0.0009970757588497682,
+            -0.06056355425469288,
+            -0.06884320605436924,
+            -0.002095928989945659,
+            0.01499395354345747,
+            0.0668001797461137,
+            0.0023216922720068383,
+            -0.016609029330510533,
+            0.0023216922720068383,
+            -0.009387797963986713,
+            -0.0005056613145120282,
+            -0.016609029330510533,
+            -0.0005056613145120282,
+            -0.005841058553679004,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.1802999914938216,
+            0.5889799722131493,
+            -0.9495799552007915,
+            -1.070225697321266,
+            -0.18728687322613707,
+            0.30195230581356786,
+            -0.18728687322613707,
+            -0.5157546277429348,
+            -0.9863775323243197,
+            0.30195230581356786,
+            -0.9863775323243197,
+            0.4627237303364723,
+            -0.12240657396947667,
+            0.0016631327984983487,
+            -0.013970315507385913,
+            0.12123416269111348,
+            -0.002034671914563809,
+            0.01709124408233573,
+            -0.002034671914563809,
+            -0.028490045221941467,
+            -0.00023221799024913015,
+            0.01709124408233573,
+            -0.00023221799024913015,
+            -0.026567059102687987,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.2602591506940697,
+            0.24313683814840728,
+            -0.3561441009497795,
+            -0.19841405298242495,
+            0.23891499072173572,
+            -0.3499599864093028,
+            0.23891499072173572,
+            -0.23095714382387694,
+            -0.32693630309290145,
+            -0.34995998640930287,
+            -0.32693630309290145,
+            0.02473856993038946,
+            -0.13298898711407747,
+            0.03304327593938735,
+            -0.03753063440029181,
+            0.11967949867634801,
+            -0.0393666881596552,
+            0.044712781613435545,
+            -0.0393666881596552,
+            -0.02897797727002851,
+            -0.01110961751744871,
+            0.044712781613435545,
+            -0.011109617517448708,
+            -0.026140939946396612,
+            -0.0655508249665835,
+            0.005338981218997763,
+            0.002076270474054683,
+            0.0652388462343952,
+            -0.005991628777201879,
+            -0.0023300778578007283,
+            -0.005991628777201879,
+            -0.007837034455273709,
+            0.0001897800970154443,
+            -0.002330077857800728,
+            0.0001897800970154443,
+            -0.008251237047966148,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -1.0053013143052718,
+            -0.24303987818369216,
+            0.2761816797541954,
+            0.8183357773897718,
+            0.45521877564245394,
+            -0.517294063230061,
+            0.45521877564245394,
+            -0.9545617219529918,
+            -0.1250601031984763,
+            -0.517294063230061,
+            -0.1250601031984763,
+            -0.922500859133019,
+            -0.057945707686107864,
+            -0.008613551142529548,
+            0.00809151773995201,
+            0.05650342385473076,
+            0.009417127630974336,
+            -0.00884639262303651,
+            0.009417127630974336,
+            -0.005448318729873148,
+            -0.0013150043088297515,
+            -0.00884639262303651,
+            -0.0013150043088297513,
+            -0.005612854948377747,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.2602591506940697,
+            -0.24313683814840728,
+            0.3561441009497795,
+            -0.19841405298242495,
+            0.23891499072173572,
+            -0.3499599864093028,
+            0.23891499072173572,
+            -0.23095714382387694,
+            -0.32693630309290145,
+            -0.34995998640930287,
+            -0.32693630309290145,
+            0.02473856993038946,
+            -0.09709214772325653,
+            0.00241522755530488,
+            0.0028982730663658636,
+            0.09699249715361474,
+            -0.0028489422636695603,
+            -0.0034187307164034813,
+            -0.00284894226366956,
+            -0.017464112635362926,
+            8.504305264685245e-05,
+            -0.003418730716403481,
+            8.504305264685245e-05,
+            -0.017432930182725747,
+            -0.014091999096200191,
+            -0.0009521621010946064,
+            0.0032101465122618194,
+            0.013676554858123474,
+            0.0009667394698497003,
+            -0.0032592930697789933,
+            0.0009667394698497003,
+            -0.0005658690612028016,
+            -0.0002202225047147966,
+            -0.0032592930697789933,
+            -0.0002202225047147966,
+            0.00011127514881492362,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+        ]
         self.dcoord = np.reshape(self.dcoord, [1, -1])
         self.dtype = np.reshape(self.dtype, [1, -1])
         self.dbox = np.reshape(self.dbox, [1, -1])
@@ -50,23 +839,157 @@ def setUp(self):
         self.dbox = np.tile(self.dbox, [self.nframes, 1])
         self.dnlist = np.tile(self.dnlist, [self.nframes, 1])
         self.dem_deriv = np.tile(self.dem_deriv, [self.nframes, 1])
-        self.expected_force = [9.44498, -13.86254, 10.52884, -19.42688,  8.09273, 19.64478,  4.81771, 11.39255, 12.38830, -16.65832,  6.65153, -10.15585,  
-                               1.16660, -14.43259, 22.97076, 22.86479,  7.42726, -11.41943, -7.67893, -7.23287, -11.33442, -4.51184, -3.80588, -2.44935,  
-                               0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
-                               0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
-                               0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
-                               0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
-                               0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
-                               0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
-                               1.16217,  6.16192, -28.79094,  3.81076, -0.01986, -1.01629,  3.65869, -0.49195, -0.07437,  1.35028,  0.11969, -0.29201,  
-                               0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
-                               0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
-                               0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000]
+        self.expected_force = [
+            9.44498,
+            -13.86254,
+            10.52884,
+            -19.42688,
+            8.09273,
+            19.64478,
+            4.81771,
+            11.39255,
+            12.38830,
+            -16.65832,
+            6.65153,
+            -10.15585,
+            1.16660,
+            -14.43259,
+            22.97076,
+            22.86479,
+            7.42726,
+            -11.41943,
+            -7.67893,
+            -7.23287,
+            -11.33442,
+            -4.51184,
+            -3.80588,
+            -2.44935,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.16217,
+            6.16192,
+            -28.79094,
+            3.81076,
+            -0.01986,
+            -1.01629,
+            3.65869,
+            -0.49195,
+            -0.07437,
+            1.35028,
+            0.11969,
+            -0.29201,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+        ]
 
         self.sel = [5, 5]
-        self.sec = np.array([0, 0, 0], dtype = int)
+        self.sec = np.array([0, 0, 0], dtype=int)
         self.sec[1:3] = np.cumsum(self.sel)
-        self.rcut = 6.
+        self.rcut = 6.0
         self.rcut_smth = 0.8
         self.dnatoms = [6, 48, 2, 4]
 
@@ -75,66 +998,77 @@ def setUp(self):
         self.nnei = self.sec[-1]
         self.ndescrpt = 4 * self.nnei
         self.ntypes = np.max(self.dtype) + 1
-        self.dnet_deriv=[]
+        self.dnet_deriv = []
         for ii in range(self.nloc * self.ndescrpt):
-            self.dnet_deriv.append(10-ii*0.01)
+            self.dnet_deriv.append(10 - ii * 0.01)
         self.dnet_deriv = np.reshape(self.dnet_deriv, [1, -1])
         self.dnet_deriv = np.tile(self.dnet_deriv, [self.nframes, 1])
 
-        self.tnet_deriv = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * self.ndescrpt], name='t_net_deriv')
-        self.tem_deriv = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * self.ndescrpt * 3], name='t_em_deriv')
-        self.tnlist = tf.placeholder(tf.int32, [None, self.dnatoms[0] * self.nnei], name = "t_nlist")
-        self.tnatoms = tf.placeholder(tf.int32, [None], name = "t_natoms")
-        
+        self.tnet_deriv = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION,
+            [None, self.dnatoms[0] * self.ndescrpt],
+            name="t_net_deriv",
+        )
+        self.tem_deriv = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION,
+            [None, self.dnatoms[0] * self.ndescrpt * 3],
+            name="t_em_deriv",
+        )
+        self.tnlist = tf.placeholder(
+            tf.int32, [None, self.dnatoms[0] * self.nnei], name="t_nlist"
+        )
+        self.tnatoms = tf.placeholder(tf.int32, [None], name="t_natoms")
+
     def test_prod_force(self):
-        tforce \
-            = op_module.prod_force_se_a(
-                self.tnet_deriv,
-                self.tem_deriv,
-                self.tnlist,
-                self.tnatoms, 
-                n_a_sel=self.nnei,
-                n_r_sel=0)
-        self.sess.run (tf.global_variables_initializer())
+        tforce = op_module.prod_force_se_a(
+            self.tnet_deriv,
+            self.tem_deriv,
+            self.tnlist,
+            self.tnatoms,
+            n_a_sel=self.nnei,
+            n_r_sel=0,
+        )
+        self.sess.run(tf.global_variables_initializer())
         dforce = self.sess.run(
             tforce,
-            feed_dict = {
+            feed_dict={
                 self.tnet_deriv: self.dnet_deriv,
                 self.tem_deriv: self.dem_deriv,
                 self.tnlist: self.dnlist,
-                self.tnatoms: self.dnatoms}
+                self.tnatoms: self.dnatoms,
+            },
         )
-        self.assertEqual(dforce.shape, (self.nframes, self.nall*3))
+        self.assertEqual(dforce.shape, (self.nframes, self.nall * 3))
         for ff in range(self.nframes):
             np.testing.assert_almost_equal(dforce[ff], self.expected_force, 5)
-    
+
     @unittest.skipIf(tf.test.is_gpu_available(), reason="Not supported in GPUs")
     def test_prod_force_parallel(self):
         forces = []
         for ii in range(4):
-            tforce \
-                = op_module.parallel_prod_force_se_a(
-                    self.tnet_deriv,
-                    self.tem_deriv,
-                    self.tnlist,
-                    self.tnatoms, 
-                    n_a_sel=self.nnei,
-                    n_r_sel=0,
-                    parallel=True,
-                    start_frac = ii/4,
-                    end_frac = (ii+1)/4,
-                    )
+            tforce = op_module.parallel_prod_force_se_a(
+                self.tnet_deriv,
+                self.tem_deriv,
+                self.tnlist,
+                self.tnatoms,
+                n_a_sel=self.nnei,
+                n_r_sel=0,
+                parallel=True,
+                start_frac=ii / 4,
+                end_frac=(ii + 1) / 4,
+            )
             forces.append(tforce)
         tforce = tf.add_n(forces)
-        self.sess.run (tf.global_variables_initializer())
+        self.sess.run(tf.global_variables_initializer())
         dforce = self.sess.run(
             tforce,
-            feed_dict = {
+            feed_dict={
                 self.tnet_deriv: self.dnet_deriv,
                 self.tem_deriv: self.dem_deriv,
                 self.tnlist: self.dnlist,
-                self.tnatoms: self.dnatoms}
+                self.tnatoms: self.dnatoms,
+            },
         )
-        self.assertEqual(dforce.shape, (self.nframes, self.nall*3))
+        self.assertEqual(dforce.shape, (self.nframes, self.nall * 3))
         for ff in range(self.nframes):
             np.testing.assert_almost_equal(dforce[ff], self.expected_force, 5)
diff --git a/source/tests/test_prod_force_grad.py b/source/tests/test_prod_force_grad.py
index 71a95380f0..da49a46c1b 100644
--- a/source/tests/test_prod_force_grad.py
+++ b/source/tests/test_prod_force_grad.py
@@ -1,42 +1,829 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
+import numpy as np
+
 import deepmd.op
-from deepmd.env import tf
-from deepmd.env import op_grads_module
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    op_grads_module,
+    tf,
+)
+
 
 class TestProdForceGrad(tf.test.TestCase):
     def setUp(self):
         self.sess = self.test_session().__enter__()
         self.nframes = 2
         self.dcoord = [
-            12.83, 2.56, 2.18,
-            12.09, 2.87, 2.74,
-            00.25, 3.32, 1.68,
-            3.36, 3.00, 1.81,
-            3.51, 2.51, 2.60,
-            4.27, 3.22, 1.56]
+            12.83,
+            2.56,
+            2.18,
+            12.09,
+            2.87,
+            2.74,
+            00.25,
+            3.32,
+            1.68,
+            3.36,
+            3.00,
+            1.81,
+            3.51,
+            2.51,
+            2.60,
+            4.27,
+            3.22,
+            1.56,
+        ]
         self.dtype = [0, 1, 1, 0, 1, 1]
-        self.dbox = [13., 0., 0., 0., 13., 0., 0., 0., 13.]
-        self.dnlist = [33, -1, -1, -1, -1, 1, 32, 34, 35, -1, 
-                       0, 33, -1, -1, -1, 32, 34, 35, -1, -1, 
-                       6, 3, -1, -1, -1, 7, 4, 5, -1, -1, 
-                       6, -1, -1, -1, -1, 4, 5, 2, 7, -1, 
-                       3, 6, -1, -1, -1, 5, 2, 7, -1, -1, 
-                       3, 6, -1, -1, -1, 4, 2, 7, -1, -1]
-        self.dem_deriv = [0.13227682739491875, 0.01648776318803519, -0.013864709953575083, 0.12967498112414713, 0.0204174282700489, -0.017169201045268437, 0.0204174282700489, -0.031583528930688706, -0.0021400703852459233, -0.01716920104526844, -0.0021400703852459233, -0.03232887285478848, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.7946522798827726, 0.33289487400494444, 0.6013584820734476, 0.15412158847174678, -0.502001299580599, -0.9068410573068878, -0.502001299580599, -0.833906252681877, 0.3798928753582899, -0.9068410573068878, 0.3798928753582899, -0.3579459969766471, 0.4206262499369199, 0.761133214171572, -0.5007455356391932, -0.6442543005863454, 0.635525177045359, -0.4181086691087898, 0.6355251770453592, 0.15453235677768898,
-                          -0.75657759172067, -0.4181086691087898, -0.75657759172067, -0.49771716703202185, 0.12240657396947655, -0.0016631327984983461, 0.013970315507385892, 0.12123416269111335, -0.0020346719145638054, 0.017091244082335703, -0.002034671914563806, -0.028490045221941415, -0.00023221799024912971, 0.017091244082335703, -0.00023221799024912971, -0.026567059102687942, 0.057945707686107975, 0.008613551142529565, -0.008091517739952026, 0.056503423854730866, 0.009417127630974357, -0.008846392623036528, 0.009417127630974357, -0.005448318729873151, -0.0013150043088297543, -0.008846392623036528, -0.0013150043088297541, -0.005612854948377751, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7946522798827726, -0.33289487400494444, -0.6013584820734476, 0.15412158847174678, -0.502001299580599, -0.9068410573068878, -0.502001299580599, -0.833906252681877, 0.3798928753582899, -0.9068410573068878, 0.3798928753582899, -0.3579459969766471, 0.06884320605436924, 0.002095928989945659, -0.01499395354345747, 0.0668001797461137, 0.0023216922720068383, -0.016609029330510533, 0.0023216922720068383, -0.009387797963986713, -0.0005056613145120282, -0.016609029330510533, -0.0005056613145120282, -0.005841058553679004, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3025931001933299, 0.11738525438534331, -0.2765074881076981, 0.034913562192579815, 0.15409432322878, -0.3629777391611269, 0.15409432322878003, -0.30252938969021487, -0.14081032984698866, -0.3629777391611269, -0.14081032984698866, -0.030620805157591004, 0.06555082496658332, -0.005338981218997747, -0.002076270474054677, 0.06523884623439505, -0.00599162877720186, -0.0023300778578007205, -0.00599162877720186, -0.007837034455273667, 0.00018978009701544363, -0.0023300778578007205, 0.00018978009701544363, -0.008251237047966105, 0.014091999096200191, 0.0009521621010946066, -0.00321014651226182, 0.013676554858123476, 0.0009667394698497006, -0.0032592930697789946, 0.0009667394698497006, -0.0005658690612028018, -0.00022022250471479668, -0.0032592930697789937, -0.00022022250471479666, 0.00011127514881492382, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          -0.4206262499369199, -0.761133214171572, 0.5007455356391932, -0.6442543005863454, 0.635525177045359, -0.4181086691087898, 0.6355251770453592, 0.15453235677768898, -0.75657759172067, -0.4181086691087898, -0.75657759172067, -0.49771716703202185, 0.17265177804411166, -0.01776481317495682, 0.007216955352326217, 0.1708538944675734, -0.023853120077098278, 0.009690330031321191, -0.02385312007709828, -0.05851427595224925, -0.0009970757588497682, 0.00969033003132119, -0.0009970757588497682, -0.06056355425469288, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.3025931001933299, -0.11738525438534331, 0.2765074881076981, 0.034913562192579815, 0.15409432322878, -0.3629777391611269, 0.15409432322878003, -0.30252938969021487, -0.14081032984698866, -0.3629777391611269, -0.14081032984698866, -0.030620805157591004, 0.13298898711407747, -0.03304327593938735, 0.03753063440029181, 0.11967949867634801, -0.0393666881596552, 0.044712781613435545, -0.0393666881596552, -0.02897797727002851,
-                          -0.01110961751744871, 0.044712781613435545, -0.011109617517448708, -0.026140939946396612, 0.09709214772325653, -0.00241522755530488, -0.0028982730663658636, 0.09699249715361474, -0.0028489422636695603, -0.0034187307164034813, -0.00284894226366956, -0.017464112635362926, 8.504305264685245e-05, -0.003418730716403481, 8.504305264685245e-05, -0.017432930182725747, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1322768273949186, -0.016487763188035173, 0.013864709953575069, 0.12967498112414702, 0.020417428270048884, -0.017169201045268423, 0.02041742827004888, -0.03158352893068868, -0.002140070385245921, -0.017169201045268423, -0.002140070385245921, -0.03232887285478844, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1802999914938216, -0.5889799722131493, 0.9495799552007915, -1.070225697321266, -0.18728687322613707, 0.30195230581356786, -0.18728687322613707, -0.5157546277429348, -0.9863775323243197, 0.30195230581356786, -0.9863775323243197, 0.4627237303364723, 1.0053013143052718, 0.24303987818369216, -0.2761816797541954, 0.8183357773897718, 0.45521877564245394, -0.517294063230061, 0.45521877564245394, -0.9545617219529918, -0.1250601031984763, -0.517294063230061, -0.1250601031984763, -0.922500859133019, -0.17265177804411166, 0.01776481317495682, -0.007216955352326217, 0.1708538944675734, -0.023853120077098278, 0.009690330031321191, -0.02385312007709828, -0.05851427595224925, -0.0009970757588497682, 0.00969033003132119, -0.0009970757588497682, -0.06056355425469288, -0.06884320605436924, -0.002095928989945659, 0.01499395354345747, 0.0668001797461137, 0.0023216922720068383, -0.016609029330510533, 0.0023216922720068383, -0.009387797963986713, -0.0005056613145120282, -0.016609029330510533, -0.0005056613145120282, -0.005841058553679004, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          -0.1802999914938216, 0.5889799722131493, -0.9495799552007915, -1.070225697321266, -0.18728687322613707, 0.30195230581356786, -0.18728687322613707, -0.5157546277429348, -0.9863775323243197, 0.30195230581356786, -0.9863775323243197, 0.4627237303364723, -0.12240657396947667, 0.0016631327984983487, -0.013970315507385913, 0.12123416269111348, -0.002034671914563809, 0.01709124408233573, -0.002034671914563809, -0.028490045221941467, -0.00023221799024913015, 0.01709124408233573, -0.00023221799024913015, -0.026567059102687987, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2602591506940697, 0.24313683814840728, -0.3561441009497795, -0.19841405298242495, 0.23891499072173572, -0.3499599864093028, 0.23891499072173572, -0.23095714382387694, -0.32693630309290145, -0.34995998640930287, -0.32693630309290145, 0.02473856993038946, -0.13298898711407747, 0.03304327593938735, -0.03753063440029181, 0.11967949867634801, -0.0393666881596552, 0.044712781613435545, -0.0393666881596552, -0.02897797727002851,
-                          -0.01110961751744871, 0.044712781613435545, -0.011109617517448708, -0.026140939946396612, -0.0655508249665835, 0.005338981218997763, 0.002076270474054683, 0.0652388462343952, -0.005991628777201879, -0.0023300778578007283, -0.005991628777201879, -0.007837034455273709, 0.0001897800970154443, -0.002330077857800728, 0.0001897800970154443, -0.008251237047966148, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0053013143052718, -0.24303987818369216, 0.2761816797541954, 0.8183357773897718, 0.45521877564245394, -0.517294063230061, 0.45521877564245394, -0.9545617219529918, -0.1250601031984763, -0.517294063230061, -0.1250601031984763, -0.922500859133019, -0.057945707686107864, -0.008613551142529548, 0.00809151773995201, 0.05650342385473076, 0.009417127630974336, -0.00884639262303651, 0.009417127630974336, -0.005448318729873148, -0.0013150043088297515, -0.00884639262303651, -0.0013150043088297513, -0.005612854948377747, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.2602591506940697, -0.24313683814840728, 0.3561441009497795, -0.19841405298242495, 0.23891499072173572, -0.3499599864093028, 0.23891499072173572, -0.23095714382387694, -0.32693630309290145, -0.34995998640930287, -0.32693630309290145, 0.02473856993038946, -0.09709214772325653, 0.00241522755530488, 0.0028982730663658636, 0.09699249715361474, -0.0028489422636695603, -0.0034187307164034813, -0.00284894226366956, -0.017464112635362926, 8.504305264685245e-05, -0.003418730716403481, 8.504305264685245e-05, -0.017432930182725747, -0.014091999096200191, -0.0009521621010946064, 0.0032101465122618194, 0.013676554858123474, 0.0009667394698497003, -0.0032592930697789933, 0.0009667394698497003, -0.0005658690612028016, -0.0002202225047147966, -0.0032592930697789933, -0.0002202225047147966, 0.00011127514881492362, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+        self.dbox = [13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0]
+        self.dnlist = [
+            33,
+            -1,
+            -1,
+            -1,
+            -1,
+            1,
+            32,
+            34,
+            35,
+            -1,
+            0,
+            33,
+            -1,
+            -1,
+            -1,
+            32,
+            34,
+            35,
+            -1,
+            -1,
+            6,
+            3,
+            -1,
+            -1,
+            -1,
+            7,
+            4,
+            5,
+            -1,
+            -1,
+            6,
+            -1,
+            -1,
+            -1,
+            -1,
+            4,
+            5,
+            2,
+            7,
+            -1,
+            3,
+            6,
+            -1,
+            -1,
+            -1,
+            5,
+            2,
+            7,
+            -1,
+            -1,
+            3,
+            6,
+            -1,
+            -1,
+            -1,
+            4,
+            2,
+            7,
+            -1,
+            -1,
+        ]
+        self.dem_deriv = [
+            0.13227682739491875,
+            0.01648776318803519,
+            -0.013864709953575083,
+            0.12967498112414713,
+            0.0204174282700489,
+            -0.017169201045268437,
+            0.0204174282700489,
+            -0.031583528930688706,
+            -0.0021400703852459233,
+            -0.01716920104526844,
+            -0.0021400703852459233,
+            -0.03232887285478848,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.7946522798827726,
+            0.33289487400494444,
+            0.6013584820734476,
+            0.15412158847174678,
+            -0.502001299580599,
+            -0.9068410573068878,
+            -0.502001299580599,
+            -0.833906252681877,
+            0.3798928753582899,
+            -0.9068410573068878,
+            0.3798928753582899,
+            -0.3579459969766471,
+            0.4206262499369199,
+            0.761133214171572,
+            -0.5007455356391932,
+            -0.6442543005863454,
+            0.635525177045359,
+            -0.4181086691087898,
+            0.6355251770453592,
+            0.15453235677768898,
+            -0.75657759172067,
+            -0.4181086691087898,
+            -0.75657759172067,
+            -0.49771716703202185,
+            0.12240657396947655,
+            -0.0016631327984983461,
+            0.013970315507385892,
+            0.12123416269111335,
+            -0.0020346719145638054,
+            0.017091244082335703,
+            -0.002034671914563806,
+            -0.028490045221941415,
+            -0.00023221799024912971,
+            0.017091244082335703,
+            -0.00023221799024912971,
+            -0.026567059102687942,
+            0.057945707686107975,
+            0.008613551142529565,
+            -0.008091517739952026,
+            0.056503423854730866,
+            0.009417127630974357,
+            -0.008846392623036528,
+            0.009417127630974357,
+            -0.005448318729873151,
+            -0.0013150043088297543,
+            -0.008846392623036528,
+            -0.0013150043088297541,
+            -0.005612854948377751,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.7946522798827726,
+            -0.33289487400494444,
+            -0.6013584820734476,
+            0.15412158847174678,
+            -0.502001299580599,
+            -0.9068410573068878,
+            -0.502001299580599,
+            -0.833906252681877,
+            0.3798928753582899,
+            -0.9068410573068878,
+            0.3798928753582899,
+            -0.3579459969766471,
+            0.06884320605436924,
+            0.002095928989945659,
+            -0.01499395354345747,
+            0.0668001797461137,
+            0.0023216922720068383,
+            -0.016609029330510533,
+            0.0023216922720068383,
+            -0.009387797963986713,
+            -0.0005056613145120282,
+            -0.016609029330510533,
+            -0.0005056613145120282,
+            -0.005841058553679004,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.3025931001933299,
+            0.11738525438534331,
+            -0.2765074881076981,
+            0.034913562192579815,
+            0.15409432322878,
+            -0.3629777391611269,
+            0.15409432322878003,
+            -0.30252938969021487,
+            -0.14081032984698866,
+            -0.3629777391611269,
+            -0.14081032984698866,
+            -0.030620805157591004,
+            0.06555082496658332,
+            -0.005338981218997747,
+            -0.002076270474054677,
+            0.06523884623439505,
+            -0.00599162877720186,
+            -0.0023300778578007205,
+            -0.00599162877720186,
+            -0.007837034455273667,
+            0.00018978009701544363,
+            -0.0023300778578007205,
+            0.00018978009701544363,
+            -0.008251237047966105,
+            0.014091999096200191,
+            0.0009521621010946066,
+            -0.00321014651226182,
+            0.013676554858123476,
+            0.0009667394698497006,
+            -0.0032592930697789946,
+            0.0009667394698497006,
+            -0.0005658690612028018,
+            -0.00022022250471479668,
+            -0.0032592930697789937,
+            -0.00022022250471479666,
+            0.00011127514881492382,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.4206262499369199,
+            -0.761133214171572,
+            0.5007455356391932,
+            -0.6442543005863454,
+            0.635525177045359,
+            -0.4181086691087898,
+            0.6355251770453592,
+            0.15453235677768898,
+            -0.75657759172067,
+            -0.4181086691087898,
+            -0.75657759172067,
+            -0.49771716703202185,
+            0.17265177804411166,
+            -0.01776481317495682,
+            0.007216955352326217,
+            0.1708538944675734,
+            -0.023853120077098278,
+            0.009690330031321191,
+            -0.02385312007709828,
+            -0.05851427595224925,
+            -0.0009970757588497682,
+            0.00969033003132119,
+            -0.0009970757588497682,
+            -0.06056355425469288,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.3025931001933299,
+            -0.11738525438534331,
+            0.2765074881076981,
+            0.034913562192579815,
+            0.15409432322878,
+            -0.3629777391611269,
+            0.15409432322878003,
+            -0.30252938969021487,
+            -0.14081032984698866,
+            -0.3629777391611269,
+            -0.14081032984698866,
+            -0.030620805157591004,
+            0.13298898711407747,
+            -0.03304327593938735,
+            0.03753063440029181,
+            0.11967949867634801,
+            -0.0393666881596552,
+            0.044712781613435545,
+            -0.0393666881596552,
+            -0.02897797727002851,
+            -0.01110961751744871,
+            0.044712781613435545,
+            -0.011109617517448708,
+            -0.026140939946396612,
+            0.09709214772325653,
+            -0.00241522755530488,
+            -0.0028982730663658636,
+            0.09699249715361474,
+            -0.0028489422636695603,
+            -0.0034187307164034813,
+            -0.00284894226366956,
+            -0.017464112635362926,
+            8.504305264685245e-05,
+            -0.003418730716403481,
+            8.504305264685245e-05,
+            -0.017432930182725747,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.1322768273949186,
+            -0.016487763188035173,
+            0.013864709953575069,
+            0.12967498112414702,
+            0.020417428270048884,
+            -0.017169201045268423,
+            0.02041742827004888,
+            -0.03158352893068868,
+            -0.002140070385245921,
+            -0.017169201045268423,
+            -0.002140070385245921,
+            -0.03232887285478844,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.1802999914938216,
+            -0.5889799722131493,
+            0.9495799552007915,
+            -1.070225697321266,
+            -0.18728687322613707,
+            0.30195230581356786,
+            -0.18728687322613707,
+            -0.5157546277429348,
+            -0.9863775323243197,
+            0.30195230581356786,
+            -0.9863775323243197,
+            0.4627237303364723,
+            1.0053013143052718,
+            0.24303987818369216,
+            -0.2761816797541954,
+            0.8183357773897718,
+            0.45521877564245394,
+            -0.517294063230061,
+            0.45521877564245394,
+            -0.9545617219529918,
+            -0.1250601031984763,
+            -0.517294063230061,
+            -0.1250601031984763,
+            -0.922500859133019,
+            -0.17265177804411166,
+            0.01776481317495682,
+            -0.007216955352326217,
+            0.1708538944675734,
+            -0.023853120077098278,
+            0.009690330031321191,
+            -0.02385312007709828,
+            -0.05851427595224925,
+            -0.0009970757588497682,
+            0.00969033003132119,
+            -0.0009970757588497682,
+            -0.06056355425469288,
+            -0.06884320605436924,
+            -0.002095928989945659,
+            0.01499395354345747,
+            0.0668001797461137,
+            0.0023216922720068383,
+            -0.016609029330510533,
+            0.0023216922720068383,
+            -0.009387797963986713,
+            -0.0005056613145120282,
+            -0.016609029330510533,
+            -0.0005056613145120282,
+            -0.005841058553679004,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.1802999914938216,
+            0.5889799722131493,
+            -0.9495799552007915,
+            -1.070225697321266,
+            -0.18728687322613707,
+            0.30195230581356786,
+            -0.18728687322613707,
+            -0.5157546277429348,
+            -0.9863775323243197,
+            0.30195230581356786,
+            -0.9863775323243197,
+            0.4627237303364723,
+            -0.12240657396947667,
+            0.0016631327984983487,
+            -0.013970315507385913,
+            0.12123416269111348,
+            -0.002034671914563809,
+            0.01709124408233573,
+            -0.002034671914563809,
+            -0.028490045221941467,
+            -0.00023221799024913015,
+            0.01709124408233573,
+            -0.00023221799024913015,
+            -0.026567059102687987,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.2602591506940697,
+            0.24313683814840728,
+            -0.3561441009497795,
+            -0.19841405298242495,
+            0.23891499072173572,
+            -0.3499599864093028,
+            0.23891499072173572,
+            -0.23095714382387694,
+            -0.32693630309290145,
+            -0.34995998640930287,
+            -0.32693630309290145,
+            0.02473856993038946,
+            -0.13298898711407747,
+            0.03304327593938735,
+            -0.03753063440029181,
+            0.11967949867634801,
+            -0.0393666881596552,
+            0.044712781613435545,
+            -0.0393666881596552,
+            -0.02897797727002851,
+            -0.01110961751744871,
+            0.044712781613435545,
+            -0.011109617517448708,
+            -0.026140939946396612,
+            -0.0655508249665835,
+            0.005338981218997763,
+            0.002076270474054683,
+            0.0652388462343952,
+            -0.005991628777201879,
+            -0.0023300778578007283,
+            -0.005991628777201879,
+            -0.007837034455273709,
+            0.0001897800970154443,
+            -0.002330077857800728,
+            0.0001897800970154443,
+            -0.008251237047966148,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -1.0053013143052718,
+            -0.24303987818369216,
+            0.2761816797541954,
+            0.8183357773897718,
+            0.45521877564245394,
+            -0.517294063230061,
+            0.45521877564245394,
+            -0.9545617219529918,
+            -0.1250601031984763,
+            -0.517294063230061,
+            -0.1250601031984763,
+            -0.922500859133019,
+            -0.057945707686107864,
+            -0.008613551142529548,
+            0.00809151773995201,
+            0.05650342385473076,
+            0.009417127630974336,
+            -0.00884639262303651,
+            0.009417127630974336,
+            -0.005448318729873148,
+            -0.0013150043088297515,
+            -0.00884639262303651,
+            -0.0013150043088297513,
+            -0.005612854948377747,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.2602591506940697,
+            -0.24313683814840728,
+            0.3561441009497795,
+            -0.19841405298242495,
+            0.23891499072173572,
+            -0.3499599864093028,
+            0.23891499072173572,
+            -0.23095714382387694,
+            -0.32693630309290145,
+            -0.34995998640930287,
+            -0.32693630309290145,
+            0.02473856993038946,
+            -0.09709214772325653,
+            0.00241522755530488,
+            0.0028982730663658636,
+            0.09699249715361474,
+            -0.0028489422636695603,
+            -0.0034187307164034813,
+            -0.00284894226366956,
+            -0.017464112635362926,
+            8.504305264685245e-05,
+            -0.003418730716403481,
+            8.504305264685245e-05,
+            -0.017432930182725747,
+            -0.014091999096200191,
+            -0.0009521621010946064,
+            0.0032101465122618194,
+            0.013676554858123474,
+            0.0009667394698497003,
+            -0.0032592930697789933,
+            0.0009667394698497003,
+            -0.0005658690612028016,
+            -0.0002202225047147966,
+            -0.0032592930697789933,
+            -0.0002202225047147966,
+            0.00011127514881492362,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+        ]
         self.dcoord = np.reshape(self.dcoord, [1, -1])
         self.dtype = np.reshape(self.dtype, [1, -1])
         self.dbox = np.reshape(self.dbox, [1, -1])
@@ -47,17 +834,253 @@ def setUp(self):
         self.dbox = np.tile(self.dbox, [self.nframes, 1])
         self.dnlist = np.tile(self.dnlist, [self.nframes, 1])
         self.dem_deriv = np.tile(self.dem_deriv, [self.nframes, 1])
-        self.expected_grad_net = [-0.12141, -0.11963,  0.01198,  0.04647,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, 0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.04188,  0.37642,  0.28680,  0.26547, -0.40861,  0.25610, -0.02009,  1.00344, -0.16166, -0.16355, 0.03691,  0.01165, -0.08770, -0.08561, -0.00398,  0.02366,  0.00000,  0.00000,  0.00000,  0.00000, 
-                              -0.04188, -0.37642, -0.28680, -0.26547, -0.03357, -0.03151,  0.00454,  0.01377,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.04304,  0.05219,  0.08677,  0.16032, -0.05232, -0.05123,  0.01227,  0.00935, -0.01420, -0.01366, -0.00022,  0.00404,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,
-                              -0.40861, -0.25610,  0.02009, -1.00344, -0.04863, -0.04701,  0.02501,  0.01556,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.04304, -0.05219, -0.08677, -0.16032, -0.08249, -0.07502,  0.04767, -0.00448, -0.08260, -0.08165,  0.01821,  0.01869,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, 
-                              -0.12141,  0.11963, -0.01198, -0.04647,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.16227,  0.28667,  0.50683,  0.06651, -0.58330, -0.45376,  0.37464,  0.93891, -0.04863,  0.04701, -0.02501, -0.01556, -0.03357,  0.03151, -0.00454, -0.01377,  0.00000,  0.00000,  0.00000,  0.00000, 
-                              -0.16227, -0.28667, -0.50683, -0.06651, -0.16166,  0.16355, -0.03691, -0.01165,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.04418,  0.09284,  0.09569,  0.19565, -0.08249,  0.07502, -0.04767,  0.00448, -0.05232,  0.05123, -0.01227, -0.00935,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, 
-                              -0.58330,  0.45376, -0.37464, -0.93891, -0.08770,  0.08561,  0.00398, -0.02366,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.04418, -0.09284, -0.09569, -0.19565, -0.08260,  0.08165, -0.01821, -0.01869, -0.01420,  0.01366,  0.00022, -0.00404,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000]
- 
+        self.expected_grad_net = [
+            -0.12141,
+            -0.11963,
+            0.01198,
+            0.04647,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            -0.04188,
+            0.37642,
+            0.28680,
+            0.26547,
+            -0.40861,
+            0.25610,
+            -0.02009,
+            1.00344,
+            -0.16166,
+            -0.16355,
+            0.03691,
+            0.01165,
+            -0.08770,
+            -0.08561,
+            -0.00398,
+            0.02366,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            -0.04188,
+            -0.37642,
+            -0.28680,
+            -0.26547,
+            -0.03357,
+            -0.03151,
+            0.00454,
+            0.01377,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            -0.04304,
+            0.05219,
+            0.08677,
+            0.16032,
+            -0.05232,
+            -0.05123,
+            0.01227,
+            0.00935,
+            -0.01420,
+            -0.01366,
+            -0.00022,
+            0.00404,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            -0.40861,
+            -0.25610,
+            0.02009,
+            -1.00344,
+            -0.04863,
+            -0.04701,
+            0.02501,
+            0.01556,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            -0.04304,
+            -0.05219,
+            -0.08677,
+            -0.16032,
+            -0.08249,
+            -0.07502,
+            0.04767,
+            -0.00448,
+            -0.08260,
+            -0.08165,
+            0.01821,
+            0.01869,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            -0.12141,
+            0.11963,
+            -0.01198,
+            -0.04647,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            -0.16227,
+            0.28667,
+            0.50683,
+            0.06651,
+            -0.58330,
+            -0.45376,
+            0.37464,
+            0.93891,
+            -0.04863,
+            0.04701,
+            -0.02501,
+            -0.01556,
+            -0.03357,
+            0.03151,
+            -0.00454,
+            -0.01377,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            -0.16227,
+            -0.28667,
+            -0.50683,
+            -0.06651,
+            -0.16166,
+            0.16355,
+            -0.03691,
+            -0.01165,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            -0.04418,
+            0.09284,
+            0.09569,
+            0.19565,
+            -0.08249,
+            0.07502,
+            -0.04767,
+            0.00448,
+            -0.05232,
+            0.05123,
+            -0.01227,
+            -0.00935,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            -0.58330,
+            0.45376,
+            -0.37464,
+            -0.93891,
+            -0.08770,
+            0.08561,
+            0.00398,
+            -0.02366,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            -0.04418,
+            -0.09284,
+            -0.09569,
+            -0.19565,
+            -0.08260,
+            0.08165,
+            -0.01821,
+            -0.01869,
+            -0.01420,
+            0.01366,
+            0.00022,
+            -0.00404,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+        ]
+
         self.sel = [5, 5]
-        self.sec = np.array([0, 0, 0], dtype = int)
+        self.sec = np.array([0, 0, 0], dtype=int)
         self.sec[1:3] = np.cumsum(self.sel)
-        self.rcut = 6.
+        self.rcut = 6.0
         self.rcut_smth = 0.8
         self.dnatoms = [6, 6, 2, 4]
 
@@ -66,43 +1089,56 @@ def setUp(self):
         self.nnei = self.sec[-1]
         self.ndescrpt = 4 * self.nnei
         self.ntypes = np.max(self.dtype) + 1
-        self.dnet_deriv=[]
+        self.dnet_deriv = []
         for ii in range(self.nloc * self.ndescrpt):
-            self.dnet_deriv.append(10-ii*0.01)
-        self.dgrad=[]
+            self.dnet_deriv.append(10 - ii * 0.01)
+        self.dgrad = []
         for ii in range(self.nloc * 3):
-            self.dgrad.append(10-ii*0.1)
+            self.dgrad.append(10 - ii * 0.1)
         self.dnet_deriv = np.reshape(self.dnet_deriv, [1, -1])
         self.dgrad = np.reshape(self.dgrad, [1, -1])
         self.dnet_deriv = np.tile(self.dnet_deriv, [self.nframes, 1])
         self.dgrad = np.tile(self.dgrad, [self.nframes, 1])
 
-        self.tgrad = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * 3], name='t_grad')
-        self.tnet_deriv = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * self.ndescrpt], name='t_net_deriv')
-        self.tem_deriv = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * self.ndescrpt * 3], name='t_em_deriv')
-        self.tnlist = tf.placeholder(tf.int32, [None, self.dnatoms[0] * self.nnei], name = "t_nlist")
-        self.tnatoms = tf.placeholder(tf.int32, [None], name = "t_natoms")
-        
+        self.tgrad = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * 3], name="t_grad"
+        )
+        self.tnet_deriv = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION,
+            [None, self.dnatoms[0] * self.ndescrpt],
+            name="t_net_deriv",
+        )
+        self.tem_deriv = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION,
+            [None, self.dnatoms[0] * self.ndescrpt * 3],
+            name="t_em_deriv",
+        )
+        self.tnlist = tf.placeholder(
+            tf.int32, [None, self.dnatoms[0] * self.nnei], name="t_nlist"
+        )
+        self.tnatoms = tf.placeholder(tf.int32, [None], name="t_natoms")
+
     def test_prod_force_grad(self):
-        tgrad_net \
-            = op_grads_module.prod_force_se_a_grad(
-                self.tgrad,
-                self.tnet_deriv,
-                self.tem_deriv,
-                self.tnlist,
-                self.tnatoms, 
-                n_a_sel=self.nnei,
-                n_r_sel=0)
-        self.sess.run (tf.global_variables_initializer())
+        tgrad_net = op_grads_module.prod_force_se_a_grad(
+            self.tgrad,
+            self.tnet_deriv,
+            self.tem_deriv,
+            self.tnlist,
+            self.tnatoms,
+            n_a_sel=self.nnei,
+            n_r_sel=0,
+        )
+        self.sess.run(tf.global_variables_initializer())
         dgrad_net = self.sess.run(
             tgrad_net,
-            feed_dict = {
+            feed_dict={
                 self.tgrad: self.dgrad,
                 self.tnet_deriv: self.dnet_deriv,
                 self.tem_deriv: self.dem_deriv,
                 self.tnlist: self.dnlist,
-                self.tnatoms: self.dnatoms}
+                self.tnatoms: self.dnatoms,
+            },
         )
-        self.assertEqual(dgrad_net.shape, (self.nframes, self.nloc*self.ndescrpt))
+        self.assertEqual(dgrad_net.shape, (self.nframes, self.nloc * self.ndescrpt))
         for ff in range(self.nframes):
             np.testing.assert_almost_equal(dgrad_net[ff], self.expected_grad_net, 5)
diff --git a/source/tests/test_prod_virial.py b/source/tests/test_prod_virial.py
index 691a8a652f..34427383e3 100644
--- a/source/tests/test_prod_virial.py
+++ b/source/tests/test_prod_virial.py
@@ -1,45 +1,1011 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
+import numpy as np
+
 import deepmd.op
-from deepmd.env import tf
-from deepmd.env import op_module
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    op_module,
+    tf,
+)
+
 
 class TestProdVirial(tf.test.TestCase):
     def setUp(self):
         self.sess = self.test_session().__enter__()
         self.nframes = 2
         self.dcoord = [
-            12.83, 2.56, 2.18,
-            12.09, 2.87, 2.74,
-            00.25, 3.32, 1.68,
-            3.36, 3.00, 1.81,
-            3.51, 2.51, 2.60,
-            4.27, 3.22, 1.56]
+            12.83,
+            2.56,
+            2.18,
+            12.09,
+            2.87,
+            2.74,
+            00.25,
+            3.32,
+            1.68,
+            3.36,
+            3.00,
+            1.81,
+            3.51,
+            2.51,
+            2.60,
+            4.27,
+            3.22,
+            1.56,
+        ]
         self.dtype = [0, 1, 1, 0, 1, 1]
-        self.dbox = [13., 0., 0., 0., 13., 0., 0., 0., 13.]
-        self.dnlist = [33, -1, -1, -1, -1, 1, 32, 34, 35, -1, 
-                       0, 33, -1, -1, -1, 32, 34, 35, -1, -1, 
-                       6, 3, -1, -1, -1, 7, 4, 5, -1, -1, 
-                       6, -1, -1, -1, -1, 4, 5, 2, 7, -1, 
-                       3, 6, -1, -1, -1, 5, 2, 7, -1, -1, 
-                       3, 6, -1, -1, -1, 4, 2, 7, -1, -1]
-        self.dem_deriv = [0.13227682739491875, 0.01648776318803519, -0.013864709953575083, 0.12967498112414713, 0.0204174282700489, -0.017169201045268437, 0.0204174282700489, -0.031583528930688706, -0.0021400703852459233, -0.01716920104526844, -0.0021400703852459233, -0.03232887285478848, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.7946522798827726, 0.33289487400494444, 0.6013584820734476, 0.15412158847174678, -0.502001299580599, -0.9068410573068878, -0.502001299580599, -0.833906252681877, 0.3798928753582899, -0.9068410573068878, 0.3798928753582899, -0.3579459969766471, 0.4206262499369199, 0.761133214171572, -0.5007455356391932, -0.6442543005863454, 0.635525177045359, -0.4181086691087898, 0.6355251770453592, 0.15453235677768898,
-                          -0.75657759172067, -0.4181086691087898, -0.75657759172067, -0.49771716703202185, 0.12240657396947655, -0.0016631327984983461, 0.013970315507385892, 0.12123416269111335, -0.0020346719145638054, 0.017091244082335703, -0.002034671914563806, -0.028490045221941415, -0.00023221799024912971, 0.017091244082335703, -0.00023221799024912971, -0.026567059102687942, 0.057945707686107975, 0.008613551142529565, -0.008091517739952026, 0.056503423854730866, 0.009417127630974357, -0.008846392623036528, 0.009417127630974357, -0.005448318729873151, -0.0013150043088297543, -0.008846392623036528, -0.0013150043088297541, -0.005612854948377751, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7946522798827726, -0.33289487400494444, -0.6013584820734476, 0.15412158847174678, -0.502001299580599, -0.9068410573068878, -0.502001299580599, -0.833906252681877, 0.3798928753582899, -0.9068410573068878, 0.3798928753582899, -0.3579459969766471, 0.06884320605436924, 0.002095928989945659, -0.01499395354345747, 0.0668001797461137, 0.0023216922720068383, -0.016609029330510533, 0.0023216922720068383, -0.009387797963986713, -0.0005056613145120282, -0.016609029330510533, -0.0005056613145120282, -0.005841058553679004, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3025931001933299, 0.11738525438534331, -0.2765074881076981, 0.034913562192579815, 0.15409432322878, -0.3629777391611269, 0.15409432322878003, -0.30252938969021487, -0.14081032984698866, -0.3629777391611269, -0.14081032984698866, -0.030620805157591004, 0.06555082496658332, -0.005338981218997747, -0.002076270474054677, 0.06523884623439505, -0.00599162877720186, -0.0023300778578007205, -0.00599162877720186, -0.007837034455273667, 0.00018978009701544363, -0.0023300778578007205, 0.00018978009701544363, -0.008251237047966105, 0.014091999096200191, 0.0009521621010946066, -0.00321014651226182, 0.013676554858123476, 0.0009667394698497006, -0.0032592930697789946, 0.0009667394698497006, -0.0005658690612028018, -0.00022022250471479668, -0.0032592930697789937, -0.00022022250471479666, 0.00011127514881492382, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          -0.4206262499369199, -0.761133214171572, 0.5007455356391932, -0.6442543005863454, 0.635525177045359, -0.4181086691087898, 0.6355251770453592, 0.15453235677768898, -0.75657759172067, -0.4181086691087898, -0.75657759172067, -0.49771716703202185, 0.17265177804411166, -0.01776481317495682, 0.007216955352326217, 0.1708538944675734, -0.023853120077098278, 0.009690330031321191, -0.02385312007709828, -0.05851427595224925, -0.0009970757588497682, 0.00969033003132119, -0.0009970757588497682, -0.06056355425469288, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.3025931001933299, -0.11738525438534331, 0.2765074881076981, 0.034913562192579815, 0.15409432322878, -0.3629777391611269, 0.15409432322878003, -0.30252938969021487, -0.14081032984698866, -0.3629777391611269, -0.14081032984698866, -0.030620805157591004, 0.13298898711407747, -0.03304327593938735, 0.03753063440029181, 0.11967949867634801, -0.0393666881596552, 0.044712781613435545, -0.0393666881596552, -0.02897797727002851,
-                          -0.01110961751744871, 0.044712781613435545, -0.011109617517448708, -0.026140939946396612, 0.09709214772325653, -0.00241522755530488, -0.0028982730663658636, 0.09699249715361474, -0.0028489422636695603, -0.0034187307164034813, -0.00284894226366956, -0.017464112635362926, 8.504305264685245e-05, -0.003418730716403481, 8.504305264685245e-05, -0.017432930182725747, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1322768273949186, -0.016487763188035173, 0.013864709953575069, 0.12967498112414702, 0.020417428270048884, -0.017169201045268423, 0.02041742827004888, -0.03158352893068868, -0.002140070385245921, -0.017169201045268423, -0.002140070385245921, -0.03232887285478844, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1802999914938216, -0.5889799722131493, 0.9495799552007915, -1.070225697321266, -0.18728687322613707, 0.30195230581356786, -0.18728687322613707, -0.5157546277429348, -0.9863775323243197, 0.30195230581356786, -0.9863775323243197, 0.4627237303364723, 1.0053013143052718, 0.24303987818369216, -0.2761816797541954, 0.8183357773897718, 0.45521877564245394, -0.517294063230061, 0.45521877564245394, -0.9545617219529918, -0.1250601031984763, -0.517294063230061, -0.1250601031984763, -0.922500859133019, -0.17265177804411166, 0.01776481317495682, -0.007216955352326217, 0.1708538944675734, -0.023853120077098278, 0.009690330031321191, -0.02385312007709828, -0.05851427595224925, -0.0009970757588497682, 0.00969033003132119, -0.0009970757588497682, -0.06056355425469288, -0.06884320605436924, -0.002095928989945659, 0.01499395354345747, 0.0668001797461137, 0.0023216922720068383, -0.016609029330510533, 0.0023216922720068383, -0.009387797963986713, -0.0005056613145120282, -0.016609029330510533, -0.0005056613145120282, -0.005841058553679004, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          -0.1802999914938216, 0.5889799722131493, -0.9495799552007915, -1.070225697321266, -0.18728687322613707, 0.30195230581356786, -0.18728687322613707, -0.5157546277429348, -0.9863775323243197, 0.30195230581356786, -0.9863775323243197, 0.4627237303364723, -0.12240657396947667, 0.0016631327984983487, -0.013970315507385913, 0.12123416269111348, -0.002034671914563809, 0.01709124408233573, -0.002034671914563809, -0.028490045221941467, -0.00023221799024913015, 0.01709124408233573, -0.00023221799024913015, -0.026567059102687987, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2602591506940697, 0.24313683814840728, -0.3561441009497795, -0.19841405298242495, 0.23891499072173572, -0.3499599864093028, 0.23891499072173572, -0.23095714382387694, -0.32693630309290145, -0.34995998640930287, -0.32693630309290145, 0.02473856993038946, -0.13298898711407747, 0.03304327593938735, -0.03753063440029181, 0.11967949867634801, -0.0393666881596552, 0.044712781613435545, -0.0393666881596552, -0.02897797727002851,
-                          -0.01110961751744871, 0.044712781613435545, -0.011109617517448708, -0.026140939946396612, -0.0655508249665835, 0.005338981218997763, 0.002076270474054683, 0.0652388462343952, -0.005991628777201879, -0.0023300778578007283, -0.005991628777201879, -0.007837034455273709, 0.0001897800970154443, -0.002330077857800728, 0.0001897800970154443, -0.008251237047966148, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0053013143052718, -0.24303987818369216, 0.2761816797541954, 0.8183357773897718, 0.45521877564245394, -0.517294063230061, 0.45521877564245394, -0.9545617219529918, -0.1250601031984763, -0.517294063230061, -0.1250601031984763, -0.922500859133019, -0.057945707686107864, -0.008613551142529548, 0.00809151773995201, 0.05650342385473076, 0.009417127630974336, -0.00884639262303651, 0.009417127630974336, -0.005448318729873148, -0.0013150043088297515, -0.00884639262303651, -0.0013150043088297513, -0.005612854948377747, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.2602591506940697, -0.24313683814840728, 0.3561441009497795, -0.19841405298242495, 0.23891499072173572, -0.3499599864093028, 0.23891499072173572, -0.23095714382387694, -0.32693630309290145, -0.34995998640930287, -0.32693630309290145, 0.02473856993038946, -0.09709214772325653, 0.00241522755530488, 0.0028982730663658636, 0.09699249715361474, -0.0028489422636695603, -0.0034187307164034813, -0.00284894226366956, -0.017464112635362926, 8.504305264685245e-05, -0.003418730716403481, 8.504305264685245e-05, -0.017432930182725747, -0.014091999096200191, -0.0009521621010946064, 0.0032101465122618194, 0.013676554858123474, 0.0009667394698497003, -0.0032592930697789933, 0.0009667394698497003, -0.0005658690612028016, -0.0002202225047147966, -0.0032592930697789933, -0.0002202225047147966, 0.00011127514881492362, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
-        self.drij = [3.5299999999999976, 0.4399999999999995, -0.37000000000000055, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.06099789543e-313, 0.0, 0.0, -0.740000000000002, 0.31000000000000005, 0.5599999999999996, 0.41999999999999815, 0.7599999999999993, -0.5000000000000007, 3.6799999999999997, -0.05000000000000071, 0.4199999999999995, 4.439999999999998, 0.6599999999999997, -0.6200000000000006, 1.06099789543e-313, 3.11, -0.31999999999999984, 0.740000000000002, -0.31000000000000005, -0.5599999999999996, 4.27, 0.12999999999999945, -0.9300000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.06099789543e-313, 3.26, -0.81, 1.1600000000000001, 0.4499999999999993, -1.0600000000000003, 4.420000000000002, -0.36000000000000076, -0.14000000000000012, 5.18, 0.34999999999999964, -1.1800000000000002, 0.0, 0.0, 0.0, 1.06099789543e-313, 0.0, 0.0,
-                     -0.41999999999999815, -0.7599999999999993, 0.5000000000000007, 3.11, -0.31999999999999984, 0.13000000000000012, 1.0609978957e-313, 2.1219957915e-314, 6.3659873744e-314, 6.3659873744e-314, 0.0, 0.0, 0.0, 0.1499999999999999, -0.4900000000000002, -1.1600000000000001, -0.4499999999999993, 1.0600000000000003, 3.2600000000000002, -0.81, 0.9200000000000002, 4.0200000000000005, -0.09999999999999964, -0.11999999999999988, 0.0, 0.0, 0.0, 0.0, -0.1499999999999999, 0.4900000000000002, -3.529999999999998, -0.4399999999999995, 0.37000000000000055, 0.0, 0.0, 0.0, 5e-324, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7599999999999998, 0.7100000000000004, 0.15000000000000036, -0.4900000000000002, 0.79, 0.9100000000000006, 0.2200000000000002, -0.25, -3.11, 0.31999999999999984, -0.13000000000000012, -4.27, -0.12999999999999945, 0.9300000000000002, 0.0, -0.9099999999999997, -0.2200000000000002,
-                     -0.15000000000000036, 0.4900000000000002, -0.79, -3.6799999999999984, 0.05000000000000071, -0.4199999999999995, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.7599999999999998, -0.7100000000000004, 0.7600000000000002, 0.7100000000000004, -1.04, -3.2600000000000002, 0.81, -0.9200000000000002, -4.42, 0.36000000000000076, 0.14000000000000012, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.9100000000000006, -0.2200000000000002, 0.25, -4.439999999999999, -0.6599999999999997, 0.6200000000000006, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.7400000000000002, 0.31000000000000005, -0.7600000000000002, -0.7100000000000004, 1.04, -4.0200000000000005, 0.09999999999999964, 0.11999999999999988, -5.180000000000001, -0.34999999999999964, 1.1800000000000002, 0.0, 0.0, 0.0, 0.0, 0.7400000000000002, -0.31000000000000005]
+        self.dbox = [13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0]
+        self.dnlist = [
+            33,
+            -1,
+            -1,
+            -1,
+            -1,
+            1,
+            32,
+            34,
+            35,
+            -1,
+            0,
+            33,
+            -1,
+            -1,
+            -1,
+            32,
+            34,
+            35,
+            -1,
+            -1,
+            6,
+            3,
+            -1,
+            -1,
+            -1,
+            7,
+            4,
+            5,
+            -1,
+            -1,
+            6,
+            -1,
+            -1,
+            -1,
+            -1,
+            4,
+            5,
+            2,
+            7,
+            -1,
+            3,
+            6,
+            -1,
+            -1,
+            -1,
+            5,
+            2,
+            7,
+            -1,
+            -1,
+            3,
+            6,
+            -1,
+            -1,
+            -1,
+            4,
+            2,
+            7,
+            -1,
+            -1,
+        ]
+        self.dem_deriv = [
+            0.13227682739491875,
+            0.01648776318803519,
+            -0.013864709953575083,
+            0.12967498112414713,
+            0.0204174282700489,
+            -0.017169201045268437,
+            0.0204174282700489,
+            -0.031583528930688706,
+            -0.0021400703852459233,
+            -0.01716920104526844,
+            -0.0021400703852459233,
+            -0.03232887285478848,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.7946522798827726,
+            0.33289487400494444,
+            0.6013584820734476,
+            0.15412158847174678,
+            -0.502001299580599,
+            -0.9068410573068878,
+            -0.502001299580599,
+            -0.833906252681877,
+            0.3798928753582899,
+            -0.9068410573068878,
+            0.3798928753582899,
+            -0.3579459969766471,
+            0.4206262499369199,
+            0.761133214171572,
+            -0.5007455356391932,
+            -0.6442543005863454,
+            0.635525177045359,
+            -0.4181086691087898,
+            0.6355251770453592,
+            0.15453235677768898,
+            -0.75657759172067,
+            -0.4181086691087898,
+            -0.75657759172067,
+            -0.49771716703202185,
+            0.12240657396947655,
+            -0.0016631327984983461,
+            0.013970315507385892,
+            0.12123416269111335,
+            -0.0020346719145638054,
+            0.017091244082335703,
+            -0.002034671914563806,
+            -0.028490045221941415,
+            -0.00023221799024912971,
+            0.017091244082335703,
+            -0.00023221799024912971,
+            -0.026567059102687942,
+            0.057945707686107975,
+            0.008613551142529565,
+            -0.008091517739952026,
+            0.056503423854730866,
+            0.009417127630974357,
+            -0.008846392623036528,
+            0.009417127630974357,
+            -0.005448318729873151,
+            -0.0013150043088297543,
+            -0.008846392623036528,
+            -0.0013150043088297541,
+            -0.005612854948377751,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.7946522798827726,
+            -0.33289487400494444,
+            -0.6013584820734476,
+            0.15412158847174678,
+            -0.502001299580599,
+            -0.9068410573068878,
+            -0.502001299580599,
+            -0.833906252681877,
+            0.3798928753582899,
+            -0.9068410573068878,
+            0.3798928753582899,
+            -0.3579459969766471,
+            0.06884320605436924,
+            0.002095928989945659,
+            -0.01499395354345747,
+            0.0668001797461137,
+            0.0023216922720068383,
+            -0.016609029330510533,
+            0.0023216922720068383,
+            -0.009387797963986713,
+            -0.0005056613145120282,
+            -0.016609029330510533,
+            -0.0005056613145120282,
+            -0.005841058553679004,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.3025931001933299,
+            0.11738525438534331,
+            -0.2765074881076981,
+            0.034913562192579815,
+            0.15409432322878,
+            -0.3629777391611269,
+            0.15409432322878003,
+            -0.30252938969021487,
+            -0.14081032984698866,
+            -0.3629777391611269,
+            -0.14081032984698866,
+            -0.030620805157591004,
+            0.06555082496658332,
+            -0.005338981218997747,
+            -0.002076270474054677,
+            0.06523884623439505,
+            -0.00599162877720186,
+            -0.0023300778578007205,
+            -0.00599162877720186,
+            -0.007837034455273667,
+            0.00018978009701544363,
+            -0.0023300778578007205,
+            0.00018978009701544363,
+            -0.008251237047966105,
+            0.014091999096200191,
+            0.0009521621010946066,
+            -0.00321014651226182,
+            0.013676554858123476,
+            0.0009667394698497006,
+            -0.0032592930697789946,
+            0.0009667394698497006,
+            -0.0005658690612028018,
+            -0.00022022250471479668,
+            -0.0032592930697789937,
+            -0.00022022250471479666,
+            0.00011127514881492382,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.4206262499369199,
+            -0.761133214171572,
+            0.5007455356391932,
+            -0.6442543005863454,
+            0.635525177045359,
+            -0.4181086691087898,
+            0.6355251770453592,
+            0.15453235677768898,
+            -0.75657759172067,
+            -0.4181086691087898,
+            -0.75657759172067,
+            -0.49771716703202185,
+            0.17265177804411166,
+            -0.01776481317495682,
+            0.007216955352326217,
+            0.1708538944675734,
+            -0.023853120077098278,
+            0.009690330031321191,
+            -0.02385312007709828,
+            -0.05851427595224925,
+            -0.0009970757588497682,
+            0.00969033003132119,
+            -0.0009970757588497682,
+            -0.06056355425469288,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.3025931001933299,
+            -0.11738525438534331,
+            0.2765074881076981,
+            0.034913562192579815,
+            0.15409432322878,
+            -0.3629777391611269,
+            0.15409432322878003,
+            -0.30252938969021487,
+            -0.14081032984698866,
+            -0.3629777391611269,
+            -0.14081032984698866,
+            -0.030620805157591004,
+            0.13298898711407747,
+            -0.03304327593938735,
+            0.03753063440029181,
+            0.11967949867634801,
+            -0.0393666881596552,
+            0.044712781613435545,
+            -0.0393666881596552,
+            -0.02897797727002851,
+            -0.01110961751744871,
+            0.044712781613435545,
+            -0.011109617517448708,
+            -0.026140939946396612,
+            0.09709214772325653,
+            -0.00241522755530488,
+            -0.0028982730663658636,
+            0.09699249715361474,
+            -0.0028489422636695603,
+            -0.0034187307164034813,
+            -0.00284894226366956,
+            -0.017464112635362926,
+            8.504305264685245e-05,
+            -0.003418730716403481,
+            8.504305264685245e-05,
+            -0.017432930182725747,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.1322768273949186,
+            -0.016487763188035173,
+            0.013864709953575069,
+            0.12967498112414702,
+            0.020417428270048884,
+            -0.017169201045268423,
+            0.02041742827004888,
+            -0.03158352893068868,
+            -0.002140070385245921,
+            -0.017169201045268423,
+            -0.002140070385245921,
+            -0.03232887285478844,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.1802999914938216,
+            -0.5889799722131493,
+            0.9495799552007915,
+            -1.070225697321266,
+            -0.18728687322613707,
+            0.30195230581356786,
+            -0.18728687322613707,
+            -0.5157546277429348,
+            -0.9863775323243197,
+            0.30195230581356786,
+            -0.9863775323243197,
+            0.4627237303364723,
+            1.0053013143052718,
+            0.24303987818369216,
+            -0.2761816797541954,
+            0.8183357773897718,
+            0.45521877564245394,
+            -0.517294063230061,
+            0.45521877564245394,
+            -0.9545617219529918,
+            -0.1250601031984763,
+            -0.517294063230061,
+            -0.1250601031984763,
+            -0.922500859133019,
+            -0.17265177804411166,
+            0.01776481317495682,
+            -0.007216955352326217,
+            0.1708538944675734,
+            -0.023853120077098278,
+            0.009690330031321191,
+            -0.02385312007709828,
+            -0.05851427595224925,
+            -0.0009970757588497682,
+            0.00969033003132119,
+            -0.0009970757588497682,
+            -0.06056355425469288,
+            -0.06884320605436924,
+            -0.002095928989945659,
+            0.01499395354345747,
+            0.0668001797461137,
+            0.0023216922720068383,
+            -0.016609029330510533,
+            0.0023216922720068383,
+            -0.009387797963986713,
+            -0.0005056613145120282,
+            -0.016609029330510533,
+            -0.0005056613145120282,
+            -0.005841058553679004,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.1802999914938216,
+            0.5889799722131493,
+            -0.9495799552007915,
+            -1.070225697321266,
+            -0.18728687322613707,
+            0.30195230581356786,
+            -0.18728687322613707,
+            -0.5157546277429348,
+            -0.9863775323243197,
+            0.30195230581356786,
+            -0.9863775323243197,
+            0.4627237303364723,
+            -0.12240657396947667,
+            0.0016631327984983487,
+            -0.013970315507385913,
+            0.12123416269111348,
+            -0.002034671914563809,
+            0.01709124408233573,
+            -0.002034671914563809,
+            -0.028490045221941467,
+            -0.00023221799024913015,
+            0.01709124408233573,
+            -0.00023221799024913015,
+            -0.026567059102687987,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.2602591506940697,
+            0.24313683814840728,
+            -0.3561441009497795,
+            -0.19841405298242495,
+            0.23891499072173572,
+            -0.3499599864093028,
+            0.23891499072173572,
+            -0.23095714382387694,
+            -0.32693630309290145,
+            -0.34995998640930287,
+            -0.32693630309290145,
+            0.02473856993038946,
+            -0.13298898711407747,
+            0.03304327593938735,
+            -0.03753063440029181,
+            0.11967949867634801,
+            -0.0393666881596552,
+            0.044712781613435545,
+            -0.0393666881596552,
+            -0.02897797727002851,
+            -0.01110961751744871,
+            0.044712781613435545,
+            -0.011109617517448708,
+            -0.026140939946396612,
+            -0.0655508249665835,
+            0.005338981218997763,
+            0.002076270474054683,
+            0.0652388462343952,
+            -0.005991628777201879,
+            -0.0023300778578007283,
+            -0.005991628777201879,
+            -0.007837034455273709,
+            0.0001897800970154443,
+            -0.002330077857800728,
+            0.0001897800970154443,
+            -0.008251237047966148,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -1.0053013143052718,
+            -0.24303987818369216,
+            0.2761816797541954,
+            0.8183357773897718,
+            0.45521877564245394,
+            -0.517294063230061,
+            0.45521877564245394,
+            -0.9545617219529918,
+            -0.1250601031984763,
+            -0.517294063230061,
+            -0.1250601031984763,
+            -0.922500859133019,
+            -0.057945707686107864,
+            -0.008613551142529548,
+            0.00809151773995201,
+            0.05650342385473076,
+            0.009417127630974336,
+            -0.00884639262303651,
+            0.009417127630974336,
+            -0.005448318729873148,
+            -0.0013150043088297515,
+            -0.00884639262303651,
+            -0.0013150043088297513,
+            -0.005612854948377747,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.2602591506940697,
+            -0.24313683814840728,
+            0.3561441009497795,
+            -0.19841405298242495,
+            0.23891499072173572,
+            -0.3499599864093028,
+            0.23891499072173572,
+            -0.23095714382387694,
+            -0.32693630309290145,
+            -0.34995998640930287,
+            -0.32693630309290145,
+            0.02473856993038946,
+            -0.09709214772325653,
+            0.00241522755530488,
+            0.0028982730663658636,
+            0.09699249715361474,
+            -0.0028489422636695603,
+            -0.0034187307164034813,
+            -0.00284894226366956,
+            -0.017464112635362926,
+            8.504305264685245e-05,
+            -0.003418730716403481,
+            8.504305264685245e-05,
+            -0.017432930182725747,
+            -0.014091999096200191,
+            -0.0009521621010946064,
+            0.0032101465122618194,
+            0.013676554858123474,
+            0.0009667394698497003,
+            -0.0032592930697789933,
+            0.0009667394698497003,
+            -0.0005658690612028016,
+            -0.0002202225047147966,
+            -0.0032592930697789933,
+            -0.0002202225047147966,
+            0.00011127514881492362,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+        ]
+        self.drij = [
+            3.5299999999999976,
+            0.4399999999999995,
+            -0.37000000000000055,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            1.06099789543e-313,
+            0.0,
+            0.0,
+            -0.740000000000002,
+            0.31000000000000005,
+            0.5599999999999996,
+            0.41999999999999815,
+            0.7599999999999993,
+            -0.5000000000000007,
+            3.6799999999999997,
+            -0.05000000000000071,
+            0.4199999999999995,
+            4.439999999999998,
+            0.6599999999999997,
+            -0.6200000000000006,
+            1.06099789543e-313,
+            3.11,
+            -0.31999999999999984,
+            0.740000000000002,
+            -0.31000000000000005,
+            -0.5599999999999996,
+            4.27,
+            0.12999999999999945,
+            -0.9300000000000002,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            1.06099789543e-313,
+            3.26,
+            -0.81,
+            1.1600000000000001,
+            0.4499999999999993,
+            -1.0600000000000003,
+            4.420000000000002,
+            -0.36000000000000076,
+            -0.14000000000000012,
+            5.18,
+            0.34999999999999964,
+            -1.1800000000000002,
+            0.0,
+            0.0,
+            0.0,
+            1.06099789543e-313,
+            0.0,
+            0.0,
+            -0.41999999999999815,
+            -0.7599999999999993,
+            0.5000000000000007,
+            3.11,
+            -0.31999999999999984,
+            0.13000000000000012,
+            1.0609978957e-313,
+            2.1219957915e-314,
+            6.3659873744e-314,
+            6.3659873744e-314,
+            0.0,
+            0.0,
+            0.0,
+            0.1499999999999999,
+            -0.4900000000000002,
+            -1.1600000000000001,
+            -0.4499999999999993,
+            1.0600000000000003,
+            3.2600000000000002,
+            -0.81,
+            0.9200000000000002,
+            4.0200000000000005,
+            -0.09999999999999964,
+            -0.11999999999999988,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.1499999999999999,
+            0.4900000000000002,
+            -3.529999999999998,
+            -0.4399999999999995,
+            0.37000000000000055,
+            0.0,
+            0.0,
+            0.0,
+            5e-324,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.7599999999999998,
+            0.7100000000000004,
+            0.15000000000000036,
+            -0.4900000000000002,
+            0.79,
+            0.9100000000000006,
+            0.2200000000000002,
+            -0.25,
+            -3.11,
+            0.31999999999999984,
+            -0.13000000000000012,
+            -4.27,
+            -0.12999999999999945,
+            0.9300000000000002,
+            0.0,
+            -0.9099999999999997,
+            -0.2200000000000002,
+            -0.15000000000000036,
+            0.4900000000000002,
+            -0.79,
+            -3.6799999999999984,
+            0.05000000000000071,
+            -0.4199999999999995,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.7599999999999998,
+            -0.7100000000000004,
+            0.7600000000000002,
+            0.7100000000000004,
+            -1.04,
+            -3.2600000000000002,
+            0.81,
+            -0.9200000000000002,
+            -4.42,
+            0.36000000000000076,
+            0.14000000000000012,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.9100000000000006,
+            -0.2200000000000002,
+            0.25,
+            -4.439999999999999,
+            -0.6599999999999997,
+            0.6200000000000006,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.7400000000000002,
+            0.31000000000000005,
+            -0.7600000000000002,
+            -0.7100000000000004,
+            1.04,
+            -4.0200000000000005,
+            0.09999999999999964,
+            0.11999999999999988,
+            -5.180000000000001,
+            -0.34999999999999964,
+            1.1800000000000002,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.7400000000000002,
+            -0.31000000000000005,
+        ]
         self.dcoord = np.reshape(self.dcoord, [1, -1])
         self.dtype = np.reshape(self.dtype, [1, -1])
         self.dbox = np.reshape(self.dbox, [1, -1])
@@ -52,17 +1018,455 @@ def setUp(self):
         self.dnlist = np.tile(self.dnlist, [self.nframes, 1])
         self.dem_deriv = np.tile(self.dem_deriv, [self.nframes, 1])
         self.drij = np.tile(self.drij, [self.nframes, 1])
-        self.expected_virial = [100.14628,  7.21146, -24.62874,  6.19651, 23.31547, -19.77773, -26.79150, -20.92554, 38.84203]
-        self.expected_atom_virial = [-3.24191,  1.35810,  2.45333, -9.14879,  3.83260,  6.92341, -10.54930,  4.41930,  7.98326, 14.83563, -6.21493, -11.22697,  4.51124, -1.88984, -3.41391,  2.04717, -0.85760, -1.54921, 0.84708, -0.10308,  0.07324,  3.51825, -0.49788,  0.40314,  2.91345, -0.37264,  0.27386, 12.62246, -5.19874,  7.42677,  4.80217, -2.69029,  5.41896,  9.55811, -2.42899,  5.14893, 9.90295,  4.54279, -7.75115, -2.89155, 13.50055, -20.91993,  4.00314, -1.76293,  2.92724, 20.15105,  2.86856, -3.55868, -4.22796, -1.12700,  1.46999, -21.43180, -9.30194, 12.54538, 2.86811,  5.92934, -3.94618,  4.83313,  5.21197, -3.36488,  6.67852,  8.34225, -5.44992, 5.97941,  1.92669, -4.70211,  4.91215,  1.63145, -3.96250,  3.27415,  1.02612, -2.52585,  
-                                     0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, 
-                                     0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
-                                     0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
-                                     1.38833,  0.50613, -1.26233,  1.39901,  5.18116, -2.18118, -17.72748, -19.52039, 18.66001, 14.31034,  1.31715, -2.05955, -0.10872,  0.00743,  0.03656, -3.85572, -0.33481,  0.57900, 14.31190, -0.53814,  0.89498, -1.94166,  0.07960, -0.10726, -0.35985,  0.03981,  0.03397,  6.17091,  0.81760, -0.97011,  0.53923,  0.07572, -0.08012, -1.34189, -0.17373,  0.21536,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
-                                     0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000]
+        self.expected_virial = [
+            100.14628,
+            7.21146,
+            -24.62874,
+            6.19651,
+            23.31547,
+            -19.77773,
+            -26.79150,
+            -20.92554,
+            38.84203,
+        ]
+        self.expected_atom_virial = [
+            -3.24191,
+            1.35810,
+            2.45333,
+            -9.14879,
+            3.83260,
+            6.92341,
+            -10.54930,
+            4.41930,
+            7.98326,
+            14.83563,
+            -6.21493,
+            -11.22697,
+            4.51124,
+            -1.88984,
+            -3.41391,
+            2.04717,
+            -0.85760,
+            -1.54921,
+            0.84708,
+            -0.10308,
+            0.07324,
+            3.51825,
+            -0.49788,
+            0.40314,
+            2.91345,
+            -0.37264,
+            0.27386,
+            12.62246,
+            -5.19874,
+            7.42677,
+            4.80217,
+            -2.69029,
+            5.41896,
+            9.55811,
+            -2.42899,
+            5.14893,
+            9.90295,
+            4.54279,
+            -7.75115,
+            -2.89155,
+            13.50055,
+            -20.91993,
+            4.00314,
+            -1.76293,
+            2.92724,
+            20.15105,
+            2.86856,
+            -3.55868,
+            -4.22796,
+            -1.12700,
+            1.46999,
+            -21.43180,
+            -9.30194,
+            12.54538,
+            2.86811,
+            5.92934,
+            -3.94618,
+            4.83313,
+            5.21197,
+            -3.36488,
+            6.67852,
+            8.34225,
+            -5.44992,
+            5.97941,
+            1.92669,
+            -4.70211,
+            4.91215,
+            1.63145,
+            -3.96250,
+            3.27415,
+            1.02612,
+            -2.52585,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.38833,
+            0.50613,
+            -1.26233,
+            1.39901,
+            5.18116,
+            -2.18118,
+            -17.72748,
+            -19.52039,
+            18.66001,
+            14.31034,
+            1.31715,
+            -2.05955,
+            -0.10872,
+            0.00743,
+            0.03656,
+            -3.85572,
+            -0.33481,
+            0.57900,
+            14.31190,
+            -0.53814,
+            0.89498,
+            -1.94166,
+            0.07960,
+            -0.10726,
+            -0.35985,
+            0.03981,
+            0.03397,
+            6.17091,
+            0.81760,
+            -0.97011,
+            0.53923,
+            0.07572,
+            -0.08012,
+            -1.34189,
+            -0.17373,
+            0.21536,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+        ]
         self.sel = [5, 5]
-        self.sec = np.array([0, 0, 0], dtype = int)
+        self.sec = np.array([0, 0, 0], dtype=int)
         self.sec[1:3] = np.cumsum(self.sel)
-        self.rcut = 6.
+        self.rcut = 6.0
         self.rcut_smth = 0.8
         self.dnatoms = [6, 48, 2, 4]
 
@@ -71,40 +1475,57 @@ def setUp(self):
         self.nnei = self.sec[-1]
         self.ndescrpt = 4 * self.nnei
         self.ntypes = np.max(self.dtype) + 1
-        self.dnet_deriv=[]
+        self.dnet_deriv = []
         for ii in range(self.nloc * self.ndescrpt):
-            self.dnet_deriv.append(10-ii*0.01)
+            self.dnet_deriv.append(10 - ii * 0.01)
         self.dnet_deriv = np.reshape(self.dnet_deriv, [1, -1])
         self.dnet_deriv = np.tile(self.dnet_deriv, [self.nframes, 1])
 
-        self.tnet_deriv = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * self.ndescrpt], name='t_net_deriv')
-        self.tem_deriv = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * self.ndescrpt * 3], name='t_em_deriv')
-        self.trij = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * self.nnei * 3], name='t_rij')
-        self.tnlist = tf.placeholder(tf.int32, [None, self.dnatoms[0] * self.nnei], name = "t_nlist")
-        self.tnatoms = tf.placeholder(tf.int32, [None], name = "t_natoms")
-        
+        self.tnet_deriv = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION,
+            [None, self.dnatoms[0] * self.ndescrpt],
+            name="t_net_deriv",
+        )
+        self.tem_deriv = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION,
+            [None, self.dnatoms[0] * self.ndescrpt * 3],
+            name="t_em_deriv",
+        )
+        self.trij = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION,
+            [None, self.dnatoms[0] * self.nnei * 3],
+            name="t_rij",
+        )
+        self.tnlist = tf.placeholder(
+            tf.int32, [None, self.dnatoms[0] * self.nnei], name="t_nlist"
+        )
+        self.tnatoms = tf.placeholder(tf.int32, [None], name="t_natoms")
+
     def test_prod_virial(self):
-        tvirial, tatom_virial \
-            = op_module.prod_virial_se_a(
-                self.tnet_deriv,
-                self.tem_deriv,
-                self.trij,
-                self.tnlist,
-                self.tnatoms, 
-                n_a_sel=self.nnei,
-                n_r_sel=0)
-        self.sess.run (tf.global_variables_initializer())
+        tvirial, tatom_virial = op_module.prod_virial_se_a(
+            self.tnet_deriv,
+            self.tem_deriv,
+            self.trij,
+            self.tnlist,
+            self.tnatoms,
+            n_a_sel=self.nnei,
+            n_r_sel=0,
+        )
+        self.sess.run(tf.global_variables_initializer())
         dvirial, datom_virial = self.sess.run(
             [tvirial, tatom_virial],
-            feed_dict = {
+            feed_dict={
                 self.tnet_deriv: self.dnet_deriv,
                 self.tem_deriv: self.dem_deriv,
                 self.trij: self.drij,
                 self.tnlist: self.dnlist,
-                self.tnatoms: self.dnatoms}
+                self.tnatoms: self.dnatoms,
+            },
         )
         self.assertEqual(dvirial.shape, (self.nframes, 9))
-        self.assertEqual(datom_virial.shape, (self.nframes, self.nall*9))
+        self.assertEqual(datom_virial.shape, (self.nframes, self.nall * 9))
         for ff in range(self.nframes):
             np.testing.assert_almost_equal(dvirial[ff], self.expected_virial, 5)
-            np.testing.assert_almost_equal(datom_virial[ff], self.expected_atom_virial, 5)
+            np.testing.assert_almost_equal(
+                datom_virial[ff], self.expected_atom_virial, 5
+            )
diff --git a/source/tests/test_prod_virial_grad.py b/source/tests/test_prod_virial_grad.py
index 6217fa8600..c16a0635e7 100644
--- a/source/tests/test_prod_virial_grad.py
+++ b/source/tests/test_prod_virial_grad.py
@@ -1,45 +1,1011 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
+import numpy as np
+
 import deepmd.op
-from deepmd.env import tf
-from deepmd.env import op_grads_module
-from deepmd.env import GLOBAL_TF_FLOAT_PRECISION
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-from deepmd.env import GLOBAL_ENER_FLOAT_PRECISION
+from deepmd.env import (
+    GLOBAL_ENER_FLOAT_PRECISION,
+    GLOBAL_NP_FLOAT_PRECISION,
+    GLOBAL_TF_FLOAT_PRECISION,
+    op_grads_module,
+    tf,
+)
+
 
 class TestProdVirialGrad(tf.test.TestCase):
     def setUp(self):
         self.sess = self.test_session().__enter__()
         self.nframes = 2
         self.dcoord = [
-            12.83, 2.56, 2.18,
-            12.09, 2.87, 2.74,
-            00.25, 3.32, 1.68,
-            3.36, 3.00, 1.81,
-            3.51, 2.51, 2.60,
-            4.27, 3.22, 1.56]
+            12.83,
+            2.56,
+            2.18,
+            12.09,
+            2.87,
+            2.74,
+            00.25,
+            3.32,
+            1.68,
+            3.36,
+            3.00,
+            1.81,
+            3.51,
+            2.51,
+            2.60,
+            4.27,
+            3.22,
+            1.56,
+        ]
         self.dtype = [0, 1, 1, 0, 1, 1]
-        self.dbox = [13., 0., 0., 0., 13., 0., 0., 0., 13.]
-        self.dnlist = [33, -1, -1, -1, -1, 1, 32, 34, 35, -1, 
-                       0, 33, -1, -1, -1, 32, 34, 35, -1, -1, 
-                       6, 3, -1, -1, -1, 7, 4, 5, -1, -1, 
-                       6, -1, -1, -1, -1, 4, 5, 2, 7, -1, 
-                       3, 6, -1, -1, -1, 5, 2, 7, -1, -1, 
-                       3, 6, -1, -1, -1, 4, 2, 7, -1, -1]
-        self.dem_deriv = [0.13227682739491875, 0.01648776318803519, -0.013864709953575083, 0.12967498112414713, 0.0204174282700489, -0.017169201045268437, 0.0204174282700489, -0.031583528930688706, -0.0021400703852459233, -0.01716920104526844, -0.0021400703852459233, -0.03232887285478848, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.7946522798827726, 0.33289487400494444, 0.6013584820734476, 0.15412158847174678, -0.502001299580599, -0.9068410573068878, -0.502001299580599, -0.833906252681877, 0.3798928753582899, -0.9068410573068878, 0.3798928753582899, -0.3579459969766471, 0.4206262499369199, 0.761133214171572, -0.5007455356391932, -0.6442543005863454, 0.635525177045359, -0.4181086691087898, 0.6355251770453592, 0.15453235677768898,
-                          -0.75657759172067, -0.4181086691087898, -0.75657759172067, -0.49771716703202185, 0.12240657396947655, -0.0016631327984983461, 0.013970315507385892, 0.12123416269111335, -0.0020346719145638054, 0.017091244082335703, -0.002034671914563806, -0.028490045221941415, -0.00023221799024912971, 0.017091244082335703, -0.00023221799024912971, -0.026567059102687942, 0.057945707686107975, 0.008613551142529565, -0.008091517739952026, 0.056503423854730866, 0.009417127630974357, -0.008846392623036528, 0.009417127630974357, -0.005448318729873151, -0.0013150043088297543, -0.008846392623036528, -0.0013150043088297541, -0.005612854948377751, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7946522798827726, -0.33289487400494444, -0.6013584820734476, 0.15412158847174678, -0.502001299580599, -0.9068410573068878, -0.502001299580599, -0.833906252681877, 0.3798928753582899, -0.9068410573068878, 0.3798928753582899, -0.3579459969766471, 0.06884320605436924, 0.002095928989945659, -0.01499395354345747, 0.0668001797461137, 0.0023216922720068383, -0.016609029330510533, 0.0023216922720068383, -0.009387797963986713, -0.0005056613145120282, -0.016609029330510533, -0.0005056613145120282, -0.005841058553679004, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3025931001933299, 0.11738525438534331, -0.2765074881076981, 0.034913562192579815, 0.15409432322878, -0.3629777391611269, 0.15409432322878003, -0.30252938969021487, -0.14081032984698866, -0.3629777391611269, -0.14081032984698866, -0.030620805157591004, 0.06555082496658332, -0.005338981218997747, -0.002076270474054677, 0.06523884623439505, -0.00599162877720186, -0.0023300778578007205, -0.00599162877720186, -0.007837034455273667, 0.00018978009701544363, -0.0023300778578007205, 0.00018978009701544363, -0.008251237047966105, 0.014091999096200191, 0.0009521621010946066, -0.00321014651226182, 0.013676554858123476, 0.0009667394698497006, -0.0032592930697789946, 0.0009667394698497006, -0.0005658690612028018, -0.00022022250471479668, -0.0032592930697789937, -0.00022022250471479666, 0.00011127514881492382, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          -0.4206262499369199, -0.761133214171572, 0.5007455356391932, -0.6442543005863454, 0.635525177045359, -0.4181086691087898, 0.6355251770453592, 0.15453235677768898, -0.75657759172067, -0.4181086691087898, -0.75657759172067, -0.49771716703202185, 0.17265177804411166, -0.01776481317495682, 0.007216955352326217, 0.1708538944675734, -0.023853120077098278, 0.009690330031321191, -0.02385312007709828, -0.05851427595224925, -0.0009970757588497682, 0.00969033003132119, -0.0009970757588497682, -0.06056355425469288, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.3025931001933299, -0.11738525438534331, 0.2765074881076981, 0.034913562192579815, 0.15409432322878, -0.3629777391611269, 0.15409432322878003, -0.30252938969021487, -0.14081032984698866, -0.3629777391611269, -0.14081032984698866, -0.030620805157591004, 0.13298898711407747, -0.03304327593938735, 0.03753063440029181, 0.11967949867634801, -0.0393666881596552, 0.044712781613435545, -0.0393666881596552, -0.02897797727002851,
-                          -0.01110961751744871, 0.044712781613435545, -0.011109617517448708, -0.026140939946396612, 0.09709214772325653, -0.00241522755530488, -0.0028982730663658636, 0.09699249715361474, -0.0028489422636695603, -0.0034187307164034813, -0.00284894226366956, -0.017464112635362926, 8.504305264685245e-05, -0.003418730716403481, 8.504305264685245e-05, -0.017432930182725747, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1322768273949186, -0.016487763188035173, 0.013864709953575069, 0.12967498112414702, 0.020417428270048884, -0.017169201045268423, 0.02041742827004888, -0.03158352893068868, -0.002140070385245921, -0.017169201045268423, -0.002140070385245921, -0.03232887285478844, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1802999914938216, -0.5889799722131493, 0.9495799552007915, -1.070225697321266, -0.18728687322613707, 0.30195230581356786, -0.18728687322613707, -0.5157546277429348, -0.9863775323243197, 0.30195230581356786, -0.9863775323243197, 0.4627237303364723, 1.0053013143052718, 0.24303987818369216, -0.2761816797541954, 0.8183357773897718, 0.45521877564245394, -0.517294063230061, 0.45521877564245394, -0.9545617219529918, -0.1250601031984763, -0.517294063230061, -0.1250601031984763, -0.922500859133019, -0.17265177804411166, 0.01776481317495682, -0.007216955352326217, 0.1708538944675734, -0.023853120077098278, 0.009690330031321191, -0.02385312007709828, -0.05851427595224925, -0.0009970757588497682, 0.00969033003132119, -0.0009970757588497682, -0.06056355425469288, -0.06884320605436924, -0.002095928989945659, 0.01499395354345747, 0.0668001797461137, 0.0023216922720068383, -0.016609029330510533, 0.0023216922720068383, -0.009387797963986713, -0.0005056613145120282, -0.016609029330510533, -0.0005056613145120282, -0.005841058553679004, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          -0.1802999914938216, 0.5889799722131493, -0.9495799552007915, -1.070225697321266, -0.18728687322613707, 0.30195230581356786, -0.18728687322613707, -0.5157546277429348, -0.9863775323243197, 0.30195230581356786, -0.9863775323243197, 0.4627237303364723, -0.12240657396947667, 0.0016631327984983487, -0.013970315507385913, 0.12123416269111348, -0.002034671914563809, 0.01709124408233573, -0.002034671914563809, -0.028490045221941467, -0.00023221799024913015, 0.01709124408233573, -0.00023221799024913015, -0.026567059102687987, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2602591506940697, 0.24313683814840728, -0.3561441009497795, -0.19841405298242495, 0.23891499072173572, -0.3499599864093028, 0.23891499072173572, -0.23095714382387694, -0.32693630309290145, -0.34995998640930287, -0.32693630309290145, 0.02473856993038946, -0.13298898711407747, 0.03304327593938735, -0.03753063440029181, 0.11967949867634801, -0.0393666881596552, 0.044712781613435545, -0.0393666881596552, -0.02897797727002851,
-                          -0.01110961751744871, 0.044712781613435545, -0.011109617517448708, -0.026140939946396612, -0.0655508249665835, 0.005338981218997763, 0.002076270474054683, 0.0652388462343952, -0.005991628777201879, -0.0023300778578007283, -0.005991628777201879, -0.007837034455273709, 0.0001897800970154443, -0.002330077857800728, 0.0001897800970154443, -0.008251237047966148, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0053013143052718, -0.24303987818369216, 0.2761816797541954, 0.8183357773897718, 0.45521877564245394, -0.517294063230061, 0.45521877564245394, -0.9545617219529918, -0.1250601031984763, -0.517294063230061, -0.1250601031984763, -0.922500859133019, -0.057945707686107864, -0.008613551142529548, 0.00809151773995201, 0.05650342385473076, 0.009417127630974336, -0.00884639262303651, 0.009417127630974336, -0.005448318729873148, -0.0013150043088297515, -0.00884639262303651, -0.0013150043088297513, -0.005612854948377747, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
-                          0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.2602591506940697, -0.24313683814840728, 0.3561441009497795, -0.19841405298242495, 0.23891499072173572, -0.3499599864093028, 0.23891499072173572, -0.23095714382387694, -0.32693630309290145, -0.34995998640930287, -0.32693630309290145, 0.02473856993038946, -0.09709214772325653, 0.00241522755530488, 0.0028982730663658636, 0.09699249715361474, -0.0028489422636695603, -0.0034187307164034813, -0.00284894226366956, -0.017464112635362926, 8.504305264685245e-05, -0.003418730716403481, 8.504305264685245e-05, -0.017432930182725747, -0.014091999096200191, -0.0009521621010946064, 0.0032101465122618194, 0.013676554858123474, 0.0009667394698497003, -0.0032592930697789933, 0.0009667394698497003, -0.0005658690612028016, -0.0002202225047147966, -0.0032592930697789933, -0.0002202225047147966, 0.00011127514881492362, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
-        self.drij = [3.5299999999999976, 0.4399999999999995, -0.37000000000000055, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.06099789543e-313, 0.0, 0.0, -0.740000000000002, 0.31000000000000005, 0.5599999999999996, 0.41999999999999815, 0.7599999999999993, -0.5000000000000007, 3.6799999999999997, -0.05000000000000071, 0.4199999999999995, 4.439999999999998, 0.6599999999999997, -0.6200000000000006, 1.06099789543e-313, 3.11, -0.31999999999999984, 0.740000000000002, -0.31000000000000005, -0.5599999999999996, 4.27, 0.12999999999999945, -0.9300000000000002, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.06099789543e-313, 3.26, -0.81, 1.1600000000000001, 0.4499999999999993, -1.0600000000000003, 4.420000000000002, -0.36000000000000076, -0.14000000000000012, 5.18, 0.34999999999999964, -1.1800000000000002, 0.0, 0.0, 0.0, 1.06099789543e-313, 0.0, 0.0,
-                     -0.41999999999999815, -0.7599999999999993, 0.5000000000000007, 3.11, -0.31999999999999984, 0.13000000000000012, 1.0609978957e-313, 2.1219957915e-314, 6.3659873744e-314, 6.3659873744e-314, 0.0, 0.0, 0.0, 0.1499999999999999, -0.4900000000000002, -1.1600000000000001, -0.4499999999999993, 1.0600000000000003, 3.2600000000000002, -0.81, 0.9200000000000002, 4.0200000000000005, -0.09999999999999964, -0.11999999999999988, 0.0, 0.0, 0.0, 0.0, -0.1499999999999999, 0.4900000000000002, -3.529999999999998, -0.4399999999999995, 0.37000000000000055, 0.0, 0.0, 0.0, 5e-324, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7599999999999998, 0.7100000000000004, 0.15000000000000036, -0.4900000000000002, 0.79, 0.9100000000000006, 0.2200000000000002, -0.25, -3.11, 0.31999999999999984, -0.13000000000000012, -4.27, -0.12999999999999945, 0.9300000000000002, 0.0, -0.9099999999999997, -0.2200000000000002,
-                     -0.15000000000000036, 0.4900000000000002, -0.79, -3.6799999999999984, 0.05000000000000071, -0.4199999999999995, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.7599999999999998, -0.7100000000000004, 0.7600000000000002, 0.7100000000000004, -1.04, -3.2600000000000002, 0.81, -0.9200000000000002, -4.42, 0.36000000000000076, 0.14000000000000012, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.9100000000000006, -0.2200000000000002, 0.25, -4.439999999999999, -0.6599999999999997, 0.6200000000000006, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.7400000000000002, 0.31000000000000005, -0.7600000000000002, -0.7100000000000004, 1.04, -4.0200000000000005, 0.09999999999999964, 0.11999999999999988, -5.180000000000001, -0.34999999999999964, 1.1800000000000002, 0.0, 0.0, 0.0, 0.0, 0.7400000000000002, -0.31000000000000005]
+        self.dbox = [13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0]
+        self.dnlist = [
+            33,
+            -1,
+            -1,
+            -1,
+            -1,
+            1,
+            32,
+            34,
+            35,
+            -1,
+            0,
+            33,
+            -1,
+            -1,
+            -1,
+            32,
+            34,
+            35,
+            -1,
+            -1,
+            6,
+            3,
+            -1,
+            -1,
+            -1,
+            7,
+            4,
+            5,
+            -1,
+            -1,
+            6,
+            -1,
+            -1,
+            -1,
+            -1,
+            4,
+            5,
+            2,
+            7,
+            -1,
+            3,
+            6,
+            -1,
+            -1,
+            -1,
+            5,
+            2,
+            7,
+            -1,
+            -1,
+            3,
+            6,
+            -1,
+            -1,
+            -1,
+            4,
+            2,
+            7,
+            -1,
+            -1,
+        ]
+        self.dem_deriv = [
+            0.13227682739491875,
+            0.01648776318803519,
+            -0.013864709953575083,
+            0.12967498112414713,
+            0.0204174282700489,
+            -0.017169201045268437,
+            0.0204174282700489,
+            -0.031583528930688706,
+            -0.0021400703852459233,
+            -0.01716920104526844,
+            -0.0021400703852459233,
+            -0.03232887285478848,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.7946522798827726,
+            0.33289487400494444,
+            0.6013584820734476,
+            0.15412158847174678,
+            -0.502001299580599,
+            -0.9068410573068878,
+            -0.502001299580599,
+            -0.833906252681877,
+            0.3798928753582899,
+            -0.9068410573068878,
+            0.3798928753582899,
+            -0.3579459969766471,
+            0.4206262499369199,
+            0.761133214171572,
+            -0.5007455356391932,
+            -0.6442543005863454,
+            0.635525177045359,
+            -0.4181086691087898,
+            0.6355251770453592,
+            0.15453235677768898,
+            -0.75657759172067,
+            -0.4181086691087898,
+            -0.75657759172067,
+            -0.49771716703202185,
+            0.12240657396947655,
+            -0.0016631327984983461,
+            0.013970315507385892,
+            0.12123416269111335,
+            -0.0020346719145638054,
+            0.017091244082335703,
+            -0.002034671914563806,
+            -0.028490045221941415,
+            -0.00023221799024912971,
+            0.017091244082335703,
+            -0.00023221799024912971,
+            -0.026567059102687942,
+            0.057945707686107975,
+            0.008613551142529565,
+            -0.008091517739952026,
+            0.056503423854730866,
+            0.009417127630974357,
+            -0.008846392623036528,
+            0.009417127630974357,
+            -0.005448318729873151,
+            -0.0013150043088297543,
+            -0.008846392623036528,
+            -0.0013150043088297541,
+            -0.005612854948377751,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.7946522798827726,
+            -0.33289487400494444,
+            -0.6013584820734476,
+            0.15412158847174678,
+            -0.502001299580599,
+            -0.9068410573068878,
+            -0.502001299580599,
+            -0.833906252681877,
+            0.3798928753582899,
+            -0.9068410573068878,
+            0.3798928753582899,
+            -0.3579459969766471,
+            0.06884320605436924,
+            0.002095928989945659,
+            -0.01499395354345747,
+            0.0668001797461137,
+            0.0023216922720068383,
+            -0.016609029330510533,
+            0.0023216922720068383,
+            -0.009387797963986713,
+            -0.0005056613145120282,
+            -0.016609029330510533,
+            -0.0005056613145120282,
+            -0.005841058553679004,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.3025931001933299,
+            0.11738525438534331,
+            -0.2765074881076981,
+            0.034913562192579815,
+            0.15409432322878,
+            -0.3629777391611269,
+            0.15409432322878003,
+            -0.30252938969021487,
+            -0.14081032984698866,
+            -0.3629777391611269,
+            -0.14081032984698866,
+            -0.030620805157591004,
+            0.06555082496658332,
+            -0.005338981218997747,
+            -0.002076270474054677,
+            0.06523884623439505,
+            -0.00599162877720186,
+            -0.0023300778578007205,
+            -0.00599162877720186,
+            -0.007837034455273667,
+            0.00018978009701544363,
+            -0.0023300778578007205,
+            0.00018978009701544363,
+            -0.008251237047966105,
+            0.014091999096200191,
+            0.0009521621010946066,
+            -0.00321014651226182,
+            0.013676554858123476,
+            0.0009667394698497006,
+            -0.0032592930697789946,
+            0.0009667394698497006,
+            -0.0005658690612028018,
+            -0.00022022250471479668,
+            -0.0032592930697789937,
+            -0.00022022250471479666,
+            0.00011127514881492382,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.4206262499369199,
+            -0.761133214171572,
+            0.5007455356391932,
+            -0.6442543005863454,
+            0.635525177045359,
+            -0.4181086691087898,
+            0.6355251770453592,
+            0.15453235677768898,
+            -0.75657759172067,
+            -0.4181086691087898,
+            -0.75657759172067,
+            -0.49771716703202185,
+            0.17265177804411166,
+            -0.01776481317495682,
+            0.007216955352326217,
+            0.1708538944675734,
+            -0.023853120077098278,
+            0.009690330031321191,
+            -0.02385312007709828,
+            -0.05851427595224925,
+            -0.0009970757588497682,
+            0.00969033003132119,
+            -0.0009970757588497682,
+            -0.06056355425469288,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.3025931001933299,
+            -0.11738525438534331,
+            0.2765074881076981,
+            0.034913562192579815,
+            0.15409432322878,
+            -0.3629777391611269,
+            0.15409432322878003,
+            -0.30252938969021487,
+            -0.14081032984698866,
+            -0.3629777391611269,
+            -0.14081032984698866,
+            -0.030620805157591004,
+            0.13298898711407747,
+            -0.03304327593938735,
+            0.03753063440029181,
+            0.11967949867634801,
+            -0.0393666881596552,
+            0.044712781613435545,
+            -0.0393666881596552,
+            -0.02897797727002851,
+            -0.01110961751744871,
+            0.044712781613435545,
+            -0.011109617517448708,
+            -0.026140939946396612,
+            0.09709214772325653,
+            -0.00241522755530488,
+            -0.0028982730663658636,
+            0.09699249715361474,
+            -0.0028489422636695603,
+            -0.0034187307164034813,
+            -0.00284894226366956,
+            -0.017464112635362926,
+            8.504305264685245e-05,
+            -0.003418730716403481,
+            8.504305264685245e-05,
+            -0.017432930182725747,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.1322768273949186,
+            -0.016487763188035173,
+            0.013864709953575069,
+            0.12967498112414702,
+            0.020417428270048884,
+            -0.017169201045268423,
+            0.02041742827004888,
+            -0.03158352893068868,
+            -0.002140070385245921,
+            -0.017169201045268423,
+            -0.002140070385245921,
+            -0.03232887285478844,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.1802999914938216,
+            -0.5889799722131493,
+            0.9495799552007915,
+            -1.070225697321266,
+            -0.18728687322613707,
+            0.30195230581356786,
+            -0.18728687322613707,
+            -0.5157546277429348,
+            -0.9863775323243197,
+            0.30195230581356786,
+            -0.9863775323243197,
+            0.4627237303364723,
+            1.0053013143052718,
+            0.24303987818369216,
+            -0.2761816797541954,
+            0.8183357773897718,
+            0.45521877564245394,
+            -0.517294063230061,
+            0.45521877564245394,
+            -0.9545617219529918,
+            -0.1250601031984763,
+            -0.517294063230061,
+            -0.1250601031984763,
+            -0.922500859133019,
+            -0.17265177804411166,
+            0.01776481317495682,
+            -0.007216955352326217,
+            0.1708538944675734,
+            -0.023853120077098278,
+            0.009690330031321191,
+            -0.02385312007709828,
+            -0.05851427595224925,
+            -0.0009970757588497682,
+            0.00969033003132119,
+            -0.0009970757588497682,
+            -0.06056355425469288,
+            -0.06884320605436924,
+            -0.002095928989945659,
+            0.01499395354345747,
+            0.0668001797461137,
+            0.0023216922720068383,
+            -0.016609029330510533,
+            0.0023216922720068383,
+            -0.009387797963986713,
+            -0.0005056613145120282,
+            -0.016609029330510533,
+            -0.0005056613145120282,
+            -0.005841058553679004,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.1802999914938216,
+            0.5889799722131493,
+            -0.9495799552007915,
+            -1.070225697321266,
+            -0.18728687322613707,
+            0.30195230581356786,
+            -0.18728687322613707,
+            -0.5157546277429348,
+            -0.9863775323243197,
+            0.30195230581356786,
+            -0.9863775323243197,
+            0.4627237303364723,
+            -0.12240657396947667,
+            0.0016631327984983487,
+            -0.013970315507385913,
+            0.12123416269111348,
+            -0.002034671914563809,
+            0.01709124408233573,
+            -0.002034671914563809,
+            -0.028490045221941467,
+            -0.00023221799024913015,
+            0.01709124408233573,
+            -0.00023221799024913015,
+            -0.026567059102687987,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.2602591506940697,
+            0.24313683814840728,
+            -0.3561441009497795,
+            -0.19841405298242495,
+            0.23891499072173572,
+            -0.3499599864093028,
+            0.23891499072173572,
+            -0.23095714382387694,
+            -0.32693630309290145,
+            -0.34995998640930287,
+            -0.32693630309290145,
+            0.02473856993038946,
+            -0.13298898711407747,
+            0.03304327593938735,
+            -0.03753063440029181,
+            0.11967949867634801,
+            -0.0393666881596552,
+            0.044712781613435545,
+            -0.0393666881596552,
+            -0.02897797727002851,
+            -0.01110961751744871,
+            0.044712781613435545,
+            -0.011109617517448708,
+            -0.026140939946396612,
+            -0.0655508249665835,
+            0.005338981218997763,
+            0.002076270474054683,
+            0.0652388462343952,
+            -0.005991628777201879,
+            -0.0023300778578007283,
+            -0.005991628777201879,
+            -0.007837034455273709,
+            0.0001897800970154443,
+            -0.002330077857800728,
+            0.0001897800970154443,
+            -0.008251237047966148,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -1.0053013143052718,
+            -0.24303987818369216,
+            0.2761816797541954,
+            0.8183357773897718,
+            0.45521877564245394,
+            -0.517294063230061,
+            0.45521877564245394,
+            -0.9545617219529918,
+            -0.1250601031984763,
+            -0.517294063230061,
+            -0.1250601031984763,
+            -0.922500859133019,
+            -0.057945707686107864,
+            -0.008613551142529548,
+            0.00809151773995201,
+            0.05650342385473076,
+            0.009417127630974336,
+            -0.00884639262303651,
+            0.009417127630974336,
+            -0.005448318729873148,
+            -0.0013150043088297515,
+            -0.00884639262303651,
+            -0.0013150043088297513,
+            -0.005612854948377747,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.2602591506940697,
+            -0.24313683814840728,
+            0.3561441009497795,
+            -0.19841405298242495,
+            0.23891499072173572,
+            -0.3499599864093028,
+            0.23891499072173572,
+            -0.23095714382387694,
+            -0.32693630309290145,
+            -0.34995998640930287,
+            -0.32693630309290145,
+            0.02473856993038946,
+            -0.09709214772325653,
+            0.00241522755530488,
+            0.0028982730663658636,
+            0.09699249715361474,
+            -0.0028489422636695603,
+            -0.0034187307164034813,
+            -0.00284894226366956,
+            -0.017464112635362926,
+            8.504305264685245e-05,
+            -0.003418730716403481,
+            8.504305264685245e-05,
+            -0.017432930182725747,
+            -0.014091999096200191,
+            -0.0009521621010946064,
+            0.0032101465122618194,
+            0.013676554858123474,
+            0.0009667394698497003,
+            -0.0032592930697789933,
+            0.0009667394698497003,
+            -0.0005658690612028016,
+            -0.0002202225047147966,
+            -0.0032592930697789933,
+            -0.0002202225047147966,
+            0.00011127514881492362,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+        ]
+        self.drij = [
+            3.5299999999999976,
+            0.4399999999999995,
+            -0.37000000000000055,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            1.06099789543e-313,
+            0.0,
+            0.0,
+            -0.740000000000002,
+            0.31000000000000005,
+            0.5599999999999996,
+            0.41999999999999815,
+            0.7599999999999993,
+            -0.5000000000000007,
+            3.6799999999999997,
+            -0.05000000000000071,
+            0.4199999999999995,
+            4.439999999999998,
+            0.6599999999999997,
+            -0.6200000000000006,
+            1.06099789543e-313,
+            3.11,
+            -0.31999999999999984,
+            0.740000000000002,
+            -0.31000000000000005,
+            -0.5599999999999996,
+            4.27,
+            0.12999999999999945,
+            -0.9300000000000002,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            1.06099789543e-313,
+            3.26,
+            -0.81,
+            1.1600000000000001,
+            0.4499999999999993,
+            -1.0600000000000003,
+            4.420000000000002,
+            -0.36000000000000076,
+            -0.14000000000000012,
+            5.18,
+            0.34999999999999964,
+            -1.1800000000000002,
+            0.0,
+            0.0,
+            0.0,
+            1.06099789543e-313,
+            0.0,
+            0.0,
+            -0.41999999999999815,
+            -0.7599999999999993,
+            0.5000000000000007,
+            3.11,
+            -0.31999999999999984,
+            0.13000000000000012,
+            1.0609978957e-313,
+            2.1219957915e-314,
+            6.3659873744e-314,
+            6.3659873744e-314,
+            0.0,
+            0.0,
+            0.0,
+            0.1499999999999999,
+            -0.4900000000000002,
+            -1.1600000000000001,
+            -0.4499999999999993,
+            1.0600000000000003,
+            3.2600000000000002,
+            -0.81,
+            0.9200000000000002,
+            4.0200000000000005,
+            -0.09999999999999964,
+            -0.11999999999999988,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.1499999999999999,
+            0.4900000000000002,
+            -3.529999999999998,
+            -0.4399999999999995,
+            0.37000000000000055,
+            0.0,
+            0.0,
+            0.0,
+            5e-324,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.7599999999999998,
+            0.7100000000000004,
+            0.15000000000000036,
+            -0.4900000000000002,
+            0.79,
+            0.9100000000000006,
+            0.2200000000000002,
+            -0.25,
+            -3.11,
+            0.31999999999999984,
+            -0.13000000000000012,
+            -4.27,
+            -0.12999999999999945,
+            0.9300000000000002,
+            0.0,
+            -0.9099999999999997,
+            -0.2200000000000002,
+            -0.15000000000000036,
+            0.4900000000000002,
+            -0.79,
+            -3.6799999999999984,
+            0.05000000000000071,
+            -0.4199999999999995,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.7599999999999998,
+            -0.7100000000000004,
+            0.7600000000000002,
+            0.7100000000000004,
+            -1.04,
+            -3.2600000000000002,
+            0.81,
+            -0.9200000000000002,
+            -4.42,
+            0.36000000000000076,
+            0.14000000000000012,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.9100000000000006,
+            -0.2200000000000002,
+            0.25,
+            -4.439999999999999,
+            -0.6599999999999997,
+            0.6200000000000006,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            -0.7400000000000002,
+            0.31000000000000005,
+            -0.7600000000000002,
+            -0.7100000000000004,
+            1.04,
+            -4.0200000000000005,
+            0.09999999999999964,
+            0.11999999999999988,
+            -5.180000000000001,
+            -0.34999999999999964,
+            1.1800000000000002,
+            0.0,
+            0.0,
+            0.0,
+            0.0,
+            0.7400000000000002,
+            -0.31000000000000005,
+        ]
         self.dcoord = np.reshape(self.dcoord, [1, -1])
         self.dtype = np.reshape(self.dtype, [1, -1])
         self.dbox = np.reshape(self.dbox, [1, -1])
@@ -52,17 +1018,253 @@ def setUp(self):
         self.dnlist = np.tile(self.dnlist, [self.nframes, 1])
         self.dem_deriv = np.tile(self.dem_deriv, [self.nframes, 1])
         self.drij = np.tile(self.drij, [self.nframes, 1])
-        self.expected_grad_net = [5.01828,  4.97546, -0.09569, -1.15305,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, 0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000, -0.61704,  1.06623,  0.15319,  0.24608,  5.28467, -2.59553,  3.00729, -8.19962,  5.03021,  5.02151, -0.86956,  0.26289,  2.75500,  2.70125,  0.22900, -0.54729,  0.00000,  0.00000,  0.00000,  0.00000, 
-                                  -0.61704, -1.06623, -0.15319, -0.24608,  2.32844,  2.23467, -0.16758, -0.70940,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  1.74748, -0.30379, -1.11004, -3.49833,  2.42774,  2.39284, -0.45567, -0.22216,  0.60993,  0.59054,  0.02135, -0.15332,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
-                                  5.28467,  2.59553, -3.00729,  8.19962,  4.77234,  4.62396, -1.90919, -0.44792,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  1.74748,  0.30379,  1.11004,  3.49833,  4.06655,  3.57849, -2.07817,  0.88468,  3.61241,  3.58881, -0.57839, -0.39969,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
-                                  5.01828, -4.97546,  0.09569,  1.15305,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.07573, -3.82089, -2.40143, -0.67375,  9.64382,  8.39638, -2.48922, -9.00792,  4.77234, -4.62396,  1.90919,  0.44792,  2.32844, -2.23467,  0.16758,  0.70940,  0.00000,  0.00000,  0.00000,  0.00000,  
-                                  0.07573,  3.82089,  2.40143,  0.67375,  5.03021, -5.02151,  0.86956, -0.26289,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  1.44012, -1.15994, -0.66718, -3.33981,  4.06655, -3.57849,  2.07817, -0.88468,  2.42774, -2.39284,  0.45567,  0.22216,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  
-                                  9.64382, -8.39638,  2.48922,  9.00792,  2.75500, -2.70125, -0.22900,  0.54729,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  1.44012,  1.15994,  0.66718,  3.33981,  3.61241, -3.58881,  0.57839,  0.39969,  0.60993, -0.59054, -0.02135,  0.15332,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000,  0.00000]
+        self.expected_grad_net = [
+            5.01828,
+            4.97546,
+            -0.09569,
+            -1.15305,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            -0.61704,
+            1.06623,
+            0.15319,
+            0.24608,
+            5.28467,
+            -2.59553,
+            3.00729,
+            -8.19962,
+            5.03021,
+            5.02151,
+            -0.86956,
+            0.26289,
+            2.75500,
+            2.70125,
+            0.22900,
+            -0.54729,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            -0.61704,
+            -1.06623,
+            -0.15319,
+            -0.24608,
+            2.32844,
+            2.23467,
+            -0.16758,
+            -0.70940,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.74748,
+            -0.30379,
+            -1.11004,
+            -3.49833,
+            2.42774,
+            2.39284,
+            -0.45567,
+            -0.22216,
+            0.60993,
+            0.59054,
+            0.02135,
+            -0.15332,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            5.28467,
+            2.59553,
+            -3.00729,
+            8.19962,
+            4.77234,
+            4.62396,
+            -1.90919,
+            -0.44792,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.74748,
+            0.30379,
+            1.11004,
+            3.49833,
+            4.06655,
+            3.57849,
+            -2.07817,
+            0.88468,
+            3.61241,
+            3.58881,
+            -0.57839,
+            -0.39969,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            5.01828,
+            -4.97546,
+            0.09569,
+            1.15305,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.07573,
+            -3.82089,
+            -2.40143,
+            -0.67375,
+            9.64382,
+            8.39638,
+            -2.48922,
+            -9.00792,
+            4.77234,
+            -4.62396,
+            1.90919,
+            0.44792,
+            2.32844,
+            -2.23467,
+            0.16758,
+            0.70940,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.07573,
+            3.82089,
+            2.40143,
+            0.67375,
+            5.03021,
+            -5.02151,
+            0.86956,
+            -0.26289,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.44012,
+            -1.15994,
+            -0.66718,
+            -3.33981,
+            4.06655,
+            -3.57849,
+            2.07817,
+            -0.88468,
+            2.42774,
+            -2.39284,
+            0.45567,
+            0.22216,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            9.64382,
+            -8.39638,
+            2.48922,
+            9.00792,
+            2.75500,
+            -2.70125,
+            -0.22900,
+            0.54729,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            1.44012,
+            1.15994,
+            0.66718,
+            3.33981,
+            3.61241,
+            -3.58881,
+            0.57839,
+            0.39969,
+            0.60993,
+            -0.59054,
+            -0.02135,
+            0.15332,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+            0.00000,
+        ]
 
         self.sel = [5, 5]
-        self.sec = np.array([0, 0, 0], dtype = int)
+        self.sec = np.array([0, 0, 0], dtype=int)
         self.sec[1:3] = np.cumsum(self.sel)
-        self.rcut = 6.
+        self.rcut = 6.0
         self.rcut_smth = 0.8
         self.dnatoms = [6, 6, 2, 4]
 
@@ -71,46 +1273,61 @@ def setUp(self):
         self.nnei = self.sec[-1]
         self.ndescrpt = 4 * self.nnei
         self.ntypes = np.max(self.dtype) + 1
-        self.dnet_deriv=[]
+        self.dnet_deriv = []
         for ii in range(self.nloc * self.ndescrpt):
-            self.dnet_deriv.append(10-ii*0.01)
-        self.dgrad=[]
+            self.dnet_deriv.append(10 - ii * 0.01)
+        self.dgrad = []
         for ii in range(9):
-            self.dgrad.append(10-ii*1.)
+            self.dgrad.append(10 - ii * 1.0)
         self.dnet_deriv = np.reshape(self.dnet_deriv, [1, -1])
         self.dgrad = np.reshape(self.dgrad, [1, -1])
         self.dnet_deriv = np.tile(self.dnet_deriv, [self.nframes, 1])
         self.dgrad = np.tile(self.dgrad, [self.nframes, 1])
 
-        self.tgrad = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='t_grad')
-        self.tnet_deriv = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * self.ndescrpt], name='t_net_deriv')
-        self.tem_deriv = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * self.ndescrpt * 3], name='t_em_deriv')
-        self.trij = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, self.dnatoms[0] * self.nnei * 3], name='t_rij')
-        self.tnlist = tf.placeholder(tf.int32, [None, self.dnatoms[0] * self.nnei], name = "t_nlist")
-        self.tnatoms = tf.placeholder(tf.int32, [None], name = "t_natoms")
-        
+        self.tgrad = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="t_grad")
+        self.tnet_deriv = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION,
+            [None, self.dnatoms[0] * self.ndescrpt],
+            name="t_net_deriv",
+        )
+        self.tem_deriv = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION,
+            [None, self.dnatoms[0] * self.ndescrpt * 3],
+            name="t_em_deriv",
+        )
+        self.trij = tf.placeholder(
+            GLOBAL_TF_FLOAT_PRECISION,
+            [None, self.dnatoms[0] * self.nnei * 3],
+            name="t_rij",
+        )
+        self.tnlist = tf.placeholder(
+            tf.int32, [None, self.dnatoms[0] * self.nnei], name="t_nlist"
+        )
+        self.tnatoms = tf.placeholder(tf.int32, [None], name="t_natoms")
+
     def test_prod_virial_grad(self):
-        tgrad_net \
-            = op_grads_module.prod_virial_se_a_grad(
-                self.tgrad,
-                self.tnet_deriv,
-                self.tem_deriv,
-                self.trij,
-                self.tnlist,
-                self.tnatoms, 
-                n_a_sel=self.nnei,
-                n_r_sel=0)
-        self.sess.run (tf.global_variables_initializer())
+        tgrad_net = op_grads_module.prod_virial_se_a_grad(
+            self.tgrad,
+            self.tnet_deriv,
+            self.tem_deriv,
+            self.trij,
+            self.tnlist,
+            self.tnatoms,
+            n_a_sel=self.nnei,
+            n_r_sel=0,
+        )
+        self.sess.run(tf.global_variables_initializer())
         dgrad_net = self.sess.run(
             tgrad_net,
-            feed_dict = {
+            feed_dict={
                 self.tgrad: self.dgrad,
                 self.tnet_deriv: self.dnet_deriv,
                 self.tem_deriv: self.dem_deriv,
                 self.trij: self.drij,
                 self.tnlist: self.dnlist,
-                self.tnatoms: self.dnatoms}
+                self.tnatoms: self.dnatoms,
+            },
         )
-        self.assertEqual(dgrad_net.shape, (self.nframes, self.nloc*self.ndescrpt))
+        self.assertEqual(dgrad_net.shape, (self.nframes, self.nloc * self.ndescrpt))
         for ff in range(self.nframes):
             np.testing.assert_almost_equal(dgrad_net[ff], self.expected_grad_net, 5)
diff --git a/source/tests/test_sel_idx.py b/source/tests/test_sel_idx.py
index 23d0869b61..406385a14f 100644
--- a/source/tests/test_sel_idx.py
+++ b/source/tests/test_sel_idx.py
@@ -1,18 +1,23 @@
-import os,sys
-import numpy as np
+import os
+import sys
 import unittest
 
-from deepmd.common import select_idx_map
+import numpy as np
+
+from deepmd.common import (
+    select_idx_map,
+)
 
 
-class TestSelIdx (unittest.TestCase) :
-    def test_add (self) :
-        atom_type = np.array([0,1,2,2,1,0], dtype = int)
-        type_sel = np.array([1,0], dtype = int)
+class TestSelIdx(unittest.TestCase):
+    def test_add(self):
+        atom_type = np.array([0, 1, 2, 2, 1, 0], dtype=int)
+        type_sel = np.array([1, 0], dtype=int)
         idx_map = select_idx_map(atom_type, type_sel)
         new_atom_type = atom_type[idx_map]
         self.assertEqual(list(idx_map), [0, 5, 1, 4])
         self.assertEqual(list(new_atom_type), [0, 0, 1, 1])
 
-if __name__ == '__main__':
+
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_tab_nonsmth.py b/source/tests/test_tab_nonsmth.py
index 0d8305cef4..3174f81e85 100644
--- a/source/tests/test_tab_nonsmth.py
+++ b/source/tests/test_tab_nonsmth.py
@@ -1,156 +1,174 @@
-import os,sys
-import numpy as np
-from deepmd.env import tf
+import os
+import sys
 import unittest
 
-from tensorflow.python.framework import ops
+import numpy as np
+from common import (
+    Data,
+    force_dw_test,
+    force_test,
+    virial_dw_test,
+    virial_test,
+)
+from tensorflow.python.framework import (
+    ops,
+)
+from test_descrpt_nonsmth import (
+    Inter,
+)
 
 # load grad of force module
 import deepmd.op
-from deepmd.utils.pair_tab import PairTab
-
-from common import force_test
-from common import virial_test
-from common import force_dw_test
-from common import virial_dw_test
-from common import Data
-from test_descrpt_nonsmth import Inter 
-
-from deepmd.env import op_module
-
-def _make_tab(ntype) :
-    xx = np.arange(0,9,0.001)
-    yy = 1000/(xx+.5)**6
+from deepmd.env import (
+    op_module,
+    tf,
+)
+from deepmd.utils.pair_tab import (
+    PairTab,
+)
+
+
+def _make_tab(ntype):
+    xx = np.arange(0, 9, 0.001)
+    yy = 1000 / (xx + 0.5) ** 6
     prt = xx
     ninter = ntype * (ntype + 1) // 2
-    for ii in range(ninter) :
+    for ii in range(ninter):
         prt = np.append(prt, yy)
-    prt = np.reshape(prt, [ninter+1, -1])
-    np.savetxt('tab.xvg', prt.T)
+    prt = np.reshape(prt, [ninter + 1, -1])
+    np.savetxt("tab.xvg", prt.T)
 
 
 class IntplInter(Inter):
-    def setUp (self, 
-               data,
-               sess=None) :
+    def setUp(self, data, sess=None):
         # tabulated
         Inter.setUp(self, data, sess=sess)
         _make_tab(data.get_ntypes())
-        self.srtab = PairTab('tab.xvg')
+        self.srtab = PairTab("tab.xvg")
         self.smin_alpha = 0.3
         self.sw_rmin = 1
         self.sw_rmax = 3.45
         tab_info, tab_data = self.srtab.get()
-        with tf.variable_scope('tab', reuse=tf.AUTO_REUSE):
-            self.tab_info = tf.get_variable('t_tab_info',
-                                            tab_info.shape,
-                                            dtype = tf.float64,
-                                            trainable = False,
-                                            initializer = tf.constant_initializer(tab_info))
-            self.tab_data = tf.get_variable('t_tab_data',
-                                            tab_data.shape,
-                                            dtype = tf.float64,
-                                            trainable = False,
-                                            initializer = tf.constant_initializer(tab_data))
+        with tf.variable_scope("tab", reuse=tf.AUTO_REUSE):
+            self.tab_info = tf.get_variable(
+                "t_tab_info",
+                tab_info.shape,
+                dtype=tf.float64,
+                trainable=False,
+                initializer=tf.constant_initializer(tab_info),
+            )
+            self.tab_data = tf.get_variable(
+                "t_tab_data",
+                tab_data.shape,
+                dtype=tf.float64,
+                trainable=False,
+                initializer=tf.constant_initializer(tab_data),
+            )
 
     def tearDown(self):
-        os.remove('tab.xvg')
-        
-    def comp_interpl_ef (self, 
-                         dcoord, 
-                         dbox, 
-                         dtype,
-                         tnatoms,
-                         name,
-                         reuse = None) :
-        descrpt, descrpt_deriv, rij, nlist, axis \
-            = op_module.descrpt (dcoord, 
-                                 dtype,
-                                 tnatoms,
-                                 dbox, 
-                                 tf.constant(self.default_mesh),
-                                 self.t_avg,
-                                 self.t_std,
-                                 rcut_a = self.rcut_a, 
-                                 rcut_r = self.rcut_r, 
-                                 sel_a = self.sel_a, 
-                                 sel_r = self.sel_r, 
-                                 axis_rule = self.axis_rule)
-        inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt])
-        atom_ener = self._net (inputs_reshape, name, reuse = reuse)
-
-        sw_lambda, sw_deriv \
-            = op_module.soft_min_switch(dtype, 
-                                        rij, 
-                                        nlist,
-                                        tnatoms,
-                                        sel_a = self.sel_a,
-                                        sel_r = self.sel_r,
-                                        alpha = self.smin_alpha,
-                                        rmin = self.sw_rmin,
-                                        rmax = self.sw_rmax)
+        os.remove("tab.xvg")
+
+    def comp_interpl_ef(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        descrpt, descrpt_deriv, rij, nlist, axis = op_module.descrpt(
+            dcoord,
+            dtype,
+            tnatoms,
+            dbox,
+            tf.constant(self.default_mesh),
+            self.t_avg,
+            self.t_std,
+            rcut_a=self.rcut_a,
+            rcut_r=self.rcut_r,
+            sel_a=self.sel_a,
+            sel_r=self.sel_r,
+            axis_rule=self.axis_rule,
+        )
+        inputs_reshape = tf.reshape(descrpt, [-1, self.ndescrpt])
+        atom_ener = self._net(inputs_reshape, name, reuse=reuse)
+
+        sw_lambda, sw_deriv = op_module.soft_min_switch(
+            dtype,
+            rij,
+            nlist,
+            tnatoms,
+            sel_a=self.sel_a,
+            sel_r=self.sel_r,
+            alpha=self.smin_alpha,
+            rmin=self.sw_rmin,
+            rmax=self.sw_rmax,
+        )
         inv_sw_lambda = 1.0 - sw_lambda
-        tab_atom_ener, tab_force, tab_atom_virial \
-            = op_module.pair_tab(
-                self.tab_info,
-                self.tab_data,
-                dtype,
-                rij,
-                nlist,
-                tnatoms,
-                sw_lambda,
-                sel_a = self.sel_a,
-                sel_r = self.sel_r)
+        tab_atom_ener, tab_force, tab_atom_virial = op_module.pair_tab(
+            self.tab_info,
+            self.tab_data,
+            dtype,
+            rij,
+            nlist,
+            tnatoms,
+            sw_lambda,
+            sel_a=self.sel_a,
+            sel_r=self.sel_r,
+        )
         energy_diff = tab_atom_ener - tf.reshape(atom_ener, [-1, self.natoms[0]])
         tab_atom_ener = tf.reshape(sw_lambda, [-1]) * tf.reshape(tab_atom_ener, [-1])
         atom_ener = tf.reshape(inv_sw_lambda, [-1]) * atom_ener
         energy_raw = tab_atom_ener + atom_ener
 
         energy_raw = tf.reshape(energy_raw, [-1, self.natoms[0]])
-        energy = tf.reduce_sum (energy_raw, axis = 1)
+        energy = tf.reduce_sum(energy_raw, axis=1)
 
-        net_deriv_ = tf.gradients (atom_ener, inputs_reshape)
+        net_deriv_ = tf.gradients(atom_ener, inputs_reshape)
         net_deriv = net_deriv_[0]
-        net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) 
-
-        force = op_module.prod_force (net_deriv_reshape, 
-                                      descrpt_deriv, 
-                                      nlist, 
-                                      axis, 
-                                      tnatoms,
-                                      n_a_sel = self.nnei_a, 
-                                      n_r_sel = self.nnei_r)
-        sw_force \
-            = op_module.soft_min_force(energy_diff, 
-                                       sw_deriv,
-                                       nlist, 
-                                       tnatoms,
-                                       n_a_sel = self.nnei_a,
-                                       n_r_sel = self.nnei_r)
+        net_deriv_reshape = tf.reshape(net_deriv, [-1, self.natoms[0] * self.ndescrpt])
+
+        force = op_module.prod_force(
+            net_deriv_reshape,
+            descrpt_deriv,
+            nlist,
+            axis,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
+        sw_force = op_module.soft_min_force(
+            energy_diff,
+            sw_deriv,
+            nlist,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
         force = force + sw_force + tab_force
-        virial, atom_vir = op_module.prod_virial (net_deriv_reshape, 
-                                                  descrpt_deriv, 
-                                                  rij,
-                                                  nlist, 
-                                                  axis, 
-                                                  tnatoms,
-                                                  n_a_sel = self.nnei_a, 
-                                                  n_r_sel = self.nnei_r)
-        sw_virial, sw_atom_virial \
-            = op_module.soft_min_virial (energy_diff,
-                                         sw_deriv,
-                                         rij,
-                                         nlist,
-                                         tnatoms,
-                                         n_a_sel = self.nnei_a,
-                                         n_r_sel = self.nnei_r)
+        virial, atom_vir = op_module.prod_virial(
+            net_deriv_reshape,
+            descrpt_deriv,
+            rij,
+            nlist,
+            axis,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
+        sw_virial, sw_atom_virial = op_module.soft_min_virial(
+            energy_diff,
+            sw_deriv,
+            rij,
+            nlist,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
         # atom_virial = atom_virial + sw_atom_virial + tab_atom_virial
-        virial = virial + sw_virial \
-                 + tf.reduce_sum(tf.reshape(tab_atom_virial, [-1, self.natoms[1], 9]), axis = 1)
+        virial = (
+            virial
+            + sw_virial
+            + tf.reduce_sum(
+                tf.reshape(tab_atom_virial, [-1, self.natoms[1], 9]), axis=1
+            )
+        )
 
         return energy, force, virial
 
-    
 
 class TestTabNonSmooth(IntplInter, tf.test.TestCase):
     # def __init__ (self, *args, **kwargs):
@@ -165,18 +183,18 @@ def setUp(self):
         data = Data()
         IntplInter.setUp(self, data, sess=self.test_session().__enter__())
 
-    def test_force (self) :
-        force_test(self, self, places=5, suffix = '_tab')
+    def test_force(self):
+        force_test(self, self, places=5, suffix="_tab")
 
-    def test_virial (self) :
-        virial_test(self, self, places=5, suffix = '_tab')
+    def test_virial(self):
+        virial_test(self, self, places=5, suffix="_tab")
 
-    def test_force_dw (self) :
-        force_dw_test(self, self, places=5, suffix = '_tab')
+    def test_force_dw(self):
+        force_dw_test(self, self, places=5, suffix="_tab")
 
-    def test_virial_dw (self) :
-        virial_dw_test(self, self, places=5, suffix = '_tab')
+    def test_virial_dw(self):
+        virial_dw_test(self, self, places=5, suffix="_tab")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_tab_smooth.py b/source/tests/test_tab_smooth.py
index ab4dc65c7a..2f5bcd9b71 100644
--- a/source/tests/test_tab_smooth.py
+++ b/source/tests/test_tab_smooth.py
@@ -1,154 +1,172 @@
-import os,sys
-import numpy as np
-from deepmd.env import tf
+import os
+import sys
 import unittest
 
-from tensorflow.python.framework import ops
+import numpy as np
+from common import (
+    Data,
+    force_dw_test,
+    force_test,
+    virial_dw_test,
+    virial_test,
+)
+from tensorflow.python.framework import (
+    ops,
+)
+from test_descrpt_smooth import (
+    Inter,
+)
 
 # load grad of force module
 import deepmd.op
-from deepmd.utils.pair_tab import PairTab
-
-from common import force_test
-from common import virial_test
-from common import force_dw_test
-from common import virial_dw_test
-from common import Data
-from test_descrpt_smooth import Inter 
-
-from deepmd.env import op_module
-
-def _make_tab(ntype) :
-    xx = np.arange(0,9,0.001)
-    yy = 1000/(xx+.5)**6
+from deepmd.env import (
+    op_module,
+    tf,
+)
+from deepmd.utils.pair_tab import (
+    PairTab,
+)
+
+
+def _make_tab(ntype):
+    xx = np.arange(0, 9, 0.001)
+    yy = 1000 / (xx + 0.5) ** 6
     prt = xx
     ninter = ntype * (ntype + 1) // 2
-    for ii in range(ninter) :
+    for ii in range(ninter):
         prt = np.append(prt, yy)
-    prt = np.reshape(prt, [ninter+1, -1])
-    np.savetxt('tab.xvg', prt.T)
+    prt = np.reshape(prt, [ninter + 1, -1])
+    np.savetxt("tab.xvg", prt.T)
 
 
 class IntplInter(Inter):
-    def setUp (self, 
-               data,
-               sess=None) :
+    def setUp(self, data, sess=None):
         # tabulated
         Inter.setUp(self, data, sess=sess)
         _make_tab(data.get_ntypes())
-        self.srtab = PairTab('tab.xvg')
+        self.srtab = PairTab("tab.xvg")
         self.smin_alpha = 0.3
         self.sw_rmin = 1
         self.sw_rmax = 3.45
         tab_info, tab_data = self.srtab.get()
-        with tf.variable_scope('tab', reuse=tf.AUTO_REUSE):
-            self.tab_info = tf.get_variable('t_tab_info',
-                                            tab_info.shape,
-                                            dtype = tf.float64,
-                                            trainable = False,
-                                            initializer = tf.constant_initializer(tab_info))
-            self.tab_data = tf.get_variable('t_tab_data',
-                                            tab_data.shape,
-                                            dtype = tf.float64,
-                                            trainable = False,
-                                            initializer = tf.constant_initializer(tab_data))
+        with tf.variable_scope("tab", reuse=tf.AUTO_REUSE):
+            self.tab_info = tf.get_variable(
+                "t_tab_info",
+                tab_info.shape,
+                dtype=tf.float64,
+                trainable=False,
+                initializer=tf.constant_initializer(tab_info),
+            )
+            self.tab_data = tf.get_variable(
+                "t_tab_data",
+                tab_data.shape,
+                dtype=tf.float64,
+                trainable=False,
+                initializer=tf.constant_initializer(tab_data),
+            )
 
     def tearDown(self):
-        os.remove('tab.xvg')
-        
-    def comp_ef (self, 
-                 dcoord, 
-                 dbox, 
-                 dtype,
-                 tnatoms,
-                 name,
-                 reuse = None) :
-        descrpt, descrpt_deriv, rij, nlist \
-            = op_module.prod_env_mat_a (dcoord, 
-                                       dtype,
-                                       tnatoms,
-                                       dbox, 
-                                       tf.constant(self.default_mesh),
-                                       self.t_avg,
-                                       self.t_std,
-                                       rcut_a = self.rcut_a, 
-                                       rcut_r = self.rcut_r, 
-                                       rcut_r_smth = self.rcut_r_smth,
-                                       sel_a = self.sel_a, 
-                                       sel_r = self.sel_r)
-        inputs_reshape = tf.reshape (descrpt, [-1, self.ndescrpt])
-        atom_ener = self._net (inputs_reshape, name, reuse = reuse)
-
-        sw_lambda, sw_deriv \
-            = op_module.soft_min_switch(dtype, 
-                                        rij, 
-                                        nlist,
-                                        tnatoms,
-                                        sel_a = self.sel_a,
-                                        sel_r = self.sel_r,
-                                        alpha = self.smin_alpha,
-                                        rmin = self.sw_rmin,
-                                        rmax = self.sw_rmax)
+        os.remove("tab.xvg")
+
+    def comp_ef(self, dcoord, dbox, dtype, tnatoms, name, reuse=None):
+        descrpt, descrpt_deriv, rij, nlist = op_module.prod_env_mat_a(
+            dcoord,
+            dtype,
+            tnatoms,
+            dbox,
+            tf.constant(self.default_mesh),
+            self.t_avg,
+            self.t_std,
+            rcut_a=self.rcut_a,
+            rcut_r=self.rcut_r,
+            rcut_r_smth=self.rcut_r_smth,
+            sel_a=self.sel_a,
+            sel_r=self.sel_r,
+        )
+        inputs_reshape = tf.reshape(descrpt, [-1, self.ndescrpt])
+        atom_ener = self._net(inputs_reshape, name, reuse=reuse)
+
+        sw_lambda, sw_deriv = op_module.soft_min_switch(
+            dtype,
+            rij,
+            nlist,
+            tnatoms,
+            sel_a=self.sel_a,
+            sel_r=self.sel_r,
+            alpha=self.smin_alpha,
+            rmin=self.sw_rmin,
+            rmax=self.sw_rmax,
+        )
         inv_sw_lambda = 1.0 - sw_lambda
-        tab_atom_ener, tab_force, tab_atom_virial \
-            = op_module.pair_tab(
-                self.tab_info,
-                self.tab_data,
-                dtype,
-                rij,
-                nlist,
-                tnatoms,
-                sw_lambda,
-                sel_a = self.sel_a,
-                sel_r = self.sel_r)
+        tab_atom_ener, tab_force, tab_atom_virial = op_module.pair_tab(
+            self.tab_info,
+            self.tab_data,
+            dtype,
+            rij,
+            nlist,
+            tnatoms,
+            sw_lambda,
+            sel_a=self.sel_a,
+            sel_r=self.sel_r,
+        )
         energy_diff = tab_atom_ener - tf.reshape(atom_ener, [-1, self.natoms[0]])
         tab_atom_ener = tf.reshape(sw_lambda, [-1]) * tf.reshape(tab_atom_ener, [-1])
         atom_ener = tf.reshape(inv_sw_lambda, [-1]) * atom_ener
         energy_raw = tab_atom_ener + atom_ener
 
         energy_raw = tf.reshape(energy_raw, [-1, self.natoms[0]])
-        energy = tf.reduce_sum (energy_raw, axis = 1)
+        energy = tf.reduce_sum(energy_raw, axis=1)
 
-        net_deriv_ = tf.gradients (atom_ener, inputs_reshape)
+        net_deriv_ = tf.gradients(atom_ener, inputs_reshape)
         net_deriv = net_deriv_[0]
-        net_deriv_reshape = tf.reshape (net_deriv, [-1, self.natoms[0] * self.ndescrpt]) 
-
-        force = op_module.prod_force_se_a (net_deriv_reshape, 
-                                      descrpt_deriv, 
-                                      nlist, 
-                                      tnatoms,
-                                      n_a_sel = self.nnei_a, 
-                                      n_r_sel = self.nnei_r)
-        sw_force \
-            = op_module.soft_min_force(energy_diff, 
-                                       sw_deriv,
-                                       nlist, 
-                                       tnatoms,
-                                       n_a_sel = self.nnei_a,
-                                       n_r_sel = self.nnei_r)
+        net_deriv_reshape = tf.reshape(net_deriv, [-1, self.natoms[0] * self.ndescrpt])
+
+        force = op_module.prod_force_se_a(
+            net_deriv_reshape,
+            descrpt_deriv,
+            nlist,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
+        sw_force = op_module.soft_min_force(
+            energy_diff,
+            sw_deriv,
+            nlist,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
         force = force + sw_force + tab_force
-        virial, atom_vir = op_module.prod_virial_se_a (net_deriv_reshape, 
-                                                  descrpt_deriv, 
-                                                  rij,
-                                                  nlist, 
-                                                  tnatoms,
-                                                  n_a_sel = self.nnei_a, 
-                                                  n_r_sel = self.nnei_r)
-        sw_virial, sw_atom_virial \
-            = op_module.soft_min_virial (energy_diff,
-                                         sw_deriv,
-                                         rij,
-                                         nlist,
-                                         tnatoms,
-                                         n_a_sel = self.nnei_a,
-                                         n_r_sel = self.nnei_r)
+        virial, atom_vir = op_module.prod_virial_se_a(
+            net_deriv_reshape,
+            descrpt_deriv,
+            rij,
+            nlist,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
+        sw_virial, sw_atom_virial = op_module.soft_min_virial(
+            energy_diff,
+            sw_deriv,
+            rij,
+            nlist,
+            tnatoms,
+            n_a_sel=self.nnei_a,
+            n_r_sel=self.nnei_r,
+        )
         # atom_virial = atom_virial + sw_atom_virial + tab_atom_virial
-        virial = virial + sw_virial \
-                 + tf.reduce_sum(tf.reshape(tab_atom_virial, [-1, self.natoms[1], 9]), axis = 1)
+        virial = (
+            virial
+            + sw_virial
+            + tf.reduce_sum(
+                tf.reshape(tab_atom_virial, [-1, self.natoms[1], 9]), axis=1
+            )
+        )
 
         return energy, force, virial
 
-    
 
 class TestTabSmooth(IntplInter, tf.test.TestCase):
     # def __init__ (self, *args, **kwargs):
@@ -163,18 +181,18 @@ def setUp(self):
         data = Data()
         IntplInter.setUp(self, data, sess=self.test_session().__enter__())
 
-    def test_force (self) :
-        force_test(self, self, places=5, suffix = '_tab_smth')
+    def test_force(self):
+        force_test(self, self, places=5, suffix="_tab_smth")
 
-    def test_virial (self) :
-        virial_test(self, self, places=5, suffix = '_tab_smth')
+    def test_virial(self):
+        virial_test(self, self, places=5, suffix="_tab_smth")
 
-    def test_force_dw (self) :
-        force_dw_test(self, self, places=8, suffix = '_tab_smth')
+    def test_force_dw(self):
+        force_dw_test(self, self, places=8, suffix="_tab_smth")
 
-    def test_virial_dw (self) :
-        virial_dw_test(self, self, places=8, suffix = '_tab_smth')
+    def test_virial_dw(self):
+        virial_dw_test(self, self, places=8, suffix="_tab_smth")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_tabulate.py b/source/tests/test_tabulate.py
index 341850e68c..65e13ef9d2 100644
--- a/source/tests/test_tabulate.py
+++ b/source/tests/test_tabulate.py
@@ -1,48 +1,114 @@
 import unittest
+
 import numpy as np
-from deepmd.utils.tabulate import DPTabulate
-from deepmd.env import op_module
-from deepmd.env import tf
-from deepmd.common import gelu
+
+from deepmd.common import (
+    gelu,
+)
+from deepmd.env import (
+    op_module,
+    tf,
+)
+from deepmd.utils.tabulate import (
+    DPTabulate,
+)
 
 # Now just test some OPs utilized by DPTabulate sourced in /opt/deepmd-kit/source/op/unaggregated_grad.cc
 
+
 class TestDPTabulate(unittest.TestCase):
     def test_op_tanh(self):
-        w=tf.constant([[0.1,0.2,0.3,0.4],[0.5,0.6,0.7,0.8],[0.9,1,1.1,1.2]],dtype='double')
-        x=tf.constant([[0.1,0.2,0.3],[0.4,0.5,0.6],[0.7,0.8,0.9],[1.0,1.1,1.2]],dtype='double')
-        b=tf.constant([[0.1],[0.2],[0.3],[0.4]],dtype='double')
+        w = tf.constant(
+            [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8], [0.9, 1, 1.1, 1.2]],
+            dtype="double",
+        )
+        x = tf.constant(
+            [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9], [1.0, 1.1, 1.2]],
+            dtype="double",
+        )
+        b = tf.constant([[0.1], [0.2], [0.3], [0.4]], dtype="double")
         xbar = tf.matmul(x, w) + b
-        y=tf.nn.tanh(xbar)
+        y = tf.nn.tanh(xbar)
         dy = op_module.unaggregated_dy_dx_s(y, w, xbar, tf.constant(1))
         dy_array = tf.Session().run(dy)
-        answer = np.array([[8.008666403121351973e-02, 1.513925729426658651e-01, 2.134733287761668430e-01, 2.661983049806041501e-01], 
-                           [4.010658815015744061e-02, 6.306476628799793926e-02, 7.332167904608145881e-02, 7.494218676568849269e-02],
-                           [1.561705624394135218e-02, 1.994112926507514427e-02, 1.887519955881525671e-02, 1.576442161040989692e-02],
-                           [5.492686739421748753e-03, 5.754985286040992763e-03, 4.493113544969218158e-03, 3.107638130764600777e-03]])
-        
+        answer = np.array(
+            [
+                [
+                    8.008666403121351973e-02,
+                    1.513925729426658651e-01,
+                    2.134733287761668430e-01,
+                    2.661983049806041501e-01,
+                ],
+                [
+                    4.010658815015744061e-02,
+                    6.306476628799793926e-02,
+                    7.332167904608145881e-02,
+                    7.494218676568849269e-02,
+                ],
+                [
+                    1.561705624394135218e-02,
+                    1.994112926507514427e-02,
+                    1.887519955881525671e-02,
+                    1.576442161040989692e-02,
+                ],
+                [
+                    5.492686739421748753e-03,
+                    5.754985286040992763e-03,
+                    4.493113544969218158e-03,
+                    3.107638130764600777e-03,
+                ],
+            ]
+        )
+
         places = 18
         np.testing.assert_almost_equal(dy_array, answer, places)
 
     def test_op_gelu(self):
-        w = tf.constant([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8], [
-                        0.9, 1, 1.1, 1.2]], dtype='double')
-        x = tf.constant([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [
-                        0.7, 0.8, 0.9], [1.0, 1.1, 1.2]], dtype='double')
-        b = tf.constant([[0.1], [0.2], [0.3], [0.4]], dtype='double')
+        w = tf.constant(
+            [[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8], [0.9, 1, 1.1, 1.2]],
+            dtype="double",
+        )
+        x = tf.constant(
+            [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9], [1.0, 1.1, 1.2]],
+            dtype="double",
+        )
+        b = tf.constant([[0.1], [0.2], [0.3], [0.4]], dtype="double")
         xbar = tf.matmul(x, w) + b
         y = gelu(xbar)
         dy = op_module.unaggregated_dy_dx_s(y, w, xbar, tf.constant(2))
         dy_array = tf.Session().run(dy)
-        answer = np.array([[8.549286163555620821e-02, 1.782905778685600906e-01, 2.776474599997448833e-01, 3.827650237273348965e-01],
-                           [1.089906023807040714e-01, 2.230820937721638697e-01, 3.381867859682909927e-01, 4.513008399758057232e-01],
-                           [1.124254240556722684e-01, 2.209918074710395253e-01, 3.238894323148118759e-01, 4.220357318198978414e-01],
-                           [1.072173273655498138e-01, 2.082159073100979807e-01, 3.059816075270163083e-01, 4.032981557798429595e-01]])
+        answer = np.array(
+            [
+                [
+                    8.549286163555620821e-02,
+                    1.782905778685600906e-01,
+                    2.776474599997448833e-01,
+                    3.827650237273348965e-01,
+                ],
+                [
+                    1.089906023807040714e-01,
+                    2.230820937721638697e-01,
+                    3.381867859682909927e-01,
+                    4.513008399758057232e-01,
+                ],
+                [
+                    1.124254240556722684e-01,
+                    2.209918074710395253e-01,
+                    3.238894323148118759e-01,
+                    4.220357318198978414e-01,
+                ],
+                [
+                    1.072173273655498138e-01,
+                    2.082159073100979807e-01,
+                    3.059816075270163083e-01,
+                    4.032981557798429595e-01,
+                ],
+            ]
+        )
 
         places = 18
         np.testing.assert_almost_equal(dy_array, answer, places)
 
 
-
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
diff --git a/source/tests/test_train.py b/source/tests/test_train.py
index ade7ab7cbd..e831790d48 100644
--- a/source/tests/test_train.py
+++ b/source/tests/test_train.py
@@ -1,19 +1,28 @@
 import unittest
-from unittest.mock import patch, MagicMock
+from unittest.mock import (
+    MagicMock,
+    patch,
+)
 
 import deepmd
-from deepmd.entrypoints.train import parse_auto_sel, parse_auto_sel_ratio, wrap_up_4, update_one_sel, update_sel
+from deepmd.entrypoints.train import (
+    parse_auto_sel,
+    parse_auto_sel_ratio,
+    update_one_sel,
+    update_sel,
+    wrap_up_4,
+)
 
-class TestTrain (unittest.TestCase) :
-    def test_train_parse_auto_sel (self) :
+
+class TestTrain(unittest.TestCase):
+    def test_train_parse_auto_sel(self):
         self.assertTrue(parse_auto_sel("auto"))
         self.assertTrue(parse_auto_sel("auto:12"))
         self.assertTrue(parse_auto_sel("auto:12:13"))
-        self.assertFalse(parse_auto_sel([1,2]))
+        self.assertFalse(parse_auto_sel([1, 2]))
         self.assertFalse(parse_auto_sel("abc:12:13"))
 
-
-    def test_train_parse_auto_sel_ratio (self) :
+    def test_train_parse_auto_sel_ratio(self):
         self.assertEqual(parse_auto_sel_ratio("auto"), 1.1)
         self.assertEqual(parse_auto_sel_ratio("auto:1.2"), 1.2)
         with self.assertRaises(RuntimeError):
@@ -21,96 +30,55 @@ def test_train_parse_auto_sel_ratio (self) :
         with self.assertRaises(RuntimeError):
             parse_auto_sel_ratio("abc")
         with self.assertRaises(RuntimeError):
-            parse_auto_sel_ratio([1,2,3])
-
+            parse_auto_sel_ratio([1, 2, 3])
 
     @patch("deepmd.entrypoints.train.get_sel")
     def test_update_one_sel(self, sel_mock):
-        sel_mock.return_value = [10,20]
+        sel_mock.return_value = [10, 20]
         jdata = {}
-        descriptor = {
-            'type': 'se_e2_a',
-            'rcut': 6,
-            'sel': "auto"
-        }
+        descriptor = {"type": "se_e2_a", "rcut": 6, "sel": "auto"}
         descriptor = update_one_sel(jdata, descriptor)
         # self.assertEqual(descriptor['sel'], [11,22])
-        self.assertEqual(descriptor['sel'], [12,24])
-        descriptor = {
-            'type': 'se_e2_a',
-            'rcut': 6,
-            'sel': "auto:1.5"
-        }
+        self.assertEqual(descriptor["sel"], [12, 24])
+        descriptor = {"type": "se_e2_a", "rcut": 6, "sel": "auto:1.5"}
         descriptor = update_one_sel(jdata, descriptor)
         # self.assertEqual(descriptor['sel'], [15,30])
-        self.assertEqual(descriptor['sel'], [16,32])
-
+        self.assertEqual(descriptor["sel"], [16, 32])
 
     @patch("deepmd.entrypoints.train.get_sel")
     def test_update_sel_hybrid(self, sel_mock):
-        sel_mock.return_value = [10,20]
+        sel_mock.return_value = [10, 20]
         jdata = {
-            'model' : {
-                'descriptor': {
-                    'type' : 'hybrid',
-                    'list' : [
-                        {
-                            'type': 'se_e2_a',
-                            'rcut': 6,
-                            'sel': "auto"                            
-                        },
-                        {
-                            'type': 'se_e2_a',
-                            'rcut': 6,
-                            'sel': "auto:1.5"
-                        }
-                    ]
+            "model": {
+                "descriptor": {
+                    "type": "hybrid",
+                    "list": [
+                        {"type": "se_e2_a", "rcut": 6, "sel": "auto"},
+                        {"type": "se_e2_a", "rcut": 6, "sel": "auto:1.5"},
+                    ],
                 }
             }
         }
         expected_out = {
-            'model' : {
-                'descriptor': {
-                    'type' : 'hybrid',
-                    'list' : [
-                        {
-                            'type': 'se_e2_a',
-                            'rcut': 6,
-                            'sel': [12,24] 
-                        },
-                        {
-                            'type': 'se_e2_a',
-                            'rcut': 6,
-                            'sel': [16,32]
-                        }
-                    ]
+            "model": {
+                "descriptor": {
+                    "type": "hybrid",
+                    "list": [
+                        {"type": "se_e2_a", "rcut": 6, "sel": [12, 24]},
+                        {"type": "se_e2_a", "rcut": 6, "sel": [16, 32]},
+                    ],
                 }
             }
         }
         jdata = update_sel(jdata)
         self.assertEqual(jdata, expected_out)
 
-
     @patch("deepmd.entrypoints.train.get_sel")
     def test_update_sel(self, sel_mock):
-        sel_mock.return_value = [10,20]
-        jdata = {
-            'model' : {
-                'descriptor': {
-                    'type' : 'se_e2_a',
-                    'rcut': 6,
-                    'sel': "auto"
-                }
-            }
-        }
+        sel_mock.return_value = [10, 20]
+        jdata = {"model": {"descriptor": {"type": "se_e2_a", "rcut": 6, "sel": "auto"}}}
         expected_out = {
-            'model' : {
-                'descriptor': {
-                    'type' : 'se_e2_a',
-                    'rcut': 6,
-                    'sel': [12,24] 
-                }
-            }
+            "model": {"descriptor": {"type": "se_e2_a", "rcut": 6, "sel": [12, 24]}}
         }
         jdata = update_sel(jdata)
         self.assertEqual(jdata, expected_out)
@@ -119,20 +87,20 @@ def test_update_sel(self, sel_mock):
     def test_update_sel_atten_auto(self, sel_mock):
         sel_mock.return_value = [25]
         jdata = {
-            'model' : {
-                'descriptor': {
-                    'type' : 'se_atten',
-                    'sel' : "auto",
-                    'rcut': 6,
+            "model": {
+                "descriptor": {
+                    "type": "se_atten",
+                    "sel": "auto",
+                    "rcut": 6,
                 }
             }
         }
         expected_out = {
-            'model' : {
-                'descriptor': {
-                    'type' : 'se_atten',
-                    'sel' : 28,
-                    'rcut': 6,
+            "model": {
+                "descriptor": {
+                    "type": "se_atten",
+                    "sel": 28,
+                    "rcut": 6,
                 }
             }
         }
@@ -143,20 +111,20 @@ def test_update_sel_atten_auto(self, sel_mock):
     def test_update_sel_atten_int(self, sel_mock):
         sel_mock.return_value = [25]
         jdata = {
-            'model' : {
-                'descriptor': {
-                    'type' : 'se_atten',
-                    'sel' : 30,
-                    'rcut': 6,
+            "model": {
+                "descriptor": {
+                    "type": "se_atten",
+                    "sel": 30,
+                    "rcut": 6,
                 }
             }
         }
         expected_out = {
-            'model' : {
-                'descriptor': {
-                    'type' : 'se_atten',
-                    'sel' : 30,
-                    'rcut': 6,
+            "model": {
+                "descriptor": {
+                    "type": "se_atten",
+                    "sel": 30,
+                    "rcut": 6,
                 }
             }
         }
@@ -167,20 +135,20 @@ def test_update_sel_atten_int(self, sel_mock):
     def test_update_sel_atten_list(self, sel_mock):
         sel_mock.return_value = [25]
         jdata = {
-            'model' : {
-                'descriptor': {
-                    'type' : 'se_atten',
-                    'sel' : 30,
-                    'rcut': 6,
+            "model": {
+                "descriptor": {
+                    "type": "se_atten",
+                    "sel": 30,
+                    "rcut": 6,
                 }
             }
         }
         expected_out = {
-            'model' : {
-                'descriptor': {
-                    'type' : 'se_atten',
-                    'sel' : 30,
-                    'rcut': 6,
+            "model": {
+                "descriptor": {
+                    "type": "se_atten",
+                    "sel": 30,
+                    "rcut": 6,
                 }
             }
         }
@@ -189,18 +157,18 @@ def test_update_sel_atten_list(self, sel_mock):
 
     def test_skip_loc_frame(self):
         jdata = {
-            'model' : {
-                'descriptor': {
-                    'type' : 'loc_frame',
-                    'rcut': 6,
+            "model": {
+                "descriptor": {
+                    "type": "loc_frame",
+                    "rcut": 6,
                 }
             }
         }
         expected_out = {
-            'model' : {
-                'descriptor': {
-                    'type' : 'loc_frame',
-                    'rcut': 6,
+            "model": {
+                "descriptor": {
+                    "type": "loc_frame",
+                    "rcut": 6,
                 }
             }
         }
@@ -214,4 +182,3 @@ def test_wrap_up_4(self):
         self.assertEqual(wrap_up_4(15), 4 * 4)
         self.assertEqual(wrap_up_4(16), 4 * 4)
         self.assertEqual(wrap_up_4(17), 5 * 4)
-        
diff --git a/source/tests/test_transfer.py b/source/tests/test_transfer.py
index 7322be71fd..4caff02c6c 100644
--- a/source/tests/test_transfer.py
+++ b/source/tests/test_transfer.py
@@ -1,57 +1,193 @@
-import os,sys,platform,shutil,dpdata,json
-import numpy as np
-import unittest
+import json
+import os
+import platform
+import shutil
 import subprocess as sp
+import sys
+import unittest
+
+import dpdata
+import numpy as np
+from common import (
+    j_loader,
+    run_dp,
+    tests_path,
+)
+from infer.convert2pb import (
+    convert_pbtxt_to_pb,
+)
 
-from deepmd.env import tf
-from deepmd.infer import DeepPot
-from common import j_loader, tests_path, run_dp
-from infer.convert2pb import convert_pbtxt_to_pb
-from deepmd.entrypoints.transfer import load_graph, transform_graph
+from deepmd.entrypoints.transfer import (
+    load_graph,
+    transform_graph,
+)
+from deepmd.env import (
+    GLOBAL_NP_FLOAT_PRECISION,
+    tf,
+)
+from deepmd.infer import (
+    DeepPot,
+)
 
-from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
-if GLOBAL_NP_FLOAT_PRECISION == np.float32 :
+if GLOBAL_NP_FLOAT_PRECISION == np.float32:
     default_places = 4
-else :
+else:
     default_places = 10
 
-def _file_delete(file) :
+
+def _file_delete(file):
     if os.path.exists(file):
         os.remove(file)
 
+
 def _subprocess_run(command):
     popen = sp.Popen(command.split(), shell=False, stdout=sp.PIPE, stderr=sp.STDOUT)
-    for line in iter(popen.stdout.readline, b''):
-        if hasattr(line, 'decode'):
-            line = line.decode('utf-8')
+    for line in iter(popen.stdout.readline, b""):
+        if hasattr(line, "decode"):
+            line = line.decode("utf-8")
         line = line.rstrip()
         print(line)
     popen.wait()
     return popen.returncode
 
-class TestTransform(unittest.TestCase) :
+
+class TestTransform(unittest.TestCase):
     @classmethod
     def setUpClass(self):
         self.old_model = str(tests_path / "dp-old.pb")
         self.raw_model = str(tests_path / "dp-raw.pb")
         self.new_model = str(tests_path / "dp-new.pb")
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deeppot.pbtxt")), self.old_model)
-        convert_pbtxt_to_pb(str(tests_path / os.path.join("infer","deeppot-1.pbtxt")), self.raw_model)
-        ret = run_dp("dp transfer -O " + self.old_model + " -r " + self.raw_model + " -o " + self.new_model)
-        np.testing.assert_equal(ret, 0, 'DP transfer failed!')
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deeppot.pbtxt")), self.old_model
+        )
+        convert_pbtxt_to_pb(
+            str(tests_path / os.path.join("infer", "deeppot-1.pbtxt")), self.raw_model
+        )
+        ret = run_dp(
+            "dp transfer -O "
+            + self.old_model
+            + " -r "
+            + self.raw_model
+            + " -o "
+            + self.new_model
+        )
+        np.testing.assert_equal(ret, 0, "DP transfer failed!")
 
         self.dp = DeepPot(self.new_model)
-        self.coords = np.array([12.83, 2.56, 2.18,
-                                12.09, 2.87, 2.74,
-                                00.25, 3.32, 1.68,
-                                3.36, 3.00, 1.81,
-                                3.51, 2.51, 2.60,
-                                4.27, 3.22, 1.56])
+        self.coords = np.array(
+            [
+                12.83,
+                2.56,
+                2.18,
+                12.09,
+                2.87,
+                2.74,
+                00.25,
+                3.32,
+                1.68,
+                3.36,
+                3.00,
+                1.81,
+                3.51,
+                2.51,
+                2.60,
+                4.27,
+                3.22,
+                1.56,
+            ]
+        )
         self.atype = [0, 1, 1, 0, 1, 1]
-        self.box = np.array([13., 0., 0., 0., 13., 0., 0., 0., 13.])
-        self.expected_e = np.array([-9.275780747115504710e+01,-1.863501786584258468e+02,-1.863392472863538103e+02,-9.279281325486221021e+01,-1.863671545232153903e+02,-1.863619822847602165e+02])
-        self.expected_f = np.array([-3.034045420701179663e-01,8.405844663871177014e-01,7.696947487118485642e-02,7.662001266663505117e-01,-1.880601391333554251e-01,-6.183333871091722944e-01,-5.036172391059643427e-01,-6.529525836149027151e-01,5.432962643022043459e-01,6.382357912332115024e-01,-1.748518296794561167e-01,3.457363524891907125e-01,1.286482986991941552e-03,3.757251165286925043e-01,-5.972588700887541124e-01,-5.987006197104716154e-01,-2.004450304880958100e-01,2.495901655353461868e-01])
-        self.expected_v = np.array([-2.912234126853306959e-01,-3.800610846612756388e-02,2.776624987489437202e-01,-5.053761003913598976e-02,-3.152373041953385746e-01,1.060894290092162379e-01,2.826389131596073745e-01,1.039129970665329250e-01,-2.584378792325942586e-01,-3.121722367954994914e-01,8.483275876786681990e-02,2.524662342344257682e-01,4.142176771106586414e-02,-3.820285230785245428e-02,-2.727311173065460545e-02,2.668859789777112135e-01,-6.448243569420382404e-02,-2.121731470426218846e-01,-8.624335220278558922e-02,-1.809695356746038597e-01,1.529875294531883312e-01,-1.283658185172031341e-01,-1.992682279795223999e-01,1.409924999632362341e-01,1.398322735274434292e-01,1.804318474574856390e-01,-1.470309318999652726e-01,-2.593983661598450730e-01,-4.236536279233147489e-02,3.386387920184946720e-02,-4.174017537818433543e-02,-1.003500282164128260e-01,1.525690815194478966e-01,3.398976109910181037e-02,1.522253908435125536e-01,-2.349125581341701963e-01,9.515545977581392825e-04,-1.643218849228543846e-02,1.993234765412972564e-02,6.027265332209678569e-04,-9.563256398907417355e-02,1.510815124001868293e-01,-7.738094816888557714e-03,1.502832772532304295e-01,-2.380965783745832010e-01,-2.309456719810296654e-01,-6.666961081213038098e-02,7.955566551234216632e-02,-8.099093777937517447e-02,-3.386641099800401927e-02,4.447884755740908608e-02,1.008593228579038742e-01,4.556718179228393811e-02,-6.078081273849572641e-02])
+        self.box = np.array([13.0, 0.0, 0.0, 0.0, 13.0, 0.0, 0.0, 0.0, 13.0])
+        self.expected_e = np.array(
+            [
+                -9.275780747115504710e01,
+                -1.863501786584258468e02,
+                -1.863392472863538103e02,
+                -9.279281325486221021e01,
+                -1.863671545232153903e02,
+                -1.863619822847602165e02,
+            ]
+        )
+        self.expected_f = np.array(
+            [
+                -3.034045420701179663e-01,
+                8.405844663871177014e-01,
+                7.696947487118485642e-02,
+                7.662001266663505117e-01,
+                -1.880601391333554251e-01,
+                -6.183333871091722944e-01,
+                -5.036172391059643427e-01,
+                -6.529525836149027151e-01,
+                5.432962643022043459e-01,
+                6.382357912332115024e-01,
+                -1.748518296794561167e-01,
+                3.457363524891907125e-01,
+                1.286482986991941552e-03,
+                3.757251165286925043e-01,
+                -5.972588700887541124e-01,
+                -5.987006197104716154e-01,
+                -2.004450304880958100e-01,
+                2.495901655353461868e-01,
+            ]
+        )
+        self.expected_v = np.array(
+            [
+                -2.912234126853306959e-01,
+                -3.800610846612756388e-02,
+                2.776624987489437202e-01,
+                -5.053761003913598976e-02,
+                -3.152373041953385746e-01,
+                1.060894290092162379e-01,
+                2.826389131596073745e-01,
+                1.039129970665329250e-01,
+                -2.584378792325942586e-01,
+                -3.121722367954994914e-01,
+                8.483275876786681990e-02,
+                2.524662342344257682e-01,
+                4.142176771106586414e-02,
+                -3.820285230785245428e-02,
+                -2.727311173065460545e-02,
+                2.668859789777112135e-01,
+                -6.448243569420382404e-02,
+                -2.121731470426218846e-01,
+                -8.624335220278558922e-02,
+                -1.809695356746038597e-01,
+                1.529875294531883312e-01,
+                -1.283658185172031341e-01,
+                -1.992682279795223999e-01,
+                1.409924999632362341e-01,
+                1.398322735274434292e-01,
+                1.804318474574856390e-01,
+                -1.470309318999652726e-01,
+                -2.593983661598450730e-01,
+                -4.236536279233147489e-02,
+                3.386387920184946720e-02,
+                -4.174017537818433543e-02,
+                -1.003500282164128260e-01,
+                1.525690815194478966e-01,
+                3.398976109910181037e-02,
+                1.522253908435125536e-01,
+                -2.349125581341701963e-01,
+                9.515545977581392825e-04,
+                -1.643218849228543846e-02,
+                1.993234765412972564e-02,
+                6.027265332209678569e-04,
+                -9.563256398907417355e-02,
+                1.510815124001868293e-01,
+                -7.738094816888557714e-03,
+                1.502832772532304295e-01,
+                -2.380965783745832010e-01,
+                -2.309456719810296654e-01,
+                -6.666961081213038098e-02,
+                7.955566551234216632e-02,
+                -8.099093777937517447e-02,
+                -3.386641099800401927e-02,
+                4.447884755740908608e-02,
+                1.008593228579038742e-01,
+                4.556718179228393811e-02,
+                -6.078081273849572641e-02,
+            ]
+        )
 
     @classmethod
     def tearDownClass(self):
@@ -61,67 +197,75 @@ def tearDownClass(self):
 
     def test_attrs(self):
         self.assertEqual(self.dp.get_ntypes(), 2)
-        self.assertAlmostEqual(self.dp.get_rcut(), 6.0, places = default_places)
-        self.assertEqual(self.dp.get_type_map(), ['O', 'H'])
+        self.assertAlmostEqual(self.dp.get_rcut(), 6.0, places=default_places)
+        self.assertEqual(self.dp.get_type_map(), ["O", "H"])
         self.assertEqual(self.dp.get_dim_fparam(), 0)
         self.assertEqual(self.dp.get_dim_aparam(), 0)
 
     def test_1frame(self):
-        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, atomic = False)
+        ee, ff, vv = self.dp.eval(self.coords, self.box, self.atype, atomic=False)
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
         # check values
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
     def test_1frame_atm(self):
-        ee, ff, vv, ae, av = self.dp.eval(self.coords, self.box, self.atype, atomic = True)
+        ee, ff, vv, ae, av = self.dp.eval(
+            self.coords, self.box, self.atype, atomic=True
+        )
         # check shape of the returns
         nframes = 1
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
-        self.assertEqual(ae.shape, (nframes,natoms,1))
-        self.assertEqual(av.shape, (nframes,natoms,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
+        self.assertEqual(ae.shape, (nframes, natoms, 1))
+        self.assertEqual(av.shape, (nframes, natoms, 9))
         # check values
-        np.testing.assert_almost_equal(ff.ravel(), self.expected_f.ravel(), default_places)
-        np.testing.assert_almost_equal(ae.ravel(), self.expected_e.ravel(), default_places)
-        np.testing.assert_almost_equal(av.ravel(), self.expected_v.ravel(), default_places)
-        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis = 1)
+        np.testing.assert_almost_equal(
+            ff.ravel(), self.expected_f.ravel(), default_places
+        )
+        np.testing.assert_almost_equal(
+            ae.ravel(), self.expected_e.ravel(), default_places
+        )
+        np.testing.assert_almost_equal(
+            av.ravel(), self.expected_v.ravel(), default_places
+        )
+        expected_se = np.sum(self.expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(self.expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
-
     def test_2frame_atm(self):
         coords2 = np.concatenate((self.coords, self.coords))
         box2 = np.concatenate((self.box, self.box))
-        ee, ff, vv, ae, av = self.dp.eval(coords2, box2, self.atype, atomic = True)
+        ee, ff, vv, ae, av = self.dp.eval(coords2, box2, self.atype, atomic=True)
         # check shape of the returns
         nframes = 2
         natoms = len(self.atype)
-        self.assertEqual(ee.shape, (nframes,1))
-        self.assertEqual(ff.shape, (nframes,natoms,3))
-        self.assertEqual(vv.shape, (nframes,9))
-        self.assertEqual(ae.shape, (nframes,natoms,1))
-        self.assertEqual(av.shape, (nframes,natoms,9))
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
+        self.assertEqual(ae.shape, (nframes, natoms, 1))
+        self.assertEqual(av.shape, (nframes, natoms, 9))
         # check values
-        expected_f = np.concatenate((self.expected_f, self.expected_f), axis = 0)
-        expected_e = np.concatenate((self.expected_e, self.expected_e), axis = 0)
-        expected_v = np.concatenate((self.expected_v, self.expected_v), axis = 0)
+        expected_f = np.concatenate((self.expected_f, self.expected_f), axis=0)
+        expected_e = np.concatenate((self.expected_e, self.expected_e), axis=0)
+        expected_v = np.concatenate((self.expected_v, self.expected_v), axis=0)
         np.testing.assert_almost_equal(ff.ravel(), expected_f.ravel(), default_places)
         np.testing.assert_almost_equal(ae.ravel(), expected_e.ravel(), default_places)
         np.testing.assert_almost_equal(av.ravel(), expected_v.ravel(), default_places)
-        expected_se = np.sum(expected_e.reshape([nframes, -1]), axis = 1)
+        expected_se = np.sum(expected_e.reshape([nframes, -1]), axis=1)
         np.testing.assert_almost_equal(ee.ravel(), expected_se.ravel(), default_places)
-        expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis = 1)
+        expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
-
diff --git a/source/tests/test_type_embed.py b/source/tests/test_type_embed.py
index 57b153c24d..f563ea9e50 100644
--- a/source/tests/test_type_embed.py
+++ b/source/tests/test_type_embed.py
@@ -1,39 +1,60 @@
-import dpdata,os,sys,unittest
+import os
+import sys
+import unittest
+
+import dpdata
 import numpy as np
-from deepmd.env import tf
-from deepmd.utils.type_embed import embed_atom_type, TypeEmbedNet
+
+from deepmd.env import (
+    tf,
+)
+from deepmd.utils.type_embed import (
+    TypeEmbedNet,
+    embed_atom_type,
+)
+
 
 class TestTypeEbd(tf.test.TestCase):
     def test_embed_atom_type(self):
         ntypes = 3
         natoms = tf.constant([5, 5, 3, 0, 2])
         type_embedding = tf.constant(
-            [ 
+            [
                 [1, 2, 3],
                 [3, 2, 1],
                 [7, 7, 7],
-            ])
-        expected_out = [[1,2,3],
-                        [1,2,3],
-                        [1,2,3],
-                        [7,7,7],
-                        [7,7,7]]            
+            ]
+        )
+        expected_out = [[1, 2, 3], [1, 2, 3], [1, 2, 3], [7, 7, 7], [7, 7, 7]]
         atom_embed = embed_atom_type(ntypes, natoms, type_embedding)
         sess = self.test_session().__enter__()
         atom_embed = sess.run(atom_embed)
-        np.testing.assert_almost_equal(
-                    atom_embed, expected_out, 10)
+        np.testing.assert_almost_equal(atom_embed, expected_out, 10)
 
     def test_type_embed_net(self):
-        ten = TypeEmbedNet([2, 4, 8], seed = 1, uniform_seed = True)
+        ten = TypeEmbedNet([2, 4, 8], seed=1, uniform_seed=True)
         type_embedding = ten.build(2)
         sess = self.test_session().__enter__()
         sess.run(tf.global_variables_initializer())
         type_embedding = sess.run(type_embedding)
 
         expected_out = [
-            1.429967002262267917e+00,-9.138175897677495163e-01,-3.799606588218059633e-01,-2.143157692726757046e-01,2.341138114260268743e+00,-1.568346043255314015e+00,8.917082000854256174e-01,-1.500356675378008209e+00,
-            8.955885646123034061e-01,-5.835326470989941061e-01,-1.465708662924672057e+00,-4.052047884085572260e-01,1.367825594590430072e+00,-2.736204307656463497e-01,-4.044263041521370394e-01,-9.438057524881729998e-01
+            1.429967002262267917e00,
+            -9.138175897677495163e-01,
+            -3.799606588218059633e-01,
+            -2.143157692726757046e-01,
+            2.341138114260268743e00,
+            -1.568346043255314015e00,
+            8.917082000854256174e-01,
+            -1.500356675378008209e00,
+            8.955885646123034061e-01,
+            -5.835326470989941061e-01,
+            -1.465708662924672057e00,
+            -4.052047884085572260e-01,
+            1.367825594590430072e00,
+            -2.736204307656463497e-01,
+            -4.044263041521370394e-01,
+            -9.438057524881729998e-01,
         ]
         expected_out = np.reshape(expected_out, [2, 8])
 
@@ -42,5 +63,4 @@ def test_type_embed_net(self):
         # size of embedded vec 8
         self.assertEqual(type_embedding.shape[1], 8)
         # check value
-        np.testing.assert_almost_equal(
-                    type_embedding, expected_out, 10)
+        np.testing.assert_almost_equal(type_embedding, expected_out, 10)
diff --git a/source/tests/test_type_one_side.py b/source/tests/test_type_one_side.py
index c457b5dbeb..803a4be253 100644
--- a/source/tests/test_type_one_side.py
+++ b/source/tests/test_type_one_side.py
@@ -1,16 +1,25 @@
-
 import numpy as np
-from deepmd.env import tf
-from common import gen_data, j_loader
-
-from common import DataSystem
-from deepmd.descriptor import Descriptor
-from deepmd.common import j_must_have
+from common import (
+    DataSystem,
+    gen_data,
+    j_loader,
+)
+
+from deepmd.common import (
+    j_must_have,
+)
+from deepmd.descriptor import (
+    Descriptor,
+)
+from deepmd.env import (
+    tf,
+)
 
 GLOBAL_ENER_FLOAT_PRECISION = tf.float64
 GLOBAL_TF_FLOAT_PRECISION = tf.float64
 GLOBAL_NP_FLOAT_PRECISION = np.float64
 
+
 class TestModel(tf.test.TestCase):
     def setUp(self):
         gen_data(nframes=2)
@@ -27,87 +36,88 @@ def test_descriptor_one_side_exclude_types(self):
         Otherwise, it should be different (with random initial variables). We test
         both situation.
         """
-        jfile = 'water_se_a.json'
+        jfile = "water_se_a.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        sel = j_must_have (jdata['model']['descriptor'], 'sel')
-        ntypes=len(sel)
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
-        
-        test_data = data.get_test ()
-        numb_test = 1
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+        sel = j_must_have(jdata["model"]["descriptor"], "sel")
+        ntypes = len(sel)
 
-        # set parameters
-        jdata['model']['descriptor']['neuron'] = [5, 5, 5]
-        jdata['model']['descriptor']['axis_neuron'] = 2
-        jdata['model']['descriptor']['type_one_side'] = True
-        jdata['model']['descriptor']['exclude_types'] = [[0, 0]]
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        test_data = data.get_test()
+        numb_test = 1
 
+        # set parameters
+        jdata["model"]["descriptor"]["neuron"] = [5, 5, 5]
+        jdata["model"]["descriptor"]["axis_neuron"] = 2
+        jdata["model"]["descriptor"]["type_one_side"] = True
+        jdata["model"]["descriptor"]["exclude_types"] = [[0, 0]]
+
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
 
         # successful
-        descrpt = Descriptor(**jdata['model']['descriptor'])
-        dout \
-            = descrpt.build(
-                t_coord,
-                t_type,
-                t_natoms,
-                t_box,
-                t_mesh,
-                {},
-                reuse = False,
-                suffix = "_se_a_1side_exclude_types"
-            )
+        descrpt = Descriptor(**jdata["model"]["descriptor"])
+        dout = descrpt.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            {},
+            reuse=False,
+            suffix="_se_a_1side_exclude_types",
+        )
         # failed
-        descrpt_failed = Descriptor(**{**jdata['model']['descriptor'], "type_one_side": False})
-        dout_failed \
-            = descrpt_failed.build(
-                t_coord,
-                t_type,
-                t_natoms,
-                t_box,
-                t_mesh,
-                {},
-                reuse = False,
-                suffix = "_se_a_1side_exclude_types_failed"
-            )
-
-        feed_dict_test1 = {t_prop_c:        test_data['prop_c'],
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False}
+        descrpt_failed = Descriptor(
+            **{**jdata["model"]["descriptor"], "type_one_side": False}
+        )
+        dout_failed = descrpt_failed.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            {},
+            reuse=False,
+            suffix="_se_a_1side_exclude_types_failed",
+        )
+
+        feed_dict_test1 = {
+            t_prop_c: test_data["prop_c"],
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
         feed_dict_test2 = feed_dict_test1.copy()
         # original type: 0 0 1 1 1 1
         # current: 0 1 1 1 1 1
         # current: 1 1 1 1 1 1
-        new_natoms1 = test_data['natoms_vec'].copy()
+        new_natoms1 = test_data["natoms_vec"].copy()
         new_natoms1[2] = 1
         new_natoms1[3] = 5
-        new_type1 = test_data['type'].copy()
+        new_type1 = test_data["type"].copy()
         new_type1[:numb_test, 0] = 0
         new_type1[:numb_test, 1:6] = 1
-        new_natoms2 = test_data['natoms_vec'].copy()
+        new_natoms2 = test_data["natoms_vec"].copy()
         new_natoms2[2] = 0
         new_natoms2[3] = 6
-        new_type2 = test_data['type'].copy()
+        new_type2 = test_data["type"].copy()
         new_type2[:numb_test] = 1
         feed_dict_test1[t_type] = np.reshape(new_type1[:numb_test, :], [-1])
         feed_dict_test1[t_natoms] = new_natoms1
@@ -116,14 +126,10 @@ def test_descriptor_one_side_exclude_types(self):
 
         with self.test_session() as sess:
             sess.run(tf.global_variables_initializer())
-            [model_dout1] = sess.run([dout], 
-                                feed_dict = feed_dict_test1)
-            [model_dout2] = sess.run([dout], 
-                                feed_dict = feed_dict_test2)
-            [model_dout1_failed] = sess.run([dout_failed], 
-                                feed_dict = feed_dict_test1)
-            [model_dout2_failed] = sess.run([dout_failed], 
-                                feed_dict = feed_dict_test2)
+            [model_dout1] = sess.run([dout], feed_dict=feed_dict_test1)
+            [model_dout2] = sess.run([dout], feed_dict=feed_dict_test2)
+            [model_dout1_failed] = sess.run([dout_failed], feed_dict=feed_dict_test1)
+            [model_dout2_failed] = sess.run([dout_failed], feed_dict=feed_dict_test2)
         model_dout1 = model_dout1.reshape([6, -1])
         model_dout2 = model_dout2.reshape([6, -1])
         model_dout1_failed = model_dout1_failed.reshape([6, -1])
@@ -131,92 +137,93 @@ def test_descriptor_one_side_exclude_types(self):
 
         np.testing.assert_almost_equal(model_dout1[0], model_dout2[0], 10)
         with self.assertRaises(AssertionError):
-            np.testing.assert_almost_equal(model_dout1_failed[0], model_dout2_failed[0], 10)
-
+            np.testing.assert_almost_equal(
+                model_dout1_failed[0], model_dout2_failed[0], 10
+            )
 
     def test_se_r_one_side_exclude_types(self):
-        """se_r
-        """
-        jfile = 'water_se_r.json'
+        """se_r"""
+        jfile = "water_se_r.json"
         jdata = j_loader(jfile)
 
-        systems = j_must_have(jdata, 'systems')
-        set_pfx = j_must_have(jdata, 'set_prefix')
-        batch_size = j_must_have(jdata, 'batch_size')
-        test_size = j_must_have(jdata, 'numb_test')
+        systems = j_must_have(jdata, "systems")
+        set_pfx = j_must_have(jdata, "set_prefix")
+        batch_size = j_must_have(jdata, "batch_size")
+        test_size = j_must_have(jdata, "numb_test")
         batch_size = 1
         test_size = 1
-        rcut = j_must_have (jdata['model']['descriptor'], 'rcut')
-        sel = j_must_have (jdata['model']['descriptor'], 'sel')
-        ntypes=len(sel)
-        
-        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt = None)
-        
-        test_data = data.get_test ()
-        numb_test = 1
+        rcut = j_must_have(jdata["model"]["descriptor"], "rcut")
+        sel = j_must_have(jdata["model"]["descriptor"], "sel")
+        ntypes = len(sel)
 
-        # set parameters
-        jdata['model']['descriptor']['neuron'] = [5, 5, 5]
-        jdata['model']['descriptor']['type_one_side'] = True
-        jdata['model']['descriptor']['exclude_types'] = [[0, 0]]
+        data = DataSystem(systems, set_pfx, batch_size, test_size, rcut, run_opt=None)
 
-        t_prop_c           = tf.placeholder(tf.float32, [5],    name='t_prop_c')
-        t_coord            = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name='i_coord')
-        t_type             = tf.placeholder(tf.int32,   [None], name='i_type')
-        t_natoms           = tf.placeholder(tf.int32,   [ntypes+2], name='i_natoms')
-        t_box              = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name='i_box')
-        t_mesh             = tf.placeholder(tf.int32,   [None], name='i_mesh')
-        is_training        = tf.placeholder(tf.bool)
+        test_data = data.get_test()
+        numb_test = 1
 
+        # set parameters
+        jdata["model"]["descriptor"]["neuron"] = [5, 5, 5]
+        jdata["model"]["descriptor"]["type_one_side"] = True
+        jdata["model"]["descriptor"]["exclude_types"] = [[0, 0]]
+
+        t_prop_c = tf.placeholder(tf.float32, [5], name="t_prop_c")
+        t_coord = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None], name="i_coord")
+        t_type = tf.placeholder(tf.int32, [None], name="i_type")
+        t_natoms = tf.placeholder(tf.int32, [ntypes + 2], name="i_natoms")
+        t_box = tf.placeholder(GLOBAL_TF_FLOAT_PRECISION, [None, 9], name="i_box")
+        t_mesh = tf.placeholder(tf.int32, [None], name="i_mesh")
+        is_training = tf.placeholder(tf.bool)
 
         # successful
-        descrpt = Descriptor(**jdata['model']['descriptor'])
-        dout \
-            = descrpt.build(
-                t_coord,
-                t_type,
-                t_natoms,
-                t_box,
-                t_mesh,
-                {},
-                reuse = False,
-                suffix = "_se_r_1side_exclude_types"
-            )
+        descrpt = Descriptor(**jdata["model"]["descriptor"])
+        dout = descrpt.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            {},
+            reuse=False,
+            suffix="_se_r_1side_exclude_types",
+        )
         # failed
-        descrpt_failed = Descriptor(**{**jdata['model']['descriptor'], "type_one_side": False})
-        dout_failed \
-            = descrpt_failed.build(
-                t_coord,
-                t_type,
-                t_natoms,
-                t_box,
-                t_mesh,
-                {},
-                reuse = False,
-                suffix = "_se_r_1side_exclude_types_failed"
-            )
-
-        feed_dict_test1 = {t_prop_c:        test_data['prop_c'],
-                          t_coord:         np.reshape(test_data['coord']    [:numb_test, :], [-1]),
-                          t_box:           test_data['box']                 [:numb_test, :],
-                          t_type:          np.reshape(test_data['type']     [:numb_test, :], [-1]),
-                          t_natoms:        test_data['natoms_vec'],
-                          t_mesh:          test_data['default_mesh'],
-                          is_training:     False}
+        descrpt_failed = Descriptor(
+            **{**jdata["model"]["descriptor"], "type_one_side": False}
+        )
+        dout_failed = descrpt_failed.build(
+            t_coord,
+            t_type,
+            t_natoms,
+            t_box,
+            t_mesh,
+            {},
+            reuse=False,
+            suffix="_se_r_1side_exclude_types_failed",
+        )
+
+        feed_dict_test1 = {
+            t_prop_c: test_data["prop_c"],
+            t_coord: np.reshape(test_data["coord"][:numb_test, :], [-1]),
+            t_box: test_data["box"][:numb_test, :],
+            t_type: np.reshape(test_data["type"][:numb_test, :], [-1]),
+            t_natoms: test_data["natoms_vec"],
+            t_mesh: test_data["default_mesh"],
+            is_training: False,
+        }
         feed_dict_test2 = feed_dict_test1.copy()
         # original type: 0 0 1 1 1 1
         # current: 0 1 1 1 1 1
         # current: 1 1 1 1 1 1
-        new_natoms1 = test_data['natoms_vec'].copy()
+        new_natoms1 = test_data["natoms_vec"].copy()
         new_natoms1[2] = 1
         new_natoms1[3] = 5
-        new_type1 = test_data['type'].copy()
+        new_type1 = test_data["type"].copy()
         new_type1[:numb_test, 0] = 0
         new_type1[:numb_test, 1:6] = 1
-        new_natoms2 = test_data['natoms_vec'].copy()
+        new_natoms2 = test_data["natoms_vec"].copy()
         new_natoms2[2] = 0
         new_natoms2[3] = 6
-        new_type2 = test_data['type'].copy()
+        new_type2 = test_data["type"].copy()
         new_type2[:numb_test] = 1
         feed_dict_test1[t_type] = np.reshape(new_type1[:numb_test, :], [-1])
         feed_dict_test1[t_natoms] = new_natoms1
@@ -225,14 +232,10 @@ def test_se_r_one_side_exclude_types(self):
 
         with self.test_session() as sess:
             sess.run(tf.global_variables_initializer())
-            [model_dout1] = sess.run([dout], 
-                                feed_dict = feed_dict_test1)
-            [model_dout2] = sess.run([dout], 
-                                feed_dict = feed_dict_test2)
-            [model_dout1_failed] = sess.run([dout_failed], 
-                                feed_dict = feed_dict_test1)
-            [model_dout2_failed] = sess.run([dout_failed], 
-                                feed_dict = feed_dict_test2)
+            [model_dout1] = sess.run([dout], feed_dict=feed_dict_test1)
+            [model_dout2] = sess.run([dout], feed_dict=feed_dict_test2)
+            [model_dout1_failed] = sess.run([dout_failed], feed_dict=feed_dict_test1)
+            [model_dout2_failed] = sess.run([dout_failed], feed_dict=feed_dict_test2)
         model_dout1 = model_dout1.reshape([6, -1])
         model_dout2 = model_dout2.reshape([6, -1])
         model_dout1_failed = model_dout1_failed.reshape([6, -1])
@@ -240,4 +243,6 @@ def test_se_r_one_side_exclude_types(self):
 
         np.testing.assert_almost_equal(model_dout1[0], model_dout2[0], 10)
         with self.assertRaises(AssertionError):
-            np.testing.assert_almost_equal(model_dout1_failed[0], model_dout2_failed[0], 10)
+            np.testing.assert_almost_equal(
+                model_dout1_failed[0], model_dout2_failed[0], 10
+            )
diff --git a/source/tests/water.json b/source/tests/water.json
index f4909a0971..32522776a4 100644
--- a/source/tests/water.json
+++ b/source/tests/water.json
@@ -1,54 +1,80 @@
 {
-    "_comment": " model parameters",
-    "model" :{
-	"descriptor":{
-	    "type":		"loc_frame",
-	    "sel_a":		[16, 32],
-	    "sel_r":		[30, 60],
-	    "rcut":		6.00,
-	    "axis_rule":	[0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0],
-	    "_comment":	" default rule: []",
-	    "_comment":	" user defined rule: for each type provides two axes, ",
-	    "_comment":	"                    for each axis: (a_or_r, type, idx)",
-	    "_comment":	"                    if type < 0, exclude type -(type+1)",
-	    "_comment": "                    for water (O:0, H:1) it can be",
-	    "_comment": "                    [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]"
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 120, 60, 30, 10],
-	    "seed":		1
-	}
+  "_comment": " model parameters",
+  "model": {
+    "descriptor": {
+      "type": "loc_frame",
+      "sel_a": [
+        16,
+        32
+      ],
+      "sel_r": [
+        30,
+        60
+      ],
+      "rcut": 6.00,
+      "axis_rule": [
+        0,
+        1,
+        0,
+        0,
+        1,
+        1,
+        0,
+        0,
+        0,
+        0,
+        1,
+        0
+      ],
+      "_comment": " default rule: []",
+      "_comment": " user defined rule: for each type provides two axes, ",
+      "_comment": "                    for each axis: (a_or_r, type, idx)",
+      "_comment": "                    if type < 0, exclude type -(type+1)",
+      "_comment": "                    for water (O:0, H:1) it can be",
+      "_comment": "                    [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]"
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        120,
+        60,
+        30,
+        10
+      ],
+      "seed": 1
+    }
+  },
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	4,
-    "start_lr":		0.001,
-    "decay_steps":	5000,
-    "decay_rate":	0.95,
+  "_comment": " traing controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": 4,
+  "start_lr": 0.001,
+  "decay_steps": 5000,
+  "decay_rate": 0.95,
 
-    "start_pref_e":	0.02,
-    "limit_pref_e":	8,
-    "start_pref_f":	1000,
-    "limit_pref_f":	1,
-    "start_pref_v":	0,
-    "limit_pref_v":	0,
+  "start_pref_e": 0.02,
+  "limit_pref_e": 8,
+  "start_pref_f": 1000,
+  "limit_pref_f": 1,
+  "start_pref_v": 0,
+  "limit_pref_v": 0,
 
-    "seed":		1,
+  "seed": 1,
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	1,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 1,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
 
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/water_layer_name.json b/source/tests/water_layer_name.json
index 772a302cee..67ea10d1bd 100644
--- a/source/tests/water_layer_name.json
+++ b/source/tests/water_layer_name.json
@@ -1,88 +1,106 @@
 {
-    "_comment": "layer_name",
-    "model" : {
-	"descriptor" :{
-	    "type":		"se_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[8, 16, 32],
-	    "resnet_dt":	false,
-	    "axis_neuron":	16,
-	    "seed":		1
-	},
-	"fitting_net_dict" : {
+  "_comment": "layer_name",
+  "model": {
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        8,
+        16,
+        32
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1
+    },
+    "fitting_net_dict": {
       "water_ener": {
-        "type":		"ener",
+        "type": "ener",
         "neuron": [
           32,
           32,
           32
         ],
         "resnet_dt": true,
-        "layer_name": ["layer0", "layer1", "layer2", "final_layer"],
+        "layer_name": [
+          "layer0",
+          "layer1",
+          "layer2",
+          "final_layer"
+        ],
         "seed": 1
       },
       "water_ener2": {
-        "type":		"ener",
+        "type": "ener",
         "neuron": [
           32,
           32,
           32
         ],
         "resnet_dt": true,
-        "layer_name": ["layer0", "layer1", "layer2", "final_layer"],
+        "layer_name": [
+          "layer0",
+          "layer1",
+          "layer2",
+          "final_layer"
+        ],
         "seed": 2
       }
     }
-    },
-    "learning_rate" :{
-	"type":		"exp",
-	"start_lr":	0.001,
-	"decay_steps":	5000,
-	"decay_rate":	0.95,
-	"_comment":	"that's all"
-    },
+  },
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 0.001,
+    "decay_steps": 5000,
+    "decay_rate": 0.95,
+    "_comment": "that's all"
+  },
 
-    "loss_dict" : {
-      "water_ener": {
-        "type": "ener",
-        "start_pref_e":	0.02,
-        "limit_pref_e":	1,
-        "start_pref_f":	1000,
-        "limit_pref_f":	1,
-        "start_pref_v":	0,
-        "limit_pref_v":	0
-      },
-      "water_ener2": {
-        "type": "ener",
-        "start_pref_e":	0.02,
-        "limit_pref_e":	1,
-        "start_pref_f":	1000,
-        "limit_pref_f":	1,
-        "start_pref_v":	0,
-        "limit_pref_v":	0
-      }
+  "loss_dict": {
+    "water_ener": {
+      "type": "ener",
+      "start_pref_e": 0.02,
+      "limit_pref_e": 1,
+      "start_pref_f": 1000,
+      "limit_pref_f": 1,
+      "start_pref_v": 0,
+      "limit_pref_v": 0
     },
+    "water_ener2": {
+      "type": "ener",
+      "start_pref_e": 0.02,
+      "limit_pref_e": 1,
+      "start_pref_f": 1000,
+      "limit_pref_f": 1,
+      "start_pref_v": 0,
+      "limit_pref_v": 0
+    }
+  },
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	1,
-    "seed":		1,
+  "_comment": " traing controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": 1,
+  "seed": 1,
 
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	1,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
-    "profiling":	false,
-    "profiling_file":	"timeline.json",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 1,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
+  "profiling": false,
+  "profiling_file": "timeline.json",
 
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/water_multi.json b/source/tests/water_multi.json
index 6202e8ebd2..f16e0eadac 100644
--- a/source/tests/water_multi.json
+++ b/source/tests/water_multi.json
@@ -1,19 +1,26 @@
 {
-    "_comment": " model parameters",
-    "model" : {
-	"descriptor" :{
-	    "type":		"se_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	16,
-	    "seed":		1
-	},
-	"fitting_net_dict" : {
+  "_comment": " model parameters",
+  "model": {
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1
+    },
+    "fitting_net_dict": {
       "water_ener": {
-        "type":		"ener",
+        "type": "ener",
         "neuron": [
           240,
           240,
@@ -23,58 +30,65 @@
         "seed": 1
       },
       "water_dipole": {
-	    "type":		"dipole",
-	    "sel_type":		[0],
-	    "fit_diag":		false,
-	    "neuron":		[100, 100, 100],
-	    "resnet_dt":	true,
-	    "seed":		1
+        "type": "dipole",
+        "sel_type": [
+          0
+        ],
+        "fit_diag": false,
+        "neuron": [
+          100,
+          100,
+          100
+        ],
+        "resnet_dt": true,
+        "seed": 1
       }
     }
-    },
-    "learning_rate" :{
-	"type":		"exp",
-	"start_lr":	0.001,
-	"decay_steps":	5000,
-	"decay_rate":	0.95,
-	"_comment":	"that's all"
-    },
+  },
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 0.001,
+    "decay_steps": 5000,
+    "decay_rate": 0.95,
+    "_comment": "that's all"
+  },
 
-    "loss_dict" : {
-      "water_ener": {
-        "type": "ener",
-        "start_pref_e":	0.02,
-        "limit_pref_e":	1,
-        "start_pref_f":	1000,
-        "limit_pref_f":	1,
-        "start_pref_v":	0,
-        "limit_pref_v":	0
-      },
-      "water_dipole": {
-        "type": "tensor",
-        "pref": 1.0,
-        "pref_atomic": 1.0
-      }
+  "loss_dict": {
+    "water_ener": {
+      "type": "ener",
+      "start_pref_e": 0.02,
+      "limit_pref_e": 1,
+      "start_pref_f": 1000,
+      "limit_pref_f": 1,
+      "start_pref_v": 0,
+      "limit_pref_v": 0
     },
+    "water_dipole": {
+      "type": "tensor",
+      "pref": 1.0,
+      "pref_atomic": 1.0
+    }
+  },
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	1,
-    "seed":		1,
+  "_comment": " traing controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": 1,
+  "seed": 1,
 
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	1,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
-    "profiling":	false,
-    "profiling_file":	"timeline.json",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 1,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
+  "profiling": false,
+  "profiling_file": "timeline.json",
 
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/water_se_a.json b/source/tests/water_se_a.json
index f948682368..4cc9e3c21d 100644
--- a/source/tests/water_se_a.json
+++ b/source/tests/water_se_a.json
@@ -1,55 +1,66 @@
 {
-    "_comment": " model parameters",
-    "model" : {
-	"descriptor" :{
-	    "type":		"se_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	16,
-	    "seed":		1
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1
-	}
+  "_comment": " model parameters",
+  "model": {
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1
+    }
+  },
 
+  "_comment": " traing controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": 1,
+  "start_lr": 0.005,
+  "decay_steps": 5000,
+  "decay_rate": 0.95,
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	1,
-    "start_lr":		0.005,
-    "decay_steps":	5000,
-    "decay_rate":	0.95,
+  "start_pref_e": 0.02,
+  "limit_pref_e": 1,
+  "start_pref_f": 1000,
+  "limit_pref_f": 1,
+  "start_pref_v": 0,
+  "limit_pref_v": 0,
 
-    "start_pref_e":	0.02,
-    "limit_pref_e":	1,
-    "start_pref_f":	1000,
-    "limit_pref_f":	1,
-    "start_pref_v":	0,
-    "limit_pref_v":	0,
+  "seed": 1,
 
-    "seed":		1,
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 1,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
+  "profiling": false,
+  "profiling_file": "timeline.json",
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	1,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
-    "profiling":	false,
-    "profiling_file":	"timeline.json",
-
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/water_se_a_afparam.json b/source/tests/water_se_a_afparam.json
index ca2187b988..fa4ab39231 100644
--- a/source/tests/water_se_a_afparam.json
+++ b/source/tests/water_se_a_afparam.json
@@ -1,57 +1,68 @@
 {
-    "_comment": " model parameters",
-    "model" : {
-	"descriptor" :{
-	    "type":		"se_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	16,
-	    "seed":		1
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "numb_fparam":	2,
-	    "numb_aparam":	2,
-	    "seed":		1
-	}
+  "_comment": " model parameters",
+  "model": {
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "numb_fparam": 2,
+      "numb_aparam": 2,
+      "seed": 1
+    }
+  },
 
+  "_comment": " traing controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": 1,
+  "start_lr": 0.005,
+  "decay_steps": 5000,
+  "decay_rate": 0.95,
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	1,
-    "start_lr":		0.005,
-    "decay_steps":	5000,
-    "decay_rate":	0.95,
+  "start_pref_e": 0.02,
+  "limit_pref_e": 1,
+  "start_pref_f": 1000,
+  "limit_pref_f": 1,
+  "start_pref_v": 0,
+  "limit_pref_v": 0,
 
-    "start_pref_e":	0.02,
-    "limit_pref_e":	1,
-    "start_pref_f":	1000,
-    "limit_pref_f":	1,
-    "start_pref_v":	0,
-    "limit_pref_v":	0,
+  "seed": 1,
 
-    "seed":		1,
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 1,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
+  "profiling": false,
+  "profiling_file": "timeline.json",
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	1,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
-    "profiling":	false,
-    "profiling_file":	"timeline.json",
-
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/water_se_a_aparam.json b/source/tests/water_se_a_aparam.json
index daaee54cb2..d1a453e129 100644
--- a/source/tests/water_se_a_aparam.json
+++ b/source/tests/water_se_a_aparam.json
@@ -1,56 +1,67 @@
 {
-    "_comment": " model parameters",
-    "model" : {
-	"descriptor" :{
-	    "type":		"se_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	16,
-	    "seed":		1
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "numb_aparam":	2,
-	    "seed":		1
-	}
+  "_comment": " model parameters",
+  "model": {
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "numb_aparam": 2,
+      "seed": 1
+    }
+  },
 
+  "_comment": " traing controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": 1,
+  "start_lr": 0.005,
+  "decay_steps": 5000,
+  "decay_rate": 0.95,
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	1,
-    "start_lr":		0.005,
-    "decay_steps":	5000,
-    "decay_rate":	0.95,
+  "start_pref_e": 0.02,
+  "limit_pref_e": 1,
+  "start_pref_f": 1000,
+  "limit_pref_f": 1,
+  "start_pref_v": 0,
+  "limit_pref_v": 0,
 
-    "start_pref_e":	0.02,
-    "limit_pref_e":	1,
-    "start_pref_f":	1000,
-    "limit_pref_f":	1,
-    "start_pref_v":	0,
-    "limit_pref_v":	0,
+  "seed": 1,
 
-    "seed":		1,
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 1,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
+  "profiling": false,
+  "profiling_file": "timeline.json",
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	1,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
-    "profiling":	false,
-    "profiling_file":	"timeline.json",
-
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/water_se_a_fparam.json b/source/tests/water_se_a_fparam.json
index b27ae4c467..48cefcfc86 100644
--- a/source/tests/water_se_a_fparam.json
+++ b/source/tests/water_se_a_fparam.json
@@ -1,56 +1,67 @@
 {
-    "_comment": " model parameters",
-    "model" : {
-	"descriptor" :{
-	    "type":		"se_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	16,
-	    "seed":		1
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "numb_fparam":	2,
-	    "seed":		1
-	}
+  "_comment": " model parameters",
+  "model": {
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "numb_fparam": 2,
+      "seed": 1
+    }
+  },
 
+  "_comment": " traing controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": 1,
+  "start_lr": 0.005,
+  "decay_steps": 5000,
+  "decay_rate": 0.95,
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	1,
-    "start_lr":		0.005,
-    "decay_steps":	5000,
-    "decay_rate":	0.95,
+  "start_pref_e": 0.02,
+  "limit_pref_e": 1,
+  "start_pref_f": 1000,
+  "limit_pref_f": 1,
+  "start_pref_v": 0,
+  "limit_pref_v": 0,
 
-    "start_pref_e":	0.02,
-    "limit_pref_e":	1,
-    "start_pref_f":	1000,
-    "limit_pref_f":	1,
-    "start_pref_v":	0,
-    "limit_pref_v":	0,
+  "seed": 1,
 
-    "seed":		1,
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 1,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
+  "profiling": false,
+  "profiling_file": "timeline.json",
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	1,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
-    "profiling":	false,
-    "profiling_file":	"timeline.json",
-
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/water_se_a_srtab.json b/source/tests/water_se_a_srtab.json
index 84c920d3e7..962659d9f0 100644
--- a/source/tests/water_se_a_srtab.json
+++ b/source/tests/water_se_a_srtab.json
@@ -1,59 +1,70 @@
 {
-    "_comment": " model parameters",
-    "model" : {
-	"use_srtab":		"tab.xvg",
-	"smin_alpha":		0.3,
-	"sw_rmin":		0.6,
-	"sw_rmax":		1.4,	
-	"descriptor" :{
-	    "type":		"se_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "axis_neuron":	16,
-	    "seed":		1
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1
-	}
+  "_comment": " model parameters",
+  "model": {
+    "use_srtab": "tab.xvg",
+    "smin_alpha": 0.3,
+    "sw_rmin": 0.6,
+    "sw_rmax": 1.4,
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "axis_neuron": 16,
+      "seed": 1
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1
+    }
+  },
 
+  "_comment": " traing controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": 1,
+  "start_lr": 0.005,
+  "decay_steps": 5000,
+  "decay_rate": 0.95,
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	1,
-    "start_lr":		0.005,
-    "decay_steps":	5000,
-    "decay_rate":	0.95,
+  "start_pref_e": 0.02,
+  "limit_pref_e": 1,
+  "start_pref_f": 1000,
+  "limit_pref_f": 1,
+  "start_pref_v": 0,
+  "limit_pref_v": 0,
 
-    "start_pref_e":	0.02,
-    "limit_pref_e":	1,
-    "start_pref_f":	1000,
-    "limit_pref_f":	1,
-    "start_pref_v":	0,
-    "limit_pref_v":	0,
+  "seed": 1,
 
-    "seed":		1,
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 1,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
+  "profiling": false,
+  "profiling_file": "timeline.json",
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	1,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
-    "profiling":	false,
-    "profiling_file":	"timeline.json",
-
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/water_se_a_type.json b/source/tests/water_se_a_type.json
index 5fd5b27ab7..e764b22d0f 100644
--- a/source/tests/water_se_a_type.json
+++ b/source/tests/water_se_a_type.json
@@ -1,61 +1,76 @@
 {
-    "_comment": " model parameters",
-    "model" : {
-	"type_embedding":{
-	    "neuron":		[5,10,10],
-	    "resnet_dt":	false,	    
-	    "seed":		1
-	},
-	"descriptor" :{
-	    "type":		"se_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "type_one_side":    false,
-	    "axis_neuron":	16,
-	    "seed":		1
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1
-	}
+  "_comment": " model parameters",
+  "model": {
+    "type_embedding": {
+      "neuron": [
+        5,
+        10,
+        10
+      ],
+      "resnet_dt": false,
+      "seed": 1
     },
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "type_one_side": false,
+      "axis_neuron": 16,
+      "seed": 1
+    },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1
+    }
+  },
 
+  "_comment": " traing controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": 1,
+  "start_lr": 0.005,
+  "decay_steps": 5000,
+  "decay_rate": 0.95,
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	1,
-    "start_lr":		0.005,
-    "decay_steps":	5000,
-    "decay_rate":	0.95,
-
-    "start_pref_e":	0.02,
-    "limit_pref_e":	1,
-    "start_pref_f":	1000,
-    "limit_pref_f":	1,
-    "start_pref_v":	0,
-    "limit_pref_v":	0,
+  "start_pref_e": 0.02,
+  "limit_pref_e": 1,
+  "start_pref_f": 1000,
+  "limit_pref_f": 1,
+  "start_pref_v": 0,
+  "limit_pref_v": 0,
 
-    "seed":		1,
+  "seed": 1,
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	1,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
-    "profiling":	false,
-    "profiling_file":	"timeline.json",
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 1,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
+  "profiling": false,
+  "profiling_file": "timeline.json",
 
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/water_se_atten.json b/source/tests/water_se_atten.json
index 1a1e02c11c..2b46f06b6a 100644
--- a/source/tests/water_se_atten.json
+++ b/source/tests/water_se_atten.json
@@ -1,66 +1,79 @@
 {
-    "_comment": " model parameters",
-    "model" : {
-    "type_map":     ["O", "H"],
-	"type_embedding":{
-	    "neuron":		[8],
-	    "resnet_dt":	false,
-	    "seed":		1
-	},
-	"descriptor" :{
-	    "type":		"se_atten",
-	    "sel":		120,
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "type_one_side":    false,
-	    "axis_neuron":	16,
-	    "seed":		1,
-        "attn":		128,
-		"attn_layer":	2,
-		"attn_dotr": 	true,
-		"attn_mask": 	false
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "type_embedding": {
+      "neuron": [
+        8
+      ],
+      "resnet_dt": false,
+      "seed": 1
     },
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1
-	}
+    "descriptor": {
+      "type": "se_atten",
+      "sel": 120,
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "type_one_side": false,
+      "axis_neuron": 16,
+      "seed": 1,
+      "attn": 128,
+      "attn_layer": 2,
+      "attn_dotr": true,
+      "attn_mask": false
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1
+    }
+  },
 
+  "_comment": " traing controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": 1,
+  "start_lr": 0.005,
+  "decay_steps": 5000,
+  "decay_rate": 0.95,
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",
-    "stop_batch":	1000000,
-    "batch_size":	1,
-    "start_lr":		0.005,
-    "decay_steps":	5000,
-    "decay_rate":	0.95,
+  "start_pref_e": 0.02,
+  "limit_pref_e": 1,
+  "start_pref_f": 1000,
+  "limit_pref_f": 1,
+  "start_pref_v": 0,
+  "limit_pref_v": 0,
 
-    "start_pref_e":	0.02,
-    "limit_pref_e":	1,
-    "start_pref_f":	1000,
-    "limit_pref_f":	1,
-    "start_pref_v":	0,
-    "limit_pref_v":	0,
+  "seed": 1,
 
-    "seed":		1,
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 1,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
+  "profiling": false,
+  "profiling_file": "timeline.json",
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	1,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
-    "profiling":	false,
-    "profiling_file":	"timeline.json",
-
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/water_se_atten_mixed_type.json b/source/tests/water_se_atten_mixed_type.json
index ac60aac921..67ab8e782d 100644
--- a/source/tests/water_se_atten_mixed_type.json
+++ b/source/tests/water_se_atten_mixed_type.json
@@ -1,66 +1,79 @@
 {
-    "_comment": " model parameters",
-    "model" : {
-    "type_map":     ["foo", "bar"],
-	"type_embedding":{
-	    "neuron":		[8],
-	    "resnet_dt":	false,
-	    "seed":		1
-	},
-	"descriptor" :{
-	    "type":		"se_atten",
-	    "sel":		120,
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "type_one_side":    false,
-	    "axis_neuron":	16,
-	    "seed":		1,
-        "attn":		128,
-		"attn_layer":	2,
-		"attn_dotr": 	true,
-		"attn_mask": 	false
+  "_comment": " model parameters",
+  "model": {
+    "type_map": [
+      "foo",
+      "bar"
+    ],
+    "type_embedding": {
+      "neuron": [
+        8
+      ],
+      "resnet_dt": false,
+      "seed": 1
     },
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1
-	}
+    "descriptor": {
+      "type": "se_atten",
+      "sel": 120,
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "type_one_side": false,
+      "axis_neuron": 16,
+      "seed": 1,
+      "attn": 128,
+      "attn_layer": 2,
+      "attn_dotr": true,
+      "attn_mask": false
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1
+    }
+  },
 
+  "_comment": " traing controls",
+  "systems": [
+    "system_mixed_type"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": 1,
+  "start_lr": 0.005,
+  "decay_steps": 5000,
+  "decay_rate": 0.95,
 
-    "_comment": " traing controls",
-    "systems":		["system_mixed_type"],
-    "set_prefix":	"set",
-    "stop_batch":	1000000,
-    "batch_size":	1,
-    "start_lr":		0.005,
-    "decay_steps":	5000,
-    "decay_rate":	0.95,
+  "start_pref_e": 0.02,
+  "limit_pref_e": 1,
+  "start_pref_f": 1000,
+  "limit_pref_f": 1,
+  "start_pref_v": 0,
+  "limit_pref_v": 0,
 
-    "start_pref_e":	0.02,
-    "limit_pref_e":	1,
-    "start_pref_f":	1000,
-    "limit_pref_f":	1,
-    "start_pref_v":	0,
-    "limit_pref_v":	0,
+  "seed": 1,
 
-    "seed":		1,
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 1,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
+  "profiling": false,
+  "profiling_file": "timeline.json",
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	1,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
-    "profiling":	false,
-    "profiling_file":	"timeline.json",
-
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/water_se_r.json b/source/tests/water_se_r.json
index 43194f42fd..37c0239b36 100644
--- a/source/tests/water_se_r.json
+++ b/source/tests/water_se_r.json
@@ -1,53 +1,65 @@
 {
-    "_comment": " model parameters",
-    "model" : {
-	"descriptor" : {
-	    "type":		"se_r",
-	    "sel":		[46, 92],
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "resnet_dt":	false,
-	    "seed":		1
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1
-	}
+  "_comment": " model parameters",
+  "model": {
+    "descriptor": {
+      "type": "se_r",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "resnet_dt": false,
+      "seed": 1
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1
+    }
+  },
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	1,
-    "start_lr":		0.005,
-    "decay_steps":	5000,
-    "decay_rate":	0.95,
+  "_comment": " traing controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": 1,
+  "start_lr": 0.005,
+  "decay_steps": 5000,
+  "decay_rate": 0.95,
 
-    "start_pref_e":	0.02,
-    "limit_pref_e":	1,
-    "start_pref_f":	1000,
-    "limit_pref_f":	1,
-    "start_pref_v":	0,
-    "limit_pref_v":	0,
+  "start_pref_e": 0.02,
+  "limit_pref_e": 1,
+  "start_pref_f": 1000,
+  "limit_pref_f": 1,
+  "start_pref_v": 0,
+  "limit_pref_v": 0,
 
-    "seed":		1,
+  "seed": 1,
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	1,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
-    "profiling":	false,
-    "profiling_file":	"timeline.json",
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 1,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
+  "profiling": false,
+  "profiling_file": "timeline.json",
 
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/water_se_t.json b/source/tests/water_se_t.json
index 99b2571091..e7d1b3e486 100644
--- a/source/tests/water_se_t.json
+++ b/source/tests/water_se_t.json
@@ -1,53 +1,65 @@
 {
-    "_comment": " model parameters",
-    "model" : {
-	"descriptor" : {
-	    "type":		"se_3be",
-	    "sel":		[40, 80],
-	    "rcut_smth":	0.50,
-	    "rcut":		6.00,
-	    "neuron":		[2, 4, 8],
-	    "resnet_dt":	false,
-	    "seed":		1
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1
-	}
+  "_comment": " model parameters",
+  "model": {
+    "descriptor": {
+      "type": "se_3be",
+      "sel": [
+        40,
+        80
+      ],
+      "rcut_smth": 0.50,
+      "rcut": 6.00,
+      "neuron": [
+        2,
+        4,
+        8
+      ],
+      "resnet_dt": false,
+      "seed": 1
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1
+    }
+  },
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	1,
-    "start_lr":		0.005,
-    "decay_steps":	5000,
-    "decay_rate":	0.95,
+  "_comment": " traing controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": 1,
+  "start_lr": 0.005,
+  "decay_steps": 5000,
+  "decay_rate": 0.95,
 
-    "start_pref_e":	0.02,
-    "limit_pref_e":	1,
-    "start_pref_f":	1000,
-    "limit_pref_f":	1,
-    "start_pref_v":	0,
-    "limit_pref_v":	0,
+  "start_pref_e": 0.02,
+  "limit_pref_e": 1,
+  "start_pref_f": 1000,
+  "limit_pref_f": 1,
+  "start_pref_v": 0,
+  "limit_pref_v": 0,
 
-    "seed":		1,
+  "seed": 1,
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	1,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":	true,
-    "time_training":	true,
-    "profiling":	false,
-    "profiling_file":	"timeline.json",
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 1,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
+  "profiling": false,
+  "profiling_file": "timeline.json",
 
-    "_comment":		"that's all"
+  "_comment": "that's all"
 }
-
diff --git a/source/tests/wfc.json b/source/tests/wfc.json
index ab2ba7fc99..b1cb832a33 100644
--- a/source/tests/wfc.json
+++ b/source/tests/wfc.json
@@ -1,61 +1,92 @@
 {
-    "_comment": " model parameters",
-    "model":{
-	"type":			"polar",
-	"type_map":		["O", "H"],
-	"descriptor" :{
-	    "type":		"loc_frame",
-	    "sel_a":		[16, 32],
-	    "sel_r":		[30, 60],
-	    "rcut":		6.00,
-	    "axis_rule":	[0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0],
-	    "_comment":	" default rule: []",
-	    "_comment":	" user defined rule: for each type provides two axes, ",
-	    "_comment":	"                    for each axis: (a_or_r, type, idx)",
-	    "_comment":	"                    if type < 0, exclude type -(type+1)",
-	    "_comment":	"                    for water (O:0, H:1) it can be",
-	    "_comment":	"                    [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]",
-	    "_comment": " that's all"
-	},
-	"fitting_net": {
-	    "type":		"wfc",
-	    "wfc_numb":		4,
-	    "sel_type":		[0],
-	    "neuron":		[100, 100, 100],
-	    "resnet_dt":	true,
-	    "seed":		1,
-	    "_comment":		" that's all"
-	},
-	"_comment":	" that's all"
+  "_comment": " model parameters",
+  "model": {
+    "type": "polar",
+    "type_map": [
+      "O",
+      "H"
+    ],
+    "descriptor": {
+      "type": "loc_frame",
+      "sel_a": [
+        16,
+        32
+      ],
+      "sel_r": [
+        30,
+        60
+      ],
+      "rcut": 6.00,
+      "axis_rule": [
+        0,
+        1,
+        0,
+        0,
+        1,
+        1,
+        0,
+        0,
+        0,
+        0,
+        1,
+        0
+      ],
+      "_comment": " default rule: []",
+      "_comment": " user defined rule: for each type provides two axes, ",
+      "_comment": "                    for each axis: (a_or_r, type, idx)",
+      "_comment": "                    if type < 0, exclude type -(type+1)",
+      "_comment": "                    for water (O:0, H:1) it can be",
+      "_comment": "                    [0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]",
+      "_comment": " that's all"
     },
-    
-    "learning_rate" :{
-	"type":		"exp",
-	"start_lr":	0.001,
-	"decay_steps":	5000,
-	"decay_rate":	0.95,
-	"_comment":	"that's all"
+    "fitting_net": {
+      "type": "wfc",
+      "wfc_numb": 4,
+      "sel_type": [
+        0
+      ],
+      "neuron": [
+        100,
+        100,
+        100
+      ],
+      "resnet_dt": true,
+      "seed": 1,
+      "_comment": " that's all"
     },
+    "_comment": " that's all"
+  },
 
-    "_comment": " traing controls",
-    "systems":		["system"],
-    "set_prefix":	"set",    
-    "stop_batch":	1000000,
-    "batch_size":	[1],
+  "learning_rate": {
+    "type": "exp",
+    "start_lr": 0.001,
+    "decay_steps": 5000,
+    "decay_rate": 0.95,
+    "_comment": "that's all"
+  },
 
-    "seed":		1,
+  "_comment": " traing controls",
+  "systems": [
+    "system"
+  ],
+  "set_prefix": "set",
+  "stop_batch": 1000000,
+  "batch_size": [
+    1
+  ],
 
-    "_comment": " display and restart",
-    "_comment": " frequencies counted in batch",
-    "disp_file":	"lcurve.out",
-    "disp_freq":	100,
-    "numb_test":	10,
-    "save_freq":	1000,
-    "save_ckpt":	"model.ckpt",
-    "load_ckpt":	"model.ckpt",
-    "disp_training":true,
-    "time_training":true,
+  "seed": 1,
 
-    "_comment":		"that's all"
-}
+  "_comment": " display and restart",
+  "_comment": " frequencies counted in batch",
+  "disp_file": "lcurve.out",
+  "disp_freq": 100,
+  "numb_test": 10,
+  "save_freq": 1000,
+  "save_ckpt": "model.ckpt",
+  "load_ckpt": "model.ckpt",
+  "disp_training": true,
+  "time_training": true,
 
+  "_comment": "that's all"
+}
diff --git a/source/tests/yaml_inputs/water_se_a_v1.json b/source/tests/yaml_inputs/water_se_a_v1.json
index 402da962ca..7ebec4d5a3 100644
--- a/source/tests/yaml_inputs/water_se_a_v1.json
+++ b/source/tests/yaml_inputs/water_se_a_v1.json
@@ -1,55 +1,69 @@
 {
-    "model": {
-	"descriptor" :{
-	    "type":		"se_a",
-	    "sel":		[46, 92],
-	    "rcut_smth":	5.80,
-	    "rcut":		6.00,
-	    "neuron":		[25, 50, 100],
-	    "axis_neuron":	16,
-	    "resnet_dt":	false,
-	    "seed":		1
-	},
-	"fitting_net" : {
-	    "neuron":		[240, 240, 240],
-	    "resnet_dt":	true,
-	    "seed":		1
-	}
+  "model": {
+    "descriptor": {
+      "type": "se_a",
+      "sel": [
+        46,
+        92
+      ],
+      "rcut_smth": 5.80,
+      "rcut": 6.00,
+      "neuron": [
+        25,
+        50,
+        100
+      ],
+      "axis_neuron": 16,
+      "resnet_dt": false,
+      "seed": 1
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        240,
+        240
+      ],
+      "resnet_dt": true,
+      "seed": 1
+    }
+  },
 
-    "learning_rate" :{
-	"type":		"exp",
-	"decay_steps":	5000,
-	"decay_rate":	0.95,
-	"start_lr":	0.001
-    },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "decay_rate": 0.95,
+    "start_lr": 0.001
+  },
 
-    "loss" :{
-	"start_pref_e":	0.02,
-	"limit_pref_e":	1,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0
-    },
+  "loss": {
+    "start_pref_e": 0.02,
+    "limit_pref_e": 1,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0
+  },
 
-    "training" : {
-	"systems":	["../data/"],
-	"set_prefix":	"set",    
-	"stop_batch":	1000000,
-	"batch_size":	[1],
+  "training": {
+    "systems": [
+      "../data/"
+    ],
+    "set_prefix": "set",
+    "stop_batch": 1000000,
+    "batch_size": [
+      1
+    ],
 
-	"seed":		1,
+    "seed": 1,
 
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"numb_test":	10,
-	"save_freq":	1000,
-	"save_ckpt":	"model.ckpt",
-	"disp_training":true,
-	"time_training":true,
-	"profiling":	true,
-	"profiling_file":"timeline.json"
-    }
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "numb_test": 10,
+    "save_freq": 1000,
+    "save_ckpt": "model.ckpt",
+    "disp_training": true,
+    "time_training": true,
+    "profiling": true,
+    "profiling_file": "timeline.json"
+  }
 }
-
diff --git a/source/tests/yaml_inputs/water_v1.json b/source/tests/yaml_inputs/water_v1.json
index e8b1d8a196..076a576d7b 100644
--- a/source/tests/yaml_inputs/water_v1.json
+++ b/source/tests/yaml_inputs/water_v1.json
@@ -1,50 +1,78 @@
 {
-    "model":{
-	"descriptor": {
-	    "type":		"loc_frame",
-	    "sel_a":		[16, 32],
-	    "sel_r":		[30, 60],
-	    "rcut":		6.00,
-	    "axis_rule":	[0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0]
-	},
-	"fitting_net": {
-	    "neuron":		[240, 120, 60, 30, 10],
-	    "resnet_dt":	true,
-	    "seed":		1
-	}
-    },
-    
-    "learning_rate" :{
-	"type":		"exp",
-	"decay_steps":	5000,
-	"decay_rate":	0.95,
-	"start_lr":	0.001
+  "model": {
+    "descriptor": {
+      "type": "loc_frame",
+      "sel_a": [
+        16,
+        32
+      ],
+      "sel_r": [
+        30,
+        60
+      ],
+      "rcut": 6.00,
+      "axis_rule": [
+        0,
+        1,
+        0,
+        0,
+        1,
+        1,
+        0,
+        0,
+        0,
+        0,
+        1,
+        0
+      ]
     },
+    "fitting_net": {
+      "neuron": [
+        240,
+        120,
+        60,
+        30,
+        10
+      ],
+      "resnet_dt": true,
+      "seed": 1
+    }
+  },
 
-    "loss" : {
-	"start_pref_e":	0.02,
-	"limit_pref_e":	8,
-	"start_pref_f":	1000,
-	"limit_pref_f":	1,
-	"start_pref_v":	0,
-	"limit_pref_v":	0
-    },
+  "learning_rate": {
+    "type": "exp",
+    "decay_steps": 5000,
+    "decay_rate": 0.95,
+    "start_lr": 0.001
+  },
 
-    "training": {
-	"systems":	["../data/"], 
-	"set_prefix":	"set",    
-	"stop_batch":	1000000,
-	"batch_size":	[4],
+  "loss": {
+    "start_pref_e": 0.02,
+    "limit_pref_e": 8,
+    "start_pref_f": 1000,
+    "limit_pref_f": 1,
+    "start_pref_v": 0,
+    "limit_pref_v": 0
+  },
 
-	"seed":		1,
+  "training": {
+    "systems": [
+      "../data/"
+    ],
+    "set_prefix": "set",
+    "stop_batch": 1000000,
+    "batch_size": [
+      4
+    ],
 
-	"disp_file":	"lcurve.out",
-	"disp_freq":	100,
-	"numb_test":	10,
-	"save_freq":	1000,
-	"save_ckpt":	"model.ckpt",
-	"disp_training":true,
-	"time_training":true
-    }
-}
+    "seed": 1,
 
+    "disp_file": "lcurve.out",
+    "disp_freq": 100,
+    "numb_test": 10,
+    "save_freq": 1000,
+    "save_ckpt": "model.ckpt",
+    "disp_training": true,
+    "time_training": true
+  }
+}

From 218cd2c6392f071a8a2ea2af69ea210291043a57 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 3 Feb 2023 04:29:04 +0000
Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 source/lib/include/ComputeDescriptor.h |  4 ++--
 source/lib/include/coord.h             |  6 ++++--
 source/lib/tests/test_env_mat_a.cc     | 20 +++++++++---------
 source/lib/tests/test_env_mat_a_mix.cc | 28 +++++++++++++-------------
 source/lib/tests/test_env_mat_r.cc     | 20 +++++++++---------
 5 files changed, 40 insertions(+), 38 deletions(-)

diff --git a/source/lib/include/ComputeDescriptor.h b/source/lib/include/ComputeDescriptor.h
index cde9c0eb92..d2128217d6 100644
--- a/source/lib/include/ComputeDescriptor.h
+++ b/source/lib/include/ComputeDescriptor.h
@@ -266,8 +266,8 @@ static void compute_dRdT_2(double (*dRdT)[9],
   }
 }
 
-// output deriv size: n_sel_a_nei x 4 x 12				    + n_sel_r_nei
-// x 12
+// output deriv size: n_sel_a_nei x 4 x 12				    +
+// n_sel_r_nei x 12
 //		      (1./rr, cos_theta, cos_phi, sin_phi)  x 4 x (x, y, z) +
 //(1./rr) x 4 x (x, y, z)
 void compute_descriptor(std::vector &descrpt_a,
diff --git a/source/lib/include/coord.h b/source/lib/include/coord.h
index 17aa1e3dfb..50463c1254 100644
--- a/source/lib/include/coord.h
+++ b/source/lib/include/coord.h
@@ -62,7 +62,8 @@ void normalize_coord_gpu(FPTYPE* coord,
 //                             cell_map,cell_shift_map,sec_loc_cellnum_map,sec_total_cellnum_map,loc_clist
 // inputs:
 //	in_c, in_t, nloc, mem_nall, loc_cellnum, total_cellnum, cell_info,
-//box_info 	mem_nall is the size of allocated memory for out_c, out_t, mapping
+// box_info 	mem_nall is the size of allocated memory for out_c, out_t,
+// mapping
 // returns
 //	0: succssful
 //	1: the memory is not large enough to hold all copied coords and types.
@@ -102,7 +103,8 @@ void normalize_coord_gpu_rocm(FPTYPE* coord,
 //                             cell_map,cell_shift_map,sec_loc_cellnum_map,sec_total_cellnum_map,loc_clist
 // inputs:
 //	in_c, in_t, nloc, mem_nall, loc_cellnum, total_cellnum, cell_info,
-//box_info 	mem_nall is the size of allocated memory for out_c, out_t, mapping
+// box_info 	mem_nall is the size of allocated memory for out_c, out_t,
+// mapping
 // returns
 //	0: succssful
 //	1: the memory is not large enough to hold all copied coords and types.
diff --git a/source/lib/tests/test_env_mat_a.cc b/source/lib/tests/test_env_mat_a.cc
index 3cac18a494..e88639e2ea 100644
--- a/source/lib/tests/test_env_mat_a.cc
+++ b/source/lib/tests/test_env_mat_a.cc
@@ -495,7 +495,7 @@ TEST_F(TestEnvMatA, prod_cpu) {
     }
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   deepmd::convert_nlist(inlist, nlist_a_cpy);
 
@@ -530,7 +530,7 @@ TEST_F(TestEnvMatA, prod_cpu_equal_cpu) {
     }
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   convert_nlist(inlist, nlist_a_cpy);
   std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
@@ -605,7 +605,7 @@ TEST_F(TestEnvMatA, prod_gpu_cuda) {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
       gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
@@ -619,7 +619,7 @@ TEST_F(TestEnvMatA, prod_gpu_cuda) {
   double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
   int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
       *memory_dev = NULL;
-  uint_64* array_longlong_dev = NULL;
+  uint_64 *array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -683,7 +683,7 @@ TEST_F(TestEnvMatA, prod_gpu_cuda_equal_cpu) {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
       gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
@@ -697,7 +697,7 @@ TEST_F(TestEnvMatA, prod_gpu_cuda_equal_cpu) {
   double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
   int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
       *memory_dev = NULL;
-  uint_64* array_longlong_dev = NULL;
+  uint_64 *array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -799,7 +799,7 @@ TEST_F(TestEnvMatA, prod_gpu_rocm) {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
       gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
@@ -813,7 +813,7 @@ TEST_F(TestEnvMatA, prod_gpu_rocm) {
   double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
   int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
       *memory_dev = NULL;
-  uint_64* array_longlong_dev = NULL;
+  uint_64 *array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -877,7 +877,7 @@ TEST_F(TestEnvMatA, prod_gpu_rocm_equal_cpu) {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
       gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
@@ -891,7 +891,7 @@ TEST_F(TestEnvMatA, prod_gpu_rocm_equal_cpu) {
   double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
   int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
       *memory_dev = NULL;
-  uint_64* array_longlong_dev = NULL;
+  uint_64 *array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
diff --git a/source/lib/tests/test_env_mat_a_mix.cc b/source/lib/tests/test_env_mat_a_mix.cc
index e257c66dc6..4c4a99e43a 100644
--- a/source/lib/tests/test_env_mat_a_mix.cc
+++ b/source/lib/tests/test_env_mat_a_mix.cc
@@ -523,7 +523,7 @@ TEST_F(TestEnvMatAMix, prod_cpu) {
     }
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   deepmd::convert_nlist(inlist, nlist_a_cpy);
 
@@ -531,7 +531,7 @@ TEST_F(TestEnvMatAMix, prod_cpu) {
       rij(nloc * nnei * 3);
   std::vector nlist(nloc * nnei);
   std::vector ntype(nloc * nnei);
-  bool* nmask = new bool[nloc * nnei];
+  bool *nmask = new bool[nloc * nnei];
   memset(nmask, 0, sizeof(bool) * nloc * nnei);
   std::vector avg(ntypes * ndescrpt, 0);
   std::vector std(ntypes * ndescrpt, 1);
@@ -567,7 +567,7 @@ TEST_F(TestEnvMatAMix, prod_cpu_equal_cpu) {
     }
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   convert_nlist(inlist, nlist_a_cpy);
   std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
@@ -643,7 +643,7 @@ TEST_F(TestEnvMatAMix, prod_gpu_cuda) {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
       gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
@@ -651,18 +651,18 @@ TEST_F(TestEnvMatAMix, prod_gpu_cuda) {
       em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
   std::vector nlist(nloc * nnei, 0);
   std::vector ntype(nloc * nnei, 0);
-  bool* nmask = new bool[nloc * nnei];
+  bool *nmask = new bool[nloc * nnei];
   memset(nmask, 0, sizeof(bool) * nloc * nnei);
   std::vector avg(ntypes * ndescrpt, 0);
   std::vector std(ntypes * ndescrpt, 1);
 
   double *em_dev = NULL, *em_deriv_dev = NULL, *rij_dev = NULL;
-  bool* nmask_dev = NULL;
+  bool *nmask_dev = NULL;
   double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
   int *f_atype_cpy_dev = NULL, *atype_dev = NULL, *nlist_dev = NULL,
       *ntype_dev = NULL, *mapping_dev = NULL, *array_int_dev = NULL,
       *memory_dev = NULL;
-  uint_64* array_longlong_dev = NULL;
+  uint_64 *array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -743,7 +743,7 @@ TEST_F(TestEnvMatAMix, prod_gpu_cuda_equal_cpu) {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
       gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
@@ -757,7 +757,7 @@ TEST_F(TestEnvMatAMix, prod_gpu_cuda_equal_cpu) {
   double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
   int *f_atype_cpy_dev = NULL, *atype_dev = NULL, *nlist_dev = NULL,
       *array_int_dev = NULL, *memory_dev = NULL;
-  uint_64* array_longlong_dev = NULL;
+  uint_64 *array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -861,7 +861,7 @@ TEST_F(TestEnvMatAMix, prod_gpu_rocm) {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
       gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
@@ -869,7 +869,7 @@ TEST_F(TestEnvMatAMix, prod_gpu_rocm) {
       em_deriv(nloc * ndescrpt * 3, 0.0), rij(nloc * nnei * 3, 0.0);
   std::vector nlist(nloc * nnei, 0);
   std::vector ntype(nloc * nnei, 0);
-  bool* nmask = new bool[nloc * nnei];
+  bool *nmask = new bool[nloc * nnei];
   memset(nmask, 0, sizeof(bool) * nloc * nnei);
   std::vector avg(ntypes * ndescrpt, 0);
   std::vector std(ntypes * ndescrpt, 1);
@@ -880,7 +880,7 @@ TEST_F(TestEnvMatAMix, prod_gpu_rocm) {
   int *f_atype_cpy_dev = NULL, *atype_dev = NULL, *nlist_dev = NULL,
       *ntype_dev = NULL, *mapping_dev = NULL, *array_int_dev = NULL,
       *memory_dev = NULL;
-  uint_64* array_longlong_dev = NULL;
+  uint_64 *array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -961,7 +961,7 @@ TEST_F(TestEnvMatAMix, prod_gpu_rocm_equal_cpu) {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
       gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
@@ -975,7 +975,7 @@ TEST_F(TestEnvMatAMix, prod_gpu_rocm_equal_cpu) {
   double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
   int *f_atype_cpy_dev = NULL, *atype_dev = NULL, *nlist_dev = NULL,
       *array_int_dev = NULL, *memory_dev = NULL;
-  uint_64* array_longlong_dev = NULL;
+  uint_64 *array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
diff --git a/source/lib/tests/test_env_mat_r.cc b/source/lib/tests/test_env_mat_r.cc
index a889d89749..8a503c8c37 100644
--- a/source/lib/tests/test_env_mat_r.cc
+++ b/source/lib/tests/test_env_mat_r.cc
@@ -273,7 +273,7 @@ TEST_F(TestEnvMatR, prod_cpu) {
     }
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   convert_nlist(inlist, nlist_a_cpy);
 
@@ -308,7 +308,7 @@ TEST_F(TestEnvMatR, prod_cpu_equal_cpu) {
     }
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]);
   convert_nlist(inlist, nlist_a_cpy);
   std::vector em(nloc * ndescrpt), em_deriv(nloc * ndescrpt * 3),
@@ -373,7 +373,7 @@ TEST_F(TestEnvMatR, prod_gpu_cuda) {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
       gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
@@ -387,7 +387,7 @@ TEST_F(TestEnvMatR, prod_gpu_cuda) {
   double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
   int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
       *memory_dev = NULL;
-  uint_64* array_longlong_dev = NULL;
+  uint_64 *array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -452,7 +452,7 @@ TEST_F(TestEnvMatR, prod_gpu_cuda_equal_cpu) {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
       gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
@@ -466,7 +466,7 @@ TEST_F(TestEnvMatR, prod_gpu_cuda_equal_cpu) {
   double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
   int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
       *memory_dev = NULL;
-  uint_64* array_longlong_dev = NULL;
+  uint_64 *array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -558,7 +558,7 @@ TEST_F(TestEnvMatR, prod_gpu_rocm) {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
       gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
@@ -572,7 +572,7 @@ TEST_F(TestEnvMatR, prod_gpu_rocm) {
   double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
   int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
       *memory_dev = NULL;
-  uint_64* array_longlong_dev = NULL;
+  uint_64 *array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);
@@ -637,7 +637,7 @@ TEST_F(TestEnvMatR, prod_gpu_rocm_equal_cpu) {
     max_nbor_size = 4096;
   }
   std::vector ilist(nloc), numneigh(nloc);
-  std::vector firstneigh(nloc);
+  std::vector firstneigh(nloc);
   deepmd::InputNlist inlist(nloc, &ilist[0], &numneigh[0], &firstneigh[0]),
       gpu_inlist;
   convert_nlist(inlist, nlist_a_cpy);
@@ -651,7 +651,7 @@ TEST_F(TestEnvMatR, prod_gpu_rocm_equal_cpu) {
   double *posi_cpy_dev = NULL, *avg_dev = NULL, *std_dev = NULL;
   int *atype_cpy_dev = NULL, *nlist_dev = NULL, *array_int_dev = NULL,
       *memory_dev = NULL;
-  uint_64* array_longlong_dev = NULL;
+  uint_64 *array_longlong_dev = NULL;
   deepmd::malloc_device_memory_sync(em_dev, em);
   deepmd::malloc_device_memory_sync(em_deriv_dev, em_deriv);
   deepmd::malloc_device_memory_sync(rij_dev, rij);