diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml new file mode 100644 index 0000000000..115bdb017f --- /dev/null +++ b/.github/workflows/build_cc.yml @@ -0,0 +1,25 @@ +on: + push: + pull_request: +name: Build C++ +jobs: + testpython: + name: Build C++ + runs-on: ubuntu-20.04 + strategy: + matrix: + include: + - variant: cpu + - variant: cuda + steps: + - uses: actions/checkout@master + with: + submodules: true + - run: sudo apt update && sudo apt install g++-7 + - run: sudo apt install nvidia-cuda-toolkit + if: matrix.variant == 'cuda' + - run: source/install/build_cc.sh + env: + DP_VARIANT: ${{ matrix.variant }} + CC: gcc-7 + CXX: g++-7 diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml new file mode 100644 index 0000000000..ce72d9cc41 --- /dev/null +++ b/.github/workflows/build_wheel.yml @@ -0,0 +1,68 @@ +name: Build and upload to PyPI + +on: + push: + pull_request: + +jobs: + build_wheels: + name: Build wheels on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-18.04] #, windows-latest, macos-latest] + + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + name: Install Python + with: + python-version: '3.8' + + - name: Install cibuildwheel + run: | + python -m pip install cibuildwheel + + - name: Build wheels + env: + CIBW_BUILD: "cp36-* cp37-* cp38-* cp39-*" + CIBW_MANYLINUX_X86_64_IMAGE: ghcr.io/deepmodeling/manylinux2010_x86_64_tensorflow + CIBW_BEFORE_BUILD: pip install tensorflow + CIBW_SKIP: "*-win32 *-manylinux_i686" + run: | + python -m cibuildwheel --output-dir wheelhouse + - uses: actions/upload-artifact@v2 + with: + path: ./wheelhouse/*.whl + + build_sdist: + name: Build source distribution + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + name: Install Python + with: + python-version: '3.8' + - run: pip install -U scikit-build tensorflow setuptools_scm + - name: Build sdist + run: python 
setup.py sdist + + - uses: actions/upload-artifact@v2 + with: + path: dist/*.tar.gz + + upload_pypi: + needs: [build_wheels, build_sdist] + runs-on: ubuntu-latest + if: startsWith(github.event.ref, 'refs/tags/v') + steps: + - uses: actions/download-artifact@v2 + with: + name: artifact + path: dist + + - uses: pypa/gh-action-pypi-publish@master + with: + user: __token__ + password: ${{ secrets.pypi_password }} diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml new file mode 100644 index 0000000000..92a609edc1 --- /dev/null +++ b/.github/workflows/lint_python.yml @@ -0,0 +1,33 @@ +on: + push: + pull_request: +name: Lint Python +jobs: + lintpython: + name: Lint Python + runs-on: ubuntu-20.04 + strategy: + matrix: + python-version: [3.8] + + steps: + - uses: actions/checkout@v1 + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install requirements + run: pip install -r requirements.txt + - uses: marian-code/python-lint-annotate@v2.5.0 + with: + python-root-list: "./deepmd/*.py ./deepmd/*/*.py ./source/train/*.py ./source/tests/*.py ./source/op/*.py" + use-black: true + use-isort: true + use-mypy: true + use-pycodestyle: true + use-pydocstyle: true + extra-pycodestyle-options: "--max-line-length=88" + use-pylint: false + use-flake8: false + use-vulture: true + conda-python-version: "3.8" + diff --git a/.github/workflows/mirror_gitee.yml b/.github/workflows/mirror_gitee.yml new file mode 100644 index 0000000000..4cb2c4f620 --- /dev/null +++ b/.github/workflows/mirror_gitee.yml @@ -0,0 +1,18 @@ +name: Mirror to Gitee Repo + +on: [ push, delete, create ] + +# Ensures that only one mirror task will run at a time. 
+concurrency: + group: git-mirror + +jobs: + git-mirror: + runs-on: ubuntu-latest + steps: + - uses: wearerequired/git-mirror-action@v1 + env: + SSH_PRIVATE_KEY: ${{ secrets.SYNC_GITEE_PRIVATE_KEY }} + with: + source-repo: "git@github.com:deepmodeling/deepmd-kit.git" + destination-repo: "git@gitee.com:deepmodeling/deepmd-kit.git" diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml new file mode 100644 index 0000000000..5192eda6e9 --- /dev/null +++ b/.github/workflows/test_cc.yml @@ -0,0 +1,11 @@ +on: + push: + pull_request: +name: Test C++ +jobs: + testpython: + name: Test C++ + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - run: source/install/test_cc.sh diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml new file mode 100644 index 0000000000..8517560fc9 --- /dev/null +++ b/.github/workflows/test_python.yml @@ -0,0 +1,74 @@ +on: + push: + pull_request: +name: Test Python +jobs: + testpython: + name: Test Python + runs-on: ubuntu-18.04 + strategy: + matrix: + include: + - python: 3.6 + gcc: 4.8 + tf: 1.8 + - python: 3.6 + gcc: 4.8 + tf: 1.12 + - python: 3.6 + gcc: 4.8 + tf: 1.14 + - python: 3.6 + gcc: 5 + tf: 1.14 + - python: 3.6 + gcc: 8 + tf: 1.14 + - python: 3.7 + gcc: 5 + tf: 1.14 + - python: 3.7 + gcc: 6 + tf: 1.14 + - python: 3.7 + gcc: 7 + tf: 1.14 + - python: 3.7 + gcc: 8 + tf: 1.14 + - python: 3.7 + gcc: 5 + tf: + - python: 3.7 + gcc: 8 + tf: + - python: 3.8 + gcc: 5 + tf: + - python: 3.8 + gcc: 8 + tf: + + steps: + - uses: actions/checkout@master + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: pip cache + uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: + ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }}-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + - run: | + sudo apt update + sudo apt install gcc-${{ matrix.gcc }} g++-${{ matrix.gcc }} + - run: pip install -e .[cpu,test] codecov + env: 
+ CC: gcc-${{ matrix.gcc }} + CXX: g++-${{ matrix.gcc }} + TENSORFLOW_VERSION: ${{ matrix.tf }} + - run: dp --version + - run: pytest --cov=deepmd source/tests && codecov diff --git a/.gitignore b/.gitignore index b392cdbca5..e9b29e2382 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,9 @@ venv* .vscode/** _build _templates +API_CC +doc/api_py/ +dp/ +build_lammps/ +build_tests/ +build_cc_tests diff --git a/.gitmodules b/.gitmodules index c2225c5f76..5373ec05b3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ -[submodule "source/op/cuda/cub"] - path = source/op/cuda/cub - url = git://github.com/NVlabs/cub.git +[submodule "source/lib/src/cuda/cub"] + path = source/lib/src/cuda/cub + url = git://github.com/NVIDIA/cub.git diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index b434af6819..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,95 +0,0 @@ -language: python -addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - gcc-4.8 - - g++-4.8 - - gcc-5 - - g++-5 - - gcc-6 - - g++-6 - - gcc-7 - - g++-7 - - gcc-8 - - g++-8 -jobs: - include: - - stage: unit tests - python: 3.6 - env: - - CC=gcc-4.8 - - CXX=g++-4.8 - - TENSORFLOW_VERSION=1.8 - - python: 3.6 - env: - - CC=gcc-4.8 - - CXX=g++-4.8 - - TENSORFLOW_VERSION=1.12 - - python: 3.6 - env: - - CC=gcc-4.8 - - CXX=g++-4.8 - - TENSORFLOW_VERSION=1.14 - - python: 3.6 - env: - - CC=gcc-5 - - CXX=g++-5 - - TENSORFLOW_VERSION=1.14 - - python: 3.6 - env: - - CC=gcc-8 - - CXX=g++-8 - - TENSORFLOW_VERSION=1.14 - - python: 3.7 - env: - - CC=gcc-5 - - CXX=g++-5 - - TENSORFLOW_VERSION=1.14 - - python: 3.7 - env: - - CC=gcc-6 - - CXX=g++-6 - - TENSORFLOW_VERSION=1.14 - - python: 3.7 - env: - - CC=gcc-7 - - CXX=g++-7 - - TENSORFLOW_VERSION=1.14 - - python: 3.7 - env: - - CC=gcc-8 - - CXX=g++-8 - - TENSORFLOW_VERSION=1.14 - - python: 3.7 - env: - - CC=gcc-5 - - CXX=g++-5 - - TENSORFLOW_VERSION=2.3 - - python: 3.7 - env: - - CC=gcc-8 - - CXX=g++-8 - - TENSORFLOW_VERSION=2.3 - - stage: build 
whls - services: docker - env: - - TWINE_USERNAME=__token__ - - CIBW_BUILD="cp36-* cp37-*" - - CIBW_BEFORE_BUILD="sed -i 's/libresolv.so.2\"/libresolv.so.2\", \"libtensorflow_framework.so.2\"/g' \$(find / -name policy.json)" - - CIBW_SKIP="*-win32 *-manylinux_i686" - - CC=gcc-7 - - CXX=g++-7 - - TENSORFLOW_VERSION=2.3 - install: - - python -m pip install twine cibuildwheel==1.6.3 scikit-build setuptools_scm - script: - - python -m cibuildwheel --output-dir wheelhouse - - python setup.py sdist - after_success: - - if [[ $TRAVIS_TAG ]]; then python -m twine upload wheelhouse/*; python -m twine upload dist/*.tar.gz; fi -install: - - pip install .[cpu,test] -script: - - cd source/tests && python -m unittest diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000000..f946fed778 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,19 @@ +preferred-citation: + type: article + authors: + - family-names: "Wang" + given-names: "Han" + - family-names: "Zhang" + given-names: "Linfeng" + - family-names: "Han" + given-names: "Jiequn" + - family-names: "E" + given-names: "Weinan" + doi: "10.1016/j.cpc.2018.03.016" + journal: "Computer Physics Communications" + month: 7 + start: 178 # First page number + end: 184 # Last page number + title: "DeePMD-kit: A deep learning package for many-body potential energy representation and molecular dynamics" + volume: 228 + year: 2018 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000..86199588d7 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,136 @@ +# DeePMD-kit Contributing Guide + +Welcome to [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit)! + +## What you can contribute + +You can either make a code contribution, help improve our document or offer help to other users. Your help is always appreciated. Come and have fun! 
+ +### Code contribution +You can start from any one of the following items to help improve deepmd-kit + +- Smash a bug +- Implement a feature or add a patch, whatever you think deepmd-kit is missing +- Browse [issues](https://github.com/deepmodeling/deepmd-kit/issues), find an issue labeled enhancement or bug, and help to solve it. + +See [here](#before-you-contribute) for some before-hand heads-up. + +See [here](#how-to-contribute) to learn how to contribute. + +### Document improvement +You can start from any one of the following items to help improve [DeePMD-kit Docs](https://deepmd.readthedocs.io/en/latest/?badge=latest): + +- Fix typos or format (punctuation, space, indentation, code block, etc.) +- Fix or update inappropriate or outdated descriptions +- Add missing content (sentence, paragraph, or a new document) +- Translate docs changes from English to Chinese + +### Offer help +You can help other users of deepmd-kit in the following ways + +- Submit, reply to, and resolve [issues](https://github.com/deepmodeling/deepmd-kit/issues) +- (Advanced) Review Pull Requests created by others + +## Before you contribute +### Overview of DeePMD-kit +Currently, we maintain two main branches: +- master: stable branch with version tag +- devel : branch for developers + +### Developer guide +See [here](doc/development/index.md) for coding conventions, API and other needs-to-know of the code. + +## How to contribute +Please perform the following steps to create your Pull Request to this repository. If you don't like to use commands, you can also use [GitHub Desktop](https://desktop.github.com/), which is easier to get started with. Go to [git documentation](https://git-scm.com/doc) if you want to really master git. + +### Step 1: Fork the repository + +1. Visit the project: +2. Click the **Fork** button on the top right and wait for it to finish. + +### Step 2: Clone the forked repository to local storage and set configurations + +1. 
Clone your own repo, not the public repo (from deepmodeling) ! And change the branch to devel. + ```bash + git clone https://github.com/$username/deepmd-kit.git + # Replace `$username` with your GitHub ID + + git checkout devel + ``` + +2. Add deepmodeling's repo as your remote repo, we can name it "upstream". And fetch upstream's latest codes to your workstation. + ```bash + git remote add upstream https://github.com/deepmodeling/deepmd-kit.git + # After you add a remote repo, your local repo will be automatically named "origin". + + git fetch upstream + + # If your current codes are behind the latest codes, you should merge latest codes first. + # Notice you should merge from "devel"! + git merge upstream/devel + ``` + +3. Modify your codes and design unit tests. + +4. Commit your changes + ```bash + git status # Checks the local status + git add ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.` + git commit -m "commit-message: update the xx" + ``` + +5. Push the changed codes to your original repo on github. + ```bash + git push origin devel + ``` + +### Alternatively: Create a new branch + +1. Get your local master up-to-date with upstream/master. + + ```bash + cd $working_dir/deepmd-kit + git fetch upstream + git checkout master + git rebase upstream/master + ``` + +2. Create a new branch based on the master branch. + + ```bash + git checkout -b new-branch-name + ``` + +3. Modify your codes and design unit tests. + +4. Commit your changes + + ```bash + git status # Checks the local status + git add ... # Adds the file(s) you want to commit. If you want to commit all changes, you can directly use `git add.` + git commit -m "commit-message: update the xx" + ``` + +5. Keep your branch in sync with upstream/master + + ```bash + # While on your new branch + git fetch upstream + git rebase upstream/master + ``` + +6. 
Push your changes to the remote + + ```bash + git push -u origin new-branch-name # "-u" is used to track the remote branch from origin + ``` + +### Step 3: Create a pull request + +1. Visit your fork at (replace `$username` with your GitHub ID) +2. Click `pull requests`, followed by `New pull request` and `Compare & pull request` to create your PR. + +Now, your PR is successfully submitted! After this PR is merged, you will automatically become a contributor to DeePMD-kit. + +## Contact us +E-mail: contact@deepmodeling.org diff --git a/README.md b/README.md index 77c867bc49..c05be445c8 100644 --- a/README.md +++ b/README.md @@ -10,12 +10,13 @@ # Table of contents - [About DeePMD-kit](#about-deepmd-kit) + - [Highlights in v2.0](#highlights-in-deepmd-kit-v2.0) - [Highlighted features](#highlighted-features) - - [Code structure](#code-structure) - [License and credits](#license-and-credits) - [Deep Potential in a nutshell](#deep-potential-in-a-nutshell) - [Download and install](#download-and-install) - [Use DeePMD-kit](#use-deepmd-kit) +- [Code structure](#code-structure) - [Troubleshooting](#troubleshooting) # About DeePMD-kit @@ -23,37 +24,22 @@ DeePMD-kit is a package written in Python/C++, designed to minimize the effort r For more information, check the [documentation](https://deepmd.readthedocs.io/). +# Highlights in DeePMD-kit v2.0 +* [Model compression](doc/freeze/compress.md). Accelerate the efficiency of model inference for 4-15 times. +* [New descriptors](doc/model/overall.md). Including [`se_e2_r`](doc/model/train-se-e2-r.md) and [`se_e3`](doc/model/train-se-e3.md). +* [Hybridization of descriptors](doc/model/train-hybrid.md). Hybrid descriptor constructed from concatenation of several descriptors. +* [Atom type embedding](doc/model/train-se-e2-a-tebd.md). Enable atom type embedding to decline training complexity and refine performance. +* Training and inference the dipole (vector) and polarizability (matrix). +* Split of training and validation dataset. 
+* Optimized training on GPUs. + ## Highlighted features -* **interfaced with TensorFlow**, one of the most popular deep learning frameworks, making the training process highly automatic and efficient. +* **interfaced with TensorFlow**, one of the most popular deep learning frameworks, making the training process highly automatic and efficient, in addition Tensorboard can be used to visualize training procedure. * **interfaced with high-performance classical MD and quantum (path-integral) MD packages**, i.e., LAMMPS and i-PI, respectively. * **implements the Deep Potential series models**, which have been successfully applied to finite and extended systems including organic molecules, metals, semiconductors, and insulators, etc. * **implements MPI and GPU supports**, makes it highly efficient for high performance parallel and distributed computing. * **highly modularized**, easy to adapt to different descriptors for deep learning based potential energy models. -## Code structure -The code is organized as follows: - -* `data/raw`: tools manipulating the raw data files. - -* `examples`: example json parameter files. - -* `source/3rdparty`: third-party packages used by DeePMD-kit. - -* `source/cmake`: cmake scripts for building. - -* `source/ipi`: source code of i-PI client. - -* `source/lib`: source code of DeePMD-kit library. - -* `source/lmp`: source code of Lammps module. - -* `source/op`: tensorflow op implementation. working with library. - -* `source/scripts`: Python script for model freezing. - -* `source/train`: Python modules and scripts for training and testing. - - ## License and credits The project DeePMD-kit is licensed under [GNU LGPLv3.0](./LICENSE). 
If you use this code in any future publications, please cite this using @@ -70,68 +56,106 @@ In addition to building up potential energy models, DeePMD-kit can also be used # Download and install -Please follow our [github](https://github.com/deepmodeling/deepmd-kit) webpage to download the [latest released version](https://github.com/deepmodeling/deepmd-kit/tree/master) and [development version](https://github.com/deepmodeling/deepmd-kit/tree/devel). +Please follow our [GitHub](https://github.com/deepmodeling/deepmd-kit) webpage to download the [latest released version](https://github.com/deepmodeling/deepmd-kit/tree/master) and [development version](https://github.com/deepmodeling/deepmd-kit/tree/devel). -DeePMD-kit offers multiple installation methods. It is recommend using easily methods like [offline packages](doc/install.md#offline-packages), [conda](doc/install.md#with-conda) and [docker](doc/install.md#with-docker). +DeePMD-kit offers multiple installation methods. It is recommend using easily methods like [offline packages](doc/install/easy-install.md#offline-packages), [conda](doc/install/easy-install.md#with-conda) and [docker](doc/install/easy-install.md#with-docker). -One may manually install DeePMD-kit by following the instuctions on [installing the python interface](doc/install.md#install-the-python-interface) and [installing the C++ interface](doc/install.md#install-the-c-interface). The C++ interface is necessary when using DeePMD-kit with LAMMPS and i-PI. +One may manually install DeePMD-kit by following the instuctions on [installing the Python interface](doc/install/install-from-source.md#install-the-python-interface) and [installing the C++ interface](doc/install/install-from-source.md#install-the-c-interface). The C++ interface is necessary when using DeePMD-kit with LAMMPS and i-PI. 
# Use DeePMD-kit -The typical procedure of using DeePMD-kit includes 5 steps +A quick-start on using DeePMD-kit can be found as follows: + +- [Prepare data with dpdata](doc/data/dpdata.md) +- [Training a model](doc/train/training.md) +- [Freeze a model](doc/freeze/freeze.md) +- [Test a model](doc/test/test.md) +- [Running MD with LAMMPS](doc/third-party/lammps.md) + +A full [document](doc/train/train-input-auto.rst) on options in the training input script is available. + +# Advanced + +- [Installation](doc/install/index.md) + - [Easy install](doc/install/easy-install.md) + - [Install from source code](doc/install/install-from-source.md) + - [Install LAMMPS](doc/install/install-lammps.md) + - [Install i-PI](doc/install/install-ipi.md) + - [Building conda packages](doc/install/build-conda.md) +- [Data](doc/data/index.md) + - [Data conversion](doc/data/data-conv.md) + - [Prepare data with dpdata](doc/data/dpdata.md) +- [Model](doc/model/index.md) + - [Overall](doc/model/overall.md) + - [Descriptor `"se_e2_a"`](doc/model/train-se-e2-a.md) + - [Descriptor `"se_e2_r"`](doc/model/train-se-e2-r.md) + - [Descriptor `"se_e3"`](doc/model/train-se-e3.md) + - [Descriptor `"hybrid"`](doc/model/train-hybrid.md) + - [Fit energy](doc/model/train-energy.md) + - [Fit `tensor` like `Dipole` and `Polarizability`](doc/model/train-fitting-tensor.md) + - [Train a Deep Potential model using `type embedding` approach](doc/model/train-se-e2-a-tebd.md) +- [Training](doc/train/index.md) + - [Training a model](doc/train/training.md) + - [Advanced options](doc/train/training-advanced.md) + - [Parallel training](doc/train/parallel-training.md) + - [TensorBoard Usage](doc/train/tensorboard.md) + - [Known limitations of using GPUs](doc/train/gpu-limitations.md) + - [Training Parameters](doc/train/train-input-auto.rst) +- [Freeze and Compress](doc/freeze/index.rst) + - [Freeze a model](doc/freeze/freeze.md) + - [Compress a model](doc/freeze/compress.md) +- [Test](doc/test/index.rst) + - [Test a 
model](doc/test/test.md) + - [Calculate Model Deviation](doc/test/model-deviation.md) +- [Inference](doc/inference/index.rst) + - [Python interface](doc/inference/python.md) + - [C++ interface](doc/inference/cxx.md) +- [Integrate with third-party packages](doc/third-party/index.rst) + - [Use deep potential with ASE](doc/third-party/ase.md) + - [Running MD with LAMMPS](doc/third-party/lammps.md) + - [LAMMPS commands](doc/third-party/lammps-command.md) + - [Run path-integral MD with i-PI](doc/third-party/ipi.md) + + +# Code structure +The code is organized as follows: -1. [Prepare data](doc/use-deepmd-kit.md#prepare-data) -2. [Train a model](doc/use-deepmd-kit.md#train-a-model) -3. [Freeze the model](doc/use-deepmd-kit.md#freeze-a-model) -4. [Test the model](doc/use-deepmd-kit.md#test-a-model) -5. [Inference the model in python](doc/use-deepmd-kit.md#model-inference) or using the model in other molecular simulation packages like [LAMMPS](doc/use-deepmd-kit.md#run-md-with-lammps), [i-PI](doc/use-deepmd-kit.md#run-path-integral-md-with-i-pi) or [ASE](doc/use-deepmd-kit.md#use-deep-potential-with-ase). +* `data/raw`: tools manipulating the raw data files. -A quick-start on using DeePMD-kit can be found [here](doc/use-deepmd-kit.md). +* `examples`: examples. -A full [document](doc/train-input-auto.rst) on options in the training input script is available. +* `deepmd`: DeePMD-kit python modules. +* `source/api_cc`: source code of DeePMD-kit C++ API. -# Troubleshooting -In consequence of various differences of computers or systems, problems may occur. Some common circumstances are listed as follows. -If other unexpected problems occur, you're welcome to contact us for help. +* `source/ipi`: source code of i-PI client. -## Model compatability +* `source/lib`: source code of DeePMD-kit library. -When the version of DeePMD-kit used to training model is different from the that of DeePMD-kit running MDs, one has the problem of model compatability. 
+* `source/lmp`: source code of Lammps module. -DeePMD-kit guarantees that the codes with the same major and minor revisions are compatible. That is to say v0.12.5 is compatible to v0.12.0, but is not compatible to v0.11.0 nor v1.0.0. +* `source/op`: tensorflow op implementation. working with library. -## Installation: inadequate versions of gcc/g++ -Sometimes you may use a gcc/g++ of version <4.9. If you have a gcc/g++ of version > 4.9, say, 7.2.0, you may choose to use it by doing -```bash -export CC=/path/to/gcc-7.2.0/bin/gcc -export CXX=/path/to/gcc-7.2.0/bin/g++ -``` -If, for any reason, for example, you only have a gcc/g++ of version 4.8.5, you can still compile all the parts of TensorFlow and most of the parts of DeePMD-kit. i-Pi will be disabled automatically. +# Troubleshooting -## Installation: build files left in DeePMD-kit -When you try to build a second time when installing DeePMD-kit, files produced before may contribute to failure. Thus, you may clear them by -```bash -cd build -rm -r * -``` -and redo the `cmake` process. +- [Model compatibility](doc/troubleshooting/model-compatability.md) +- [Installation](doc/troubleshooting/installation.md) +- [The temperature undulates violently during early stages of MD](doc/troubleshooting/md-energy-undulation.md) +- [MD: cannot run LAMMPS after installing a new version of DeePMD-kit](doc/troubleshooting/md-version-compatibility.md) +- [Do we need to set rcut < half boxsize?](doc/troubleshooting/howtoset-rcut.md) +- [How to set sel?](doc/troubleshooting/howtoset-sel.md) +- [How to control the number of nodes used by a job?](doc/troubleshooting/howtoset_num_nodes.md) +- [How to tune Fitting/embedding-net size?](doc/troubleshooting/howtoset_netsize.md) -## MD: cannot run LAMMPS after installing a new version of DeePMD-kit -This typically happens when you install a new version of DeePMD-kit and copy directly the generated `USER-DEEPMD` to a LAMMPS source code folder and re-install LAMMPS. 
-To solve this problem, it suffices to first remove `USER-DEEPMD` from LAMMPS source code by -```bash -make no-user-deepmd -``` -and then install the new `USER-DEEPMD`. +# Contributing -If this does not solve your problem, try to decompress the LAMMPS source tarball and install LAMMPS from scratch again, which typically should be very fast. +See [DeePMD-kit Contributing Guide](CONTRIBUTING.md) to become a contributor! 🤓 -[1]: http://www.global-sci.com/galley/CiCP-2017-0213.pdf +[1]: https://arxiv.org/abs/1707.01478 [2]: https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.120.143001 -[3]:https://arxiv.org/abs/1805.09003 -[4]:https://aip.scitation.org/doi/full/10.1063/1.5027645 +[3]: https://arxiv.org/abs/1805.09003 +[4]: https://aip.scitation.org/doi/full/10.1063/1.5027645 diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 0000000000..b882b3989c --- /dev/null +++ b/codecov.yml @@ -0,0 +1,2 @@ +ignore: + - "source/**/tests" diff --git a/data/raw/raw_to_set.sh b/data/raw/raw_to_set.sh index 8e4b917adb..a89ef3a872 100755 --- a/data/raw/raw_to_set.sh +++ b/data/raw/raw_to_set.sh @@ -17,6 +17,10 @@ test -f force.raw && split force.raw -l $nline_per_set -d -a 3 force.raw test -f virial.raw && split virial.raw -l $nline_per_set -d -a 3 virial.raw test -f atom_ener.raw && split atom_ener.raw -l $nline_per_set -d -a 3 atom_ener.raw test -f fparam.raw && split fparam.raw -l $nline_per_set -d -a 3 fparam.raw +test -f dipole.raw && split dipole.raw -l $nline_per_set -d -a 3 dipole.raw +test -f polarizability.raw && split polarizability.raw -l $nline_per_set -d -a 3 polarizability.raw +test -f atomic_dipole.raw && split atomic_dipole.raw -l $nline_per_set -d -a 3 atomic_dipole.raw +test -f atomic_polarizability.raw && split atomic_polarizability.raw -l $nline_per_set -d -a 3 atomic_polarizability.raw nset=`ls | grep box.raw[0-9] | wc -l` nset_1=$(($nset-1)) @@ -34,6 +38,8 @@ do test -f virial.raw$pi && mv virial.raw$pi set.$pi/virial.raw test -f 
atom_ener.raw$pi && mv atom_ener.raw$pi set.$pi/atom_ener.raw test -f fparam.raw$pi && mv fparam.raw$pi set.$pi/fparam.raw + test -f atomic_dipole.raw$pi && mv atomic_dipole.raw$pi set.$pi/atomic_dipole.raw + test -f atomic_polarizability.raw$pi && mv atomic_polarizability.raw$pi set.$pi/atomic_polarizability.raw cd set.$pi python -c 'import numpy as np; data = np.loadtxt("box.raw" , ndmin = 2); data = data.astype (np.float32); np.save ("box", data)' @@ -72,6 +78,34 @@ if os.path.isfile("fparam.raw"): data = np.loadtxt("fparam.raw", ndmin = 2); data = data.astype (np.float32); np.save ("fparam", data) +' + python -c \ +'import numpy as np; import os.path; +if os.path.isfile("dipole.raw"): + data = np.loadtxt("dipole.raw", ndmin = 2); + data = data.astype (np.float32); + np.save ("dipole", data) +' + python -c \ +'import numpy as np; import os.path; +if os.path.isfile("polarizability.raw"): + data = np.loadtxt("polarizability.raw", ndmin = 2); + data = data.astype (np.float32); + np.save ("polarizability", data) +' + python -c \ +'import numpy as np; import os.path; +if os.path.isfile("atomic_dipole.raw"): + data = np.loadtxt("atomic_dipole.raw", ndmin = 2); + data = data.astype (np.float32); + np.save ("atomic_dipole", data) +' + python -c \ +'import numpy as np; import os.path; +if os.path.isfile("atomic_polarizability.raw"): + data = np.loadtxt("atomic_polarizability.raw", ndmin = 2); + data = data.astype (np.float32); + np.save ("atomic_polarizability", data) ' rm *.raw cd ../ diff --git a/deepmd/.gitignore b/deepmd/.gitignore new file mode 100644 index 0000000000..8db60db5b8 --- /dev/null +++ b/deepmd/.gitignore @@ -0,0 +1,3 @@ +op/_*.py +pkg_config +!op/__init__.py \ No newline at end of file diff --git a/deepmd/__init__.py b/deepmd/__init__.py index 231145b989..3a295dcbef 100644 --- a/deepmd/__init__.py +++ b/deepmd/__init__.py @@ -1,10 +1,11 @@ +"""Root of the deepmd package, exposes all public classes and submodules.""" + +import deepmd.utils.network as 
network + +from . import cluster, descriptor, fit, loss, utils from .env import set_mkl -from .DeepEval import DeepEval -from .DeepPot import DeepPot -from .DeepDipole import DeepDipole -from .DeepPolar import DeepPolar -from .DeepPolar import DeepGlobalPolar -from .DeepWFC import DeepWFC +from .infer import DeepEval, DeepPotential +from .infer.data_modifier import DipoleChargeModifier set_mkl() @@ -13,3 +14,14 @@ except ImportError: from .__about__ import __version__ +__all__ = [ + "descriptor", + "fit", + "loss", + "utils", + "cluster", + "network", + "DeepEval", + "DeepPotential", + "DipoleChargeModifier", +] diff --git a/deepmd/__main__.py b/deepmd/__main__.py index 04b1c9aca5..2dea15ee78 100644 --- a/deepmd/__main__.py +++ b/deepmd/__main__.py @@ -1,5 +1,6 @@ -from .main import main +"""Package dp entry point.""" + +from .entrypoints.main import main if __name__ == '__main__': main() - diff --git a/deepmd/calculator.py b/deepmd/calculator.py new file mode 100644 index 0000000000..25dc7fd5ee --- /dev/null +++ b/deepmd/calculator.py @@ -0,0 +1,117 @@ +"""ASE calculator interface module.""" + +from pathlib import Path +from typing import TYPE_CHECKING, Dict, List, Optional, Union + +from ase.calculators.calculator import ( + Calculator, all_changes, PropertyNotImplementedError +) + +from deepmd import DeepPotential + +if TYPE_CHECKING: + from ase import Atoms + +__all__ = ["DP"] + + +class DP(Calculator): + """Implementation of ASE deepmd calculator. 
+ + Implemented propertie are `energy`, `forces` and `stress` + + Parameters + ---------- + model : Union[str, Path] + path to the model + label : str, optional + calculator label, by default "DP" + type_dict : Dict[str, int], optional + mapping of element types and their numbers, best left None and the calculator + will infer this information from model, by default None + + Examples + -------- + Compute potential energy + + >>> from ase import Atoms + >>> from deepmd.calculator import DP + >>> water = Atoms('H2O', + >>> positions=[(0.7601, 1.9270, 1), + >>> (1.9575, 1, 1), + >>> (1., 1., 1.)], + >>> cell=[100, 100, 100], + >>> calculator=DP(model="frozen_model.pb")) + >>> print(water.get_potential_energy()) + >>> print(water.get_forces()) + + Run BFGS structure optimization + + >>> from ase.optimize import BFGS + >>> dyn = BFGS(water) + >>> dyn.run(fmax=1e-6) + >>> print(water.get_positions()) + """ + + name = "DP" + implemented_properties = ["energy", "forces", "virial", "stress"] + + def __init__( + self, + model: Union[str, "Path"], + label: str = "DP", + type_dict: Dict[str, int] = None, + **kwargs + ) -> None: + Calculator.__init__(self, label=label, **kwargs) + self.dp = DeepPotential(str(Path(model).resolve())) + if type_dict: + self.type_dict = type_dict + else: + self.type_dict = dict( + zip(self.dp.get_type_map(), range(self.dp.get_ntypes())) + ) + + def calculate( + self, + atoms: Optional["Atoms"] = None, + properties: List[str] = ["energy", "forces", "virial"], + system_changes: List[str] = all_changes, + ): + """Run calculation with deepmd model. 
+ + Parameters + ---------- + atoms : Optional[Atoms], optional + atoms object to run the calculation on, by default None + properties : List[str], optional + unused, only for function signature compatibility, + by default ["energy", "forces", "stress"] + system_changes : List[str], optional + unused, only for function signature compatibility, by default all_changes + """ + if atoms is not None: + self.atoms = atoms.copy() + + coord = self.atoms.get_positions().reshape([1, -1]) + if sum(self.atoms.get_pbc()) > 0: + cell = self.atoms.get_cell().reshape([1, -1]) + else: + cell = None + symbols = self.atoms.get_chemical_symbols() + atype = [self.type_dict[k] for k in symbols] + e, f, v = self.dp.eval(coords=coord, cells=cell, atom_types=atype) + self.results['energy'] = e[0][0] + self.results['forces'] = f[0] + self.results['virial'] = v[0].reshape(3, 3) + + # convert virial into stress for lattice relaxation + if "stress" in properties: + if sum(atoms.get_pbc()) > 0: + # the usual convention (tensile stress is positive) + # stress = -virial / volume + stress = -0.5 * (v[0].copy() + v[0].copy().T) / atoms.get_volume() + # Voigt notation + self.results['stress'] = stress.flat[[0, 4, 8, 5, 2, 1]] + else: + raise PropertyNotImplementedError diff --git a/deepmd/cluster/__init__.py b/deepmd/cluster/__init__.py new file mode 100644 index 0000000000..1875b21f9b --- /dev/null +++ b/deepmd/cluster/__init__.py @@ -0,0 +1,22 @@ +"""Module that reads node resources, auto detects if running local or on SLURM.""" + +from .local import get_resource as get_local_res +from .slurm import get_resource as get_slurm_res +import os +from typing import List, Tuple, Optional + +__all__ = ["get_resource"] + + +def get_resource() -> Tuple[str, List[str], Optional[List[int]]]: + """Get local or slurm resources: nodename, nodelist, and gpus. 
+ + Returns + ------- + Tuple[str, List[str], Optional[List[int]]] + nodename, nodelist, and gpus + """ + if "SLURM_JOB_NODELIST" in os.environ: + return get_slurm_res() + else: + return get_local_res() diff --git a/deepmd/cluster/local.py b/deepmd/cluster/local.py new file mode 100644 index 0000000000..6fe454a9a2 --- /dev/null +++ b/deepmd/cluster/local.py @@ -0,0 +1,49 @@ +"""Get local GPU resources.""" + +import os +import socket +import subprocess as sp +import sys + +from deepmd.env import tf +from typing import List, Tuple, Optional + + +__all__ = ["get_gpus", "get_resource"] + + +def get_gpus(): + """Get available IDs of GPU cards at local. + These IDs are valid when used as the TensorFlow device ID. + + Returns + ------- + Optional[List[int]] + List of available GPU IDs. Otherwise, None. + """ + test_cmd = 'from tensorflow.python.client import device_lib; ' \ + 'devices = device_lib.list_local_devices(); ' \ + 'gpus = [d.name for d in devices if d.device_type == "GPU"]; ' \ + 'print(len(gpus))' + with sp.Popen([sys.executable, "-c", test_cmd], stderr=sp.PIPE, stdout=sp.PIPE) as p: + stdout, stderr = p.communicate() + if p.returncode != 0: + decoded = stderr.decode('UTF-8') + raise RuntimeError('Failed to detect availbe GPUs due to:\n%s' % decoded) + decoded = stdout.decode('UTF-8').strip() + num_gpus = int(decoded) + return list(range(num_gpus)) if num_gpus > 0 else None + + +def get_resource() -> Tuple[str, List[str], Optional[List[int]]]: + """Get local resources: nodename, nodelist, and gpus. + + Returns + ------- + Tuple[str, List[str], Optional[List[int]]] + nodename, nodelist, and gpus + """ + nodename = socket.gethostname() + nodelist = [nodename] + gpus = get_gpus() + return nodename, nodelist, gpus diff --git a/deepmd/cluster/slurm.py b/deepmd/cluster/slurm.py new file mode 100644 index 0000000000..feafd84117 --- /dev/null +++ b/deepmd/cluster/slurm.py @@ -0,0 +1,51 @@ +"""MOdule to get resources on SLURM cluster. 
+ +References +---------- +https://github.com/deepsense-ai/tensorflow_on_slurm #### +""" + +import hostlist +import os + +from deepmd.cluster import local +from typing import List, Tuple, Optional + +__all__ = ["get_resource"] + + +def get_resource() -> Tuple[str, List[str], Optional[List[int]]]: + """Get SLURM resources: nodename, nodelist, and gpus. + + Returns + ------- + Tuple[str, List[str], Optional[List[int]]] + nodename, nodelist, and gpus + + Raises + ------ + RuntimeError + if number of nodes could not be retrieved + ValueError + list of nodes is not of the same length sa number of nodes + ValueError + if current nodename is not found in node list + """ + nodelist = hostlist.expand_hostlist(os.environ["SLURM_JOB_NODELIST"]) + nodename = os.environ["SLURMD_NODENAME"] + num_nodes_env = os.getenv("SLURM_JOB_NUM_NODES") + if num_nodes_env: + num_nodes = int(num_nodes_env) + else: + raise RuntimeError("Could not get SLURM number of nodes") + + if len(nodelist) != num_nodes: + raise ValueError( + f"Number of slurm nodes {len(nodelist)} not equal to {num_nodes}" + ) + if nodename not in nodelist: + raise ValueError( + f"Nodename({nodename}) not in nodelist({nodelist}). This should not happen!" 
+ ) + gpus = local.get_gpus() + return nodename, nodelist, gpus diff --git a/deepmd/common.py b/deepmd/common.py new file mode 100644 index 0000000000..03d7d8caf3 --- /dev/null +++ b/deepmd/common.py @@ -0,0 +1,487 @@ +"""Collection of functions and classes used throughout the whole package.""" + +import json +import warnings +from functools import wraps +from pathlib import Path +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + List, + Optional, + Tuple, + TypeVar, + Union, +) + +import numpy as np +import yaml + +from deepmd.env import op_module, tf +from deepmd.env import GLOBAL_TF_FLOAT_PRECISION, GLOBAL_NP_FLOAT_PRECISION +from deepmd.utils.sess import run_sess +from deepmd.utils.errors import GraphWithoutTensorError + +if TYPE_CHECKING: + _DICT_VAL = TypeVar("_DICT_VAL") + _OBJ = TypeVar("_OBJ") + try: + from typing import Literal # python >3.6 + except ImportError: + from typing_extensions import Literal # type: ignore + _ACTIVATION = Literal["relu", "relu6", "softplus", "sigmoid", "tanh", "gelu"] + _PRECISION = Literal["default", "float16", "float32", "float64"] + +# define constants +PRECISION_DICT = { + "default": GLOBAL_TF_FLOAT_PRECISION, + "float16": tf.float16, + "float32": tf.float32, + "float64": tf.float64, +} + + +def gelu(x: tf.Tensor) -> tf.Tensor: + """Gaussian Error Linear Unit. + + This is a smoother version of the RELU. + + Parameters + ---------- + x : tf.Tensor + float Tensor to perform activation + + Returns + ------- + `x` with the GELU activation applied + + References + ---------- + Original paper + https://arxiv.org/abs/1606.08415 + """ + return op_module.gelu(x) + + +# TODO this is not a good way to do things. 
This is some global variable to which +# TODO anyone can write and there is no good way to keep track of the changes +data_requirement = {} + +ACTIVATION_FN_DICT = { + "relu": tf.nn.relu, + "relu6": tf.nn.relu6, + "softplus": tf.nn.softplus, + "sigmoid": tf.sigmoid, + "tanh": tf.nn.tanh, + "gelu": gelu, +} + + +def add_data_requirement( + key: str, + ndof: int, + atomic: bool = False, + must: bool = False, + high_prec: bool = False, + type_sel: bool = None, + repeat: int = 1, +): + """Specify data requirements for training. + + Parameters + ---------- + key : str + type of data stored in corresponding `*.npy` file e.g. `forces` or `energy` + ndof : int + number of the degrees of freedom, this is tied to `atomic` parameter e.g. forces + have `atomic=True` and `ndof=3` + atomic : bool, optional + specifies whwther the `ndof` keyworrd applies to per atom quantity or not, + by default False + must : bool, optional + specifi if the `*.npy` data file must exist, by default False + high_prec : bool, optional + if tru load data to `np.float64` else `np.float32`, by default False + type_sel : bool, optional + select only certain type of atoms, by default None + repeat : int, optional + if specify repaeat data `repeat` times, by default 1 + """ + data_requirement[key] = { + "ndof": ndof, + "atomic": atomic, + "must": must, + "high_prec": high_prec, + "type_sel": type_sel, + "repeat": repeat, + } + + +def select_idx_map( + atom_types: np.ndarray, select_types: np.ndarray +) -> np.ndarray: + """Build map of indices for element supplied element types from all atoms list. 
+ + Parameters + ---------- + atom_types : np.ndarray + array specifing type for each atoms as integer + select_types : np.ndarray + types of atoms you want to find indices for + + Returns + ------- + np.ndarray + indices of types of atoms defined by `select_types` in `atom_types` array + + Warnings + -------- + `select_types` array will be sorted before finding indices in `atom_types` + """ + sort_select_types = np.sort(select_types) + idx_map = np.array([], dtype=int) + for ii in sort_select_types: + idx_map = np.append(idx_map, np.where(atom_types == ii)) + return idx_map + + +# TODO not really sure if the docstring is right the purpose of this is a bit unclear +def make_default_mesh( + test_box: np.ndarray, cell_size: float = 3.0 +) -> np.ndarray: + """Get number of cells of size=`cell_size` fit into average box. + + Parameters + ---------- + test_box : np.ndarray + numpy array with cells of shape Nx9 + cell_size : float, optional + length of one cell, by default 3.0 + + Returns + ------- + np.ndarray + mesh for supplied boxes, how many cells fit in each direction + """ + cell_lengths = np.linalg.norm(test_box.reshape([-1, 3, 3]), axis=2) + avg_cell_lengths = np.average(cell_lengths, axis=0) + ncell = (avg_cell_lengths / cell_size).astype(np.int32) + ncell[ncell < 2] = 2 + default_mesh = np.zeros(6, dtype=np.int32) + default_mesh[3:6] = ncell + return default_mesh + + +# TODO not an ideal approach, every class uses this to parse arguments on its own, json +# TODO should be parsed once and the parsed result passed to all objects that need it +class ClassArg: + """Class that take care of input json/yaml parsing. 
+ + The rules for parsing are defined by the `add` method, than `parse` is called to + process the supplied dict + + Attributes + ---------- + arg_dict: Dict[str, Any] + dictionary containing parsing rules + alias_map: Dict[str, Any] + dictionary with keyword aliases + """ + + def __init__(self) -> None: + self.arg_dict = {} + self.alias_map = {} + + def add( + self, + key: str, + types_: Union[type, List[type]], + alias: Optional[Union[str, List[str]]] = None, + default: Any = None, + must: bool = False, + ) -> "ClassArg": + """Add key to be parsed. + + Parameters + ---------- + key : str + key name + types_ : Union[type, List[type]] + list of allowed key types + alias : Optional[Union[str, List[str]]], optional + alias for the key, by default None + default : Any, optional + default value for the key, by default None + must : bool, optional + if the key is mandatory, by default False + + Returns + ------- + ClassArg + instance with added key + """ + if not isinstance(types_, list): + types = [types_] + else: + types = types_ + if alias is not None: + if not isinstance(alias, list): + alias_ = [alias] + else: + alias_ = alias + else: + alias_ = [] + + self.arg_dict[key] = { + "types": types, + "alias": alias_, + "value": default, + "must": must, + } + for ii in alias_: + self.alias_map[ii] = key + + return self + + def _add_single(self, key: str, data: Any): + vtype = type(data) + if data is None: + return data + if not (vtype in self.arg_dict[key]["types"]): + for tp in self.arg_dict[key]["types"]: + try: + vv = tp(data) + except TypeError: + pass + else: + break + else: + raise TypeError( + f"cannot convert provided key {key} to type(s) " + f'{self.arg_dict[key]["types"]} ' + ) + else: + vv = data + self.arg_dict[key]["value"] = vv + + def _check_must(self): + for kk in self.arg_dict: + if self.arg_dict[kk]["must"] and self.arg_dict[kk]["value"] is None: + raise RuntimeError(f"key {kk} must be provided") + + def parse(self, jdata: Dict[str, Any]) -> Dict[str, 
Any]: + """Parse input dictionary, use the rules defined by add method. + + Parameters + ---------- + jdata : Dict[str, Any] + loaded json/yaml data + + Returns + ------- + Dict[str, Any] + parsed dictionary + """ + for kk in jdata.keys(): + if kk in self.arg_dict: + key = kk + self._add_single(key, jdata[kk]) + else: + if kk in self.alias_map: + key = self.alias_map[kk] + self._add_single(key, jdata[kk]) + self._check_must() + return self.get_dict() + + def get_dict(self) -> Dict[str, Any]: + """Get dictionary built from rules defined by add method. + + Returns + ------- + Dict[str, Any] + settings dictionary with default values + """ + ret = {} + for kk in self.arg_dict.keys(): + ret[kk] = self.arg_dict[kk]["value"] + return ret + + +# TODO maybe rename this to j_deprecated and only warn about deprecated keys, +# TODO if the deprecated_key argument is left empty function puppose is only custom +# TODO error since dict[key] already raises KeyError when the key is missing +def j_must_have( + jdata: Dict[str, "_DICT_VAL"], key: str, deprecated_key: List[str] = [] +) -> "_DICT_VAL": + """Assert that supplied dictionary conaines specified key. + + Returns + ------- + _DICT_VAL + value that was store unde supplied key + + Raises + ------ + RuntimeError + if the key is not present + """ + if key not in jdata.keys(): + for ii in deprecated_key: + if ii in jdata.keys(): + warnings.warn(f"the key {ii} is deprecated, please use {key} instead") + return jdata[ii] + else: + raise RuntimeError(f"json database must provide key {key}") + else: + return jdata[key] + + +def j_loader(filename: Union[str, Path]) -> Dict[str, Any]: + """Load yaml or json settings file. 
+ + Parameters + ---------- + filename : Union[str, Path] + path to file + + Returns + ------- + Dict[str, Any] + loaded dictionary + + Raises + ------ + TypeError + if the supplied file is of unsupported type + """ + filepath = Path(filename) + if filepath.suffix.endswith("json"): + with filepath.open() as fp: + return json.load(fp) + elif filepath.suffix.endswith(("yml", "yaml")): + with filepath.open() as fp: + return yaml.safe_load(fp) + else: + raise TypeError("config file must be json, or yaml/yml") + + +def get_activation_func( + activation_fn: "_ACTIVATION", +) -> Callable[[tf.Tensor], tf.Tensor]: + """Get activation function callable based on string name. + + Parameters + ---------- + activation_fn : _ACTIVATION + one of the defined activation functions + + Returns + ------- + Callable[[tf.Tensor], tf.Tensor] + correspondingg TF callable + + Raises + ------ + RuntimeError + if unknown activation function is specified + """ + if activation_fn not in ACTIVATION_FN_DICT: + raise RuntimeError(f"{activation_fn} is not a valid activation function") + return ACTIVATION_FN_DICT[activation_fn] + + +def get_precision(precision: "_PRECISION") -> Any: + """Convert str to TF DType constant. + + Parameters + ---------- + precision : _PRECISION + one of the allowed precisions + + Returns + ------- + tf.python.framework.dtypes.DType + appropriate TF constant + + Raises + ------ + RuntimeError + if supplied precision string does not have acorresponding TF constant + """ + if precision not in PRECISION_DICT: + raise RuntimeError(f"{precision} is not a valid precision") + return PRECISION_DICT[precision] + + +# TODO port completely to pathlib when all callers are ported +def expand_sys_str(root_dir: Union[str, Path]) -> List[str]: + """Recursively iterate over directories taking those that contain `type.raw` file. 
+ + Parameters + ---------- + root_dir : Union[str, Path] + starting directory + + Returns + ------- + List[str] + list of string pointing to system directories + """ + matches = [str(d) for d in Path(root_dir).rglob("*") if (d / "type.raw").is_file()] + if (Path(root_dir) / "type.raw").is_file(): + matches += [root_dir] + return matches + + +def docstring_parameter(*sub: Tuple[str, ...]): + """Add parameters to object docstring. + + Parameters + ---------- + sub: Tuple[str, ...] + list of strings that will be inserted into prepared locations in docstring. + + Note + ---- + Can be used on both object and classes. + """ + + @wraps + def dec(obj: "_OBJ") -> "_OBJ": + if obj.__doc__ is not None: + obj.__doc__ = obj.__doc__.format(*sub) + return obj + + return dec + + +def get_np_precision(precision: "_PRECISION") -> np.dtype: + """Get numpy precision constant from string. + + Parameters + ---------- + precision : _PRECISION + string name of numpy constant or default + + Returns + ------- + np.dtype + numpy presicion constant + + Raises + ------ + RuntimeError + if string is invalid + """ + if precision == "default": + return GLOBAL_NP_FLOAT_PRECISION + elif precision == "float16": + return np.float16 + elif precision == "float32": + return np.float32 + elif precision == "float64": + return np.float64 + else: + raise RuntimeError(f"{precision} is not a valid precision") diff --git a/deepmd/descriptor/__init__.py b/deepmd/descriptor/__init__.py new file mode 100644 index 0000000000..7c3d910091 --- /dev/null +++ b/deepmd/descriptor/__init__.py @@ -0,0 +1,9 @@ +from .hybrid import DescrptHybrid +from .se_a import DescrptSeA +from .se_r import DescrptSeR +from .se_ar import DescrptSeAR +from .se_t import DescrptSeT +from .se_a_ebd import DescrptSeAEbd +from .se_a_ef import DescrptSeAEf +from .se_a_ef import DescrptSeAEfLower +from .loc_frame import DescrptLocFrame diff --git a/deepmd/descriptor/hybrid.py b/deepmd/descriptor/hybrid.py new file mode 100644 index 
0000000000..013ee9f753 --- /dev/null +++ b/deepmd/descriptor/hybrid.py @@ -0,0 +1,221 @@ +import numpy as np +from typing import Tuple, List + +from deepmd.env import tf +from deepmd.common import ClassArg +from deepmd.env import op_module +from deepmd.env import GLOBAL_TF_FLOAT_PRECISION +from deepmd.env import GLOBAL_NP_FLOAT_PRECISION +# from deepmd.descriptor import DescrptLocFrame +# from deepmd.descriptor import DescrptSeA +# from deepmd.descriptor import DescrptSeT +# from deepmd.descriptor import DescrptSeAEbd +# from deepmd.descriptor import DescrptSeAEf +# from deepmd.descriptor import DescrptSeR +from .se_a import DescrptSeA +from .se_r import DescrptSeR +from .se_ar import DescrptSeAR +from .se_t import DescrptSeT +from .se_a_ebd import DescrptSeAEbd +from .se_a_ef import DescrptSeAEf +from .loc_frame import DescrptLocFrame + +class DescrptHybrid (): + """Concate a list of descriptors to form a new descriptor. + + Parameters + ---------- + descrpt_list : list + Build a descriptor from the concatenation of the list of descriptors. 
+ """ + def __init__ (self, + descrpt_list : list + ) -> None : + """ + Constructor + """ + if descrpt_list == [] or descrpt_list is None: + raise RuntimeError('cannot build descriptor from an empty list of descriptors.') + # args = ClassArg()\ + # .add('list', list, must = True) + # class_data = args.parse(jdata) + # dict_list = class_data['list'] + self.descrpt_list = descrpt_list + self.numb_descrpt = len(self.descrpt_list) + for ii in range(1, self.numb_descrpt): + assert(self.descrpt_list[ii].get_ntypes() == + self.descrpt_list[ 0].get_ntypes()), \ + f'number of atom types in {ii}th descrptor does not match others' + + + def get_rcut (self) -> float: + """ + Returns the cut-off radius + """ + all_rcut = [ii.get_rcut() for ii in self.descrpt_list] + return np.max(all_rcut) + + + def get_ntypes (self) -> int: + """ + Returns the number of atom types + """ + return self.descrpt_list[0].get_ntypes() + + + def get_dim_out (self) -> int: + """ + Returns the output dimension of this descriptor + """ + all_dim_out = [ii.get_dim_out() for ii in self.descrpt_list] + return sum(all_dim_out) + + + def get_nlist_i(self, + ii : int + ) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + """Get the neighbor information of the ii-th descriptor + + Parameters + ---------- + ii : int + The index of the descriptor + + Returns + ------- + nlist + Neighbor list + rij + The relative distance between the neighbor and the center atom. + sel_a + The number of neighbors with full information + sel_r + The number of neighbors with only radial information + """ + return self.descrpt_list[ii].nlist, self.descrpt_list[ii].rij, self.descrpt_list[ii].sel_a, self.descrpt_list[ii].sel_r + + + def compute_input_stats (self, + data_coord : list, + data_box : list, + data_atype : list, + natoms_vec : list, + mesh : list, + input_dict : dict + ) -> None : + """ + Compute the statisitcs (avg and std) of the training data. The input will be normalized by the statistics. 
+ + Parameters + ---------- + data_coord + The coordinates. Can be generated by deepmd.model.make_stat_input + data_box + The box. Can be generated by deepmd.model.make_stat_input + data_atype + The atom types. Can be generated by deepmd.model.make_stat_input + natoms_vec + The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input + mesh + The mesh for neighbor searching. Can be generated by deepmd.model.make_stat_input + input_dict + Dictionary for additional input + """ + for ii in self.descrpt_list: + ii.compute_input_stats(data_coord, data_box, data_atype, natoms_vec, mesh, input_dict) + + + def build (self, + coord_ : tf.Tensor, + atype_ : tf.Tensor, + natoms : tf.Tensor, + box_ : tf.Tensor, + mesh : tf.Tensor, + input_dict : dict, + reuse : bool = None, + suffix : str = '' + ) -> tf.Tensor: + """ + Build the computational graph for the descriptor + + Parameters + ---------- + coord_ + The coordinate of atoms + atype_ + The type of atoms + natoms + The number of atoms. This tensor has the length of Ntypes + 2 + natoms[0]: number of local atoms + natoms[1]: total number of atoms held by this processor + natoms[i]: 2 <= i < Ntypes+2, number of type i atoms + mesh + For historical reasons, only the length of the Tensor matters. + if size of mesh == 6, pbc is assumed. + if size of mesh == 0, no-pbc is assumed. + input_dict + Dictionary for additional inputs + reuse + The weights in the networks should be reused when get the variable. 
+ suffix + Name suffix to identify this descriptor + + Returns + ------- + descriptor + The output descriptor + """ + with tf.variable_scope('descrpt_attr' + suffix, reuse = reuse) : + t_rcut = tf.constant(self.get_rcut(), + name = 'rcut', + dtype = GLOBAL_TF_FLOAT_PRECISION) + t_ntypes = tf.constant(self.get_ntypes(), + name = 'ntypes', + dtype = tf.int32) + all_dout = [] + for idx,ii in enumerate(self.descrpt_list): + dout = ii.build(coord_, atype_, natoms, box_, mesh, input_dict, suffix=suffix+f'_{idx}', reuse=reuse) + dout = tf.reshape(dout, [-1, ii.get_dim_out()]) + all_dout.append(dout) + dout = tf.concat(all_dout, axis = 1) + dout = tf.reshape(dout, [-1, natoms[0] * self.get_dim_out()]) + return dout + + + def prod_force_virial(self, + atom_ener : tf.Tensor, + natoms : tf.Tensor + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + """ + Compute force and virial + + Parameters + ---------- + atom_ener + The atomic energy + natoms + The number of atoms. This tensor has the length of Ntypes + 2 + natoms[0]: number of local atoms + natoms[1]: total number of atoms held by this processor + natoms[i]: 2 <= i < Ntypes+2, number of type i atoms + + Returns + ------- + force + The force on atoms + virial + The total virial + atom_virial + The atomic virial + """ + for idx,ii in enumerate(self.descrpt_list): + ff, vv, av = ii.prod_force_virial(atom_ener, natoms) + if idx == 0: + force = ff + virial = vv + atom_virial = av + else: + force += ff + virial += vv + atom_virial += av + return force, virial, atom_virial diff --git a/source/train/DescrptLocFrame.py b/deepmd/descriptor/loc_frame.py similarity index 52% rename from source/train/DescrptLocFrame.py rename to deepmd/descriptor/loc_frame.py index 69c1473db0..be57403d60 100644 --- a/source/train/DescrptLocFrame.py +++ b/deepmd/descriptor/loc_frame.py @@ -1,23 +1,58 @@ import numpy as np +from typing import Tuple, List + from deepmd.env import tf -from deepmd.common import ClassArg -from deepmd.RunOptions import 
global_tf_float_precision -from deepmd.RunOptions import global_np_float_precision +from deepmd.env import GLOBAL_TF_FLOAT_PRECISION +from deepmd.env import GLOBAL_NP_FLOAT_PRECISION from deepmd.env import op_module from deepmd.env import default_tf_session_config +from deepmd.utils.sess import run_sess class DescrptLocFrame () : - def __init__(self, jdata): - args = ClassArg()\ - .add('sel_a', list, must = True) \ - .add('sel_r', list, must = True) \ - .add('rcut', float, default = 6.0) \ - .add('axis_rule',list, must = True) - class_data = args.parse(jdata) - self.sel_a = class_data['sel_a'] - self.sel_r = class_data['sel_r'] - self.axis_rule = class_data['axis_rule'] - self.rcut_r = class_data['rcut'] + """Defines a local frame at each atom, and the compute the descriptor as local + coordinates under this frame. + + Parameters + ---------- + rcut + The cut-off radius + sel_a : list[str] + The length of the list should be the same as the number of atom types in the system. + `sel_a[i]` gives the selected number of type-i neighbors. + The full relative coordinates of the neighbors are used by the descriptor. + sel_r : list[str] + The length of the list should be the same as the number of atom types in the system. + `sel_r[i]` gives the selected number of type-i neighbors. + Only relative distance of the neighbors are used by the descriptor. + sel_a[i] + sel_r[i] is recommended to be larger than the maximally possible number of type-i neighbors in the cut-off radius. + axis_rule: list[int] + The length should be 6 times of the number of types. + - axis_rule[i*6+0]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\ + - axis_rule[i*6+1]: type of the atom defining the first axis of type-i atom.\n\n\ + - axis_rule[i*6+2]: index of the axis atom defining the first axis. 
Note that the neighbors with the same class and type are sorted according to their relative distance.\n\n\ + - axis_rule[i*6+3]: class of the atom defining the first axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance.\n\n\ + - axis_rule[i*6+4]: type of the atom defining the second axis of type-i atom.\n\n\ + - axis_rule[i*6+5]: class of the atom defining the second axis of type-i atom. 0 for neighbors with full coordinates and 1 for neighbors only with relative distance. + """ + def __init__(self, + rcut: float, + sel_a : List[int], + sel_r : List[int], + axis_rule : List[int] + ) -> None: + """ + Constructor + """ + # args = ClassArg()\ + # .add('sel_a', list, must = True) \ + # .add('sel_r', list, must = True) \ + # .add('rcut', float, default = 6.0) \ + # .add('axis_rule',list, must = True) + # class_data = args.parse(jdata) + self.sel_a = sel_a + self.sel_r = sel_r + self.axis_rule = axis_rule + self.rcut_r = rcut # ntypes and rcut_a === -1 self.ntypes = len(self.sel_a) assert(self.ntypes == len(self.sel_r)) @@ -33,13 +68,13 @@ def __init__(self, jdata): self.dstd = None self.place_holders = {} - avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(global_np_float_precision) - std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(global_np_float_precision) + avg_zero = np.zeros([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) + std_ones = np.ones ([self.ntypes,self.ndescrpt]).astype(GLOBAL_NP_FLOAT_PRECISION) sub_graph = tf.Graph() with sub_graph.as_default(): name_pfx = 'd_lf_' for ii in ['coord', 'box']: - self.place_holders[ii] = tf.placeholder(global_np_float_precision, [None, None], name = name_pfx+'t_'+ii) + self.place_holders[ii] = tf.placeholder(GLOBAL_NP_FLOAT_PRECISION, [None, None], name = name_pfx+'t_'+ii) self.place_holders['type'] = tf.placeholder(tf.int32, [None, None], name=name_pfx+'t_type') self.place_holders['natoms_vec'] = tf.placeholder(tf.int32, [self.ntypes+2], 
name=name_pfx+'t_natoms') self.place_holders['default_mesh'] = tf.placeholder(tf.int32, [None], name=name_pfx+'t_mesh') @@ -59,24 +94,65 @@ def __init__(self, jdata): self.sub_sess = tf.Session(graph = sub_graph, config=default_tf_session_config) - def get_rcut (self) : + def get_rcut (self) -> float: + """ + Returns the cut-off radisu + """ return self.rcut_r - def get_ntypes (self) : + def get_ntypes (self) -> int: + """ + Returns the number of atom types + """ return self.ntypes - def get_dim_out (self) : + def get_dim_out (self) -> int: + """ + Returns the output dimension of this descriptor + """ return self.ndescrpt - def get_nlist (self) : + def get_nlist (self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]: + """ + Returns + ------- + nlist + Neighbor list + rij + The relative distance between the neighbor and the center atom. + sel_a + The number of neighbors with full information + sel_r + The number of neighbors with only radial information + """ return self.nlist, self.rij, self.sel_a, self.sel_r def compute_input_stats (self, - data_coord, - data_box, - data_atype, - natoms_vec, - mesh) : + data_coord : list, + data_box : list, + data_atype : list, + natoms_vec : list, + mesh : list, + input_dict : dict + ) -> None : + """ + Compute the statisitcs (avg and std) of the training data. The input will be normalized by the statistics. + + Parameters + ---------- + data_coord + The coordinates. Can be generated by deepmd.model.make_stat_input + data_box + The box. Can be generated by deepmd.model.make_stat_input + data_atype + The atom types. Can be generated by deepmd.model.make_stat_input + natoms_vec + The vector for the number of atoms of the system and different types of atoms. Can be generated by deepmd.model.make_stat_input + mesh + The mesh for neighbor searching. 
Can be generated by deepmd.model.make_stat_input + input_dict + Dictionary for additional input + """ all_davg = [] all_dstd = [] if True: @@ -105,13 +181,45 @@ def compute_input_stats (self, def build (self, - coord_, - atype_, - natoms, - box_, - mesh, - suffix = '', - reuse = None): + coord_ : tf.Tensor, + atype_ : tf.Tensor, + natoms : tf.Tensor, + box_ : tf.Tensor, + mesh : tf.Tensor, + input_dict : dict, + reuse : bool = None, + suffix : str = '' + ) -> tf.Tensor: + """ + Build the computational graph for the descriptor + + Parameters + ---------- + coord_ + The coordinate of atoms + atype_ + The type of atoms + natoms + The number of atoms. This tensor has the length of Ntypes + 2 + natoms[0]: number of local atoms + natoms[1]: total number of atoms held by this processor + natoms[i]: 2 <= i < Ntypes+2, number of type i atoms + mesh + For historical reasons, only the length of the Tensor matters. + if size of mesh == 6, pbc is assumed. + if size of mesh == 0, no-pbc is assumed. + input_dict + Dictionary for additional inputs + reuse + The weights in the networks should be reused when get the variable. 
+ suffix + Name suffix to identify this descriptor + + Returns + ------- + descriptor + The output descriptor + """ davg = self.davg dstd = self.dstd with tf.variable_scope('descrpt_attr' + suffix, reuse = reuse) : @@ -121,18 +229,18 @@ def build (self, dstd = np.ones ([self.ntypes, self.ndescrpt]) t_rcut = tf.constant(np.max([self.rcut_r, self.rcut_a]), name = 'rcut', - dtype = global_tf_float_precision) + dtype = GLOBAL_TF_FLOAT_PRECISION) t_ntypes = tf.constant(self.ntypes, name = 'ntypes', dtype = tf.int32) self.t_avg = tf.get_variable('t_avg', davg.shape, - dtype = global_tf_float_precision, + dtype = GLOBAL_TF_FLOAT_PRECISION, trainable = False, initializer = tf.constant_initializer(davg)) self.t_std = tf.get_variable('t_std', dstd.shape, - dtype = global_tf_float_precision, + dtype = GLOBAL_TF_FLOAT_PRECISION, trainable = False, initializer = tf.constant_initializer(dstd)) @@ -154,13 +262,46 @@ def build (self, sel_r = self.sel_r, axis_rule = self.axis_rule) self.descrpt = tf.reshape(self.descrpt, [-1, self.ndescrpt]) + tf.summary.histogram('descrpt', self.descrpt) + tf.summary.histogram('rij', self.rij) + tf.summary.histogram('nlist', self.nlist) + return self.descrpt - def get_rot_mat(self) : + def get_rot_mat(self) -> tf.Tensor: + """ + Get rotational matrix + """ return self.rot_mat - def prod_force_virial(self, atom_ener, natoms) : + def prod_force_virial(self, + atom_ener : tf.Tensor, + natoms : tf.Tensor + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + """ + Compute force and virial + + Parameters + ---------- + atom_ener + The atomic energy + natoms + The number of atoms. 
This tensor has the length of Ntypes + 2 + natoms[0]: number of local atoms + natoms[1]: total number of atoms held by this processor + natoms[i]: 2 <= i < Ntypes+2, number of type i atoms + + Returns + ------- + force + The force on atoms + virial + The total virial + atom_virial + The atomic virial + """ [net_deriv] = tf.gradients (atom_ener, self.descrpt) + tf.summary.histogram('net_derivative', net_deriv) net_deriv_reshape = tf.reshape (net_deriv, [-1, natoms[0] * self.ndescrpt]) force = op_module.prod_force (net_deriv_reshape, self.descrpt_deriv, @@ -178,6 +319,9 @@ def prod_force_virial(self, atom_ener, natoms) : natoms, n_a_sel = self.nnei_a, n_r_sel = self.nnei_r) + tf.summary.histogram('force', force) + tf.summary.histogram('virial', virial) + tf.summary.histogram('atom_virial', atom_virial) return force, virial, atom_virial @@ -189,7 +333,7 @@ def _compute_dstats_sys_nonsmth (self, natoms_vec, mesh) : dd_all \ - = self.sub_sess.run(self.stat_descrpt, + = run_sess(self.sub_sess, self.stat_descrpt, feed_dict = { self.place_holders['coord']: data_coord, self.place_holders['type']: data_atype, diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py new file mode 100644 index 0000000000..fbc9a77b56 --- /dev/null +++ b/deepmd/descriptor/se_a.py @@ -0,0 +1,845 @@ +import math +import numpy as np +from typing import Tuple, List, Dict, Any + +from deepmd.env import tf +from deepmd.common import get_activation_func, get_precision, ACTIVATION_FN_DICT, PRECISION_DICT, docstring_parameter, get_np_precision +from deepmd.utils.argcheck import list_to_doc +from deepmd.env import GLOBAL_TF_FLOAT_PRECISION +from deepmd.env import GLOBAL_NP_FLOAT_PRECISION +from deepmd.env import op_module +from deepmd.env import default_tf_session_config +from deepmd.utils.network import embedding_net, embedding_net_rand_seed_shift +from deepmd.utils.tabulate import DPTabulate +from deepmd.utils.type_embed import embed_atom_type +from deepmd.utils.sess import run_sess +from 
deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph + +class DescrptSeA (): + r"""DeepPot-SE constructed from all information (both angular and radial) of + atomic configurations. The embedding takes the distance between atoms as input. + + The descriptor :math:`\mathcal{D}^i \in \mathcal{R}^{M_1 \times M_2}` is given by [1]_ + + .. math:: + \mathcal{D}^i = (\mathcal{G}^i)^T \mathcal{R}^i (\mathcal{R}^i)^T \mathcal{G}^i_< + + where :math:`\mathcal{R}^i \in \mathbb{R}^{N \times 4}` is the coordinate + matrix, and each row of :math:`\mathcal{R}^i` can be constructed as follows + + .. math:: + (\mathcal{R}^i)_j = [ + \begin{array}{c} + s(r_{ji}) & x_{ji} & y_{ji} & z_{ji} + \end{array} + ] + + where :math:`\mathbf{R}_{ji}=\mathbf{R}_j-\mathbf{R}_i = (x_{ji}, y_{ji}, z_{ji})` is + the relative coordinate and :math:`r_{ji}=\lVert \mathbf{R}_{ji} \lVert` is its norm. + The switching function :math:`s(r)` is defined as: + + .. math:: + s(r)= + \begin{cases} + \frac{1}{r}, & r