diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index bb34fe0bf6..c497e86e1d 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -33,7 +33,7 @@ jobs: platform_id: manylinux_aarch64 dp_variant: cpu steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: submodules: true # https://github.com/pypa/setuptools_scm/issues/480 @@ -42,13 +42,13 @@ jobs: name: Setup QEMU if: matrix.platform_id == 'manylinux_aarch64' - name: Build wheels - uses: pypa/cibuildwheel@v2.11.3 + uses: pypa/cibuildwheel@v2.12.1 env: CIBW_BUILD_VERBOSITY: 1 CIBW_ARCHS: all CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }} DP_VARIANT: ${{ matrix.dp_variant }} - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: path: ./wheelhouse/*.whl build_sdist: @@ -79,7 +79,7 @@ jobs: with: name: artifact path: dist - - uses: pypa/gh-action-pypi-publish@v4 + - uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ password: ${{ secrets.pypi_password }} @@ -103,12 +103,12 @@ jobs: - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 + uses: docker/metadata-action@507c2f2dc502c992ad446e3d7a5dfbe311567a96 with: images: ghcr.io/deepmodeling/deepmd-kit - name: Build and push Docker image - uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc + uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671 with: context: source/install/docker push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' }} diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml index e57c0a30e1..6cc5ed8dc8 100644 --- a/.github/workflows/package_c.yml +++ b/.github/workflows/package_c.yml @@ -9,14 +9,14 @@ jobs: name: Build C library runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Package C library run: 
./source/install/docker_package_c.sh - name: Test C library run: ./source/install/docker_test_package_c.sh # for download and debug - name: Upload artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: path: ./libdeepmd_c.tar.gz - name: Release diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py index d49c329353..2bff523608 100644 --- a/deepmd/descriptor/se_a.py +++ b/deepmd/descriptor/se_a.py @@ -96,8 +96,8 @@ class DescrptSeA(DescrptSe): .. math:: (\mathcal{G}^i)_j = \mathcal{N}(s(r_{ji})) - :math:`\mathcal{G}^i_< \in \mathbb{R}^{N \times M_2}` takes first :math:`M_2`$` columns of - :math:`\mathcal{G}^i`$`. The equation of embedding network :math:`\mathcal{N}` can be found at + :math:`\mathcal{G}^i_< \in \mathbb{R}^{N \times M_2}` takes first :math:`M_2` columns of + :math:`\mathcal{G}^i`. The equation of embedding network :math:`\mathcal{N}` can be found at :meth:`deepmd.utils.network.embedding_net`. Parameters diff --git a/deepmd/descriptor/se_a_mask.py b/deepmd/descriptor/se_a_mask.py index 00b23232f1..93256e3910 100644 --- a/deepmd/descriptor/se_a_mask.py +++ b/deepmd/descriptor/se_a_mask.py @@ -70,8 +70,8 @@ class DescrptSeAMask(DescrptSeA): .. math:: (\mathcal{G}^i)_j = \mathcal{N}(s(r_{ji})) - :math:`\mathcal{G}^i_< \in \mathbb{R}^{N \times M_2}` takes first :math:`M_2`$` columns of - :math:`\mathcal{G}^i`$`. The equation of embedding network :math:`\mathcal{N}` can be found at + :math:`\mathcal{G}^i_< \in \mathbb{R}^{N \times M_2}` takes first :math:`M_2` columns of + :math:`\mathcal{G}^i`. The equation of embedding network :math:`\mathcal{N}` can be found at :meth:`deepmd.utils.network.embedding_net`. Specially for descriptor se_a_mask is a concise implementation of se_a. The difference is that se_a_mask only considered a non-pbc system. 
diff --git a/deepmd/descriptor/se_atten.py b/deepmd/descriptor/se_atten.py index 356c1fae2b..ba8fa9adfa 100644 --- a/deepmd/descriptor/se_atten.py +++ b/deepmd/descriptor/se_atten.py @@ -1,3 +1,4 @@ +import warnings from typing import ( List, Optional, @@ -67,6 +68,8 @@ class DescrptSeAtten(DescrptSeA): exclude_types : List[List[int]] The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1. + set_davg_zero + Set the shift of embedding net input to zero. activation_function The activation function in the embedding net. Supported options are |ACTIVATION_FN| precision @@ -97,6 +100,7 @@ def __init__( trainable: bool = True, seed: Optional[int] = None, type_one_side: bool = True, + set_davg_zero: bool = True, exclude_types: List[List[int]] = [], activation_function: str = "tanh", precision: str = "default", @@ -107,6 +111,11 @@ def __init__( attn_mask: bool = False, multi_task: bool = False, ) -> None: + if not set_davg_zero: + warnings.warn( + "Set 'set_davg_zero' False in descriptor 'se_atten' " + "may cause unexpected discontinuity during model inference!" + ) DescrptSeA.__init__( self, rcut, @@ -119,7 +128,7 @@ def __init__( seed=seed, type_one_side=type_one_side, exclude_types=exclude_types, - set_davg_zero=True, + set_davg_zero=set_davg_zero, activation_function=activation_function, precision=precision, uniform_seed=uniform_seed, diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py index d65a3a984b..c9f70117f7 100644 --- a/deepmd/fit/ener.py +++ b/deepmd/fit/ener.py @@ -59,9 +59,9 @@ class EnerFitting(Fitting): \mathbf{y}=\mathcal{L}(\mathbf{x};\mathbf{w},\mathbf{b})= \boldsymbol{\phi}(\mathbf{x}^T\mathbf{w}+\mathbf{b}) - where :math:`\mathbf{x} \in \mathbb{R}^{N_1}`$` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}` + where :math:`\mathbf{x} \in \mathbb{R}^{N_1}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}` is the output vector. 
:math:`\mathbf{w} \in \mathbb{R}^{N_1 \times N_2}` and - :math:`\mathbf{b} \in \mathbb{R}^{N_2}`$` are weights and biases, respectively, + :math:`\mathbf{b} \in \mathbb{R}^{N_2}` are weights and biases, respectively, both of which are trainable if `trainable[i]` is `True`. :math:`\boldsymbol{\phi}` is the activation function. @@ -71,9 +71,9 @@ class EnerFitting(Fitting): \mathbf{y}=\mathcal{L}^{(n)}(\mathbf{x};\mathbf{w},\mathbf{b})= \mathbf{x}^T\mathbf{w}+\mathbf{b} - where :math:`\mathbf{x} \in \mathbb{R}^{N_{n-1}}`$` is the input vector and :math:`\mathbf{y} \in \mathbb{R}` + where :math:`\mathbf{x} \in \mathbb{R}^{N_{n-1}}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}` is the output scalar. :math:`\mathbf{w} \in \mathbb{R}^{N_{n-1}}` and - :math:`\mathbf{b} \in \mathbb{R}`$` are weights and bias, respectively, + :math:`\mathbf{b} \in \mathbb{R}` are weights and bias, respectively, both of which are trainable if `trainable[n]` is `True`. Parameters @@ -549,13 +549,14 @@ def build( aparam = tf.reshape(aparam, [-1, self.numb_aparam * natoms[0]]) atype_nall = tf.reshape(atype, [-1, natoms[1]]) - atype_filter = tf.cast(atype_nall >= 0, GLOBAL_TF_FLOAT_PRECISION) + self.atype_nloc = tf.slice( + atype_nall, [0, 0], [-1, natoms[0]] + ) ## lammps will make error + atype_filter = tf.cast(self.atype_nloc >= 0, GLOBAL_TF_FLOAT_PRECISION) + self.atype_nloc = tf.reshape(self.atype_nloc, [-1]) # prevent embedding_lookup error, # but the filter will be applied anyway - atype_nall = tf.clip_by_value(atype_nall, 0, self.ntypes - 1) - self.atype_nloc = tf.reshape( - tf.slice(atype_nall, [0, 0], [-1, natoms[0]]), [-1] - ) ## lammps will make error + self.atype_nloc = tf.clip_by_value(self.atype_nloc, 0, self.ntypes - 1) if type_embedding is not None: atype_embed = tf.nn.embedding_lookup(type_embedding, self.atype_nloc) else: diff --git a/deepmd/loggers/loggers.py b/deepmd/loggers/loggers.py index bb2e5f2dad..4249d7fb3d 100644 --- a/deepmd/loggers/loggers.py +++ 
b/deepmd/loggers/loggers.py @@ -229,6 +229,8 @@ def set_log_handles( ch.setLevel(level) ch.addFilter(_AppFilter()) + # clean old handlers before adding new one + root_log.handlers.clear() root_log.addHandler(ch) # * add file handler *************************************************************** diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 93135304a9..9fb977e5eb 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -325,6 +325,7 @@ def descrpt_se_atten_args(): doc_precision = f"The precision of the embedding net parameters, supported options are {list_to_doc(PRECISION_DICT.keys())} Default follows the interface precision." doc_trainable = "If the parameters in the embedding net is trainable" doc_seed = "Random seed for parameter initialization" + doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `se_atten` descriptor or `atom_ener` in the energy fitting is used" doc_exclude_types = "The excluded pairs of types which have no interaction with each other. For example, `[[0, 1]]` means no interaction between type 0 and type 1." doc_attn = "The length of hidden vectors in attention layers" doc_attn_layer = "The number of attention layers" @@ -361,6 +362,9 @@ def descrpt_se_atten_args(): Argument( "exclude_types", list, optional=True, default=[], doc=doc_exclude_types ), + Argument( + "set_davg_zero", bool, optional=True, default=True, doc=doc_set_davg_zero + ), Argument("attn", int, optional=True, default=128, doc=doc_attn), Argument("attn_layer", int, optional=True, default=2, doc=doc_attn_layer), Argument("attn_dotr", bool, optional=True, default=True, doc=doc_attn_dotr), @@ -972,7 +976,8 @@ def training_data_args(): # ! added by Ziyao: new specification style for data - list: the length of which is the same as the {link_sys}. 
The batch size of each system is given by the elements of the list.\n\n\ - int: all {link_sys} use the same batch size.\n\n\ - string "auto": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than 32.\n\n\ -- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.' +- string "auto:N": automatically determines the batch size so that the batch_size times the number of atoms in the system is no less than N.\n\n\ +- string "mixed:N": the batch data will be sampled from all systems and merged into a mixed system with the batch size N. Only support the se_atten descriptor.' doc_auto_prob_style = 'Determine the probability of systems automatically. The method is assigned by this key and can be\n\n\ - "prob_uniform" : the probability all the systems are equal, namely 1.0/self.get_nsystems()\n\n\ - "prob_sys_size" : the probability of a system is proportional to the number of batches in the system\n\n\ diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index 537c5b4868..66870e85b2 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -91,6 +91,10 @@ def __init__( self.type_idx_map = np.array( sorter[np.searchsorted(type_map, self.type_map, sorter=sorter)] ) + # padding for virtual atom + self.type_idx_map = np.append( + self.type_idx_map, np.array([-1], dtype=np.int32) + ) self.type_map = type_map if type_map is None and self.type_map is None and self.mixed_type: raise RuntimeError("mixed_type format must have type_map!") @@ -489,8 +493,12 @@ def _load_set(self, set_name: DPPath): [(real_type == i).sum(axis=-1) for i in range(self.get_ntypes())], dtype=np.int32, ).T + ghost_nums = np.array( + [(real_type == -1).sum(axis=-1)], + dtype=np.int32, + ).T assert ( - atom_type_nums.sum(axis=-1) == natoms + atom_type_nums.sum(axis=-1) + ghost_nums.sum(axis=-1) == natoms ).all(), "some types in 'real_atom_types.npy' of set 
{} are not contained in {} types!".format( set_name, self.get_ntypes() ) diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py index 28dc799bf8..d87219fcc9 100644 --- a/deepmd/utils/data_system.py +++ b/deepmd/utils/data_system.py @@ -112,6 +112,7 @@ def __init__( # batch size self.batch_size = batch_size is_auto_bs = False + self.mixed_systems = False if isinstance(self.batch_size, int): self.batch_size = self.batch_size * np.ones(self.nsystems, dtype=int) elif isinstance(self.batch_size, str): @@ -121,9 +122,16 @@ def __init__( rule = 32 if len(words) == 2: rule = int(words[1]) + self.batch_size = self._make_auto_bs(rule) + elif "mixed" == words[0]: + self.mixed_systems = True + if len(words) == 2: + rule = int(words[1]) + else: + raise RuntimeError("batch size must be specified for mixed systems") + self.batch_size = rule * np.ones(self.nsystems, dtype=int) else: raise RuntimeError("unknown batch_size rule " + words[0]) - self.batch_size = self._make_auto_bs(rule) elif isinstance(self.batch_size, list): pass else: @@ -361,7 +369,7 @@ def _get_sys_probs(self, sys_probs, auto_prob_style): # depreciated prob = self._process_sys_probs(sys_probs) return prob - def get_batch(self, sys_idx: Optional[int] = None): + def get_batch(self, sys_idx: Optional[int] = None) -> dict: # batch generation style altered by Ziyao Li: # one should specify the "sys_prob" and "auto_prob_style" params # via set_sys_prob() function. The sys_probs this function uses is @@ -375,9 +383,36 @@ def get_batch(self, sys_idx: Optional[int] = None): The index of system from which the batch is get. If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following. + This option does not work for mixed systems. 
+ + Returns + ------- + dict + The batch data """ if not hasattr(self, "default_mesh"): self._make_default_mesh() + if not self.mixed_systems: + b_data = self.get_batch_standard(sys_idx) + else: + b_data = self.get_batch_mixed() + return b_data + + def get_batch_standard(self, sys_idx: Optional[int] = None) -> dict: + """Get a batch of data from the data systems in the standard way. + + Parameters + ---------- + sys_idx : int + The index of system from which the batch is get. + If sys_idx is not None, `sys_probs` and `auto_prob_style` are ignored + If sys_idx is None, automatically determine the system according to `sys_probs` or `auto_prob_style`, see the following. + + Returns + ------- + dict + The batch data + """ if sys_idx is not None: self.pick_idx = sys_idx else: @@ -390,6 +425,73 @@ def get_batch(self, sys_idx: Optional[int] = None): b_data["default_mesh"] = self.default_mesh[self.pick_idx] return b_data + def get_batch_mixed(self) -> dict: + """Get a batch of data from the data systems in the mixed way. + + Returns + ------- + dict + The batch data + """ + # mixed systems have a global batch size + batch_size = self.batch_size[0] + batch_data = [] + for _ in range(batch_size): + self.pick_idx = dp_random.choice(np.arange(self.nsystems), p=self.sys_probs) + bb_data = self.data_systems[self.pick_idx].get_batch(1) + bb_data["natoms_vec"] = self.natoms_vec[self.pick_idx] + bb_data["default_mesh"] = self.default_mesh[self.pick_idx] + batch_data.append(bb_data) + b_data = self._merge_batch_data(batch_data) + return b_data + + def _merge_batch_data(self, batch_data: List[dict]) -> dict: + """Merge batch data from different systems. + + Parameters + ---------- + batch_data : list of dict + A list of batch data from different systems. + + Returns + ------- + dict + The merged batch data. 
+ """ + b_data = {} + max_natoms = max(bb["natoms_vec"][0] for bb in batch_data) + # natoms_vec + natoms_vec = np.zeros(2 + self.get_ntypes(), dtype=int) + natoms_vec[0:3] = max_natoms + b_data["natoms_vec"] = natoms_vec + # real_natoms_vec + real_natoms_vec = np.vstack([bb["natoms_vec"] for bb in batch_data]) + b_data["real_natoms_vec"] = real_natoms_vec + # type + type_vec = np.full((len(batch_data), max_natoms), -1, dtype=int) + for ii, bb in enumerate(batch_data): + type_vec[ii, : bb["type"].shape[1]] = bb["type"][0] + b_data["type"] = type_vec + # default_mesh + default_mesh = np.mean([bb["default_mesh"] for bb in batch_data], axis=0) + b_data["default_mesh"] = default_mesh + # other data + data_dict = self.get_data_dict(0) + for kk, vv in data_dict.items(): + if kk not in batch_data[0]: + continue + b_data["find_" + kk] = batch_data[0]["find_" + kk] + if not vv["atomic"]: + b_data[kk] = np.concatenate([bb[kk] for bb in batch_data], axis=0) + else: + b_data[kk] = np.zeros( + (len(batch_data), max_natoms * vv["ndof"] * vv["repeat"]), + dtype=batch_data[0][kk].dtype, + ) + for ii, bb in enumerate(batch_data): + b_data[kk][ii, : bb[kk].shape[1]] = bb[kk][0] + return b_data + # ! altered by Marián Rynik def get_test(self, sys_idx: Optional[int] = None, n_test: int = -1): # depreciated """Get test data from the the data systems. 
diff --git a/deepmd/utils/neighbor_stat.py b/deepmd/utils/neighbor_stat.py index b07614fd57..eebe33d7a0 100644 --- a/deepmd/utils/neighbor_stat.py +++ b/deepmd/utils/neighbor_stat.py @@ -69,8 +69,8 @@ def builder(): t_natoms = place_holders["natoms_vec"] if self.one_type: # all types = 0, natoms_vec = [natoms, natoms, natoms] - t_type = tf.zeros_like(t_type, dtype=tf.int32) - t_natoms = tf.repeat(t_natoms[0], 3) + t_type = tf.clip_by_value(t_type, -1, 0) + t_natoms = tf.tile(t_natoms[0:1], [3]) _max_nbor_size, _min_nbor_dist = op_module.neighbor_stat( place_holders["coord"], diff --git a/deepmd/utils/network.py b/deepmd/utils/network.py index e5ce249f45..a718da0b26 100644 --- a/deepmd/utils/network.py +++ b/deepmd/utils/network.py @@ -144,9 +144,9 @@ def embedding_net( \boldsymbol{\phi}(\mathbf{x}^T\mathbf{w}+\mathbf{b}), & \text{otherwise} \\ \end{cases} - where :math:`\mathbf{x} \in \mathbb{R}^{N_1}`$` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}` + where :math:`\mathbf{x} \in \mathbb{R}^{N_1}` is the input vector and :math:`\mathbf{y} \in \mathbb{R}^{N_2}` is the output vector. :math:`\mathbf{w} \in \mathbb{R}^{N_1 \times N_2}` and - :math:`\mathbf{b} \in \mathbb{R}^{N_2}`$` are weights and biases, respectively, + :math:`\mathbf{b} \in \mathbb{R}^{N_2}` are weights and biases, respectively, both of which are trainable if `trainable` is `True`. :math:`\boldsymbol{\phi}` is the activation function. 
diff --git a/doc/conf.py b/doc/conf.py index e338274f0e..2fd0dcdc33 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -305,6 +305,10 @@ def setup(app): "fontpkg": r""" \usepackage{fontspec} \setmainfont{Symbola} +""", + "preamble": r""" +\usepackage{enumitem} +\setlistdepth{99} """, } diff --git a/doc/model/train-se-atten.md b/doc/model/train-se-atten.md index e91ae646c4..8b006346a9 100644 --- a/doc/model/train-se-atten.md +++ b/doc/model/train-se-atten.md @@ -103,11 +103,13 @@ ID | Property | File | Requir ---------- | -------------------------------- | ------------------- | -------------------- | ----------------------- | ----------- / | Atom type indexes (place holder) | type.raw | Required | Natoms | All zeros to fake the type input type_map | Atom type names | type_map.raw | Required | Ntypes | Atom names that map to atom type contained in all the frames, which is unnecessart to be contained in the periodic table -type | Atom type indexes of each frame | real_atom_types.npy | Required | Nframes \* Natoms | Integers that describe atom types in each frame, corresponding to indexes in type_map +type | Atom type indexes of each frame | real_atom_types.npy | Required | Nframes \* Natoms | Integers that describe atom types in each frame, corresponding to indexes in type_map. `-1` means virtual atoms. With these edited files, one can put together frames with the same `Natoms`, instead of the same formula (like `H2O`). Note that this `mixed_type` format only supports `se_atten` descriptor. -The API to generate or transfer to `mixed_type` format will be uploaded on [dpdata](https://github.com/deepmodeling/dpdata) soon for a more convenient experience. +To put frames with different `Natoms` into the same system, one can pad systems by adding virtual atoms whose type is `-1`. Virtual atoms do not contribute to any fitting property, so the atomic property of virtual atoms (e.g. forces) should be given zero. 
+ +The API to generate or transfer to `mixed_type` format is available on [dpdata](https://github.com/deepmodeling/dpdata) for a more convenient experience. ## Training example Here we upload the AlMgCu example shown in the paper, you can download it here: diff --git a/examples/nopbc/mixed/input.json b/examples/nopbc/mixed/input.json new file mode 100644 index 0000000000..ab49b4e442 --- /dev/null +++ b/examples/nopbc/mixed/input.json @@ -0,0 +1,71 @@ +{ + "_comment": " model parameters", + "model": { + "type_map": [ + "C", + "H", + "O" + ], + "descriptor": { + "type": "se_atten", + "sel": 120, + "rcut_smth": 1.00, + "rcut": 6.00, + "neuron": [ + 25, + 50, + 100 + ], + "resnet_dt": false, + "axis_neuron": 12, + "seed": 1, + "_comment": " that's all" + }, + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1, + "_comment": " that's all" + }, + "_comment": " that's all" + }, + + "learning_rate": { + "type": "exp", + "decay_steps": 4000, + "start_lr": 0.001, + "stop_lr": 3.51e-8, + "_comment": "that's all" + }, + + "loss": { + "type": "ener", + "start_pref_e": 0.02, + "limit_pref_e": 1, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0, + "_comment": " that's all" + }, + + "training": { + "training_data": { + "systems": "../data/", + "batch_size": "mixed:4", + "_comment": "that's all" + }, + "numb_steps": 4000000, + "seed": 10, + "disp_file": "lcurve.out", + "disp_freq": 100, + "save_freq": 1000, + "_comment": "that's all" + }, + + "_comment": "that's all" +} diff --git a/source/op/CMakeLists.txt b/source/op/CMakeLists.txt index fdc7fa7ea2..059e72b9a8 100644 --- a/source/op/CMakeLists.txt +++ b/source/op/CMakeLists.txt @@ -56,6 +56,9 @@ add_library(${LIB_DEEPMD_OP} MODULE ${OP_SRC} ${OP_REMAPPER_SRC}) # link: libdeepmd libtensorflow_cc libtensorflow_framework target_link_libraries(${LIB_DEEPMD_OP} PRIVATE TensorFlow::tensorflow_framework) target_link_libraries(${LIB_DEEPMD_OP} PRIVATE 
${LIB_DEEPMD}) +if(Protobuf_LIBRARY) + target_link_libraries(${LIB_DEEPMD_OP} PRIVATE ${Protobuf_LIBRARY}) +endif() if(APPLE) set_target_properties( ${LIB_DEEPMD_OP} PROPERTIES INSTALL_RPATH diff --git a/source/op/neighbor_stat.cc b/source/op/neighbor_stat.cc index 6c58096824..ba067786f9 100644 --- a/source/op/neighbor_stat.cc +++ b/source/op/neighbor_stat.cc @@ -180,8 +180,10 @@ class NeighborStatOp : public OpKernel { #pragma omp parallel for for (int ii = 0; ii < nloc; ii++) { + if (d_type[ii] < 0) continue; // virtual atom for (int jj = 0; jj < d_nlist_r[ii].size(); jj++) { int type = d_type[d_nlist_r[ii][jj]]; + if (type < 0) continue; // virtual atom max_nbor_size[ii * ntypes + type] += 1; compute_t rij[3] = { d_coord3[d_nlist_r[ii][jj] * 3 + 0] - d_coord3[ii * 3 + 0], diff --git a/source/tests/common.py b/source/tests/common.py index d0dd2d2608..8fc303c1d7 100644 --- a/source/tests/common.py +++ b/source/tests/common.py @@ -85,9 +85,62 @@ def gen_data_mixed_type(nframes=1): ) -def gen_data(nframes=1, mixed_type=False): +def gen_data_virtual_type(nframes=1, nghost=4): + tmpdata = Data(rand_pert=0.1, seed=1, nframes=nframes) + sys = dpdata.LabeledSystem() + real_type_map = ["foo", "bar"] + sys.data["atom_names"] = ["X"] + sys.data["coords"] = tmpdata.coord + sys.data["atom_types"] = np.concatenate( + [ + np.zeros_like(tmpdata.atype), + np.zeros([nghost], dtype=np.int32), + ], + axis=0, + ) + sys.data["cells"] = tmpdata.cell + nframes = tmpdata.nframes + natoms = tmpdata.natoms + sys.data["coords"] = np.concatenate( + [ + sys.data["coords"].reshape([nframes, natoms, 3]), + np.zeros([nframes, nghost, 3]), + ], + axis=1, + ) + sys.data["cells"] = sys.data["cells"].reshape([nframes, 3, 3]) + sys.data["energies"] = np.zeros([nframes, 1]) + sys.data["forces"] = np.zeros([nframes, natoms + nghost, 3]) + sys.to_deepmd_npy("system_mixed_type", prec=np.float64) + np.savetxt("system_mixed_type/type_map.raw", real_type_map, fmt="%s") + np.save( + 
"system_mixed_type/set.000/real_atom_types.npy", + np.concatenate( + [ + tmpdata.atype.reshape(1, -1).repeat(nframes, 0), + np.full([nframes, nghost], -1, dtype=np.int32), + ], + axis=1, + ), + ) + np.save("system_mixed_type/set.000/fparam.npy", tmpdata.fparam) + np.save( + "system_mixed_type/set.000/aparam.npy", + np.concatenate( + [ + tmpdata.aparam.reshape([nframes, natoms, 2]), + np.zeros([nframes, nghost, 2]), + ], + axis=1, + ), + ) + + +def gen_data(nframes=1, mixed_type=False, virtual_type=False): if not mixed_type: gen_data_type_specific(nframes) + elif virtual_type: + gen_data_virtual_type(nframes) else: gen_data_mixed_type(nframes) diff --git a/source/tests/test_deepmd_data_sys.py b/source/tests/test_deepmd_data_sys.py index d24291114d..9df48c3f57 100644 --- a/source/tests/test_deepmd_data_sys.py +++ b/source/tests/test_deepmd_data_sys.py @@ -7,6 +7,9 @@ from deepmd.env import ( GLOBAL_NP_FLOAT_PRECISION, ) +from deepmd.utils import ( + random, +) from deepmd.utils.data_system import ( DeepmdDataSystem, ) @@ -398,3 +401,24 @@ def test_sys_prob_floating_point_error(self): ] ds = DeepmdDataSystem(self.sys_name, 3, 2, 2.0, sys_probs=sys_probs) self.assertEqual(ds.sys_probs.size, len(sys_probs)) + + def test_get_mixed_batch(self): + """test get_batch with mixed system.""" + batch_size = "mixed:3" + test_size = 2 + + ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0) + ds.add("test", self.test_ndof, atomic=True, must=True) + ds.add("null", self.test_ndof, atomic=True, must=False) + random.seed(114514) + # with this seed, the batch is fixed, with natoms 3, 6, 6 + data = ds.get_batch() + np.testing.assert_equal(data["natoms_vec"], np.array([6, 6, 6, 0, 0])) + np.testing.assert_equal(data["real_natoms_vec"][:, 0], np.array([3, 6, 6])) + np.testing.assert_equal(data["type"][0, 3:6], np.array([-1, -1, -1])) + np.testing.assert_equal(data["coord"][0, 9:18], np.zeros(9)) + for kk in ("test", "null"): + np.testing.assert_equal( + data[kk][0, 3 * 
self.test_ndof : 6 * self.test_ndof], + np.zeros(3 * self.test_ndof), + ) diff --git a/source/tests/test_virtual_type.py b/source/tests/test_virtual_type.py index 575f6dc768..98ab84cf93 100644 --- a/source/tests/test_virtual_type.py +++ b/source/tests/test_virtual_type.py @@ -4,15 +4,26 @@ import numpy as np from common import ( + gen_data, + j_loader, tests_path, ) +from deepmd.common import ( + j_must_have, +) from deepmd.infer import ( DeepPot, ) from deepmd.utils.convert import ( convert_pbtxt_to_pb, ) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.neighbor_stat import ( + NeighborStat, +) class TestVirtualType(unittest.TestCase): @@ -106,3 +117,24 @@ def test_infer_mixed_type(self): np.testing.assert_almost_equal(v1, v2) np.testing.assert_almost_equal(ae1[:nloc], ae2[nghost:]) np.testing.assert_almost_equal(av1[:nloc], av2[nghost:]) + + +class TestTrainVirtualType(unittest.TestCase): + def setUp(self) -> None: + gen_data(mixed_type=True, virtual_type=True) + + def test_data_mixed_type(self): + jfile = "water_se_atten_mixed_type.json" + jdata = j_loader(jfile) + + systems = j_must_have(jdata, "systems") + batch_size = 1 + test_size = 1 + rcut = j_must_have(jdata["model"]["descriptor"], "rcut") + type_map = j_must_have(jdata["model"], "type_map") + + data = DeepmdDataSystem(systems, batch_size, test_size, rcut, type_map=type_map) + data.get_batch() + # neighbor stat + nei_stat = NeighborStat(len(type_map), rcut, one_type=True) + min_nbor_dist, max_nbor_size = nei_stat.get_stat(data)