From eadc67e3d9e38ff97c137a9d66bad91e46c8b204 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 4 Sep 2023 12:43:13 -0400 Subject: [PATCH] improve documentation for checkpoints Fix #2767. --- deepmd/entrypoints/train.py | 4 ++-- deepmd/model/frozen.py | 2 +- deepmd/model/linear.py | 2 +- deepmd/model/model.py | 4 ++-- deepmd/utils/argcheck.py | 2 +- deepmd_cli/main.py | 6 +++--- doc/nvnmd/nvnmd.md | 4 ++-- doc/train/training-advanced.md | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py index 1a0d4b9c6d..716ff482a3 100755 --- a/deepmd/entrypoints/train.py +++ b/deepmd/entrypoints/train.py @@ -86,9 +86,9 @@ def train( INPUT : str json/yaml control file init_model : Optional[str] - path to checkpoint folder or None + path prefix of checkpoint files or None restart : Optional[str] - path to checkpoint folder or None + path prefix of checkpoint files or None output : str path for dump file with arguments init_frz_model : str diff --git a/deepmd/model/frozen.py b/deepmd/model/frozen.py index c1b7d0286e..972acb9185 100644 --- a/deepmd/model/frozen.py +++ b/deepmd/model/frozen.py @@ -74,7 +74,7 @@ def build( frz_model : str, optional The path to the frozen model ckpt_meta : str, optional - The path to the checkpoint and meta file + The path prefix of the checkpoint and meta files suffix : str, optional The suffix of the scope reuse : bool or tf.AUTO_REUSE, optional diff --git a/deepmd/model/linear.py b/deepmd/model/linear.py index 6399766662..799642ce33 100644 --- a/deepmd/model/linear.py +++ b/deepmd/model/linear.py @@ -166,7 +166,7 @@ def build( frz_model : str, optional The path to the frozen model ckpt_meta : str, optional - The path to the checkpoint and meta file + The path prefix of the checkpoint and meta files suffix : str, optional The suffix of the scope reuse : bool or tf.AUTO_REUSE, optional diff --git a/deepmd/model/model.py b/deepmd/model/model.py index a06a3141c0..9ae5eacf4f 
100644 --- a/deepmd/model/model.py +++ b/deepmd/model/model.py @@ -191,7 +191,7 @@ def build( frz_model : str, optional The path to the frozen model ckpt_meta : str, optional - The path to the checkpoint and meta file + The path prefix of the checkpoint and meta files suffix : str, optional The suffix of the scope reuse : bool or tf.AUTO_REUSE, optional @@ -259,7 +259,7 @@ def build_descrpt( frz_model : str, optional The path to the frozen model ckpt_meta : str, optional - The path to the checkpoint and meta file + The path prefix of the checkpoint and meta files suffix : str, optional The suffix of the scope reuse : bool or tf.AUTO_REUSE, optional diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index b38f8c8063..153824cb0d 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -1601,7 +1601,7 @@ def training_args(): # ! modified by Ziyao: data configuration isolated. doc_disp_file = "The file for printing learning curve." doc_disp_freq = "The frequency of printing learning curve." doc_save_freq = "The frequency of saving check point." - doc_save_ckpt = "The file name of saving check point." + doc_save_ckpt = "The path prefix of saving check point files." doc_disp_training = "Displaying verbose information during training." doc_time_training = "Timing durining training." doc_profiling = "Profiling during training." 
diff --git a/deepmd_cli/main.py b/deepmd_cli/main.py index a6b293020b..e3213d8b00 100644 --- a/deepmd_cli/main.py +++ b/deepmd_cli/main.py @@ -150,14 +150,14 @@ def main_parser() -> argparse.ArgumentParser: "--init-model", type=str, default=None, - help="Initialize the model by the provided checkpoint.", + help="Initialize the model by the provided path prefix of checkpoint files.", ) parser_train_subgroup.add_argument( "-r", "--restart", type=str, default=None, - help="Restart the training from the provided checkpoint.", + help="Restart the training from the provided path prefix of checkpoint files.", ) parser_train_subgroup.add_argument( "-f", @@ -549,7 +549,7 @@ def main_parser() -> argparse.ArgumentParser: "--restart", type=str, default=None, - help="Restart the training from the provided checkpoint.", + help="Restart the training from the provided path prefix of checkpoint files.", ) parser_train_nvnmd.add_argument( "-s", diff --git a/doc/nvnmd/nvnmd.md b/doc/nvnmd/nvnmd.md index 8cad297391..0596ba5dc8 100644 --- a/doc/nvnmd/nvnmd.md +++ b/doc/nvnmd/nvnmd.md @@ -162,7 +162,7 @@ where items are defined as: | numb_test | the accuracy is test by using {numb_test} sample | a positive integer | | disp_file | the log file where the training message display | a string | | disp_freq | display frequency | a positive integer | -| save_ckpt | check point file | a string | +| save_ckpt | path prefix of check point files | a string | | save_freq | save frequency | a positive integer | | systems | a list of data directory which contains the dataset | string list | | set_prefix | the prefix of dataset | a string | @@ -181,7 +181,7 @@ dp train-nvnmd train_qnn.json -s s2 After the training process, you will get two folders: `nvnmd_cnn` and `nvnmd_qnn`. The `nvnmd_cnn` contains the model after continuous neural network (CNN) training. The `nvnmd_qnn` contains the model after quantized neural network (QNN) training. 
The binary file `nvnmd_qnn/model.pb` is the model file that is used to perform NVNMD in the server [http://nvnmd.picp.vip]. -You can also restart the CNN training from the checkpoint (`nvnmd_cnn/model.ckpt`) by +You can also restart the CNN training from the checkpoint files with the path prefix `nvnmd_cnn/model.ckpt` by ``` bash dp train-nvnmd train_cnn.json -r nvnmd_cnn/model.ckpt -s s1 diff --git a/doc/train/training-advanced.md b/doc/train/training-advanced.md index 39cf87d8b3..b0194e3471 100644 --- a/doc/train/training-advanced.md +++ b/doc/train/training-advanced.md @@ -121,7 +121,7 @@ optional arguments: --skip-neighbor-stat Skip calculating neighbor statistics. Sel checking, automatic sel, and model compression will be disabled. (default: False) ``` -**`--init-model model.ckpt`**, initializes the model training with an existing model that is stored in the checkpoint `model.ckpt`, the network architectures should match. +**`--init-model model.ckpt`**, initializes the model training with an existing model that is stored in the checkpoint files with the path prefix `model.ckpt`; the network architectures should match. **`--restart model.ckpt`**, continues the training from the checkpoint `model.ckpt`.