diff --git a/deepmd/entrypoints/compress.py b/deepmd/entrypoints/compress.py index 2193f0c1f9..1222b1e51b 100644 --- a/deepmd/entrypoints/compress.py +++ b/deepmd/entrypoints/compress.py @@ -23,7 +23,7 @@ def compress( input: str, output: str, extrapolate: int, - stride: float, + step: float, frequency: str, checkpoint_folder: str, mpi_log: str, @@ -34,9 +34,9 @@ def compress( """Compress model. The table is composed of fifth-order polynomial coefficients and is assembled from - two sub-tables. The first table takes the stride(parameter) as it's uniform stride, - while the second table takes 10 * stride as it's uniform stride. The range of the - first table is automatically detected by deepmd-kit, while the second table ranges + two sub-tables. The first table takes the step parameter as the domain's uniform step size, + while the second table takes 10 * step as its uniform step size. The range of the + first table is automatically detected by the code, while the second table ranges from the first table's upper boundary(upper) to the extrapolate(parameter) * upper. 
Parameters @@ -49,8 +49,8 @@ def compress( compressed model filename extrapolate : int scale of model extrapolation - stride : float - uniform stride of tabulation's first table + step : float + uniform step size of the tabulation's first table frequency : str frequency of tabulation overflow check checkpoint_folder : str @@ -71,8 +71,8 @@ def compress( jdata["model"]["compress"]["model_file"] = input jdata["model"]["compress"]["table_config"] = [ extrapolate, - stride, - 10 * stride, + step, + 10 * step, int(frequency), ] # be careful here, if one want to refine the model diff --git a/deepmd/entrypoints/main.py b/deepmd/entrypoints/main.py index e0c1d8d4af..b245053053 100644 --- a/deepmd/entrypoints/main.py +++ b/deepmd/entrypoints/main.py @@ -242,8 +242,8 @@ def parse_args(args: Optional[List[str]] = None): # * compress model ***************************************************************** # Compress a model, which including tabulating the embedding-net. # The table is composed of fifth-order polynomial coefficients and is assembled - # from two sub-tables. The first table takes the stride(parameter) as it's uniform - # stride, while the second table takes 10 * stride as it\s uniform stride + # from two sub-tables. The first table takes the step(parameter) as its uniform + # step, while the second table takes 10 * step as its uniform step. #  The range of the first table is automatically detected by deepmd-kit, while the # second table ranges from the first table's upper boundary(upper) to the # extrapolate(parameter) * upper. 
@@ -263,36 +263,43 @@ def parse_args(args: Optional[List[str]] = None): "--input", default="frozen_model.pb", type=str, - help="The original frozen model, which will be compressed by the deepmd-kit", + help="The original frozen model, which will be compressed by the code", ) parser_compress.add_argument( "-o", "--output", - default="frozen_model_compress.pb", + default="frozen_model_compressed.pb", type=str, help="The compressed model", ) + parser_compress.add_argument( + "-s", + "--step", + default=0.01, + type=float, + help="Model compression uses fifth-order polynomials to interpolate the embedding-net. " + "It introduces two tables with different step size to store the parameters of the polynomials. " + "The first table covers the range of the training data, while the second table is an extrapolation of the training data. " + "The domain of each table is uniformly divided by a given step size. " + "And the step(parameter) denotes the step size of the first table and the second table will " + "use 10 * step as it's step size to save the memory. " + "Usually the value ranges from 0.1 to 0.001. " + "Smaller step means higher accuracy and bigger model size", + ) parser_compress.add_argument( "-e", "--extrapolate", default=5, type=int, - help="The scale of model extrapolation", - ) - parser_compress.add_argument( - "-s", - "--stride", - default=0.01, - type=float, - help="The uniform stride of tabulation's first table, the second table will " - "use 10 * stride as it's uniform stride", + help="The domain range of the first table is automatically detected by the code: [d_low, d_up]. 
" + "While the second table ranges from the first table's upper boundary(d_up) to the extrapolate(parameter) * d_up: [d_up, extrapolate * d_up]", ) parser_compress.add_argument( "-f", "--frequency", default=-1, type=int, - help="The frequency of tabulation overflow check(If the input environment " + help="The frequency of tabulation overflow check(Whether the input environment " "matrix overflow the first or second table range). " "By default do not check the overflow", ) diff --git a/doc/getting-started.md b/doc/getting-started.md index a355fc9836..6a10a49eee 100644 --- a/doc/getting-started.md +++ b/doc/getting-started.md @@ -244,23 +244,50 @@ positional arguments: optional arguments: -h, --help show this help message and exit + -v {DEBUG,3,INFO,2,WARNING,1,ERROR,0}, --log-level {DEBUG,3,INFO,2,WARNING,1,ERROR,0} + set verbosity level by string or number, 0=ERROR, + 1=WARNING, 2=INFO and 3=DEBUG (default: INFO) + -l LOG_PATH, --log-path LOG_PATH + set log file to log messages to disk, if not + specified, the logs will only be output to console + (default: None) + -m {master,collect,workers}, --mpi-log {master,collect,workers} + Set the manner of logging when running with MPI. + 'master' logs only on main process, 'collect' + broadcasts logs from workers to master and 'workers' + means each process will output its own log (default: + master) -i INPUT, --input INPUT The original frozen model, which will be compressed by - the deepmd-kit + the code (default: frozen_model.pb) -o OUTPUT, --output OUTPUT - The compressed model + The compressed model (default: + frozen_model_compressed.pb) + -s STEP, --step STEP Model compression uses fifth-order polynomials to + interpolate the embedding-net. It introduces two + tables with different step size to store the + parameters of the polynomials. The first table covers + the range of the training data, while the second table + is an extrapolation of the training data. 
The domain + of each table is uniformly divided by a given step + size. And the step(parameter) denotes the step size of + the first table and the second table will use 10 * + step as it's step size to save the memory. Usually the + value ranges from 0.1 to 0.001. Smaller step means + higher accuracy and bigger model size (default: 0.01) -e EXTRAPOLATE, --extrapolate EXTRAPOLATE - The scale of model extrapolation - -s STRIDE, --stride STRIDE - The uniform stride of tabulation's first table, the - second table will use 10 * stride as it's uniform - stride + The domain range of the first table is automatically + detected by the code: [d_low, d_up]. While the second + table ranges from the first table's upper + boundary(d_up) to the extrapolate(parameter) * d_up: + [d_up, extrapolate * d_up] (default: 5) -f FREQUENCY, --frequency FREQUENCY - The frequency of tabulation overflow check(If the + The frequency of tabulation overflow check(Whether the input environment matrix overflow the first or second table range). By default do not check the overflow - -d FOLDER, --folder FOLDER - path to checkpoint folder + (default: -1) + -c CHECKPOINT_FOLDER, --checkpoint-folder CHECKPOINT_FOLDER + path to checkpoint folder (default: .) 
``` **Parameter explanation** diff --git a/source/tests/test_argument_parser.py b/source/tests/test_argument_parser.py index 1c85728e40..f9f28fb81b 100644 --- a/source/tests/test_argument_parser.py +++ b/source/tests/test_argument_parser.py @@ -272,10 +272,10 @@ def test_parser_compress(self): ARGS = { "INPUT": dict(type=str, value="INFILE"), "--output": dict(type=str, value="OUTFILE"), - "--extrapolate": dict(type=int, value=10), - "--stride": dict(type=float, value=0.1), - "--frequency": dict(type=int, value=1), - "--checkpoint-folder": dict(type=str, value="FOLDER"), + "--extrapolate": dict(type=int, value=5), + "--step": dict(type=float, value=0.1), + "--frequency": dict(type=int, value=-1), + "--checkpoint-folder": dict(type=str, value="."), } self.run_test(command="compress", mapping=ARGS)