Skip to content

[BUG] Model compression fails for some descriptor neuron structures #1370

@youngjae142

Description

@youngjae142

Summary

Model compression fails for certain neuron structures of the descriptor.
With a descriptor neuron of [25, 50, 100], compression succeeds.
But with [50, 50, 50] or [100, 50, 25], it fails.

DeePMD-kit version, installation method, input file, commands run, error log, etc.
DeePMD-kit version: v2.0.2
Installation: built from source
Command run: dp compress -i frozen_model.pb -t input.json
The frozen_model.pb files are attached below.
frozen_model.zip

input.json:
`

{
"_comment": " model parameters",
"model": {
"type_map": ["Ge", "Sb", "Te"],
"descriptor" :{
"type": "se_e2_a",
"sel": [30, 30, 30],
"rcut_smth": 0.10,
"rcut": 6.00,
"neuron": [100, 50, 25],
"resnet_dt": false,
"axis_neuron": 16,
"activation_function": "tanh",
"seed": 1,
"_comment": " that's all"
},
"fitting_net" : {
"neuron": [200, 200, 200],
"activation_function": "tanh",
"resnet_dt": true,
"seed": 1,
"_comment": " that's all"
},
"_comment": " that's all"
},

"learning_rate" :{
"type":		"exp",
"start_lr":	1e-3,	
"stop_lr":	1e-3,
"_comment":	"that's all"
},

"loss" :{
"type":		"ener",
"start_pref_e": 1e-2,
"limit_pref_e": 1e-2,
"start_pref_f": 1e-0,
"limit_pref_f": 1e-0,
"start_pref_v": 1e-2,
"limit_pref_v": 1e-2,
"_comment":	" that's all"
},

"training" : {
    "training_data": {
        "systems": [
            "../raw-sffld_gst-hex-gather.traj.d/train",
            "../raw-sffld_gst-cubic-gather.traj.d/train",
            "../raw-sffld_gs-cubic-liquid-gather.traj.d",
            "../raw-sffld_sb-hex-liquid-8th.traj.d",
            "../raw-sffld_ge-cubic-liquid-gather.traj.d",
            "../raw-sffld_te-cubic-liquid-gather.traj.d"
        ],
        "batch_size":	2,
        "_comment":		"that's all"
    },
    "validation_data":{
        "systems": [
            "../raw-sffld_gst-hex-gather.traj.d/test",
            "../raw-sffld_gst-cubic-gather.traj.d/test"
        ],
        "batch_size":	50,
        "numb_btch":	1,
        "_comment":		"that's all"
    },
    "numb_steps":	2000000,
    "seed":		10,
    "disp_file":	"lcurve.out",
    "disp_freq":	1000,
    "save_freq":	1000,
    "save_ckpt": "bu/model.ckpt",
    "_comment":	"that's all"
    },    

"_comment":		"that's all"

}
`

Error log:
`
Traceback (most recent call last):
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 1853, in _create_c_op
c_op = pywrap_tf_session.TF_FinishOperation(op_desc)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Dimensions must be equal, but are 50 and 200 for '{{node add_4/add}} = AddV2[T=DT_DOUBLE](Tanh_1, concat)' with input shapes: [1501,50], [1501,200].

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/ops/math_ops.py", line 1175, in binary_op_wrapper
out = r_op(x)
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/ops/math_ops.py", line 1195, in r_binary_op_wrapper
return func(x, y, name=name)
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py", line 201, in wrapper
return target(*args, **kwargs)
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/ops/math_ops.py", line 1486, in _add_dispatch
return gen_math_ops.add_v2(x, y, name=name)
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/ops/gen_math_ops.py", line 481, in add_v2
_, _, _op, _outputs = _op_def_library._apply_op_helper(
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py", line 748, in _apply_op_helper
op = g._create_op_internal(op_type_name, inputs, dtypes=None,
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 3528, in _create_op_internal
ret = Operation(
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 2015, in __init__
self._c_op = _create_c_op(self._graph, node_def, inputs,
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 1856, in _create_c_op
raise ValueError(str(e))
ValueError: Dimensions must be equal, but are 50 and 200 for '{{node add_4/add}} = AddV2[T=DT_DOUBLE](Tanh_1, concat)' with input shapes: [1501,50], [1501,200].

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/home/grad/ssrokyz/.conda/envs/tf24/bin/dp", line 33, in <module>
sys.exit(load_entry_point('deepmd-kit==2.0.2', 'console_scripts', 'dp')())
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/deepmd_kit-2.0.2-py3.8-linux-x86_64.egg/deepmd/entrypoints/main.py", line 447, in main
compress(**dict_args)
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/deepmd_kit-2.0.2-py3.8-linux-x86_64.egg/deepmd/entrypoints/compress.py", line 119, in compress
train(
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/deepmd_kit-2.0.2-py3.8-linux-x86_64.egg/deepmd/entrypoints/train.py", line 103, in train
_do_work(jdata, run_opt, is_compress)
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/deepmd_kit-2.0.2-py3.8-linux-x86_64.egg/deepmd/entrypoints/train.py", line 159, in _do_work
model.build(train_data, stop_batch)
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/deepmd_kit-2.0.2-py3.8-linux-x86_64.egg/deepmd/train/trainer.py", line 285, in build
self.descrpt.enable_compression(self.model_param['compress']["min_nbor_dist"], self.model_param['compress']['model_file'], self.model_param['compress']['table_config'][0], self.model_param['compress']['table_config'][1], self.model_param['compress']['table_config'][2], self.model_param['compress']['table_config'][3])
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/deepmd_kit-2.0.2-py3.8-linux-x86_64.egg/deepmd/descriptor/se_a.py", line 335, in enable_compression
= self.table.build(min_nbor_dist,
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/deepmd_kit-2.0.2-py3.8-linux-x86_64.egg/deepmd/utils/tabulate.py", line 148, in build
vv, dd, d2 = self._make_data(xx, ii)
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/deepmd_kit-2.0.2-py3.8-linux-x86_64.egg/deepmd/utils/tabulate.py", line 236, in _make_data
tt, zz = self._layer_1(
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/deepmd_kit-2.0.2-py3.8-linux-x86_64.egg/deepmd/utils/tabulate.py", line 255, in _layer_1
return t, self.activation_fn(tf.matmul(x, w) + b) + t
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/ops/math_ops.py", line 1180, in binary_op_wrapper
raise e
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/ops/math_ops.py", line 1164, in binary_op_wrapper
return func(x, y, name=name)
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py", line 201, in wrapper
return target(*args, **kwargs)
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/ops/math_ops.py", line 1486, in _add_dispatch
return gen_math_ops.add_v2(x, y, name=name)
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/ops/gen_math_ops.py", line 481, in add_v2
_, _, _op, _outputs = _op_def_library._apply_op_helper(
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/framework/op_def_library.py", line 748, in _apply_op_helper
op = g._create_op_internal(op_type_name, inputs, dtypes=None,
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 3528, in _create_op_internal
ret = Operation(
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 2015, in __init__
self._c_op = _create_c_op(self._graph, node_def, inputs,
File "/home/grad/ssrokyz/.conda/envs/tf24/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 1856, in _create_c_op
raise ValueError(str(e))
ValueError: Dimensions must be equal, but are 50 and 200 for '{{node add_4}} = AddV2[T=DT_DOUBLE](Tanh_1, concat)' with input shapes: [1501,50], [1501,200].
`

Metadata

Metadata

Assignees

No one assigned

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions