From 2dd69cd7d6b4d7fc56070434ac55f48d9dca47a1 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 28 Feb 2024 21:14:38 +0800 Subject: [PATCH 1/5] update paddlepaddle softmax --- python/tvm/relay/frontend/paddlepaddle.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/tvm/relay/frontend/paddlepaddle.py b/python/tvm/relay/frontend/paddlepaddle.py index bb72d30352af..470275f93185 100755 --- a/python/tvm/relay/frontend/paddlepaddle.py +++ b/python/tvm/relay/frontend/paddlepaddle.py @@ -2386,14 +2386,12 @@ def convert_slice(g, op, block): def convert_softmax(g, op, block): """Operator converter for softmax.""" + data = g.get_node(op.input("X")[0]) axis = op.attr("axis") input_shape = block.var(op.input("X")[0]).shape if axis < 0: axis = len(input_shape) + axis - x = g.get_node(op.input("X")[0]) - m = _op.max(x, axis, keepdims=True) - e = _op.exp(x - m) - out = e / _op.sum(e, axis, keepdims=True) + out = _op.nn.softmax(data, axis) g.add_node(op.output("Out")[0], out) From 1226b810ce10b6736d90dab3c17e2bd2e486106c Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Tue, 26 Mar 2024 18:45:09 +0800 Subject: [PATCH 2/5] update paddlepaddle.py --- python/tvm/relay/frontend/paddlepaddle.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/python/tvm/relay/frontend/paddlepaddle.py b/python/tvm/relay/frontend/paddlepaddle.py index f9da96b5800c..d63043c9768b 100755 --- a/python/tvm/relay/frontend/paddlepaddle.py +++ b/python/tvm/relay/frontend/paddlepaddle.py @@ -1826,10 +1826,15 @@ def convert_dequantize_linear(g, op, block): data_node = g.get_node(data_node_name) # paddle_scale = tvm_scale * 127 - paddle_quantize_scale = g.get_params(op.input("Scale")[0]).asnumpy() - tvm_quantize_scale = paddle_quantize_scale / 127.0 + paddle_dequantize_scale = g.get_params(op.input("Scale")[0]).asnumpy() + tvm_dequantize_scale = paddle_dequantize_scale / 127.0 + tvm_dequantize_scale = tvm_dequantize_scale.squeeze() + print("tvm_dequantize_scale is ", tvm_dequantize_scale) - tvm_quantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy() + + tvm_dequantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy() + tvm_dequantize_zp = tvm_dequantize_zp.squeeze() + print("tvm_dequantize_zp is ", tvm_dequantize_zp) tvm_quantize_axis = op.attr("quant_axis") if tvm_quantize_axis == -1: @@ -1840,8 +1845,8 @@ def convert_dequantize_linear(g, op, block): out = _qnn.op.dequantize( data=data_node, - input_scale=_op.const(tvm_quantize_scale, "float32"), - input_zero_point=_op.const(tvm_quantize_zp, "int32"), + input_scale=_expr.const(tvm_dequantize_scale, "float32"), + input_zero_point=_expr.const(tvm_dequantize_zp, "int32"), axis=tvm_quantize_axis, ) g.add_node(op.output("Y")[0], out) @@ -1856,8 +1861,10 @@ def convert_quantize_linear(g, op, block): # paddle_scale = tvm_scale * 127 paddle_quantize_scale = g.get_params(op.input("Scale")[0]).asnumpy() tvm_quantize_scale = paddle_quantize_scale / 127.0 + tvm_quantize_scale = tvm_quantize_scale.squeeze() tvm_quantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy() + tvm_quantize_zp = tvm_quantize_zp.squeeze() tvm_quantize_axis = op.attr("quant_axis") if tvm_quantize_axis == -1: @@ -1865,8 +1872,8 @@ def convert_quantize_linear(g, op, block): out = _qnn.op.quantize( data=data_node, - output_scale=_op.const(tvm_quantize_scale, "float32"), - output_zero_point=_op.const(tvm_quantize_zp, "int32"), + output_scale=_expr.const(tvm_quantize_scale, "float32"), + output_zero_point=_expr.const(tvm_quantize_zp, "int32"), axis=tvm_quantize_axis, ) g.add_node(op.output("Y")[0], out) From 17c5b538ca50dacd96ea2611a0cbb79751b984f0 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Tue, 26 Mar 2024 18:47:59 +0800 Subject: [PATCH 3/5] update cmsisnn.py --- python/tvm/relay/frontend/paddlepaddle.py | 2 -- python/tvm/relay/op/contrib/cmsisnn.py | 11 ++++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/python/tvm/relay/frontend/paddlepaddle.py b/python/tvm/relay/frontend/paddlepaddle.py index d63043c9768b..54d5ededccfb 100755 --- a/python/tvm/relay/frontend/paddlepaddle.py +++ b/python/tvm/relay/frontend/paddlepaddle.py @@ -1829,12 +1829,10 @@ def convert_dequantize_linear(g, op, block): paddle_dequantize_scale = g.get_params(op.input("Scale")[0]).asnumpy() tvm_dequantize_scale = paddle_dequantize_scale / 127.0 tvm_dequantize_scale = tvm_dequantize_scale.squeeze() - print("tvm_dequantize_scale is ", tvm_dequantize_scale) tvm_dequantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy() tvm_dequantize_zp = tvm_dequantize_zp.squeeze() - print("tvm_dequantize_zp is ", tvm_dequantize_zp) tvm_quantize_axis = op.attr("quant_axis") if tvm_quantize_axis == -1: diff --git a/python/tvm/relay/op/contrib/cmsisnn.py b/python/tvm/relay/op/contrib/cmsisnn.py index ed620f0ff125..ee4dc7ff72bb 100644 --- a/python/tvm/relay/op/contrib/cmsisnn.py +++ b/python/tvm/relay/op/contrib/cmsisnn.py @@ -86,9 +86,14 @@ def check_qnn_softmax(pattern): zero_point = pattern.args[2].data.numpy().item(0) # check for dtypes of quantize and dequantize + # if ( + # (scale == 1.0 / 256 and zero_point == -128) + # and pattern.attrs.out_dtype == "int8" + # and dequantize_call.args[0].checked_type.dtype == "int8" + # ): + # return True if ( - (scale == 1.0 / 256 and zero_point == -128) - and pattern.attrs.out_dtype == "int8" + pattern.attrs.out_dtype == "int8" and dequantize_call.args[0].checked_type.dtype == "int8" ): return True @@ -99,7 +104,7 @@ def check_qnn_softmax(pattern): and dequantize_call.args[0].checked_type.dtype == "int16" ): return True - + print("check_qnn_softmax failed") return False def qnn_conv2d_pattern(with_pad): From 13a8cd54fef9c4d9bfa2b0d903c31c931679c0f0 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Tue, 26 Mar 2024 18:48:45 +0800 Subject: [PATCH 4/5] update cmsisnn.py --- python/tvm/relay/op/contrib/cmsisnn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tvm/relay/op/contrib/cmsisnn.py b/python/tvm/relay/op/contrib/cmsisnn.py index ee4dc7ff72bb..559f31de018d 100644 --- a/python/tvm/relay/op/contrib/cmsisnn.py +++ b/python/tvm/relay/op/contrib/cmsisnn.py @@ -104,7 +104,6 @@ def check_qnn_softmax(pattern): and dequantize_call.args[0].checked_type.dtype == "int16" ): return True - print("check_qnn_softmax failed") return False def qnn_conv2d_pattern(with_pad): From 28a22662d6d2d2566077e3ac0c867faf8901bd4c Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 27 Mar 2024 08:56:27 +0800 Subject: [PATCH 5/5] update lint --- python/tvm/relay/frontend/paddlepaddle.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tvm/relay/frontend/paddlepaddle.py b/python/tvm/relay/frontend/paddlepaddle.py index 54d5ededccfb..3e2770da4e50 100755 --- a/python/tvm/relay/frontend/paddlepaddle.py +++ b/python/tvm/relay/frontend/paddlepaddle.py @@ -1830,7 +1830,6 @@ def convert_dequantize_linear(g, op, block): tvm_dequantize_scale = paddle_dequantize_scale / 127.0 tvm_dequantize_scale = tvm_dequantize_scale.squeeze() - tvm_dequantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy() tvm_dequantize_zp = tvm_dequantize_zp.squeeze()