From c2bf9fbd868520a39ba2b2657e559eade88e1e93 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 21 Feb 2024 20:20:50 +0800 Subject: [PATCH 1/9] support conv2d when data_format is NHWC --- python/tvm/relay/frontend/paddlepaddle.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/python/tvm/relay/frontend/paddlepaddle.py b/python/tvm/relay/frontend/paddlepaddle.py index 1a3b119b383f..c64eb91e6ee4 100755 --- a/python/tvm/relay/frontend/paddlepaddle.py +++ b/python/tvm/relay/frontend/paddlepaddle.py @@ -314,6 +314,7 @@ def convert_conv2d(g, op, block): strides = op.attr("strides") kernel = g.get_node(op.input("Filter")[0]) + kernel_layout = "OIHW" input_x = g.get_node(op.input("Input")[0]) data_layout = op.attr("data_format") out_channels, _, k_h, k_w = infer_shape(kernel) @@ -335,6 +336,16 @@ def convert_conv2d(g, op, block): msg = f'Value {padding_algorithm} in attribute "padding" of operator Conv is not "valid."' raise tvm.error.OpAttributeInvalid(msg) + if data_layout == "NHWC": + kernel_layout = "HWIO" + # PaddlePaddle wieght layout is "OIHW", tvm need "HWIO" when op data_format is "NHWC" + kernel_data = g.get_params(op.input("Filter")[0]) + kernel_data = kernel_data.asnumpy() + kernel_data = kernel_data.transpose((2, 3, 1, 0)) + kernel_data = _nd.array(kernel_data) + g.modify_node(op.input("Filter")[0], kernel_data) + kernel = g.get_node(op.input("Filter")[0]) + out = _op.nn.conv2d( input_x, kernel, @@ -345,6 +356,7 @@ def convert_conv2d(g, op, block): channels=out_channels, kernel_size=[k_h, k_w], data_layout=data_layout, + kernel_layout=kernel_layout, ) g.add_node(op.output("Output")[0], out) @@ -2915,6 +2927,12 @@ def add_node(self, name, node): self.nodes[name] = fold_constant(node) + def modify_node(self, name, params): + """modify node from graph""" + + self.params[name] = params + self.nodes[name] = new_var(name, shape=params.shape, dtype=params.dtype) + def get_params(self, name=None): """Get params from graph.""" From f6223d1e6a774f44576c5e7187220a2751838ba6 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com> Date: Wed, 21 Feb 2024 22:39:17 +0800 Subject: [PATCH 2/9] modify the annotation --- python/tvm/relay/frontend/paddlepaddle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/relay/frontend/paddlepaddle.py b/python/tvm/relay/frontend/paddlepaddle.py index c64eb91e6ee4..bb72d30352af 100755 --- a/python/tvm/relay/frontend/paddlepaddle.py +++ b/python/tvm/relay/frontend/paddlepaddle.py @@ -338,7 +338,7 @@ def convert_conv2d(g, op, block): if data_layout == "NHWC": kernel_layout = "HWIO" - # PaddlePaddle wieght layout is "OIHW", tvm need "HWIO" when op data_format is "NHWC" + # PaddlePaddle wieght layout is "OIHW", tvm need "HWIO" when op data_format is "NHWC". kernel_data = g.get_params(op.input("Filter")[0]) kernel_data = kernel_data.asnumpy() kernel_data = kernel_data.transpose((2, 3, 1, 0)) From 3866762bdd576b726ec60015654a985050e45cc1 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 28 Feb 2024 10:05:49 +0800 Subject: [PATCH 3/9] Do not convert input data when processing quantization conv_2d nodes --- python/tvm/relay/frontend/paddlepaddle.py | 76 ++++++++++++++++++++--- 1 file changed, 66 insertions(+), 10 deletions(-) diff --git a/python/tvm/relay/frontend/paddlepaddle.py b/python/tvm/relay/frontend/paddlepaddle.py index c64eb91e6ee4..6ba0a3855c5c 100755 --- a/python/tvm/relay/frontend/paddlepaddle.py +++ b/python/tvm/relay/frontend/paddlepaddle.py @@ -31,6 +31,7 @@ from .. import function as _function from .. import ty as _ty from .. import op as _op +from .. import qnn as _qnn from .common import ( autopad, fold_constant, @@ -314,9 +315,9 @@ def convert_conv2d(g, op, block): strides = op.attr("strides") kernel = g.get_node(op.input("Filter")[0]) - kernel_layout = "OIHW" input_x = g.get_node(op.input("Input")[0]) data_layout = op.attr("data_format") + kernel_layout = "OIHW" if data_layout == "NCHW" else "HWIO" out_channels, _, k_h, k_w = infer_shape(kernel) if padding_algorithm == "VALID": paddings = [0, 0] @@ -335,10 +336,15 @@ def convert_conv2d(g, op, block): else: msg = f'Value {padding_algorithm} in attribute "padding" of operator Conv is not "valid."' raise tvm.error.OpAttributeInvalid(msg) - - if data_layout == "NHWC": - kernel_layout = "HWIO" - # PaddlePaddle wieght layout is "OIHW", tvm need "HWIO" when op data_format is "NHWC" + + is_quantized = op.has_attr("quantization_type") + # PaddlePaddle wieght layout is "OIHW", tvm need "HWIO" when op data_format is "NHWC". + # There are two situations when converting the data format of weights: + # 1 Conv_2d is not a quantified OP, its weight information is the weights themselves. + # We directly convert the weight information when processing conv_2d. + # 2 Conv_2d is a quantified OP, and its weight information is the output of the quantize_linear operator. + # Therefore, the weight information needs to be transformed when processing the quantize_linear operator. + if (not is_quantized) and (data_layout == "NHWC"): kernel_data = g.get_params(op.input("Filter")[0]) kernel_data = kernel_data.asnumpy() kernel_data = kernel_data.transpose((2, 3, 1, 0)) @@ -1626,7 +1632,7 @@ def convert_pool3d(g, op, block): raise tvm.error.OpAttributeInvalid(msg.format(padding_algorithm)) # handle with special case - # while kernel size less than input size + # while kernel size more than input size # shrink kernel size to input size if ( not isinstance(in_h, _op.Expr) @@ -1812,6 +1818,53 @@ def convert_roi_align(g, op, block): g.add_node(op.output("Out")[0], out) +def convert_dequantize_linear(g, op, block): + """Operator converter for dequantize_linear.""" + + data = g.get_node(op.input("X")[0]) + # paddle_scale = tvm_scale * 127 + paddle_scale = g.get_node(op.input("Scale")[0]) + paddle_scale = _op.cast(paddle_scale, "float64") + paddle_scale_scale = _expr.const(127, "float64") + tvm_scale = paddle_scale / paddle_scale_scale + + zp = g.get_node(op.input("ZeroPoint")[0]) + axis = op.attr("quant_axis") + if axis == -1: + axis = 0 + out = _qnn.op.dequantize( + data=data, + input_scale=_op.cast(tvm_scale, "float32"), + input_zero_point=_op.cast(zp, "int32"), + axis=axis, + ) + g.add_node(op.output("Y")[0], out) + + +def convert_quantize_linear(g, op, block): + """Operator converter for dequantize_linear.""" + + data = g.get_node(op.input("X")[0]) + + # paddle_scale = tvm_scale * 127 + paddle_scale = g.get_node(op.input("Scale")[0]) + paddle_scale = _op.cast(paddle_scale, "float64") + paddle_scale_scale = _expr.const(127, "float64") + tvm_scale = paddle_scale / paddle_scale_scale + + zp = g.get_node(op.input("ZeroPoint")[0]) + axis = op.attr("quant_axis") + if axis == -1: + axis = 0 + out = _qnn.op.quantize( + data=data, + output_scale=_op.cast(tvm_scale, "float32"), + output_zero_point=_op.cast(zp, "int32"), + axis=axis, + ) + g.add_node(op.output("Y")[0], out) + + def convert_rnn(g, op, block): """Operator converter for rnn.""" @@ -2905,6 +2958,9 @@ def convert_where_index(g, op, block): "unstack": convert_unstack, "where": convert_where, "where_index": convert_where_index, + # Quantized + "dequantize_linear": convert_dequantize_linear, + "quantize_linear": convert_quantize_linear, } @@ -2938,7 +2994,7 @@ def get_params(self, name=None): if name is None: return self.params - assert name in self.params + assert name in self.params, f"The name({name}) is not in params" return self.params[name] def extract_parameters(self, program, scope=None): @@ -2950,8 +3006,9 @@ def extract_parameters(self, program, scope=None): var = program.global_block().var(name) if name.endswith("feed") or name.endswith("fetch"): continue - if not var.persistable: - continue + # This judgment will cause the PaddleInference model exported by PaddleSlim to skip some operators that need to be read in NHWC format. + # if not var.persistable: + # continue if isinstance(scope, dict): self.params[name] = _nd.array(scope[name]) else: @@ -3018,7 +3075,6 @@ def from_program(self, program, shape_dict, scope): for op in block.ops: if op.type == "fetch": output_names.append(op.input("X")[0]) - outputs = [self.nodes[name] for name in output_names] outputs = outputs[0] if len(outputs) == 1 else _expr.Tuple(outputs) From 2f8861cb8aab920eb6dbbd3cc504ae62bd9e0c1d Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 28 Feb 2024 10:11:20 +0800 Subject: [PATCH 4/9] Fix code formatting issues --- python/tvm/relay/frontend/paddlepaddle.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tvm/relay/frontend/paddlepaddle.py b/python/tvm/relay/frontend/paddlepaddle.py index 6ba0a3855c5c..011228fa4a06 100755 --- a/python/tvm/relay/frontend/paddlepaddle.py +++ b/python/tvm/relay/frontend/paddlepaddle.py @@ -336,13 +336,13 @@ def convert_conv2d(g, op, block): else: msg = f'Value {padding_algorithm} in attribute "padding" of operator Conv is not "valid."' raise tvm.error.OpAttributeInvalid(msg) - + is_quantized = op.has_attr("quantization_type") # PaddlePaddle wieght layout is "OIHW", tvm need "HWIO" when op data_format is "NHWC". # There are two situations when converting the data format of weights: - # 1 Conv_2d is not a quantified OP, its weight information is the weights themselves. + # 1 Conv_2d is not a quantified OP, its weight information is the weights themselves. # We directly convert the weight information when processing conv_2d. - # 2 Conv_2d is a quantified OP, and its weight information is the output of the quantize_linear operator. + # 2 Conv_2d is a quantified OP, and its weight information is the output of the quantize_linear operator. # Therefore, the weight information needs to be transformed when processing the quantize_linear operator. if (not is_quantized) and (data_layout == "NHWC"): kernel_data = g.get_params(op.input("Filter")[0]) From 322466426f4d5151ce27b68c993c9983e04f51cf Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 28 Feb 2024 11:28:47 +0800 Subject: [PATCH 5/9] fixed error code format --- python/tvm/relay/frontend/paddlepaddle.py | 87 ++++++++++++----------- 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/python/tvm/relay/frontend/paddlepaddle.py b/python/tvm/relay/frontend/paddlepaddle.py index e4429bf65860..b16898f71812 100755 --- a/python/tvm/relay/frontend/paddlepaddle.py +++ b/python/tvm/relay/frontend/paddlepaddle.py @@ -315,7 +315,6 @@ def convert_conv2d(g, op, block): strides = op.attr("strides") kernel = g.get_node(op.input("Filter")[0]) - kernel_layout = "OIHW" input_x = g.get_node(op.input("Input")[0]) data_layout = op.attr("data_format") kernel_layout = "OIHW" if data_layout == "NCHW" else "HWIO" @@ -343,8 +342,9 @@ def convert_conv2d(g, op, block): # There are two situations when converting the data format of weights: # 1 Conv_2d is not a quantified OP, its weight information is the weights themselves. # We directly convert the weight information when processing conv_2d. - # 2 Conv_2d is a quantified OP, and its weight information is the output of the quantize_linear operator. - # Therefore, the weight information needs to be transformed when processing the quantize_linear operator. + # 2 Conv_2d is a quantified OP, and its weight information is the output of + # the quantize_linear operator. Therefore, the weight information needs to be + # transformed when processing the quantize_linear operator. if (not is_quantized) and (data_layout == "NHWC"): kernel_data = g.get_params(op.input("Filter")[0]) kernel_data = kernel_data.asnumpy() @@ -1822,22 +1822,24 @@ def convert_roi_align(g, op, block): def convert_dequantize_linear(g, op, block): """Operator converter for dequantize_linear.""" - data = g.get_node(op.input("X")[0]) + data_node_name = op.input("X")[0] + data_node = g.get_node(data_node_name) + print(f"dequantize_input_name is {data_node_name}") + # paddle_scale = tvm_scale * 127 - paddle_scale = g.get_node(op.input("Scale")[0]) - paddle_scale = _op.cast(paddle_scale, "float64") - paddle_scale_scale = _expr.const(127, "float64") - tvm_scale = paddle_scale / paddle_scale_scale - - zp = g.get_node(op.input("ZeroPoint")[0]) - axis = op.attr("quant_axis") - if axis == -1: - axis = 0 + paddle_quantize_scale = g.get_params(op.input("Scale")[0]).asnumpy() + tvm_quantize_scale = paddle_quantize_scale / 127 + + tvm_quantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy() + + tvm_quantize_axis = op.attr("quant_axis") + if tvm_quantize_axis == -1: + tvm_quantize_axis = 0 out = _qnn.op.dequantize( - data=data, - input_scale=_op.cast(tvm_scale, "float32"), - input_zero_point=_op.cast(zp, "int32"), - axis=axis, + data=data_node, + input_scale=_op.const(tvm_quantize_scale, "float32"), + input_zero_point=_op.const(tvm_quantize_zp, "int32"), + axis=tvm_quantize_axis, ) g.add_node(op.output("Y")[0], out) @@ -1845,23 +1847,23 @@ def convert_dequantize_linear(g, op, block): def convert_quantize_linear(g, op, block): """Operator converter for dequantize_linear.""" - data = g.get_node(op.input("X")[0]) + data_node_name = op.input("X")[0] + data_node = g.get_node(data_node_name) + print(f"quantize_input_name is {data_node_name}") # paddle_scale = tvm_scale * 127 - paddle_scale = g.get_node(op.input("Scale")[0]) - paddle_scale = _op.cast(paddle_scale, "float64") - paddle_scale_scale = _expr.const(127, "float64") - tvm_scale = paddle_scale / paddle_scale_scale - - zp = g.get_node(op.input("ZeroPoint")[0]) - axis = op.attr("quant_axis") - if axis == -1: - axis = 0 + paddle_quantize_scale = g.get_params(op.input("Scale")[0]).asnumpy() + tvm_quantize_scale = paddle_quantize_scale / 127 + + tvm_quantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy() + tvm_quantize_axis = op.attr("quant_axis") + if tvm_quantize_axis == -1: + tvm_quantize_axis = 0 out = _qnn.op.quantize( - data=data, - output_scale=_op.cast(tvm_scale, "float32"), - output_zero_point=_op.cast(zp, "int32"), - axis=axis, + data=data_node, + output_scale=_op.const(tvm_quantize_scale, "float32"), + output_zero_point=_op.const(tvm_quantize_zp, "int32"), + axis=tvm_quantize_axis, ) g.add_node(op.output("Y")[0], out) @@ -2440,14 +2442,17 @@ def convert_slice(g, op, block): def convert_softmax(g, op, block): """Operator converter for softmax.""" - axis = op.attr("axis") - input_shape = block.var(op.input("X")[0]).shape - if axis < 0: - axis = len(input_shape) + axis x = g.get_node(op.input("X")[0]) - m = _op.max(x, axis, keepdims=True) - e = _op.exp(x - m) - out = e / _op.sum(e, axis, keepdims=True) + axis = op.attr("axis") + # input_shape = block.var(op.input("X")[0]).shape + # if axis < 0: + # axis = len(input_shape) + axis + # m = _op.max(x, axis, keepdims=True) + # e = _op.exp(x - m) + # out = e / _op.sum(e, axis, keepdims=True) + # out = _op.cast(out,"float32") + + out = _op.nn.softmax(x, axis) g.add_node(op.output("Out")[0], out) @@ -3004,10 +3009,12 @@ def extract_parameters(self, program, scope=None): self.params = {} variables = program.global_block().vars for name in variables: - var = program.global_block().var(name) if name.endswith("feed") or name.endswith("fetch"): continue - # This judgment will cause the PaddleInference model exported by PaddleSlim to skip some operators that need to be read in NHWC format. + # This judgment will cause the PaddleInference model + # exported by PaddleSlim to skip some operators + # that need to be read in NHWC format. + # var = program.global_block().var(name) # if not var.persistable: # continue if isinstance(scope, dict): From b804cab80c3e9b12f7ab9aaefd22e856aa5a5744 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 28 Feb 2024 13:00:59 +0800 Subject: [PATCH 6/9] update dequantize and quantize --- python/tvm/relay/frontend/paddlepaddle.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/tvm/relay/frontend/paddlepaddle.py b/python/tvm/relay/frontend/paddlepaddle.py index b16898f71812..ca30319b0834 100755 --- a/python/tvm/relay/frontend/paddlepaddle.py +++ b/python/tvm/relay/frontend/paddlepaddle.py @@ -1824,13 +1824,13 @@ def convert_dequantize_linear(g, op, block): data_node_name = op.input("X")[0] data_node = g.get_node(data_node_name) - print(f"dequantize_input_name is {data_node_name}") + # print(f"data_node_name is {data_node_name};data_node is {data_node}") # paddle_scale = tvm_scale * 127 - paddle_quantize_scale = g.get_params(op.input("Scale")[0]).asnumpy() + paddle_quantize_scale = g.get_params(op.input("Scale")[0]).asnumpy()[0] tvm_quantize_scale = paddle_quantize_scale / 127 - tvm_quantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy() + tvm_quantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy()[0] tvm_quantize_axis = op.attr("quant_axis") if tvm_quantize_axis == -1: @@ -1849,13 +1849,13 @@ def convert_quantize_linear(g, op, block): data_node_name = op.input("X")[0] data_node = g.get_node(data_node_name) - print(f"quantize_input_name is {data_node_name}") + # print(f"data_node_name is {data_node_name};data_node is {data_node}") # paddle_scale = tvm_scale * 127 - paddle_quantize_scale = g.get_params(op.input("Scale")[0]).asnumpy() + paddle_quantize_scale = g.get_params(op.input("Scale")[0]).asnumpy()[0] tvm_quantize_scale = paddle_quantize_scale / 127 - tvm_quantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy() + tvm_quantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy()[0] tvm_quantize_axis = op.attr("quant_axis") if tvm_quantize_axis == -1: tvm_quantize_axis = 0 From de28af339fc372b485e0b14aa23e1a4cc7980f44 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 28 Feb 2024 13:25:55 +0800 Subject: [PATCH 7/9] fixed bug when model is fp32 model --- python/tvm/relay/frontend/paddlepaddle.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/python/tvm/relay/frontend/paddlepaddle.py b/python/tvm/relay/frontend/paddlepaddle.py index ca30319b0834..940d84578116 100755 --- a/python/tvm/relay/frontend/paddlepaddle.py +++ b/python/tvm/relay/frontend/paddlepaddle.py @@ -2444,15 +2444,13 @@ def convert_softmax(g, op, block): x = g.get_node(op.input("X")[0]) axis = op.attr("axis") - # input_shape = block.var(op.input("X")[0]).shape - # if axis < 0: - # axis = len(input_shape) + axis - # m = _op.max(x, axis, keepdims=True) - # e = _op.exp(x - m) - # out = e / _op.sum(e, axis, keepdims=True) - # out = _op.cast(out,"float32") - - out = _op.nn.softmax(x, axis) + input_shape = block.var(op.input("X")[0]).shape + if axis < 0: + axis = len(input_shape) + axis + m = _op.max(x, axis, keepdims=True) + e = _op.exp(x - m) + out = e / _op.sum(e, axis, keepdims=True) + # out = _op.nn.softmax(x, axis) g.add_node(op.output("Out")[0], out) @@ -3014,9 +3012,9 @@ def extract_parameters(self, program, scope=None): # This judgment will cause the PaddleInference model # exported by PaddleSlim to skip some operators # that need to be read in NHWC format. - # var = program.global_block().var(name) - # if not var.persistable: - # continue + var = program.global_block().var(name) + if not var.persistable: + continue if isinstance(scope, dict): self.params[name] = _nd.array(scope[name]) else: From a802e47a180bd5b3bf0d8c8bf5f1f5c464ab0199 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 28 Feb 2024 13:45:38 +0800 Subject: [PATCH 8/9] update dequantize and quantize --- python/tvm/relay/frontend/paddlepaddle.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/tvm/relay/frontend/paddlepaddle.py b/python/tvm/relay/frontend/paddlepaddle.py index 940d84578116..a5a602270634 100755 --- a/python/tvm/relay/frontend/paddlepaddle.py +++ b/python/tvm/relay/frontend/paddlepaddle.py @@ -1827,10 +1827,10 @@ def convert_dequantize_linear(g, op, block): # print(f"data_node_name is {data_node_name};data_node is {data_node}") # paddle_scale = tvm_scale * 127 - paddle_quantize_scale = g.get_params(op.input("Scale")[0]).asnumpy()[0] + paddle_quantize_scale = g.get_params(op.input("Scale")[0]).asnumpy() tvm_quantize_scale = paddle_quantize_scale / 127 - tvm_quantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy()[0] + tvm_quantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy() tvm_quantize_axis = op.attr("quant_axis") if tvm_quantize_axis == -1: @@ -1852,10 +1852,10 @@ def convert_quantize_linear(g, op, block): # print(f"data_node_name is {data_node_name};data_node is {data_node}") # paddle_scale = tvm_scale * 127 - paddle_quantize_scale = g.get_params(op.input("Scale")[0]).asnumpy()[0] + paddle_quantize_scale = g.get_params(op.input("Scale")[0]).asnumpy() tvm_quantize_scale = paddle_quantize_scale / 127 - tvm_quantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy()[0] + tvm_quantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy() tvm_quantize_axis = op.attr("quant_axis") if tvm_quantize_axis == -1: tvm_quantize_axis = 0 @@ -2450,7 +2450,6 @@ def convert_softmax(g, op, block): m = _op.max(x, axis, keepdims=True) e = _op.exp(x - m) out = e / _op.sum(e, axis, keepdims=True) - # out = _op.nn.softmax(x, axis) g.add_node(op.output("Out")[0], out) From 2a214ebbc773c0e0bd3629c30d0187cd7705859c Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 28 Feb 2024 16:45:33 +0800 Subject: [PATCH 9/9] update for paddle quantize model when format is NCHW --- python/tvm/relay/frontend/paddlepaddle.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/python/tvm/relay/frontend/paddlepaddle.py b/python/tvm/relay/frontend/paddlepaddle.py index a5a602270634..b00bb43d4648 100755 --- a/python/tvm/relay/frontend/paddlepaddle.py +++ b/python/tvm/relay/frontend/paddlepaddle.py @@ -1824,17 +1824,20 @@ def convert_dequantize_linear(g, op, block): data_node_name = op.input("X")[0] data_node = g.get_node(data_node_name) - # print(f"data_node_name is {data_node_name};data_node is {data_node}") # paddle_scale = tvm_scale * 127 paddle_quantize_scale = g.get_params(op.input("Scale")[0]).asnumpy() - tvm_quantize_scale = paddle_quantize_scale / 127 + tvm_quantize_scale = paddle_quantize_scale / 127.0 tvm_quantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy() tvm_quantize_axis = op.attr("quant_axis") if tvm_quantize_axis == -1: tvm_quantize_axis = 0 + + if len(infer_shape(data_node)) < 2: + tvm_quantize_axis = 0 + out = _qnn.op.dequantize( data=data_node, input_scale=_op.const(tvm_quantize_scale, "float32"), @@ -1849,16 +1852,17 @@ def convert_quantize_linear(g, op, block): data_node_name = op.input("X")[0] data_node = g.get_node(data_node_name) - # print(f"data_node_name is {data_node_name};data_node is {data_node}") # paddle_scale = tvm_scale * 127 paddle_quantize_scale = g.get_params(op.input("Scale")[0]).asnumpy() - tvm_quantize_scale = paddle_quantize_scale / 127 + tvm_quantize_scale = paddle_quantize_scale / 127.0 tvm_quantize_zp = g.get_params(op.input("ZeroPoint")[0]).asnumpy() tvm_quantize_axis = op.attr("quant_axis") + if tvm_quantize_axis == -1: tvm_quantize_axis = 0 + out = _qnn.op.quantize( data=data_node, output_scale=_op.const(tvm_quantize_scale, "float32"),