diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py
index b68ebc3084c3..09525a64ac05 100644
--- a/python/tvm/relay/frontend/onnx.py
+++ b/python/tvm/relay/frontend/onnx.py
@@ -29,6 +29,7 @@
 from .. import expr as _expr
 from .. import function as _function
 from .. import op as _op
+from .. import qnn as _qnn
 from .. import vision as _vision
 from .. import loops as _loops
 from .. import ty as _ty
@@ -2782,6 +2783,60 @@ def _impl_v1(cls, inputs, attr, params):
         return cls._op_dispatch(operator, inputs, attr, params)
 
 
+class QuantizeLinear(OnnxOpConverter):
+    """Operator converter for QuantizeLinear."""
+
+    @classmethod
+    def _impl_v10(cls, inputs, attr, params):
+        data, scale, zp = inputs
+        out_dtype = infer_type(zp).checked_type.dtype
+        return _qnn.op.quantize(data, scale, _op.cast(zp, "int32"), 0, out_dtype)
+
+    @classmethod
+    def _impl_v13(cls, inputs, attr, params):
+        data, scale, zp = inputs
+        out_dtype = infer_type(zp).checked_type.dtype
+        axis = attr.get("axis", 1)
+        return _qnn.op.quantize(data, scale, _op.cast(zp, "int32"), axis, out_dtype)
+
+
+class DequantizeLinear(OnnxOpConverter):
+    """Operator converter for DequantizeLinear."""
+
+    @classmethod
+    def _impl_v10(cls, inputs, attr, params):
+        data, scale, zp = inputs
+        return _qnn.op.dequantize(data, scale, _op.cast(zp, "int32"), 0)
+
+    @classmethod
+    def _impl_v13(cls, inputs, attr, params):
+        data, scale, zp = inputs
+        axis = attr.get("axis", 1)
+        return _qnn.op.dequantize(data, scale, _op.cast(zp, "int32"), axis)
+
+
+class DynamicQuantizeLinear(OnnxOpConverter):
+    """Operator converter for DynamicQuantizeLinear."""
+
+    @classmethod
+    def _impl_v11(cls, inputs, attr, params):
+        """This op is deprecated and only supports uint8"""
+        data = inputs[0]
+        data_dtype = infer_type(data).checked_type.dtype
+        zero = _op.const(0, dtype=data_dtype)
+        maximum = _op.maximum(zero, _op.max(data))
+        minimum = _op.minimum(zero, _op.min(data))
+        scale = (maximum - minimum) / _op.const(255, dtype=data_dtype)
+        zp = zero - _op.min(data) / scale
+        zp = _op.cast(_op.round(_op.clip(zp, 0, 255)), "uint8")
+        return _expr.TupleWrapper(
+            _expr.Tuple(
+                [_qnn.op.quantize(data, scale, _op.cast(zp, "int32"), 0, "uint8"), scale, zp]
+            ),
+            size=3,
+        )
+
+
 class BitShift(OnnxOpConverter):
     """Operator converter for NonZero"""
 
@@ -2966,6 +3021,10 @@ def _get_convert_map(opset):
         "If": If.get_converter(opset),
         # Torch ATen Dispatcher.
         "ATen": ATen.get_converter(opset),
+        # Quantization
+        "QuantizeLinear": QuantizeLinear.get_converter(opset),
+        "DequantizeLinear": DequantizeLinear.get_converter(opset),
+        "DynamicQuantizeLinear": DynamicQuantizeLinear.get_converter(opset),
     }
 
 
diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py
index 5c6a735f901e..a491ed130418 100644
--- a/tests/python/frontend/onnx/test_forward.py
+++ b/tests/python/frontend/onnx/test_forward.py
@@ -4157,15 +4157,8 @@ def verify_cumsum(indata, axis, exclusive=0, reverse=0, type="float32"):
     "test_cumsum_2d_axis_0/",
     "test_cumsum_2d_axis_1/",
     "test_cumsum_2d_negative_axis/",
-    "test_dequantizelinear/",
     "test_det_2d/",
     "test_det_nd/",
-    "test_dynamicquantizelinear/",
-    "test_dynamicquantizelinear_expanded/",
-    "test_dynamicquantizelinear_max_adjusted/",
-    "test_dynamicquantizelinear_max_adjusted_expanded/",
-    "test_dynamicquantizelinear_min_adjusted/",
-    "test_dynamicquantizelinear_min_adjusted_expanded/",
     "test_eyelike_populate_off_main_diagonal/",
     "test_eyelike_with_dtype/",
     "test_eyelike_without_dtype/",
@@ -4193,7 +4186,6 @@ def verify_cumsum(indata, axis, exclusive=0, reverse=0, type="float32"):
     "test_qlinearconv/",
     "test_qlinearmatmul_2D/",
     "test_qlinearmatmul_3D/",
-    "test_quantizelinear/",
     "test_range_float_type_positive_delta_expanded/",
     "test_range_int32_type_negative_delta_expanded/",
     "test_resize_downsample_scales_cubic/",