From 73501161342f7ccf7c0444188641792f44628ba6 Mon Sep 17 00:00:00 2001
From: MingkangW <131323282+MingkangW@users.noreply.github.com>
Date: Fri, 11 Aug 2023 14:22:58 +0800
Subject: [PATCH 1/7] Update annotation for the output of the add op

The output of the add op is an activation, so it should be annotated with
QAnnotateKind.ACTIVATION. Otherwise, when quantizing ResNet, the graph casts
int32 to int8 directly instead of requantizing the result.

---
 python/tvm/relay/quantize/_annotate.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/tvm/relay/quantize/_annotate.py b/python/tvm/relay/quantize/_annotate.py
index c1a7b50d3f45..b6d6c921a8a1 100644
--- a/python/tvm/relay/quantize/_annotate.py
+++ b/python/tvm/relay/quantize/_annotate.py
@@ -276,7 +276,7 @@ def add_rewrite(ref_call, new_args, ctx):
         assert rhs_kind in [QAnnotateKind.INPUT, QAnnotateKind.ACTIVATION]
         lhs_expr = attach_simulated_quantize(lhs_expr, QAnnotateKind.INPUT)
         expr = _forward_op(ref_call, [lhs_expr, rhs_expr])
-        return QAnnotateExpr(expr, QAnnotateKind.INPUT)
+        return QAnnotateExpr(expr, QAnnotateKind.ACTIVATION)
 
     if lhs_kind is not None and rhs_kind is None:
         if _analysis.check_constant(rhs_expr):
@@ -290,7 +290,7 @@ def add_rewrite(ref_call, new_args, ctx):
     if lhs_kind is not None and rhs_kind is not None:
         if lhs_kind == QAnnotateKind.INPUT and rhs_kind == QAnnotateKind.INPUT:
             expr = _forward_op(ref_call, [lhs_expr, rhs_expr])
-            return QAnnotateExpr(expr, QAnnotateKind.INPUT)
+            return QAnnotateExpr(expr, QAnnotateKind.ACTIVATION)
         if lhs_kind == QAnnotateKind.ACTIVATION and rhs_kind == QAnnotateKind.ACTIVATION:
             rhs_expr = attach_simulated_quantize(rhs_expr, QAnnotateKind.INPUT)
             expr = _forward_op(ref_call, [lhs_expr, rhs_expr])

From 3e2ffba3a27c0f7eeffa307b2167b901a1a2f207 Mon Sep 17 00:00:00 2001
From: MingkangW <131323282+MingkangW@users.noreply.github.com>
Date: Sat, 12 Aug 2023 09:31:09 +0800
Subject: [PATCH 2/7] Update test_pass_auto_quantize.py

---
 tests/python/relay/test_pass_auto_quantize.py | 66 +++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/tests/python/relay/test_pass_auto_quantize.py b/tests/python/relay/test_pass_auto_quantize.py
index 488866ab6ff8..3225ce5643e0 100644
--- a/tests/python/relay/test_pass_auto_quantize.py
+++ b/tests/python/relay/test_pass_auto_quantize.py
@@ -439,6 +439,68 @@ def _check_dense(node):
 
     relay.analysis.post_order_visit(qnn_mod["main"], _check_dense)
 
+def test_add_lhs_is_none_annotate():
+    data_conv = relay.var("data_conv", shape=(1, 16, 64, 64))
+    conv2d_w = relay.const(np.random.random((16, 16, 3, 3)))
+    conv2d = relay.nn.conv2d(data_conv, conv2d_w, padding=(1, 1),kernel_size=(3,3))
+    data_add = relay.var("data_add", shape=(16, 1, 1))
+    add = relay.add(data_add, conv2d)
+    global_avg_pool2d = relay.nn.global_avg_pool2d(add)
+    mod = tvm.IRModule.from_expr(global_avg_pool2d)
+
+    calibrate_data = [{"data_conv": np.random.random((1, 16, 64 ,64)),
+                       "data_add": np.random.random((16, 1, 1))}]
+
+    with tvm.transform.PassContext(opt_level=3):
+        with relay.quantize.qconfig(
+            calibrate_mode="kl_divergence", skip_conv_layers=None
+        ):
+            qmod = relay.quantize.quantize(mod, dataset=calibrate_data)
+    print(qmod)
+    # ensure partitioned and unpartitioned results agree
+    params = [gen_rand_tvm(param.type_annotation, 0, 1) for param in mod["main"].params]
+    # a = 1
+
+    def _eval_mod(mod):
+        return relay.create_executor("vm", device=tvm.cpu(0), target="llvm", mod=mod).evaluate()(
+            *params
+        )
+
+    mod_result = _eval_mod(mod)
+    qmod_result = _eval_mod(qmod)
+    tvm.testing.assert_allclose(mod_result.numpy(), qmod_result.numpy(), rtol=1e-1, atol=1e-1)
+
+def test_add_lhs_rhs_is_input_annotate():
+    data_conv_r = relay.var("data_conv_r", shape=(1, 16, 64, 64))
+    conv2d_r = relay.nn.conv2d(data_conv_r, relay.const(np.random.random((16, 16, 3, 3))), padding=(1, 1),kernel_size=(3,3))
+    data_conv_l = relay.var("data_conv_l", shape=(1, 16, 64, 64))
+    conv2d_l = relay.nn.conv2d(data_conv_l, relay.const(np.random.random((16, 16, 3, 3))), padding=(1, 1),kernel_size=(3,3))
+    add = relay.add(conv2d_l, conv2d_r)
+    global_avg_pool2d = relay.nn.global_avg_pool2d(add)
+    mod = tvm.IRModule.from_expr(global_avg_pool2d)
+
+    calibrate_data = [{"data_conv_l": np.random.random((1, 16, 64 ,64)),
+                       "data_conv_r": np.random.random((1, 16, 64 ,64)),
+                       "data_add": np.random.random((16, 1, 1))}]
+
+    with tvm.transform.PassContext(opt_level=3):
+        with relay.quantize.qconfig(
+            calibrate_mode="kl_divergence", skip_conv_layers=None
+        ):
+            qmod = relay.quantize.quantize(mod, dataset=calibrate_data)
+    print(qmod)
+    # ensure partitioned and unpartitioned results agree
+    params = [gen_rand_tvm(param.type_annotation, 0, 1) for param in mod["main"].params]
+    # a = 1
+
+    def _eval_mod(mod):
+        return relay.create_executor("vm", device=tvm.cpu(0), target="llvm", mod=mod).evaluate()(
+            *params
+        )
+
+    mod_result = _eval_mod(mod)
+    qmod_result = _eval_mod(qmod)
+    tvm.testing.assert_allclose(mod_result.numpy(), qmod_result.numpy(), rtol=1e-1, atol=1e-1)
 
 if __name__ == "__main__":
     test_mul_rewrite()
@@ -460,3 +522,7 @@ def _check_dense(node):
 
     test_skip_conv()
     test_stop_quantize()
+
+    test_add_lhs_is_none_annotate()
+    test_add_lhs_rhs_is_input_annotate()
+

From 227a27b8bccdd27b74085cf0ed4c83226fd52f4f Mon Sep 17 00:00:00 2001
From: MingkangW <131323282+MingkangW@users.noreply.github.com>
Date: Sat, 12 Aug 2023 14:37:17 +0800
Subject: [PATCH 3/7] Update test_pass_auto_quantize.py

---
 tests/python/relay/test_pass_auto_quantize.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/python/relay/test_pass_auto_quantize.py b/tests/python/relay/test_pass_auto_quantize.py
index 3225ce5643e0..ca4e671ad80f 100644
--- a/tests/python/relay/test_pass_auto_quantize.py
+++ b/tests/python/relay/test_pass_auto_quantize.py
@@ -457,9 +457,8 @@ def test_add_lhs_is_none_annotate():
         ):
             qmod = relay.quantize.quantize(mod, dataset=calibrate_data)
     print(qmod)
-    # ensure partitioned and unpartitioned results agree
+
     params = [gen_rand_tvm(param.type_annotation, 0, 1) for param in mod["main"].params]
-    # a = 1
 
     def _eval_mod(mod):
         return relay.create_executor("vm", device=tvm.cpu(0), target="llvm", mod=mod).evaluate()(
@@ -489,9 +488,8 @@ def test_add_lhs_rhs_is_input_annotate():
         ):
             qmod = relay.quantize.quantize(mod, dataset=calibrate_data)
     print(qmod)
-    # ensure partitioned and unpartitioned results agree
+
     params = [gen_rand_tvm(param.type_annotation, 0, 1) for param in mod["main"].params]
-    # a = 1
 
     def _eval_mod(mod):
         return relay.create_executor("vm", device=tvm.cpu(0), target="llvm", mod=mod).evaluate()(

From 5b47c58e9b34b134ee41b50f432e1cfa3e16202e Mon Sep 17 00:00:00 2001
From: MingkangW <131323282+MingkangW@users.noreply.github.com>
Date: Sun, 13 Aug 2023 10:17:20 +0800
Subject: [PATCH 4/7] change format of test case

---
 tests/python/relay/test_pass_auto_quantize.py | 43 +++++++++++--------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/tests/python/relay/test_pass_auto_quantize.py b/tests/python/relay/test_pass_auto_quantize.py
index ca4e671ad80f..930a289a7cd0 100644
--- a/tests/python/relay/test_pass_auto_quantize.py
+++ b/tests/python/relay/test_pass_auto_quantize.py
@@ -439,24 +439,23 @@ def _check_dense(node):
 
     relay.analysis.post_order_visit(qnn_mod["main"], _check_dense)
 
+
 def test_add_lhs_is_none_annotate():
     data_conv = relay.var("data_conv", shape=(1, 16, 64, 64))
     conv2d_w = relay.const(np.random.random((16, 16, 3, 3)))
-    conv2d = relay.nn.conv2d(data_conv, conv2d_w, padding=(1, 1),kernel_size=(3,3))
+    conv2d = relay.nn.conv2d(data_conv, conv2d_w, padding=(1, 1), kernel_size=(3,3))
     data_add = relay.var("data_add", shape=(16, 1, 1))
     add = relay.add(data_add, conv2d)
     global_avg_pool2d = relay.nn.global_avg_pool2d(add)
     mod = tvm.IRModule.from_expr(global_avg_pool2d)
 
-    calibrate_data = [{"data_conv": np.random.random((1, 16, 64 ,64)),
-                       "data_add": np.random.random((16, 1, 1))}]
+    calibrate_data = [
+        {"data_conv": np.random.random((1, 16, 64 ,64)), "data_add": np.random.random((16, 1, 1))}
+    ]
 
     with tvm.transform.PassContext(opt_level=3):
-        with relay.quantize.qconfig(
-            calibrate_mode="kl_divergence", skip_conv_layers=None
-        ):
+        with relay.quantize.qconfig(calibrate_mode="kl_divergence", skip_conv_layers=None):
             qmod = relay.quantize.quantize(mod, dataset=calibrate_data)
-    print(qmod)
 
     params = [gen_rand_tvm(param.type_annotation, 0, 1) for param in mod["main"].params]
 
@@ -469,25 +468,35 @@ def _eval_mod(mod):
     qmod_result = _eval_mod(qmod)
     tvm.testing.assert_allclose(mod_result.numpy(), qmod_result.numpy(), rtol=1e-1, atol=1e-1)
 
+
 def test_add_lhs_rhs_is_input_annotate():
     data_conv_r = relay.var("data_conv_r", shape=(1, 16, 64, 64))
-    conv2d_r = relay.nn.conv2d(data_conv_r, relay.const(np.random.random((16, 16, 3, 3))), padding=(1, 1),kernel_size=(3,3))
+    conv2d_r = relay.nn.conv2d(
+            data_conv_r,
+            relay.const(np.random.random((16, 16, 3, 3))),
+            padding=(1, 1),
+            kernel_size=(3,3))
     data_conv_l = relay.var("data_conv_l", shape=(1, 16, 64, 64))
-    conv2d_l = relay.nn.conv2d(data_conv_l, relay.const(np.random.random((16, 16, 3, 3))), padding=(1, 1),kernel_size=(3,3))
+    conv2d_l = relay.nn.conv2d(
+            data_conv_l,
+            relay.const(np.random.random((16, 16, 3, 3))),
+            padding=(1, 1),
+            kernel_size=(3,3))
     add = relay.add(conv2d_l, conv2d_r)
     global_avg_pool2d = relay.nn.global_avg_pool2d(add)
     mod = tvm.IRModule.from_expr(global_avg_pool2d)
 
-    calibrate_data = [{"data_conv_l": np.random.random((1, 16, 64 ,64)),
-                       "data_conv_r": np.random.random((1, 16, 64 ,64)),
-                       "data_add": np.random.random((16, 1, 1))}]
+    calibrate_data = [
+        {
+            "data_conv_l": np.random.random((1, 16, 64 ,64)),
+            "data_conv_r": np.random.random((1, 16, 64 ,64)),
+            "data_add": np.random.random((16, 1, 1))
+        }
+    ]
 
     with tvm.transform.PassContext(opt_level=3):
-        with relay.quantize.qconfig(
-            calibrate_mode="kl_divergence", skip_conv_layers=None
-        ):
+        with relay.quantize.qconfig(calibrate_mode="kl_divergence", skip_conv_layers=None):
             qmod = relay.quantize.quantize(mod, dataset=calibrate_data)
-    print(qmod)
 
     params = [gen_rand_tvm(param.type_annotation, 0, 1) for param in mod["main"].params]
 
@@ -500,6 +509,7 @@ def _eval_mod(mod):
     qmod_result = _eval_mod(qmod)
     tvm.testing.assert_allclose(mod_result.numpy(), qmod_result.numpy(), rtol=1e-1, atol=1e-1)
 
+
 if __name__ == "__main__":
     test_mul_rewrite()
     test_batch_flatten_rewrite()
@@ -523,4 +533,3 @@ def _eval_mod(mod):
 
     test_add_lhs_is_none_annotate()
     test_add_lhs_rhs_is_input_annotate()
-

From 2ffbd6b086d5a1903e4c9317d8d6791d52de7e91 Mon Sep 17 00:00:00 2001
From: MingkangW <131323282+MingkangW@users.noreply.github.com>
Date: Sun, 13 Aug 2023 11:03:58 +0800
Subject: [PATCH 5/7] reformat test case

---
 tests/python/relay/test_pass_auto_quantize.py | 36 ++++++++++---------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/tests/python/relay/test_pass_auto_quantize.py b/tests/python/relay/test_pass_auto_quantize.py
index 930a289a7cd0..367cdc0cf809 100644
--- a/tests/python/relay/test_pass_auto_quantize.py
+++ b/tests/python/relay/test_pass_auto_quantize.py
@@ -443,20 +443,20 @@ def _check_dense(node):
 def test_add_lhs_is_none_annotate():
     data_conv = relay.var("data_conv", shape=(1, 16, 64, 64))
     conv2d_w = relay.const(np.random.random((16, 16, 3, 3)))
-    conv2d = relay.nn.conv2d(data_conv, conv2d_w, padding=(1, 1), kernel_size=(3,3))
+    conv2d = relay.nn.conv2d(data_conv, conv2d_w, padding=(1, 1), kernel_size=(3, 3))
     data_add = relay.var("data_add", shape=(16, 1, 1))
     add = relay.add(data_add, conv2d)
     global_avg_pool2d = relay.nn.global_avg_pool2d(add)
     mod = tvm.IRModule.from_expr(global_avg_pool2d)
-    
+
     calibrate_data = [
-        {"data_conv": np.random.random((1, 16, 64 ,64)), "data_add": np.random.random((16, 1, 1))}
+        {"data_conv": np.random.random((1, 16, 64, 64)), "data_add": np.random.random((16, 1, 1))}
     ]
 
     with tvm.transform.PassContext(opt_level=3):
         with relay.quantize.qconfig(calibrate_mode="kl_divergence", skip_conv_layers=None):
             qmod = relay.quantize.quantize(mod, dataset=calibrate_data)
-    
+
     params = [gen_rand_tvm(param.type_annotation, 0, 1) for param in mod["main"].params]
 
     def _eval_mod(mod):
@@ -472,32 +472,34 @@ def test_add_lhs_rhs_is_input_annotate():
     data_conv_r = relay.var("data_conv_r", shape=(1, 16, 64, 64))
     conv2d_r = relay.nn.conv2d(
-            data_conv_r,
-            relay.const(np.random.random((16, 16, 3, 3))),
-            padding=(1, 1),
-            kernel_size=(3,3))
+        data_conv_r,
+        relay.const(np.random.random((16, 16, 3, 3))),
+        padding=(1, 1),
+        kernel_size=(3,3)
+    )
     data_conv_l = relay.var("data_conv_l", shape=(1, 16, 64, 64))
     conv2d_l = relay.nn.conv2d(
-            data_conv_l,
-            relay.const(np.random.random((16, 16, 3, 3))),
-            padding=(1, 1),
-            kernel_size=(3,3))
+        data_conv_l,
+        relay.const(np.random.random((16, 16, 3, 3))),
+        padding=(1, 1),
+        kernel_size=(3,3)
+    )
     add = relay.add(conv2d_l, conv2d_r)
     global_avg_pool2d = relay.nn.global_avg_pool2d(add)
     mod = tvm.IRModule.from_expr(global_avg_pool2d)
-    
+
     calibrate_data = [
         {
-            "data_conv_l": np.random.random((1, 16, 64 ,64)),
-            "data_conv_r": np.random.random((1, 16, 64 ,64)),
-            "data_add": np.random.random((16, 1, 1))
+            "data_conv_l": np.random.random((1, 16, 64, 64)),
+            "data_conv_r": np.random.random((1, 16, 64, 64)),
+            "data_add": np.random.random((16, 1, 1)),
         }
     ]
 
     with tvm.transform.PassContext(opt_level=3):
         with relay.quantize.qconfig(calibrate_mode="kl_divergence", skip_conv_layers=None):
             qmod = relay.quantize.quantize(mod, dataset=calibrate_data)
-    
+
     params = [gen_rand_tvm(param.type_annotation, 0, 1) for param in mod["main"].params]
 
     def _eval_mod(mod):

From 3ec8fa25f38a65e38abdb0a0fad467a310639e10 Mon Sep 17 00:00:00 2001
From: MingkangW <131323282+MingkangW@users.noreply.github.com>
Date: Sun, 13 Aug 2023 11:36:14 +0800
Subject: [PATCH 6/7] reformat quantize test case

---
 tests/python/relay/test_pass_auto_quantize.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/python/relay/test_pass_auto_quantize.py b/tests/python/relay/test_pass_auto_quantize.py
index 367cdc0cf809..9625c475e0ff 100644
--- a/tests/python/relay/test_pass_auto_quantize.py
+++ b/tests/python/relay/test_pass_auto_quantize.py
@@ -475,14 +475,14 @@ def test_add_lhs_rhs_is_input_annotate():
         data_conv_r,
         relay.const(np.random.random((16, 16, 3, 3))),
         padding=(1, 1),
-        kernel_size=(3,3)
+        kernel_size=(3, 3)
     )
     data_conv_l = relay.var("data_conv_l", shape=(1, 16, 64, 64))
     conv2d_l = relay.nn.conv2d(
         data_conv_l,
         relay.const(np.random.random((16, 16, 3, 3))),
         padding=(1, 1),
-        kernel_size=(3,3)
+        kernel_size=(3, 3)
     )
     add = relay.add(conv2d_l, conv2d_r)
     global_avg_pool2d = relay.nn.global_avg_pool2d(add)

From cb738157dd5bb13d563d525e679f84e5041f1bde Mon Sep 17 00:00:00 2001
From: MingkangW <131323282+MingkangW@users.noreply.github.com>
Date: Sun, 13 Aug 2023 13:50:58 +0800
Subject: [PATCH 7/7] reformat test case

---
 tests/python/relay/test_pass_auto_quantize.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/python/relay/test_pass_auto_quantize.py b/tests/python/relay/test_pass_auto_quantize.py
index 9625c475e0ff..30d4c3650215 100644
--- a/tests/python/relay/test_pass_auto_quantize.py
+++ b/tests/python/relay/test_pass_auto_quantize.py
@@ -475,14 +475,14 @@ def test_add_lhs_rhs_is_input_annotate():
         data_conv_r,
         relay.const(np.random.random((16, 16, 3, 3))),
         padding=(1, 1),
-        kernel_size=(3, 3)
+        kernel_size=(3, 3),
     )
     data_conv_l = relay.var("data_conv_l", shape=(1, 16, 64, 64))
     conv2d_l = relay.nn.conv2d(
         data_conv_l,
         relay.const(np.random.random((16, 16, 3, 3))),
         padding=(1, 1),
-        kernel_size=(3, 3)
+        kernel_size=(3, 3),
     )
     add = relay.add(conv2d_l, conv2d_r)
     global_avg_pool2d = relay.nn.global_avg_pool2d(add)
     mod = tvm.IRModule.from_expr(global_avg_pool2d)
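
Note for reviewers (not part of the patches): the snippet below is a minimal, standalone sketch of the kind-propagation rule that PATCH 1/7 changes in add_rewrite (python/tvm/relay/quantize/_annotate.py). The function add_output_kind and the string constants are illustrative stand-ins, not TVM APIs; only the two branches touched by the patch are modeled.

# Illustrative model of the two changed branches in add_rewrite -- not TVM source.
# After PATCH 1/7, the result of add is annotated ACTIVATION in these cases, so a
# simulated (re)quantize is attached before the int32 sum feeds later ops, instead
# of the int32 value being cast straight to int8.
INPUT, ACTIVATION = "INPUT", "ACTIVATION"  # stand-ins for QAnnotateKind members


def add_output_kind(lhs_kind, rhs_kind):
    """Annotation kind of add's output for the two branches changed by the patch."""
    if lhs_kind is None and rhs_kind is not None:
        # lhs is an unannotated graph input (e.g. data_add in the new test):
        # it gets INPUT-quantized, and the sum is now ACTIVATION (was INPUT).
        return ACTIVATION
    if lhs_kind == INPUT and rhs_kind == INPUT:
        # both operands carry INPUT kind: the sum is now ACTIVATION (was INPUT).
        return ACTIVATION
    raise NotImplementedError("other operand combinations are unchanged by this series")


assert add_output_kind(None, INPUT) == ACTIVATION
assert add_output_kind(INPUT, INPUT) == ACTIVATION

The two new tests in PATCH 2/7 are intended to exercise exactly these annotation paths, as their names suggest.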