From fcb0beee808e4ce3904de5efd6c9c2d07095942e Mon Sep 17 00:00:00 2001 From: Chris Sidebottom Date: Tue, 21 Sep 2021 11:42:12 +0100 Subject: [PATCH 1/2] Ensure AOT passes all intermediary storages to function calls This iterates over the return storage IDs rather than just using the first one to ensure all of them get passed to subsequent calls. Fixes #9036 --- src/relay/backend/aot_executor_codegen.cc | 4 +- tests/python/relay/aot/test_crt_aot.py | 66 +++++++++++------------ 2 files changed, 33 insertions(+), 37 deletions(-) diff --git a/src/relay/backend/aot_executor_codegen.cc b/src/relay/backend/aot_executor_codegen.cc index ad9ba1b2069d..f1398786b93b 100644 --- a/src/relay/backend/aot_executor_codegen.cc +++ b/src/relay/backend/aot_executor_codegen.cc @@ -291,7 +291,9 @@ class AOTExecutorCodegen : public MixedModeVisitor { args.push_back(param_handle); } else { auto var_arg = FindExpr(arg); - args.push_back(var_arg[0]); + for (const auto& var : var_arg) { + args.push_back(var); + } } } diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py index e117302d0ed8..f5466f05326a 100644 --- a/tests/python/relay/aot/test_crt_aot.py +++ b/tests/python/relay/aot/test_crt_aot.py @@ -25,7 +25,7 @@ from tvm import relay from tvm.ir.module import IRModule from tvm.relay import testing, transform -from tvm.relay.testing import byoc +from tvm.relay.op.annotation import compiler_begin, compiler_end from aot_test_utils import ( AOTTestModel, AOT_DEFAULT_RUNNER, @@ -312,8 +312,9 @@ def test_mobilenet(debug_calculated_workspaces, workspace_byte_alignment): ) -def test_byoc_microtvm(): - """This is a simple test case to check BYOC capabilities of AOT""" +@pytest.mark.parametrize("merge_compiler_regions", [False, True]) +def test_byoc_microtvm(merge_compiler_regions): + """This is a simple test to check BYOC capabilities of AOT - with and without merging compiler regions to test for https://github.com/apache/tvm/issues/9036""" use_unpacked_api = False interface_api = "packed" test_runner = AOT_DEFAULT_RUNNER @@ -321,44 +322,37 @@ def test_byoc_microtvm(): x = relay.var("x", shape=(10, 10)) w0 = relay.var("w0", shape=(10, 10)) w1 = relay.var("w1", shape=(10, 10)) - w2 = relay.var("w2", shape=(10, 10)) - w3 = relay.var("w3", shape=(10, 10)) - w4 = relay.var("w4", shape=(10, 10)) - w5 = relay.var("w5", shape=(10, 10)) - w6 = relay.var("w6", shape=(10, 10)) - w7 = relay.var("w7", shape=(10, 10)) - - # C compiler - z0 = relay.add(x, w0) - p0 = relay.subtract(z0, w1) - q0 = relay.multiply(p0, w2) - - z1 = relay.add(x, w3) - p1 = relay.subtract(z1, w4) - q1 = relay.multiply(p1, w5) - - # Other parts on TVM - z2 = relay.add(x, w6) - q2 = relay.subtract(z2, w7) - - r = relay.concatenate((q0, q1, q2), axis=0) - f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r) + + # z0 = x + w0 + x_ = compiler_begin(x, "ccompiler") + w0_ = compiler_begin(w0, "ccompiler") + z0_ = relay.add(x_, w0_) + z0 = compiler_end(z0_, "ccompiler") + + # z1 = z0 + w1 + z0__ = compiler_begin(z0, "ccompiler") + w1_ = compiler_begin(w1, "ccompiler") + z1_ = relay.add(z0__, w1_) + z1 = compiler_end(z1_, "ccompiler") + + # z2 = z0 + z1 + z2 = relay.add(z0, z1) + + f = relay.Function([x, w0, w1], z2) mod = tvm.IRModule() - ann = byoc.CcompilerAnnotator() - mod["main"] = ann.visit(f) + mod["main"] = f - mod = tvm.relay.transform.PartitionGraph("mod_name")(mod) - mod = tvm.relay.transform.InferType()(mod) + if merge_compiler_regions: + mod = transform.MergeCompilerRegions()(mod) - x_data = np.random.rand(10, 10).astype("float32") - w_data = [] - for _ in range(8): - w_data.append(np.random.rand(10, 10).astype("float32")) + mod = transform.PartitionGraph("mod_name")(mod) + mod = transform.InferType()(mod) + + x_data = [("x", np.random.rand(10, 10).astype("float32"))] + w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32")) for i in range(2)] - map_inputs = OrderedDict([("x", x_data)] + [("w{}".format(i), w_data[i]) for i in range(8)]) + map_inputs = OrderedDict(x_data + w_data) output_list = generate_ref_data(mod, map_inputs) - input_list = [map_inputs["x"]] - input_list.extend([map_inputs["w{}".format(i)] for i in range(8)]) compile_and_run( AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list), test_runner, From f4ae62fbb82dcc4a611a2c894a2fc85dd2d7a1ab Mon Sep 17 00:00:00 2001 From: Chris Sidebottom Date: Tue, 21 Sep 2021 12:42:53 +0000 Subject: [PATCH 2/2] Re-introduce multi sub graph AOT test --- tests/python/relay/aot/test_crt_aot.py | 60 ++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py index f5466f05326a..73aa385161f6 100644 --- a/tests/python/relay/aot/test_crt_aot.py +++ b/tests/python/relay/aot/test_crt_aot.py @@ -25,6 +25,7 @@ from tvm import relay from tvm.ir.module import IRModule from tvm.relay import testing, transform +from tvm.relay.testing import byoc from tvm.relay.op.annotation import compiler_begin, compiler_end from aot_test_utils import ( AOTTestModel, @@ -361,6 +362,65 @@ def test_byoc_microtvm(merge_compiler_regions): ) +@pytest.mark.parametrize("merge_compiler_regions", [False, True]) +def test_byoc_microtvm_multiple_subgraphs(merge_compiler_regions): + """This is a test case to check BYOC capabilities of AOT with multiple sub graphs""" + use_unpacked_api = False + interface_api = "packed" + test_runner = AOT_DEFAULT_RUNNER + + x = relay.var("x", shape=(10, 10)) + w0 = relay.var("w0", shape=(10, 10)) + w1 = relay.var("w1", shape=(10, 10)) + w2 = relay.var("w2", shape=(10, 10)) + w3 = relay.var("w3", shape=(10, 10)) + w4 = relay.var("w4", shape=(10, 10)) + w5 = relay.var("w5", shape=(10, 10)) + w6 = relay.var("w6", shape=(10, 10)) + w7 = relay.var("w7", shape=(10, 10)) + + # C compiler + z0 = relay.add(x, w0) + p0 = relay.subtract(z0, w1) + q0 = relay.multiply(p0, w2) + + z1 = relay.add(x, w3) + p1 = relay.subtract(z1, w4) + q1 = relay.multiply(p1, w5) + + # Other parts on TVM + z2 = relay.add(x, w6) + q2 = relay.subtract(z2, w7) + + r = relay.concatenate((q0, q1, q2), axis=0) + f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r) + mod = tvm.IRModule() + ann = byoc.CcompilerAnnotator() + mod["main"] = ann.visit(f) + + if merge_compiler_regions: + mod = transform.MergeCompilerRegions()(mod) + + mod = tvm.relay.transform.PartitionGraph("mod_name")(mod) + mod = tvm.relay.transform.InferType()(mod) + + x_data = np.random.rand(10, 10).astype("float32") + w_data = [] + for _ in range(8): + w_data.append(np.random.rand(10, 10).astype("float32")) + + map_inputs = OrderedDict([("x", x_data)] + [("w{}".format(i), w_data[i]) for i in range(8)]) + output_list = generate_ref_data(mod, map_inputs) + input_list = [map_inputs["x"]] + input_list.extend([map_inputs["w{}".format(i)] for i in range(8)]) + compile_and_run( + AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list), + test_runner, + interface_api, + use_unpacked_api, + ) + + @parametrize_aot_options def test_add_name_mangling_with_params(interface_api, use_unpacked_api, test_runner): x = relay.var("x", shape=(1, 10))