From fcb0beee808e4ce3904de5efd6c9c2d07095942e Mon Sep 17 00:00:00 2001
From: Chris Sidebottom <chris.sidebottom@arm.com>
Date: Tue, 21 Sep 2021 11:42:12 +0100
Subject: [PATCH 1/2] Ensure AOT passes all intermediary storages to function
 calls

This iterates over the return storage IDs rather than just using the
first one to ensure all of them get passed to subsequent calls.

Fixes #9036
---
 src/relay/backend/aot_executor_codegen.cc |  4 +-
 tests/python/relay/aot/test_crt_aot.py    | 66 +++++++++++------------
 2 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/src/relay/backend/aot_executor_codegen.cc b/src/relay/backend/aot_executor_codegen.cc
index ad9ba1b2069d..f1398786b93b 100644
--- a/src/relay/backend/aot_executor_codegen.cc
+++ b/src/relay/backend/aot_executor_codegen.cc
@@ -291,7 +291,9 @@ class AOTExecutorCodegen : public MixedModeVisitor {
         args.push_back(param_handle);
       } else {
         auto var_arg = FindExpr(arg);
-        args.push_back(var_arg[0]);
+        for (const auto& var : var_arg) {
+          args.push_back(var);
+        }
       }
     }
 
diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py
index e117302d0ed8..f5466f05326a 100644
--- a/tests/python/relay/aot/test_crt_aot.py
+++ b/tests/python/relay/aot/test_crt_aot.py
@@ -25,7 +25,7 @@
 from tvm import relay
 from tvm.ir.module import IRModule
 from tvm.relay import testing, transform
-from tvm.relay.testing import byoc
+from tvm.relay.op.annotation import compiler_begin, compiler_end
 from aot_test_utils import (
     AOTTestModel,
     AOT_DEFAULT_RUNNER,
@@ -312,8 +312,9 @@ def test_mobilenet(debug_calculated_workspaces, workspace_byte_alignment):
     )
 
 
-def test_byoc_microtvm():
-    """This is a simple test case to check BYOC capabilities of AOT"""
+@pytest.mark.parametrize("merge_compiler_regions", [False, True])
+def test_byoc_microtvm(merge_compiler_regions):
+    """This is a simple test to check BYOC capabilities of AOT - with and without merging compiler regions to test for https://github.com/apache/tvm/issues/9036"""
     use_unpacked_api = False
     interface_api = "packed"
     test_runner = AOT_DEFAULT_RUNNER
@@ -321,44 +322,37 @@ def test_byoc_microtvm():
     x = relay.var("x", shape=(10, 10))
     w0 = relay.var("w0", shape=(10, 10))
     w1 = relay.var("w1", shape=(10, 10))
-    w2 = relay.var("w2", shape=(10, 10))
-    w3 = relay.var("w3", shape=(10, 10))
-    w4 = relay.var("w4", shape=(10, 10))
-    w5 = relay.var("w5", shape=(10, 10))
-    w6 = relay.var("w6", shape=(10, 10))
-    w7 = relay.var("w7", shape=(10, 10))
-
-    # C compiler
-    z0 = relay.add(x, w0)
-    p0 = relay.subtract(z0, w1)
-    q0 = relay.multiply(p0, w2)
-
-    z1 = relay.add(x, w3)
-    p1 = relay.subtract(z1, w4)
-    q1 = relay.multiply(p1, w5)
-
-    # Other parts on TVM
-    z2 = relay.add(x, w6)
-    q2 = relay.subtract(z2, w7)
-
-    r = relay.concatenate((q0, q1, q2), axis=0)
-    f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r)
+
+    # z0 = x + w0
+    x_ = compiler_begin(x, "ccompiler")
+    w0_ = compiler_begin(w0, "ccompiler")
+    z0_ = relay.add(x_, w0_)
+    z0 = compiler_end(z0_, "ccompiler")
+
+    # z1 = z0 + w1
+    z0__ = compiler_begin(z0, "ccompiler")
+    w1_ = compiler_begin(w1, "ccompiler")
+    z1_ = relay.add(z0__, w1_)
+    z1 = compiler_end(z1_, "ccompiler")
+
+    # z2 = z0 + z1
+    z2 = relay.add(z0, z1)
+
+    f = relay.Function([x, w0, w1], z2)
     mod = tvm.IRModule()
-    ann = byoc.CcompilerAnnotator()
-    mod["main"] = ann.visit(f)
+    mod["main"] = f
 
-    mod = tvm.relay.transform.PartitionGraph("mod_name")(mod)
-    mod = tvm.relay.transform.InferType()(mod)
+    if merge_compiler_regions:
+        mod = transform.MergeCompilerRegions()(mod)
 
-    x_data = np.random.rand(10, 10).astype("float32")
-    w_data = []
-    for _ in range(8):
-        w_data.append(np.random.rand(10, 10).astype("float32"))
+    mod = transform.PartitionGraph("mod_name")(mod)
+    mod = transform.InferType()(mod)
+
+    x_data = [("x", np.random.rand(10, 10).astype("float32"))]
+    w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32")) for i in range(2)]
 
-    map_inputs = OrderedDict([("x", x_data)] + [("w{}".format(i), w_data[i]) for i in range(8)])
+    map_inputs = OrderedDict(x_data + w_data)
     output_list = generate_ref_data(mod, map_inputs)
-    input_list = [map_inputs["x"]]
-    input_list.extend([map_inputs["w{}".format(i)] for i in range(8)])
     compile_and_run(
         AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list),
         test_runner,

From f4ae62fbb82dcc4a611a2c894a2fc85dd2d7a1ab Mon Sep 17 00:00:00 2001
From: Chris Sidebottom <chris.sidebottom@arm.com>
Date: Tue, 21 Sep 2021 12:42:53 +0000
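Subject: [PATCH 2/2] Re-introduce multi sub graph AOT test

The previous patch replaced the multi sub graph BYOC test with a smaller
regression test. Bring the original back as
test_byoc_microtvm_multiple_subgraphs, parametrized over
merge_compiler_regions like the new test.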
---
 tests/python/relay/aot/test_crt_aot.py | 60 ++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py
index f5466f05326a..73aa385161f6 100644
--- a/tests/python/relay/aot/test_crt_aot.py
+++ b/tests/python/relay/aot/test_crt_aot.py
@@ -25,6 +25,7 @@
 from tvm import relay
 from tvm.ir.module import IRModule
 from tvm.relay import testing, transform
+from tvm.relay.testing import byoc
 from tvm.relay.op.annotation import compiler_begin, compiler_end
 from aot_test_utils import (
     AOTTestModel,
@@ -361,6 +362,65 @@ def test_byoc_microtvm(merge_compiler_regions):
     )
 
 
+@pytest.mark.parametrize("merge_compiler_regions", [False, True])
+def test_byoc_microtvm_multiple_subgraphs(merge_compiler_regions):
+    """Check BYOC capabilities of AOT with multiple sub graphs.
+
+    The graph is annotated with ``byoc.CcompilerAnnotator`` and compiled both
+    with and without ``MergeCompilerRegions`` applied before partitioning.
+    """
+    use_unpacked_api = False
+    interface_api = "packed"
+    test_runner = AOT_DEFAULT_RUNNER
+
+    x = relay.var("x", shape=(10, 10))
+    w0 = relay.var("w0", shape=(10, 10))
+    w1 = relay.var("w1", shape=(10, 10))
+    w2 = relay.var("w2", shape=(10, 10))
+    w3 = relay.var("w3", shape=(10, 10))
+    w4 = relay.var("w4", shape=(10, 10))
+    w5 = relay.var("w5", shape=(10, 10))
+    w6 = relay.var("w6", shape=(10, 10))
+    w7 = relay.var("w7", shape=(10, 10))
+
+    # C compiler
+    z0 = relay.add(x, w0)
+    p0 = relay.subtract(z0, w1)
+    q0 = relay.multiply(p0, w2)
+
+    z1 = relay.add(x, w3)
+    p1 = relay.subtract(z1, w4)
+    q1 = relay.multiply(p1, w5)
+
+    # Other parts on TVM
+    z2 = relay.add(x, w6)
+    q2 = relay.subtract(z2, w7)
+
+    r = relay.concatenate((q0, q1, q2), axis=0)
+    f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r)
+    mod = tvm.IRModule()
+    ann = byoc.CcompilerAnnotator()
+    mod["main"] = ann.visit(f)
+
+    if merge_compiler_regions:
+        mod = transform.MergeCompilerRegions()(mod)
+
+    mod = transform.PartitionGraph("mod_name")(mod)
+    mod = transform.InferType()(mod)
+
+    x_data = np.random.rand(10, 10).astype("float32")
+    w_data = [np.random.rand(10, 10).astype("float32") for _ in range(8)]
+
+    map_inputs = OrderedDict([("x", x_data)] + [("w{}".format(i), w_data[i]) for i in range(8)])
+    output_list = generate_ref_data(mod, map_inputs)
+    compile_and_run(
+        AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list),
+        test_runner,
+        interface_api,
+        use_unpacked_api,
+    )
+
+
 @parametrize_aot_options
 def test_add_name_mangling_with_params(interface_api, use_unpacked_api, test_runner):
     x = relay.var("x", shape=(1, 10))