From afc8df649870bde24e6e2e6a53787007b7553e20 Mon Sep 17 00:00:00 2001
From: Luke Hutton <luke.hutton@arm.com>
Date: Fri, 16 Feb 2024 16:18:44 +0000
Subject: [PATCH 1/3] [AOT][Testing] Print output values on test failure

This commit adds an opt-in `print_output_on_mismatch` option to the AOT
test harness. When enabled, the "actual" and "reference" values of every
mismatching output element are printed, which helps when debugging a
failing test. Sample output:
```
Actual, Reference
8.502946, 8.887751
9.810405, 9.108611
8.563767, 9.041000
10.019511, 9.190888
....
```

Change-Id: Ifff729744c74b6a5256b4fd47c8580be3f5c32a9
---
 python/tvm/testing/aot.py | 76 +++++++++++++++++++++++++++++++--------
 1 file changed, 61 insertions(+), 15 deletions(-)

diff --git a/python/tvm/testing/aot.py b/python/tvm/testing/aot.py
index 9ee3a84c8a38..8d74f545a3c2 100644
--- a/python/tvm/testing/aot.py
+++ b/python/tvm/testing/aot.py
@@ -425,7 +425,14 @@ def fake_tensor(source, source_index, packed_index):
     main_file.write("\n")
 
 
-def _emit_main_compare(main_file, outputs, output_tolerance, mod_name, use_interface_c=False):
+def _emit_main_compare(
+    main_file,
+    outputs,
+    output_tolerance,
+    mod_name,
+    use_interface_c=False,
+    print_output_on_mismatch=False,
+):
     for key in outputs:
         sanitized_tensor_name = re.sub(r"\W", "_", key)
         expected_data_name = _mangle_name(mod_name, f"expected_output_data_{sanitized_tensor_name}")
@@ -433,9 +440,11 @@ def _emit_main_compare(main_file, outputs, output_tolerance, mod_name, use_inter
 
         comparison_function = "abs"
         tolerance = output_tolerance or 0
+        value_format_specifier = "%d"
         if is_float_dtype:
             comparison_function = "fabs"
             tolerance = output_tolerance or 0.001
+            value_format_specifier = "%f"
 
         data_length_var_name = (
             _mangle_name(mod_name, f"output_data_{sanitized_tensor_name}") + "_len"
@@ -447,15 +456,34 @@ def _emit_main_compare(main_file, outputs, output_tolerance, mod_name, use_inter
             )
         else:
             actual_data_name = _mangle_name(mod_name, f"output_data_{sanitized_tensor_name}")
-        main_file.write(
-            f"for (int i = 0; i<{data_length_var_name}; i++) {{\n"
-            f"\tif ({comparison_function}({actual_data_name}[i]-"
-            f"{expected_data_name}[i]) > {tolerance}) {{\n"
-            f'\t\tprintf("{AOT_FAILURE_TOKEN}\\n");\n'
-            f"\t\treturn -1;\n"
-            f"\t}}\n"
-            f"}}"
-        )
+
+        if print_output_on_mismatch:
+            main_file.write(
+                f"int mismatch = 0;"
+                f'printf("Actual, Reference\\n");\n'
+                f"for (int i = 0; i<{data_length_var_name}; i++) {{\n"
+                f"\tif ({comparison_function}({actual_data_name}[i]-"
+                f"{expected_data_name}[i]) > {tolerance}) {{\n"
+                f'\t\tprintf("{value_format_specifier}, {value_format_specifier}\\n"'
+                f", {actual_data_name}[i], {expected_data_name}[i]);\n"
+                f"\t\tmismatch = 1;\n"
+                f"\t}}\n"
+                f"}}"
+                f"if (mismatch == 1) {{\n"
+                f'\tprintf("{AOT_FAILURE_TOKEN}\\n");\n'
+                f"\treturn -1;\n"
+                f"}}"
+            )
+        else:
+            main_file.write(
+                f"for (int i = 0; i<{data_length_var_name}; i++) {{\n"
+                f"\tif ({comparison_function}({actual_data_name}[i]-"
+                f"{expected_data_name}[i]) > {tolerance}) {{\n"
+                f'\t\tprintf("{AOT_FAILURE_TOKEN}\\n");\n'
+                f"\t\treturn -1;\n"
+                f"\t}}\n"
+                f"}}"
+            )
 
 
 def _emit_main_init_memory_manager(main_file):
@@ -500,6 +528,7 @@ def _create_main(
     use_stack_allocator=True,
     use_workspace_io=False,
     debug_last_error=False,
+    print_output_on_mismatch=False,
 ):
     file_path = pathlib.Path(f"{output_path}/" + test_name).resolve()
     # create header file
@@ -568,7 +597,12 @@ def _create_main(
         for compiled_model in compiled_models:
             model = compiled_model.model
             _emit_main_compare(
-                main_file, model.outputs, model.output_tolerance, model.name, interface_api == "c"
+                main_file,
+                model.outputs,
+                model.output_tolerance,
+                model.name,
+                interface_api == "c",
+                print_output_on_mismatch,
             )
         _emit_main_epilogue(main_file, custom_epilogue)
 
@@ -709,6 +743,7 @@ def run_and_check(
     use_workspace_io: bool = False,
     debug_last_error: bool = False,
     checker: Optional[Callable[[str], bool]] = None,
+    print_output_on_mismatch: bool = False,
 ):
     """
     This method uses the original test data and compiled runtime.Modules
@@ -789,6 +824,7 @@ def run_and_check_body(base_path):
             use_stack_allocator,
             use_workspace_io,
             debug_last_error,
+            print_output_on_mismatch,
         )
 
         if checker and (not checker(base_path)):
@@ -832,7 +868,10 @@ def run_and_check_body(base_path):
         _subprocess_check_log_output(run_command, build_path, run_log_path)
 
         with open(run_log_path) as run_log:
-            assert AOT_SUCCESS_TOKEN in run_log.read()
+            run_log_out = run_log.read()
+            if print_output_on_mismatch and AOT_FAILURE_TOKEN in run_log_out:
+                print(run_log_out)
+            assert AOT_SUCCESS_TOKEN in run_log_out
 
         return True
 
@@ -861,15 +900,21 @@ def compile_and_run(
     schedule_name: str = None,
     debug_last_error: bool = False,
     checker: Optional[Callable[[str], bool]] = None,
+    print_output_on_mismatch: bool = False,
 ) -> bool:
     """This is a wrapper API to compile and run models as test for AoT
 
     Parameters
     ----------
     test_dir : str
-        This path will contain build, codegen, include directories
-    verbose: bool
-        Prints commands to build and run AOT test runner
+        This path will contain build, codegen, include directories.
+
+    verbose : bool
+        Prints commands to build and run AOT test runner.
+
+    print_output_on_mismatch : bool
+        Print both the output and reference values side-by-side
+        when there is a mismatch.
     """
 
     if target_opts:
@@ -904,6 +949,7 @@ def compile_and_run(
         verbose=verbose,
         debug_last_error=debug_last_error,
         checker=checker,
+        print_output_on_mismatch=print_output_on_mismatch,
     )
 
 

From 1d9d600b5e0f3302f9d47b8d1c1db7413eecb7cc Mon Sep 17 00:00:00 2001
From: Luke Hutton <luke.hutton@arm.com>
Date: Tue, 20 Feb 2024 17:00:34 +0000
Subject: [PATCH 2/3] Add test for the print_output_on_mismatch option

Change-Id: Iafe63d239dc38980710316179c73e6b8b7c1ea9c
---
 .../python/relay/aot/test_aot_test_harness.py | 58 +++++++++++++++++++
 tests/python/relay/aot/test_crt_aot.py        |  1 +
 2 files changed, 59 insertions(+)
 create mode 100644 tests/python/relay/aot/test_aot_test_harness.py

diff --git a/tests/python/relay/aot/test_aot_test_harness.py b/tests/python/relay/aot/test_aot_test_harness.py
new file mode 100644
index 000000000000..ce5451bac877
--- /dev/null
+++ b/tests/python/relay/aot/test_aot_test_harness.py
@@ -0,0 +1,58 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Tests for the AOT test harness.
+"""
+
+import pytest
+import numpy as np
+
+import tvm
+from tvm import relay
+from tvm.testing.aot import AOTTestRunner, compile_and_run, AOTTestModel
+
+
+def test_output_on_mismatch_option():
+    interface_api = "packed"
+    use_unpacked_api = True
+    test_runner = AOTTestRunner()
+    dtype = "float32"
+
+    two = relay.add(relay.const(1, dtype=dtype), relay.const(1, dtype=dtype))
+    func = relay.Function([], two)
+    outputs = {
+        "output": np.array(
+            [
+                0,
+            ]
+        ).astype(dtype)
+    }
+
+    msg = ".*Actual, Reference\n" "2.000000, 0.000000\n" "AOT_TEST_FAILURE.*"
+    with pytest.raises(RuntimeError, match=msg):
+        compile_and_run(
+            AOTTestModel(module=tvm.IRModule.from_expr(func), inputs={}, outputs=outputs),
+            test_runner,
+            interface_api,
+            use_unpacked_api,
+            print_output_on_mismatch=True,
+        )
+
+
+if __name__ == "__main__":
+    tvm.testing.main()
diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py
index f7e5af18d20e..1c0f354d31eb 100644
--- a/tests/python/relay/aot/test_crt_aot.py
+++ b/tests/python/relay/aot/test_crt_aot.py
@@ -93,6 +93,7 @@ def test_conv_with_params(interface_api, use_unpacked_api, test_runner):
         test_runner,
         interface_api,
         use_unpacked_api,
+        print_output_on_mismatch=True,
     )
 
 

From 2062378f5873b95d58bdebe16b72810540675df9 Mon Sep 17 00:00:00 2001
From: Luke Hutton <luke.hutton@arm.com>
Date: Tue, 20 Feb 2024 17:47:44 +0000
Subject: [PATCH 3/3] Fix lint in test_aot_test_harness.py

Change-Id: I7f6e905d25832057bd51a6deb7d1c2f8652d539e
---
 tests/python/relay/aot/test_aot_test_harness.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/python/relay/aot/test_aot_test_harness.py b/tests/python/relay/aot/test_aot_test_harness.py
index ce5451bac877..8ec9506f9f65 100644
--- a/tests/python/relay/aot/test_aot_test_harness.py
+++ b/tests/python/relay/aot/test_aot_test_harness.py
@@ -28,6 +28,9 @@
 
 
 def test_output_on_mismatch_option():
+    """
+    Test the print_output_on_mismatch option when there is a mismatch.
+    """
     interface_api = "packed"
     use_unpacked_api = True
     test_runner = AOTTestRunner()
@@ -43,7 +46,7 @@ def test_output_on_mismatch_option():
         ).astype(dtype)
     }
 
-    msg = ".*Actual, Reference\n" "2.000000, 0.000000\n" "AOT_TEST_FAILURE.*"
+    msg = ".*Actual, Reference\n2.000000, 0.000000\nAOT_TEST_FAILURE.*"
     with pytest.raises(RuntimeError, match=msg):
         compile_and_run(
             AOTTestModel(module=tvm.IRModule.from_expr(func), inputs={}, outputs=outputs),