From bb205abeb2a9f9bc4c5261b88412a07165e70593 Mon Sep 17 00:00:00 2001
From: Zhi Chen
Date: Sat, 19 Oct 2019 00:21:11 +0000
Subject: [PATCH 1/4] [bugfix][codegen] fix casting bug in llvm codegen

---
 src/codegen/llvm/codegen_llvm.cc         |  2 +-
 tests/python/relay/test_op_qnn_conv2d.py | 74 ++++++++++++++++--------
 2 files changed, 51 insertions(+), 25 deletions(-)

diff --git a/src/codegen/llvm/codegen_llvm.cc b/src/codegen/llvm/codegen_llvm.cc
index d009290bb2fe..f5d43dbe5254 100644
--- a/src/codegen/llvm/codegen_llvm.cc
+++ b/src/codegen/llvm/codegen_llvm.cc
@@ -569,7 +569,7 @@ llvm::Value* CodeGenLLVM::CreateCast(Type from, Type to, llvm::Value* value) {
       return builder_->CreateICmpNE(value, zero);
     }
   } else if (!from.is_float() && !to.is_float()) {
-    return builder_->CreateIntCast(value, target, from.is_int());
+    return builder_->CreateIntCast(value, target, to.is_int());
   } else if (from.is_float() && to.is_int()) {
     return builder_->CreateFPToSI(value, target);
   } else if (from.is_float() && to.is_uint()) {
diff --git a/tests/python/relay/test_op_qnn_conv2d.py b/tests/python/relay/test_op_qnn_conv2d.py
index b4e8bfd71b62..25c987bf1a5f 100644
--- a/tests/python/relay/test_op_qnn_conv2d.py
+++ b/tests/python/relay/test_op_qnn_conv2d.py
@@ -160,7 +160,7 @@ def get_output(func, golden_inputs):
     qnn_output = get_output(qnn_func, golden_inputs)
     np.testing.assert_equal(qnn_output, golden_output)
 
-def no_zero_point_test():
+def test_no_zero_point():
     # uint8 input
     data_shape = (2, 1, 2, 4)
     data_dtype = 'uint8'
@@ -203,7 +203,7 @@ def no_zero_point_test():
     verify(ref_func, qnn_func, data_shape, data_dtype,
            kernel_shape, kernel_dtype)
 
-def kernel_zero_point_test():
+def test_kernel_zero_point():
     # uint8 input
     data_shape = (2, 4, 2, 4)
     data_dtype = 'uint8'
@@ -247,7 +247,7 @@ def kernel_zero_point_test():
            kernel_shape, kernel_dtype)
 
 
-def input_zero_point_test():
+def test_input_zero_point():
     # uint8 input
     data_shape = (2, 4, 2, 4)
     data_dtype = 'uint8'
@@ -290,7 +290,7 @@ def input_zero_point_test():
     verify(ref_func, qnn_func, data_shape, data_dtype,
            kernel_shape, kernel_dtype)
 
-def both_zero_point_test():
+def test_both_zero_point():
     # uint8 input
     data_shape = (2, 4, 2, 4)
     data_dtype = 'uint8'
@@ -333,7 +333,7 @@ def both_zero_point_test():
     verify(ref_func, qnn_func, data_shape, data_dtype,
            kernel_shape, kernel_dtype)
 
-def layout_test():
+def test_layout():
     # uint8 input
     data_shape = (2, 2, 4, 4) # NHWC
     data_dtype = 'uint8'
@@ -378,7 +378,7 @@ def layout_test():
 
 
-def padding_test():
+def test_padding():
     # uint8 input
     data_shape = (1, 4, 2, 2)
     data_dtype = 'uint8'
@@ -421,7 +421,7 @@ def padding_test():
     verify(ref_func, qnn_func, data_shape, data_dtype,
            kernel_shape, kernel_dtype)
 
-def dilation_test():
+def test_dilation():
    # uint8 input
    data_shape = (2, 4, 4, 4)
    data_dtype = 'uint8'
@@ -444,7 +444,7 @@ def dilation_test():
            kernel_shape, kernel_dtype)
 
 
-def const_folding_test():
+def test_const_folding():
     data_shape = (2, 4, 2, 4)
     data_dtype = 'uint8'
     kernel_shape = (3, 4, 2, 2)
@@ -470,7 +470,7 @@ def const_folding_test():
     folded_func = folded_mod["main"]
     assert "reshape" not in folded_func.astext()
 
-def kernel_size_1x1_test():
+def test_kernel_size_1x1():
     # uint8 input
     data_shape = (2, 4, 2, 4)
     data_dtype = 'uint8'
@@ -493,7 +493,7 @@ def kernel_size_1x1_test():
     verify(ref_func, qnn_func, data_shape, data_dtype,
            kernel_shape, kernel_dtype)
 
-def tflite_large_irregular_test():
+def test_tflite_large_irregular():
     # uint8 input
     data_shape = (1, 1024, 1, 1)
     data_dtype = 'uint8'
@@ -607,7 +607,7 @@ def tflite_anistropic_strides():
     golden_output = np.array((124, -92, 164, -132)).reshape(1, 1, 2, 2)
     np.testing.assert_equal(qnn_output, golden_output)
 
-def broadcast_layout_test():
+def test_broadcast_layout():
     # Test broadcast support for NHWC layout.
     data_shape = (1, 229, 229, 3) # NHWC
     data_dtype = 'uint8'
@@ -640,17 +640,43 @@ def broadcast_layout_test():
     with relay.build_config(opt_level=3):
         graph, lib, params = relay.build(mod, "llvm -mcpu=skylake-avx512")
 
+
+def test_conv2d_int():
+    data = relay.var("data", shape=(1, 28, 28, 128), dtype='uint8')
+    kernel = relay.var("w", shape=(3, 3, 128, 256), dtype='int8')
+    conv = relay.nn.conv2d(
+        data,
+        kernel,
+        kernel_size=(3, 3),
+        out_dtype='int32',
+        data_layout='NHWC',
+        kernel_layout='HWIO')
+    func = relay.Function([data, kernel], conv)
+
+    with relay.build_config(opt_level=0):
+        params = {"w": np.zeros((3, 3, 128, 256)).astype("int8")}
+        graph, lib, params = relay.build(func, 'llvm', params=params)
+        mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
+        mod.set_input("data", np.zeros((1, 28, 28, 128)).astype("uint8"))
+        mod.set_input(**params)
+        mod.run()
+        qnn_output = mod.get_output(0).asnumpy()
+    golden_output = np.zeros((1, 26, 26, 256)).astype("int32")
+    np.testing.assert_equal(qnn_output, golden_output)
+
+
 if __name__ == "__main__":
-    no_zero_point_test()
-    input_zero_point_test()
-    kernel_zero_point_test()
-    both_zero_point_test()
-    layout_test()
-    padding_test()
-    dilation_test()
-    const_folding_test()
-    kernel_size_1x1_test()
-    tflite_large_irregular_test()
-    tflite_output_multiplier_greater_than_one()
-    tflite_anistropic_strides()
-    broadcast_layout_test()
+    test_no_zero_point()
+    test_input_zero_point()
+    test_kernel_zero_point()
+    test_both_zero_point()
+    test_layout()
+    test_padding()
+    test_dilation()
+    test_const_folding()
+    test_kernel_size_1x1()
+    test_tflite_large_irregular()
+    test_tflite_output_multiplier_greater_than_one()
+    test_tflite_anistropic_strides()
+    test_broadcast_layout()
+    test_conv2d_int8()

From eba520fff79406e3ee8a739113c6b8d7c087790c Mon Sep 17 00:00:00 2001
From: Zhi Chen
Date: Sun, 20 Oct 2019 20:30:53 +0000
Subject: [PATCH 2/4] update example

---
 src/codegen/llvm/codegen_llvm.cc         | 2 +-
 tests/python/relay/test_op_qnn_conv2d.py | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/codegen/llvm/codegen_llvm.cc b/src/codegen/llvm/codegen_llvm.cc
index f5d43dbe5254..d009290bb2fe 100644
--- a/src/codegen/llvm/codegen_llvm.cc
+++ b/src/codegen/llvm/codegen_llvm.cc
@@ -569,7 +569,7 @@ llvm::Value* CodeGenLLVM::CreateCast(Type from, Type to, llvm::Value* value) {
       return builder_->CreateICmpNE(value, zero);
     }
   } else if (!from.is_float() && !to.is_float()) {
-    return builder_->CreateIntCast(value, target, to.is_int());
+    return builder_->CreateIntCast(value, target, from.is_int());
   } else if (from.is_float() && to.is_int()) {
     return builder_->CreateFPToSI(value, target);
   } else if (from.is_float() && to.is_uint()) {
diff --git a/tests/python/relay/test_op_qnn_conv2d.py b/tests/python/relay/test_op_qnn_conv2d.py
index 25c987bf1a5f..1f4849e934d9 100644
--- a/tests/python/relay/test_op_qnn_conv2d.py
+++ b/tests/python/relay/test_op_qnn_conv2d.py
@@ -641,7 +641,7 @@ def test_broadcast_layout():
         graph, lib, params = relay.build(mod, "llvm -mcpu=skylake-avx512")
 
 
-def test_conv2d_int():
+def test_conv2d_int8():
     data = relay.var("data", shape=(1, 28, 28, 128), dtype='uint8')
     kernel = relay.var("w", shape=(3, 3, 128, 256), dtype='int8')
     conv = relay.nn.conv2d(
@@ -655,7 +655,12 @@ def test_conv2d_int():
 
     with relay.build_config(opt_level=0):
         params = {"w": np.zeros((3, 3, 128, 256)).astype("int8")}
-        graph, lib, params = relay.build(func, 'llvm', params=params)
+        # -mcpu should be specified to avoid the llvm jitting error here:
+        # https://discuss.tvm.ai/t/segfault-in-llvm/3567
+        # To use VNNI, we need to specify the micro-architecture that supports
+        # it, e.g. cascadelake.
+        graph, lib, params = relay.build(func, 'llvm -mcpu=core-avx2',
+                                         params=params)
         mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
         mod.set_input("data", np.zeros((1, 28, 28, 128)).astype("uint8"))
         mod.set_input(**params)

From e9a65375dd251568c1d212d90296676a6ed48f9d Mon Sep 17 00:00:00 2001
From: Zhi Chen
Date: Mon, 21 Oct 2019 03:30:25 +0000
Subject: [PATCH 3/4] retrigger ci

From b1ef50a115581d68d16e75f0d26268ce3d20e85d Mon Sep 17 00:00:00 2001
From: Zhi Chen
Date: Mon, 21 Oct 2019 04:35:13 +0000
Subject: [PATCH 4/4] check llvm version

---
 tests/python/relay/test_op_qnn_conv2d.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/python/relay/test_op_qnn_conv2d.py b/tests/python/relay/test_op_qnn_conv2d.py
index 1f4849e934d9..3b5195c051b8 100644
--- a/tests/python/relay/test_op_qnn_conv2d.py
+++ b/tests/python/relay/test_op_qnn_conv2d.py
@@ -642,6 +642,11 @@ def test_broadcast_layout():
 
 
 def test_conv2d_int8():
+    target = "llvm -mcpu=core-avx2"
+    if not tvm.module.enabled(target):
+        print("skip because %s is not enabled..." % target)
+        return
+
     data = relay.var("data", shape=(1, 28, 28, 128), dtype='uint8')
     kernel = relay.var("w", shape=(3, 3, 128, 256), dtype='int8')
     conv = relay.nn.conv2d(
@@ -659,8 +664,7 @@ def test_conv2d_int8():
         # https://discuss.tvm.ai/t/segfault-in-llvm/3567
         # To use VNNI, we need to specify the micro-architecture that supports
         # it, e.g. cascadelake.
-        graph, lib, params = relay.build(func, 'llvm -mcpu=core-avx2',
-                                         params=params)
+        graph, lib, params = relay.build(func, target, params=params)
         mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
         mod.set_input("data", np.zeros((1, 28, 28, 128)).astype("uint8"))
         mod.set_input(**params)