From fe4965954130c19f4d87c1de4760475a0d6da6c6 Mon Sep 17 00:00:00 2001
From: Ubuntu
 <jwfromm@jwfromm-cpu-dev.itxhlkosmouevgkdrmwxfbs5qh.xx.internal.cloudapp.net>
Date: Tue, 8 Dec 2020 21:47:06 +0000
Subject: [PATCH 1/5] Allow cross compilation of cuda targets without physical
 device.

---
 python/tvm/contrib/nvcc.py           | 12 +++++++++++-
 python/tvm/relay/op/strategy/cuda.py | 10 +++++-----
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py
index 89548b74866b..2b40b13085f5 100644
--- a/python/tvm/contrib/nvcc.py
+++ b/python/tvm/contrib/nvcc.py
@@ -23,6 +23,7 @@
 import warnings
 
 import tvm._ffi
+from tvm.autotvm.env import AutotvmGlobalScope
 from tvm.runtime import ndarray as nd
 
 from . import utils
@@ -269,7 +270,7 @@ def have_int8(compute_version):
     return False
 
 
-def have_tensorcore(compute_version):
+def have_tensorcore(compute_version=None):
     """Either TensorCore support is provided in the compute capability or not
 
     Parameters
@@ -277,7 +278,16 @@ def have_tensorcore(compute_version):
     compute_version : str
         compute capability of a GPU (e.g. "7.0")
     """
+    if compute_version is None:
+        if tvm.gpu(0).exist:
+            compute_version = tvm.gpu(0).compute_version
+        else:
+            compute_version = AutotvmGlobalScope.current.cuda_target_arch
+            # Compute version will be in the form "sm_{major}{minor}"
+            major, minor = compute_version.split('_')[1]
+            compute_version = major + '.' + minor
     major, _ = parse_compute_version(compute_version)
+
     if major == 7:
         return True
 
diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py
index fc80c9ed6171..cf0861e226ca 100644
--- a/python/tvm/relay/op/strategy/cuda.py
+++ b/python/tvm/relay/op/strategy/cuda.py
@@ -197,7 +197,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
             if judge_winograd_autotvm:
                 if (
                     target.kind.name == "cuda"
-                    and nvcc.have_tensorcore(tvm.gpu(0).compute_version)
+                    and nvcc.have_tensorcore()
                     and judge_winograd_tensorcore
                 ):
                     strategy.add_implementation(
@@ -215,7 +215,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
                     )
             if (
                 target.kind.name == "cuda"
-                and nvcc.have_tensorcore(tvm.gpu(0).compute_version)
+                and nvcc.have_tensorcore()
                 and (
                     (N % 16 == 0 and CI % 16 == 0 and CO % 16 == 0)
                     or (N % 8 == 0 and CI % 16 == 0 and CO % 32 == 0)
@@ -436,7 +436,7 @@ def conv2d_winograd_without_weight_transfrom_strategy_cuda(attrs, inputs, out_ty
         )
         if (
             target.kind.name == "cuda"
-            and nvcc.have_tensorcore(tvm.gpu(0).compute_version)
+            and nvcc.have_tensorcore()
             and judge_winograd_tensorcore
         ):
             strategy.add_implementation(
@@ -563,7 +563,7 @@ def conv3d_strategy_cuda(attrs, inputs, out_type, target):
         N, _, _, _, _ = get_const_tuple(data.shape)
         _, _, _, CI, CO = get_const_tuple(kernel.shape)
         if target.kind.name == "cuda":
-            if nvcc.have_tensorcore(tvm.gpu(0).compute_version):
+            if nvcc.have_tensorcore():
                 if (
                     (N % 16 == 0 and CI % 16 == 0 and CO % 16 == 0)
                     or (N % 8 == 0 and CI % 16 == 0 and CO % 32 == 0)
@@ -679,7 +679,7 @@ def dense_strategy_cuda(attrs, inputs, out_type, target):
                 plevel=5,
             )
         if target.kind.name == "cuda":
-            if nvcc.have_tensorcore(tvm.gpu(0).compute_version):
+            if nvcc.have_tensorcore():
                 if (
                     (i % 16 == 0 and b % 16 == 0 and o % 16 == 0)
                     or (i % 16 == 0 and b % 8 == 0 and o % 32 == 0)

From d7f45dcc243b7efd10936b8ebca72f4c54898b57 Mon Sep 17 00:00:00 2001
From: Ubuntu
 <jwfromm@jwfromm-cpu-dev.itxhlkosmouevgkdrmwxfbs5qh.xx.internal.cloudapp.net>
Date: Tue, 8 Dec 2020 21:48:58 +0000
Subject: [PATCH 2/5] Formatting.

---
 python/tvm/contrib/nvcc.py           | 4 ++--
 python/tvm/relay/op/strategy/cuda.py | 6 +-----
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py
index 2b40b13085f5..cb135d9c74a1 100644
--- a/python/tvm/contrib/nvcc.py
+++ b/python/tvm/contrib/nvcc.py
@@ -284,8 +284,8 @@ def have_tensorcore(compute_version=None):
         else:
             compute_version = AutotvmGlobalScope.current.cuda_target_arch
             # Compute version will be in the form "sm_{major}{minor}"
-            major, minor = compute_version.split('_')[1]
-            compute_version = major + '.' + minor
+            major, minor = compute_version.split("_")[1]
+            compute_version = major + "." + minor
     major, _ = parse_compute_version(compute_version)
 
     if major == 7:
diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py
index cf0861e226ca..001bbe859178 100644
--- a/python/tvm/relay/op/strategy/cuda.py
+++ b/python/tvm/relay/op/strategy/cuda.py
@@ -434,11 +434,7 @@ def conv2d_winograd_without_weight_transfrom_strategy_cuda(attrs, inputs, out_ty
             kernel.dtype,
             pre_flag=True,
         )
-        if (
-            target.kind.name == "cuda"
-            and nvcc.have_tensorcore()
-            and judge_winograd_tensorcore
-        ):
+        if target.kind.name == "cuda" and nvcc.have_tensorcore() and judge_winograd_tensorcore:
             strategy.add_implementation(
                 wrap_compute_conv2d(
                     topi.cuda.conv2d_nhwc_winograd_tensorcore_without_weight_transform

From 6af071478736fc4f3573cfb2b0ce59b07818c529 Mon Sep 17 00:00:00 2001
From: Josh Fromm <jwfromm@uw.edu>
Date: Wed, 9 Dec 2020 09:44:09 -0800
Subject: [PATCH 3/5] Add warning when architecture can't be found.

---
 python/tvm/contrib/nvcc.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py
index cb135d9c74a1..19e2deb46841 100644
--- a/python/tvm/contrib/nvcc.py
+++ b/python/tvm/contrib/nvcc.py
@@ -283,6 +283,11 @@ def have_tensorcore(compute_version=None):
             compute_version = tvm.gpu(0).compute_version
         else:
             compute_version = AutotvmGlobalScope.current.cuda_target_arch
+            if compute_version is None:
+                warnings.warn(
+                    "Cannot find cuda architecture. Tensorcore schedules will be disabled."
+                )
+                return False
             # Compute version will be in the form "sm_{major}{minor}"
             major, minor = compute_version.split("_")[1]
             compute_version = major + "." + minor

From cdd7daf5f50b09993d7a5d949bc4174bd23d3306 Mon Sep 17 00:00:00 2001
From: Ubuntu
 <jwfromm@jwfromm-cpu-dev.itxhlkosmouevgkdrmwxfbs5qh.xx.internal.cloudapp.net>
Date: Wed, 9 Dec 2020 21:27:20 +0000
Subject: [PATCH 4/5] Use target instead of autotvm arch specification.

---
 python/tvm/contrib/nvcc.py           | 18 +++++++++++-------
 python/tvm/relay/op/strategy/cuda.py | 14 +++++++++-----
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py
index 19e2deb46841..f7254909a97f 100644
--- a/python/tvm/contrib/nvcc.py
+++ b/python/tvm/contrib/nvcc.py
@@ -23,7 +23,6 @@
 import warnings
 
 import tvm._ffi
-from tvm.autotvm.env import AutotvmGlobalScope
 from tvm.runtime import ndarray as nd
 
 from . import utils
@@ -270,24 +269,29 @@ def have_int8(compute_version):
     return False
 
 
-def have_tensorcore(compute_version=None):
+def have_tensorcore(compute_version=None, target=None):
     """Either TensorCore support is provided in the compute capability or not
 
     Parameters
     ----------
-    compute_version : str
-        compute capability of a GPU (e.g. "7.0")
+    compute_version : str, optional
+        compute capability of a GPU (e.g. "7.0").
+
+    target : tvm.target.Target, optional
+        The compilation target, will be used to determine arch if compute_version
+        isn't specified.
     """
     if compute_version is None:
         if tvm.gpu(0).exist:
             compute_version = tvm.gpu(0).compute_version
         else:
-            compute_version = AutotvmGlobalScope.current.cuda_target_arch
-            if compute_version is None:
+            if target is None or "arch" not in target.attrs:
                 warnings.warn(
-                    "Cannot find cuda architecture. Tensorcore schedules will be disabled."
+                    "Cannot find cuda architecture, try specifying it by adding '-arch=sm_xx'"
+                    "to your target. Tensorcore schedules will be disabled."
                 )
                 return False
+            compute_version = target.attrs["arch"]
             # Compute version will be in the form "sm_{major}{minor}"
             major, minor = compute_version.split("_")[1]
             compute_version = major + "." + minor
diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py
index 001bbe859178..09ed475ad4e6 100644
--- a/python/tvm/relay/op/strategy/cuda.py
+++ b/python/tvm/relay/op/strategy/cuda.py
@@ -197,7 +197,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
             if judge_winograd_autotvm:
                 if (
                     target.kind.name == "cuda"
-                    and nvcc.have_tensorcore()
+                    and nvcc.have_tensorcore(target=target)
                     and judge_winograd_tensorcore
                 ):
                     strategy.add_implementation(
@@ -215,7 +215,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
                     )
             if (
                 target.kind.name == "cuda"
-                and nvcc.have_tensorcore()
+                and nvcc.have_tensorcore(target=target)
                 and (
                     (N % 16 == 0 and CI % 16 == 0 and CO % 16 == 0)
                     or (N % 8 == 0 and CI % 16 == 0 and CO % 32 == 0)
@@ -434,7 +434,11 @@ def conv2d_winograd_without_weight_transfrom_strategy_cuda(attrs, inputs, out_ty
             kernel.dtype,
             pre_flag=True,
         )
-        if target.kind.name == "cuda" and nvcc.have_tensorcore() and judge_winograd_tensorcore:
+        if (
+            target.kind.name == "cuda"
+            and nvcc.have_tensorcore(target=target)
+            and judge_winograd_tensorcore
+        ):
             strategy.add_implementation(
                 wrap_compute_conv2d(
                     topi.cuda.conv2d_nhwc_winograd_tensorcore_without_weight_transform
@@ -559,7 +563,7 @@ def conv3d_strategy_cuda(attrs, inputs, out_type, target):
         N, _, _, _, _ = get_const_tuple(data.shape)
         _, _, _, CI, CO = get_const_tuple(kernel.shape)
         if target.kind.name == "cuda":
-            if nvcc.have_tensorcore():
+            if nvcc.have_tensorcore(target=target):
                 if (
                     (N % 16 == 0 and CI % 16 == 0 and CO % 16 == 0)
                     or (N % 8 == 0 and CI % 16 == 0 and CO % 32 == 0)
@@ -675,7 +679,7 @@ def dense_strategy_cuda(attrs, inputs, out_type, target):
                 plevel=5,
             )
         if target.kind.name == "cuda":
-            if nvcc.have_tensorcore():
+            if nvcc.have_tensorcore(target=target):
                 if (
                     (i % 16 == 0 and b % 16 == 0 and o % 16 == 0)
                     or (i % 16 == 0 and b % 8 == 0 and o % 32 == 0)

From deee3c31b79543314ef7bc4df7821aecd9a39d78 Mon Sep 17 00:00:00 2001
From: Ubuntu
 <jwfromm@jwfromm-cpu-dev.itxhlkosmouevgkdrmwxfbs5qh.xx.internal.cloudapp.net>
Date: Wed, 9 Dec 2020 22:53:38 +0000
Subject: [PATCH 5/5] Change warning message.

---
 python/tvm/contrib/nvcc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py
index f7254909a97f..bc11e4a867e4 100644
--- a/python/tvm/contrib/nvcc.py
+++ b/python/tvm/contrib/nvcc.py
@@ -287,8 +287,8 @@ def have_tensorcore(compute_version=None, target=None):
         else:
             if target is None or "arch" not in target.attrs:
                 warnings.warn(
-                    "Cannot find cuda architecture, try specifying it by adding '-arch=sm_xx'"
-                    "to your target. Tensorcore schedules will be disabled."
+                    "Tensorcore will be disabled due to no CUDA architecture specified."
+                    "Try specifying it by adding '-arch=sm_xx' to your target."
                 )
                 return False
             compute_version = target.attrs["arch"]