From fe4965954130c19f4d87c1de4760475a0d6da6c6 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 8 Dec 2020 21:47:06 +0000 Subject: [PATCH 1/5] Allow cross compilation of cuda targets without physical device. --- python/tvm/contrib/nvcc.py | 12 +++++++++++- python/tvm/relay/op/strategy/cuda.py | 10 +++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py index 89548b74866b..2b40b13085f5 100644 --- a/python/tvm/contrib/nvcc.py +++ b/python/tvm/contrib/nvcc.py @@ -23,6 +23,7 @@ import warnings import tvm._ffi +from tvm.autotvm.env import AutotvmGlobalScope from tvm.runtime import ndarray as nd from . import utils @@ -269,7 +270,7 @@ def have_int8(compute_version): return False -def have_tensorcore(compute_version): +def have_tensorcore(compute_version=None): """Either TensorCore support is provided in the compute capability or not Parameters @@ -277,7 +278,16 @@ def have_tensorcore(compute_version): compute_version : str compute capability of a GPU (e.g. "7.0") """ + if compute_version is None: + if tvm.gpu(0).exist: + compute_version = tvm.gpu(0).compute_version + else: + compute_version = AutotvmGlobalScope.current.cuda_target_arch + # Compute version will be in the form "sm_{major}{minor}" + major, minor = compute_version.split('_')[1] + compute_version = major + '.' 
+ minor major, _ = parse_compute_version(compute_version) + if major == 7: return True diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index fc80c9ed6171..cf0861e226ca 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py @@ -197,7 +197,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target): if judge_winograd_autotvm: if ( target.kind.name == "cuda" - and nvcc.have_tensorcore(tvm.gpu(0).compute_version) + and nvcc.have_tensorcore() and judge_winograd_tensorcore ): strategy.add_implementation( @@ -215,7 +215,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target): ) if ( target.kind.name == "cuda" - and nvcc.have_tensorcore(tvm.gpu(0).compute_version) + and nvcc.have_tensorcore() and ( (N % 16 == 0 and CI % 16 == 0 and CO % 16 == 0) or (N % 8 == 0 and CI % 16 == 0 and CO % 32 == 0) @@ -436,7 +436,7 @@ def conv2d_winograd_without_weight_transfrom_strategy_cuda(attrs, inputs, out_ty ) if ( target.kind.name == "cuda" - and nvcc.have_tensorcore(tvm.gpu(0).compute_version) + and nvcc.have_tensorcore() and judge_winograd_tensorcore ): strategy.add_implementation( @@ -563,7 +563,7 @@ def conv3d_strategy_cuda(attrs, inputs, out_type, target): N, _, _, _, _ = get_const_tuple(data.shape) _, _, _, CI, CO = get_const_tuple(kernel.shape) if target.kind.name == "cuda": - if nvcc.have_tensorcore(tvm.gpu(0).compute_version): + if nvcc.have_tensorcore(): if ( (N % 16 == 0 and CI % 16 == 0 and CO % 16 == 0) or (N % 8 == 0 and CI % 16 == 0 and CO % 32 == 0) @@ -679,7 +679,7 @@ def dense_strategy_cuda(attrs, inputs, out_type, target): plevel=5, ) if target.kind.name == "cuda": - if nvcc.have_tensorcore(tvm.gpu(0).compute_version): + if nvcc.have_tensorcore(): if ( (i % 16 == 0 and b % 16 == 0 and o % 16 == 0) or (i % 16 == 0 and b % 8 == 0 and o % 32 == 0) From d7f45dcc243b7efd10936b8ebca72f4c54898b57 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 8 Dec 2020 21:48:58 +0000 Subject: [PATCH 
2/5] Formatting. --- python/tvm/contrib/nvcc.py | 4 ++-- python/tvm/relay/op/strategy/cuda.py | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py index 2b40b13085f5..cb135d9c74a1 100644 --- a/python/tvm/contrib/nvcc.py +++ b/python/tvm/contrib/nvcc.py @@ -284,8 +284,8 @@ def have_tensorcore(compute_version=None): else: compute_version = AutotvmGlobalScope.current.cuda_target_arch # Compute version will be in the form "sm_{major}{minor}" - major, minor = compute_version.split('_')[1] - compute_version = major + '.' + minor + major, minor = compute_version.split("_")[1] + compute_version = major + "." + minor major, _ = parse_compute_version(compute_version) if major == 7: diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index cf0861e226ca..001bbe859178 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py @@ -434,11 +434,7 @@ def conv2d_winograd_without_weight_transfrom_strategy_cuda(attrs, inputs, out_ty kernel.dtype, pre_flag=True, ) - if ( - target.kind.name == "cuda" - and nvcc.have_tensorcore() - and judge_winograd_tensorcore - ): + if target.kind.name == "cuda" and nvcc.have_tensorcore() and judge_winograd_tensorcore: strategy.add_implementation( wrap_compute_conv2d( topi.cuda.conv2d_nhwc_winograd_tensorcore_without_weight_transform From 6af071478736fc4f3573cfb2b0ce59b07818c529 Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Wed, 9 Dec 2020 09:44:09 -0800 Subject: [PATCH 3/5] Add warning when architecture can't be found.
--- python/tvm/contrib/nvcc.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py index cb135d9c74a1..19e2deb46841 100644 --- a/python/tvm/contrib/nvcc.py +++ b/python/tvm/contrib/nvcc.py @@ -283,6 +283,11 @@ def have_tensorcore(compute_version=None): compute_version = tvm.gpu(0).compute_version else: compute_version = AutotvmGlobalScope.current.cuda_target_arch + if compute_version is None: + warnings.warn( + "Cannot find cuda architecture. Tensorcore schedules will be disabled." + ) + return False # Compute version will be in the form "sm_{major}{minor}" major, minor = compute_version.split("_")[1] compute_version = major + "." + minor From cdd7daf5f50b09993d7a5d949bc4174bd23d3306 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 9 Dec 2020 21:27:20 +0000 Subject: [PATCH 4/5] Use target instead of autotvm arch specification. --- python/tvm/contrib/nvcc.py | 18 +++++++++++------- python/tvm/relay/op/strategy/cuda.py | 14 +++++++++----- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py index 19e2deb46841..f7254909a97f 100644 --- a/python/tvm/contrib/nvcc.py +++ b/python/tvm/contrib/nvcc.py @@ -23,7 +23,6 @@ import warnings import tvm._ffi -from tvm.autotvm.env import AutotvmGlobalScope from tvm.runtime import ndarray as nd from . import utils @@ -270,24 +269,29 @@ def have_int8(compute_version): return False -def have_tensorcore(compute_version=None): +def have_tensorcore(compute_version=None, target=None): """Either TensorCore support is provided in the compute capability or not Parameters ---------- - compute_version : str - compute capability of a GPU (e.g. "7.0") + compute_version : str, optional + compute capability of a GPU (e.g. "7.0"). + + target : tvm.target.Target, optional + The compilation target, will be used to determine arch if compute_version + isn't specified. 
""" if compute_version is None: if tvm.gpu(0).exist: compute_version = tvm.gpu(0).compute_version else: - compute_version = AutotvmGlobalScope.current.cuda_target_arch - if compute_version is None: + if target is None or "arch" not in target.attrs: warnings.warn( - "Cannot find cuda architecture. Tensorcore schedules will be disabled." + "Cannot find cuda architecture, try specifying it by adding '-arch=sm_xx'" + "to your target. Tensorcore schedules will be disabled." ) return False + compute_version = target.attrs["arch"] # Compute version will be in the form "sm_{major}{minor}" major, minor = compute_version.split("_")[1] compute_version = major + "." + minor diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index 001bbe859178..09ed475ad4e6 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py @@ -197,7 +197,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target): if judge_winograd_autotvm: if ( target.kind.name == "cuda" - and nvcc.have_tensorcore() + and nvcc.have_tensorcore(target=target) and judge_winograd_tensorcore ): strategy.add_implementation( @@ -215,7 +215,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target): ) if ( target.kind.name == "cuda" - and nvcc.have_tensorcore() + and nvcc.have_tensorcore(target=target) and ( (N % 16 == 0 and CI % 16 == 0 and CO % 16 == 0) or (N % 8 == 0 and CI % 16 == 0 and CO % 32 == 0) @@ -434,7 +434,11 @@ def conv2d_winograd_without_weight_transfrom_strategy_cuda(attrs, inputs, out_ty kernel.dtype, pre_flag=True, ) - if target.kind.name == "cuda" and nvcc.have_tensorcore() and judge_winograd_tensorcore: + if ( + target.kind.name == "cuda" + and nvcc.have_tensorcore(target=target) + and judge_winograd_tensorcore + ): strategy.add_implementation( wrap_compute_conv2d( topi.cuda.conv2d_nhwc_winograd_tensorcore_without_weight_transform @@ -559,7 +563,7 @@ def conv3d_strategy_cuda(attrs, inputs, out_type, target): N, _, _, _, _ = 
get_const_tuple(data.shape) _, _, _, CI, CO = get_const_tuple(kernel.shape) if target.kind.name == "cuda": - if nvcc.have_tensorcore(): + if nvcc.have_tensorcore(target=target): if ( (N % 16 == 0 and CI % 16 == 0 and CO % 16 == 0) or (N % 8 == 0 and CI % 16 == 0 and CO % 32 == 0) @@ -675,7 +679,7 @@ def dense_strategy_cuda(attrs, inputs, out_type, target): plevel=5, ) if target.kind.name == "cuda": - if nvcc.have_tensorcore(): + if nvcc.have_tensorcore(target=target): if ( (i % 16 == 0 and b % 16 == 0 and o % 16 == 0) or (i % 16 == 0 and b % 8 == 0 and o % 32 == 0) From deee3c31b79543314ef7bc4df7821aecd9a39d78 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 9 Dec 2020 22:53:38 +0000 Subject: [PATCH 5/5] Change warning message. --- python/tvm/contrib/nvcc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py index f7254909a97f..bc11e4a867e4 100644 --- a/python/tvm/contrib/nvcc.py +++ b/python/tvm/contrib/nvcc.py @@ -287,8 +287,8 @@ def have_tensorcore(compute_version=None, target=None): else: if target is None or "arch" not in target.attrs: warnings.warn( - "Cannot find cuda architecture, try specifying it by adding '-arch=sm_xx'" - "to your target. Tensorcore schedules will be disabled." + "Tensorcore will be disabled due to no CUDA architecture specified." + "Try specifying it by adding '-arch=sm_xx' to your target." ) return False compute_version = target.attrs["arch"]