From da31fb317c88a763538d3c6c9fcedf27afa101a3 Mon Sep 17 00:00:00 2001 From: Leyuan Wang Date: Thu, 31 Dec 2020 05:27:56 +0000 Subject: [PATCH 1/3] fix tc type issue for dense --- python/tvm/relay/op/strategy/cuda.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index 9d8420c69610..127950a2673a 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py @@ -677,10 +677,14 @@ def dense_strategy_cuda(attrs, inputs, out_type, target): ) if target.kind.name == "cuda": if nvcc.have_tensorcore(target=target): - if ( - (i % 16 == 0 and b % 16 == 0 and o % 16 == 0) + if ((data.dtype in ["float32"] and i % 4 == 0 and b % 8 == 0 and o % 8 == 0) or + (data.dtype in ["float16", "int8", "uint8"] and + ((i % 16 == 0 and b % 16 == 0 and o % 16 == 0) or (i % 16 == 0 and b % 8 == 0 and o % 32 == 0) - or (i % 16 == 0 and b % 32 == 0 and o % 8 == 0) + or (i % 16 == 0 and b % 32 == 0 and o % 8 == 0))) or + (data.dtype in ["int4", "uint4"] and + i % 32 == 0 and b % 8 == 0 and o % 8 == 0) or + (data.dtype in ["int1", "uint1"] and i % 128 == 0 and b % 8 == 0 and o % 8 == 0) ): strategy.add_implementation( wrap_compute_dense(topi.cuda.dense_tensorcore), From 634b9d256506ea6ab66d52d2e8fdf07785231e9c Mon Sep 17 00:00:00 2001 From: Leyuan Wang Date: Thu, 31 Dec 2020 05:45:19 +0000 Subject: [PATCH 2/3] fix lint --- python/tvm/relay/op/strategy/cuda.py | 30 ++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index 127950a2673a..3068a86344d6 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py @@ -677,14 +677,28 @@ def dense_strategy_cuda(attrs, inputs, out_type, target): ) if target.kind.name == "cuda": if nvcc.have_tensorcore(target=target): - if ((data.dtype in ["float32"] and i % 4 == 0 and b % 8 == 0 and o % 8 == 0) or - (data.dtype in ["float16", "int8", "uint8"] and - ((i % 16 == 0 and b % 16 == 0 and o % 16 == 0) - or (i % 16 == 0 and b % 8 == 0 and o % 32 == 0) - or (i % 16 == 0 and b % 32 == 0 and o % 8 == 0))) or - (data.dtype in ["int4", "uint4"] and - i % 32 == 0 and b % 8 == 0 and o % 8 == 0) or - (data.dtype in ["int1", "uint1"] and i % 128 == 0 and b % 8 == 0 and o % 8 == 0) + if ( + (data.dtype in ["float32"] and i % 4 == 0 and b % 8 == 0 and o % 8 == 0) + or ( + data.dtype in ["float16", "int8", "uint8"] + and ( + (i % 16 == 0 and b % 16 == 0 and o % 16 == 0) + or (i % 16 == 0 and b % 8 == 0 and o % 32 == 0) + or (i % 16 == 0 and b % 32 == 0 and o % 8 == 0) + ) + ) + or ( + data.dtype in ["int4", "uint4"] + and i % 32 == 0 + and b % 8 == 0 + and o % 8 == 0 + ) + or ( + data.dtype in ["int1", "uint1"] + and i % 128 == 0 + and b % 8 == 0 + and o % 8 == 0 + ) ): strategy.add_implementation( wrap_compute_dense(topi.cuda.dense_tensorcore), From bccf4ffb5c0f3fa2758ca558b4ab479249902470 Mon Sep 17 00:00:00 2001 From: Leyuan Wang Date: Thu, 31 Dec 2020 18:54:43 +0000 Subject: [PATCH 3/3] rm float 32 --- python/tvm/relay/op/strategy/cuda.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index 3068a86344d6..37946c01cb46 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py @@ -678,8 +678,7 @@ def dense_strategy_cuda(attrs, inputs, out_type, target): if target.kind.name == "cuda": if nvcc.have_tensorcore(target=target): if ( - (data.dtype in ["float32"] and i % 4 == 0 and b % 8 == 0 and o % 8 == 0) - or ( + ( data.dtype in ["float16", "int8", "uint8"] and ( (i % 16 == 0 and b % 16 == 0 and o % 16 == 0)