From e0ad1bb5026f5d40e55c4a80c7dd85d4688b1215 Mon Sep 17 00:00:00 2001 From: wangjiuyang Date: Thu, 20 Jan 2022 17:03:19 +0800 Subject: [PATCH 1/3] [Fix Bug]fix the bug of tensorflow frontend when parsing Range layer --- python/tvm/relay/frontend/tensorflow_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/frontend/tensorflow_ops.py b/python/tvm/relay/frontend/tensorflow_ops.py index df8b7438af88..9b36d712e9ec 100644 --- a/python/tvm/relay/frontend/tensorflow_ops.py +++ b/python/tvm/relay/frontend/tensorflow_ops.py @@ -2454,6 +2454,7 @@ def _impl(inputs, attr, params, mod): delta = inputs[2] # if all attributes are constant, evalute the range function and return relay.const + dtype = attr["Tidx"].name if "Tidx" in attr else str(start.dtype) if all( [ isinstance(start, (np.int32, np.int64, int, np.float32, np.float64, float)), @@ -2461,9 +2462,8 @@ def _impl(inputs, attr, params, mod): isinstance(delta, (np.int32, np.int64, int, np.float32, np.float64, float)), ] ): - return tvm.relay.const(list(range(int(start), int(limit), int(delta)))) + return tvm.relay.const(list(range(int(start), int(limit), int(delta))), dtype=dtype) - dtype = attr["Tidx"].name if "Tidx" in attr else str(start.dtype) if isinstance(start, (np.int32, np.int64, int, np.float32, np.float64, float)): start = _expr.const(start, dtype=dtype) if isinstance(limit, (np.int32, np.int64, int, np.float32, np.float64, float)): From 6a922cd36e068da4bacc1ca8613013bba684a63c Mon Sep 17 00:00:00 2001 From: wjy Date: Mon, 5 Dec 2022 17:50:13 +0800 Subject: [PATCH 2/3] [Fix Bug]fix the bug of schedule batch_matmul_int8 on cuda --- python/tvm/topi/cuda/batch_matmul.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/tvm/topi/cuda/batch_matmul.py b/python/tvm/topi/cuda/batch_matmul.py index 4e476094f2d9..d2f5c9b9c586 100644 --- a/python/tvm/topi/cuda/batch_matmul.py +++ b/python/tvm/topi/cuda/batch_matmul.py @@ -352,7 +352,7 @@ def _schedule_batch_matmul_int8(cfg, s, output): cfg.define_split("tile_k", K // k_factor, num_outputs=2) cfg.define_knob("auto_unroll_max_step", [0, 256, 512, 1024]) - batch_matmul_op = s.outputs[0] + batch_matmul_op = s[output].op s[input_x].compute_inline() s[input_y].compute_inline() @@ -373,6 +373,10 @@ def _schedule_batch_matmul_int8(cfg, s, output): dtypes = (input_x.dtype, input_y.dtype) s[batch_matmul_cache].tensorize(ki, dp4a("shared", "shared", "local", dtypes)) + if batch_matmul_op not in s.outputs: + s[output].compute_inline() + batch_matmul_op = s.outputs[0] + # tile axis f, m, n = batch_matmul_op.axis kernel_scope, f = s[batch_matmul_op].split(f, nparts=1) From 8fab01e9be2eaa387868a62a49de1f75dee6caff Mon Sep 17 00:00:00 2001 From: ninesheep Date: Mon, 19 Dec 2022 13:46:30 +0800 Subject: [PATCH 3/3] fix cast fp16 to int8/uint8 on cuda --- src/target/source/codegen_cuda.cc | 17 +++++++++++++++++ src/target/source/codegen_cuda.h | 1 + 2 files changed, 18 insertions(+) diff --git a/src/target/source/codegen_cuda.cc b/src/target/source/codegen_cuda.cc index 436e85247ffe..c891ec5a28cf 100644 --- a/src/target/source/codegen_cuda.cc +++ b/src/target/source/codegen_cuda.cc @@ -579,6 +579,23 @@ void CodeGenCUDA::PrintStorageScope(const std::string& scope, std::ostream& os) } } +std::string CodeGenCUDA::CastFromTo(std::string value, DataType from, DataType target) { + if (from == target) return value; + std::ostringstream os; + os << "(("; + this->PrintType(target, os); + os << ")"; + if (from.is_float16() && (target.is_int() || target.is_uint()) && target.bits() == 8) { + os << "("; + if (target.is_uint()) { + os << "u"; + } + os << "int)"; + } + os << value << ")"; + return os.str(); +} + void CodeGenCUDA::VisitExpr_(const CastNode* op, std::ostream& os) { DataType from_ty = op->value.dtype(); DataType target_ty = op->dtype; diff --git a/src/target/source/codegen_cuda.h b/src/target/source/codegen_cuda.h index 0fef15c7a7f3..bb507c179993 100644 --- a/src/target/source/codegen_cuda.h +++ b/src/target/source/codegen_cuda.h @@ -58,6 +58,7 @@ class CodeGenCUDA final : public CodeGenC { void PrintVecElemStore(const std::string& vec, DataType t, int i, const std::string& value) final; void BindThreadIndex(const IterVar& iv) final; // NOLINT(*) void PrintVecElemLoadExpr(DataType t, int i, const std::string& value, std::ostream& os) final; + std::string CastFromTo(std::string value, DataType from, DataType target) final; // overload visitor void VisitExpr_(const RampNode* op, std::ostream& os) final; // NOLINT(*) void VisitExpr_(const ShuffleNode* op, std::ostream& os) final; // NOLINT(*)