From e6eeb81a6c3607147aa263527d9ebf8508d7c61b Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Wed, 7 Apr 2021 15:35:09 -0700 Subject: [PATCH 01/41] Update intrinsic lowering pass --- include/tvm/tir/op_attr_types.h | 6 ++ src/target/intrin_rule.cc | 94 +++++++++++++++------ src/target/llvm/intrin_rule_llvm.cc | 126 +++++++++++++++++----------- src/tir/transforms/lower_intrin.cc | 51 ++++------- 4 files changed, 166 insertions(+), 111 deletions(-) diff --git a/include/tvm/tir/op_attr_types.h b/include/tvm/tir/op_attr_types.h index 3dcc4b943a79..aed4c03f1a5b 100644 --- a/include/tvm/tir/op_attr_types.h +++ b/include/tvm/tir/op_attr_types.h @@ -28,6 +28,7 @@ #ifndef TVM_TIR_OP_ATTR_TYPES_H_ #define TVM_TIR_OP_ATTR_TYPES_H_ +#include #include namespace tvm { @@ -43,6 +44,11 @@ using TGlobalSymbol = String; */ using TVectorizable = bool; +/*! + * \brief The intrinsic lowering function for given OP. + */ +using FLowerIntrinsic = PackedFunc; + /*! * \brief The effect type of the call. */ diff --git a/src/target/intrin_rule.cc b/src/target/intrin_rule.cc index 1a7214476188..b19a0ca68e16 100644 --- a/src/target/intrin_rule.cc +++ b/src/target/intrin_rule.cc @@ -24,64 +24,94 @@ #include "intrin_rule.h" #include +#include namespace tvm { namespace codegen { namespace intrin { +using namespace tir; -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.exp").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.erf").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.erf") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.log").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.log2").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log2") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.log10").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log10") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.log1p").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log1p") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.tanh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.tanh") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.tan").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.tan") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.atan").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.atan") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.atanh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.atanh") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.atan2").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.atan2") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.cos").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.cos") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.acos").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.acos") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.cosh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.cosh") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.acosh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.acosh") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.sin").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sin") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.asin").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.asin") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.sinh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sinh") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.asinh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.asinh") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.hypot").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.hypot") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.nextafter").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.nextafter") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.copysign").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.copysign") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.ldexp").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.ldexp") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.sqrt").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sqrt") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.floor").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.floor") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.ceil").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.ceil") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + +TVM_REGISTER_OP("tir.round") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.round").set_body(DispatchPureExtern); TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.rsqrt") .set_body([](const TVMArgs& args, TVMRetValue* rv) { @@ -95,6 +125,16 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.rsqrt") TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.pow").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sigmoid") +.set_attr("default.FLowerIntrinsic", + PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { + PrimExpr e = args[0]; + const CallNode* call = e.as(); + ICHECK(call != nullptr); + auto one = make_const(call->args[0].dtype(), 1); + *rv = one / (one + exp(-call->args[0])); + })); + TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.sigmoid") .set_body([](const TVMArgs& args, TVMRetValue* rv) { PrimExpr e = args[0]; diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index 093a746adcab..b9e70edd45aa 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -25,24 +25,30 @@ #include "intrin_rule_llvm.h" #include +#include namespace tvm { namespace codegen { namespace llvm { +using namespace tir; -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.prefetch") - .set_body(DispatchLLVMIntrin<::llvm::Intrinsic::prefetch, 4>); +TVM_REGISTER_OP("tir.prefetch") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMIntrin<::llvm::Intrinsic::prefetch, 4>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.exp") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>); +TVM_REGISTER_OP("tir.exp") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.exp2") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp2, 1>); +TVM_REGISTER_OP("tir.exp2") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp2, 1>)); // TODO(tvm-team): migrate the legalization transformations as a separate // set of rules in TIR that can be shared across backends. -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.exp10") - .set_body([](const TVMArgs& targs, TVMRetValue* rv) { +TVM_REGISTER_OP("tir.exp10") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { using tir::make_const; using tir::make_zero; PrimExpr e = targs[0]; @@ -52,43 +58,56 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.exp10") PrimExpr ln10 = make_const(x.dtype(), 2.302585093); PrimExpr ret = exp(x * ln10); *rv = ret; - }); + })); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.fma") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3>); +TVM_REGISTER_OP("tir.fma") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.log") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1>); +TVM_REGISTER_OP("tir.log") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.log2") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::log2, 1>); +TVM_REGISTER_OP("tir.log2") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log2, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.log10") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::log10, 1>); +TVM_REGISTER_OP("tir.log10") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log10, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.sqrt") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1>); +TVM_REGISTER_OP("tir.sqrt") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.floor") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::floor, 1>); +TVM_REGISTER_OP("tir.floor") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::floor, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.ceil") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::ceil, 1>); +TVM_REGISTER_OP("tir.ceil") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ceil, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.trunc") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::trunc, 1>); +TVM_REGISTER_OP("tir.trunc") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::trunc, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.fabs") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::fabs, 1>); +TVM_REGISTER_OP("tir.fabs") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fabs, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.round") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::round, 1>); +TVM_REGISTER_OP("tir.round") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::round, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.nearbyint") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::nearbyint, 1>); +TVM_REGISTER_OP("tir.nearbyint") + .set_attr( + "llvm.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::nearbyint, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.tanh") - .set_body([](const TVMArgs& targs, TVMRetValue* rv) { +TVM_REGISTER_OP("tir.tanh") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { using tir::make_const; using tir::make_zero; PrimExpr e = targs[0]; @@ -105,28 +124,33 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.tanh") PrimExpr tanh_pos = (one - exp_neg2x) / (one + exp_neg2x); PrimExpr tanh_neg = (exp_pos2x - one) / (exp_pos2x + one); *rv = tir::Select(x >= make_zero(x.dtype()), tanh_pos, tanh_neg); - }); + })); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.pow") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 2>); +TVM_REGISTER_OP("tir.pow") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 2>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.popcount") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>); +TVM_REGISTER_OP("tir.popcount") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.tan").set_body([](const TVMArgs& targs, TVMRetValue* rv) { +TVM_REGISTER_OP("tir.tan").set_attr( + "llvm.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { PrimExpr e = targs[0]; const tir::CallNode* call = e.as(); ICHECK(call != nullptr); const PrimExpr& x = call->args[0]; PrimExpr tan_x = sin(x) / cos(x); *rv = tan_x; -}); +})); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.cos") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::cos, 1>); +TVM_REGISTER_OP("tir.cos") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::cos, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.cosh") - .set_body([](const TVMArgs& targs, TVMRetValue* rv) { +TVM_REGISTER_OP("tir.cosh") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { using tir::make_const; using tir::make_zero; PrimExpr e = targs[0]; @@ -139,13 +163,15 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.cosh") PrimExpr exp_posx = exp(x); PrimExpr ret = (exp_posx + exp_negx) / two; *rv = ret; - }); + })); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.sin") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::sin, 1>); +TVM_REGISTER_OP("tir.sin") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::sin, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.sinh") - .set_body([](const TVMArgs& targs, TVMRetValue* rv) { +TVM_REGISTER_OP("tir.sinh") + .set_attr( + "llvm.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { using tir::make_const; using tir::make_zero; PrimExpr e = targs[0]; @@ -158,7 +184,7 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.sinh") PrimExpr exp_posx = exp(x); PrimExpr ret = (exp_posx - exp_negx) / two; *rv = ret; - }); + })); TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.clz").set_body([](const TVMArgs& targs, TVMRetValue* rv) { PrimExpr e = targs[0]; diff --git a/src/tir/transforms/lower_intrin.cc b/src/tir/transforms/lower_intrin.cc index cd7c10ffa688..085141970638 100644 --- a/src/tir/transforms/lower_intrin.cc +++ b/src/tir/transforms/lower_intrin.cc @@ -42,15 +42,15 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { IntrinInjecter(arith::Analyzer* analyzer, std::string target, std::string mtriple = "") : IRMutatorWithAnalyzer(analyzer) { - patterns_.push_back("tvm.intrin.rule." + target + "."); + patterns_.push_back(target + ".FLowerIntrinsic"); bool is_llvm_aarch64 = (mtriple.find("aarch64") != std::string::npos); if (is_llvm_aarch64) { - patterns_.push_back("tvm.intrin.rule." + target + "." + "aarch64."); + patterns_.push_back(target + "." + "aarch64.FLowerIntrinsic"); } - patterns_.push_back("tvm.intrin.rule.default."); - fma_ = runtime::Registry::Get(patterns_[0] + "fma"); + patterns_.push_back("default.FLowerIntrinsic"); + fma_ = runtime::Registry::Get("tvm.intrin.rule." + target + ".fma"); if (target == "stackvm") { support_bitwise_op_ = false; } @@ -58,12 +58,19 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { PrimExpr VisitExpr_(const CallNode* op) final { if (auto* ptr_op = op->op.as()) { - // Still use legacy string based rewriting - // TODO(tvm-team): migrate the pattern application from global function look up - // to an OpAttrMap - std::string name = ptr_op->name; - PrimExpr r = ApplyPattern(name, GetRef(op)); - if (r.defined()) return r; + for (size_t i = 0; i < patterns_.size(); ++i) { + auto default_intrin = Op::GetAttrMap(patterns_[i]); + FLowerIntrinsic f = default_intrin.get(GetRef(ptr_op), nullptr); + const PrimExpr e = GetRef(op); + if (f != nullptr) { + PrimExpr r = f(e); + ICHECK(r.defined()) << "intrinsic rule must always return valid Expr"; + if (!r.same_as(e)) { + r = this->VisitExpr(r); + if (r.defined()) return r; + } + } + } } return IRMutatorWithAnalyzer::VisitExpr_(op); } @@ -266,30 +273,6 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { return IRMutatorWithAnalyzer::VisitExpr_(op); } - PrimExpr ApplyPattern(std::string name, const PrimExpr& e) { - if (name.compare(0, 4, "tir.") == 0) { - name = name.substr(4); - } - - for (size_t i = 0; i < patterns_.size(); ++i) { - std::string& p = patterns_[i]; - size_t psize = p.length(); - p.resize(psize + name.length()); - name.copy(&p[0] + psize, name.length()); - const runtime::PackedFunc* f = runtime::Registry::Get(p); - p.resize(psize); - // if pattern exists. - if (f != nullptr) { - PrimExpr r = (*f)(e); - ICHECK(r.defined()) << "intrinsic rule must always return valid Expr"; - if (!r.same_as(e)) { - return this->VisitExpr(r); - } - } - } - return PrimExpr(); - } - // patterns std::vector patterns_; const PackedFunc* fma_{nullptr}; From 7d2401f09a1c0c0a809dd97175c974b2f9201eae Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 8 Apr 2021 11:19:15 -0700 Subject: [PATCH 02/41] Add support on all hw platforms --- include/tvm/tir/op_attr_types.h | 1 - src/target/intrin_rule.cc | 220 ++++++++++++------------ src/target/llvm/intrin_rule_hexagon.cc | 46 ++--- src/target/llvm/intrin_rule_nvptx.cc | 68 +++++--- src/target/llvm/intrin_rule_rocm.cc | 87 ++++++---- src/target/source/intrin_rule_aocl.cc | 68 +++++--- src/target/source/intrin_rule_cuda.cc | 86 +++++---- src/target/source/intrin_rule_metal.cc | 62 ++++--- src/target/source/intrin_rule_opencl.cc | 67 +++++--- src/target/source/intrin_rule_vhls.cc | 59 +++++-- src/target/spirv/intrin_rule_spirv.cc | 77 +++++---- 11 files changed, 510 insertions(+), 331 deletions(-) diff --git a/include/tvm/tir/op_attr_types.h b/include/tvm/tir/op_attr_types.h index aed4c03f1a5b..b68048e38dfb 100644 --- a/include/tvm/tir/op_attr_types.h +++ b/include/tvm/tir/op_attr_types.h @@ -28,7 +28,6 @@ #ifndef TVM_TIR_OP_ATTR_TYPES_H_ #define TVM_TIR_OP_ATTR_TYPES_H_ -#include #include namespace tvm { diff --git a/src/target/intrin_rule.cc b/src/target/intrin_rule.cc index b19a0ca68e16..36f9b7c066bc 100644 --- a/src/target/intrin_rule.cc +++ b/src/target/intrin_rule.cc @@ -112,131 +112,125 @@ TVM_REGISTER_OP("tir.ceil") TVM_REGISTER_OP("tir.round") .set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.rsqrt") +.set_attr("default.FLowerIntrinsic", + PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { + PrimExpr e = args[0]; + const CallNode* call = e.as(); + ICHECK(call != nullptr); + auto one = make_const(call->args[0].dtype(), 1); + *rv = one / sqrt(call->args[0]); + })); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.rsqrt") - .set_body([](const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; - const CallNode* call = e.as(); - ICHECK(call != nullptr); - - auto one = make_const(call->args[0].dtype(), 1); - *rv = one / sqrt(call->args[0]); - }); +TVM_REGISTER_OP("tir.pow") +.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.pow").set_body(DispatchPureExtern); TVM_REGISTER_OP("tir.sigmoid") .set_attr("default.FLowerIntrinsic", PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; - const CallNode* call = e.as(); - ICHECK(call != nullptr); - auto one = make_const(call->args[0].dtype(), 1); - *rv = one / (one + exp(-call->args[0])); - })); - -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.sigmoid") - .set_body([](const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; - const CallNode* call = e.as(); - ICHECK(call != nullptr); - - auto one = make_const(call->args[0].dtype(), 1); - *rv = one / (one + exp(-call->args[0])); - }); - -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.isfinite") - .set_body([](const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; - const CallNode* call = e.as(); - ICHECK(call != nullptr); - *rv = isfinite(call->args[0]); - }); - -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.isinf") - .set_body([](const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; - const CallNode* call = e.as(); - ICHECK(call != nullptr); - *rv = isinf(call->args[0]); - }); - -TVM_REGISTER_GLOBAL("tvm.intrin.rule.default.q_multiply_shift") - .set_body([](const TVMArgs& args, TVMRetValue* rv) { - using tir::make_const; - - PrimExpr e = args[0]; - const tir::CallNode* call = e.as(); - ICHECK(call != nullptr); - - PrimExpr x = call->args[0]; - PrimExpr y = call->args[1]; - PrimExpr q = call->args[2]; - PrimExpr s = call->args[3]; - - // Lambda function to extract the int value from PrimExpr - auto get_int_value = [](const PrimExpr node) { - if (auto int_node = node.as()) { - return int_node->value; - } - auto broadcast_node = node.as(); - CHECK(broadcast_node != nullptr); - auto int_node = broadcast_node->value.as(); - CHECK(int_node != nullptr); - return int_node->value; - }; - // Power of 2 is determined by the fixed_point_multiplier == 1 << 30. In case of power of 2, - // fixed point multiplier will represent a float value of 0.5. In fixed point, this is - // represented by 1 << 30. - if (get_int_value(y) == (1 << 30)) { - PrimExpr exp = s - 1; - int exp_val = get_int_value(s) - 1; - if (exp_val > 0) { - // power of 2 is greater than 0, apply left shift. - *rv = x << exp; - } else { - // power of 2 is less than 0, round and then apply right shift. - DataType lp_dtype = DataType::Int(32, x.dtype().lanes()); - PrimExpr one = make_const(lp_dtype, 1); - exp = -exp; - PrimExpr rounding_factor = one << (exp - 1); - PrimExpr rounded_t = x + rounding_factor; - *rv = rounded_t >> exp; - } - } else { - // Only int32 types are supported (any number of lanes is allowed) - ICHECK(y.dtype().code() == DLDataTypeCode::kDLInt && y.dtype().bits() == 32); - ICHECK(s.dtype().code() == DLDataTypeCode::kDLInt && s.dtype().bits() == 32); - - DataType hp_dtype = DataType::Int(64, x.dtype().lanes()); - DataType lp_dtype = DataType::Int(32, x.dtype().lanes()); - - // 1) Calculating the integer multiplier and integer shift - PrimExpr zero = make_const(s.dtype(), 0); - PrimExpr left_shift = tir::Select(s > zero, s, zero); - PrimExpr right_shift = tir::Select(s > zero, zero, -s); + PrimExpr e = args[0]; + const CallNode* call = e.as(); + ICHECK(call != nullptr); + auto one = make_const(call->args[0].dtype(), 1); + *rv = one / (one + exp(-call->args[0])); + })); + +TVM_REGISTER_OP("tir.isfinite") +.set_attr("default.FLowerIntrinsic", + PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { + PrimExpr e = args[0]; + const CallNode* call = e.as(); + ICHECK(call != nullptr); + *rv = isfinite(call->args[0]); + })); - // 2) Cast and Multiply the integer multiplier - PrimExpr one = make_const(hp_dtype, 1); - x = cast(hp_dtype, x); - y = cast(hp_dtype, y); - x = tir::Select(left_shift != zero, x << left_shift, x); +TVM_REGISTER_OP("tir.isinf") +.set_attr("default.FLowerIntrinsic", + PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { + PrimExpr e = args[0]; + const CallNode* call = e.as(); + ICHECK(call != nullptr); + *rv = isinf(call->args[0]); + })); - // 3) Perform the multiplication in higher precision. - x = x * y; +TVM_REGISTER_OP("tir.q_multiply_shift") +.set_attr("default.FLowerIntrinsic", + PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { + using tir::make_const; - // 4) Find the rounding scalar - PrimExpr total_right_shift = right_shift + q; - PrimExpr pos_rounding_value = (one << (total_right_shift - 1)); - x = x + pos_rounding_value; + PrimExpr e = args[0]; + const tir::CallNode* call = e.as(); + ICHECK(call != nullptr); - // 5) Simply right shift the result to get the final output. - x = x >> total_right_shift; + PrimExpr x = call->args[0]; + PrimExpr y = call->args[1]; + PrimExpr q = call->args[2]; + PrimExpr s = call->args[3]; - // 6) The fixed point multiplication keeps the value in int32 range. Casting back to int32. - *rv = cast(lp_dtype, x); + // Lambda function to extract the int value from PrimExpr + auto get_int_value = [](const PrimExpr node) { + if (auto int_node = node.as()) { + return int_node->value; + } + auto broadcast_node = node.as(); + CHECK(broadcast_node != nullptr); + auto int_node = broadcast_node->value.as(); + CHECK(int_node != nullptr); + return int_node->value; + }; + // Power of 2 is determined by the fixed_point_multiplier == 1 << 30. In case of power of 2, + // fixed point multiplier will represent a float value of 0.5. In fixed point, this is + // represented by 1 << 30. + if (get_int_value(y) == (1 << 30)) { + PrimExpr exp = s - 1; + int exp_val = get_int_value(s) - 1; + if (exp_val > 0) { + // power of 2 is greater than 0, apply left shift. + *rv = x << exp; + } else { + // power of 2 is less than 0, round and then apply right shift. + DataType lp_dtype = DataType::Int(32, x.dtype().lanes()); + PrimExpr one = make_const(lp_dtype, 1); + exp = -exp; + PrimExpr rounding_factor = one << (exp - 1); + PrimExpr rounded_t = x + rounding_factor; + *rv = rounded_t >> exp; } - }); + } else { + // Only int32 types are supported (any number of lanes is allowed) + ICHECK(y.dtype().code() == DLDataTypeCode::kDLInt && y.dtype().bits() == 32); + ICHECK(s.dtype().code() == DLDataTypeCode::kDLInt && s.dtype().bits() == 32); + + DataType hp_dtype = DataType::Int(64, x.dtype().lanes()); + DataType lp_dtype = DataType::Int(32, x.dtype().lanes()); + + // 1) Calculating the integer multiplier and integer shift + PrimExpr zero = make_const(s.dtype(), 0); + PrimExpr left_shift = tir::Select(s > zero, s, zero); + PrimExpr right_shift = tir::Select(s > zero, zero, -s); + + // 2) Cast and Multiply the integer multiplier + PrimExpr one = make_const(hp_dtype, 1); + x = cast(hp_dtype, x); + y = cast(hp_dtype, y); + x = tir::Select(left_shift != zero, x << left_shift, x); + + // 3) Perform the multiplication in higher precision. + x = x * y; + + // 4) Find the rounding scalar + PrimExpr total_right_shift = right_shift + q; + PrimExpr pos_rounding_value = (one << (total_right_shift - 1)); + x = x + pos_rounding_value; + + // 5) Simply right shift the result to get the final output. + x = x >> total_right_shift; + + // 6) The fixed point multiplication keeps the value in int32 range. Casting back to int32. + *rv = cast(lp_dtype, x); + } + })); } // namespace intrin } // namespace codegen diff --git a/src/target/llvm/intrin_rule_hexagon.cc b/src/target/llvm/intrin_rule_hexagon.cc index d38225184c2e..a2328f8be730 100644 --- a/src/target/llvm/intrin_rule_hexagon.cc +++ b/src/target/llvm/intrin_rule_hexagon.cc @@ -20,43 +20,45 @@ #ifdef TVM_LLVM_VERSION #include "intrin_rule_llvm.h" +#include namespace tvm { namespace codegen { namespace llvm { +using namespace tir; -TVM_REGISTER_GLOBAL("tvm.intrin.rule.hexagon.exp") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>); +TVM_REGISTER_OP("tir.exp").set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.hexagon.fma") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3>); +TVM_REGISTER_OP("tir.fma").set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.hexagon.log") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1>); +TVM_REGISTER_OP("tir.log").set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.hexagon.sqrt") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1>); +TVM_REGISTER_OP("tir.sqrt").set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.hexagon.floor") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::floor, 1>); +TVM_REGISTER_OP("tir.floor").set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::floor, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.hexagon.ceil") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::ceil, 1>); +TVM_REGISTER_OP("tir.ceil").set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ceil, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.hexagon.trunc") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::trunc, 1>); +TVM_REGISTER_OP("tir.trunc").set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::trunc, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.hexagon.fabs") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::fabs, 1>); +TVM_REGISTER_OP("tir.fabs").set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fabs, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.hexagon.round") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::round, 1>); +TVM_REGISTER_OP("tir.round").set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::round, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.hexagon.pow") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 1>); +TVM_REGISTER_OP("tir.pow").set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 1>)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.hexagon.popcount") - .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>); +TVM_REGISTER_OP("tir.ctpop").set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>)); } // namespace llvm } // namespace codegen diff --git a/src/target/llvm/intrin_rule_nvptx.cc b/src/target/llvm/intrin_rule_nvptx.cc index bb653e8ee5e0..4201f6e15efd 100644 --- a/src/target/llvm/intrin_rule_nvptx.cc +++ b/src/target/llvm/intrin_rule_nvptx.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -57,50 +58,73 @@ inline void DispatchPureExternLibDevice(const TVMArgs& args, TVMRetValue* rv) { } namespace llvm { +using namespace tir; -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.floor").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.floor").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.ceil").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.ceil").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.round").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.round").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.trunc").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.trunc").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.fabs").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.fabs").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.exp").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.exp").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.exp2").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.exp2").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.exp10").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.exp10").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.erf").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.erf").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.fma").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.fma").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.log").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.log").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.log2").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.log2").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.log10").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.log10").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.sqrt").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.sqrt").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.pow").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.pow").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.tanh").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.tanh").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.tan").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.tan").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.cos").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.cos").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.cosh").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.cosh").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.sin").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.sin").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.sinh").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.sinh").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.atan").set_body(DispatchPureExternLibDevice); +TVM_REGISTER_OP("tir.atan").set_attr("nvptx.FLowerIntrinsic", + PackedFunc(DispatchPureExternLibDevice)); } // namespace llvm } // namespace codegen diff --git a/src/target/llvm/intrin_rule_rocm.cc b/src/target/llvm/intrin_rule_rocm.cc index 08b32ed1b946..55dcb51d5d66 100644 --- a/src/target/llvm/intrin_rule_rocm.cc +++ b/src/target/llvm/intrin_rule_rocm.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -93,63 +94,89 @@ inline void DispatchShuffle(const TVMArgs& targs, TVMRetValue* rv) { } namespace llvm { +using namespace tir; // dummy because we don't have the activemask -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.tvm_warp_activemask") - .set_body([](const TVMArgs& targs, TVMRetValue* rv) { - PrimExpr zero = tir::make_zero(DataType::Int(32)); - *rv = zero; - }); +TVM_REGISTER_OP("tir.tvm_warp_activemask").set_attr("rocm.FLowerIntrinsic", + PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { + PrimExpr zero = tir::make_zero(DataType::Int(32)); + *rv = zero; + })); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.tvm_warp_shuffle").set_body(DispatchShuffle); +TVM_REGISTER_OP("tir.tvm_warp_shuffle") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchShuffle)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.tvm_warp_shuffle_up").set_body(DispatchShuffle); +TVM_REGISTER_OP("tir.tvm_warp_shuffle_up") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchShuffle)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.tvm_warp_shuffle_down").set_body(DispatchShuffle); +TVM_REGISTER_OP("tir.tvm_warp_shuffle_down") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchShuffle)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.floor").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.floor") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.ceil").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.ceil") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.round").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.round") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.trunc").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.trunc") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.fabs").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.fabs") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.exp").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.exp") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.exp2").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.exp2") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.exp10").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.exp10") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.erf").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.erf") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.fma").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.fma") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.log").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.log") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.log2").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.log2") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.log10").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.log10") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.sqrt").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.sqrt") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.pow").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.pow") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.tanh").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.tanh") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.tan").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.tan") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.cos").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.cos") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.cosh").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.cosh") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.sin").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.sin") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.sinh").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.sinh") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.atan").set_body(DispatchPureExternOCML); +TVM_REGISTER_OP("tir.atan") + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); } // namespace llvm } // namespace codegen diff --git a/src/target/source/intrin_rule_aocl.cc b/src/target/source/intrin_rule_aocl.cc index 69279a041413..197744b592d9 100644 --- a/src/target/source/intrin_rule_aocl.cc +++ b/src/target/source/intrin_rule_aocl.cc @@ -22,54 +22,78 @@ * \brief AOCL intrinsic rules. */ #include "../intrin_rule.h" +#include namespace tvm { namespace codegen { namespace intrin { +using namespace tir; -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl.floor").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.floor") + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl.ceil").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.ceil") + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl.trunc").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.trunc") + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl.fabs").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.fabs") + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl.round").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.round") + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl.exp").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp") + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl.log").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log") + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl.tanh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.tanh") + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl.sqrt").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sqrt") + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl.pow").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.pow") + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl.popcount").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.popcount") + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl_sw_emu.floor").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.floor") + .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl_sw_emu.ceil").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.ceil") + .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl_sw_emu.trunc").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.trunc") + .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl_sw_emu.fabs").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.fabs") + .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl_sw_emu.round").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.round") + .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl_sw_emu.exp").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp") + .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl_sw_emu.log").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log") + .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl_sw_emu.tanh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.tanh") + .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl_sw_emu.sqrt").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sqrt") + .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl_sw_emu.pow").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.pow") + .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.aocl_sw_emu.popcount").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.popcount") + .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); } // namespace intrin } // namespace codegen diff --git a/src/target/source/intrin_rule_cuda.cc b/src/target/source/intrin_rule_cuda.cc index 965b86c24d9e..c0085664ddfe 100644 --- a/src/target/source/intrin_rule_cuda.cc +++ b/src/target/source/intrin_rule_cuda.cc @@ -30,6 +30,7 @@ namespace tvm { namespace codegen { namespace intrin { // Add float suffix to the intrinsics, CUDA fast math. +using namespace tir; struct CUDAMath { std::string operator()(DataType t, std::string name) const { if (t.is_float()) { @@ -126,63 +127,86 @@ static void DispatchCUDAShuffle(const TVMArgs& args, TVMRetValue* rv) { *rv = Call(call->dtype, T()(call->dtype, Downcast(call->op)), cuda_args); } -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.floor").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.floor") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.ceil").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.ceil") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.trunc").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.trunc") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.fabs").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.fabs") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.round").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.round") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.exp").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.exp2").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp2") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.exp10").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp10") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.erf").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.erf") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.log").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.log2").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log2") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.log10").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log10") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.tan").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.tan").set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.cos").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.cos") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.cosh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.cosh") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.sin").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sin") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.sinh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sinh") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.atan").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.atan") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.tanh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.tanh") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.sqrt").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sqrt") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.pow").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.pow") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.popcount").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.popcount") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.tvm_warp_shuffle") - .set_body(DispatchCUDAShuffle); +TVM_REGISTER_OP("tir.tvm_warp_shuffle").set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchCUDAShuffle)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.tvm_warp_shuffle_up") - .set_body(DispatchCUDAShuffle); +TVM_REGISTER_OP("tir.tvm_warp_shuffle_up").set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchCUDAShuffle)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.tvm_warp_shuffle_down") - .set_body(DispatchCUDAShuffle); +TVM_REGISTER_OP("tir.tvm_warp_shuffle_down").set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchCUDAShuffle)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.tvm_warp_activemask") - .set_body(DispatchCUDAWarpActiveMask); +TVM_REGISTER_OP("tir.tvm_warp_activemask") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchCUDAWarpActiveMask)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.cuda.fmod").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.fmod") + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); // Register low-level builtin ops. // TODO(tvm-team): consider make CUDA its own subfolder and create a file for low-level builtins. diff --git a/src/target/source/intrin_rule_metal.cc b/src/target/source/intrin_rule_metal.cc index 80a10312c011..0fd128e45035 100644 --- a/src/target/source/intrin_rule_metal.cc +++ b/src/target/source/intrin_rule_metal.cc @@ -22,50 +22,72 @@ * \brief Metal intrinsic rules. */ #include "../intrin_rule.h" +#include namespace tvm { namespace codegen { namespace intrin { +using namespace tir; -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.floor").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.floor") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.ceil").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.ceil") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.trunc").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.trunc") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.fabs").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.fabs") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.round").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.round") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.exp").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.exp2").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp2") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.exp10").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp10") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.log").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.log2").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log2") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.log10").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log10") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.tanh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.tanh") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.sqrt").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sqrt") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.pow").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.pow") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.popcount").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.popcount") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.fmod").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.fmod") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.sin").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sin") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.sinh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sinh") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.cos").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.cos") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.metal.cosh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.cosh") + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); } // namespace intrin } // namespace codegen diff --git a/src/target/source/intrin_rule_opencl.cc b/src/target/source/intrin_rule_opencl.cc index 54da5c74ab02..0de1abc9a023 100644 --- a/src/target/source/intrin_rule_opencl.cc +++ b/src/target/source/intrin_rule_opencl.cc @@ -22,52 +22,74 @@ * \brief OpenCL intrinsic rules. */ #include - +#include #include "../intrin_rule.h" + namespace tvm { namespace codegen { namespace intrin { +using namespace tir; -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.floor").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.floor") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.ceil").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.ceil") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.trunc").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.trunc") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.fabs").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.fabs") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.round").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.round") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.exp").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.exp2").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp2") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.exp10").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp10") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.log").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.log2").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log2") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.log10").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log10") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.tanh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.tanh") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.sqrt").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sqrt") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.pow").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.pow") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.popcount").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.popcount") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.fmod").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.fmod") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.sin").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sin") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.sinh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sinh") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.cos").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.cos") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.cosh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.cosh") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); // There is no warp shuffle instruction in standard OpenCL // When shuffle is used, we assume it is intel's shuffle extension @@ -83,7 +105,8 @@ static void DispatchIntelShuffle(const TVMArgs& args, TVMRetValue* rv) { *rv = Call(call->dtype, builtin::call_pure_extern(), opencl_args); } -TVM_REGISTER_GLOBAL("tvm.intrin.rule.opencl.tvm_warp_shuffle").set_body(DispatchIntelShuffle); +TVM_REGISTER_OP("tir.tvm_warp_shuffle") + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchIntelShuffle)); } // namespace intrin } // namespace codegen diff --git a/src/target/source/intrin_rule_vhls.cc b/src/target/source/intrin_rule_vhls.cc index da9bc79452ed..14037070c972 100644 --- a/src/target/source/intrin_rule_vhls.cc +++ b/src/target/source/intrin_rule_vhls.cc @@ -22,48 +22,69 @@ * \brief VHLS intrinsic rules. */ #include "../intrin_rule.h" +#include namespace tvm { namespace codegen { namespace intrin { +using namespace tir; -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.floor").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.floor") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.ceil").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.ceil") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.trunc").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.trunc") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.fabs").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.fabs") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.round").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.round") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.exp").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.exp2").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp2") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.exp10").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.exp10") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.log").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.log2").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log2") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.log10").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.log10") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.tanh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.tanh") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.sqrt").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sqrt") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.pow").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.pow") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.popcount").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.popcount") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.sin").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sin") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.sinh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.sinh") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.cos").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.cos") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.sdaccel.cosh").set_body(DispatchPureExtern); +TVM_REGISTER_OP("tir.cosh") + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); } // namespace intrin } // namespace codegen diff --git a/src/target/spirv/intrin_rule_spirv.cc b/src/target/spirv/intrin_rule_spirv.cc index f77e8f4a26f8..f3ae5b5fb238 100644 --- a/src/target/spirv/intrin_rule_spirv.cc +++ b/src/target/spirv/intrin_rule_spirv.cc @@ -22,6 +22,7 @@ */ #include #include +#include #include #include #include @@ -31,6 +32,7 @@ namespace codegen { namespace spirv { using namespace runtime; +using namespace tir; // num_signature means number of arguments used to query signature @@ -54,34 +56,44 @@ inline void DispatchGLSLPureIntrin(const TVMArgs& targs, TVMRetValue* rv) { *rv = CallGLSLIntrin(targs, rv); } -TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.floor") - .set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.floor").set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.ceil").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.ceil").set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.round") - .set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.round").set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.trunc") - .set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.trunc").set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.fabs").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.fabs").set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.exp").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.exp").set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.sin").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.sin").set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.cos").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.cos").set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.log").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.log").set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.log2").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.log2").set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.sqrt").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.sqrt").set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.pow").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.pow").set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.tanh").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.tanh").set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.clz") .set_body([](const TVMArgs& targs, TVMRetValue* rv) { @@ -95,28 +107,35 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.clz") }); // WebGPU rules. -TVM_REGISTER_GLOBAL("tvm.intrin.rule.webgpu.floor") - .set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.floor").set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.webgpu.ceil").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.ceil").set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.webgpu.round") - .set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.round").set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.webgpu.trunc") - .set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.trunc").set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.webgpu.fabs").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.fabs").set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.webgpu.exp").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.exp").set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.webgpu.log").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.log").set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.webgpu.sqrt").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.sqrt").set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.webgpu.pow").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.pow").set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.webgpu.tanh").set_body(DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.tanh").set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); } // namespace spirv } // namespace codegen From fa948303fd1e9813a0a5a9d850b7bb2ad47f6db7 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 8 Apr 2021 11:49:04 -0700 Subject: [PATCH 03/41] Fix clang format --- src/target/intrin_rule.cc | 307 +++++++++++++----------- src/target/llvm/intrin_rule_hexagon.cc | 54 +++-- src/target/llvm/intrin_rule_llvm.cc | 240 +++++++++--------- src/target/llvm/intrin_rule_nvptx.cc | 72 +++--- src/target/llvm/intrin_rule_rocm.cc | 77 +++--- src/target/source/intrin_rule_aocl.cc | 67 +++--- src/target/source/intrin_rule_cuda.cc | 81 ++++--- src/target/source/intrin_rule_metal.cc | 53 ++-- src/target/source/intrin_rule_opencl.cc | 54 ++--- src/target/source/intrin_rule_vhls.cc | 51 ++-- src/target/spirv/intrin_rule_spirv.cc | 116 +++++---- 11 files changed, 614 insertions(+), 558 deletions(-) diff --git a/src/target/intrin_rule.cc b/src/target/intrin_rule.cc index 36f9b7c066bc..386f949df556 100644 --- a/src/target/intrin_rule.cc +++ b/src/target/intrin_rule.cc @@ -31,206 +31,227 @@ namespace codegen { namespace intrin { using namespace tir; -TVM_REGISTER_OP("tir.exp") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.exp").set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.erf") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.erf").set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.log") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.log").set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.log2") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.log10") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.log1p") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.tanh") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.tan") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.tan").set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.atan") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.atanh") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.atan2") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.cos") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.cos").set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.acos") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.cosh") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.acosh") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.sin") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.sin").set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.asin") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.sinh") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.asinh") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.hypot") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.nextafter") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.copysign") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.ldexp") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.sqrt") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.floor") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.ceil") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.round") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.rsqrt") -.set_attr("default.FLowerIntrinsic", - PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; - const CallNode* call = e.as(); - ICHECK(call != nullptr); - auto one = make_const(call->args[0].dtype(), 1); - *rv = one / sqrt(call->args[0]); - })); - -TVM_REGISTER_OP("tir.pow") -.set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); - + .set_attr("default.FLowerIntrinsic", + PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { + PrimExpr e = args[0]; + const CallNode* call = e.as(); + ICHECK(call != nullptr); + auto one = make_const(call->args[0].dtype(), 1); + *rv = one / sqrt(call->args[0]); + })); + +TVM_REGISTER_OP("tir.pow").set_attr("default.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.sigmoid") -.set_attr("default.FLowerIntrinsic", - PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; - const CallNode* call = e.as(); - ICHECK(call != nullptr); - auto one = make_const(call->args[0].dtype(), 1); - *rv = one / (one + exp(-call->args[0])); - })); + .set_attr("default.FLowerIntrinsic", + PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { + PrimExpr e = args[0]; + const CallNode* call = e.as(); + ICHECK(call != nullptr); + auto one = make_const(call->args[0].dtype(), 1); + *rv = one / (one + exp(-call->args[0])); + })); TVM_REGISTER_OP("tir.isfinite") -.set_attr("default.FLowerIntrinsic", - PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; - const CallNode* call = e.as(); - ICHECK(call != nullptr); - *rv = isfinite(call->args[0]); - })); + .set_attr("default.FLowerIntrinsic", + PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { + PrimExpr e = args[0]; + const CallNode* call = e.as(); + ICHECK(call != nullptr); + *rv = isfinite(call->args[0]); + })); TVM_REGISTER_OP("tir.isinf") -.set_attr("default.FLowerIntrinsic", - PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; - const CallNode* call = e.as(); - ICHECK(call != nullptr); - *rv = isinf(call->args[0]); - })); + .set_attr("default.FLowerIntrinsic", + PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { + PrimExpr e = args[0]; + const CallNode* call = e.as(); + ICHECK(call != nullptr); + *rv = isinf(call->args[0]); + })); TVM_REGISTER_OP("tir.q_multiply_shift") -.set_attr("default.FLowerIntrinsic", - PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { - using tir::make_const; - - PrimExpr e = args[0]; - const tir::CallNode* call = e.as(); - ICHECK(call != nullptr); - - PrimExpr x = call->args[0]; - PrimExpr y = call->args[1]; - PrimExpr q = call->args[2]; - PrimExpr s = call->args[3]; - - // Lambda function to extract the int value from PrimExpr - auto get_int_value = [](const PrimExpr node) { - if (auto int_node = node.as()) { - return int_node->value; - } - auto broadcast_node = node.as(); - CHECK(broadcast_node != nullptr); - auto int_node = broadcast_node->value.as(); - CHECK(int_node != nullptr); - return int_node->value; - }; - // Power of 2 is determined by the fixed_point_multiplier == 1 << 30. In case of power of 2, - // fixed point multiplier will represent a float value of 0.5. In fixed point, this is - // represented by 1 << 30. - if (get_int_value(y) == (1 << 30)) { - PrimExpr exp = s - 1; - int exp_val = get_int_value(s) - 1; - if (exp_val > 0) { - // power of 2 is greater than 0, apply left shift. - *rv = x << exp; - } else { - // power of 2 is less than 0, round and then apply right shift. - DataType lp_dtype = DataType::Int(32, x.dtype().lanes()); - PrimExpr one = make_const(lp_dtype, 1); - exp = -exp; - PrimExpr rounding_factor = one << (exp - 1); - PrimExpr rounded_t = x + rounding_factor; - *rv = rounded_t >> exp; - } - } else { - // Only int32 types are supported (any number of lanes is allowed) - ICHECK(y.dtype().code() == DLDataTypeCode::kDLInt && y.dtype().bits() == 32); - ICHECK(s.dtype().code() == DLDataTypeCode::kDLInt && s.dtype().bits() == 32); - - DataType hp_dtype = DataType::Int(64, x.dtype().lanes()); - DataType lp_dtype = DataType::Int(32, x.dtype().lanes()); - - // 1) Calculating the integer multiplier and integer shift - PrimExpr zero = make_const(s.dtype(), 0); - PrimExpr left_shift = tir::Select(s > zero, s, zero); - PrimExpr right_shift = tir::Select(s > zero, zero, -s); - - // 2) Cast and Multiply the integer multiplier - PrimExpr one = make_const(hp_dtype, 1); - x = cast(hp_dtype, x); - y = cast(hp_dtype, y); - x = tir::Select(left_shift != zero, x << left_shift, x); - - // 3) Perform the multiplication in higher precision. - x = x * y; - - // 4) Find the rounding scalar - PrimExpr total_right_shift = right_shift + q; - PrimExpr pos_rounding_value = (one << (total_right_shift - 1)); - x = x + pos_rounding_value; - - // 5) Simply right shift the result to get the final output. - x = x >> total_right_shift; - - // 6) The fixed point multiplication keeps the value in int32 range. Casting back to int32. - *rv = cast(lp_dtype, x); - } - })); + .set_attr( + "default.FLowerIntrinsic", PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { + using tir::make_const; + + PrimExpr e = args[0]; + const tir::CallNode* call = e.as(); + ICHECK(call != nullptr); + + PrimExpr x = call->args[0]; + PrimExpr y = call->args[1]; + PrimExpr q = call->args[2]; + PrimExpr s = call->args[3]; + + // Lambda function to extract the int value from PrimExpr + auto get_int_value = [](const PrimExpr node) { + if (auto int_node = node.as()) { + return int_node->value; + } + auto broadcast_node = node.as(); + CHECK(broadcast_node != nullptr); + auto int_node = broadcast_node->value.as(); + CHECK(int_node != nullptr); + return int_node->value; + }; + // Power of 2 is determined by the fixed_point_multiplier == 1 << 30. In case of power of + // 2, fixed point multiplier will represent a float value of 0.5. In fixed point, this is + // represented by 1 << 30. + if (get_int_value(y) == (1 << 30)) { + PrimExpr exp = s - 1; + int exp_val = get_int_value(s) - 1; + if (exp_val > 0) { + // power of 2 is greater than 0, apply left shift. + *rv = x << exp; + } else { + // power of 2 is less than 0, round and then apply right shift. + DataType lp_dtype = DataType::Int(32, x.dtype().lanes()); + PrimExpr one = make_const(lp_dtype, 1); + exp = -exp; + PrimExpr rounding_factor = one << (exp - 1); + PrimExpr rounded_t = x + rounding_factor; + *rv = rounded_t >> exp; + } + } else { + // Only int32 types are supported (any number of lanes is allowed) + ICHECK(y.dtype().code() == DLDataTypeCode::kDLInt && y.dtype().bits() == 32); + ICHECK(s.dtype().code() == DLDataTypeCode::kDLInt && s.dtype().bits() == 32); + + DataType hp_dtype = DataType::Int(64, x.dtype().lanes()); + DataType lp_dtype = DataType::Int(32, x.dtype().lanes()); + + // 1) Calculating the integer multiplier and integer shift + PrimExpr zero = make_const(s.dtype(), 0); + PrimExpr left_shift = tir::Select(s > zero, s, zero); + PrimExpr right_shift = tir::Select(s > zero, zero, -s); + + // 2) Cast and Multiply the integer multiplier + PrimExpr one = make_const(hp_dtype, 1); + x = cast(hp_dtype, x); + y = cast(hp_dtype, y); + x = tir::Select(left_shift != zero, x << left_shift, x); + + // 3) Perform the multiplication in higher precision. + x = x * y; + + // 4) Find the rounding scalar + PrimExpr total_right_shift = right_shift + q; + PrimExpr pos_rounding_value = (one << (total_right_shift - 1)); + x = x + pos_rounding_value; + + // 5) Simply right shift the result to get the final output. + x = x >> total_right_shift; + + // 6) The fixed point multiplication keeps the value in int32 range. Casting back to + // int32. + *rv = cast(lp_dtype, x); + } + })); } // namespace intrin } // namespace codegen diff --git a/src/target/llvm/intrin_rule_hexagon.cc b/src/target/llvm/intrin_rule_hexagon.cc index a2328f8be730..8e4a83c59b8e 100644 --- a/src/target/llvm/intrin_rule_hexagon.cc +++ b/src/target/llvm/intrin_rule_hexagon.cc @@ -19,46 +19,54 @@ #ifdef TVM_LLVM_VERSION -#include "intrin_rule_llvm.h" #include +#include "intrin_rule_llvm.h" + namespace tvm { namespace codegen { namespace llvm { using namespace tir; -TVM_REGISTER_OP("tir.exp").set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>)); +TVM_REGISTER_OP("tir.exp").set_attr( + "hexagon.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>)); -TVM_REGISTER_OP("tir.fma").set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3>)); +TVM_REGISTER_OP("tir.fma").set_attr( + "hexagon.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3>)); -TVM_REGISTER_OP("tir.log").set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1>)); +TVM_REGISTER_OP("tir.log").set_attr( + "hexagon.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1>)); -TVM_REGISTER_OP("tir.sqrt").set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1>)); +TVM_REGISTER_OP("tir.sqrt") + .set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1>)); -TVM_REGISTER_OP("tir.floor").set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::floor, 1>)); +TVM_REGISTER_OP("tir.floor") + .set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::floor, 1>)); -TVM_REGISTER_OP("tir.ceil").set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ceil, 1>)); +TVM_REGISTER_OP("tir.ceil") + .set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ceil, 1>)); -TVM_REGISTER_OP("tir.trunc").set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::trunc, 1>)); +TVM_REGISTER_OP("tir.trunc") + .set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::trunc, 1>)); -TVM_REGISTER_OP("tir.fabs").set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fabs, 1>)); +TVM_REGISTER_OP("tir.fabs") + .set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fabs, 1>)); -TVM_REGISTER_OP("tir.round").set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::round, 1>)); +TVM_REGISTER_OP("tir.round") + .set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::round, 1>)); -TVM_REGISTER_OP("tir.pow").set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 1>)); +TVM_REGISTER_OP("tir.pow").set_attr( + "hexagon.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 1>)); -TVM_REGISTER_OP("tir.ctpop").set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>)); +TVM_REGISTER_OP("tir.ctpop") + .set_attr("hexagon.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>)); } // namespace llvm } // namespace codegen diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index b9e70edd45aa..fef367d72ca0 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -33,174 +33,168 @@ namespace llvm { using namespace tir; TVM_REGISTER_OP("tir.prefetch") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMIntrin<::llvm::Intrinsic::prefetch, 4>)); + .set_attr("llvm.FLowerIntrinsic", + PackedFunc(DispatchLLVMIntrin<::llvm::Intrinsic::prefetch, 4>)); -TVM_REGISTER_OP("tir.exp") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>)); +TVM_REGISTER_OP("tir.exp").set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>)); TVM_REGISTER_OP("tir.exp2") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp2, 1>)); + .set_attr("llvm.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp2, 1>)); // TODO(tvm-team): migrate the legalization transformations as a separate // set of rules in TIR that can be shared across backends. TVM_REGISTER_OP("tir.exp10") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { - using tir::make_const; - using tir::make_zero; - PrimExpr e = targs[0]; - const tir::CallNode* call = e.as(); - ICHECK(call != nullptr); - const PrimExpr& x = call->args[0]; - PrimExpr ln10 = make_const(x.dtype(), 2.302585093); - PrimExpr ret = exp(x * ln10); - *rv = ret; - })); - -TVM_REGISTER_OP("tir.fma") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3>)); - -TVM_REGISTER_OP("tir.log") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1>)); + .set_attr("llvm.FLowerIntrinsic", + PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { + using tir::make_const; + using tir::make_zero; + PrimExpr e = targs[0]; + const tir::CallNode* call = e.as(); + ICHECK(call != nullptr); + const PrimExpr& x = call->args[0]; + PrimExpr ln10 = make_const(x.dtype(), 2.302585093); + PrimExpr ret = exp(x * ln10); + *rv = ret; + })); + +TVM_REGISTER_OP("tir.fma").set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3>)); + +TVM_REGISTER_OP("tir.log").set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1>)); TVM_REGISTER_OP("tir.log2") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log2, 1>)); + .set_attr("llvm.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log2, 1>)); TVM_REGISTER_OP("tir.log10") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log10, 1>)); + .set_attr("llvm.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log10, 1>)); TVM_REGISTER_OP("tir.sqrt") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1>)); + .set_attr("llvm.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1>)); TVM_REGISTER_OP("tir.floor") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::floor, 1>)); + .set_attr("llvm.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::floor, 1>)); TVM_REGISTER_OP("tir.ceil") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ceil, 1>)); + .set_attr("llvm.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ceil, 1>)); TVM_REGISTER_OP("tir.trunc") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::trunc, 1>)); + .set_attr("llvm.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::trunc, 1>)); TVM_REGISTER_OP("tir.fabs") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fabs, 1>)); + .set_attr("llvm.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fabs, 1>)); TVM_REGISTER_OP("tir.round") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::round, 1>)); + .set_attr("llvm.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::round, 1>)); TVM_REGISTER_OP("tir.nearbyint") - .set_attr( - "llvm.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::nearbyint, 1>)); + .set_attr("llvm.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::nearbyint, 1>)); TVM_REGISTER_OP("tir.tanh") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { - using tir::make_const; - using tir::make_zero; - PrimExpr e = targs[0]; - const tir::CallNode* call = e.as(); - ICHECK(call != nullptr); - const PrimExpr& x = call->args[0]; - PrimExpr one = make_const(x.dtype(), 1); - PrimExpr two = make_const(x.dtype(), 2); - PrimExpr neg_two = make_const(x.dtype(), -2); - - PrimExpr exp_neg2x = exp(neg_two * x); - PrimExpr exp_pos2x = exp(two * x); - - PrimExpr tanh_pos = (one - exp_neg2x) / (one + exp_neg2x); - PrimExpr tanh_neg = (exp_pos2x - one) / (exp_pos2x + one); - *rv = tir::Select(x >= make_zero(x.dtype()), tanh_pos, tanh_neg); - })); - -TVM_REGISTER_OP("tir.pow") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 2>)); + .set_attr("llvm.FLowerIntrinsic", + PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { + using tir::make_const; + using tir::make_zero; + PrimExpr e = targs[0]; + const tir::CallNode* call = e.as(); + ICHECK(call != nullptr); + const PrimExpr& x = call->args[0]; + PrimExpr one = make_const(x.dtype(), 1); + PrimExpr two = make_const(x.dtype(), 2); + PrimExpr neg_two = make_const(x.dtype(), -2); + + PrimExpr exp_neg2x = exp(neg_two * x); + PrimExpr exp_pos2x = exp(two * x); + + PrimExpr tanh_pos = (one - exp_neg2x) / (one + exp_neg2x); + PrimExpr tanh_neg = (exp_pos2x - one) / (exp_pos2x + one); + *rv = tir::Select(x >= make_zero(x.dtype()), tanh_pos, tanh_neg); + })); + +TVM_REGISTER_OP("tir.pow").set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 2>)); TVM_REGISTER_OP("tir.popcount") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>)); + .set_attr("llvm.FLowerIntrinsic", + PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>)); TVM_REGISTER_OP("tir.tan").set_attr( "llvm.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { - PrimExpr e = targs[0]; - const tir::CallNode* call = e.as(); - ICHECK(call != nullptr); - const PrimExpr& x = call->args[0]; - PrimExpr tan_x = sin(x) / cos(x); - *rv = tan_x; -})); - -TVM_REGISTER_OP("tir.cos") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::cos, 1>)); - -TVM_REGISTER_OP("tir.cosh") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { - using tir::make_const; - using tir::make_zero; PrimExpr e = targs[0]; const tir::CallNode* call = e.as(); ICHECK(call != nullptr); const PrimExpr& x = call->args[0]; - PrimExpr two = make_const(x.dtype(), 2); - PrimExpr neg_one = make_const(x.dtype(), -1); - PrimExpr exp_negx = exp(neg_one * x); - PrimExpr exp_posx = exp(x); - PrimExpr ret = (exp_posx + exp_negx) / two; - *rv = ret; + PrimExpr tan_x = sin(x) / cos(x); + *rv = tan_x; })); -TVM_REGISTER_OP("tir.sin") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::sin, 1>)); +TVM_REGISTER_OP("tir.cos").set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::cos, 1>)); + +TVM_REGISTER_OP("tir.cosh") + .set_attr("llvm.FLowerIntrinsic", + PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { + using tir::make_const; + using tir::make_zero; + PrimExpr e = targs[0]; + const tir::CallNode* call = e.as(); + ICHECK(call != nullptr); + const PrimExpr& x = call->args[0]; + PrimExpr two = make_const(x.dtype(), 2); + PrimExpr neg_one = make_const(x.dtype(), -1); + PrimExpr exp_negx = exp(neg_one * x); + PrimExpr exp_posx = exp(x); + PrimExpr ret = (exp_posx + exp_negx) / two; + *rv = ret; + })); + +TVM_REGISTER_OP("tir.sin").set_attr( + "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::sin, 1>)); TVM_REGISTER_OP("tir.sinh") - .set_attr( - "llvm.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { - using tir::make_const; - using tir::make_zero; + .set_attr("llvm.FLowerIntrinsic", + PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { + using tir::make_const; + using tir::make_zero; + PrimExpr e = targs[0]; + const tir::CallNode* call = e.as(); + ICHECK(call != nullptr); + const PrimExpr& x = call->args[0]; + PrimExpr two = make_const(x.dtype(), 2); + PrimExpr neg_one = make_const(x.dtype(), -1); + PrimExpr exp_negx = exp(neg_one * x); + PrimExpr exp_posx = exp(x); + PrimExpr ret = (exp_posx - exp_negx) / two; + *rv = ret; + })); + +TVM_REGISTER_OP("tir.clz").set_attr( + "llvm.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { PrimExpr e = targs[0]; const tir::CallNode* call = e.as(); ICHECK(call != nullptr); - const PrimExpr& x = call->args[0]; - PrimExpr two = make_const(x.dtype(), 2); - PrimExpr neg_one = make_const(x.dtype(), -1); - PrimExpr exp_negx = exp(neg_one * x); - PrimExpr exp_posx = exp(x); - PrimExpr ret = (exp_posx - exp_negx) / two; - *rv = ret; + ICHECK_EQ(call->args.size(), 1); + Array cargs; + cargs.push_back(IntImm(DataType::UInt(32), ::llvm::Intrinsic::ctlz)); + cargs.push_back(IntImm(DataType::UInt(32), 2)); + cargs.push_back(call->args[0]); + cargs.push_back(IntImm(DataType::Int(1), 1)); // is_zero_undef + // LLVM requires that the return type must match the first argument type + auto clz = tir::Call(call->args[0]->dtype, tir::builtin::call_llvm_intrin(), cargs); + *rv = cast(call->dtype, clz); })); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.clz").set_body([](const TVMArgs& targs, TVMRetValue* rv) { - PrimExpr e = targs[0]; - const tir::CallNode* call = e.as(); - ICHECK(call != nullptr); - ICHECK_EQ(call->args.size(), 1); - Array cargs; - cargs.push_back(IntImm(DataType::UInt(32), ::llvm::Intrinsic::ctlz)); - cargs.push_back(IntImm(DataType::UInt(32), 2)); - cargs.push_back(call->args[0]); - cargs.push_back(IntImm(DataType::Int(1), 1)); // is_zero_undef - // LLVM requires that the return type must match the first argument type - auto clz = tir::Call(call->args[0]->dtype, tir::builtin::call_llvm_intrin(), cargs); - *rv = cast(call->dtype, clz); -}); - } // namespace llvm } // namespace codegen } // namespace tvm diff --git a/src/target/llvm/intrin_rule_nvptx.cc b/src/target/llvm/intrin_rule_nvptx.cc index 4201f6e15efd..02092618e131 100644 --- a/src/target/llvm/intrin_rule_nvptx.cc +++ b/src/target/llvm/intrin_rule_nvptx.cc @@ -60,71 +60,71 @@ inline void DispatchPureExternLibDevice(const TVMArgs& args, TVMRetValue* rv) { namespace llvm { using namespace tir; -TVM_REGISTER_OP("tir.floor").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); +TVM_REGISTER_OP("tir.floor") + .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_OP("tir.ceil").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); +TVM_REGISTER_OP("tir.ceil") + .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_OP("tir.round").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); +TVM_REGISTER_OP("tir.round") + .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_OP("tir.trunc").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); +TVM_REGISTER_OP("tir.trunc") + .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_OP("tir.fabs").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); +TVM_REGISTER_OP("tir.fabs") + .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); TVM_REGISTER_OP("tir.exp").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_OP("tir.exp2").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); +TVM_REGISTER_OP("tir.exp2") + .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_OP("tir.exp10").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); +TVM_REGISTER_OP("tir.exp10") + .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); TVM_REGISTER_OP("tir.erf").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + PackedFunc(DispatchPureExternLibDevice)); TVM_REGISTER_OP("tir.fma").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + PackedFunc(DispatchPureExternLibDevice)); TVM_REGISTER_OP("tir.log").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_OP("tir.log2").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); +TVM_REGISTER_OP("tir.log2") + .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_OP("tir.log10").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); +TVM_REGISTER_OP("tir.log10") + .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_OP("tir.sqrt").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); +TVM_REGISTER_OP("tir.sqrt") + .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); TVM_REGISTER_OP("tir.pow").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_OP("tir.tanh").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); +TVM_REGISTER_OP("tir.tanh") + .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); TVM_REGISTER_OP("tir.tan").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + PackedFunc(DispatchPureExternLibDevice)); TVM_REGISTER_OP("tir.cos").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_OP("tir.cosh").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); +TVM_REGISTER_OP("tir.cosh") + .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); TVM_REGISTER_OP("tir.sin").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_OP("tir.sinh").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); +TVM_REGISTER_OP("tir.sinh") + .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); -TVM_REGISTER_OP("tir.atan").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); +TVM_REGISTER_OP("tir.atan") + .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); } // namespace llvm } // namespace codegen diff --git a/src/target/llvm/intrin_rule_rocm.cc b/src/target/llvm/intrin_rule_rocm.cc index 55dcb51d5d66..acfe1e813933 100644 --- a/src/target/llvm/intrin_rule_rocm.cc +++ b/src/target/llvm/intrin_rule_rocm.cc @@ -97,86 +97,87 @@ namespace llvm { using namespace tir; // dummy because we don't have the activemask -TVM_REGISTER_OP("tir.tvm_warp_activemask").set_attr("rocm.FLowerIntrinsic", - PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { - PrimExpr zero = tir::make_zero(DataType::Int(32)); - *rv = zero; - })); +TVM_REGISTER_OP("tir.tvm_warp_activemask") + .set_attr("rocm.FLowerIntrinsic", + PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { + PrimExpr zero = tir::make_zero(DataType::Int(32)); + *rv = zero; + })); TVM_REGISTER_OP("tir.tvm_warp_shuffle") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchShuffle)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchShuffle)); TVM_REGISTER_OP("tir.tvm_warp_shuffle_up") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchShuffle)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchShuffle)); TVM_REGISTER_OP("tir.tvm_warp_shuffle_down") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchShuffle)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchShuffle)); TVM_REGISTER_OP("tir.floor") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); TVM_REGISTER_OP("tir.ceil") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); TVM_REGISTER_OP("tir.round") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); TVM_REGISTER_OP("tir.trunc") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); TVM_REGISTER_OP("tir.fabs") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_OP("tir.exp") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); +TVM_REGISTER_OP("tir.exp").set_attr("rocm.FLowerIntrinsic", + PackedFunc(DispatchPureExternOCML)); TVM_REGISTER_OP("tir.exp2") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); TVM_REGISTER_OP("tir.exp10") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_OP("tir.erf") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); +TVM_REGISTER_OP("tir.erf").set_attr("rocm.FLowerIntrinsic", + PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_OP("tir.fma") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); +TVM_REGISTER_OP("tir.fma").set_attr("rocm.FLowerIntrinsic", + PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_OP("tir.log") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); +TVM_REGISTER_OP("tir.log").set_attr("rocm.FLowerIntrinsic", + PackedFunc(DispatchPureExternOCML)); TVM_REGISTER_OP("tir.log2") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); TVM_REGISTER_OP("tir.log10") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); TVM_REGISTER_OP("tir.sqrt") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_OP("tir.pow") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); +TVM_REGISTER_OP("tir.pow").set_attr("rocm.FLowerIntrinsic", + PackedFunc(DispatchPureExternOCML)); TVM_REGISTER_OP("tir.tanh") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_OP("tir.tan") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); +TVM_REGISTER_OP("tir.tan").set_attr("rocm.FLowerIntrinsic", + PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_OP("tir.cos") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); +TVM_REGISTER_OP("tir.cos").set_attr("rocm.FLowerIntrinsic", + PackedFunc(DispatchPureExternOCML)); TVM_REGISTER_OP("tir.cosh") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); -TVM_REGISTER_OP("tir.sin") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); +TVM_REGISTER_OP("tir.sin").set_attr("rocm.FLowerIntrinsic", + PackedFunc(DispatchPureExternOCML)); TVM_REGISTER_OP("tir.sinh") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); TVM_REGISTER_OP("tir.atan") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); } // namespace llvm } // namespace codegen diff --git a/src/target/source/intrin_rule_aocl.cc b/src/target/source/intrin_rule_aocl.cc index 197744b592d9..e0bba3696428 100644 --- a/src/target/source/intrin_rule_aocl.cc +++ b/src/target/source/intrin_rule_aocl.cc @@ -21,79 +21,88 @@ * \file intrin_rule_aocl.cc * \brief AOCL intrinsic rules. */ -#include "../intrin_rule.h" #include +#include "../intrin_rule.h" + namespace tvm { namespace codegen { namespace intrin { using namespace tir; TVM_REGISTER_OP("tir.floor") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.ceil") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.trunc") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.fabs") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.round") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.exp") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.exp").set_attr("aocl.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.log") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.log").set_attr("aocl.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.tanh") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.sqrt") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.pow") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.pow").set_attr("aocl.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.popcount") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.floor") - .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.ceil") - .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.trunc") - .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.fabs") - .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.round") - .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.exp") - .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.exp").set_attr("aocl_sw_emu.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.log") - .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.log").set_attr("aocl_sw_emu.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.tanh") - .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.sqrt") - .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.pow") - .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.pow").set_attr("aocl_sw_emu.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.popcount") - .set_attr("aocl_sw_emu.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); } // namespace intrin } // namespace codegen diff --git a/src/target/source/intrin_rule_cuda.cc b/src/target/source/intrin_rule_cuda.cc index c0085664ddfe..336f920bd470 100644 --- a/src/target/source/intrin_rule_cuda.cc +++ b/src/target/source/intrin_rule_cuda.cc @@ -128,85 +128,92 @@ static void DispatchCUDAShuffle(const TVMArgs& args, TVMRetValue* rv) { } TVM_REGISTER_OP("tir.floor") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.ceil") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.trunc") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.fabs") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.round") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.exp") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.exp").set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.exp2") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.exp10") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.erf") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.erf").set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.log") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.log").set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.log2") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.log10") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.tan").set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.tan").set_attr( + "cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.cos") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.cos").set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.cosh") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.sin") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.sin").set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.sinh") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.atan") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.tanh") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.sqrt") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.pow") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.pow").set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.popcount") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.tvm_warp_shuffle").set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchCUDAShuffle)); +TVM_REGISTER_OP("tir.tvm_warp_shuffle") + .set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchCUDAShuffle)); -TVM_REGISTER_OP("tir.tvm_warp_shuffle_up").set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchCUDAShuffle)); +TVM_REGISTER_OP("tir.tvm_warp_shuffle_up") + .set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchCUDAShuffle)); -TVM_REGISTER_OP("tir.tvm_warp_shuffle_down").set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchCUDAShuffle)); +TVM_REGISTER_OP("tir.tvm_warp_shuffle_down") + .set_attr("cuda.FLowerIntrinsic", + PackedFunc(DispatchCUDAShuffle)); TVM_REGISTER_OP("tir.tvm_warp_activemask") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchCUDAWarpActiveMask)); + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchCUDAWarpActiveMask)); TVM_REGISTER_OP("tir.fmod") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); // Register low-level builtin ops. // TODO(tvm-team): consider make CUDA its own subfolder and create a file for low-level builtins. diff --git a/src/target/source/intrin_rule_metal.cc b/src/target/source/intrin_rule_metal.cc index 0fd128e45035..07e1940a473f 100644 --- a/src/target/source/intrin_rule_metal.cc +++ b/src/target/source/intrin_rule_metal.cc @@ -21,73 +21,74 @@ * \file intrin_rule_metal.cc * \brief Metal intrinsic rules. */ -#include "../intrin_rule.h" #include +#include "../intrin_rule.h" + namespace tvm { namespace codegen { namespace intrin { using namespace tir; TVM_REGISTER_OP("tir.floor") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.ceil") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.trunc") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.fabs") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.round") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.exp") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.exp").set_attr("metal.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.exp2") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.exp10") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.log") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.log").set_attr("metal.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.log2") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.log10") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.tanh") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.sqrt") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.pow") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.pow").set_attr("metal.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.popcount") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.fmod") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.sin") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.sin").set_attr("metal.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.sinh") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.cos") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.cos").set_attr("metal.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.cosh") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); } // namespace intrin } // namespace codegen diff --git a/src/target/source/intrin_rule_opencl.cc b/src/target/source/intrin_rule_opencl.cc index 0de1abc9a023..fdc83936f8d3 100644 --- a/src/target/source/intrin_rule_opencl.cc +++ b/src/target/source/intrin_rule_opencl.cc @@ -23,8 +23,8 @@ */ #include #include -#include "../intrin_rule.h" +#include "../intrin_rule.h" namespace tvm { namespace codegen { @@ -32,64 +32,64 @@ namespace intrin { using namespace tir; TVM_REGISTER_OP("tir.floor") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.ceil") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.trunc") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.fabs") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.round") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.exp") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.exp").set_attr("opencl.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.exp2") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.exp10") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.log") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.log").set_attr("opencl.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.log2") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.log10") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.tanh") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.sqrt") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.pow") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.pow").set_attr("opencl.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.popcount") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.fmod") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.sin") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.sin").set_attr("opencl.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.sinh") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.cos") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.cos").set_attr("opencl.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.cosh") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); // There is no warp shuffle instruction in standard OpenCL // When shuffle is used, we assume it is intel's shuffle extension @@ -106,7 +106,7 @@ static void DispatchIntelShuffle(const TVMArgs& args, TVMRetValue* rv) { } TVM_REGISTER_OP("tir.tvm_warp_shuffle") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchIntelShuffle)); + .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchIntelShuffle)); } // namespace intrin } // namespace codegen diff --git a/src/target/source/intrin_rule_vhls.cc b/src/target/source/intrin_rule_vhls.cc index 14037070c972..a91fb06246aa 100644 --- a/src/target/source/intrin_rule_vhls.cc +++ b/src/target/source/intrin_rule_vhls.cc @@ -21,70 +21,71 @@ * \file intrin_rule_vhls.cc * \brief VHLS intrinsic rules. */ -#include "../intrin_rule.h" #include +#include "../intrin_rule.h" + namespace tvm { namespace codegen { namespace intrin { using namespace tir; TVM_REGISTER_OP("tir.floor") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.ceil") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.trunc") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.fabs") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.round") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.exp") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.exp").set_attr("sdaccel.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.exp2") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.exp10") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.log") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.log").set_attr("sdaccel.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.log2") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.log10") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.tanh") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.sqrt") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.pow") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.pow").set_attr("sdaccel.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.popcount") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.sin") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.sin").set_attr("sdaccel.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.sinh") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); -TVM_REGISTER_OP("tir.cos") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.cos").set_attr("sdaccel.FLowerIntrinsic", + PackedFunc(DispatchPureExtern)); TVM_REGISTER_OP("tir.cosh") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); } // namespace intrin } // namespace codegen diff --git a/src/target/spirv/intrin_rule_spirv.cc b/src/target/spirv/intrin_rule_spirv.cc index f3ae5b5fb238..52bb27a20740 100644 --- a/src/target/spirv/intrin_rule_spirv.cc +++ b/src/target/spirv/intrin_rule_spirv.cc @@ -22,10 +22,9 @@ */ #include #include -#include #include #include -#include +#include namespace tvm { namespace codegen { @@ -56,47 +55,55 @@ inline void DispatchGLSLPureIntrin(const TVMArgs& targs, TVMRetValue* rv) { *rv = CallGLSLIntrin(targs, rv); } -TVM_REGISTER_OP("tir.floor").set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.floor") + .set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.ceil").set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.ceil") + .set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.round").set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.round") + .set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.trunc").set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.trunc") + .set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.fabs").set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.fabs") + .set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.exp").set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.exp").set_attr( + "vulkan.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.sin").set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.sin").set_attr( + "vulkan.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.cos").set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.cos").set_attr( + "vulkan.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.log").set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.log").set_attr( + "vulkan.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.log2").set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.log2") + .set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.sqrt").set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.sqrt") + .set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.pow").set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.pow").set_attr( + "vulkan.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.tanh").set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.tanh") + .set_attr("vulkan.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.clz") - .set_body([](const TVMArgs& targs, TVMRetValue* rv) { +TVM_REGISTER_OP("tir.clz").set_attr( + "vulkan.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { PrimExpr e = targs[0]; const tir::CallNode* call = e.as(); ICHECK(call != nullptr); @@ -104,38 +111,45 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.vulkan.clz") PrimExpr arg = call->args[0]; PrimExpr msb = CallGLSLIntrin(targs, rv); *rv = PrimExpr(arg.dtype().bits() - 1) - msb; - }); + })); // WebGPU rules. -TVM_REGISTER_OP("tir.floor").set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.floor") + .set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.ceil").set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.ceil") + .set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.round").set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.round") + .set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.trunc").set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.trunc") + .set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.fabs").set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.fabs") + .set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.exp").set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.exp").set_attr( + "webgpu.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.log").set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.log").set_attr( + "webgpu.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.sqrt").set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.sqrt") + .set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.pow").set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.pow").set_attr( + "webgpu.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); -TVM_REGISTER_OP("tir.tanh").set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); +TVM_REGISTER_OP("tir.tanh") + .set_attr("webgpu.FLowerIntrinsic", + PackedFunc(DispatchGLSLPureIntrin)); } // namespace spirv } // namespace codegen From 9b4fb8c7dc97a49bcab71d4f4ea1ec43b1e9799c Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 8 Apr 2021 12:41:27 -0700 Subject: [PATCH 04/41] Fix build with include files --- include/tvm/tir/op_attr_types.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/tvm/tir/op_attr_types.h b/include/tvm/tir/op_attr_types.h index b68048e38dfb..201e99c78950 100644 --- a/include/tvm/tir/op_attr_types.h +++ b/include/tvm/tir/op_attr_types.h @@ -29,10 +29,12 @@ #define TVM_TIR_OP_ATTR_TYPES_H_ #include +#include +#include namespace tvm { namespace tir { - +using namespace runtime; /*! * \brief Global symbol of the op after lowering. */ From c70a5a84b3097f837971d1636fb90df59f2973b5 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 8 Apr 2021 12:57:45 -0700 Subject: [PATCH 05/41] Fix format --- include/tvm/tir/op_attr_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/tvm/tir/op_attr_types.h b/include/tvm/tir/op_attr_types.h index 201e99c78950..43a097d83150 100644 --- a/include/tvm/tir/op_attr_types.h +++ b/include/tvm/tir/op_attr_types.h @@ -28,9 +28,9 @@ #ifndef TVM_TIR_OP_ATTR_TYPES_H_ #define TVM_TIR_OP_ATTR_TYPES_H_ +#include #include #include -#include namespace tvm { namespace tir { From bc06f2946036cb076eb4cd0c98baedc38f8ebde0 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 8 Apr 2021 14:07:12 -0700 Subject: [PATCH 06/41] Fix attrmap missing value issue --- src/tir/transforms/lower_intrin.cc | 23 +- tests/python/unittest/test.c | 578 +++++++++++++++++++++++++++++ 2 files changed, 590 insertions(+), 11 deletions(-) create mode 100644 tests/python/unittest/test.c diff --git a/src/tir/transforms/lower_intrin.cc b/src/tir/transforms/lower_intrin.cc index 085141970638..34abee947b74 100644 --- a/src/tir/transforms/lower_intrin.cc +++ b/src/tir/transforms/lower_intrin.cc @@ -58,18 +58,19 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { PrimExpr VisitExpr_(const CallNode* op) final { if (auto* ptr_op = op->op.as()) { - for (size_t i = 0; i < patterns_.size(); ++i) { - auto default_intrin = Op::GetAttrMap(patterns_[i]); - FLowerIntrinsic f = default_intrin.get(GetRef(ptr_op), nullptr); - const PrimExpr e = GetRef(op); - if (f != nullptr) { - PrimExpr r = f(e); - ICHECK(r.defined()) << "intrinsic rule must always return valid Expr"; - if (!r.same_as(e)) { - r = this->VisitExpr(r); - if (r.defined()) return r; + for (size_t i = 0; i < patterns_.size(); ++i) + if (Op::HasAttrMap(patterns_[i])) { + auto default_intrin = Op::GetAttrMap(patterns_[i]); + FLowerIntrinsic f = default_intrin.get(GetRef(ptr_op), nullptr); + const PrimExpr e = GetRef(op); + if (f != nullptr) { + PrimExpr r = f(e); + ICHECK(r.defined()) << "intrinsic rule must always return valid Expr"; + if (!r.same_as(e)) { + r = this->VisitExpr(r); + if (r.defined()) return r; + } } - } } } return IRMutatorWithAnalyzer::VisitExpr_(op); diff --git a/tests/python/unittest/test.c b/tests/python/unittest/test.c new file mode 100644 index 000000000000..adcc708a0bba --- /dev/null +++ b/tests/python/unittest/test.c @@ -0,0 +1,578 @@ +// tvm target: c -keys=cpu -link-params=1 +#define TVM_EXPORTS +#include "tvm/runtime/c_runtime_api.h" +#include "tvm/runtime/c_backend_api.h" +#include +void* __tvm_module_ctx = NULL; + +#ifdef __cplusplus +extern "C" { +#endif +static const double __tvm_param__p1[81] = { + -0x1.11eaa9e4d9b01p+1023, 0x1.1810ae97e4b43p+1021, -0x1.9fdc65fb7041dp+1023, 0x1.482627f5d6e4ap+1023, + -0x1.0772953561f07p+1020, 0x1.48a84e6cecff5p+1023, -0x1.26e28d17f635fp+1019, 0x1.7768fb3cff3c2p+1023, + -0x1.3c2fe44e75b9ap+1023, 0x1.44fb53557d3efp+1019, -0x1.5ee5fc8e28f1fp+1019, 0x1.0eb0a8c81c2a7p+1021, + -0x1.751ab22340c33p+1023, 0x1.d2c2afd7bd2a3p+1021, -0x1.be8801836629fp+1020, 0x1.f04f446513665p+1022, + -0x1.352ef6c4f0945p+1023, 0x1.5604c1ff524cap+1023, -0x1.3a5c224658ffep+1023, 0x1.44ac607a3d647p+1020, + -0x1.b882a496cdd23p+1021, 0x1.c0c6f66b80247p+1023, -0x1.ae594846fb8dep+1023, 0x1.596ed369fe11bp+1023, + -0x1.963dc3980f599p+1022, 0x1.d00f9052bfd4ap+1023, -0x1.6c00017c39815p+1022, 0x1.e9cce03ca9067p+1022, + -0x1.ba66095631057p+1022, 0x1.cd60aa80b3167p+1022, -0x1.9c69db1dec421p+1022, 0x1.52b9a71c13bb6p+1023, + -0x1.0df52422dae5bp+1022, 0x1.5ce9afae3d1dp+1023, -0x1.00b94a2dbe44bp+1023, 0x1.ec10a44afe9a7p+1021, + -0x1.cd70a5e91633ep+1023, 0x1.490108a32c333p+1022, -0x1.d09fd23a2744bp+1023, 0x1.cc8db1ba019e9p+1022, + -0x1.91ce17c01a327p+1022, 0x1.3e579d5d1070bp+1022, -0x1.23f0631410215p+1023, 0x1.d5f35bfe175efp+1023, + -0x1.571752d7f1f98p+1023, 0x1.01c8bd32f5264p+1023, -0x1.9d526c6f2eb1fp+1018, 0x1.6db67c710f181p+1022, + -0x1.79bfcc89df706p+1023, 0x1.81462f31a7f53p+1022, -0x1.e3e4f58fb370dp+1023, 0x1.36cf2d16c35a8p+1023, + -0x1.d7633decbb7e9p+1023, 0x1.9bfc0d68f0c77p+1020, -0x1.2ab53ffafc01p+1023, 0x1.620951c440317p+1023, + -0x1.549904988fcaep+1023, 0x1.6f7b8dd75aeb7p+1023, -0x1.1eedf60463e45p+1022, 0x1.45fa6ed55ef4fp+1020, + -0x1.d55a8ffb931efp+1021, 0x1.603eb928cf331p+1022, -0x1.f0122563885dep+1023, 0x1.3e048e5d5ae8cp+1023, + -0x1.aea9dcde66ee6p+1023, 0x1.8cfffa466229ap+1023, -0x1.81c661c48b6c3p+1021, 0x1.5313d6955731dp+1023, + -0x1.78dc9c04a57b1p+1023, 0x1.8692f83761623p+1021, -0x1.ae67377e5a8d3p+1021, 0x1.8b025daa1d0cfp+1023, + -0x1.51b58718ddaeap+1023, 0x1.dcece6dd29963p+1023, -0x1.d86ef9cfb2dcbp+1022, 0x1.4ec982c572341p+1022, + -0x1.4fcb41ca2c159p+1022, 0x1.43fb3809efdc5p+1022, -0x1.1bb063e8c0906p+1023, 0x1.3dbf5a1b6dc8fp+1023, + -0x1.f2e148163392p+1023 +}; +#ifdef __cplusplus +} // extern "C" +#endif + +#ifdef __cplusplus +extern "C" { +#endif +static const double __tvm_param__p0[81] = { + -0x1.4dd47c51b72c8p+1023, 0x1.d9786a2b4d522p+1023, -0x1.fa07659bbc332p+1023, 0x1.6a62d30e83376p+1023, + -0x1.61089010741afp+1023, 0x1.656acf3ee1f7fp+1018, -0x1.2db66cc2a2f89p+1022, 0x1.e5cff3f6e202fp+1021, + -0x1.fd1a7adf445e9p+1022, 0x1.e260ae3916fb7p+1020, -0x1.1733d4372196cp+1023, 0x1.1d3650da0488dp+1023, + -0x1.9429cbc9a0347p+1020, 0x1.fc2c4446caedfp+1022, -0x1.06ad552e50b7bp+1022, 0x1.a0d419cf3706fp+1021, + -0x1.c8d619d83c835p+1022, 0x1.3dee9d58c177dp+1023, -0x1.bd6154f334a5cp+1023, 0x1.6146fc6d83fbfp+1023, + -0x1.cee100d78ceebp+1022, 0x1.156c5c9703e2ep+1023, -0x1.c0d33df619823p+1023, 0x1.e778228bfda2dp+1023, + -0x1.b71d84a1818fdp+1022, 0x1.aadb7c9781ddfp+1019, -0x1.257d2093da1f4p+1023, 0x1.3bc9cabe81393p+1023, + -0x1.1ef74a333e921p+1023, 0x1.729faa7d44b87p+1023, -0x1.938c107565401p+1022, 0x1.61fe4a0fd703fp+1023, + -0x1.d3f042f02589bp+1021, 0x1.394c485132ed6p+1023, -0x1.9c0dc79f151b1p+1022, 0x1.d08cd5c2f2f14p+1023, + -0x1.7e0dd67569d46p+1023, 0x1.f40c7704a373fp+1019, -0x1.c65642f30faf3p+1022, 0x1.673c607f8063p+1023, + -0x1.ed604120af0bfp+1020, 0x1.e52bb5083a4f8p+1023, -0x1.dd0485959554ap+1023, 0x1.2073becbe0e7fp+1018, + -0x1.93c95a54c07bdp+1022, 0x1.b338edb0a925dp+1022, -0x1.8af01f1153728p+1023, 0x1.5690181b1cddfp+1021, + -0x1.4211eaa65bf6fp+1019, 0x1.2fd8941825303p+1022, -0x1.1d8bf2b895a22p+1023, 0x1.ace68f96b68f3p+1022, + -0x1.6b2ad59d1d04dp+1023, 0x1.16479152b0a65p+1022, -0x1.74b3a2915b9cep+1023, 0x1.289a824cb08dfp+1021, + -0x1.71a2e9804bdfdp+1023, 0x1.e3b2df87e1868p+1023, -0x1.f5561ee543c3ep+1023, 0x1.c2da5999d986fp+1019, + -0x1.09513fd29ecafp+1019, 0x1.cf733cf0fb9bap+1023, -0x1.7cb5b76cfc722p+1023, 0x1.761f94b3f9df7p+1022, + -0x1.5ab3ed78fd4d3p+1023, 0x1.c74b6fe00f673p+1022, -0x1.5ccc5ce2a41fap+1023, 0x1.ad2225037bfefp+1021, + -0x1.b51724e855eebp+1021, 0x1.e56f375dda8bfp+1017, -0x1.199148aafc6c7p+1022, 0x1.bc4e7440f731ep+1023, + -0x1.7e43d1934185dp+1023, 0x1.ffd35b2d4e39fp+1021, -0x1.0454b56089466p+1023, 0x1.85d74b4c7757fp+1018, + -0x1.c8c44749af163p+1021, 0x1.d836a0252c82cp+1023, -0x1.8d503b03a94d9p+1022, 0x1.35c61ffc07cefp+1023, + -0x1.5912c93559c8ep+1023 +}; +#ifdef __cplusplus +} // extern "C" +#endif +#ifdef __cplusplus +extern "C" +#endif +TVM_DLL int32_t fused_nn_contrib_conv2d_NCHWc_154(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle) { + void* arg0 = (((TVMValue*)args)[0].v_handle); + int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)]; + void* arg1 = (((TVMValue*)args)[1].v_handle); + int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)]; + void* arg2 = (((TVMValue*)args)[2].v_handle); + int32_t arg2_code = ((int32_t*)arg_type_ids)[(2)]; + void* placeholder = (((DLTensor*)arg0)[0].data); + void* arg0_shape = (((DLTensor*)arg0)[0].shape); + void* arg0_strides = (((DLTensor*)arg0)[0].strides); + int32_t dev_id = (((DLTensor*)arg0)[0].device.device_id); + void* placeholder1 = (((DLTensor*)arg1)[0].data); + void* arg1_shape = (((DLTensor*)arg1)[0].shape); + void* arg1_strides = (((DLTensor*)arg1)[0].strides); + void* conv2d_NCHWc = (((DLTensor*)arg2)[0].data); + void* arg2_shape = (((DLTensor*)arg2)[0].shape); + void* arg2_strides = (((DLTensor*)arg2)[0].strides); + if (!(arg0_strides == NULL)) { + } + if (!(arg1_strides == NULL)) { + } + if (!(arg2_strides == NULL)) { + } + for (int32_t n_oc_chunk_fused_oh_fused = 0; n_oc_chunk_fused_oh_fused < 12; ++n_oc_chunk_fused_oh_fused) { + double conv2d_NCHWc_global[36]; + for (int32_t oc_block_c_init = 0; oc_block_c_init < 3; ++oc_block_c_init) { + conv2d_NCHWc_global[(oc_block_c_init)] = 0.000000e+00; + } + for (int32_t oc_block_c_init1 = 0; oc_block_c_init1 < 3; ++oc_block_c_init1) { + conv2d_NCHWc_global[((oc_block_c_init1 + 3))] = 0.000000e+00; + } + for (int32_t oc_block_c_init2 = 0; oc_block_c_init2 < 3; ++oc_block_c_init2) { + conv2d_NCHWc_global[((oc_block_c_init2 + 6))] = 0.000000e+00; + } + for (int32_t oc_block_c_init3 = 0; oc_block_c_init3 < 3; ++oc_block_c_init3) { + conv2d_NCHWc_global[((oc_block_c_init3 + 9))] = 0.000000e+00; + } + for (int32_t oc_block_c_init4 = 0; oc_block_c_init4 < 3; ++oc_block_c_init4) { + conv2d_NCHWc_global[((oc_block_c_init4 + 12))] = 0.000000e+00; + } + for (int32_t oc_block_c_init5 = 0; oc_block_c_init5 < 3; ++oc_block_c_init5) { + conv2d_NCHWc_global[((oc_block_c_init5 + 15))] = 0.000000e+00; + } + for (int32_t oc_block_c_init6 = 0; oc_block_c_init6 < 3; ++oc_block_c_init6) { + conv2d_NCHWc_global[((oc_block_c_init6 + 18))] = 0.000000e+00; + } + for (int32_t oc_block_c_init7 = 0; oc_block_c_init7 < 3; ++oc_block_c_init7) { + conv2d_NCHWc_global[((oc_block_c_init7 + 21))] = 0.000000e+00; + } + for (int32_t oc_block_c_init8 = 0; oc_block_c_init8 < 3; ++oc_block_c_init8) { + conv2d_NCHWc_global[((oc_block_c_init8 + 24))] = 0.000000e+00; + } + for (int32_t oc_block_c_init9 = 0; oc_block_c_init9 < 3; ++oc_block_c_init9) { + conv2d_NCHWc_global[((oc_block_c_init9 + 27))] = 0.000000e+00; + } + for (int32_t oc_block_c_init10 = 0; oc_block_c_init10 < 3; ++oc_block_c_init10) { + conv2d_NCHWc_global[((oc_block_c_init10 + 30))] = 0.000000e+00; + } + for (int32_t oc_block_c_init11 = 0; oc_block_c_init11 < 3; ++oc_block_c_init11) { + conv2d_NCHWc_global[((oc_block_c_init11 + 33))] = 0.000000e+00; + } + for (int32_t kh = 0; kh < 3; ++kh) { + for (int32_t kw = 0; kw < 3; ++kw) { + for (int32_t ic_inner = 0; ic_inner < 3; ++ic_inner) { + for (int32_t oc_block_c = 0; oc_block_c < 3; ++oc_block_c) { + conv2d_NCHWc_global[(oc_block_c)] = (conv2d_NCHWc_global[(oc_block_c)] + (((double*)placeholder)[(((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c))])); + } + for (int32_t oc_block_c1 = 0; oc_block_c1 < 3; ++oc_block_c1) { + conv2d_NCHWc_global[((oc_block_c1 + 3))] = (conv2d_NCHWc_global[((oc_block_c1 + 3))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 3))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c1))])); + } + for (int32_t oc_block_c2 = 0; oc_block_c2 < 3; ++oc_block_c2) { + conv2d_NCHWc_global[((oc_block_c2 + 6))] = (conv2d_NCHWc_global[((oc_block_c2 + 6))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 6))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c2))])); + } + for (int32_t oc_block_c3 = 0; oc_block_c3 < 3; ++oc_block_c3) { + conv2d_NCHWc_global[((oc_block_c3 + 9))] = (conv2d_NCHWc_global[((oc_block_c3 + 9))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 9))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c3))])); + } + for (int32_t oc_block_c4 = 0; oc_block_c4 < 3; ++oc_block_c4) { + conv2d_NCHWc_global[((oc_block_c4 + 12))] = (conv2d_NCHWc_global[((oc_block_c4 + 12))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 12))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c4))])); + } + for (int32_t oc_block_c5 = 0; oc_block_c5 < 3; ++oc_block_c5) { + conv2d_NCHWc_global[((oc_block_c5 + 15))] = (conv2d_NCHWc_global[((oc_block_c5 + 15))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 15))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c5))])); + } + for (int32_t oc_block_c6 = 0; oc_block_c6 < 3; ++oc_block_c6) { + conv2d_NCHWc_global[((oc_block_c6 + 18))] = (conv2d_NCHWc_global[((oc_block_c6 + 18))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 18))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c6))])); + } + for (int32_t oc_block_c7 = 0; oc_block_c7 < 3; ++oc_block_c7) { + conv2d_NCHWc_global[((oc_block_c7 + 21))] = (conv2d_NCHWc_global[((oc_block_c7 + 21))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 21))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c7))])); + } + for (int32_t oc_block_c8 = 0; oc_block_c8 < 3; ++oc_block_c8) { + conv2d_NCHWc_global[((oc_block_c8 + 24))] = (conv2d_NCHWc_global[((oc_block_c8 + 24))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 24))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c8))])); + } + for (int32_t oc_block_c9 = 0; oc_block_c9 < 3; ++oc_block_c9) { + conv2d_NCHWc_global[((oc_block_c9 + 27))] = (conv2d_NCHWc_global[((oc_block_c9 + 27))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 27))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c9))])); + } + for (int32_t oc_block_c10 = 0; oc_block_c10 < 3; ++oc_block_c10) { + conv2d_NCHWc_global[((oc_block_c10 + 30))] = (conv2d_NCHWc_global[((oc_block_c10 + 30))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 30))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c10))])); + } + for (int32_t oc_block_c11 = 0; oc_block_c11 < 3; ++oc_block_c11) { + conv2d_NCHWc_global[((oc_block_c11 + 33))] = (conv2d_NCHWc_global[((oc_block_c11 + 33))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 33))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c11))])); + } + } + } + } + for (int32_t ow_inner = 0; ow_inner < 12; ++ow_inner) { + for (int32_t oc_block = 0; oc_block < 3; ++oc_block) { + ((double*)conv2d_NCHWc)[((((n_oc_chunk_fused_oh_fused * 36) + (ow_inner * 3)) + oc_block))] = conv2d_NCHWc_global[(((ow_inner * 3) + oc_block))]; + } + } + } + return 0; +} + +#ifdef __cplusplus +extern "C" +#endif +TVM_DLL int32_t fused_nn_contrib_conv2d_NCHWc_153(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle) { + void* arg0 = (((TVMValue*)args)[0].v_handle); + int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)]; + void* arg1 = (((TVMValue*)args)[1].v_handle); + int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)]; + void* arg2 = (((TVMValue*)args)[2].v_handle); + int32_t arg2_code = ((int32_t*)arg_type_ids)[(2)]; + void* placeholder = (((DLTensor*)arg0)[0].data); + void* arg0_shape = (((DLTensor*)arg0)[0].shape); + void* arg0_strides = (((DLTensor*)arg0)[0].strides); + int32_t dev_id = (((DLTensor*)arg0)[0].device.device_id); + void* placeholder1 = (((DLTensor*)arg1)[0].data); + void* arg1_shape = (((DLTensor*)arg1)[0].shape); + void* arg1_strides = (((DLTensor*)arg1)[0].strides); + void* conv2d_NCHWc = (((DLTensor*)arg2)[0].data); + void* arg2_shape = (((DLTensor*)arg2)[0].shape); + void* arg2_strides = (((DLTensor*)arg2)[0].strides); + if (!(arg0_strides == NULL)) { + } + if (!(arg1_strides == NULL)) { + } + if (!(arg2_strides == NULL)) { + } + for (int32_t n_oc_chunk_fused_oh_fused = 0; n_oc_chunk_fused_oh_fused < 10; ++n_oc_chunk_fused_oh_fused) { + double conv2d_NCHWc_global[30]; + for (int32_t oc_block_c_init = 0; oc_block_c_init < 3; ++oc_block_c_init) { + conv2d_NCHWc_global[(oc_block_c_init)] = 0.000000e+00; + } + for (int32_t oc_block_c_init1 = 0; oc_block_c_init1 < 3; ++oc_block_c_init1) { + conv2d_NCHWc_global[((oc_block_c_init1 + 3))] = 0.000000e+00; + } + for (int32_t oc_block_c_init2 = 0; oc_block_c_init2 < 3; ++oc_block_c_init2) { + conv2d_NCHWc_global[((oc_block_c_init2 + 6))] = 0.000000e+00; + } + for (int32_t oc_block_c_init3 = 0; oc_block_c_init3 < 3; ++oc_block_c_init3) { + conv2d_NCHWc_global[((oc_block_c_init3 + 9))] = 0.000000e+00; + } + for (int32_t oc_block_c_init4 = 0; oc_block_c_init4 < 3; ++oc_block_c_init4) { + conv2d_NCHWc_global[((oc_block_c_init4 + 12))] = 0.000000e+00; + } + for (int32_t oc_block_c_init5 = 0; oc_block_c_init5 < 3; ++oc_block_c_init5) { + conv2d_NCHWc_global[((oc_block_c_init5 + 15))] = 0.000000e+00; + } + for (int32_t oc_block_c_init6 = 0; oc_block_c_init6 < 3; ++oc_block_c_init6) { + conv2d_NCHWc_global[((oc_block_c_init6 + 18))] = 0.000000e+00; + } + for (int32_t oc_block_c_init7 = 0; oc_block_c_init7 < 3; ++oc_block_c_init7) { + conv2d_NCHWc_global[((oc_block_c_init7 + 21))] = 0.000000e+00; + } + for (int32_t oc_block_c_init8 = 0; oc_block_c_init8 < 3; ++oc_block_c_init8) { + conv2d_NCHWc_global[((oc_block_c_init8 + 24))] = 0.000000e+00; + } + for (int32_t oc_block_c_init9 = 0; oc_block_c_init9 < 3; ++oc_block_c_init9) { + conv2d_NCHWc_global[((oc_block_c_init9 + 27))] = 0.000000e+00; + } + for (int32_t kh = 0; kh < 3; ++kh) { + for (int32_t kw = 0; kw < 3; ++kw) { + for (int32_t ic_inner = 0; ic_inner < 3; ++ic_inner) { + for (int32_t oc_block_c = 0; oc_block_c < 3; ++oc_block_c) { + conv2d_NCHWc_global[(oc_block_c)] = (conv2d_NCHWc_global[(oc_block_c)] + (((double*)placeholder)[(((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c))])); + } + for (int32_t oc_block_c1 = 0; oc_block_c1 < 3; ++oc_block_c1) { + conv2d_NCHWc_global[((oc_block_c1 + 3))] = (conv2d_NCHWc_global[((oc_block_c1 + 3))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 3))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c1))])); + } + for (int32_t oc_block_c2 = 0; oc_block_c2 < 3; ++oc_block_c2) { + conv2d_NCHWc_global[((oc_block_c2 + 6))] = (conv2d_NCHWc_global[((oc_block_c2 + 6))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 6))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c2))])); + } + for (int32_t oc_block_c3 = 0; oc_block_c3 < 3; ++oc_block_c3) { + conv2d_NCHWc_global[((oc_block_c3 + 9))] = (conv2d_NCHWc_global[((oc_block_c3 + 9))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 9))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c3))])); + } + for (int32_t oc_block_c4 = 0; oc_block_c4 < 3; ++oc_block_c4) { + conv2d_NCHWc_global[((oc_block_c4 + 12))] = (conv2d_NCHWc_global[((oc_block_c4 + 12))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 12))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c4))])); + } + for (int32_t oc_block_c5 = 0; oc_block_c5 < 3; ++oc_block_c5) { + conv2d_NCHWc_global[((oc_block_c5 + 15))] = (conv2d_NCHWc_global[((oc_block_c5 + 15))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 15))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c5))])); + } + for (int32_t oc_block_c6 = 0; oc_block_c6 < 3; ++oc_block_c6) { + conv2d_NCHWc_global[((oc_block_c6 + 18))] = (conv2d_NCHWc_global[((oc_block_c6 + 18))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 18))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c6))])); + } + for (int32_t oc_block_c7 = 0; oc_block_c7 < 3; ++oc_block_c7) { + conv2d_NCHWc_global[((oc_block_c7 + 21))] = (conv2d_NCHWc_global[((oc_block_c7 + 21))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 21))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c7))])); + } + for (int32_t oc_block_c8 = 0; oc_block_c8 < 3; ++oc_block_c8) { + conv2d_NCHWc_global[((oc_block_c8 + 24))] = (conv2d_NCHWc_global[((oc_block_c8 + 24))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 24))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c8))])); + } + for (int32_t oc_block_c9 = 0; oc_block_c9 < 3; ++oc_block_c9) { + conv2d_NCHWc_global[((oc_block_c9 + 27))] = (conv2d_NCHWc_global[((oc_block_c9 + 27))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 27))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c9))])); + } + } + } + } + for (int32_t ow_inner = 0; ow_inner < 10; ++ow_inner) { + for (int32_t oc_block = 0; oc_block < 3; ++oc_block) { + ((double*)conv2d_NCHWc)[((((n_oc_chunk_fused_oh_fused * 30) + (ow_inner * 3)) + oc_block))] = conv2d_NCHWc_global[(((ow_inner * 3) + oc_block))]; + } + } + } + return 0; +} + +#ifdef __cplusplus +extern "C" +#endif +TVM_DLL int32_t fused_layout_transform_115(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle) { + void* arg0 = (((TVMValue*)args)[0].v_handle); + int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)]; + void* arg1 = (((TVMValue*)args)[1].v_handle); + int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)]; + void* placeholder = (((DLTensor*)arg0)[0].data); + void* arg0_shape = (((DLTensor*)arg0)[0].shape); + void* arg0_strides = (((DLTensor*)arg0)[0].strides); + int32_t dev_id = (((DLTensor*)arg0)[0].device.device_id); + void* T_layout_trans = (((DLTensor*)arg1)[0].data); + void* arg1_shape = (((DLTensor*)arg1)[0].shape); + void* arg1_strides = (((DLTensor*)arg1)[0].strides); + if (!(arg0_strides == NULL)) { + } + if (!(arg1_strides == NULL)) { + } + for (int32_t ax0_ax1_fused = 0; ax0_ax1_fused < 3; ++ax0_ax1_fused) { + for (int32_t ax2 = 0; ax2 < 8; ++ax2) { + for (int32_t ax3_inner = 0; ax3_inner < 8; ++ax3_inner) { + ((double*)T_layout_trans)[((((ax0_ax1_fused * 64) + (ax2 * 8)) + ax3_inner))] = ((double*)placeholder)[((((ax2 * 24) + (ax3_inner * 3)) + ax0_ax1_fused))]; + } + } + } + return 0; +} + +#ifdef __cplusplus +extern "C" +#endif +TVM_DLL int32_t fused_nn_contrib_conv2d_NCHWc_152(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle) { + void* arg0 = (((TVMValue*)args)[0].v_handle); + int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)]; + void* arg1 = (((TVMValue*)args)[1].v_handle); + int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)]; + void* arg2 = (((TVMValue*)args)[2].v_handle); + int32_t arg2_code = ((int32_t*)arg_type_ids)[(2)]; + void* placeholder = (((DLTensor*)arg0)[0].data); + void* arg0_shape = (((DLTensor*)arg0)[0].shape); + void* arg0_strides = (((DLTensor*)arg0)[0].strides); + int32_t dev_id = (((DLTensor*)arg0)[0].device.device_id); + void* placeholder1 = (((DLTensor*)arg1)[0].data); + void* arg1_shape = (((DLTensor*)arg1)[0].shape); + void* arg1_strides = (((DLTensor*)arg1)[0].strides); + void* conv2d_NCHWc = (((DLTensor*)arg2)[0].data); + void* arg2_shape = (((DLTensor*)arg2)[0].shape); + void* arg2_strides = (((DLTensor*)arg2)[0].strides); + if (!(arg0_strides == NULL)) { + } + if (!(arg1_strides == NULL)) { + } + if (!(arg2_strides == NULL)) { + } + for (int32_t n_oc_chunk_fused_oh_fused = 0; n_oc_chunk_fused_oh_fused < 8; ++n_oc_chunk_fused_oh_fused) { + double conv2d_NCHWc_global[24]; + for (int32_t oc_block_c_init = 0; oc_block_c_init < 3; ++oc_block_c_init) { + conv2d_NCHWc_global[(oc_block_c_init)] = 0.000000e+00; + } + for (int32_t oc_block_c_init1 = 0; oc_block_c_init1 < 3; ++oc_block_c_init1) { + conv2d_NCHWc_global[((oc_block_c_init1 + 3))] = 0.000000e+00; + } + for (int32_t oc_block_c_init2 = 0; oc_block_c_init2 < 3; ++oc_block_c_init2) { + conv2d_NCHWc_global[((oc_block_c_init2 + 6))] = 0.000000e+00; + } + for (int32_t oc_block_c_init3 = 0; oc_block_c_init3 < 3; ++oc_block_c_init3) { + conv2d_NCHWc_global[((oc_block_c_init3 + 9))] = 0.000000e+00; + } + for (int32_t oc_block_c_init4 = 0; oc_block_c_init4 < 3; ++oc_block_c_init4) { + conv2d_NCHWc_global[((oc_block_c_init4 + 12))] = 0.000000e+00; + } + for (int32_t oc_block_c_init5 = 0; oc_block_c_init5 < 3; ++oc_block_c_init5) { + conv2d_NCHWc_global[((oc_block_c_init5 + 15))] = 0.000000e+00; + } + for (int32_t oc_block_c_init6 = 0; oc_block_c_init6 < 3; ++oc_block_c_init6) { + conv2d_NCHWc_global[((oc_block_c_init6 + 18))] = 0.000000e+00; + } + for (int32_t oc_block_c_init7 = 0; oc_block_c_init7 < 3; ++oc_block_c_init7) { + conv2d_NCHWc_global[((oc_block_c_init7 + 21))] = 0.000000e+00; + } + for (int32_t kh = 0; kh < 3; ++kh) { + for (int32_t kw = 0; kw < 3; ++kw) { + for (int32_t ic_inner = 0; ic_inner < 3; ++ic_inner) { + for (int32_t oc_block_c = 0; oc_block_c < 3; ++oc_block_c) { + conv2d_NCHWc_global[(oc_block_c)] = (conv2d_NCHWc_global[(oc_block_c)] + (((double*)placeholder)[(((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c))])); + } + for (int32_t oc_block_c1 = 0; oc_block_c1 < 3; ++oc_block_c1) { + conv2d_NCHWc_global[((oc_block_c1 + 3))] = (conv2d_NCHWc_global[((oc_block_c1 + 3))] + (((double*)placeholder)[((((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner) + 3))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c1))])); + } + for (int32_t oc_block_c2 = 0; oc_block_c2 < 3; ++oc_block_c2) { + conv2d_NCHWc_global[((oc_block_c2 + 6))] = (conv2d_NCHWc_global[((oc_block_c2 + 6))] + (((double*)placeholder)[((((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner) + 6))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c2))])); + } + for (int32_t oc_block_c3 = 0; oc_block_c3 < 3; ++oc_block_c3) { + conv2d_NCHWc_global[((oc_block_c3 + 9))] = (conv2d_NCHWc_global[((oc_block_c3 + 9))] + (((double*)placeholder)[((((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner) + 9))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c3))])); + } + for (int32_t oc_block_c4 = 0; oc_block_c4 < 3; ++oc_block_c4) { + conv2d_NCHWc_global[((oc_block_c4 + 12))] = (conv2d_NCHWc_global[((oc_block_c4 + 12))] + (((double*)placeholder)[((((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner) + 12))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c4))])); + } + for (int32_t oc_block_c5 = 0; oc_block_c5 < 3; ++oc_block_c5) { + conv2d_NCHWc_global[((oc_block_c5 + 15))] = (conv2d_NCHWc_global[((oc_block_c5 + 15))] + (((double*)placeholder)[((((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner) + 15))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c5))])); + } + for (int32_t oc_block_c6 = 0; oc_block_c6 < 3; ++oc_block_c6) { + conv2d_NCHWc_global[((oc_block_c6 + 18))] = (conv2d_NCHWc_global[((oc_block_c6 + 18))] + (((double*)placeholder)[((((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner) + 18))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c6))])); + } + for (int32_t oc_block_c7 = 0; oc_block_c7 < 3; ++oc_block_c7) { + conv2d_NCHWc_global[((oc_block_c7 + 21))] = (conv2d_NCHWc_global[((oc_block_c7 + 21))] + (((double*)placeholder)[((((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner) + 21))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c7))])); + } + } + } + } + for (int32_t ow_inner = 0; ow_inner < 8; ++ow_inner) { + for (int32_t oc_block = 0; oc_block < 3; ++oc_block) { + ((double*)conv2d_NCHWc)[((((n_oc_chunk_fused_oh_fused * 24) + (ow_inner * 3)) + oc_block))] = conv2d_NCHWc_global[(((ow_inner * 3) + oc_block))]; + } + } + } + return 0; +} + +#ifdef __cplusplus +extern "C" +#endif +TVM_DLL int32_t fused_layout_transform_116(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle) { + void* arg0 = (((TVMValue*)args)[0].v_handle); + int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)]; + void* arg1 = (((TVMValue*)args)[1].v_handle); + int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)]; + void* placeholder = (((DLTensor*)arg0)[0].data); + void* arg0_shape = (((DLTensor*)arg0)[0].shape); + void* arg0_strides = (((DLTensor*)arg0)[0].strides); + int32_t dev_id = (((DLTensor*)arg0)[0].device.device_id); + void* T_layout_trans = (((DLTensor*)arg1)[0].data); + void* arg1_shape = (((DLTensor*)arg1)[0].shape); + void* arg1_strides = (((DLTensor*)arg1)[0].strides); + if (!(arg0_strides == NULL)) { + } + if (!(arg1_strides == NULL)) { + } + for (int32_t ax0_ax1_fused_ax2_fused = 0; ax0_ax1_fused_ax2_fused < 16; ++ax0_ax1_fused_ax2_fused) { + for (int32_t ax3 = 0; ax3 < 16; ++ax3) { + for (int32_t ax4_inner = 0; ax4_inner < 3; ++ax4_inner) { + ((double*)T_layout_trans)[((((ax0_ax1_fused_ax2_fused * 48) + (ax3 * 3)) + ax4_inner))] = ((double*)placeholder)[((((ax4_inner * 256) + (ax0_ax1_fused_ax2_fused * 16)) + ax3))]; + } + } + } + return 0; +} + +#ifdef __cplusplus +extern "C" +#endif +TVM_DLL int32_t fused_nn_contrib_conv2d_NCHWc_155(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle) { + void* arg0 = (((TVMValue*)args)[0].v_handle); + int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)]; + void* arg1 = (((TVMValue*)args)[1].v_handle); + int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)]; + void* arg2 = (((TVMValue*)args)[2].v_handle); + int32_t arg2_code = ((int32_t*)arg_type_ids)[(2)]; + void* placeholder = (((DLTensor*)arg0)[0].data); + void* arg0_shape = (((DLTensor*)arg0)[0].shape); + void* arg0_strides = (((DLTensor*)arg0)[0].strides); + int32_t dev_id = (((DLTensor*)arg0)[0].device.device_id); + void* placeholder1 = (((DLTensor*)arg1)[0].data); + void* arg1_shape = (((DLTensor*)arg1)[0].shape); + void* arg1_strides = (((DLTensor*)arg1)[0].strides); + void* conv2d_NCHWc = (((DLTensor*)arg2)[0].data); + void* arg2_shape = (((DLTensor*)arg2)[0].shape); + void* arg2_strides = (((DLTensor*)arg2)[0].strides); + if (!(arg0_strides == NULL)) { + } + if (!(arg1_strides == NULL)) { + } + if (!(arg2_strides == NULL)) { + } + for (int32_t n_oc_chunk_fused_oh_fused = 0; n_oc_chunk_fused_oh_fused < 14; ++n_oc_chunk_fused_oh_fused) { + double conv2d_NCHWc_global[42]; + for (int32_t oc_block_c_init = 0; oc_block_c_init < 3; ++oc_block_c_init) { + conv2d_NCHWc_global[(oc_block_c_init)] = 0.000000e+00; + } + for (int32_t oc_block_c_init1 = 0; oc_block_c_init1 < 3; ++oc_block_c_init1) { + conv2d_NCHWc_global[((oc_block_c_init1 + 3))] = 0.000000e+00; + } + for (int32_t oc_block_c_init2 = 0; oc_block_c_init2 < 3; ++oc_block_c_init2) { + conv2d_NCHWc_global[((oc_block_c_init2 + 6))] = 0.000000e+00; + } + for (int32_t oc_block_c_init3 = 0; oc_block_c_init3 < 3; ++oc_block_c_init3) { + conv2d_NCHWc_global[((oc_block_c_init3 + 9))] = 0.000000e+00; + } + for (int32_t oc_block_c_init4 = 0; oc_block_c_init4 < 3; ++oc_block_c_init4) { + conv2d_NCHWc_global[((oc_block_c_init4 + 12))] = 0.000000e+00; + } + for (int32_t oc_block_c_init5 = 0; oc_block_c_init5 < 3; ++oc_block_c_init5) { + conv2d_NCHWc_global[((oc_block_c_init5 + 15))] = 0.000000e+00; + } + for (int32_t oc_block_c_init6 = 0; oc_block_c_init6 < 3; ++oc_block_c_init6) { + conv2d_NCHWc_global[((oc_block_c_init6 + 18))] = 0.000000e+00; + } + for (int32_t oc_block_c_init7 = 0; oc_block_c_init7 < 3; ++oc_block_c_init7) { + conv2d_NCHWc_global[((oc_block_c_init7 + 21))] = 0.000000e+00; + } + for (int32_t oc_block_c_init8 = 0; oc_block_c_init8 < 3; ++oc_block_c_init8) { + conv2d_NCHWc_global[((oc_block_c_init8 + 24))] = 0.000000e+00; + } + for (int32_t oc_block_c_init9 = 0; oc_block_c_init9 < 3; ++oc_block_c_init9) { + conv2d_NCHWc_global[((oc_block_c_init9 + 27))] = 0.000000e+00; + } + for (int32_t oc_block_c_init10 = 0; oc_block_c_init10 < 3; ++oc_block_c_init10) { + conv2d_NCHWc_global[((oc_block_c_init10 + 30))] = 0.000000e+00; + } + for (int32_t oc_block_c_init11 = 0; oc_block_c_init11 < 3; ++oc_block_c_init11) { + conv2d_NCHWc_global[((oc_block_c_init11 + 33))] = 0.000000e+00; + } + for (int32_t oc_block_c_init12 = 0; oc_block_c_init12 < 3; ++oc_block_c_init12) { + conv2d_NCHWc_global[((oc_block_c_init12 + 36))] = 0.000000e+00; + } + for (int32_t oc_block_c_init13 = 0; oc_block_c_init13 < 3; ++oc_block_c_init13) { + conv2d_NCHWc_global[((oc_block_c_init13 + 39))] = 0.000000e+00; + } + for (int32_t kh = 0; kh < 3; ++kh) { + for (int32_t kw = 0; kw < 3; ++kw) { + for (int32_t ic_inner = 0; ic_inner < 3; ++ic_inner) { + for (int32_t oc_block_c = 0; oc_block_c < 3; ++oc_block_c) { + conv2d_NCHWc_global[(oc_block_c)] = (conv2d_NCHWc_global[(oc_block_c)] + (((double*)placeholder)[(((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c))])); + } + for (int32_t oc_block_c1 = 0; oc_block_c1 < 3; ++oc_block_c1) { + conv2d_NCHWc_global[((oc_block_c1 + 3))] = (conv2d_NCHWc_global[((oc_block_c1 + 3))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 3))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c1))])); + } + for (int32_t oc_block_c2 = 0; oc_block_c2 < 3; ++oc_block_c2) { + conv2d_NCHWc_global[((oc_block_c2 + 6))] = (conv2d_NCHWc_global[((oc_block_c2 + 6))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 6))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c2))])); + } + for (int32_t oc_block_c3 = 0; oc_block_c3 < 3; ++oc_block_c3) { + conv2d_NCHWc_global[((oc_block_c3 + 9))] = (conv2d_NCHWc_global[((oc_block_c3 + 9))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 9))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c3))])); + } + for (int32_t oc_block_c4 = 0; oc_block_c4 < 3; ++oc_block_c4) { + conv2d_NCHWc_global[((oc_block_c4 + 12))] = (conv2d_NCHWc_global[((oc_block_c4 + 12))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 12))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c4))])); + } + for (int32_t oc_block_c5 = 0; oc_block_c5 < 3; ++oc_block_c5) { + conv2d_NCHWc_global[((oc_block_c5 + 15))] = (conv2d_NCHWc_global[((oc_block_c5 + 15))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 15))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c5))])); + } + for (int32_t oc_block_c6 = 0; oc_block_c6 < 3; ++oc_block_c6) { + conv2d_NCHWc_global[((oc_block_c6 + 18))] = (conv2d_NCHWc_global[((oc_block_c6 + 18))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 18))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c6))])); + } + for (int32_t oc_block_c7 = 0; oc_block_c7 < 3; ++oc_block_c7) { + conv2d_NCHWc_global[((oc_block_c7 + 21))] = (conv2d_NCHWc_global[((oc_block_c7 + 21))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 21))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c7))])); + } + for (int32_t oc_block_c8 = 0; oc_block_c8 < 3; ++oc_block_c8) { + conv2d_NCHWc_global[((oc_block_c8 + 24))] = (conv2d_NCHWc_global[((oc_block_c8 + 24))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 24))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c8))])); + } + for (int32_t oc_block_c9 = 0; oc_block_c9 < 3; ++oc_block_c9) { + conv2d_NCHWc_global[((oc_block_c9 + 27))] = (conv2d_NCHWc_global[((oc_block_c9 + 27))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 27))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c9))])); + } + for (int32_t oc_block_c10 = 0; oc_block_c10 < 3; ++oc_block_c10) { + conv2d_NCHWc_global[((oc_block_c10 + 30))] = (conv2d_NCHWc_global[((oc_block_c10 + 30))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 30))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c10))])); + } + for (int32_t oc_block_c11 = 0; oc_block_c11 < 3; ++oc_block_c11) { + conv2d_NCHWc_global[((oc_block_c11 + 33))] = (conv2d_NCHWc_global[((oc_block_c11 + 33))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 33))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c11))])); + } + for (int32_t oc_block_c12 = 0; oc_block_c12 < 3; ++oc_block_c12) { + conv2d_NCHWc_global[((oc_block_c12 + 36))] = (conv2d_NCHWc_global[((oc_block_c12 + 36))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 36))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c12))])); + } + for (int32_t oc_block_c13 = 0; oc_block_c13 < 3; ++oc_block_c13) { + conv2d_NCHWc_global[((oc_block_c13 + 39))] = (conv2d_NCHWc_global[((oc_block_c13 + 39))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 39))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c13))])); + } + } + } + } + for (int32_t ow_inner = 0; ow_inner < 14; ++ow_inner) { + for (int32_t oc_block = 0; oc_block < 3; ++oc_block) { + ((double*)conv2d_NCHWc)[((((n_oc_chunk_fused_oh_fused * 42) + (ow_inner * 3)) + oc_block))] = conv2d_NCHWc_global[(((ow_inner * 3) + oc_block))]; + } + } + } + return 0; +} + +#ifdef __cplusplus +extern "C" +#endif +TVM_DLL int32_t _lookup_linked_param(void* args, int* arg_type_ids, int num_args, void* out_ret_value, int* out_ret_tcode, void* resource_handle) { + switch (((int64_t*) args)[0]) { + default: + out_ret_tcode[0] = 4; + return 0; + case 4: + ((uint64_t*)out_ret_value)[0] = (uint64_t) (uintptr_t) __tvm_param__p1; + out_ret_tcode[0] = 3; + return 0; + case 2: + ((uint64_t*)out_ret_value)[0] = (uint64_t) (uintptr_t) __tvm_param__p0; + out_ret_tcode[0] = 3; + return 0; + } +} From bb78dee400d55f361aa8c9c71a5fb866a697cd4b Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 8 Apr 2021 15:15:41 -0700 Subject: [PATCH 07/41] Try rerun CI --- src/tir/transforms/lower_intrin.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tir/transforms/lower_intrin.cc b/src/tir/transforms/lower_intrin.cc index 34abee947b74..27c4b2a00ebd 100644 --- a/src/tir/transforms/lower_intrin.cc +++ b/src/tir/transforms/lower_intrin.cc @@ -62,7 +62,7 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { if (Op::HasAttrMap(patterns_[i])) { auto default_intrin = Op::GetAttrMap(patterns_[i]); FLowerIntrinsic f = default_intrin.get(GetRef(ptr_op), nullptr); - const PrimExpr e = GetRef(op); + PrimExpr e = GetRef(op); if (f != nullptr) { PrimExpr r = f(e); ICHECK(r.defined()) << "intrinsic rule must always return valid Expr"; From ac2f560a1fdc6f2ddff5d72de4aa027f553bef20 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 8 Apr 2021 15:19:59 -0700 Subject: [PATCH 08/41] Add header --- tests/python/unittest/test.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/python/unittest/test.c b/tests/python/unittest/test.c index adcc708a0bba..4a3cdcc4112f 100644 --- a/tests/python/unittest/test.c +++ b/tests/python/unittest/test.c @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + // tvm target: c -keys=cpu -link-params=1 #define TVM_EXPORTS #include "tvm/runtime/c_runtime_api.h" From 9b11aa9338dc628cf11c54a6d8734c96995165d1 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 8 Apr 2021 15:20:54 -0700 Subject: [PATCH 09/41] Remove extra file --- tests/python/unittest/test.c | 597 ----------------------------------- 1 file changed, 597 deletions(-) delete mode 100644 tests/python/unittest/test.c diff --git a/tests/python/unittest/test.c b/tests/python/unittest/test.c deleted file mode 100644 index 4a3cdcc4112f..000000000000 --- a/tests/python/unittest/test.c +++ /dev/null @@ -1,597 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// tvm target: c -keys=cpu -link-params=1 -#define TVM_EXPORTS -#include "tvm/runtime/c_runtime_api.h" -#include "tvm/runtime/c_backend_api.h" -#include -void* __tvm_module_ctx = NULL; - -#ifdef __cplusplus -extern "C" { -#endif -static const double __tvm_param__p1[81] = { - -0x1.11eaa9e4d9b01p+1023, 0x1.1810ae97e4b43p+1021, -0x1.9fdc65fb7041dp+1023, 0x1.482627f5d6e4ap+1023, - -0x1.0772953561f07p+1020, 0x1.48a84e6cecff5p+1023, -0x1.26e28d17f635fp+1019, 0x1.7768fb3cff3c2p+1023, - -0x1.3c2fe44e75b9ap+1023, 0x1.44fb53557d3efp+1019, -0x1.5ee5fc8e28f1fp+1019, 0x1.0eb0a8c81c2a7p+1021, - -0x1.751ab22340c33p+1023, 0x1.d2c2afd7bd2a3p+1021, -0x1.be8801836629fp+1020, 0x1.f04f446513665p+1022, - -0x1.352ef6c4f0945p+1023, 0x1.5604c1ff524cap+1023, -0x1.3a5c224658ffep+1023, 0x1.44ac607a3d647p+1020, - -0x1.b882a496cdd23p+1021, 0x1.c0c6f66b80247p+1023, -0x1.ae594846fb8dep+1023, 0x1.596ed369fe11bp+1023, - -0x1.963dc3980f599p+1022, 0x1.d00f9052bfd4ap+1023, -0x1.6c00017c39815p+1022, 0x1.e9cce03ca9067p+1022, - -0x1.ba66095631057p+1022, 0x1.cd60aa80b3167p+1022, -0x1.9c69db1dec421p+1022, 0x1.52b9a71c13bb6p+1023, - -0x1.0df52422dae5bp+1022, 0x1.5ce9afae3d1dp+1023, -0x1.00b94a2dbe44bp+1023, 0x1.ec10a44afe9a7p+1021, - -0x1.cd70a5e91633ep+1023, 0x1.490108a32c333p+1022, -0x1.d09fd23a2744bp+1023, 0x1.cc8db1ba019e9p+1022, - -0x1.91ce17c01a327p+1022, 0x1.3e579d5d1070bp+1022, -0x1.23f0631410215p+1023, 0x1.d5f35bfe175efp+1023, - -0x1.571752d7f1f98p+1023, 0x1.01c8bd32f5264p+1023, -0x1.9d526c6f2eb1fp+1018, 0x1.6db67c710f181p+1022, - -0x1.79bfcc89df706p+1023, 0x1.81462f31a7f53p+1022, -0x1.e3e4f58fb370dp+1023, 0x1.36cf2d16c35a8p+1023, - -0x1.d7633decbb7e9p+1023, 0x1.9bfc0d68f0c77p+1020, -0x1.2ab53ffafc01p+1023, 0x1.620951c440317p+1023, - -0x1.549904988fcaep+1023, 0x1.6f7b8dd75aeb7p+1023, -0x1.1eedf60463e45p+1022, 0x1.45fa6ed55ef4fp+1020, - -0x1.d55a8ffb931efp+1021, 0x1.603eb928cf331p+1022, -0x1.f0122563885dep+1023, 0x1.3e048e5d5ae8cp+1023, - -0x1.aea9dcde66ee6p+1023, 0x1.8cfffa466229ap+1023, -0x1.81c661c48b6c3p+1021, 0x1.5313d6955731dp+1023, - -0x1.78dc9c04a57b1p+1023, 0x1.8692f83761623p+1021, -0x1.ae67377e5a8d3p+1021, 0x1.8b025daa1d0cfp+1023, - -0x1.51b58718ddaeap+1023, 0x1.dcece6dd29963p+1023, -0x1.d86ef9cfb2dcbp+1022, 0x1.4ec982c572341p+1022, - -0x1.4fcb41ca2c159p+1022, 0x1.43fb3809efdc5p+1022, -0x1.1bb063e8c0906p+1023, 0x1.3dbf5a1b6dc8fp+1023, - -0x1.f2e148163392p+1023 -}; -#ifdef __cplusplus -} // extern "C" -#endif - -#ifdef __cplusplus -extern "C" { -#endif -static const double __tvm_param__p0[81] = { - -0x1.4dd47c51b72c8p+1023, 0x1.d9786a2b4d522p+1023, -0x1.fa07659bbc332p+1023, 0x1.6a62d30e83376p+1023, - -0x1.61089010741afp+1023, 0x1.656acf3ee1f7fp+1018, -0x1.2db66cc2a2f89p+1022, 0x1.e5cff3f6e202fp+1021, - -0x1.fd1a7adf445e9p+1022, 0x1.e260ae3916fb7p+1020, -0x1.1733d4372196cp+1023, 0x1.1d3650da0488dp+1023, - -0x1.9429cbc9a0347p+1020, 0x1.fc2c4446caedfp+1022, -0x1.06ad552e50b7bp+1022, 0x1.a0d419cf3706fp+1021, - -0x1.c8d619d83c835p+1022, 0x1.3dee9d58c177dp+1023, -0x1.bd6154f334a5cp+1023, 0x1.6146fc6d83fbfp+1023, - -0x1.cee100d78ceebp+1022, 0x1.156c5c9703e2ep+1023, -0x1.c0d33df619823p+1023, 0x1.e778228bfda2dp+1023, - -0x1.b71d84a1818fdp+1022, 0x1.aadb7c9781ddfp+1019, -0x1.257d2093da1f4p+1023, 0x1.3bc9cabe81393p+1023, - -0x1.1ef74a333e921p+1023, 0x1.729faa7d44b87p+1023, -0x1.938c107565401p+1022, 0x1.61fe4a0fd703fp+1023, - -0x1.d3f042f02589bp+1021, 0x1.394c485132ed6p+1023, -0x1.9c0dc79f151b1p+1022, 0x1.d08cd5c2f2f14p+1023, - -0x1.7e0dd67569d46p+1023, 0x1.f40c7704a373fp+1019, -0x1.c65642f30faf3p+1022, 0x1.673c607f8063p+1023, - -0x1.ed604120af0bfp+1020, 0x1.e52bb5083a4f8p+1023, -0x1.dd0485959554ap+1023, 0x1.2073becbe0e7fp+1018, - -0x1.93c95a54c07bdp+1022, 0x1.b338edb0a925dp+1022, -0x1.8af01f1153728p+1023, 0x1.5690181b1cddfp+1021, - -0x1.4211eaa65bf6fp+1019, 0x1.2fd8941825303p+1022, -0x1.1d8bf2b895a22p+1023, 0x1.ace68f96b68f3p+1022, - -0x1.6b2ad59d1d04dp+1023, 0x1.16479152b0a65p+1022, -0x1.74b3a2915b9cep+1023, 0x1.289a824cb08dfp+1021, - -0x1.71a2e9804bdfdp+1023, 0x1.e3b2df87e1868p+1023, -0x1.f5561ee543c3ep+1023, 0x1.c2da5999d986fp+1019, - -0x1.09513fd29ecafp+1019, 0x1.cf733cf0fb9bap+1023, -0x1.7cb5b76cfc722p+1023, 0x1.761f94b3f9df7p+1022, - -0x1.5ab3ed78fd4d3p+1023, 0x1.c74b6fe00f673p+1022, -0x1.5ccc5ce2a41fap+1023, 0x1.ad2225037bfefp+1021, - -0x1.b51724e855eebp+1021, 0x1.e56f375dda8bfp+1017, -0x1.199148aafc6c7p+1022, 0x1.bc4e7440f731ep+1023, - -0x1.7e43d1934185dp+1023, 0x1.ffd35b2d4e39fp+1021, -0x1.0454b56089466p+1023, 0x1.85d74b4c7757fp+1018, - -0x1.c8c44749af163p+1021, 0x1.d836a0252c82cp+1023, -0x1.8d503b03a94d9p+1022, 0x1.35c61ffc07cefp+1023, - -0x1.5912c93559c8ep+1023 -}; -#ifdef __cplusplus -} // extern "C" -#endif -#ifdef __cplusplus -extern "C" -#endif -TVM_DLL int32_t fused_nn_contrib_conv2d_NCHWc_154(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle) { - void* arg0 = (((TVMValue*)args)[0].v_handle); - int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)]; - void* arg1 = (((TVMValue*)args)[1].v_handle); - int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)]; - void* arg2 = (((TVMValue*)args)[2].v_handle); - int32_t arg2_code = ((int32_t*)arg_type_ids)[(2)]; - void* placeholder = (((DLTensor*)arg0)[0].data); - void* arg0_shape = (((DLTensor*)arg0)[0].shape); - void* arg0_strides = (((DLTensor*)arg0)[0].strides); - int32_t dev_id = (((DLTensor*)arg0)[0].device.device_id); - void* placeholder1 = (((DLTensor*)arg1)[0].data); - void* arg1_shape = (((DLTensor*)arg1)[0].shape); - void* arg1_strides = (((DLTensor*)arg1)[0].strides); - void* conv2d_NCHWc = (((DLTensor*)arg2)[0].data); - void* arg2_shape = (((DLTensor*)arg2)[0].shape); - void* arg2_strides = (((DLTensor*)arg2)[0].strides); - if (!(arg0_strides == NULL)) { - } - if (!(arg1_strides == NULL)) { - } - if (!(arg2_strides == NULL)) { - } - for (int32_t n_oc_chunk_fused_oh_fused = 0; n_oc_chunk_fused_oh_fused < 12; ++n_oc_chunk_fused_oh_fused) { - double conv2d_NCHWc_global[36]; - for (int32_t oc_block_c_init = 0; oc_block_c_init < 3; ++oc_block_c_init) { - conv2d_NCHWc_global[(oc_block_c_init)] = 0.000000e+00; - } - for (int32_t oc_block_c_init1 = 0; oc_block_c_init1 < 3; ++oc_block_c_init1) { - conv2d_NCHWc_global[((oc_block_c_init1 + 3))] = 0.000000e+00; - } - for (int32_t oc_block_c_init2 = 0; oc_block_c_init2 < 3; ++oc_block_c_init2) { - conv2d_NCHWc_global[((oc_block_c_init2 + 6))] = 0.000000e+00; - } - for (int32_t oc_block_c_init3 = 0; oc_block_c_init3 < 3; ++oc_block_c_init3) { - conv2d_NCHWc_global[((oc_block_c_init3 + 9))] = 0.000000e+00; - } - for (int32_t oc_block_c_init4 = 0; oc_block_c_init4 < 3; ++oc_block_c_init4) { - conv2d_NCHWc_global[((oc_block_c_init4 + 12))] = 0.000000e+00; - } - for (int32_t oc_block_c_init5 = 0; oc_block_c_init5 < 3; ++oc_block_c_init5) { - conv2d_NCHWc_global[((oc_block_c_init5 + 15))] = 0.000000e+00; - } - for (int32_t oc_block_c_init6 = 0; oc_block_c_init6 < 3; ++oc_block_c_init6) { - conv2d_NCHWc_global[((oc_block_c_init6 + 18))] = 0.000000e+00; - } - for (int32_t oc_block_c_init7 = 0; oc_block_c_init7 < 3; ++oc_block_c_init7) { - conv2d_NCHWc_global[((oc_block_c_init7 + 21))] = 0.000000e+00; - } - for (int32_t oc_block_c_init8 = 0; oc_block_c_init8 < 3; ++oc_block_c_init8) { - conv2d_NCHWc_global[((oc_block_c_init8 + 24))] = 0.000000e+00; - } - for (int32_t oc_block_c_init9 = 0; oc_block_c_init9 < 3; ++oc_block_c_init9) { - conv2d_NCHWc_global[((oc_block_c_init9 + 27))] = 0.000000e+00; - } - for (int32_t oc_block_c_init10 = 0; oc_block_c_init10 < 3; ++oc_block_c_init10) { - conv2d_NCHWc_global[((oc_block_c_init10 + 30))] = 0.000000e+00; - } - for (int32_t oc_block_c_init11 = 0; oc_block_c_init11 < 3; ++oc_block_c_init11) { - conv2d_NCHWc_global[((oc_block_c_init11 + 33))] = 0.000000e+00; - } - for (int32_t kh = 0; kh < 3; ++kh) { - for (int32_t kw = 0; kw < 3; ++kw) { - for (int32_t ic_inner = 0; ic_inner < 3; ++ic_inner) { - for (int32_t oc_block_c = 0; oc_block_c < 3; ++oc_block_c) { - conv2d_NCHWc_global[(oc_block_c)] = (conv2d_NCHWc_global[(oc_block_c)] + (((double*)placeholder)[(((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c))])); - } - for (int32_t oc_block_c1 = 0; oc_block_c1 < 3; ++oc_block_c1) { - conv2d_NCHWc_global[((oc_block_c1 + 3))] = (conv2d_NCHWc_global[((oc_block_c1 + 3))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 3))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c1))])); - } - for (int32_t oc_block_c2 = 0; oc_block_c2 < 3; ++oc_block_c2) { - conv2d_NCHWc_global[((oc_block_c2 + 6))] = (conv2d_NCHWc_global[((oc_block_c2 + 6))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 6))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c2))])); - } - for (int32_t oc_block_c3 = 0; oc_block_c3 < 3; ++oc_block_c3) { - conv2d_NCHWc_global[((oc_block_c3 + 9))] = (conv2d_NCHWc_global[((oc_block_c3 + 9))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 9))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c3))])); - } - for (int32_t oc_block_c4 = 0; oc_block_c4 < 3; ++oc_block_c4) { - conv2d_NCHWc_global[((oc_block_c4 + 12))] = (conv2d_NCHWc_global[((oc_block_c4 + 12))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 12))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c4))])); - } - for (int32_t oc_block_c5 = 0; oc_block_c5 < 3; ++oc_block_c5) { - conv2d_NCHWc_global[((oc_block_c5 + 15))] = (conv2d_NCHWc_global[((oc_block_c5 + 15))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 15))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c5))])); - } - for (int32_t oc_block_c6 = 0; oc_block_c6 < 3; ++oc_block_c6) { - conv2d_NCHWc_global[((oc_block_c6 + 18))] = (conv2d_NCHWc_global[((oc_block_c6 + 18))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 18))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c6))])); - } - for (int32_t oc_block_c7 = 0; oc_block_c7 < 3; ++oc_block_c7) { - conv2d_NCHWc_global[((oc_block_c7 + 21))] = (conv2d_NCHWc_global[((oc_block_c7 + 21))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 21))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c7))])); - } - for (int32_t oc_block_c8 = 0; oc_block_c8 < 3; ++oc_block_c8) { - conv2d_NCHWc_global[((oc_block_c8 + 24))] = (conv2d_NCHWc_global[((oc_block_c8 + 24))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 24))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c8))])); - } - for (int32_t oc_block_c9 = 0; oc_block_c9 < 3; ++oc_block_c9) { - conv2d_NCHWc_global[((oc_block_c9 + 27))] = (conv2d_NCHWc_global[((oc_block_c9 + 27))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 27))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c9))])); - } - for (int32_t oc_block_c10 = 0; oc_block_c10 < 3; ++oc_block_c10) { - conv2d_NCHWc_global[((oc_block_c10 + 30))] = (conv2d_NCHWc_global[((oc_block_c10 + 30))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 30))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c10))])); - } - for (int32_t oc_block_c11 = 0; oc_block_c11 < 3; ++oc_block_c11) { - conv2d_NCHWc_global[((oc_block_c11 + 33))] = (conv2d_NCHWc_global[((oc_block_c11 + 33))] + (((double*)placeholder)[((((((kh * 42) + (n_oc_chunk_fused_oh_fused * 42)) + (kw * 3)) + ic_inner) + 33))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c11))])); - } - } - } - } - for (int32_t ow_inner = 0; ow_inner < 12; ++ow_inner) { - for (int32_t oc_block = 0; oc_block < 3; ++oc_block) { - ((double*)conv2d_NCHWc)[((((n_oc_chunk_fused_oh_fused * 36) + (ow_inner * 3)) + oc_block))] = conv2d_NCHWc_global[(((ow_inner * 3) + oc_block))]; - } - } - } - return 0; -} - -#ifdef __cplusplus -extern "C" -#endif -TVM_DLL int32_t fused_nn_contrib_conv2d_NCHWc_153(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle) { - void* arg0 = (((TVMValue*)args)[0].v_handle); - int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)]; - void* arg1 = (((TVMValue*)args)[1].v_handle); - int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)]; - void* arg2 = (((TVMValue*)args)[2].v_handle); - int32_t arg2_code = ((int32_t*)arg_type_ids)[(2)]; - void* placeholder = (((DLTensor*)arg0)[0].data); - void* arg0_shape = (((DLTensor*)arg0)[0].shape); - void* arg0_strides = (((DLTensor*)arg0)[0].strides); - int32_t dev_id = (((DLTensor*)arg0)[0].device.device_id); - void* placeholder1 = (((DLTensor*)arg1)[0].data); - void* arg1_shape = (((DLTensor*)arg1)[0].shape); - void* arg1_strides = (((DLTensor*)arg1)[0].strides); - void* conv2d_NCHWc = (((DLTensor*)arg2)[0].data); - void* arg2_shape = (((DLTensor*)arg2)[0].shape); - void* arg2_strides = (((DLTensor*)arg2)[0].strides); - if (!(arg0_strides == NULL)) { - } - if (!(arg1_strides == NULL)) { - } - if (!(arg2_strides == NULL)) { - } - for (int32_t n_oc_chunk_fused_oh_fused = 0; n_oc_chunk_fused_oh_fused < 10; ++n_oc_chunk_fused_oh_fused) { - double conv2d_NCHWc_global[30]; - for (int32_t oc_block_c_init = 0; oc_block_c_init < 3; ++oc_block_c_init) { - conv2d_NCHWc_global[(oc_block_c_init)] = 0.000000e+00; - } - for (int32_t oc_block_c_init1 = 0; oc_block_c_init1 < 3; ++oc_block_c_init1) { - conv2d_NCHWc_global[((oc_block_c_init1 + 3))] = 0.000000e+00; - } - for (int32_t oc_block_c_init2 = 0; oc_block_c_init2 < 3; ++oc_block_c_init2) { - conv2d_NCHWc_global[((oc_block_c_init2 + 6))] = 0.000000e+00; - } - for (int32_t oc_block_c_init3 = 0; oc_block_c_init3 < 3; ++oc_block_c_init3) { - conv2d_NCHWc_global[((oc_block_c_init3 + 9))] = 0.000000e+00; - } - for (int32_t oc_block_c_init4 = 0; oc_block_c_init4 < 3; ++oc_block_c_init4) { - conv2d_NCHWc_global[((oc_block_c_init4 + 12))] = 0.000000e+00; - } - for (int32_t oc_block_c_init5 = 0; oc_block_c_init5 < 3; ++oc_block_c_init5) { - conv2d_NCHWc_global[((oc_block_c_init5 + 15))] = 0.000000e+00; - } - for (int32_t oc_block_c_init6 = 0; oc_block_c_init6 < 3; ++oc_block_c_init6) { - conv2d_NCHWc_global[((oc_block_c_init6 + 18))] = 0.000000e+00; - } - for (int32_t oc_block_c_init7 = 0; oc_block_c_init7 < 3; ++oc_block_c_init7) { - conv2d_NCHWc_global[((oc_block_c_init7 + 21))] = 0.000000e+00; - } - for (int32_t oc_block_c_init8 = 0; oc_block_c_init8 < 3; ++oc_block_c_init8) { - conv2d_NCHWc_global[((oc_block_c_init8 + 24))] = 0.000000e+00; - } - for (int32_t oc_block_c_init9 = 0; oc_block_c_init9 < 3; ++oc_block_c_init9) { - conv2d_NCHWc_global[((oc_block_c_init9 + 27))] = 0.000000e+00; - } - for (int32_t kh = 0; kh < 3; ++kh) { - for (int32_t kw = 0; kw < 3; ++kw) { - for (int32_t ic_inner = 0; ic_inner < 3; ++ic_inner) { - for (int32_t oc_block_c = 0; oc_block_c < 3; ++oc_block_c) { - conv2d_NCHWc_global[(oc_block_c)] = (conv2d_NCHWc_global[(oc_block_c)] + (((double*)placeholder)[(((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c))])); - } - for (int32_t oc_block_c1 = 0; oc_block_c1 < 3; ++oc_block_c1) { - conv2d_NCHWc_global[((oc_block_c1 + 3))] = (conv2d_NCHWc_global[((oc_block_c1 + 3))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 3))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c1))])); - } - for (int32_t oc_block_c2 = 0; oc_block_c2 < 3; ++oc_block_c2) { - conv2d_NCHWc_global[((oc_block_c2 + 6))] = (conv2d_NCHWc_global[((oc_block_c2 + 6))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 6))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c2))])); - } - for (int32_t oc_block_c3 = 0; oc_block_c3 < 3; ++oc_block_c3) { - conv2d_NCHWc_global[((oc_block_c3 + 9))] = (conv2d_NCHWc_global[((oc_block_c3 + 9))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 9))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c3))])); - } - for (int32_t oc_block_c4 = 0; oc_block_c4 < 3; ++oc_block_c4) { - conv2d_NCHWc_global[((oc_block_c4 + 12))] = (conv2d_NCHWc_global[((oc_block_c4 + 12))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 12))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c4))])); - } - for (int32_t oc_block_c5 = 0; oc_block_c5 < 3; ++oc_block_c5) { - conv2d_NCHWc_global[((oc_block_c5 + 15))] = (conv2d_NCHWc_global[((oc_block_c5 + 15))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 15))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c5))])); - } - for (int32_t oc_block_c6 = 0; oc_block_c6 < 3; ++oc_block_c6) { - conv2d_NCHWc_global[((oc_block_c6 + 18))] = (conv2d_NCHWc_global[((oc_block_c6 + 18))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 18))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c6))])); - } - for (int32_t oc_block_c7 = 0; oc_block_c7 < 3; ++oc_block_c7) { - conv2d_NCHWc_global[((oc_block_c7 + 21))] = (conv2d_NCHWc_global[((oc_block_c7 + 21))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 21))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c7))])); - } - for (int32_t oc_block_c8 = 0; oc_block_c8 < 3; ++oc_block_c8) { - conv2d_NCHWc_global[((oc_block_c8 + 24))] = (conv2d_NCHWc_global[((oc_block_c8 + 24))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 24))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c8))])); - } - for (int32_t oc_block_c9 = 0; oc_block_c9 < 3; ++oc_block_c9) { - conv2d_NCHWc_global[((oc_block_c9 + 27))] = (conv2d_NCHWc_global[((oc_block_c9 + 27))] + (((double*)placeholder)[((((((kh * 36) + (n_oc_chunk_fused_oh_fused * 36)) + (kw * 3)) + ic_inner) + 27))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c9))])); - } - } - } - } - for (int32_t ow_inner = 0; ow_inner < 10; ++ow_inner) { - for (int32_t oc_block = 0; oc_block < 3; ++oc_block) { - ((double*)conv2d_NCHWc)[((((n_oc_chunk_fused_oh_fused * 30) + (ow_inner * 3)) + oc_block))] = conv2d_NCHWc_global[(((ow_inner * 3) + oc_block))]; - } - } - } - return 0; -} - -#ifdef __cplusplus -extern "C" -#endif -TVM_DLL int32_t fused_layout_transform_115(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle) { - void* arg0 = (((TVMValue*)args)[0].v_handle); - int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)]; - void* arg1 = (((TVMValue*)args)[1].v_handle); - int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)]; - void* placeholder = (((DLTensor*)arg0)[0].data); - void* arg0_shape = (((DLTensor*)arg0)[0].shape); - void* arg0_strides = (((DLTensor*)arg0)[0].strides); - int32_t dev_id = (((DLTensor*)arg0)[0].device.device_id); - void* T_layout_trans = (((DLTensor*)arg1)[0].data); - void* arg1_shape = (((DLTensor*)arg1)[0].shape); - void* arg1_strides = (((DLTensor*)arg1)[0].strides); - if (!(arg0_strides == NULL)) { - } - if (!(arg1_strides == NULL)) { - } - for (int32_t ax0_ax1_fused = 0; ax0_ax1_fused < 3; ++ax0_ax1_fused) { - for (int32_t ax2 = 0; ax2 < 8; ++ax2) { - for (int32_t ax3_inner = 0; ax3_inner < 8; ++ax3_inner) { - ((double*)T_layout_trans)[((((ax0_ax1_fused * 64) + (ax2 * 8)) + ax3_inner))] = ((double*)placeholder)[((((ax2 * 24) + (ax3_inner * 3)) + ax0_ax1_fused))]; - } - } - } - return 0; -} - -#ifdef __cplusplus -extern "C" -#endif -TVM_DLL int32_t fused_nn_contrib_conv2d_NCHWc_152(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle) { - void* arg0 = (((TVMValue*)args)[0].v_handle); - int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)]; - void* arg1 = (((TVMValue*)args)[1].v_handle); - int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)]; - void* arg2 = (((TVMValue*)args)[2].v_handle); - int32_t arg2_code = ((int32_t*)arg_type_ids)[(2)]; - void* placeholder = (((DLTensor*)arg0)[0].data); - void* arg0_shape = (((DLTensor*)arg0)[0].shape); - void* arg0_strides = (((DLTensor*)arg0)[0].strides); - int32_t dev_id = (((DLTensor*)arg0)[0].device.device_id); - void* placeholder1 = (((DLTensor*)arg1)[0].data); - void* arg1_shape = (((DLTensor*)arg1)[0].shape); - void* arg1_strides = (((DLTensor*)arg1)[0].strides); - void* conv2d_NCHWc = (((DLTensor*)arg2)[0].data); - void* arg2_shape = (((DLTensor*)arg2)[0].shape); - void* arg2_strides = (((DLTensor*)arg2)[0].strides); - if (!(arg0_strides == NULL)) { - } - if (!(arg1_strides == NULL)) { - } - if (!(arg2_strides == NULL)) { - } - for (int32_t n_oc_chunk_fused_oh_fused = 0; n_oc_chunk_fused_oh_fused < 8; ++n_oc_chunk_fused_oh_fused) { - double conv2d_NCHWc_global[24]; - for (int32_t oc_block_c_init = 0; oc_block_c_init < 3; ++oc_block_c_init) { - conv2d_NCHWc_global[(oc_block_c_init)] = 0.000000e+00; - } - for (int32_t oc_block_c_init1 = 0; oc_block_c_init1 < 3; ++oc_block_c_init1) { - conv2d_NCHWc_global[((oc_block_c_init1 + 3))] = 0.000000e+00; - } - for (int32_t oc_block_c_init2 = 0; oc_block_c_init2 < 3; ++oc_block_c_init2) { - conv2d_NCHWc_global[((oc_block_c_init2 + 6))] = 0.000000e+00; - } - for (int32_t oc_block_c_init3 = 0; oc_block_c_init3 < 3; ++oc_block_c_init3) { - conv2d_NCHWc_global[((oc_block_c_init3 + 9))] = 0.000000e+00; - } - for (int32_t oc_block_c_init4 = 0; oc_block_c_init4 < 3; ++oc_block_c_init4) { - conv2d_NCHWc_global[((oc_block_c_init4 + 12))] = 0.000000e+00; - } - for (int32_t oc_block_c_init5 = 0; oc_block_c_init5 < 3; ++oc_block_c_init5) { - conv2d_NCHWc_global[((oc_block_c_init5 + 15))] = 0.000000e+00; - } - for (int32_t oc_block_c_init6 = 0; oc_block_c_init6 < 3; ++oc_block_c_init6) { - conv2d_NCHWc_global[((oc_block_c_init6 + 18))] = 0.000000e+00; - } - for (int32_t oc_block_c_init7 = 0; oc_block_c_init7 < 3; ++oc_block_c_init7) { - conv2d_NCHWc_global[((oc_block_c_init7 + 21))] = 0.000000e+00; - } - for (int32_t kh = 0; kh < 3; ++kh) { - for (int32_t kw = 0; kw < 3; ++kw) { - for (int32_t ic_inner = 0; ic_inner < 3; ++ic_inner) { - for (int32_t oc_block_c = 0; oc_block_c < 3; ++oc_block_c) { - conv2d_NCHWc_global[(oc_block_c)] = (conv2d_NCHWc_global[(oc_block_c)] + (((double*)placeholder)[(((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c))])); - } - for (int32_t oc_block_c1 = 0; oc_block_c1 < 3; ++oc_block_c1) { - conv2d_NCHWc_global[((oc_block_c1 + 3))] = (conv2d_NCHWc_global[((oc_block_c1 + 3))] + (((double*)placeholder)[((((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner) + 3))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c1))])); - } - for (int32_t oc_block_c2 = 0; oc_block_c2 < 3; ++oc_block_c2) { - conv2d_NCHWc_global[((oc_block_c2 + 6))] = (conv2d_NCHWc_global[((oc_block_c2 + 6))] + (((double*)placeholder)[((((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner) + 6))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c2))])); - } - for (int32_t oc_block_c3 = 0; oc_block_c3 < 3; ++oc_block_c3) { - conv2d_NCHWc_global[((oc_block_c3 + 9))] = (conv2d_NCHWc_global[((oc_block_c3 + 9))] + (((double*)placeholder)[((((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner) + 9))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c3))])); - } - for (int32_t oc_block_c4 = 0; oc_block_c4 < 3; ++oc_block_c4) { - conv2d_NCHWc_global[((oc_block_c4 + 12))] = (conv2d_NCHWc_global[((oc_block_c4 + 12))] + (((double*)placeholder)[((((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner) + 12))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c4))])); - } - for (int32_t oc_block_c5 = 0; oc_block_c5 < 3; ++oc_block_c5) { - conv2d_NCHWc_global[((oc_block_c5 + 15))] = (conv2d_NCHWc_global[((oc_block_c5 + 15))] + (((double*)placeholder)[((((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner) + 15))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c5))])); - } - for (int32_t oc_block_c6 = 0; oc_block_c6 < 3; ++oc_block_c6) { - conv2d_NCHWc_global[((oc_block_c6 + 18))] = (conv2d_NCHWc_global[((oc_block_c6 + 18))] + (((double*)placeholder)[((((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner) + 18))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c6))])); - } - for (int32_t oc_block_c7 = 0; oc_block_c7 < 3; ++oc_block_c7) { - conv2d_NCHWc_global[((oc_block_c7 + 21))] = (conv2d_NCHWc_global[((oc_block_c7 + 21))] + (((double*)placeholder)[((((((kh * 30) + (n_oc_chunk_fused_oh_fused * 30)) + (kw * 3)) + ic_inner) + 21))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c7))])); - } - } - } - } - for (int32_t ow_inner = 0; ow_inner < 8; ++ow_inner) { - for (int32_t oc_block = 0; oc_block < 3; ++oc_block) { - ((double*)conv2d_NCHWc)[((((n_oc_chunk_fused_oh_fused * 24) + (ow_inner * 3)) + oc_block))] = conv2d_NCHWc_global[(((ow_inner * 3) + oc_block))]; - } - } - } - return 0; -} - -#ifdef __cplusplus -extern "C" -#endif -TVM_DLL int32_t fused_layout_transform_116(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle) { - void* arg0 = (((TVMValue*)args)[0].v_handle); - int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)]; - void* arg1 = (((TVMValue*)args)[1].v_handle); - int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)]; - void* placeholder = (((DLTensor*)arg0)[0].data); - void* arg0_shape = (((DLTensor*)arg0)[0].shape); - void* arg0_strides = (((DLTensor*)arg0)[0].strides); - int32_t dev_id = (((DLTensor*)arg0)[0].device.device_id); - void* T_layout_trans = (((DLTensor*)arg1)[0].data); - void* arg1_shape = (((DLTensor*)arg1)[0].shape); - void* arg1_strides = (((DLTensor*)arg1)[0].strides); - if (!(arg0_strides == NULL)) { - } - if (!(arg1_strides == NULL)) { - } - for (int32_t ax0_ax1_fused_ax2_fused = 0; ax0_ax1_fused_ax2_fused < 16; ++ax0_ax1_fused_ax2_fused) { - for (int32_t ax3 = 0; ax3 < 16; ++ax3) { - for (int32_t ax4_inner = 0; ax4_inner < 3; ++ax4_inner) { - ((double*)T_layout_trans)[((((ax0_ax1_fused_ax2_fused * 48) + (ax3 * 3)) + ax4_inner))] = ((double*)placeholder)[((((ax4_inner * 256) + (ax0_ax1_fused_ax2_fused * 16)) + ax3))]; - } - } - } - return 0; -} - -#ifdef __cplusplus -extern "C" -#endif -TVM_DLL int32_t fused_nn_contrib_conv2d_NCHWc_155(void* args, void* arg_type_ids, int32_t num_args, void* out_ret_value, void* out_ret_tcode, void* resource_handle) { - void* arg0 = (((TVMValue*)args)[0].v_handle); - int32_t arg0_code = ((int32_t*)arg_type_ids)[(0)]; - void* arg1 = (((TVMValue*)args)[1].v_handle); - int32_t arg1_code = ((int32_t*)arg_type_ids)[(1)]; - void* arg2 = (((TVMValue*)args)[2].v_handle); - int32_t arg2_code = ((int32_t*)arg_type_ids)[(2)]; - void* placeholder = (((DLTensor*)arg0)[0].data); - void* arg0_shape = (((DLTensor*)arg0)[0].shape); - void* arg0_strides = (((DLTensor*)arg0)[0].strides); - int32_t dev_id = (((DLTensor*)arg0)[0].device.device_id); - void* placeholder1 = (((DLTensor*)arg1)[0].data); - void* arg1_shape = (((DLTensor*)arg1)[0].shape); - void* arg1_strides = (((DLTensor*)arg1)[0].strides); - void* conv2d_NCHWc = (((DLTensor*)arg2)[0].data); - void* arg2_shape = (((DLTensor*)arg2)[0].shape); - void* arg2_strides = (((DLTensor*)arg2)[0].strides); - if (!(arg0_strides == NULL)) { - } - if (!(arg1_strides == NULL)) { - } - if (!(arg2_strides == NULL)) { - } - for (int32_t n_oc_chunk_fused_oh_fused = 0; n_oc_chunk_fused_oh_fused < 14; ++n_oc_chunk_fused_oh_fused) { - double conv2d_NCHWc_global[42]; - for (int32_t oc_block_c_init = 0; oc_block_c_init < 3; ++oc_block_c_init) { - conv2d_NCHWc_global[(oc_block_c_init)] = 0.000000e+00; - } - for (int32_t oc_block_c_init1 = 0; oc_block_c_init1 < 3; ++oc_block_c_init1) { - conv2d_NCHWc_global[((oc_block_c_init1 + 3))] = 0.000000e+00; - } - for (int32_t oc_block_c_init2 = 0; oc_block_c_init2 < 3; ++oc_block_c_init2) { - conv2d_NCHWc_global[((oc_block_c_init2 + 6))] = 0.000000e+00; - } - for (int32_t oc_block_c_init3 = 0; oc_block_c_init3 < 3; ++oc_block_c_init3) { - conv2d_NCHWc_global[((oc_block_c_init3 + 9))] = 0.000000e+00; - } - for (int32_t oc_block_c_init4 = 0; oc_block_c_init4 < 3; ++oc_block_c_init4) { - conv2d_NCHWc_global[((oc_block_c_init4 + 12))] = 0.000000e+00; - } - for (int32_t oc_block_c_init5 = 0; oc_block_c_init5 < 3; ++oc_block_c_init5) { - conv2d_NCHWc_global[((oc_block_c_init5 + 15))] = 0.000000e+00; - } - for (int32_t oc_block_c_init6 = 0; oc_block_c_init6 < 3; ++oc_block_c_init6) { - conv2d_NCHWc_global[((oc_block_c_init6 + 18))] = 0.000000e+00; - } - for (int32_t oc_block_c_init7 = 0; oc_block_c_init7 < 3; ++oc_block_c_init7) { - conv2d_NCHWc_global[((oc_block_c_init7 + 21))] = 0.000000e+00; - } - for (int32_t oc_block_c_init8 = 0; oc_block_c_init8 < 3; ++oc_block_c_init8) { - conv2d_NCHWc_global[((oc_block_c_init8 + 24))] = 0.000000e+00; - } - for (int32_t oc_block_c_init9 = 0; oc_block_c_init9 < 3; ++oc_block_c_init9) { - conv2d_NCHWc_global[((oc_block_c_init9 + 27))] = 0.000000e+00; - } - for (int32_t oc_block_c_init10 = 0; oc_block_c_init10 < 3; ++oc_block_c_init10) { - conv2d_NCHWc_global[((oc_block_c_init10 + 30))] = 0.000000e+00; - } - for (int32_t oc_block_c_init11 = 0; oc_block_c_init11 < 3; ++oc_block_c_init11) { - conv2d_NCHWc_global[((oc_block_c_init11 + 33))] = 0.000000e+00; - } - for (int32_t oc_block_c_init12 = 0; oc_block_c_init12 < 3; ++oc_block_c_init12) { - conv2d_NCHWc_global[((oc_block_c_init12 + 36))] = 0.000000e+00; - } - for (int32_t oc_block_c_init13 = 0; oc_block_c_init13 < 3; ++oc_block_c_init13) { - conv2d_NCHWc_global[((oc_block_c_init13 + 39))] = 0.000000e+00; - } - for (int32_t kh = 0; kh < 3; ++kh) { - for (int32_t kw = 0; kw < 3; ++kw) { - for (int32_t ic_inner = 0; ic_inner < 3; ++ic_inner) { - for (int32_t oc_block_c = 0; oc_block_c < 3; ++oc_block_c) { - conv2d_NCHWc_global[(oc_block_c)] = (conv2d_NCHWc_global[(oc_block_c)] + (((double*)placeholder)[(((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c))])); - } - for (int32_t oc_block_c1 = 0; oc_block_c1 < 3; ++oc_block_c1) { - conv2d_NCHWc_global[((oc_block_c1 + 3))] = (conv2d_NCHWc_global[((oc_block_c1 + 3))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 3))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c1))])); - } - for (int32_t oc_block_c2 = 0; oc_block_c2 < 3; ++oc_block_c2) { - conv2d_NCHWc_global[((oc_block_c2 + 6))] = (conv2d_NCHWc_global[((oc_block_c2 + 6))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 6))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c2))])); - } - for (int32_t oc_block_c3 = 0; oc_block_c3 < 3; ++oc_block_c3) { - conv2d_NCHWc_global[((oc_block_c3 + 9))] = (conv2d_NCHWc_global[((oc_block_c3 + 9))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 9))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c3))])); - } - for (int32_t oc_block_c4 = 0; oc_block_c4 < 3; ++oc_block_c4) { - conv2d_NCHWc_global[((oc_block_c4 + 12))] = (conv2d_NCHWc_global[((oc_block_c4 + 12))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 12))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c4))])); - } - for (int32_t oc_block_c5 = 0; oc_block_c5 < 3; ++oc_block_c5) { - conv2d_NCHWc_global[((oc_block_c5 + 15))] = (conv2d_NCHWc_global[((oc_block_c5 + 15))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 15))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c5))])); - } - for (int32_t oc_block_c6 = 0; oc_block_c6 < 3; ++oc_block_c6) { - conv2d_NCHWc_global[((oc_block_c6 + 18))] = (conv2d_NCHWc_global[((oc_block_c6 + 18))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 18))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c6))])); - } - for (int32_t oc_block_c7 = 0; oc_block_c7 < 3; ++oc_block_c7) { - conv2d_NCHWc_global[((oc_block_c7 + 21))] = (conv2d_NCHWc_global[((oc_block_c7 + 21))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 21))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c7))])); - } - for (int32_t oc_block_c8 = 0; oc_block_c8 < 3; ++oc_block_c8) { - conv2d_NCHWc_global[((oc_block_c8 + 24))] = (conv2d_NCHWc_global[((oc_block_c8 + 24))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 24))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c8))])); - } - for (int32_t oc_block_c9 = 0; oc_block_c9 < 3; ++oc_block_c9) { - conv2d_NCHWc_global[((oc_block_c9 + 27))] = (conv2d_NCHWc_global[((oc_block_c9 + 27))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 27))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c9))])); - } - for (int32_t oc_block_c10 = 0; oc_block_c10 < 3; ++oc_block_c10) { - conv2d_NCHWc_global[((oc_block_c10 + 30))] = (conv2d_NCHWc_global[((oc_block_c10 + 30))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 30))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c10))])); - } - for (int32_t oc_block_c11 = 0; oc_block_c11 < 3; ++oc_block_c11) { - conv2d_NCHWc_global[((oc_block_c11 + 33))] = (conv2d_NCHWc_global[((oc_block_c11 + 33))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 33))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c11))])); - } - for (int32_t oc_block_c12 = 0; oc_block_c12 < 3; ++oc_block_c12) { - conv2d_NCHWc_global[((oc_block_c12 + 36))] = (conv2d_NCHWc_global[((oc_block_c12 + 36))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 36))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c12))])); - } - for (int32_t oc_block_c13 = 0; oc_block_c13 < 3; ++oc_block_c13) { - conv2d_NCHWc_global[((oc_block_c13 + 39))] = (conv2d_NCHWc_global[((oc_block_c13 + 39))] + (((double*)placeholder)[((((((kh * 48) + (n_oc_chunk_fused_oh_fused * 48)) + (kw * 3)) + ic_inner) + 39))] * ((double*)placeholder1)[(((((kh * 27) + (kw * 9)) + (ic_inner * 3)) + oc_block_c13))])); - } - } - } - } - for (int32_t ow_inner = 0; ow_inner < 14; ++ow_inner) { - for (int32_t oc_block = 0; oc_block < 3; ++oc_block) { - ((double*)conv2d_NCHWc)[((((n_oc_chunk_fused_oh_fused * 42) + (ow_inner * 3)) + oc_block))] = conv2d_NCHWc_global[(((ow_inner * 3) + oc_block))]; - } - } - } - return 0; -} - -#ifdef __cplusplus -extern "C" -#endif -TVM_DLL int32_t _lookup_linked_param(void* args, int* arg_type_ids, int num_args, void* out_ret_value, int* out_ret_tcode, void* resource_handle) { - switch (((int64_t*) args)[0]) { - default: - out_ret_tcode[0] = 4; - return 0; - case 4: - ((uint64_t*)out_ret_value)[0] = (uint64_t) (uintptr_t) __tvm_param__p1; - out_ret_tcode[0] = 3; - return 0; - case 2: - ((uint64_t*)out_ret_value)[0] = (uint64_t) (uintptr_t) __tvm_param__p0; - out_ret_tcode[0] = 3; - return 0; - } -} From 579e384ac3cf0799239b425f9be6f2b8184bed7d Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 8 Apr 2021 15:25:19 -0700 Subject: [PATCH 10/41] Fix format --- src/tir/transforms/lower_intrin.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tir/transforms/lower_intrin.cc b/src/tir/transforms/lower_intrin.cc index 27c4b2a00ebd..0d4186ae9e56 100644 --- a/src/tir/transforms/lower_intrin.cc +++ b/src/tir/transforms/lower_intrin.cc @@ -71,7 +71,7 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { if (r.defined()) return r; } } - } + } } return IRMutatorWithAnalyzer::VisitExpr_(op); } From 8373578c2d2078998a0c8a36a9b71044b825c52e Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Mon, 12 Apr 2021 14:38:35 -0700 Subject: [PATCH 11/41] Fix python integration function --- include/tvm/runtime/c_runtime_api.h | 11 +++++++++ python/tvm/__init__.py | 2 +- python/tvm/_ffi/__init__.py | 2 +- python/tvm/_ffi/registry.py | 36 +++++++++++++++++++++++++++++ src/runtime/registry.cc | 11 +++++++++ vta/python/vta/environment.py | 8 +++---- 6 files changed, 64 insertions(+), 6 deletions(-) diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h index 44dba4d9c463..10d19eb8c3b3 100644 --- a/include/tvm/runtime/c_runtime_api.h +++ b/include/tvm/runtime/c_runtime_api.h @@ -342,6 +342,17 @@ TVM_DLL int TVMFuncCreateFromCFunc(TVMPackedCFunc func, void* resource_handle, */ TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override); +/*! + * \brief Register the Op lowering function to runtime's Op table. + * + * The registered Op then can be pulled by the backend by the name. + * + * \param name The name of the Op. + * \param f The Op lowering function to be registered. + * \param override Whether allow override already registered function. + */ +TVM_DLL int TVMOpLoweringFuncRegister(const char* name, TVMFunctionHandle f, int override); + /*! * \brief Get a global function. * diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py index 4643062ea8e8..a92273746a19 100644 --- a/python/tvm/__init__.py +++ b/python/tvm/__init__.py @@ -25,7 +25,7 @@ # tvm._ffi from ._ffi.base import TVMError, __version__ from ._ffi.runtime_ctypes import DataTypeCode, DataType -from ._ffi import register_object, register_func, register_extension, get_global_func +from ._ffi import register_object, register_func, register_extension, get_global_func, register_op # top-level alias # tvm.runtime diff --git a/python/tvm/_ffi/__init__.py b/python/tvm/_ffi/__init__.py index 1b2fc58d2927..0d4120b449ab 100644 --- a/python/tvm/_ffi/__init__.py +++ b/python/tvm/_ffi/__init__.py @@ -26,5 +26,5 @@ """ from . import _pyversion from .base import register_error -from .registry import register_object, register_func, register_extension +from .registry import register_object, register_func, register_extension, register_op from .registry import _init_api, get_global_func diff --git a/python/tvm/_ffi/registry.py b/python/tvm/_ffi/registry.py index 677ca5d8de8d..22ef326ecd08 100644 --- a/python/tvm/_ffi/registry.py +++ b/python/tvm/_ffi/registry.py @@ -198,6 +198,42 @@ def register(myf): return register +def register_op(op_name, f=None, override=False): + """Register Op lowering function + + Parameters + ---------- + op_name : str or function + The op name + + f : function, optional + The function to be registered. + + override: boolean optional + Whether override existing entry. + + Returns + ------- + fregister : function + Register op lowering function if f is not specified. + """ + if not isinstance(op_name, str): + raise ValueError("expect string function name") + + ioverride = ctypes.c_int(override) + + def register(myf): + """internal register function""" + if not isinstance(myf, PackedFuncBase): + myf = convert_to_tvm_func(myf) + check_call(_LIB.TVMOpLoweringFuncRegister(c_str(op_name), myf.handle, ioverride)) + return myf + + if f: + return register(f) + return register + + def get_global_func(name, allow_missing=False): """Get a global function by name diff --git a/src/runtime/registry.cc b/src/runtime/registry.cc index 92b39f03a16b..844890465938 100644 --- a/src/runtime/registry.cc +++ b/src/runtime/registry.cc @@ -22,8 +22,11 @@ * \brief The global registry of packed function. */ #include +#include #include #include +#include +#include #include #include @@ -123,6 +126,14 @@ int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) { API_END(); } +int TVMOpLoweringFuncRegister(const char* name, TVMFunctionHandle f, int override) { + API_BEGIN(); + ::tvm::OpRegEntry::RegisterOrGet(name).set_name() + .set_attr( + "default.FLowerIntrinsic", *static_cast(f)); + API_END(); +} + int TVMFuncGetGlobal(const char* name, TVMFunctionHandle* out) { API_BEGIN(); const tvm::runtime::PackedFunc* fp = tvm::runtime::Registry::Get(name); diff --git a/vta/python/vta/environment.py b/vta/python/vta/environment.py index 4b6e5bdeca78..bd8a611cbfff 100644 --- a/vta/python/vta/environment.py +++ b/vta/python/vta/environment.py @@ -291,8 +291,8 @@ def mem_info_acc_buffer(): ) -# TVM related registration -@tvm.register_func("tvm.intrin.rule.default.vta.coproc_sync") +# TVM Op related registration +@tvm.register_op("tir.vta.coproc_sync") def coproc_sync(op): _ = op return tvm.tir.call_extern( @@ -303,14 +303,14 @@ def coproc_sync(op): ) -@tvm.register_func("tvm.intrin.rule.default.vta.coproc_dep_push") +@tvm.register_op("tir.vta.coproc_dep_push") def coproc_dep_push(op): return tvm.tir.call_extern( "int32", "VTADepPush", get_env().dev.command_handle, op.args[0], op.args[1] ) -@tvm.register_func("tvm.intrin.rule.default.vta.coproc_dep_pop") +@tvm.register_op("tir.vta.coproc_dep_pop") def coproc_dep_pop(op): return tvm.tir.call_extern( "int32", "VTADepPop", get_env().dev.command_handle, op.args[0], op.args[1] From 60f859f06920222436158322b074a4c03e19fd5b Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Mon, 12 Apr 2021 15:52:59 -0700 Subject: [PATCH 12/41] Fix macOS Compile --- src/runtime/registry.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/runtime/registry.cc b/src/runtime/registry.cc index 844890465938..994e7c369b67 100644 --- a/src/runtime/registry.cc +++ b/src/runtime/registry.cc @@ -128,9 +128,8 @@ int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) { int TVMOpLoweringFuncRegister(const char* name, TVMFunctionHandle f, int override) { API_BEGIN(); - ::tvm::OpRegEntry::RegisterOrGet(name).set_name() - .set_attr( - "default.FLowerIntrinsic", *static_cast(f)); + tvm::OpRegEntry::RegisterOrGet(name).set_name().set_attr( + "default.FLowerIntrinsic", *static_cast(f)); API_END(); } From fcad0bea39257d3eb1a57a6d932a0e97a90c7c10 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Tue, 13 Apr 2021 16:55:51 -0700 Subject: [PATCH 13/41] Fix python function with global function --- include/tvm/runtime/c_runtime_api.h | 11 ------ python/tvm/__init__.py | 2 +- python/tvm/_ffi/__init__.py | 2 +- python/tvm/_ffi/registry.py | 36 ----------------- python/tvm/ir/op.py | 60 +++++++++++++++++++++++++++++ src/ir/op.cc | 15 ++++++++ src/runtime/registry.cc | 7 ---- vta/python/vta/environment.py | 6 +-- 8 files changed, 80 insertions(+), 59 deletions(-) diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h index 10d19eb8c3b3..44dba4d9c463 100644 --- a/include/tvm/runtime/c_runtime_api.h +++ b/include/tvm/runtime/c_runtime_api.h @@ -342,17 +342,6 @@ TVM_DLL int TVMFuncCreateFromCFunc(TVMPackedCFunc func, void* resource_handle, */ TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override); -/*! - * \brief Register the Op lowering function to runtime's Op table. - * - * The registered Op then can be pulled by the backend by the name. - * - * \param name The name of the Op. - * \param f The Op lowering function to be registered. - * \param override Whether allow override already registered function. - */ -TVM_DLL int TVMOpLoweringFuncRegister(const char* name, TVMFunctionHandle f, int override); - /*! * \brief Get a global function. * diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py index a92273746a19..4643062ea8e8 100644 --- a/python/tvm/__init__.py +++ b/python/tvm/__init__.py @@ -25,7 +25,7 @@ # tvm._ffi from ._ffi.base import TVMError, __version__ from ._ffi.runtime_ctypes import DataTypeCode, DataType -from ._ffi import register_object, register_func, register_extension, get_global_func, register_op +from ._ffi import register_object, register_func, register_extension, get_global_func # top-level alias # tvm.runtime diff --git a/python/tvm/_ffi/__init__.py b/python/tvm/_ffi/__init__.py index 0d4120b449ab..1b2fc58d2927 100644 --- a/python/tvm/_ffi/__init__.py +++ b/python/tvm/_ffi/__init__.py @@ -26,5 +26,5 @@ """ from . import _pyversion from .base import register_error -from .registry import register_object, register_func, register_extension, register_op +from .registry import register_object, register_func, register_extension from .registry import _init_api, get_global_func diff --git a/python/tvm/_ffi/registry.py b/python/tvm/_ffi/registry.py index 22ef326ecd08..677ca5d8de8d 100644 --- a/python/tvm/_ffi/registry.py +++ b/python/tvm/_ffi/registry.py @@ -198,42 +198,6 @@ def register(myf): return register -def register_op(op_name, f=None, override=False): - """Register Op lowering function - - Parameters - ---------- - op_name : str or function - The op name - - f : function, optional - The function to be registered. - - override: boolean optional - Whether override existing entry. - - Returns - ------- - fregister : function - Register op lowering function if f is not specified. - """ - if not isinstance(op_name, str): - raise ValueError("expect string function name") - - ioverride = ctypes.c_int(override) - - def register(myf): - """internal register function""" - if not isinstance(myf, PackedFuncBase): - myf = convert_to_tvm_func(myf) - check_call(_LIB.TVMOpLoweringFuncRegister(c_str(op_name), myf.handle, ioverride)) - return myf - - if f: - return register(f) - return register - - def get_global_func(name, allow_missing=False): """Get a global function by name diff --git a/python/tvm/ir/op.py b/python/tvm/ir/op.py index 7b06c3da33d6..7dcfe0ed573c 100644 --- a/python/tvm/ir/op.py +++ b/python/tvm/ir/op.py @@ -16,10 +16,23 @@ # under the License. # pylint: disable=invalid-name """Primitive operators in the TVM IR.""" +import ctypes import tvm._ffi +from tvm._ffi.base import _FFI_MODE from .expr import RelayExpr from . import _ffi_api +try: + # pylint: disable=wrong-import-position,unused-import + if _FFI_MODE == "ctypes": + raise ImportError() + from tvm._ffi._cy3.core import convert_to_tvm_func, _get_global_func, PackedFuncBase +except (RuntimeError, ImportError) as error: + # pylint: disable=wrong-import-position,unused-import + if _FFI_MODE == "cython": + raise error + from tvm._ffi._ctypes.packed_func import convert_to_tvm_func, _get_global_func, PackedFuncBase + @tvm._ffi.register_object("Op") class Op(RelayExpr): @@ -115,3 +128,50 @@ def _register(v): return v return _register(value) if value is not None else _register + + +def register_op_intrin_lowering( + op_name, + f=None, + target="default", + plevel=10, + override=False, +): + """Register Op lowering function + + Parameters + ---------- + op_name : str or function + The op name + + f : function, optional + The function to be registered. + + target : str + The target string for given intrinsic lowering function + + plevel : int + The priority level + + override: boolean optional + Whether override existing entry. + + Returns + ------- + fregister : function + Register op lowering function if f is not specified. + """ + if not isinstance(op_name, str): + raise ValueError("expect string op name") + + def _register(myf): + """internal intrinsic lowering registration function""" + assert isinstance(target, str) + if not isinstance(myf, PackedFuncBase): + myf = convert_to_tvm_func(myf) + _ffi_api.RegisterOpLowerIntrinsic(op_name, myf.handle, target, plevel, override) + return myf + + if f: + return _register(f) + return _register \ No newline at end of file diff --git a/src/ir/op.cc b/src/ir/op.cc index 5d2dc704f5b7..964088cee879 100644 --- a/src/ir/op.cc +++ b/src/ir/op.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -122,6 +123,20 @@ TVM_REGISTER_GLOBAL("ir.RegisterOpAttr") } }); +TVM_REGISTER_GLOBAL("ir.RegisterOpLowerIntrinsic") + .set_body_typed([](String name, TVMFunctionHandle f, String target = "default", int plevel = 10, + int can_override = 0) { + if (Op::HasAttrMap(target + ".FLowerIntrinsic") && + OpRegistry::Global()->Get(name) != nullptr && + Op::GetAttrMap(target + ".FLowerIntrinsic") + .count(Op::Get(name))) { + ICHECK(can_override) << "Op " << name << "'s intrinsic lowering function " << target + << ".FlowerIntrinsic is already registered"; + } + tvm::OpRegEntry::RegisterOrGet(name).set_name().set_attr( + target + ".FLowerIntrinsic", *static_cast(f), plevel); + }); + // helper to get internal dev function in objectref. struct Op2ObjectPtr : public ObjectRef { static ObjectPtr Get(const Op& op) { return GetDataPtr(op); } diff --git a/src/runtime/registry.cc b/src/runtime/registry.cc index 994e7c369b67..dc6aea1c7176 100644 --- a/src/runtime/registry.cc +++ b/src/runtime/registry.cc @@ -126,13 +126,6 @@ int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) { API_END(); } -int TVMOpLoweringFuncRegister(const char* name, TVMFunctionHandle f, int override) { - API_BEGIN(); - tvm::OpRegEntry::RegisterOrGet(name).set_name().set_attr( - "default.FLowerIntrinsic", *static_cast(f)); - API_END(); -} - int TVMFuncGetGlobal(const char* name, TVMFunctionHandle* out) { API_BEGIN(); const tvm::runtime::PackedFunc* fp = tvm::runtime::Registry::Get(name); diff --git a/vta/python/vta/environment.py b/vta/python/vta/environment.py index bd8a611cbfff..0864973b2586 100644 --- a/vta/python/vta/environment.py +++ b/vta/python/vta/environment.py @@ -292,7 +292,7 @@ def mem_info_acc_buffer(): # TVM Op related registration -@tvm.register_op("tir.vta.coproc_sync") +@tvm.ir.op.register_op_intrin_lowering("tir.vta.coproc_sync") def coproc_sync(op): _ = op return tvm.tir.call_extern( @@ -303,14 +303,14 @@ def coproc_sync(op): ) -@tvm.register_op("tir.vta.coproc_dep_push") +@tvm.ir.op.register_op_intrin_lowering("tir.vta.coproc_dep_push") def coproc_dep_push(op): return tvm.tir.call_extern( "int32", "VTADepPush", get_env().dev.command_handle, op.args[0], op.args[1] ) -@tvm.register_op("tir.vta.coproc_dep_pop") +@tvm.ir.op.register_op_intrin_lowering("tir.vta.coproc_dep_pop") def coproc_dep_pop(op): return tvm.tir.call_extern( "int32", "VTADepPop", get_env().dev.command_handle, op.args[0], op.args[1] From dc7e67c97843f7c6618d3ae8c7dc3f927eab76a8 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Tue, 13 Apr 2021 20:19:22 -0700 Subject: [PATCH 14/41] Update op.py --- python/tvm/ir/op.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/ir/op.py b/python/tvm/ir/op.py index 7dcfe0ed573c..430c2f1eba10 100644 --- a/python/tvm/ir/op.py +++ b/python/tvm/ir/op.py @@ -174,4 +174,4 @@ def _register(myf): if f: return _register(f) - return _register \ No newline at end of file + return _register From 94f55bc99a9ca8fa7717af7a7fbb74826afeb7eb Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Tue, 13 Apr 2021 21:56:08 -0700 Subject: [PATCH 15/41] Modify include and format --- src/ir/op.cc | 5 +++-- src/runtime/registry.cc | 3 --- src/target/intrin_rule.cc | 2 +- src/target/llvm/intrin_rule_hexagon.cc | 2 +- src/target/llvm/intrin_rule_llvm.cc | 2 +- src/target/llvm/intrin_rule_nvptx.cc | 2 +- src/target/llvm/intrin_rule_rocm.cc | 2 +- src/target/source/intrin_rule_aocl.cc | 2 +- src/target/source/intrin_rule_cuda.cc | 3 ++- src/target/source/intrin_rule_metal.cc | 2 +- src/target/source/intrin_rule_opencl.cc | 2 +- src/target/source/intrin_rule_vhls.cc | 2 +- src/target/spirv/intrin_rule_spirv.cc | 4 +--- 13 files changed, 15 insertions(+), 18 deletions(-) diff --git a/src/ir/op.cc b/src/ir/op.cc index 964088cee879..d20ce913d21a 100644 --- a/src/ir/op.cc +++ b/src/ir/op.cc @@ -37,6 +37,7 @@ namespace tvm { using runtime::PackedFunc; using runtime::TVMArgs; using runtime::TVMRetValue; +using tir::FLowerIntrinsic; using OpRegistry = AttrRegistry; @@ -128,12 +129,12 @@ TVM_REGISTER_GLOBAL("ir.RegisterOpLowerIntrinsic") int can_override = 0) { if (Op::HasAttrMap(target + ".FLowerIntrinsic") && OpRegistry::Global()->Get(name) != nullptr && - Op::GetAttrMap(target + ".FLowerIntrinsic") + Op::GetAttrMap(target + ".FLowerIntrinsic") .count(Op::Get(name))) { ICHECK(can_override) << "Op " << name << "'s intrinsic lowering function " << target << ".FlowerIntrinsic is already registered"; } - tvm::OpRegEntry::RegisterOrGet(name).set_name().set_attr( + tvm::OpRegEntry::RegisterOrGet(name).set_name().set_attr( target + ".FLowerIntrinsic", *static_cast(f), plevel); }); diff --git a/src/runtime/registry.cc b/src/runtime/registry.cc index dc6aea1c7176..92b39f03a16b 100644 --- a/src/runtime/registry.cc +++ b/src/runtime/registry.cc @@ -22,11 +22,8 @@ * \brief The global registry of packed function. */ #include -#include #include #include -#include -#include #include #include diff --git a/src/target/intrin_rule.cc b/src/target/intrin_rule.cc index 386f949df556..86fde67a63a4 100644 --- a/src/target/intrin_rule.cc +++ b/src/target/intrin_rule.cc @@ -29,7 +29,7 @@ namespace tvm { namespace codegen { namespace intrin { -using namespace tir; +using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.exp").set_attr("default.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); diff --git a/src/target/llvm/intrin_rule_hexagon.cc b/src/target/llvm/intrin_rule_hexagon.cc index 8e4a83c59b8e..fde4f79e0072 100644 --- a/src/target/llvm/intrin_rule_hexagon.cc +++ b/src/target/llvm/intrin_rule_hexagon.cc @@ -26,7 +26,7 @@ namespace tvm { namespace codegen { namespace llvm { -using namespace tir; +using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.exp").set_attr( "hexagon.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>)); diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index fef367d72ca0..539246e6c584 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -30,7 +30,7 @@ namespace tvm { namespace codegen { namespace llvm { -using namespace tir; +using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.prefetch") .set_attr("llvm.FLowerIntrinsic", diff --git a/src/target/llvm/intrin_rule_nvptx.cc b/src/target/llvm/intrin_rule_nvptx.cc index 02092618e131..33dd4f422681 100644 --- a/src/target/llvm/intrin_rule_nvptx.cc +++ b/src/target/llvm/intrin_rule_nvptx.cc @@ -58,7 +58,7 @@ inline void DispatchPureExternLibDevice(const TVMArgs& args, TVMRetValue* rv) { } namespace llvm { -using namespace tir; +using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.floor") .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); diff --git a/src/target/llvm/intrin_rule_rocm.cc b/src/target/llvm/intrin_rule_rocm.cc index acfe1e813933..e2e60133465d 100644 --- a/src/target/llvm/intrin_rule_rocm.cc +++ b/src/target/llvm/intrin_rule_rocm.cc @@ -94,7 +94,7 @@ inline void DispatchShuffle(const TVMArgs& targs, TVMRetValue* rv) { } namespace llvm { -using namespace tir; +using tir::FLowerIntrinsic; // dummy because we don't have the activemask TVM_REGISTER_OP("tir.tvm_warp_activemask") diff --git a/src/target/source/intrin_rule_aocl.cc b/src/target/source/intrin_rule_aocl.cc index e0bba3696428..73192d535641 100644 --- a/src/target/source/intrin_rule_aocl.cc +++ b/src/target/source/intrin_rule_aocl.cc @@ -28,7 +28,7 @@ namespace tvm { namespace codegen { namespace intrin { -using namespace tir; +using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.floor") .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); diff --git a/src/target/source/intrin_rule_cuda.cc b/src/target/source/intrin_rule_cuda.cc index 336f920bd470..5fa9e9cf64ed 100644 --- a/src/target/source/intrin_rule_cuda.cc +++ b/src/target/source/intrin_rule_cuda.cc @@ -30,7 +30,8 @@ namespace tvm { namespace codegen { namespace intrin { // Add float suffix to the intrinsics, CUDA fast math. -using namespace tir; +using tir::FLowerIntrinsic; + struct CUDAMath { std::string operator()(DataType t, std::string name) const { if (t.is_float()) { diff --git a/src/target/source/intrin_rule_metal.cc b/src/target/source/intrin_rule_metal.cc index 07e1940a473f..bff7ade9c84d 100644 --- a/src/target/source/intrin_rule_metal.cc +++ b/src/target/source/intrin_rule_metal.cc @@ -28,7 +28,7 @@ namespace tvm { namespace codegen { namespace intrin { -using namespace tir; +using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.floor") .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); diff --git a/src/target/source/intrin_rule_opencl.cc b/src/target/source/intrin_rule_opencl.cc index fdc83936f8d3..b97a8a305779 100644 --- a/src/target/source/intrin_rule_opencl.cc +++ b/src/target/source/intrin_rule_opencl.cc @@ -29,7 +29,7 @@ namespace tvm { namespace codegen { namespace intrin { -using namespace tir; +using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.floor") .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); diff --git a/src/target/source/intrin_rule_vhls.cc b/src/target/source/intrin_rule_vhls.cc index a91fb06246aa..a60d75d40723 100644 --- a/src/target/source/intrin_rule_vhls.cc +++ b/src/target/source/intrin_rule_vhls.cc @@ -28,7 +28,7 @@ namespace tvm { namespace codegen { namespace intrin { -using namespace tir; +using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.floor") .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); diff --git a/src/target/spirv/intrin_rule_spirv.cc b/src/target/spirv/intrin_rule_spirv.cc index 52bb27a20740..5252c03cb6aa 100644 --- a/src/target/spirv/intrin_rule_spirv.cc +++ b/src/target/spirv/intrin_rule_spirv.cc @@ -29,9 +29,7 @@ namespace tvm { namespace codegen { namespace spirv { - -using namespace runtime; -using namespace tir; +using tir::FLowerIntrinsic; // num_signature means number of arguments used to query signature From 4f3119d3d2c35fe62d202c4000949d301b23c2d8 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Wed, 14 Apr 2021 09:30:39 -0700 Subject: [PATCH 16/41] Fix clang format --- src/ir/op.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ir/op.cc b/src/ir/op.cc index d20ce913d21a..607f18d345c7 100644 --- a/src/ir/op.cc +++ b/src/ir/op.cc @@ -129,8 +129,7 @@ TVM_REGISTER_GLOBAL("ir.RegisterOpLowerIntrinsic") int can_override = 0) { if (Op::HasAttrMap(target + ".FLowerIntrinsic") && OpRegistry::Global()->Get(name) != nullptr && - Op::GetAttrMap(target + ".FLowerIntrinsic") - .count(Op::Get(name))) { + Op::GetAttrMap(target + ".FLowerIntrinsic").count(Op::Get(name))) { ICHECK(can_override) << "Op " << name << "'s intrinsic lowering function " << target << ".FlowerIntrinsic is already registered"; } From db2e5afd078c87da3dbcf4d525f3a14abc3216bc Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Wed, 14 Apr 2021 10:46:37 -0700 Subject: [PATCH 17/41] Fix python unused import --- python/tvm/ir/op.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tvm/ir/op.py b/python/tvm/ir/op.py index 430c2f1eba10..2045bd1cf312 100644 --- a/python/tvm/ir/op.py +++ b/python/tvm/ir/op.py @@ -16,7 +16,6 @@ # under the License. # pylint: disable=invalid-name """Primitive operators in the TVM IR.""" -import ctypes import tvm._ffi from tvm._ffi.base import _FFI_MODE from .expr import RelayExpr From 684e809de64c0ef72506021a5adba0bc938b9240 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Wed, 14 Apr 2021 21:44:00 -0700 Subject: [PATCH 18/41] Unify lower intrinsic functions --- python/tvm/target/__init__.py | 3 +- python/tvm/target/intrin.py | 56 ++++-------------------- python/tvm/topi/arm_cpu/tensor_intrin.py | 4 +- python/tvm/topi/cuda/nms.py | 8 ++-- src/tir/transforms/lower_intrin.cc | 2 +- tutorials/language/intrin_math.py | 6 ++- 6 files changed, 21 insertions(+), 58 deletions(-) diff --git a/python/tvm/target/__init__.py b/python/tvm/target/__init__.py index 9482e782041c..df63a71cfca9 100644 --- a/python/tvm/target/__init__.py +++ b/python/tvm/target/__init__.py @@ -60,5 +60,4 @@ from .generic_func import GenericFunc from .generic_func import generic_func, get_native_generic_func, override_native_generic_func from . import datatype -from . import codegen -from .intrin import register_intrin_rule +from . import codegen \ No newline at end of file diff --git a/python/tvm/target/intrin.py b/python/tvm/target/intrin.py index 6d205bc0447c..aeb52566dc55 100644 --- a/python/tvm/target/intrin.py +++ b/python/tvm/target/intrin.py @@ -19,50 +19,6 @@ from tvm.tir import call_pure_extern -# Intrinsic rule related code -def register_intrin_rule(target, intrin, f=None, override=False): - """Register an intrinsic function generation rule. - - Intrinsic generation rules are callback functions for - code generator to get device specific calls. - This function simply translates to. - - :code:`register_func("tvm.intrin.rule.%s.%s" % (target, intrin), f, override)` - - TVM may already pre-register intrinsic rules in the backend. - However, user can use this function to change the intrinsic translation - behavior or add new intrinsic rules during runtime. - - Parameters - ---------- - target : str - The name of codegen target. - - intrin : str - The name of the intrinsic. - - f : function, optional - The function to be registered. - - override: boolean optional - Whether override existing entry. - - Returns - ------- - fregister : function - Register function if f is not specified. - - Examples - -------- - The following code registers exp expansion rule for opencl. - - .. code-block:: python - - register_intrin_rule("opencl", "exp", my_exp_rule, override=True) - """ - return tvm._ffi.register_func("tvm.intrin.rule.%s.%s" % (target, intrin), f, override) - - def _rule_float_suffix(op): """Intrinsic rule: Add float suffix if it is float32. @@ -81,7 +37,7 @@ def _rule_float_suffix(op): See Also -------- - register_intrin_rule : The registration function for intrin rule. + tvm.ir.op.register_op_intrin_lowering : The registration function for intrinsic lowering rule. """ name = op.op.name assert name.startswith("tir.") @@ -112,7 +68,7 @@ def _rule_float_direct(op): See Also -------- - register_intrin_rule : The registration function for intrin rule. + tvm.ir.op.register_op_intrin_lowering : The registration function for intrinsic lowering rule. """ if str(op.dtype).startswith("float"): return call_pure_extern(op.dtype, op.op.name[4:], *op.args) @@ -120,6 +76,10 @@ def _rule_float_direct(op): # opencl pattern for exp -register_intrin_rule("opencl", "exp", _rule_float_direct, override=True) +tvm.ir.op.register_op_intrin_lowering( + "tir.exp", f=_rule_float_direct, target="opencl", override=True +) # default pattern for exp -register_intrin_rule("default", "exp", _rule_float_suffix, override=True) +tvm.ir.op.register_op_intrin_lowering( + "tir.exp", f=_rule_float_suffix, target="default", override=True +) diff --git a/python/tvm/topi/arm_cpu/tensor_intrin.py b/python/tvm/topi/arm_cpu/tensor_intrin.py index 4055d7b05c24..863921a30fdc 100644 --- a/python/tvm/topi/arm_cpu/tensor_intrin.py +++ b/python/tvm/topi/arm_cpu/tensor_intrin.py @@ -1054,6 +1054,6 @@ def _q_multiply_shift_arm(op): return tvm.tir.Select(s < 0, out_1, out_2) -tvm.target.intrin.register_intrin_rule( - "llvm.aarch64", "q_multiply_shift", _q_multiply_shift_arm, override=True +tvm.ir.op.register_op_intrin_lowering( + "tir.q_multiply_shift", f=_q_multiply_shift_arm, target="llvm.aarch64", override=True ) diff --git a/python/tvm/topi/cuda/nms.py b/python/tvm/topi/cuda/nms.py index 2789452cc10b..5ec828750776 100644 --- a/python/tvm/topi/cuda/nms.py +++ b/python/tvm/topi/cuda/nms.py @@ -51,10 +51,12 @@ def opencl_atomic_add_rule(op): raise RuntimeError("only support int32") -tvm.target.intrin.register_intrin_rule("cuda", "atomic_add", cuda_atomic_add_rule, override=True) +tvm.ir.op.register_op_intrin_lowering( + "tir.atomic_add", f=cuda_atomic_add_rule, target="cuda", override=True +) -tvm.target.intrin.register_intrin_rule( - "opencl", "atomic_add", opencl_atomic_add_rule, override=True +tvm.ir.op.register_op_intrin_lowering( + "tir.atomic_add", f=opencl_atomic_add_rule, target="opencl", override=True ) diff --git a/src/tir/transforms/lower_intrin.cc b/src/tir/transforms/lower_intrin.cc index 0d4186ae9e56..545a5776ac65 100644 --- a/src/tir/transforms/lower_intrin.cc +++ b/src/tir/transforms/lower_intrin.cc @@ -46,7 +46,7 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { bool is_llvm_aarch64 = (mtriple.find("aarch64") != std::string::npos); if (is_llvm_aarch64) { - patterns_.push_back(target + "." + "aarch64.FLowerIntrinsic"); + patterns_.push_back(target + ".aarch64.FLowerIntrinsic"); } patterns_.push_back("default.FLowerIntrinsic"); diff --git a/tutorials/language/intrin_math.py b/tutorials/language/intrin_math.py index 145322586b41..f926ba305db5 100644 --- a/tutorials/language/intrin_math.py +++ b/tutorials/language/intrin_math.py @@ -112,7 +112,7 @@ def my_cuda_math_rule(op): return op -tvm.target.register_intrin_rule("cuda", "exp", my_cuda_math_rule, override=True) +tvm.ir.op.register_op_intrin_lowering("tir.exp", f=my_cuda_math_rule, target="cuda", override=True) ###################################################################### # Register the rule to TVM with override option to override existing rule. # Notice the difference between the printed code from previous one: @@ -148,7 +148,9 @@ def my_cuda_mylog_rule(op): # new op registration is triggered by registering an attribute of the op tvm.ir.register_op_attr("tir.mylog", "TCallEffectKind", tvm.tir.CallEffectKind.Pure) -tvm.target.register_intrin_rule("cuda", "mylog", my_cuda_mylog_rule, override=True) +tvm.ir.op.register_op_intrin_lowering( + "tir.mylog", f=my_cuda_mylog_rule, target="cuda", override=True +) n = te.var("n") A = te.placeholder((n,), name="A") From dfd8c20d96363a184e2ffd18d601dec1ba832ae5 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 15 Apr 2021 00:47:58 -0700 Subject: [PATCH 19/41] Fix python format --- python/tvm/target/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/target/__init__.py b/python/tvm/target/__init__.py index df63a71cfca9..92d72b25b44d 100644 --- a/python/tvm/target/__init__.py +++ b/python/tvm/target/__init__.py @@ -60,4 +60,4 @@ from .generic_func import GenericFunc from .generic_func import generic_func, get_native_generic_func, override_native_generic_func from . import datatype -from . import codegen \ No newline at end of file +from . import codegen From ee2b4f9e1bc6116be025eadbfb8cc6166350d5d0 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 15 Apr 2021 09:39:26 -0700 Subject: [PATCH 20/41] Remove extra namespace --- src/target/intrin_rule.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/target/intrin_rule.h b/src/target/intrin_rule.h index 69196e1b2c39..c27c3a80e0d5 100644 --- a/src/target/intrin_rule.h +++ b/src/target/intrin_rule.h @@ -72,7 +72,7 @@ inline void DispatchPureExtern(const TVMArgs& args, TVMRetValue* rv) { for (auto arg : call->args) { new_args.push_back(arg); } - *rv = Call(call->dtype, tir::builtin::call_pure_extern(), new_args); + *rv = Call(call->dtype, builtin::call_pure_extern(), new_args); } else { *rv = e; } From 188bd1457cb42f929af6da092b3fe0d630287fcc Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 15 Apr 2021 15:17:04 -0700 Subject: [PATCH 21/41] Change plevel argument name --- python/tvm/ir/op.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tvm/ir/op.py b/python/tvm/ir/op.py index 2045bd1cf312..eeb54f5cef7f 100644 --- a/python/tvm/ir/op.py +++ b/python/tvm/ir/op.py @@ -133,7 +133,7 @@ def register_op_intrin_lowering( op_name, f=None, target="default", - plevel=10, + level=10, override=False, ): """Register Op lowering function @@ -149,7 +149,7 @@ def register_op_intrin_lowering( target : str The target string for given intrinsic lowering function - plevel : int + level : int The priority level override: boolean optional @@ -168,7 +168,7 @@ def _register(myf): assert isinstance(target, str) if not isinstance(myf, PackedFuncBase): myf = convert_to_tvm_func(myf) - _ffi_api.RegisterOpLowerIntrinsic(op_name, myf.handle, target, plevel, override) + _ffi_api.RegisterOpLowerIntrinsic(op_name, myf.handle, target, level, override) return myf if f: From 4aebebe5c3a664641873acefc40670564e4eec81 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Mon, 19 Apr 2021 14:06:22 -0700 Subject: [PATCH 22/41] Change to TypedPackedFunc --- include/tvm/tir/op_attr_types.h | 10 +++++++--- src/ir/op.cc | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/tvm/tir/op_attr_types.h b/include/tvm/tir/op_attr_types.h index 43a097d83150..963458ccee4a 100644 --- a/include/tvm/tir/op_attr_types.h +++ b/include/tvm/tir/op_attr_types.h @@ -34,7 +34,6 @@ namespace tvm { namespace tir { -using namespace runtime; /*! * \brief Global symbol of the op after lowering. */ @@ -46,9 +45,14 @@ using TGlobalSymbol = String; using TVectorizable = bool; /*! - * \brief The intrinsic lowering function for given OP. + * \brief The intrinsic lowering function for given op. */ -using FLowerIntrinsic = PackedFunc; +using FLowerIntrinsic = runtime::TypedPackedFunc; + +/*! + * \brief The legalization function for given tir op. + */ +using FLegalize = runtime::TypedPackedFunc; /*! * \brief The effect type of the call. diff --git a/src/ir/op.cc b/src/ir/op.cc index 607f18d345c7..a2fc6fd1d7e4 100644 --- a/src/ir/op.cc +++ b/src/ir/op.cc @@ -134,7 +134,7 @@ TVM_REGISTER_GLOBAL("ir.RegisterOpLowerIntrinsic") << ".FlowerIntrinsic is already registered"; } tvm::OpRegEntry::RegisterOrGet(name).set_name().set_attr( - target + ".FLowerIntrinsic", *static_cast(f), plevel); + target + ".FLowerIntrinsic", *static_cast(f), plevel); }); // helper to get internal dev function in objectref. From 7f65b0b681692002aa102137a04c3a654e86cb7c Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Mon, 19 Apr 2021 14:23:16 -0700 Subject: [PATCH 23/41] Remove tvm handle usage and unused defaults --- python/tvm/ir/op.py | 2 +- src/ir/op.cc | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/python/tvm/ir/op.py b/python/tvm/ir/op.py index eeb54f5cef7f..c99c596a5589 100644 --- a/python/tvm/ir/op.py +++ b/python/tvm/ir/op.py @@ -168,7 +168,7 @@ def _register(myf): assert isinstance(target, str) if not isinstance(myf, PackedFuncBase): myf = convert_to_tvm_func(myf) - _ffi_api.RegisterOpLowerIntrinsic(op_name, myf.handle, target, level, override) + _ffi_api.RegisterOpLowerIntrinsic(op_name, myf, target, level, override) return myf if f: diff --git a/src/ir/op.cc b/src/ir/op.cc index a2fc6fd1d7e4..f1aed74068b1 100644 --- a/src/ir/op.cc +++ b/src/ir/op.cc @@ -125,8 +125,7 @@ TVM_REGISTER_GLOBAL("ir.RegisterOpAttr") }); TVM_REGISTER_GLOBAL("ir.RegisterOpLowerIntrinsic") - .set_body_typed([](String name, TVMFunctionHandle f, String target = "default", int plevel = 10, - int can_override = 0) { + .set_body_typed([](String name, PackedFunc f, String target, int plevel, int can_override) { if (Op::HasAttrMap(target + ".FLowerIntrinsic") && OpRegistry::Global()->Get(name) != nullptr && Op::GetAttrMap(target + ".FLowerIntrinsic").count(Op::Get(name))) { @@ -134,7 +133,7 @@ TVM_REGISTER_GLOBAL("ir.RegisterOpLowerIntrinsic") << ".FlowerIntrinsic is already registered"; } tvm::OpRegEntry::RegisterOrGet(name).set_name().set_attr( - target + ".FLowerIntrinsic", *static_cast(f), plevel); + target + ".FLowerIntrinsic", f, plevel); }); // helper to get internal dev function in objectref. From aba2c48a3850fbaa64f63c9d9a9ff7297523bb4c Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Tue, 20 Apr 2021 11:31:38 -0700 Subject: [PATCH 24/41] Fix python side function --- python/tvm/ir/op.py | 29 +++++++++--------------- python/tvm/target/intrin.py | 4 ++-- python/tvm/topi/arm_cpu/tensor_intrin.py | 2 +- python/tvm/topi/cuda/nms.py | 4 ++-- tutorials/language/intrin_math.py | 4 ++-- vta/python/vta/environment.py | 6 ++--- 6 files changed, 21 insertions(+), 28 deletions(-) diff --git a/python/tvm/ir/op.py b/python/tvm/ir/op.py index c99c596a5589..14618ab8f152 100644 --- a/python/tvm/ir/op.py +++ b/python/tvm/ir/op.py @@ -131,8 +131,8 @@ def _register(v): def register_op_intrin_lowering( op_name, + target, f=None, - target="default", level=10, override=False, ): @@ -143,12 +143,12 @@ def register_op_intrin_lowering( op_name : str or function The op name - f : function, optional - The function to be registered. - target : str The target string for given intrinsic lowering function + f : function, optional + The function to be registered. + level : int The priority level @@ -160,17 +160,10 @@ def register_op_intrin_lowering( fregister : function Register op lowering function if f is not specified. """ - if not isinstance(op_name, str): - raise ValueError("expect string op name") - - def _register(myf): - """internal intrinsic lowering registration function""" - assert isinstance(target, str) - if not isinstance(myf, PackedFuncBase): - myf = convert_to_tvm_func(myf) - _ffi_api.RegisterOpLowerIntrinsic(op_name, myf, target, level, override) - return myf - - if f: - return _register(f) - return _register + + def _register(f): + """internal register function""" + _ffi_api.RegisterOpLowerIntrinsic(op_name, f, target, level, override) + return f + + return _register(f) if f is not None else _register diff --git a/python/tvm/target/intrin.py b/python/tvm/target/intrin.py index aeb52566dc55..3d31a81ac18c 100644 --- a/python/tvm/target/intrin.py +++ b/python/tvm/target/intrin.py @@ -77,9 +77,9 @@ def _rule_float_direct(op): # opencl pattern for exp tvm.ir.op.register_op_intrin_lowering( - "tir.exp", f=_rule_float_direct, target="opencl", override=True + "tir.exp", target="opencl", f=_rule_float_direct, override=True ) # default pattern for exp tvm.ir.op.register_op_intrin_lowering( - "tir.exp", f=_rule_float_suffix, target="default", override=True + "tir.exp", target="default", f=_rule_float_suffix, override=True ) diff --git a/python/tvm/topi/arm_cpu/tensor_intrin.py b/python/tvm/topi/arm_cpu/tensor_intrin.py index 863921a30fdc..91b0b6bacbff 100644 --- a/python/tvm/topi/arm_cpu/tensor_intrin.py +++ b/python/tvm/topi/arm_cpu/tensor_intrin.py @@ -1055,5 +1055,5 @@ def _q_multiply_shift_arm(op): tvm.ir.op.register_op_intrin_lowering( - "tir.q_multiply_shift", f=_q_multiply_shift_arm, target="llvm.aarch64", override=True + "tir.q_multiply_shift", target="llvm.aarch64", f=_q_multiply_shift_arm, override=True ) diff --git a/python/tvm/topi/cuda/nms.py b/python/tvm/topi/cuda/nms.py index 5ec828750776..9b2472dfc543 100644 --- a/python/tvm/topi/cuda/nms.py +++ b/python/tvm/topi/cuda/nms.py @@ -52,11 +52,11 @@ def opencl_atomic_add_rule(op): tvm.ir.op.register_op_intrin_lowering( - "tir.atomic_add", f=cuda_atomic_add_rule, target="cuda", override=True + "tir.atomic_add", target="cuda", f=cuda_atomic_add_rule, override=True ) tvm.ir.op.register_op_intrin_lowering( - "tir.atomic_add", f=opencl_atomic_add_rule, target="opencl", override=True + "tir.atomic_add", target="opencl", f=opencl_atomic_add_rule, override=True ) diff --git a/tutorials/language/intrin_math.py b/tutorials/language/intrin_math.py index f926ba305db5..6a93e0fc4a36 100644 --- a/tutorials/language/intrin_math.py +++ b/tutorials/language/intrin_math.py @@ -112,7 +112,7 @@ def my_cuda_math_rule(op): return op -tvm.ir.op.register_op_intrin_lowering("tir.exp", f=my_cuda_math_rule, target="cuda", override=True) +tvm.ir.op.register_op_intrin_lowering("tir.exp", target="cuda", f=my_cuda_math_rule, override=True) ###################################################################### # Register the rule to TVM with override option to override existing rule. # Notice the difference between the printed code from previous one: @@ -149,7 +149,7 @@ def my_cuda_mylog_rule(op): # new op registration is triggered by registering an attribute of the op tvm.ir.register_op_attr("tir.mylog", "TCallEffectKind", tvm.tir.CallEffectKind.Pure) tvm.ir.op.register_op_intrin_lowering( - "tir.mylog", f=my_cuda_mylog_rule, target="cuda", override=True + "tir.mylog", target="cuda", f=my_cuda_mylog_rule, override=True ) n = te.var("n") diff --git a/vta/python/vta/environment.py b/vta/python/vta/environment.py index 0864973b2586..791e2c657f65 100644 --- a/vta/python/vta/environment.py +++ b/vta/python/vta/environment.py @@ -292,7 +292,7 @@ def mem_info_acc_buffer(): # TVM Op related registration -@tvm.ir.op.register_op_intrin_lowering("tir.vta.coproc_sync") +@tvm.ir.op.register_op_intrin_lowering("tir.vta.coproc_sync", "default") def coproc_sync(op): _ = op return tvm.tir.call_extern( @@ -303,14 +303,14 @@ def coproc_sync(op): ) -@tvm.ir.op.register_op_intrin_lowering("tir.vta.coproc_dep_push") +@tvm.ir.op.register_op_intrin_lowering("tir.vta.coproc_dep_push", "default") def coproc_dep_push(op): return tvm.tir.call_extern( "int32", "VTADepPush", get_env().dev.command_handle, op.args[0], op.args[1] ) -@tvm.ir.op.register_op_intrin_lowering("tir.vta.coproc_dep_pop") +@tvm.ir.op.register_op_intrin_lowering("tir.vta.coproc_dep_pop", "default") def coproc_dep_pop(op): return tvm.tir.call_extern( "int32", "VTADepPop", get_env().dev.command_handle, op.args[0], op.args[1] From 62a2f05dd9a9c3b70da9b1da98334f4fe46a0261 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Tue, 20 Apr 2021 16:17:12 -0700 Subject: [PATCH 25/41] Fix PR according to suggestions --- python/tvm/ir/__init__.py | 2 +- python/tvm/ir/op.py | 16 ++-------------- python/tvm/target/intrin.py | 13 +++++-------- python/tvm/topi/arm_cpu/tensor_intrin.py | 3 ++- python/tvm/topi/cuda/nms.py | 9 +++------ src/ir/op.cc | 4 ++-- src/target/spirv/intrin_rule_spirv.cc | 1 + src/tir/transforms/lower_intrin.cc | 14 ++++++++------ tutorials/language/intrin_math.py | 11 +++++------ vta/python/vta/environment.py | 7 ++++--- 10 files changed, 33 insertions(+), 47 deletions(-) diff --git a/python/tvm/ir/__init__.py b/python/tvm/ir/__init__.py index e35077bb5aab..a12d3e9855f0 100644 --- a/python/tvm/ir/__init__.py +++ b/python/tvm/ir/__init__.py @@ -23,7 +23,7 @@ from .tensor_type import TensorType from .type_relation import TypeCall, TypeRelation from .expr import BaseExpr, PrimExpr, RelayExpr, GlobalVar, Range -from .op import Op, register_op_attr +from .op import Op, register_op_attr, register_intrin_lowering from .function import CallingConv, BaseFunc from .adt import Constructor, TypeData from .module import IRModule diff --git a/python/tvm/ir/op.py b/python/tvm/ir/op.py index 14618ab8f152..dcf510551474 100644 --- a/python/tvm/ir/op.py +++ b/python/tvm/ir/op.py @@ -17,21 +17,9 @@ # pylint: disable=invalid-name """Primitive operators in the TVM IR.""" import tvm._ffi -from tvm._ffi.base import _FFI_MODE from .expr import RelayExpr from . import _ffi_api -try: - # pylint: disable=wrong-import-position,unused-import - if _FFI_MODE == "ctypes": - raise ImportError() - from tvm._ffi._cy3.core import convert_to_tvm_func, _get_global_func, PackedFuncBase -except (RuntimeError, ImportError) as error: - # pylint: disable=wrong-import-position,unused-import - if _FFI_MODE == "cython": - raise error - from tvm._ffi._ctypes.packed_func import convert_to_tvm_func, _get_global_func, PackedFuncBase - @tvm._ffi.register_object("Op") class Op(RelayExpr): @@ -129,7 +117,7 @@ def _register(v): return _register(value) if value is not None else _register -def register_op_intrin_lowering( +def register_intrin_lowering( op_name, target, f=None, @@ -140,7 +128,7 @@ def register_op_intrin_lowering( Parameters ---------- - op_name : str or function + op_name : str The op name target : str diff --git a/python/tvm/target/intrin.py b/python/tvm/target/intrin.py index 3d31a81ac18c..066eee5a113e 100644 --- a/python/tvm/target/intrin.py +++ b/python/tvm/target/intrin.py @@ -16,6 +16,7 @@ # under the License. """Target dependent intrinsic registration.""" import tvm._ffi +from tvm.ir import register_intrin_lowering from tvm.tir import call_pure_extern @@ -37,7 +38,7 @@ def _rule_float_suffix(op): See Also -------- - tvm.ir.op.register_op_intrin_lowering : The registration function for intrinsic lowering rule. + register_intrin_lowering : The registration function for intrinsic lowering rule. """ name = op.op.name assert name.startswith("tir.") @@ -68,7 +69,7 @@ def _rule_float_direct(op): See Also -------- - tvm.ir.op.register_op_intrin_lowering : The registration function for intrinsic lowering rule. + register_intrin_lowering : The registration function for intrinsic lowering rule. """ if str(op.dtype).startswith("float"): return call_pure_extern(op.dtype, op.op.name[4:], *op.args) @@ -76,10 +77,6 @@ def _rule_float_direct(op): # opencl pattern for exp -tvm.ir.op.register_op_intrin_lowering( - "tir.exp", target="opencl", f=_rule_float_direct, override=True -) +register_intrin_lowering("tir.exp", target="opencl", f=_rule_float_direct, override=True) # default pattern for exp -tvm.ir.op.register_op_intrin_lowering( - "tir.exp", target="default", f=_rule_float_suffix, override=True -) +register_intrin_lowering("tir.exp", target="default", f=_rule_float_suffix, override=True) diff --git a/python/tvm/topi/arm_cpu/tensor_intrin.py b/python/tvm/topi/arm_cpu/tensor_intrin.py index 91b0b6bacbff..76588f081c46 100644 --- a/python/tvm/topi/arm_cpu/tensor_intrin.py +++ b/python/tvm/topi/arm_cpu/tensor_intrin.py @@ -19,6 +19,7 @@ import tvm from tvm import te +from tvm.ir import register_intrin_lowering def gemm_4x4_int8_int8_int32(M, N, K, unroll, in_type): @@ -1054,6 +1055,6 @@ def _q_multiply_shift_arm(op): return tvm.tir.Select(s < 0, out_1, out_2) -tvm.ir.op.register_op_intrin_lowering( +register_intrin_lowering( "tir.q_multiply_shift", target="llvm.aarch64", f=_q_multiply_shift_arm, override=True ) diff --git a/python/tvm/topi/cuda/nms.py b/python/tvm/topi/cuda/nms.py index 9b2472dfc543..94f8e19095a1 100644 --- a/python/tvm/topi/cuda/nms.py +++ b/python/tvm/topi/cuda/nms.py @@ -21,6 +21,7 @@ from tvm import te from tvm.contrib import nvcc from tvm.contrib.thrust import can_use_thrust, can_use_rocthrust +from tvm.ir import register_intrin_lowering from tvm.tir import if_then_else from .sort import argsort, argsort_thrust from .scan import exclusive_scan @@ -51,13 +52,9 @@ def opencl_atomic_add_rule(op): raise RuntimeError("only support int32") -tvm.ir.op.register_op_intrin_lowering( - "tir.atomic_add", target="cuda", f=cuda_atomic_add_rule, override=True -) +register_intrin_lowering("tir.atomic_add", target="cuda", f=cuda_atomic_add_rule, override=True) -tvm.ir.op.register_op_intrin_lowering( - "tir.atomic_add", target="opencl", f=opencl_atomic_add_rule, override=True -) +register_intrin_lowering("tir.atomic_add", target="opencl", f=opencl_atomic_add_rule, override=True) def atomic_add(x, y): diff --git a/src/ir/op.cc b/src/ir/op.cc index f1aed74068b1..a4a8650835aa 100644 --- a/src/ir/op.cc +++ b/src/ir/op.cc @@ -129,10 +129,10 @@ TVM_REGISTER_GLOBAL("ir.RegisterOpLowerIntrinsic") if (Op::HasAttrMap(target + ".FLowerIntrinsic") && OpRegistry::Global()->Get(name) != nullptr && Op::GetAttrMap(target + ".FLowerIntrinsic").count(Op::Get(name))) { - ICHECK(can_override) << "Op " << name << "'s intrinsic lowering function " << target + CHECK(can_override) << "Op " << name << "'s intrinsic lowering function " << target << ".FlowerIntrinsic is already registered"; } - tvm::OpRegEntry::RegisterOrGet(name).set_name().set_attr( + tvm::OpRegEntry::RegisterOrGet(name).set_attr( target + ".FLowerIntrinsic", f, plevel); }); diff --git a/src/target/spirv/intrin_rule_spirv.cc b/src/target/spirv/intrin_rule_spirv.cc index 5252c03cb6aa..fbf20215f290 100644 --- a/src/target/spirv/intrin_rule_spirv.cc +++ b/src/target/spirv/intrin_rule_spirv.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include namespace tvm { diff --git a/src/tir/transforms/lower_intrin.cc b/src/tir/transforms/lower_intrin.cc index 545a5776ac65..1fb9e214c765 100644 --- a/src/tir/transforms/lower_intrin.cc +++ b/src/tir/transforms/lower_intrin.cc @@ -58,17 +58,19 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { PrimExpr VisitExpr_(const CallNode* op) final { if (auto* ptr_op = op->op.as()) { - for (size_t i = 0; i < patterns_.size(); ++i) - if (Op::HasAttrMap(patterns_[i])) { - auto default_intrin = Op::GetAttrMap(patterns_[i]); - FLowerIntrinsic f = default_intrin.get(GetRef(ptr_op), nullptr); - PrimExpr e = GetRef(op); + for (const std::string& pattern : patterns_) + if (Op::HasAttrMap(pattern)) { + auto f_lower_intrin_map = Op::GetAttrMap(pattern); + FLowerIntrinsic f = f_lower_intrin_map.get(GetRef(ptr_op), nullptr); if (f != nullptr) { + PrimExpr e = GetRef(op); PrimExpr r = f(e); ICHECK(r.defined()) << "intrinsic rule must always return valid Expr"; if (!r.same_as(e)) { r = this->VisitExpr(r); - if (r.defined()) return r; + if (r.defined()) { + return r; + } } } } diff --git a/tutorials/language/intrin_math.py b/tutorials/language/intrin_math.py index 6a93e0fc4a36..7c47600c0a61 100644 --- a/tutorials/language/intrin_math.py +++ b/tutorials/language/intrin_math.py @@ -29,10 +29,11 @@ the interface via tvm's intrinsic API. """ from __future__ import absolute_import, print_function +import numpy as np import tvm from tvm import te -import numpy as np +from tvm.ir import register_op_attr, register_intrin_lowering ###################################################################### # Direct Declare Extern Math Call @@ -112,7 +113,7 @@ def my_cuda_math_rule(op): return op -tvm.ir.op.register_op_intrin_lowering("tir.exp", target="cuda", f=my_cuda_math_rule, override=True) +register_intrin_lowering("tir.exp", target="cuda", f=my_cuda_math_rule, override=True) ###################################################################### # Register the rule to TVM with override option to override existing rule. # Notice the difference between the printed code from previous one: @@ -147,10 +148,8 @@ def my_cuda_mylog_rule(op): # new op registration is triggered by registering an attribute of the op -tvm.ir.register_op_attr("tir.mylog", "TCallEffectKind", tvm.tir.CallEffectKind.Pure) -tvm.ir.op.register_op_intrin_lowering( - "tir.mylog", target="cuda", f=my_cuda_mylog_rule, override=True -) +register_op_attr("tir.mylog", "TCallEffectKind", tvm.tir.CallEffectKind.Pure) +register_intrin_lowering("tir.mylog", target="cuda", f=my_cuda_mylog_rule, override=True) n = te.var("n") A = te.placeholder((n,), name="A") diff --git a/vta/python/vta/environment.py b/vta/python/vta/environment.py index 791e2c657f65..9181a44fa523 100644 --- a/vta/python/vta/environment.py +++ b/vta/python/vta/environment.py @@ -23,6 +23,7 @@ import copy import tvm from tvm import te +from tvm.ir import register_intrin_lowering from . import intrin @@ -292,7 +293,7 @@ def mem_info_acc_buffer(): # TVM Op related registration -@tvm.ir.op.register_op_intrin_lowering("tir.vta.coproc_sync", "default") +@register_intrin_lowering("tir.vta.coproc_sync", "default") def coproc_sync(op): _ = op return tvm.tir.call_extern( @@ -303,14 +304,14 @@ def coproc_sync(op): ) -@tvm.ir.op.register_op_intrin_lowering("tir.vta.coproc_dep_push", "default") +@register_intrin_lowering("tir.vta.coproc_dep_push", "default") def coproc_dep_push(op): return tvm.tir.call_extern( "int32", "VTADepPush", get_env().dev.command_handle, op.args[0], op.args[1] ) -@tvm.ir.op.register_op_intrin_lowering("tir.vta.coproc_dep_pop", "default") +@register_intrin_lowering("tir.vta.coproc_dep_pop", "default") def coproc_dep_pop(op): return tvm.tir.call_extern( "int32", "VTADepPop", get_env().dev.command_handle, op.args[0], op.args[1] From be56019efc39a3e1688d21f29e5c223e7533533f Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Tue, 20 Apr 2021 16:37:07 -0700 Subject: [PATCH 26/41] Fix clang format --- src/ir/op.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ir/op.cc b/src/ir/op.cc index a4a8650835aa..a2918eafa3c5 100644 --- a/src/ir/op.cc +++ b/src/ir/op.cc @@ -130,10 +130,10 @@ TVM_REGISTER_GLOBAL("ir.RegisterOpLowerIntrinsic") OpRegistry::Global()->Get(name) != nullptr && Op::GetAttrMap(target + ".FLowerIntrinsic").count(Op::Get(name))) { CHECK(can_override) << "Op " << name << "'s intrinsic lowering function " << target - << ".FlowerIntrinsic is already registered"; + << ".FlowerIntrinsic is already registered"; } - tvm::OpRegEntry::RegisterOrGet(name).set_attr( - target + ".FLowerIntrinsic", f, plevel); + tvm::OpRegEntry::RegisterOrGet(name).set_attr(target + ".FLowerIntrinsic", f, + plevel); }); // helper to get internal dev function in objectref. From e23870973135bab1d73f2ebfe7434dc4533b140b Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Tue, 20 Apr 2021 21:49:35 -0700 Subject: [PATCH 27/41] Fix python format --- python/tvm/target/intrin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tvm/target/intrin.py b/python/tvm/target/intrin.py index 066eee5a113e..a8154cb97640 100644 --- a/python/tvm/target/intrin.py +++ b/python/tvm/target/intrin.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. """Target dependent intrinsic registration.""" -import tvm._ffi from tvm.ir import register_intrin_lowering from tvm.tir import call_pure_extern From 2642ca79879374df133028a532a5a606351e962e Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Tue, 20 Apr 2021 22:32:49 -0700 Subject: [PATCH 28/41] Add argument requirement for register_intrin_lowering --- python/tvm/ir/op.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/tvm/ir/op.py b/python/tvm/ir/op.py index dcf510551474..a9bf82b5cb8d 100644 --- a/python/tvm/ir/op.py +++ b/python/tvm/ir/op.py @@ -120,6 +120,7 @@ def _register(v): def register_intrin_lowering( op_name, target, + *, f=None, level=10, override=False, From 75c276cc09fff787fa9e6931d56bbd0da1bb7757 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Wed, 21 Apr 2021 10:04:10 -0700 Subject: [PATCH 29/41] Add can_override field in set_attr --- include/tvm/ir/op.h | 9 +++++---- src/ir/op.cc | 6 +++--- src/node/attr_registry.h | 5 +++-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/include/tvm/ir/op.h b/include/tvm/ir/op.h index 9456ea80d860..073502d5bd43 100644 --- a/include/tvm/ir/op.h +++ b/include/tvm/ir/op.h @@ -277,7 +277,7 @@ class OpRegEntry { */ template inline OpRegEntry& set_attr(const std::string& attr_name, // NOLINT(*) - const ValueType& value, int plevel = 10); + const ValueType& value, int plevel = 10, int can_override = 0); /*! * \brief Resets an attr of the registry. @@ -311,7 +311,8 @@ class OpRegEntry { // return internal pointer to op. inline OpNode* get(); // update the attribute OpAttrMap - TVM_DLL void UpdateAttr(const String& key, runtime::TVMRetValue value, int plevel); + TVM_DLL void UpdateAttr(const String& key, runtime::TVMRetValue value, + int plevel, int can_override = 0); }; /*! @@ -461,11 +462,11 @@ inline OpRegEntry& OpRegEntry::set_support_level(int32_t n) { // NOLINT(*) template inline OpRegEntry& OpRegEntry::set_attr( // NOLINT(*) - const std::string& attr_name, const ValueType& value, int plevel) { + const std::string& attr_name, const ValueType& value, int plevel, int can_override) { ICHECK_GT(plevel, 0) << "plevel in set_attr must be greater than 0"; runtime::TVMRetValue rv; rv = value; - UpdateAttr(attr_name, rv, plevel); + UpdateAttr(attr_name, rv, plevel, can_override); return *this; } diff --git a/src/ir/op.cc b/src/ir/op.cc index a2918eafa3c5..64d3b62b04e8 100644 --- a/src/ir/op.cc +++ b/src/ir/op.cc @@ -71,8 +71,8 @@ void OpRegEntry::reset_attr(const std::string& attr_name) { OpRegistry::Global()->ResetAttr(attr_name, op_); } -void OpRegEntry::UpdateAttr(const String& key, TVMRetValue value, int plevel) { - OpRegistry::Global()->UpdateAttr(key, op_, value, plevel); +void OpRegEntry::UpdateAttr(const String& key, TVMRetValue value, int plevel, int can_override) { + OpRegistry::Global()->UpdateAttr(key, op_, value, plevel, can_override); } // Frontend APIs @@ -133,7 +133,7 @@ TVM_REGISTER_GLOBAL("ir.RegisterOpLowerIntrinsic") << ".FlowerIntrinsic is already registered"; } tvm::OpRegEntry::RegisterOrGet(name).set_attr(target + ".FLowerIntrinsic", f, - plevel); + plevel, can_override); }); // helper to get internal dev function in objectref. diff --git a/src/node/attr_registry.h b/src/node/attr_registry.h index f84be1467453..f6ead373084f 100644 --- a/src/node/attr_registry.h +++ b/src/node/attr_registry.h @@ -95,7 +95,7 @@ class AttrRegistry { * \param plevel The support level. */ void UpdateAttr(const String& attr_name, const KeyType& key, runtime::TVMRetValue value, - int plevel) { + int plevel, int can_override = 0) { using runtime::TVMRetValue; std::lock_guard lock(mutex_); auto& op_map = attrs_[attr_name]; @@ -109,7 +109,8 @@ class AttrRegistry { op_map->data_.resize(index + 1, std::make_pair(TVMRetValue(), 0)); } std::pair& p = op_map->data_[index]; - ICHECK(p.second != plevel) << "Attribute " << attr_name << " of " << key->AttrRegistryName() + ICHECK(can_override || p.second != plevel) << "Attribute " << attr_name << " of " + << key->AttrRegistryName() << " is already registered with same plevel=" << plevel; ICHECK(value.type_code() != kTVMNullptr) << "Registered packed_func is Null for " << attr_name << " of operator " << key->AttrRegistryName(); From 11969773a7f2af3213b0eed80801067dcb5cc129 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Wed, 21 Apr 2021 10:08:23 -0700 Subject: [PATCH 30/41] Fix clang format --- include/tvm/ir/op.h | 4 ++-- src/node/attr_registry.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/tvm/ir/op.h b/include/tvm/ir/op.h index 073502d5bd43..71258a69fc17 100644 --- a/include/tvm/ir/op.h +++ b/include/tvm/ir/op.h @@ -311,8 +311,8 @@ class OpRegEntry { // return internal pointer to op. inline OpNode* get(); // update the attribute OpAttrMap - TVM_DLL void UpdateAttr(const String& key, runtime::TVMRetValue value, - int plevel, int can_override = 0); + TVM_DLL void UpdateAttr(const String& key, runtime::TVMRetValue value, int plevel, + int can_override = 0); }; /*! diff --git a/src/node/attr_registry.h b/src/node/attr_registry.h index f6ead373084f..dda96b95f7ef 100644 --- a/src/node/attr_registry.h +++ b/src/node/attr_registry.h @@ -109,9 +109,9 @@ class AttrRegistry { op_map->data_.resize(index + 1, std::make_pair(TVMRetValue(), 0)); } std::pair& p = op_map->data_[index]; - ICHECK(can_override || p.second != plevel) << "Attribute " << attr_name << " of " - << key->AttrRegistryName() - << " is already registered with same plevel=" << plevel; + ICHECK(can_override || p.second != plevel) + << "Attribute " << attr_name << " of " << key->AttrRegistryName() + << " is already registered with same plevel=" << plevel; ICHECK(value.type_code() != kTVMNullptr) << "Registered packed_func is Null for " << attr_name << " of operator " << key->AttrRegistryName(); if (p.second < plevel && value.type_code() != kTVMNullptr) { From 3d88f620e6388f9a7889d4df2df6684141c05661 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Wed, 21 Apr 2021 10:22:19 -0700 Subject: [PATCH 31/41] Add param description for set_attr --- include/tvm/ir/op.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/tvm/ir/op.h b/include/tvm/ir/op.h index 71258a69fc17..5fff1268739a 100644 --- a/include/tvm/ir/op.h +++ b/include/tvm/ir/op.h @@ -270,6 +270,9 @@ class OpRegEntry { * an higher priority level attribute * will replace lower priority level attribute. * Must be bigger than 0. + * \param can_override Whether to explicitly allow + * overriding the attribute, any non-zero value + * implies allowance and 0 means disallowance. * * Cannot set with same plevel twice in the code. * From 2caa5cae3ec3f69d399c91e160f06ad2b5eed33f Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 22 Apr 2021 11:11:10 -0700 Subject: [PATCH 32/41] Modify intrinsic lowering funcs --- src/target/intrin_rule.cc | 77 +++++++++---------------- src/target/intrin_rule.h | 7 +-- src/target/llvm/intrin_rule_hexagon.cc | 22 +++---- src/target/llvm/intrin_rule_llvm.cc | 36 ++++++------ src/target/llvm/intrin_rule_llvm.h | 10 ++-- src/target/llvm/intrin_rule_nvptx.cc | 49 ++++++++-------- src/target/source/intrin_rule_aocl.cc | 52 +++++++---------- src/target/source/intrin_rule_cuda.cc | 75 +++++++++++------------- src/target/source/intrin_rule_metal.cc | 40 ++++++------- src/target/source/intrin_rule_opencl.cc | 47 ++++++++------- src/target/source/intrin_rule_vhls.cc | 38 ++++++------ 11 files changed, 205 insertions(+), 248 deletions(-) diff --git a/src/target/intrin_rule.cc b/src/target/intrin_rule.cc index 86fde67a63a4..33207871a31c 100644 --- a/src/target/intrin_rule.cc +++ b/src/target/intrin_rule.cc @@ -32,106 +32,85 @@ namespace intrin { using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.exp").set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.erf").set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.log").set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.log2") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.log10") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.log1p") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.tanh") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.tan").set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.atan") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.atanh") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.atan2") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.cos").set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.acos") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.cosh") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.acosh") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sin").set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.asin") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sinh") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.asinh") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.hypot") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.nextafter") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.copysign") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.ldexp") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sqrt") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.floor") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.ceil") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.round") - .set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.rsqrt") .set_attr("default.FLowerIntrinsic", @@ -144,7 +123,7 @@ TVM_REGISTER_OP("tir.rsqrt") })); TVM_REGISTER_OP("tir.pow").set_attr("default.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.sigmoid") .set_attr("default.FLowerIntrinsic", diff --git a/src/target/intrin_rule.h b/src/target/intrin_rule.h index c27c3a80e0d5..6a517a9abd24 100644 --- a/src/target/intrin_rule.h +++ b/src/target/intrin_rule.h @@ -55,8 +55,7 @@ struct Direct { // Call pure extern function. template -inline void DispatchPureExtern(const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; +inline PrimExpr DispatchPureExtern(const PrimExpr& e) { const CallNode* call = e.as(); ICHECK(call != nullptr); // Use string based dispatch to extern for backward compact @@ -72,9 +71,9 @@ inline void DispatchPureExtern(const TVMArgs& args, TVMRetValue* rv) { for (auto arg : call->args) { new_args.push_back(arg); } - *rv = Call(call->dtype, builtin::call_pure_extern(), new_args); + return Call(call->dtype, builtin::call_pure_extern(), new_args); } else { - *rv = e; + return e; } } diff --git a/src/target/llvm/intrin_rule_hexagon.cc b/src/target/llvm/intrin_rule_hexagon.cc index fde4f79e0072..82f7d5051391 100644 --- a/src/target/llvm/intrin_rule_hexagon.cc +++ b/src/target/llvm/intrin_rule_hexagon.cc @@ -29,44 +29,44 @@ namespace llvm { using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.exp").set_attr( - "hexagon.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>)); + "hexagon.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>); TVM_REGISTER_OP("tir.fma").set_attr( - "hexagon.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3>)); + "hexagon.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3>); TVM_REGISTER_OP("tir.log").set_attr( - "hexagon.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1>)); + "hexagon.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1>); TVM_REGISTER_OP("tir.sqrt") .set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1>); TVM_REGISTER_OP("tir.floor") .set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::floor, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::floor, 1>); TVM_REGISTER_OP("tir.ceil") .set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ceil, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::ceil, 1>); TVM_REGISTER_OP("tir.trunc") .set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::trunc, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::trunc, 1>); TVM_REGISTER_OP("tir.fabs") .set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fabs, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::fabs, 1>); TVM_REGISTER_OP("tir.round") .set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::round, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::round, 1>); TVM_REGISTER_OP("tir.pow").set_attr( - "hexagon.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 1>)); + "hexagon.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 1>); TVM_REGISTER_OP("tir.ctpop") .set_attr("hexagon.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>); } // namespace llvm } // namespace codegen diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index 539246e6c584..c66783db706a 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -34,14 +34,14 @@ using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.prefetch") .set_attr("llvm.FLowerIntrinsic", - PackedFunc(DispatchLLVMIntrin<::llvm::Intrinsic::prefetch, 4>)); + DispatchLLVMIntrin<::llvm::Intrinsic::prefetch, 4>); TVM_REGISTER_OP("tir.exp").set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>)); + "llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>); TVM_REGISTER_OP("tir.exp2") .set_attr("llvm.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp2, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::exp2, 1>); // TODO(tvm-team): migrate the legalization transformations as a separate // set of rules in TIR that can be shared across backends. @@ -60,46 +60,46 @@ TVM_REGISTER_OP("tir.exp10") })); TVM_REGISTER_OP("tir.fma").set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3>)); + "llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3>); TVM_REGISTER_OP("tir.log").set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1>)); + "llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1>); TVM_REGISTER_OP("tir.log2") .set_attr("llvm.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log2, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::log2, 1>); TVM_REGISTER_OP("tir.log10") .set_attr("llvm.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::log10, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::log10, 1>); TVM_REGISTER_OP("tir.sqrt") .set_attr("llvm.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1>); TVM_REGISTER_OP("tir.floor") .set_attr("llvm.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::floor, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::floor, 1>); TVM_REGISTER_OP("tir.ceil") .set_attr("llvm.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ceil, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::ceil, 1>); TVM_REGISTER_OP("tir.trunc") .set_attr("llvm.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::trunc, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::trunc, 1>); TVM_REGISTER_OP("tir.fabs") .set_attr("llvm.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::fabs, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::fabs, 1>); TVM_REGISTER_OP("tir.round") .set_attr("llvm.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::round, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::round, 1>); TVM_REGISTER_OP("tir.nearbyint") .set_attr("llvm.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::nearbyint, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::nearbyint, 1>); TVM_REGISTER_OP("tir.tanh") .set_attr("llvm.FLowerIntrinsic", @@ -123,11 +123,11 @@ TVM_REGISTER_OP("tir.tanh") })); TVM_REGISTER_OP("tir.pow").set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 2>)); + "llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 2>); TVM_REGISTER_OP("tir.popcount") .set_attr("llvm.FLowerIntrinsic", - PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>)); + DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>); TVM_REGISTER_OP("tir.tan").set_attr( "llvm.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { @@ -140,7 +140,7 @@ TVM_REGISTER_OP("tir.tan").set_attr( })); TVM_REGISTER_OP("tir.cos").set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::cos, 1>)); + "llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::cos, 1>); TVM_REGISTER_OP("tir.cosh") .set_attr("llvm.FLowerIntrinsic", @@ -160,7 +160,7 @@ TVM_REGISTER_OP("tir.cosh") })); TVM_REGISTER_OP("tir.sin").set_attr( - "llvm.FLowerIntrinsic", PackedFunc(DispatchLLVMPureIntrin<::llvm::Intrinsic::sin, 1>)); + "llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::sin, 1>); TVM_REGISTER_OP("tir.sinh") .set_attr("llvm.FLowerIntrinsic", diff --git a/src/target/llvm/intrin_rule_llvm.h b/src/target/llvm/intrin_rule_llvm.h index 99463793d8de..a926d7b9be31 100644 --- a/src/target/llvm/intrin_rule_llvm.h +++ b/src/target/llvm/intrin_rule_llvm.h @@ -38,8 +38,7 @@ namespace tvm { namespace codegen { // num_signature means number of arguments used to query signature template -inline void DispatchLLVMPureIntrin(const TVMArgs& targs, TVMRetValue* rv) { - PrimExpr e = targs[0]; +inline PrimExpr DispatchLLVMPureIntrin(const PrimExpr& e) { const tir::CallNode* call = e.as(); ICHECK(call != nullptr); Array cargs; @@ -50,12 +49,11 @@ inline void DispatchLLVMPureIntrin(const TVMArgs& targs, TVMRetValue* rv) { for (PrimExpr arg : call->args) { cargs.push_back(arg); } - *rv = tir::Call(call->dtype, tir::builtin::call_llvm_pure_intrin(), cargs); + return tir::Call(call->dtype, tir::builtin::call_llvm_pure_intrin(), cargs); } template -inline void DispatchLLVMIntrin(const TVMArgs& targs, TVMRetValue* rv) { - PrimExpr e = targs[0]; +inline PrimExpr DispatchLLVMIntrin(const PrimExpr& e) { const tir::CallNode* call = e.as(); ICHECK(call != nullptr); Array cargs; @@ -65,7 +63,7 @@ inline void DispatchLLVMIntrin(const TVMArgs& targs, TVMRetValue* rv) { for (PrimExpr arg : call->args) { cargs.push_back(arg); } - *rv = tir::Call(call->dtype, tir::builtin::call_llvm_intrin(), cargs); + return tir::Call(call->dtype, tir::builtin::call_llvm_intrin(), cargs); } } // namespace codegen diff --git a/src/target/llvm/intrin_rule_nvptx.cc b/src/target/llvm/intrin_rule_nvptx.cc index 33dd4f422681..0ee01a63c042 100644 --- a/src/target/llvm/intrin_rule_nvptx.cc +++ b/src/target/llvm/intrin_rule_nvptx.cc @@ -33,8 +33,7 @@ namespace tvm { namespace codegen { -inline void DispatchPureExternLibDevice(const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; +inline PrimExpr DispatchPureExternLibDevice(const PrimExpr& e) { using namespace tir; const CallNode* call = e.as(); ICHECK(call != nullptr); @@ -54,77 +53,77 @@ inline void DispatchPureExternLibDevice(const TVMArgs& args, TVMRetValue* rv) { for (auto arg : call->args) { new_args.push_back(arg); } - *rv = Call(call->dtype, builtin::call_pure_extern(), new_args); + return Call(call->dtype, builtin::call_pure_extern(), new_args); } namespace llvm { using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.floor") - .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); + .set_attr("nvptx.FLowerIntrinsic", DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.ceil") - .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); + .set_attr("nvptx.FLowerIntrinsic", DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.round") - .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); + .set_attr("nvptx.FLowerIntrinsic", DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.trunc") - .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); + .set_attr("nvptx.FLowerIntrinsic", DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.fabs") - .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); + .set_attr("nvptx.FLowerIntrinsic", DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.exp").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.exp2") - .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); + .set_attr("nvptx.FLowerIntrinsic", DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.exp10") - .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); + .set_attr("nvptx.FLowerIntrinsic", DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.erf").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.fma").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.log").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.log2") - .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); + .set_attr("nvptx.FLowerIntrinsic", DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.log10") - .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); + .set_attr("nvptx.FLowerIntrinsic", DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.sqrt") - .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); + .set_attr("nvptx.FLowerIntrinsic", DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.pow").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.tanh") - .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); + .set_attr("nvptx.FLowerIntrinsic", DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.tan").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.cos").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.cosh") - .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); + .set_attr("nvptx.FLowerIntrinsic", DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.sin").set_attr("nvptx.FLowerIntrinsic", - PackedFunc(DispatchPureExternLibDevice)); + DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.sinh") - .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); + .set_attr("nvptx.FLowerIntrinsic", DispatchPureExternLibDevice); TVM_REGISTER_OP("tir.atan") - .set_attr("nvptx.FLowerIntrinsic", PackedFunc(DispatchPureExternLibDevice)); + .set_attr("nvptx.FLowerIntrinsic", DispatchPureExternLibDevice); } // namespace llvm } // namespace codegen diff --git a/src/target/source/intrin_rule_aocl.cc b/src/target/source/intrin_rule_aocl.cc index 73192d535641..09fc087ca252 100644 --- a/src/target/source/intrin_rule_aocl.cc +++ b/src/target/source/intrin_rule_aocl.cc @@ -31,78 +31,70 @@ namespace intrin { using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.floor") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.ceil") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.trunc") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.fabs") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.round") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.exp").set_attr("aocl.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.log").set_attr("aocl.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.tanh") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sqrt") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.pow").set_attr("aocl.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.popcount") - .set_attr("aocl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("aocl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.floor") - .set_attr("aocl_sw_emu.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.ceil") - .set_attr("aocl_sw_emu.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.trunc") - .set_attr("aocl_sw_emu.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.fabs") - .set_attr("aocl_sw_emu.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.round") - .set_attr("aocl_sw_emu.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.exp").set_attr("aocl_sw_emu.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.log").set_attr("aocl_sw_emu.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.tanh") - .set_attr("aocl_sw_emu.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sqrt") - .set_attr("aocl_sw_emu.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.pow").set_attr("aocl_sw_emu.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.popcount") - .set_attr("aocl_sw_emu.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("aocl_sw_emu.FLowerIntrinsic", DispatchPureExtern); } // namespace intrin } // namespace codegen diff --git a/src/target/source/intrin_rule_cuda.cc b/src/target/source/intrin_rule_cuda.cc index 5fa9e9cf64ed..edbfb4fe5b83 100644 --- a/src/target/source/intrin_rule_cuda.cc +++ b/src/target/source/intrin_rule_cuda.cc @@ -112,109 +112,100 @@ struct CUDAWarpIntrinsic { } }; -static void DispatchCUDAWarpActiveMask(const TVMArgs& args, TVMRetValue* rv) { - Call call = args[0]; - *rv = Call(call->dtype, Op::Get("tir.cuda.__activemask"), call->args); +static PrimExpr DispatchCUDAWarpActiveMask(const PrimExpr& e) { + const CallNode* call = e.as(); + return Call(call->dtype, Op::Get("tir.cuda.__activemask"), call->args); } template -static void DispatchCUDAShuffle(const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; +static PrimExpr DispatchCUDAShuffle(const PrimExpr& e) { const CallNode* call = e.as(); ICHECK(call != nullptr); ICHECK_EQ(call->args.size(), 5); // mask, value, warp_id, width, warp_size Array cuda_args{{call->args[0], call->args[1], call->args[2], call->args[3]}}; - - *rv = Call(call->dtype, T()(call->dtype, Downcast(call->op)), cuda_args); + return Call(call->dtype, T()(call->dtype, Downcast(call->op)), cuda_args); } TVM_REGISTER_OP("tir.floor") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.ceil") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.trunc") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.fabs") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.round") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.exp").set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.exp2") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.exp10") - .set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.erf").set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.log").set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.log2") - .set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.log10") - .set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); -TVM_REGISTER_OP("tir.tan").set_attr( - "cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); +TVM_REGISTER_OP("tir.tan").set_attr("cuda.FLowerIntrinsic", + DispatchPureExtern); TVM_REGISTER_OP("tir.cos").set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.cosh") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sin").set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.sinh") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.atan") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.tanh") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sqrt") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.pow").set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.popcount") - .set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.tvm_warp_shuffle") - .set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchCUDAShuffle)); + .set_attr("cuda.FLowerIntrinsic", DispatchCUDAShuffle); TVM_REGISTER_OP("tir.tvm_warp_shuffle_up") - .set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchCUDAShuffle)); + .set_attr("cuda.FLowerIntrinsic", DispatchCUDAShuffle); TVM_REGISTER_OP("tir.tvm_warp_shuffle_down") - .set_attr("cuda.FLowerIntrinsic", - PackedFunc(DispatchCUDAShuffle)); + .set_attr("cuda.FLowerIntrinsic", DispatchCUDAShuffle); TVM_REGISTER_OP("tir.tvm_warp_activemask") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchCUDAWarpActiveMask)); + .set_attr("cuda.FLowerIntrinsic", DispatchCUDAWarpActiveMask); TVM_REGISTER_OP("tir.fmod") - .set_attr("cuda.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("cuda.FLowerIntrinsic", DispatchPureExtern); // Register low-level builtin ops. // TODO(tvm-team): consider make CUDA its own subfolder and create a file for low-level builtins. diff --git a/src/target/source/intrin_rule_metal.cc b/src/target/source/intrin_rule_metal.cc index bff7ade9c84d..3b072fcc7006 100644 --- a/src/target/source/intrin_rule_metal.cc +++ b/src/target/source/intrin_rule_metal.cc @@ -31,64 +31,64 @@ namespace intrin { using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.floor") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.ceil") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.trunc") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.fabs") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.round") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.exp").set_attr("metal.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.exp2") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.exp10") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.log").set_attr("metal.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.log2") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.log10") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.tanh") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sqrt") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.pow").set_attr("metal.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.popcount") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.fmod") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sin").set_attr("metal.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.sinh") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.cos").set_attr("metal.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.cosh") - .set_attr("metal.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("metal.FLowerIntrinsic", DispatchPureExtern); } // namespace intrin } // namespace codegen diff --git a/src/target/source/intrin_rule_opencl.cc b/src/target/source/intrin_rule_opencl.cc index b97a8a305779..288bb2cfc069 100644 --- a/src/target/source/intrin_rule_opencl.cc +++ b/src/target/source/intrin_rule_opencl.cc @@ -32,69 +32,68 @@ namespace intrin { using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.floor") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.ceil") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.trunc") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.fabs") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.round") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.exp").set_attr("opencl.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.exp2") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.exp10") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.log").set_attr("opencl.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.log2") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.log10") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.tanh") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sqrt") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.pow").set_attr("opencl.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.popcount") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.fmod") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sin").set_attr("opencl.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.sinh") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.cos").set_attr("opencl.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.cosh") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("opencl.FLowerIntrinsic", DispatchPureExtern); // There is no warp shuffle instruction in standard OpenCL // When shuffle is used, we assume it is intel's shuffle extension -static void DispatchIntelShuffle(const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; +static PrimExpr DispatchIntelShuffle(const PrimExpr& e) { const CallNode* call = e.as(); ICHECK(call != nullptr); ICHECK_EQ(call->args.size(), 5); // mask, value, warp_id, width, warp_size @@ -102,11 +101,11 @@ static void DispatchIntelShuffle(const TVMArgs& args, TVMRetValue* rv) { ICHECK(analyzer.CanProve(call->args[3] == call->args[4])) << "Intel warp shuffle dose not support width != warp_size"; Array opencl_args{{StringImm("intel_sub_group_shuffle"), call->args[1], call->args[2]}}; - *rv = Call(call->dtype, builtin::call_pure_extern(), opencl_args); + return Call(call->dtype, builtin::call_pure_extern(), opencl_args); } TVM_REGISTER_OP("tir.tvm_warp_shuffle") - .set_attr("opencl.FLowerIntrinsic", PackedFunc(DispatchIntelShuffle)); + .set_attr("opencl.FLowerIntrinsic", DispatchIntelShuffle); } // namespace intrin } // namespace codegen diff --git a/src/target/source/intrin_rule_vhls.cc b/src/target/source/intrin_rule_vhls.cc index a60d75d40723..57be8ae17a57 100644 --- a/src/target/source/intrin_rule_vhls.cc +++ b/src/target/source/intrin_rule_vhls.cc @@ -31,61 +31,61 @@ namespace intrin { using tir::FLowerIntrinsic; TVM_REGISTER_OP("tir.floor") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.ceil") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.trunc") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.fabs") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.round") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.exp").set_attr("sdaccel.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.exp2") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.exp10") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.log").set_attr("sdaccel.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.log2") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.log10") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.tanh") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sqrt") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.pow").set_attr("sdaccel.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.popcount") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sin").set_attr("sdaccel.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.sinh") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.cos").set_attr("sdaccel.FLowerIntrinsic", - PackedFunc(DispatchPureExtern)); + DispatchPureExtern); TVM_REGISTER_OP("tir.cosh") - .set_attr("sdaccel.FLowerIntrinsic", PackedFunc(DispatchPureExtern)); + .set_attr("sdaccel.FLowerIntrinsic", DispatchPureExtern); } // namespace intrin } // namespace codegen From 4cd6ad73c384e0bd0e772dcf54c7e91439356f38 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 22 Apr 2021 14:41:50 -0700 Subject: [PATCH 33/41] Remove extra can_override argument --- include/tvm/ir/op.h | 9 ++++----- python/tvm/ir/op.py | 6 +----- python/tvm/target/intrin.py | 4 ++-- python/tvm/topi/arm_cpu/tensor_intrin.py | 2 +- python/tvm/topi/cuda/nms.py | 4 ++-- src/ir/op.cc | 14 ++++---------- src/node/attr_registry.h | 4 ++-- tutorials/language/intrin_math.py | 4 ++-- 8 files changed, 18 insertions(+), 29 deletions(-) diff --git a/include/tvm/ir/op.h b/include/tvm/ir/op.h index 5fff1268739a..f87f0bff4867 100644 --- a/include/tvm/ir/op.h +++ b/include/tvm/ir/op.h @@ -280,7 +280,7 @@ class OpRegEntry { */ template inline OpRegEntry& set_attr(const std::string& attr_name, // NOLINT(*) - const ValueType& value, int plevel = 10, int can_override = 0); + const ValueType& value, int plevel = 10); /*! * \brief Resets an attr of the registry. @@ -314,8 +314,7 @@ class OpRegEntry { // return internal pointer to op. inline OpNode* get(); // update the attribute OpAttrMap - TVM_DLL void UpdateAttr(const String& key, runtime::TVMRetValue value, int plevel, - int can_override = 0); + TVM_DLL void UpdateAttr(const String& key, runtime::TVMRetValue value, int plevel); }; /*! @@ -465,11 +464,11 @@ inline OpRegEntry& OpRegEntry::set_support_level(int32_t n) { // NOLINT(*) template inline OpRegEntry& OpRegEntry::set_attr( // NOLINT(*) - const std::string& attr_name, const ValueType& value, int plevel, int can_override) { + const std::string& attr_name, const ValueType& value, int plevel) { ICHECK_GT(plevel, 0) << "plevel in set_attr must be greater than 0"; runtime::TVMRetValue rv; rv = value; - UpdateAttr(attr_name, rv, plevel, can_override); + UpdateAttr(attr_name, rv, plevel); return *this; } diff --git a/python/tvm/ir/op.py b/python/tvm/ir/op.py index a9bf82b5cb8d..88e760ef91a1 100644 --- a/python/tvm/ir/op.py +++ b/python/tvm/ir/op.py @@ -123,7 +123,6 @@ def register_intrin_lowering( *, f=None, level=10, - override=False, ): """Register Op lowering function @@ -141,9 +140,6 @@ def register_intrin_lowering( level : int The priority level - override: boolean optional - Whether override existing entry. - Returns ------- fregister : function @@ -152,7 +148,7 @@ def register_intrin_lowering( def _register(f): """internal register function""" - _ffi_api.RegisterOpLowerIntrinsic(op_name, f, target, level, override) + _ffi_api.RegisterOpLowerIntrinsic(op_name, f, target, level) return f return _register(f) if f is not None else _register diff --git a/python/tvm/target/intrin.py b/python/tvm/target/intrin.py index a8154cb97640..3eb2f441bb5b 100644 --- a/python/tvm/target/intrin.py +++ b/python/tvm/target/intrin.py @@ -76,6 +76,6 @@ def _rule_float_direct(op): # opencl pattern for exp -register_intrin_lowering("tir.exp", target="opencl", f=_rule_float_direct, override=True) +register_intrin_lowering("tir.exp", target="opencl", f=_rule_float_direct, level=100) # default pattern for exp -register_intrin_lowering("tir.exp", target="default", f=_rule_float_suffix, override=True) +register_intrin_lowering("tir.exp", target="default", f=_rule_float_suffix, level=100) diff --git a/python/tvm/topi/arm_cpu/tensor_intrin.py b/python/tvm/topi/arm_cpu/tensor_intrin.py index 76588f081c46..494f6b7bc80d 100644 --- a/python/tvm/topi/arm_cpu/tensor_intrin.py +++ b/python/tvm/topi/arm_cpu/tensor_intrin.py @@ -1056,5 +1056,5 @@ def _q_multiply_shift_arm(op): register_intrin_lowering( - "tir.q_multiply_shift", target="llvm.aarch64", f=_q_multiply_shift_arm, override=True + "tir.q_multiply_shift", target="llvm.aarch64", f=_q_multiply_shift_arm, level=100 ) diff --git a/python/tvm/topi/cuda/nms.py b/python/tvm/topi/cuda/nms.py index 94f8e19095a1..da6be64e0f04 100644 --- a/python/tvm/topi/cuda/nms.py +++ b/python/tvm/topi/cuda/nms.py @@ -52,9 +52,9 @@ def opencl_atomic_add_rule(op): raise RuntimeError("only support int32") -register_intrin_lowering("tir.atomic_add", target="cuda", f=cuda_atomic_add_rule, override=True) +register_intrin_lowering("tir.atomic_add", target="cuda", f=cuda_atomic_add_rule, level=100) -register_intrin_lowering("tir.atomic_add", target="opencl", f=opencl_atomic_add_rule, override=True) +register_intrin_lowering("tir.atomic_add", target="opencl", f=opencl_atomic_add_rule, level=100) def atomic_add(x, y): diff --git a/src/ir/op.cc b/src/ir/op.cc index 64d3b62b04e8..5b258ed2f2f0 100644 --- a/src/ir/op.cc +++ b/src/ir/op.cc @@ -71,8 +71,8 @@ void OpRegEntry::reset_attr(const std::string& attr_name) { OpRegistry::Global()->ResetAttr(attr_name, op_); } -void OpRegEntry::UpdateAttr(const String& key, TVMRetValue value, int plevel, int can_override) { - OpRegistry::Global()->UpdateAttr(key, op_, value, plevel, can_override); +void OpRegEntry::UpdateAttr(const String& key, TVMRetValue value, int plevel) { + OpRegistry::Global()->UpdateAttr(key, op_, value, plevel); } // Frontend APIs @@ -125,15 +125,9 @@ TVM_REGISTER_GLOBAL("ir.RegisterOpAttr") }); TVM_REGISTER_GLOBAL("ir.RegisterOpLowerIntrinsic") - .set_body_typed([](String name, PackedFunc f, String target, int plevel, int can_override) { - if (Op::HasAttrMap(target + ".FLowerIntrinsic") && - OpRegistry::Global()->Get(name) != nullptr && - Op::GetAttrMap(target + ".FLowerIntrinsic").count(Op::Get(name))) { - CHECK(can_override) << "Op " << name << "'s intrinsic lowering function " << target - << ".FlowerIntrinsic is already registered"; - } + .set_body_typed([](String name, PackedFunc f, String target, int plevel) { tvm::OpRegEntry::RegisterOrGet(name).set_attr(target + ".FLowerIntrinsic", f, - plevel, can_override); + plevel); }); // helper to get internal dev function in objectref. diff --git a/src/node/attr_registry.h b/src/node/attr_registry.h index dda96b95f7ef..47ec9eca535c 100644 --- a/src/node/attr_registry.h +++ b/src/node/attr_registry.h @@ -95,7 +95,7 @@ class AttrRegistry { * \param plevel The support level. */ void UpdateAttr(const String& attr_name, const KeyType& key, runtime::TVMRetValue value, - int plevel, int can_override = 0) { + int plevel) { using runtime::TVMRetValue; std::lock_guard lock(mutex_); auto& op_map = attrs_[attr_name]; @@ -109,7 +109,7 @@ class AttrRegistry { op_map->data_.resize(index + 1, std::make_pair(TVMRetValue(), 0)); } std::pair& p = op_map->data_[index]; - ICHECK(can_override || p.second != plevel) + ICHECK(p.second != plevel) << "Attribute " << attr_name << " of " << key->AttrRegistryName() << " is already registered with same plevel=" << plevel; ICHECK(value.type_code() != kTVMNullptr) << "Registered packed_func is Null for " << attr_name diff --git a/tutorials/language/intrin_math.py b/tutorials/language/intrin_math.py index 7c47600c0a61..a3ac794a06c5 100644 --- a/tutorials/language/intrin_math.py +++ b/tutorials/language/intrin_math.py @@ -113,7 +113,7 @@ def my_cuda_math_rule(op): return op -register_intrin_lowering("tir.exp", target="cuda", f=my_cuda_math_rule, override=True) +register_intrin_lowering("tir.exp", target="cuda", f=my_cuda_math_rule, level=100) ###################################################################### # Register the rule to TVM with override option to override existing rule. # Notice the difference between the printed code from previous one: @@ -149,7 +149,7 @@ def my_cuda_mylog_rule(op): # new op registration is triggered by registering an attribute of the op register_op_attr("tir.mylog", "TCallEffectKind", tvm.tir.CallEffectKind.Pure) -register_intrin_lowering("tir.mylog", target="cuda", f=my_cuda_mylog_rule, override=True) +register_intrin_lowering("tir.mylog", target="cuda", f=my_cuda_mylog_rule, level=100) n = te.var("n") A = te.placeholder((n,), name="A") From f1d40610f1f8e15017c1dc37f9b1b971fc099fae Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 22 Apr 2021 15:44:28 -0700 Subject: [PATCH 34/41] Change all Packedfunc to Typed funcs --- src/node/attr_registry.h | 5 +- src/target/intrin_rule.cc | 39 +++++++-------- src/target/llvm/intrin_rule_rocm.cc | 70 +++++++++++++-------------- src/target/spirv/intrin_rule_spirv.cc | 62 ++++++++++++------------ 4 files changed, 83 insertions(+), 93 deletions(-) diff --git a/src/node/attr_registry.h b/src/node/attr_registry.h index 47ec9eca535c..f84be1467453 100644 --- a/src/node/attr_registry.h +++ b/src/node/attr_registry.h @@ -109,9 +109,8 @@ class AttrRegistry { op_map->data_.resize(index + 1, std::make_pair(TVMRetValue(), 0)); } std::pair& p = op_map->data_[index]; - ICHECK(p.second != plevel) - << "Attribute " << attr_name << " of " << key->AttrRegistryName() - << " is already registered with same plevel=" << plevel; + ICHECK(p.second != plevel) << "Attribute " << attr_name << " of " << key->AttrRegistryName() + << " is already registered with same plevel=" << plevel; ICHECK(value.type_code() != kTVMNullptr) << "Registered packed_func is Null for " << attr_name << " of operator " << key->AttrRegistryName(); if (p.second < plevel && value.type_code() != kTVMNullptr) { diff --git a/src/target/intrin_rule.cc b/src/target/intrin_rule.cc index 33207871a31c..ba1426288137 100644 --- a/src/target/intrin_rule.cc +++ b/src/target/intrin_rule.cc @@ -114,51 +114,46 @@ TVM_REGISTER_OP("tir.round") TVM_REGISTER_OP("tir.rsqrt") .set_attr("default.FLowerIntrinsic", - PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; + [](const PrimExpr& e)->PrimExpr { const CallNode* call = e.as(); ICHECK(call != nullptr); auto one = make_const(call->args[0].dtype(), 1); - *rv = one / sqrt(call->args[0]); - })); + return one / sqrt(call->args[0]); + }); TVM_REGISTER_OP("tir.pow").set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sigmoid") .set_attr("default.FLowerIntrinsic", - PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; + [](const PrimExpr& e)->PrimExpr { const CallNode* call = e.as(); ICHECK(call != nullptr); auto one = make_const(call->args[0].dtype(), 1); - *rv = one / (one + exp(-call->args[0])); - })); + return one / (one + exp(-call->args[0])); + }); TVM_REGISTER_OP("tir.isfinite") .set_attr("default.FLowerIntrinsic", - PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; + [](const PrimExpr& e)->PrimExpr { const CallNode* call = e.as(); ICHECK(call != nullptr); - *rv = isfinite(call->args[0]); - })); + return isfinite(call->args[0]); + }); TVM_REGISTER_OP("tir.isinf") .set_attr("default.FLowerIntrinsic", - PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; + [](const PrimExpr& e)->PrimExpr { const CallNode* call = e.as(); ICHECK(call != nullptr); - *rv = isinf(call->args[0]); - })); + return isinf(call->args[0]); + }); TVM_REGISTER_OP("tir.q_multiply_shift") .set_attr( - "default.FLowerIntrinsic", PackedFunc([](const TVMArgs& args, TVMRetValue* rv) { + "default.FLowerIntrinsic", [](const PrimExpr& e)->PrimExpr { using tir::make_const; - PrimExpr e = args[0]; const tir::CallNode* call = e.as(); ICHECK(call != nullptr); @@ -186,7 +181,7 @@ TVM_REGISTER_OP("tir.q_multiply_shift") int exp_val = get_int_value(s) - 1; if (exp_val > 0) { // power of 2 is greater than 0, apply left shift. - *rv = x << exp; + return x << exp; } else { // power of 2 is less than 0, round and then apply right shift. DataType lp_dtype = DataType::Int(32, x.dtype().lanes()); @@ -194,7 +189,7 @@ TVM_REGISTER_OP("tir.q_multiply_shift") exp = -exp; PrimExpr rounding_factor = one << (exp - 1); PrimExpr rounded_t = x + rounding_factor; - *rv = rounded_t >> exp; + return rounded_t >> exp; } } else { // Only int32 types are supported (any number of lanes is allowed) @@ -228,9 +223,9 @@ TVM_REGISTER_OP("tir.q_multiply_shift") // 6) The fixed point multiplication keeps the value in int32 range. Casting back to // int32. - *rv = cast(lp_dtype, x); + return cast(lp_dtype, x); } - })); + }); } // namespace intrin } // namespace codegen diff --git a/src/target/llvm/intrin_rule_rocm.cc b/src/target/llvm/intrin_rule_rocm.cc index e2e60133465d..2ddb93952b4b 100644 --- a/src/target/llvm/intrin_rule_rocm.cc +++ b/src/target/llvm/intrin_rule_rocm.cc @@ -33,8 +33,7 @@ namespace tvm { namespace codegen { -inline void DispatchPureExternOCML(const TVMArgs& args, TVMRetValue* rv) { - PrimExpr e = args[0]; +inline PrimExpr DispatchPureExternOCML(const PrimExpr& e) { using namespace tir; const CallNode* call = e.as(); ICHECK(call != nullptr); @@ -51,14 +50,13 @@ inline void DispatchPureExternOCML(const TVMArgs& args, TVMRetValue* rv) { for (auto arg : call->args) { new_args.push_back(arg); } - - *rv = Call(call->dtype, builtin::call_pure_extern(), new_args); + + return Call(call->dtype, builtin::call_pure_extern(), new_args); } -inline void DispatchShuffle(const TVMArgs& targs, TVMRetValue* rv) { - PrimExpr e_call = targs[0]; +inline PrimExpr DispatchShuffle(const PrimExpr& e) { using namespace tir; - const CallNode* call = e_call.as(); + const CallNode* call = e.as(); ICHECK(call != nullptr); ICHECK_EQ(call->args.size(), 5); // mask, value, warp_id, width, warp_size PrimExpr var = call->args[1]; @@ -90,7 +88,7 @@ inline void DispatchShuffle(const TVMArgs& targs, TVMRetValue* rv) { } PrimExpr res = Call(var.dtype(), builtin::call_pure_extern(), {StringImm("llvm.amdgcn.ds.bpermute"), index << 2, var}); - *rv = res; + return res; } namespace llvm { @@ -99,85 +97,85 @@ using tir::FLowerIntrinsic; // dummy because we don't have the activemask TVM_REGISTER_OP("tir.tvm_warp_activemask") .set_attr("rocm.FLowerIntrinsic", - PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { + [](const PrimExpr& e)->PrimExpr { PrimExpr zero = tir::make_zero(DataType::Int(32)); - *rv = zero; - })); + return zero; + }); TVM_REGISTER_OP("tir.tvm_warp_shuffle") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchShuffle)); + .set_attr("rocm.FLowerIntrinsic", DispatchShuffle); TVM_REGISTER_OP("tir.tvm_warp_shuffle_up") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchShuffle)); + .set_attr("rocm.FLowerIntrinsic", DispatchShuffle); TVM_REGISTER_OP("tir.tvm_warp_shuffle_down") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchShuffle)); + .set_attr("rocm.FLowerIntrinsic", DispatchShuffle); TVM_REGISTER_OP("tir.floor") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", DispatchPureExternOCML); TVM_REGISTER_OP("tir.ceil") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", DispatchPureExternOCML); TVM_REGISTER_OP("tir.round") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", DispatchPureExternOCML); TVM_REGISTER_OP("tir.trunc") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", DispatchPureExternOCML); TVM_REGISTER_OP("tir.fabs") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", DispatchPureExternOCML); TVM_REGISTER_OP("tir.exp").set_attr("rocm.FLowerIntrinsic", - PackedFunc(DispatchPureExternOCML)); + DispatchPureExternOCML); TVM_REGISTER_OP("tir.exp2") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", DispatchPureExternOCML); TVM_REGISTER_OP("tir.exp10") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", DispatchPureExternOCML); TVM_REGISTER_OP("tir.erf").set_attr("rocm.FLowerIntrinsic", - PackedFunc(DispatchPureExternOCML)); + DispatchPureExternOCML); TVM_REGISTER_OP("tir.fma").set_attr("rocm.FLowerIntrinsic", - PackedFunc(DispatchPureExternOCML)); + DispatchPureExternOCML); TVM_REGISTER_OP("tir.log").set_attr("rocm.FLowerIntrinsic", - PackedFunc(DispatchPureExternOCML)); + DispatchPureExternOCML); TVM_REGISTER_OP("tir.log2") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", DispatchPureExternOCML); TVM_REGISTER_OP("tir.log10") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", DispatchPureExternOCML); TVM_REGISTER_OP("tir.sqrt") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", DispatchPureExternOCML); TVM_REGISTER_OP("tir.pow").set_attr("rocm.FLowerIntrinsic", - PackedFunc(DispatchPureExternOCML)); + DispatchPureExternOCML); TVM_REGISTER_OP("tir.tanh") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", DispatchPureExternOCML); TVM_REGISTER_OP("tir.tan").set_attr("rocm.FLowerIntrinsic", - PackedFunc(DispatchPureExternOCML)); + DispatchPureExternOCML); TVM_REGISTER_OP("tir.cos").set_attr("rocm.FLowerIntrinsic", - PackedFunc(DispatchPureExternOCML)); + DispatchPureExternOCML); TVM_REGISTER_OP("tir.cosh") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", DispatchPureExternOCML); TVM_REGISTER_OP("tir.sin").set_attr("rocm.FLowerIntrinsic", - PackedFunc(DispatchPureExternOCML)); + DispatchPureExternOCML); TVM_REGISTER_OP("tir.sinh") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", DispatchPureExternOCML); TVM_REGISTER_OP("tir.atan") - .set_attr("rocm.FLowerIntrinsic", PackedFunc(DispatchPureExternOCML)); + .set_attr("rocm.FLowerIntrinsic", DispatchPureExternOCML); } // namespace llvm } // namespace codegen diff --git a/src/target/spirv/intrin_rule_spirv.cc b/src/target/spirv/intrin_rule_spirv.cc index fbf20215f290..1a667398ec54 100644 --- a/src/target/spirv/intrin_rule_spirv.cc +++ b/src/target/spirv/intrin_rule_spirv.cc @@ -35,8 +35,7 @@ using tir::FLowerIntrinsic; // num_signature means number of arguments used to query signature template -PrimExpr CallGLSLIntrin(const TVMArgs& targs, TVMRetValue* rv) { - PrimExpr e = targs[0]; +PrimExpr CallGLSLIntrin(const PrimExpr& e) { const tir::CallNode* call = e.as(); ICHECK(call != nullptr); Array cargs; @@ -50,105 +49,104 @@ PrimExpr CallGLSLIntrin(const TVMArgs& targs, TVMRetValue* rv) { } template -inline void DispatchGLSLPureIntrin(const TVMArgs& targs, TVMRetValue* rv) { - *rv = CallGLSLIntrin(targs, rv); +inline PrimExpr DispatchGLSLPureIntrin(const PrimExpr& e) { + return CallGLSLIntrin(e); } TVM_REGISTER_OP("tir.floor") .set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.ceil") .set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.round") .set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.trunc") .set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.fabs") .set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.exp").set_attr( - "vulkan.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); + "vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.sin").set_attr( - "vulkan.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); + "vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.cos").set_attr( - "vulkan.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); + "vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.log").set_attr( - "vulkan.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); + "vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.log2") .set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.sqrt") .set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.pow").set_attr( - "vulkan.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); + "vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.tanh") .set_attr("vulkan.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.clz").set_attr( - "vulkan.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { - PrimExpr e = targs[0]; + "vulkan.FLowerIntrinsic", [](const PrimExpr& e)->PrimExpr { const tir::CallNode* call = e.as(); ICHECK(call != nullptr); ICHECK_EQ(call->args.size(), 1); PrimExpr arg = call->args[0]; - PrimExpr msb = CallGLSLIntrin(targs, rv); - *rv = PrimExpr(arg.dtype().bits() - 1) - msb; - })); + PrimExpr msb = CallGLSLIntrin(e); + return PrimExpr(arg.dtype().bits() - 1) - msb; + }); // WebGPU rules. TVM_REGISTER_OP("tir.floor") .set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.ceil") .set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.round") .set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.trunc") .set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.fabs") .set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.exp").set_attr( - "webgpu.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); + "webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.log").set_attr( - "webgpu.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); + "webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.sqrt") .set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.pow").set_attr( - "webgpu.FLowerIntrinsic", PackedFunc(DispatchGLSLPureIntrin)); + "webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.tanh") .set_attr("webgpu.FLowerIntrinsic", - PackedFunc(DispatchGLSLPureIntrin)); + DispatchGLSLPureIntrin); } // namespace spirv } // namespace codegen From 20b68ae9b0a8ae72cf1c9d7c8254690dbd6fefd0 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 22 Apr 2021 15:54:10 -0700 Subject: [PATCH 35/41] Fix clang format --- src/target/llvm/intrin_rule_rocm.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/target/llvm/intrin_rule_rocm.cc b/src/target/llvm/intrin_rule_rocm.cc index 2ddb93952b4b..072686868a81 100644 --- a/src/target/llvm/intrin_rule_rocm.cc +++ b/src/target/llvm/intrin_rule_rocm.cc @@ -50,7 +50,7 @@ inline PrimExpr DispatchPureExternOCML(const PrimExpr& e) { for (auto arg : call->args) { new_args.push_back(arg); } - + return Call(call->dtype, builtin::call_pure_extern(), new_args); } @@ -96,11 +96,10 @@ using tir::FLowerIntrinsic; // dummy because we don't have the activemask TVM_REGISTER_OP("tir.tvm_warp_activemask") - .set_attr("rocm.FLowerIntrinsic", - [](const PrimExpr& e)->PrimExpr { - PrimExpr zero = tir::make_zero(DataType::Int(32)); - return zero; - }); + .set_attr("rocm.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { + PrimExpr zero = tir::make_zero(DataType::Int(32)); + return zero; + }); TVM_REGISTER_OP("tir.tvm_warp_shuffle") .set_attr("rocm.FLowerIntrinsic", DispatchShuffle); From f8cca141dd8683f1a3ec74213c0c7e0415976cf9 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 22 Apr 2021 16:02:08 -0700 Subject: [PATCH 36/41] Fix clang format for rules file --- src/target/intrin_rule.cc | 199 +++++++++++++------------- src/target/spirv/intrin_rule_spirv.cc | 79 +++++----- temp.cpp | 138 ++++++++++++++++++ 3 files changed, 267 insertions(+), 149 deletions(-) create mode 100644 temp.cpp diff --git a/src/target/intrin_rule.cc b/src/target/intrin_rule.cc index ba1426288137..bfc3fe6fcc8c 100644 --- a/src/target/intrin_rule.cc +++ b/src/target/intrin_rule.cc @@ -113,119 +113,114 @@ TVM_REGISTER_OP("tir.round") .set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.rsqrt") - .set_attr("default.FLowerIntrinsic", - [](const PrimExpr& e)->PrimExpr { - const CallNode* call = e.as(); - ICHECK(call != nullptr); - auto one = make_const(call->args[0].dtype(), 1); - return one / sqrt(call->args[0]); - }); + .set_attr("default.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { + const CallNode* call = e.as(); + ICHECK(call != nullptr); + auto one = make_const(call->args[0].dtype(), 1); + return one / sqrt(call->args[0]); + }); TVM_REGISTER_OP("tir.pow").set_attr("default.FLowerIntrinsic", DispatchPureExtern); TVM_REGISTER_OP("tir.sigmoid") - .set_attr("default.FLowerIntrinsic", - [](const PrimExpr& e)->PrimExpr { - const CallNode* call = e.as(); - ICHECK(call != nullptr); - auto one = make_const(call->args[0].dtype(), 1); - return one / (one + exp(-call->args[0])); - }); + .set_attr("default.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { + const CallNode* call = e.as(); + ICHECK(call != nullptr); + auto one = make_const(call->args[0].dtype(), 1); + return one / (one + exp(-call->args[0])); + }); TVM_REGISTER_OP("tir.isfinite") - .set_attr("default.FLowerIntrinsic", - [](const PrimExpr& e)->PrimExpr { - const CallNode* call = e.as(); - ICHECK(call != nullptr); - return isfinite(call->args[0]); - }); + .set_attr("default.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { + const CallNode* call = e.as(); + ICHECK(call != nullptr); + return isfinite(call->args[0]); + }); TVM_REGISTER_OP("tir.isinf") - .set_attr("default.FLowerIntrinsic", - [](const PrimExpr& e)->PrimExpr { - const CallNode* call = e.as(); - ICHECK(call != nullptr); - return isinf(call->args[0]); - }); + .set_attr("default.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { + const CallNode* call = e.as(); + ICHECK(call != nullptr); + return isinf(call->args[0]); + }); TVM_REGISTER_OP("tir.q_multiply_shift") - .set_attr( - "default.FLowerIntrinsic", [](const PrimExpr& e)->PrimExpr { - using tir::make_const; - - const tir::CallNode* call = e.as(); - ICHECK(call != nullptr); - - PrimExpr x = call->args[0]; - PrimExpr y = call->args[1]; - PrimExpr q = call->args[2]; - PrimExpr s = call->args[3]; - - // Lambda function to extract the int value from PrimExpr - auto get_int_value = [](const PrimExpr node) { - if (auto int_node = node.as()) { - return int_node->value; - } - auto broadcast_node = node.as(); - CHECK(broadcast_node != nullptr); - auto int_node = broadcast_node->value.as(); - CHECK(int_node != nullptr); - return int_node->value; - }; - // Power of 2 is determined by the fixed_point_multiplier == 1 << 30. In case of power of - // 2, fixed point multiplier will represent a float value of 0.5. In fixed point, this is - // represented by 1 << 30. - if (get_int_value(y) == (1 << 30)) { - PrimExpr exp = s - 1; - int exp_val = get_int_value(s) - 1; - if (exp_val > 0) { - // power of 2 is greater than 0, apply left shift. - return x << exp; - } else { - // power of 2 is less than 0, round and then apply right shift. - DataType lp_dtype = DataType::Int(32, x.dtype().lanes()); - PrimExpr one = make_const(lp_dtype, 1); - exp = -exp; - PrimExpr rounding_factor = one << (exp - 1); - PrimExpr rounded_t = x + rounding_factor; - return rounded_t >> exp; - } - } else { - // Only int32 types are supported (any number of lanes is allowed) - ICHECK(y.dtype().code() == DLDataTypeCode::kDLInt && y.dtype().bits() == 32); - ICHECK(s.dtype().code() == DLDataTypeCode::kDLInt && s.dtype().bits() == 32); - - DataType hp_dtype = DataType::Int(64, x.dtype().lanes()); - DataType lp_dtype = DataType::Int(32, x.dtype().lanes()); - - // 1) Calculating the integer multiplier and integer shift - PrimExpr zero = make_const(s.dtype(), 0); - PrimExpr left_shift = tir::Select(s > zero, s, zero); - PrimExpr right_shift = tir::Select(s > zero, zero, -s); - - // 2) Cast and Multiply the integer multiplier - PrimExpr one = make_const(hp_dtype, 1); - x = cast(hp_dtype, x); - y = cast(hp_dtype, y); - x = tir::Select(left_shift != zero, x << left_shift, x); - - // 3) Perform the multiplication in higher precision. - x = x * y; - - // 4) Find the rounding scalar - PrimExpr total_right_shift = right_shift + q; - PrimExpr pos_rounding_value = (one << (total_right_shift - 1)); - x = x + pos_rounding_value; - - // 5) Simply right shift the result to get the final output. - x = x >> total_right_shift; - - // 6) The fixed point multiplication keeps the value in int32 range. Casting back to - // int32. - return cast(lp_dtype, x); - } - }); + .set_attr("default.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { + using tir::make_const; + + const tir::CallNode* call = e.as(); + ICHECK(call != nullptr); + + PrimExpr x = call->args[0]; + PrimExpr y = call->args[1]; + PrimExpr q = call->args[2]; + PrimExpr s = call->args[3]; + + // Lambda function to extract the int value from PrimExpr + auto get_int_value = [](const PrimExpr node) { + if (auto int_node = node.as()) { + return int_node->value; + } + auto broadcast_node = node.as(); + CHECK(broadcast_node != nullptr); + auto int_node = broadcast_node->value.as(); + CHECK(int_node != nullptr); + return int_node->value; + }; + // Power of 2 is determined by the fixed_point_multiplier == 1 << 30. In case of power of + // 2, fixed point multiplier will represent a float value of 0.5. In fixed point, this is + // represented by 1 << 30. + if (get_int_value(y) == (1 << 30)) { + PrimExpr exp = s - 1; + int exp_val = get_int_value(s) - 1; + if (exp_val > 0) { + // power of 2 is greater than 0, apply left shift. + return x << exp; + } else { + // power of 2 is less than 0, round and then apply right shift. + DataType lp_dtype = DataType::Int(32, x.dtype().lanes()); + PrimExpr one = make_const(lp_dtype, 1); + exp = -exp; + PrimExpr rounding_factor = one << (exp - 1); + PrimExpr rounded_t = x + rounding_factor; + return rounded_t >> exp; + } + } else { + // Only int32 types are supported (any number of lanes is allowed) + ICHECK(y.dtype().code() == DLDataTypeCode::kDLInt && y.dtype().bits() == 32); + ICHECK(s.dtype().code() == DLDataTypeCode::kDLInt && s.dtype().bits() == 32); + + DataType hp_dtype = DataType::Int(64, x.dtype().lanes()); + DataType lp_dtype = DataType::Int(32, x.dtype().lanes()); + + // 1) Calculating the integer multiplier and integer shift + PrimExpr zero = make_const(s.dtype(), 0); + PrimExpr left_shift = tir::Select(s > zero, s, zero); + PrimExpr right_shift = tir::Select(s > zero, zero, -s); + + // 2) Cast and Multiply the integer multiplier + PrimExpr one = make_const(hp_dtype, 1); + x = cast(hp_dtype, x); + y = cast(hp_dtype, y); + x = tir::Select(left_shift != zero, x << left_shift, x); + + // 3) Perform the multiplication in higher precision. + x = x * y; + + // 4) Find the rounding scalar + PrimExpr total_right_shift = right_shift + q; + PrimExpr pos_rounding_value = (one << (total_right_shift - 1)); + x = x + pos_rounding_value; + + // 5) Simply right shift the result to get the final output. + x = x >> total_right_shift; + + // 6) The fixed point multiplication keeps the value in int32 range. Casting back to + // int32. + return cast(lp_dtype, x); + } + }); } // namespace intrin } // namespace codegen diff --git a/src/target/spirv/intrin_rule_spirv.cc b/src/target/spirv/intrin_rule_spirv.cc index 1a667398ec54..3baa77e961cc 100644 --- a/src/target/spirv/intrin_rule_spirv.cc +++ b/src/target/spirv/intrin_rule_spirv.cc @@ -54,54 +54,46 @@ inline PrimExpr DispatchGLSLPureIntrin(const PrimExpr& e) { } TVM_REGISTER_OP("tir.floor") - .set_attr("vulkan.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.ceil") - .set_attr("vulkan.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.round") - .set_attr("vulkan.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.trunc") - .set_attr("vulkan.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.fabs") - .set_attr("vulkan.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); -TVM_REGISTER_OP("tir.exp").set_attr( - "vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.exp").set_attr("vulkan.FLowerIntrinsic", + DispatchGLSLPureIntrin); -TVM_REGISTER_OP("tir.sin").set_attr( - "vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.sin").set_attr("vulkan.FLowerIntrinsic", + DispatchGLSLPureIntrin); -TVM_REGISTER_OP("tir.cos").set_attr( - "vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.cos").set_attr("vulkan.FLowerIntrinsic", + DispatchGLSLPureIntrin); -TVM_REGISTER_OP("tir.log").set_attr( - "vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.log").set_attr("vulkan.FLowerIntrinsic", + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.log2") - .set_attr("vulkan.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.sqrt") - .set_attr("vulkan.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); -TVM_REGISTER_OP("tir.pow").set_attr( - "vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.pow").set_attr("vulkan.FLowerIntrinsic", + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.tanh") - .set_attr("vulkan.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.clz").set_attr( - "vulkan.FLowerIntrinsic", [](const PrimExpr& e)->PrimExpr { + "vulkan.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { const tir::CallNode* call = e.as(); ICHECK(call != nullptr); ICHECK_EQ(call->args.size(), 1); @@ -112,41 +104,34 @@ TVM_REGISTER_OP("tir.clz").set_attr( // WebGPU rules. TVM_REGISTER_OP("tir.floor") - .set_attr("webgpu.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.ceil") - .set_attr("webgpu.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.round") - .set_attr("webgpu.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.trunc") - .set_attr("webgpu.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.fabs") - .set_attr("webgpu.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); -TVM_REGISTER_OP("tir.exp").set_attr( - "webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.exp").set_attr("webgpu.FLowerIntrinsic", + DispatchGLSLPureIntrin); -TVM_REGISTER_OP("tir.log").set_attr( - "webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.log").set_attr("webgpu.FLowerIntrinsic", + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.sqrt") - .set_attr("webgpu.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); -TVM_REGISTER_OP("tir.pow").set_attr( - "webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); +TVM_REGISTER_OP("tir.pow").set_attr("webgpu.FLowerIntrinsic", + DispatchGLSLPureIntrin); TVM_REGISTER_OP("tir.tanh") - .set_attr("webgpu.FLowerIntrinsic", - DispatchGLSLPureIntrin); + .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); } // namespace spirv } // namespace codegen diff --git a/temp.cpp b/temp.cpp new file mode 100644 index 000000000000..3baa77e961cc --- /dev/null +++ b/temp.cpp @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file intrin_rule_spirv.cc + */ +#include +#include +#include +#include +#include +#include + +namespace tvm { +namespace codegen { +namespace spirv { +using tir::FLowerIntrinsic; + +// num_signature means number of arguments used to query signature + +template +PrimExpr CallGLSLIntrin(const PrimExpr& e) { + const tir::CallNode* call = e.as(); + ICHECK(call != nullptr); + Array cargs; + // intrin id. + cargs.push_back(IntImm(DataType::UInt(32), id)); + + for (PrimExpr arg : call->args) { + cargs.push_back(arg); + } + return tir::Call(call->dtype, tir::builtin::call_spirv_pure_glsl450(), cargs); +} + +template +inline PrimExpr DispatchGLSLPureIntrin(const PrimExpr& e) { + return CallGLSLIntrin(e); +} + +TVM_REGISTER_OP("tir.floor") + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.ceil") + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.round") + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.trunc") + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.fabs") + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.exp").set_attr("vulkan.FLowerIntrinsic", + DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.sin").set_attr("vulkan.FLowerIntrinsic", + DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.cos").set_attr("vulkan.FLowerIntrinsic", + DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.log").set_attr("vulkan.FLowerIntrinsic", + DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.log2") + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.sqrt") + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.pow").set_attr("vulkan.FLowerIntrinsic", + DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.tanh") + .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.clz").set_attr( + "vulkan.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { + const tir::CallNode* call = e.as(); + ICHECK(call != nullptr); + ICHECK_EQ(call->args.size(), 1); + PrimExpr arg = call->args[0]; + PrimExpr msb = CallGLSLIntrin(e); + return PrimExpr(arg.dtype().bits() - 1) - msb; + }); + +// WebGPU rules. +TVM_REGISTER_OP("tir.floor") + .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.ceil") + .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.round") + .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.trunc") + .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.fabs") + .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.exp").set_attr("webgpu.FLowerIntrinsic", + DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.log").set_attr("webgpu.FLowerIntrinsic", + DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.sqrt") + .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.pow").set_attr("webgpu.FLowerIntrinsic", + DispatchGLSLPureIntrin); + +TVM_REGISTER_OP("tir.tanh") + .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); + +} // namespace spirv +} // namespace codegen +} // namespace tvm From 020578d17c60d711fcdf253fa8b9db44a15c4532 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 22 Apr 2021 16:03:23 -0700 Subject: [PATCH 37/41] Remove extra file --- temp.cpp | 138 ------------------------------------------------------- 1 file changed, 138 deletions(-) delete mode 100644 temp.cpp diff --git a/temp.cpp b/temp.cpp deleted file mode 100644 index 3baa77e961cc..000000000000 --- a/temp.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file intrin_rule_spirv.cc - */ -#include -#include -#include -#include -#include -#include - -namespace tvm { -namespace codegen { -namespace spirv { -using tir::FLowerIntrinsic; - -// num_signature means number of arguments used to query signature - -template -PrimExpr CallGLSLIntrin(const PrimExpr& e) { - const tir::CallNode* call = e.as(); - ICHECK(call != nullptr); - Array cargs; - // intrin id. - cargs.push_back(IntImm(DataType::UInt(32), id)); - - for (PrimExpr arg : call->args) { - cargs.push_back(arg); - } - return tir::Call(call->dtype, tir::builtin::call_spirv_pure_glsl450(), cargs); -} - -template -inline PrimExpr DispatchGLSLPureIntrin(const PrimExpr& e) { - return CallGLSLIntrin(e); -} - -TVM_REGISTER_OP("tir.floor") - .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.ceil") - .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.round") - .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.trunc") - .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.fabs") - .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.exp").set_attr("vulkan.FLowerIntrinsic", - DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.sin").set_attr("vulkan.FLowerIntrinsic", - DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.cos").set_attr("vulkan.FLowerIntrinsic", - DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.log").set_attr("vulkan.FLowerIntrinsic", - DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.log2") - .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.sqrt") - .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.pow").set_attr("vulkan.FLowerIntrinsic", - DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.tanh") - .set_attr("vulkan.FLowerIntrinsic", DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.clz").set_attr( - "vulkan.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { - const tir::CallNode* call = e.as(); - ICHECK(call != nullptr); - ICHECK_EQ(call->args.size(), 1); - PrimExpr arg = call->args[0]; - PrimExpr msb = CallGLSLIntrin(e); - return PrimExpr(arg.dtype().bits() - 1) - msb; - }); - -// WebGPU rules. -TVM_REGISTER_OP("tir.floor") - .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.ceil") - .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.round") - .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.trunc") - .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.fabs") - .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.exp").set_attr("webgpu.FLowerIntrinsic", - DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.log").set_attr("webgpu.FLowerIntrinsic", - DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.sqrt") - .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.pow").set_attr("webgpu.FLowerIntrinsic", - DispatchGLSLPureIntrin); - -TVM_REGISTER_OP("tir.tanh") - .set_attr("webgpu.FLowerIntrinsic", DispatchGLSLPureIntrin); - -} // namespace spirv -} // namespace codegen -} // namespace tvm From 4ba9b923b19ddeaa8425d0981a82ce2e79d128e5 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Thu, 22 Apr 2021 16:10:56 -0700 Subject: [PATCH 38/41] Remove unused param description --- include/tvm/ir/op.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/tvm/ir/op.h b/include/tvm/ir/op.h index f87f0bff4867..9456ea80d860 100644 --- a/include/tvm/ir/op.h +++ b/include/tvm/ir/op.h @@ -270,9 +270,6 @@ class OpRegEntry { * an higher priority level attribute * will replace lower priority level attribute. * Must be bigger than 0. - * \param can_override Whether to explicitly allow - * overriding the attribute, any non-zero value - * implies allowance and 0 means disallowance. * * Cannot set with same plevel twice in the code. * From 263b75b57f3cd01f1bf04d67e9f35de8db027a52 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Fri, 23 Apr 2021 14:01:39 -0700 Subject: [PATCH 39/41] Remove PackedFunc & Cache AttrMap --- src/target/llvm/intrin_rule_llvm.cc | 42 +++++++++++++---------------- src/tir/transforms/lower_intrin.cc | 32 ++++++++++++---------- 2 files changed, 36 insertions(+), 38 deletions(-) diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index c66783db706a..7346f1891817 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -47,17 +47,16 @@ TVM_REGISTER_OP("tir.exp2") // set of rules in TIR that can be shared across backends. TVM_REGISTER_OP("tir.exp10") .set_attr("llvm.FLowerIntrinsic", - PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { + [](const PrimExpr& e) -> PrimExpr { using tir::make_const; using tir::make_zero; - PrimExpr e = targs[0]; const tir::CallNode* call = e.as(); ICHECK(call != nullptr); const PrimExpr& x = call->args[0]; PrimExpr ln10 = make_const(x.dtype(), 2.302585093); PrimExpr ret = exp(x * ln10); - *rv = ret; - })); + return ret; + }); TVM_REGISTER_OP("tir.fma").set_attr( "llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3>); @@ -103,10 +102,9 @@ TVM_REGISTER_OP("tir.nearbyint") TVM_REGISTER_OP("tir.tanh") .set_attr("llvm.FLowerIntrinsic", - PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { + [](const PrimExpr& e) -> PrimExpr { using tir::make_const; using tir::make_zero; - PrimExpr e = targs[0]; const tir::CallNode* call = e.as(); ICHECK(call != nullptr); const PrimExpr& x = call->args[0]; @@ -119,8 +117,8 @@ TVM_REGISTER_OP("tir.tanh") PrimExpr tanh_pos = (one - exp_neg2x) / (one + exp_neg2x); PrimExpr tanh_neg = (exp_pos2x - one) / (exp_pos2x + one); - *rv = tir::Select(x >= make_zero(x.dtype()), tanh_pos, tanh_neg); - })); + return tir::Select(x >= make_zero(x.dtype()), tanh_pos, tanh_neg); + }); TVM_REGISTER_OP("tir.pow").set_attr( "llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 2>); @@ -130,24 +128,22 @@ TVM_REGISTER_OP("tir.popcount") DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>); TVM_REGISTER_OP("tir.tan").set_attr( - "llvm.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { - PrimExpr e = targs[0]; + "llvm.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { const tir::CallNode* call = e.as(); ICHECK(call != nullptr); const PrimExpr& x = call->args[0]; PrimExpr tan_x = sin(x) / cos(x); - *rv = tan_x; - })); + return tan_x; + }); TVM_REGISTER_OP("tir.cos").set_attr( "llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::cos, 1>); TVM_REGISTER_OP("tir.cosh") .set_attr("llvm.FLowerIntrinsic", - PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { + [](const PrimExpr& e) -> PrimExpr { using tir::make_const; using tir::make_zero; - PrimExpr e = targs[0]; const tir::CallNode* call = e.as(); ICHECK(call != nullptr); const PrimExpr& x = call->args[0]; @@ -156,18 +152,17 @@ TVM_REGISTER_OP("tir.cosh") PrimExpr exp_negx = exp(neg_one * x); PrimExpr exp_posx = exp(x); PrimExpr ret = (exp_posx + exp_negx) / two; - *rv = ret; - })); + return ret; + }); TVM_REGISTER_OP("tir.sin").set_attr( "llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::sin, 1>); TVM_REGISTER_OP("tir.sinh") .set_attr("llvm.FLowerIntrinsic", - PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { + [](const PrimExpr& e) -> PrimExpr { using tir::make_const; using tir::make_zero; - PrimExpr e = targs[0]; const tir::CallNode* call = e.as(); ICHECK(call != nullptr); const PrimExpr& x = call->args[0]; @@ -176,12 +171,11 @@ TVM_REGISTER_OP("tir.sinh") PrimExpr exp_negx = exp(neg_one * x); PrimExpr exp_posx = exp(x); PrimExpr ret = (exp_posx - exp_negx) / two; - *rv = ret; - })); + return ret; + }); TVM_REGISTER_OP("tir.clz").set_attr( - "llvm.FLowerIntrinsic", PackedFunc([](const TVMArgs& targs, TVMRetValue* rv) { - PrimExpr e = targs[0]; + "llvm.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { const tir::CallNode* call = e.as(); ICHECK(call != nullptr); ICHECK_EQ(call->args.size(), 1); @@ -192,8 +186,8 @@ TVM_REGISTER_OP("tir.clz").set_attr( cargs.push_back(IntImm(DataType::Int(1), 1)); // is_zero_undef // LLVM requires that the return type must match the first argument type auto clz = tir::Call(call->args[0]->dtype, tir::builtin::call_llvm_intrin(), cargs); - *rv = cast(call->dtype, clz); - })); + return cast(call->dtype, clz); + }); } // namespace llvm } // namespace codegen diff --git a/src/tir/transforms/lower_intrin.cc b/src/tir/transforms/lower_intrin.cc index 1fb9e214c765..aad8933cf5e2 100644 --- a/src/tir/transforms/lower_intrin.cc +++ b/src/tir/transforms/lower_intrin.cc @@ -42,6 +42,7 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { IntrinInjecter(arith::Analyzer* analyzer, std::string target, std::string mtriple = "") : IRMutatorWithAnalyzer(analyzer) { + std::vector patterns_; patterns_.push_back(target + ".FLowerIntrinsic"); bool is_llvm_aarch64 = (mtriple.find("aarch64") != std::string::npos); @@ -50,30 +51,33 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { } patterns_.push_back("default.FLowerIntrinsic"); + fma_ = runtime::Registry::Get("tvm.intrin.rule." + target + ".fma"); if (target == "stackvm") { support_bitwise_op_ = false; } + + for (const std::string& pattern : patterns_) + if (Op::HasAttrMap(pattern)) + lower_intrin_maps_.push_back(Op::GetAttrMap(pattern)); } PrimExpr VisitExpr_(const CallNode* op) final { if (auto* ptr_op = op->op.as()) { - for (const std::string& pattern : patterns_) - if (Op::HasAttrMap(pattern)) { - auto f_lower_intrin_map = Op::GetAttrMap(pattern); - FLowerIntrinsic f = f_lower_intrin_map.get(GetRef(ptr_op), nullptr); - if (f != nullptr) { - PrimExpr e = GetRef(op); - PrimExpr r = f(e); - ICHECK(r.defined()) << "intrinsic rule must always return valid Expr"; - if (!r.same_as(e)) { - r = this->VisitExpr(r); - if (r.defined()) { - return r; - } + for (const auto& f_lower_intrin_map : lower_intrin_maps_) { + FLowerIntrinsic f = f_lower_intrin_map.get(GetRef(ptr_op), nullptr); + if (f != nullptr) { + PrimExpr e = GetRef(op); + PrimExpr r = f(e); + ICHECK(r.defined()) << "intrinsic rule must always return valid Expr"; + if (!r.same_as(e)) { + r = this->VisitExpr(r); + if (r.defined()) { + return r; } } } + } } return IRMutatorWithAnalyzer::VisitExpr_(op); } @@ -277,7 +281,7 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { } // patterns - std::vector patterns_; + std::vector> lower_intrin_maps_; const PackedFunc* fma_{nullptr}; bool support_bitwise_op_{true}; }; From eec4c49977985c47c415dc30e303da27d6c7bb31 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Fri, 23 Apr 2021 14:29:46 -0700 Subject: [PATCH 40/41] Fix clang format --- src/target/llvm/intrin_rule_llvm.cc | 127 ++++++++++++++-------------- src/tir/transforms/lower_intrin.cc | 4 +- 2 files changed, 64 insertions(+), 67 deletions(-) diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc index 7346f1891817..2d30c2030685 100644 --- a/src/target/llvm/intrin_rule_llvm.cc +++ b/src/target/llvm/intrin_rule_llvm.cc @@ -46,17 +46,16 @@ TVM_REGISTER_OP("tir.exp2") // TODO(tvm-team): migrate the legalization transformations as a separate // set of rules in TIR that can be shared across backends. TVM_REGISTER_OP("tir.exp10") - .set_attr("llvm.FLowerIntrinsic", - [](const PrimExpr& e) -> PrimExpr { - using tir::make_const; - using tir::make_zero; - const tir::CallNode* call = e.as(); - ICHECK(call != nullptr); - const PrimExpr& x = call->args[0]; - PrimExpr ln10 = make_const(x.dtype(), 2.302585093); - PrimExpr ret = exp(x * ln10); - return ret; - }); + .set_attr("llvm.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { + using tir::make_const; + using tir::make_zero; + const tir::CallNode* call = e.as(); + ICHECK(call != nullptr); + const PrimExpr& x = call->args[0]; + PrimExpr ln10 = make_const(x.dtype(), 2.302585093); + PrimExpr ret = exp(x * ln10); + return ret; + }); TVM_REGISTER_OP("tir.fma").set_attr( "llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3>); @@ -101,24 +100,23 @@ TVM_REGISTER_OP("tir.nearbyint") DispatchLLVMPureIntrin<::llvm::Intrinsic::nearbyint, 1>); TVM_REGISTER_OP("tir.tanh") - .set_attr("llvm.FLowerIntrinsic", - [](const PrimExpr& e) -> PrimExpr { - using tir::make_const; - using tir::make_zero; - const tir::CallNode* call = e.as(); - ICHECK(call != nullptr); - const PrimExpr& x = call->args[0]; - PrimExpr one = make_const(x.dtype(), 1); - PrimExpr two = make_const(x.dtype(), 2); - PrimExpr neg_two = make_const(x.dtype(), -2); - - PrimExpr exp_neg2x = exp(neg_two * x); - PrimExpr exp_pos2x = exp(two * x); - - PrimExpr tanh_pos = (one - exp_neg2x) / (one + exp_neg2x); - PrimExpr tanh_neg = (exp_pos2x - one) / (exp_pos2x + one); - return tir::Select(x >= make_zero(x.dtype()), tanh_pos, tanh_neg); - }); + .set_attr("llvm.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { + using tir::make_const; + using tir::make_zero; + const tir::CallNode* call = e.as(); + ICHECK(call != nullptr); + const PrimExpr& x = call->args[0]; + PrimExpr one = make_const(x.dtype(), 1); + PrimExpr two = make_const(x.dtype(), 2); + PrimExpr neg_two = make_const(x.dtype(), -2); + + PrimExpr exp_neg2x = exp(neg_two * x); + PrimExpr exp_pos2x = exp(two * x); + + PrimExpr tanh_pos = (one - exp_neg2x) / (one + exp_neg2x); + PrimExpr tanh_neg = (exp_pos2x - one) / (exp_pos2x + one); + return tir::Select(x >= make_zero(x.dtype()), tanh_pos, tanh_neg); + }); TVM_REGISTER_OP("tir.pow").set_attr( "llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 2>); @@ -127,52 +125,51 @@ TVM_REGISTER_OP("tir.popcount") .set_attr("llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>); -TVM_REGISTER_OP("tir.tan").set_attr( - "llvm.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { - const tir::CallNode* call = e.as(); - ICHECK(call != nullptr); - const PrimExpr& x = call->args[0]; - PrimExpr tan_x = sin(x) / cos(x); - return tan_x; - }); +TVM_REGISTER_OP("tir.tan").set_attr("llvm.FLowerIntrinsic", + [](const PrimExpr& e) -> PrimExpr { + const tir::CallNode* call = + e.as(); + ICHECK(call != nullptr); + const PrimExpr& x = call->args[0]; + PrimExpr tan_x = sin(x) / cos(x); + return tan_x; + }); TVM_REGISTER_OP("tir.cos").set_attr( "llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::cos, 1>); TVM_REGISTER_OP("tir.cosh") - .set_attr("llvm.FLowerIntrinsic", - [](const PrimExpr& e) -> PrimExpr { - using tir::make_const; - using tir::make_zero; - const tir::CallNode* call = e.as(); - ICHECK(call != nullptr); - const PrimExpr& x = call->args[0]; - PrimExpr two = make_const(x.dtype(), 2); - PrimExpr neg_one = make_const(x.dtype(), -1); - PrimExpr exp_negx = exp(neg_one * x); - PrimExpr exp_posx = exp(x); - PrimExpr ret = (exp_posx + exp_negx) / two; - return ret; - }); + .set_attr("llvm.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { + using tir::make_const; + using tir::make_zero; + const tir::CallNode* call = e.as(); + ICHECK(call != nullptr); + const PrimExpr& x = call->args[0]; + PrimExpr two = make_const(x.dtype(), 2); + PrimExpr neg_one = make_const(x.dtype(), -1); + PrimExpr exp_negx = exp(neg_one * x); + PrimExpr exp_posx = exp(x); + PrimExpr ret = (exp_posx + exp_negx) / two; + return ret; + }); TVM_REGISTER_OP("tir.sin").set_attr( "llvm.FLowerIntrinsic", DispatchLLVMPureIntrin<::llvm::Intrinsic::sin, 1>); TVM_REGISTER_OP("tir.sinh") - .set_attr("llvm.FLowerIntrinsic", - [](const PrimExpr& e) -> PrimExpr { - using tir::make_const; - using tir::make_zero; - const tir::CallNode* call = e.as(); - ICHECK(call != nullptr); - const PrimExpr& x = call->args[0]; - PrimExpr two = make_const(x.dtype(), 2); - PrimExpr neg_one = make_const(x.dtype(), -1); - PrimExpr exp_negx = exp(neg_one * x); - PrimExpr exp_posx = exp(x); - PrimExpr ret = (exp_posx - exp_negx) / two; - return ret; - }); + .set_attr("llvm.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { + using tir::make_const; + using tir::make_zero; + const tir::CallNode* call = e.as(); + ICHECK(call != nullptr); + const PrimExpr& x = call->args[0]; + PrimExpr two = make_const(x.dtype(), 2); + PrimExpr neg_one = make_const(x.dtype(), -1); + PrimExpr exp_negx = exp(neg_one * x); + PrimExpr exp_posx = exp(x); + PrimExpr ret = (exp_posx - exp_negx) / two; + return ret; + }); TVM_REGISTER_OP("tir.clz").set_attr( "llvm.FLowerIntrinsic", [](const PrimExpr& e) -> PrimExpr { diff --git a/src/tir/transforms/lower_intrin.cc b/src/tir/transforms/lower_intrin.cc index aad8933cf5e2..4101891db699 100644 --- a/src/tir/transforms/lower_intrin.cc +++ b/src/tir/transforms/lower_intrin.cc @@ -58,8 +58,8 @@ class IntrinInjecter : public tvm::arith::IRMutatorWithAnalyzer { } for (const std::string& pattern : patterns_) - if (Op::HasAttrMap(pattern)) - lower_intrin_maps_.push_back(Op::GetAttrMap(pattern)); + if (Op::HasAttrMap(pattern)) + lower_intrin_maps_.push_back(Op::GetAttrMap(pattern)); } PrimExpr VisitExpr_(const CallNode* op) final { From bd39863930ab135ab7e78f5ef3e178ea9191c024 Mon Sep 17 00:00:00 2001 From: Xiyou Zhou Date: Fri, 23 Apr 2021 16:35:09 -0700 Subject: [PATCH 41/41] Retrigger CI --- python/tvm/target/intrin.py | 4 ++-- python/tvm/topi/arm_cpu/tensor_intrin.py | 2 +- python/tvm/topi/cuda/nms.py | 4 ++-- tutorials/language/intrin_math.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/tvm/target/intrin.py b/python/tvm/target/intrin.py index 3eb2f441bb5b..a0242cf828a3 100644 --- a/python/tvm/target/intrin.py +++ b/python/tvm/target/intrin.py @@ -76,6 +76,6 @@ def _rule_float_direct(op): # opencl pattern for exp -register_intrin_lowering("tir.exp", target="opencl", f=_rule_float_direct, level=100) +register_intrin_lowering("tir.exp", target="opencl", f=_rule_float_direct, level=99) # default pattern for exp -register_intrin_lowering("tir.exp", target="default", f=_rule_float_suffix, level=100) +register_intrin_lowering("tir.exp", target="default", f=_rule_float_suffix, level=99) diff --git a/python/tvm/topi/arm_cpu/tensor_intrin.py b/python/tvm/topi/arm_cpu/tensor_intrin.py index 494f6b7bc80d..bc9b0491947b 100644 --- a/python/tvm/topi/arm_cpu/tensor_intrin.py +++ b/python/tvm/topi/arm_cpu/tensor_intrin.py @@ -1056,5 +1056,5 @@ def _q_multiply_shift_arm(op): register_intrin_lowering( - "tir.q_multiply_shift", target="llvm.aarch64", f=_q_multiply_shift_arm, level=100 + "tir.q_multiply_shift", target="llvm.aarch64", f=_q_multiply_shift_arm, level=99 ) diff --git a/python/tvm/topi/cuda/nms.py b/python/tvm/topi/cuda/nms.py index da6be64e0f04..f064360768c2 100644 --- a/python/tvm/topi/cuda/nms.py +++ b/python/tvm/topi/cuda/nms.py @@ -52,9 +52,9 @@ def opencl_atomic_add_rule(op): raise RuntimeError("only support int32") -register_intrin_lowering("tir.atomic_add", target="cuda", f=cuda_atomic_add_rule, level=100) +register_intrin_lowering("tir.atomic_add", target="cuda", f=cuda_atomic_add_rule, level=99) -register_intrin_lowering("tir.atomic_add", target="opencl", f=opencl_atomic_add_rule, level=100) +register_intrin_lowering("tir.atomic_add", target="opencl", f=opencl_atomic_add_rule, level=99) def atomic_add(x, y): diff --git a/tutorials/language/intrin_math.py b/tutorials/language/intrin_math.py index a3ac794a06c5..92383b90a53f 100644 --- a/tutorials/language/intrin_math.py +++ b/tutorials/language/intrin_math.py @@ -113,7 +113,7 @@ def my_cuda_math_rule(op): return op -register_intrin_lowering("tir.exp", target="cuda", f=my_cuda_math_rule, level=100) +register_intrin_lowering("tir.exp", target="cuda", f=my_cuda_math_rule, level=99) ###################################################################### # Register the rule to TVM with override option to override existing rule. # Notice the difference between the printed code from previous one: @@ -149,7 +149,7 @@ def my_cuda_mylog_rule(op): # new op registration is triggered by registering an attribute of the op register_op_attr("tir.mylog", "TCallEffectKind", tvm.tir.CallEffectKind.Pure) -register_intrin_lowering("tir.mylog", target="cuda", f=my_cuda_mylog_rule, level=100) +register_intrin_lowering("tir.mylog", target="cuda", f=my_cuda_mylog_rule, level=99) n = te.var("n") A = te.placeholder((n,), name="A")