From 45807de1e1127e4fd716e75f6eb8963e8371c72a Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Wed, 9 Jan 2019 14:33:14 -0800 Subject: [PATCH 01/21] on the way to enable hybrid schedule --- 3rdparty/HalideIR | 2 +- include/tvm/operation.h | 3 + python/tvm/tensor.py | 7 +- src/op/hybrid_op.cc | 72 +++++++++++++++++++++ src/op/hybrid_op.h | 36 +++++++++++ src/op/op_util.cc | 32 --------- src/op/op_util.h | 8 --- tests/python/unittest/test_hybrid_script.py | 22 +++++++ 8 files changed, 139 insertions(+), 43 deletions(-) create mode 100644 src/op/hybrid_op.h diff --git a/3rdparty/HalideIR b/3rdparty/HalideIR index 6e7c1f046fda..a08e26e5a97f 160000 --- a/3rdparty/HalideIR +++ b/3rdparty/HalideIR @@ -1 +1 @@ -Subproject commit 6e7c1f046fda536562dc80977e93324fee2324bd +Subproject commit a08e26e5a97f4ef4d566a42f6c78704b3f9c7b8a diff --git a/include/tvm/operation.h b/include/tvm/operation.h index 02cd0d016f39..3509b133cfc3 100644 --- a/include/tvm/operation.h +++ b/include/tvm/operation.h @@ -459,6 +459,8 @@ class HybridOpNode : public OperationNode { Array inputs; /*! \brief Symbolic placeholder representation of outputs */ Array outputs; + /*! \brief The axis of iterations */ + Array axis; /*! \brief the statement that generates the computation. This is * slightly different from the body in ExternOpNode. All the output * tensors keep its own name specified by users in the script. @@ -500,6 +502,7 @@ class HybridOpNode : public OperationNode { v->Visit("attrs", &attrs); v->Visit("inputs", &inputs); v->Visit("outputs", &outputs); + v->Visit("axis", &axis); v->Visit("body", &body); } EXPORT static Operation make(std::string name, diff --git a/python/tvm/tensor.py b/python/tvm/tensor.py index 9a98e9a6e769..e1345ad373bf 100644 --- a/python/tvm/tensor.py +++ b/python/tvm/tensor.py @@ -152,7 +152,7 @@ class ComputeOp(Operation): """Compute operation.""" @property def axis(self): - """Represent axis of IterVar, only defined when it is a ComputeOp""" + """Represent axis of IterVar, defined when it is a ComputeOp""" return self.__getattr__("axis") @property @@ -184,4 +184,7 @@ class ExternOp(Operation): @register_node class HybridOp(Operation): """Hybrid operation.""" - pass + @property + def axis(self): + """Represent axis of IterVar, also defined when it is a HybridOp""" + return self.__getattr__("axis") diff --git a/src/op/hybrid_op.cc b/src/op/hybrid_op.cc index 4dbb2c0b964f..13283966db4b 100644 --- a/src/op/hybrid_op.cc +++ b/src/op/hybrid_op.cc @@ -8,7 +8,9 @@ #include #include #include + #include "op_util.h" +#include "hybrid_op.h" namespace tvm { using namespace ir; @@ -52,6 +54,7 @@ Operation HybridOpNode::make(std::string name, n->attrs = std::move(attrs); n->inputs = std::move(inputs); n->outputs = std::move(outputs); + n->axis = op::GatherLoopVars(body); n->body = std::move(body); Operation res = Operation(n); return res; @@ -186,4 +189,73 @@ Stmt HybridOpNode::BuildProvide( ret = op::ReplaceProvideTensor(ret, rmap); return ret; } + +class LoopVarFinder : public ir::IRVisitor { + public: + std::vector res_; + + void Visit_(const ir::For *op) { + Var loop_var(op->loop_var); + Range dom = Range::make_by_min_extent(op->min, op->extent); + IterVarType iter_var_ty = kOpaque; + switch(op->for_type) { + case ForType::Serial: + iter_var_ty = kOrdered; + break; + case ForType::Parallel: + iter_var_ty = kDataPar; + break; + case ForType::Vectorized: + iter_var_ty = kVectorized; + break; + case ForType::Unrolled: + iter_var_ty = kUnrolled; + break; + } + res_.push_back(IterVarNode::make(dom, loop_var, 
iter_var_ty)); + Visit(op->body); + } + +}; + +std::vector GatherLoopVars(Stmt stmt) { + LoopVarFinder Finder; + Finder.Visit(stmt); + return Finder.res_; +} + +// replacer to replace tensors' usage in Provide +class ProviderReplacer : public ir::IRMutator { + public: + explicit ProviderReplacer(const std::unordered_map& vmap) + : vmap_(vmap) {} + + Stmt Mutate_(const ir::Provide* op, const Stmt& s) { + Tensor t = Operation(op->func.node_).output(op->value_index); + auto it = vmap_.find(t); + if (it != vmap_.end()) { + Stmt ret = ir::Provide::make( + it->second->op, it->second->value_index, op->value, op->args); + found = true; + return IRMutator::Mutate_(ret.as(), ret); + } + return IRMutator::Mutate_(op, s); + } + + // whether it is found. + bool found{false}; + + private: + const std::unordered_map& vmap_; +}; + +Stmt ReplaceProvideTensor(Stmt stmt, + const std::unordered_map& replace) { + ProviderReplacer repl(replace); + Stmt ret = repl.Mutate(stmt); + return repl.found ? ret : stmt; +} + + + } // namespace tvm diff --git a/src/op/hybrid_op.h b/src/op/hybrid_op.h new file mode 100644 index 000000000000..a57d871f813d --- /dev/null +++ b/src/op/hybrid_op.h @@ -0,0 +1,36 @@ +#ifndef TVM_HYBRID_OP_H +#define TVM_HYBRID_OP_H + +#include +#include +#include +#include +#include +#include +#include +#include "../pass/ir_util.h" +#include "../pass/arg_binder.h" +#include "../schedule/message_passing.h" + + +namespace tvm { +namespace op { + +/*! + * \brief Find all the iteration variables in the given statement body. + * \param stmt The body to be inspected. + */ +std::vector GatherLoopVars(Stmt stmt); + +/*! + * \brief Replace the tensor reference (especially in Provide's) in stmt by the replace map. + * \param stmt The statement to be processed. + * \param replace The replacement rule. + */ +Stmt ReplaceProvideTensor(Stmt stmt, + const std::unordered_map& replace); + +} // namespace op +} // namespace tvm + +#endif // TVM_HYBRID_OP_H diff --git a/src/op/op_util.cc b/src/op/op_util.cc index 886f7c912303..9038e045106c 100644 --- a/src/op/op_util.cc +++ b/src/op/op_util.cc @@ -164,38 +164,6 @@ std::vector MakeIfNest(const std::vector& predicates) { return nest; } -// replacer to replace tensors' usage in Provide -class ProviderReplacer : public ir::IRMutator { - public: - explicit ProviderReplacer(const std::unordered_map& vmap) - : vmap_(vmap) {} - - Stmt Mutate_(const ir::Provide* op, const Stmt& s) { - Tensor t = Operation(op->func.node_).output(op->value_index); - auto it = vmap_.find(t); - if (it != vmap_.end()) { - Stmt ret = ir::Provide::make( - it->second->op, it->second->value_index, op->value, op->args); - found = true; - return IRMutator::Mutate_(ret.as(), ret); - } - return IRMutator::Mutate_(op, s); - } - - // whether it is found. - bool found{false}; - - private: - const std::unordered_map& vmap_; -}; - -Stmt ReplaceProvideTensor(Stmt stmt, - const std::unordered_map& replace) { - ProviderReplacer repl(replace); - Stmt ret = repl.Mutate(stmt); - return repl.found ? ret : stmt; -} - // replacer to replace tensors class TensorReplacer : public ir::IRMutator { public: diff --git a/src/op/op_util.h b/src/op/op_util.h index 6971f14eef73..523633db0984 100644 --- a/src/op/op_util.h +++ b/src/op/op_util.h @@ -48,14 +48,6 @@ MakeLoopNest(const Stage& stage, */ std::vector MakeIfNest(const std::vector& predicates); -/*! - * \brief Replace the tensor reference (especially in Provide's) in stmt by the replace map. - * \param stmt The statement to be processed. 
- * \param replace The replacement rule. - */ -Stmt ReplaceProvideTensor(Stmt stmt, - const std::unordered_map& replace); - /*! * \brief Replace the tensor reference (especially in Call's) in stmt by the replace map. * \param stmt The statement to be processed. diff --git a/tests/python/unittest/test_hybrid_script.py b/tests/python/unittest/test_hybrid_script.py index 668b1598446b..0d6edf27561c 100644 --- a/tests/python/unittest/test_hybrid_script.py +++ b/tests/python/unittest/test_hybrid_script.py @@ -593,6 +593,27 @@ def hoo(a, b): b = [1, 2, 3, 4, 5] run_and_check(hoo, [a, b]) +def test_schedule(): + @script + def outer_product(a, b): + """This is a simple outer product. + Actually this function is not required to be documented. + I write this docstring to test skipping docstring functionality. + """ + c = output_tensor((128, 128), a.dtype) + for i in range(128): + for j in range(128): + c[i, j] = a[i] * b[j] + return c + a = tvm.placeholder((128,)) + b = tvm.placeholder((128,)) + c = outer_product(a, b) + sch = tvm.create_schedule(c.op) + i, j = c.op.axis + print(i.iter_type) + print(j.iter_type) + print(i, j) + io, ii = sch[c].split(i, 4) if __name__ == "__main__": test_outer_product() @@ -610,5 +631,6 @@ def hoo(a, b): test_func_call() test_bool() test_const_range() + test_schedule() # TODO: # test_inplace() From 52e757e4a6ef7fbc227b79691bf1dce6ab731534 Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Thu, 10 Jan 2019 11:03:44 -0800 Subject: [PATCH 02/21] I think I am done with imperfect loop split? --- src/op/compute_op.cc | 1 + src/op/hybrid_op.cc | 135 +++++++++++++++----- src/op/hybrid_op.h | 10 +- src/op/op_util.cc | 28 ++++ src/op/op_util.h | 12 ++ tests/python/unittest/test_hybrid_script.py | 37 ++++-- 6 files changed, 180 insertions(+), 43 deletions(-) diff --git a/src/op/compute_op.cc b/src/op/compute_op.cc index d4cb2b4c632b..baf42f9367b4 100644 --- a/src/op/compute_op.cc +++ b/src/op/compute_op.cc @@ -212,6 +212,7 @@ void ComputeOpNode::GatherBound( const Operation& self, const std::unordered_map& tensor_dom, std::unordered_map* out_dom_map) const { + CHECK_EQ(self.operator->(), this); const TensorDom& tdom = tensor_dom.at(self.output(0)); for (size_t i = 0; i < this->axis.size(); ++i) { Range r = arith::Union(tdom.data.at(i)).cover_range(this->axis[i]->dom); diff --git a/src/op/hybrid_op.cc b/src/op/hybrid_op.cc index 13283966db4b..bfdd7706ea82 100644 --- a/src/op/hybrid_op.cc +++ b/src/op/hybrid_op.cc @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include "op_util.h" @@ -27,7 +29,7 @@ int HybridOpNode::num_outputs() const { } Array HybridOpNode::root_iter_vars() const { - return {}; + return this->axis; } Type HybridOpNode::output_dtype(size_t i) const { @@ -105,6 +107,10 @@ void HybridOpNode::GatherBound( const Operation& self, const std::unordered_map& tensor_dom, std::unordered_map* out_dom_map) const { + for (auto iter_var : axis) { + CHECK(!out_dom_map->count(iter_var)); + out_dom_map->operator[](iter_var) = iter_var->dom; + } } Stmt HybridOpNode::BuildRealize( @@ -187,41 +193,113 @@ Stmt HybridOpNode::BuildProvide( * */ ret = op::ReplaceTensor(ret, rmap); ret = op::ReplaceProvideTensor(ret, rmap); + + ret = op::ApplySchedule(stage, dom_map, ret); return ret; } -class LoopVarFinder : public ir::IRVisitor { - public: - std::vector res_; +namespace op { +Stmt ApplySplits(const Stage &stage, + const std::unordered_map& dom_map, Stmt stmt) { + class LoopSpliter : public IRMutator { + Expr factor; + IterVar parent, inner, outer; + public: + 
LoopSpliter(const SplitNode *split, + const std::unordered_map& dom_map) : + factor(split->factor) { + + auto &parent_ = split->parent; + if (parent_->dom.defined()) { + CHECK(is_const_int(parent_->dom->min, 0)); + parent= parent_; + } else { + CHECK(dom_map.count(parent_)); + auto &dom = dom_map.find(parent_)->second; + CHECK(is_const_int(dom->min, 0)); + parent = IterVarNode::make(dom, parent_->var, parent_->iter_type); + } + + auto &inner_ = split->inner; + CHECK(dom_map.count(inner_)); + auto &inner_dom = dom_map.find(inner_)->second; + CHECK(is_const_int(inner_dom->min, 0)); + + auto &outer_ = split->outer; + CHECK(dom_map.count(outer_)); + auto &outer_dom = dom_map.find(outer_)->second; + CHECK(is_const_int(outer_dom->min, 0)); - void Visit_(const ir::For *op) { - Var loop_var(op->loop_var); - Range dom = Range::make_by_min_extent(op->min, op->extent); - IterVarType iter_var_ty = kOpaque; - switch(op->for_type) { - case ForType::Serial: - iter_var_ty = kOrdered; - break; - case ForType::Parallel: - iter_var_ty = kDataPar; - break; - case ForType::Vectorized: - iter_var_ty = kVectorized; - break; - case ForType::Unrolled: - iter_var_ty = kUnrolled; - break; + inner = IterVarNode::make(inner_dom, inner_->var, inner_->iter_type); + outer = IterVarNode::make(outer_dom, outer_->var, outer_->iter_type); + } + + Stmt Mutate_(const For *op, const Stmt &stmt) { + if (op->loop_var.get() == parent->var.get()) { + std::unordered_map rmap; + rmap[op->loop_var.get()] = inner + outer * factor; + Stmt ret = ir::Substitute(op->body, rmap); + Expr cond = likely(outer * factor < (parent->dom->extent - inner)); + ret = IfThenElse::make(cond, ret); + ret = For::make(inner->var, Expr(0), inner->dom->extent, + IterVarTypeToForType(inner->iter_type), op->device_api, ret); + ret = For::make(outer->var, Expr(0), outer->dom->extent, + IterVarTypeToForType(outer->iter_type), op->device_api, ret); + return ret; + } + return IRMutator::Mutate_(op, stmt); + } + }; + + bool changed = true; + while (changed) { + changed = false; + for (auto &rel : stage->relations) { + if (const SplitNode* split = rel.as()) { + bool not_splited = false; + PostOrderVisit(stmt, [¬_splited, &split](const NodeRef &node) { + if (const Variable *var = node.as()) { + if (var == split->parent->var.get()) + not_splited = true; + } + }); + if (not_splited) { + stmt = LoopSpliter(split, dom_map).Mutate(stmt); + changed = true; + } + } } - res_.push_back(IterVarNode::make(dom, loop_var, iter_var_ty)); - Visit(op->body); } -}; + return stmt; +} + +Stmt ApplyLoopAnnotations(const Stage &stage, Stmt stmt) { + return stmt; +} + +Stmt ApplyLoopOrder(const Stage &stage, Stmt stmt) { + return stmt; +} + +Stmt ApplySchedule(const Stage &stage, const + std::unordered_map& dom_map, Stmt stmt) { + stmt = ApplySplits(stage, dom_map, stmt); + stmt = ApplyLoopAnnotations(stage, stmt); + stmt = ApplyLoopOrder(stage, stmt); + return stmt; +} std::vector GatherLoopVars(Stmt stmt) { - LoopVarFinder Finder; - Finder.Visit(stmt); - return Finder.res_; + std::vector res_; + PostOrderVisit(stmt, [&res_](const NodeRef &node) { + if (const For *op = node.as()) { + Var loop_var(op->loop_var); + Range dom = Range::make_by_min_extent(op->min, op->extent); + res_.push_back(IterVarNode::make(dom, loop_var, ForTypeToIterVarType(op->for_type))); + } + }); + return res_; } // replacer to replace tensors' usage in Provide @@ -255,7 +333,6 @@ Stmt ReplaceProvideTensor(Stmt stmt, Stmt ret = repl.Mutate(stmt); return repl.found ? 
ret : stmt; } - - +} // namespace op } // namespace tvm diff --git a/src/op/hybrid_op.h b/src/op/hybrid_op.h index a57d871f813d..5f8a7b5b578c 100644 --- a/src/op/hybrid_op.h +++ b/src/op/hybrid_op.h @@ -28,7 +28,15 @@ std::vector GatherLoopVars(Stmt stmt); * \param replace The replacement rule. */ Stmt ReplaceProvideTensor(Stmt stmt, - const std::unordered_map& replace); + const std::unordered_map& replace); + +/*! + * \brief Replace the tensor reference (especially in Provide's) in stmt by the replace map. + * \param stmt The statement to be processed. + * \param stage The schedule information to be applied. + */ +Stmt ApplySchedule(const Stage& stage, + const std::unordered_map& dom_map, Stmt stmt); } // namespace op } // namespace tvm diff --git a/src/op/op_util.cc b/src/op/op_util.cc index 9038e045106c..ef0da51097a1 100644 --- a/src/op/op_util.cc +++ b/src/op/op_util.cc @@ -215,5 +215,33 @@ Stmt Substitute(Stmt s, return ir::Substitute(s, init); } +IterVarType ForTypeToIterVarType(ir::ForType for_type) { + switch(for_type) { + case ForType::Serial: + return kOrdered; + case ForType::Parallel: + return kDataPar; + case ForType::Vectorized: + return kVectorized; + case ForType::Unrolled: + return kUnrolled; + } +} + +ir::ForType IterVarTypeToForType(IterVarType iter_type) { + switch(iter_type) { + case kOrdered: + return ForType::Serial; + case kDataPar: + return ForType::Parallel; + case kVectorized: + return ForType::Vectorized; + case kUnrolled: + return ForType::Unrolled; + default: + return ForType::Serial; + } +} + } // namespace op } // namespace tvm diff --git a/src/op/op_util.h b/src/op/op_util.h index 523633db0984..de2e44c2ed59 100644 --- a/src/op/op_util.h +++ b/src/op/op_util.h @@ -72,6 +72,18 @@ Expr ReplaceTensor(Expr expr, Stmt Substitute(Stmt stmt, const std::unordered_map& value_map); +/*! + * \brief Converts Halide ForType to its corresponding IterVarType + * \param for_type The ForType to be converted + */ +IterVarType ForTypeToIterVarType(ir::ForType for_type); + +/*! + * \brief Converts IterVarType to its corresponding Halide ForType + * \param iter_type The IterVarType to be converted + */ +ir::ForType IterVarTypeToForType(IterVarType iter_type); + } // namespace op } // namespace tvm #endif // TVM_OP_OP_UTIL_H_ diff --git a/tests/python/unittest/test_hybrid_script.py b/tests/python/unittest/test_hybrid_script.py index 0d6edf27561c..080618c7df12 100644 --- a/tests/python/unittest/test_hybrid_script.py +++ b/tests/python/unittest/test_hybrid_script.py @@ -594,26 +594,37 @@ def hoo(a, b): run_and_check(hoo, [a, b]) def test_schedule(): + # Testing perfect loop split @script def outer_product(a, b): - """This is a simple outer product. - Actually this function is not required to be documented. - I write this docstring to test skipping docstring functionality. 
- """ - c = output_tensor((128, 128), a.dtype) + c = output_tensor((128, 64), a.dtype) for i in range(128): - for j in range(128): + for j in range(64): c[i, j] = a[i] * b[j] return c - a = tvm.placeholder((128,)) - b = tvm.placeholder((128,)) + a = tvm.placeholder((128,), name='a') + b = tvm.placeholder((64,), name='b') c = outer_product(a, b) sch = tvm.create_schedule(c.op) - i, j = c.op.axis - print(i.iter_type) - print(j.iter_type) - print(i, j) - io, ii = sch[c].split(i, 4) + j, i = c.op.axis + jo, ji = sch[c].split(j, 4) + joo, joi = sch[c].split(jo, 4) + ir = tvm.lower(sch, [a, b, c], simple_mode=True) + assert isinstance(ir, tvm.stmt.ProducerConsumer) + ir = ir.body + assert isinstance(ir, tvm.stmt.AttrStmt) + ir = ir.body + assert ir.loop_var.name == 'i' + ir = ir.body + assert isinstance(ir, tvm.stmt.For) + assert ir.loop_var.name == 'j.outer.outer' + ir = ir.body + assert isinstance(ir, tvm.stmt.For) + assert ir.loop_var.name == 'j.outer.inner' + ir = ir.body + assert isinstance(ir, tvm.stmt.For) + assert ir.loop_var.name == 'j.inner' + if __name__ == "__main__": test_outer_product() From 11e97e56b1e5d9099fe4257a949cd8149e9689b7 Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Thu, 10 Jan 2019 11:11:47 -0800 Subject: [PATCH 03/21] copyright watermark --- src/op/hybrid_op.cc | 4 ++-- src/op/hybrid_op.h | 5 +++++ tests/python/unittest/test_hybrid_script.py | 7 ++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/op/hybrid_op.cc b/src/op/hybrid_op.cc index bfdd7706ea82..4eaec71e0c90 100644 --- a/src/op/hybrid_op.cc +++ b/src/op/hybrid_op.cc @@ -1,5 +1,5 @@ /*! - * Copyright (c) 2018 by Contributors + * Copyright (c) 2019 by Contributors * \brief Hybrid computation rule. * \file hybrid_op.cc */ @@ -199,6 +199,7 @@ Stmt HybridOpNode::BuildProvide( } namespace op { + Stmt ApplySplits(const Stage &stage, const std::unordered_map& dom_map, Stmt stmt) { class LoopSpliter : public IRMutator { @@ -334,5 +335,4 @@ Stmt ReplaceProvideTensor(Stmt stmt, return repl.found ? ret : stmt; } } // namespace op - } // namespace tvm diff --git a/src/op/hybrid_op.h b/src/op/hybrid_op.h index 5f8a7b5b578c..6046ef04ec7b 100644 --- a/src/op/hybrid_op.h +++ b/src/op/hybrid_op.h @@ -1,3 +1,8 @@ +/*! + * Copyright (c) 2019 by Contributors + * \brief Helper utilities to implement hybrid_op. 
+ * \file hybrid_op.h + */ #ifndef TVM_HYBRID_OP_H #define TVM_HYBRID_OP_H diff --git a/tests/python/unittest/test_hybrid_script.py b/tests/python/unittest/test_hybrid_script.py index 080618c7df12..7de03ff1b906 100644 --- a/tests/python/unittest/test_hybrid_script.py +++ b/tests/python/unittest/test_hybrid_script.py @@ -594,7 +594,7 @@ def hoo(a, b): run_and_check(hoo, [a, b]) def test_schedule(): - # Testing perfect loop split + # Test perfect loop split @script def outer_product(a, b): c = output_tensor((128, 64), a.dtype) @@ -625,6 +625,11 @@ def outer_product(a, b): assert isinstance(ir, tvm.stmt.For) assert ir.loop_var.name == 'j.inner' + # Test imperfect loop split + # Test loop annotation + # Test loop binds + # Test loop reorder + if __name__ == "__main__": test_outer_product() From aa46637dee64fe95cf39fc99041e5534226eb134 Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Thu, 10 Jan 2019 14:24:21 -0800 Subject: [PATCH 04/21] loop annotation --- src/op/hybrid_op.cc | 40 +++++++++++++++++++++ src/op/hybrid_op.h | 11 +++++- src/op/op_util.cc | 8 ++--- src/schedule/schedule_dataflow_rewrite.cc | 3 ++ tests/python/unittest/test_hybrid_script.py | 6 ++-- 5 files changed, 60 insertions(+), 8 deletions(-) diff --git a/src/op/hybrid_op.cc b/src/op/hybrid_op.cc index 4eaec71e0c90..3e0e5e96df3d 100644 --- a/src/op/hybrid_op.cc +++ b/src/op/hybrid_op.cc @@ -276,6 +276,46 @@ Stmt ApplySplits(const Stage &stage, } Stmt ApplyLoopAnnotations(const Stage &stage, Stmt stmt) { + class LoopAnnotator : public IRMutator { + const Variable *var; + ForType for_type; + public: + LoopAnnotator(const Variable *var_, ForType for_type_) : var(var_), for_type(for_type_) {} + + Stmt Mutate_(const For *op, const Stmt &stmt) { + if (op->loop_var.get() == var) { + CHECK(for_type != op->for_type); + return For::make(op->loop_var, op->min, op->extent, + for_type, op->device_api, op->body); + } + return IRMutator::Mutate_(op, stmt); + } + }; + + for (auto &iter_var : stage->leaf_iter_vars) { + bool equal = false; + int found = 0; + + const Variable *var = iter_var->var.get(); + ForType expected = IterVarTypeToForType(iter_var->iter_type); + if (stage->iter_var_attrs.count(iter_var)) { + expected = IterVarTypeToForType(stage->iter_var_attrs[iter_var]->iter_type); + } + + PostOrderVisit(stmt, [&found, &var, &expected, &equal](const NodeRef &node) { + if (const For *op = node.as()) { + if (op->loop_var.get() == var) { + ++found; + equal = expected == op->for_type; + } + } + }); + + CHECK_EQ(found, 1) << " iter var should be found exactly once!"; + if (!equal) { + stmt = LoopAnnotator(var, expected).Mutate(stmt); + } + } return stmt; } diff --git a/src/op/hybrid_op.h b/src/op/hybrid_op.h index 6046ef04ec7b..29cc774341cd 100644 --- a/src/op/hybrid_op.h +++ b/src/op/hybrid_op.h @@ -36,13 +36,22 @@ Stmt ReplaceProvideTensor(Stmt stmt, const std::unordered_map& replace); /*! - * \brief Replace the tensor reference (especially in Provide's) in stmt by the replace map. + * \brief Apply the schedule manipulation on the function body. * \param stmt The statement to be processed. * \param stage The schedule information to be applied. */ Stmt ApplySchedule(const Stage& stage, const std::unordered_map& dom_map, Stmt stmt); +/*! + * \brief Apply loop splits in the schedule on the function body. + * \param stage The schedule information to be applied. + * \param dom_map The extents of the iterative variables may be used. + * \param stmt The statement to be processed. 
+ */ +Stmt ApplySplits(const Stage &stage, + const std::unordered_map& dom_map, Stmt stmt); + } // namespace op } // namespace tvm diff --git a/src/op/op_util.cc b/src/op/op_util.cc index ef0da51097a1..e6fadd3328a2 100644 --- a/src/op/op_util.cc +++ b/src/op/op_util.cc @@ -218,9 +218,9 @@ Stmt Substitute(Stmt s, IterVarType ForTypeToIterVarType(ir::ForType for_type) { switch(for_type) { case ForType::Serial: - return kOrdered; - case ForType::Parallel: return kDataPar; + case ForType::Parallel: + return kParallelized; case ForType::Vectorized: return kVectorized; case ForType::Unrolled: @@ -230,9 +230,9 @@ IterVarType ForTypeToIterVarType(ir::ForType for_type) { ir::ForType IterVarTypeToForType(IterVarType iter_type) { switch(iter_type) { - case kOrdered: - return ForType::Serial; case kDataPar: + return ForType::Serial; + case kParallelized: return ForType::Parallel; case kVectorized: return ForType::Vectorized; diff --git a/src/schedule/schedule_dataflow_rewrite.cc b/src/schedule/schedule_dataflow_rewrite.cc index f1820d2a7fc6..2e9a0aaac501 100644 --- a/src/schedule/schedule_dataflow_rewrite.cc +++ b/src/schedule/schedule_dataflow_rewrite.cc @@ -487,6 +487,9 @@ void RebaseNonZeroMinLoop(const Schedule& sch) { (*it).second->bind_thread.defined()) { continue; } + if (iv->dom.defined() && is_const_int(iv->dom->min, 0)) { + continue; + } if (idx < leaf_vars->data.size()) { // insert rebase IterVar rebased = IterVarNode::make( diff --git a/tests/python/unittest/test_hybrid_script.py b/tests/python/unittest/test_hybrid_script.py index 7de03ff1b906..35c58ae41c4c 100644 --- a/tests/python/unittest/test_hybrid_script.py +++ b/tests/python/unittest/test_hybrid_script.py @@ -595,6 +595,7 @@ def hoo(a, b): def test_schedule(): # Test perfect loop split + # Test loop annotation @script def outer_product(a, b): c = output_tensor((128, 64), a.dtype) @@ -609,7 +610,9 @@ def outer_product(a, b): j, i = c.op.axis jo, ji = sch[c].split(j, 4) joo, joi = sch[c].split(jo, 4) + sch[c].vectorize(ji) ir = tvm.lower(sch, [a, b, c], simple_mode=True) + print(ir) assert isinstance(ir, tvm.stmt.ProducerConsumer) ir = ir.body assert isinstance(ir, tvm.stmt.AttrStmt) @@ -622,11 +625,8 @@ def outer_product(a, b): assert isinstance(ir, tvm.stmt.For) assert ir.loop_var.name == 'j.outer.inner' ir = ir.body - assert isinstance(ir, tvm.stmt.For) - assert ir.loop_var.name == 'j.inner' # Test imperfect loop split - # Test loop annotation # Test loop binds # Test loop reorder From bc121a390c65f154d8f08f6c79868184cf943b18 Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Thu, 10 Jan 2019 14:38:16 -0800 Subject: [PATCH 05/21] fix lint --- src/op/hybrid_op.cc | 16 ++++++++-------- src/op/hybrid_op.h | 10 +++++----- src/op/op_util.cc | 4 ++-- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/op/hybrid_op.cc b/src/op/hybrid_op.cc index 3e0e5e96df3d..84645f2cfbae 100644 --- a/src/op/hybrid_op.cc +++ b/src/op/hybrid_op.cc @@ -10,7 +10,7 @@ #include #include #include - +#include #include "op_util.h" #include "hybrid_op.h" @@ -205,15 +205,15 @@ Stmt ApplySplits(const Stage &stage, class LoopSpliter : public IRMutator { Expr factor; IterVar parent, inner, outer; + public: LoopSpliter(const SplitNode *split, - const std::unordered_map& dom_map) : + const std::unordered_map& dom_map) : factor(split->factor) { - auto &parent_ = split->parent; if (parent_->dom.defined()) { CHECK(is_const_int(parent_->dom->min, 0)); - parent= parent_; + parent = parent_; } else { CHECK(dom_map.count(parent_)); auto &dom = 
dom_map.find(parent_)->second; @@ -281,7 +281,7 @@ Stmt ApplyLoopAnnotations(const Stage &stage, Stmt stmt) { ForType for_type; public: LoopAnnotator(const Variable *var_, ForType for_type_) : var(var_), for_type(for_type_) {} - + Stmt Mutate_(const For *op, const Stmt &stmt) { if (op->loop_var.get() == var) { CHECK(for_type != op->for_type); @@ -323,8 +323,8 @@ Stmt ApplyLoopOrder(const Stage &stage, Stmt stmt) { return stmt; } -Stmt ApplySchedule(const Stage &stage, const - std::unordered_map& dom_map, Stmt stmt) { +Stmt ApplySchedule(const Stage &stage, + const std::unordered_map& dom_map, Stmt stmt) { stmt = ApplySplits(stage, dom_map, stmt); stmt = ApplyLoopAnnotations(stage, stmt); stmt = ApplyLoopOrder(stage, stmt); @@ -374,5 +374,5 @@ Stmt ReplaceProvideTensor(Stmt stmt, Stmt ret = repl.Mutate(stmt); return repl.found ? ret : stmt; } -} // namespace op +} // namespace op } // namespace tvm diff --git a/src/op/hybrid_op.h b/src/op/hybrid_op.h index 29cc774341cd..efc61d9e09ec 100644 --- a/src/op/hybrid_op.h +++ b/src/op/hybrid_op.h @@ -3,8 +3,8 @@ * \brief Helper utilities to implement hybrid_op. * \file hybrid_op.h */ -#ifndef TVM_HYBRID_OP_H -#define TVM_HYBRID_OP_H +#ifndef TVM_OP_HYBRID_OP_H_ +#define TVM_OP_HYBRID_OP_H_ #include #include @@ -52,7 +52,7 @@ Stmt ApplySchedule(const Stage& stage, Stmt ApplySplits(const Stage &stage, const std::unordered_map& dom_map, Stmt stmt); -} // namespace op -} // namespace tvm +} // namespace op +} // namespace tvm -#endif // TVM_HYBRID_OP_H +#endif // TVM_OP_HYBRID_OP_H_ diff --git a/src/op/op_util.cc b/src/op/op_util.cc index e6fadd3328a2..9bcbf41df276 100644 --- a/src/op/op_util.cc +++ b/src/op/op_util.cc @@ -216,7 +216,7 @@ Stmt Substitute(Stmt s, } IterVarType ForTypeToIterVarType(ir::ForType for_type) { - switch(for_type) { + switch (for_type) { case ForType::Serial: return kDataPar; case ForType::Parallel: @@ -229,7 +229,7 @@ IterVarType ForTypeToIterVarType(ir::ForType for_type) { } ir::ForType IterVarTypeToForType(IterVarType iter_type) { - switch(iter_type) { + switch (iter_type) { case kDataPar: return ForType::Serial; case kParallelized: From ecc940b064bcec0f8d8ed5ceed849a4db72d8482 Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Thu, 10 Jan 2019 14:43:18 -0800 Subject: [PATCH 06/21] fix lint 1 --- src/op/op_util.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/op/op_util.cc b/src/op/op_util.cc index 9bcbf41df276..b18552d5c562 100644 --- a/src/op/op_util.cc +++ b/src/op/op_util.cc @@ -225,6 +225,8 @@ IterVarType ForTypeToIterVarType(ir::ForType for_type) { return kVectorized; case ForType::Unrolled: return kUnrolled; + default: + return kDataPar; } } From 64b23713b7c91f1fddc7388de165a70f4f08381b Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Thu, 10 Jan 2019 14:50:38 -0800 Subject: [PATCH 07/21] shit! 
--- src/schedule/schedule_dataflow_rewrite.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/schedule/schedule_dataflow_rewrite.cc b/src/schedule/schedule_dataflow_rewrite.cc index 2e9a0aaac501..f1820d2a7fc6 100644 --- a/src/schedule/schedule_dataflow_rewrite.cc +++ b/src/schedule/schedule_dataflow_rewrite.cc @@ -487,9 +487,6 @@ void RebaseNonZeroMinLoop(const Schedule& sch) { (*it).second->bind_thread.defined()) { continue; } - if (iv->dom.defined() && is_const_int(iv->dom->min, 0)) { - continue; - } if (idx < leaf_vars->data.size()) { // insert rebase IterVar rebased = IterVarNode::make( From 4cbbf424aa008072ca91947f16843e7b4cc34059 Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Fri, 11 Jan 2019 00:28:11 -0800 Subject: [PATCH 08/21] loop reorder supported --- src/op/hybrid_op.cc | 79 +++++++++++++++++++-- src/op/hybrid_op.h | 21 ++++++ tests/python/unittest/test_hybrid_script.py | 55 +++++++++----- 3 files changed, 134 insertions(+), 21 deletions(-) diff --git a/src/op/hybrid_op.cc b/src/op/hybrid_op.cc index 84645f2cfbae..3dcb29274ae4 100644 --- a/src/op/hybrid_op.cc +++ b/src/op/hybrid_op.cc @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include "op_util.h" @@ -200,6 +201,7 @@ Stmt HybridOpNode::BuildProvide( namespace op { + Stmt ApplySplits(const Stage &stage, const std::unordered_map& dom_map, Stmt stmt) { class LoopSpliter : public IRMutator { @@ -275,7 +277,8 @@ Stmt ApplySplits(const Stage &stage, return stmt; } -Stmt ApplyLoopAnnotations(const Stage &stage, Stmt stmt) { +Stmt ApplyLoopAnnotations(const Stage &stage, + const std::unordered_map& rebased, Stmt stmt) { class LoopAnnotator : public IRMutator { const Variable *var; ForType for_type; @@ -296,7 +299,8 @@ Stmt ApplyLoopAnnotations(const Stage &stage, Stmt stmt) { bool equal = false; int found = 0; - const Variable *var = iter_var->var.get(); + const IterVar &actual = rebased.count(iter_var) ? rebased.find(iter_var)->second : iter_var; + const Variable *var = actual->var.get(); ForType expected = IterVarTypeToForType(iter_var->iter_type); if (stage->iter_var_attrs.count(iter_var)) { expected = IterVarTypeToForType(stage->iter_var_attrs[iter_var]->iter_type); @@ -319,15 +323,79 @@ Stmt ApplyLoopAnnotations(const Stage &stage, Stmt stmt) { return stmt; } -Stmt ApplyLoopOrder(const Stage &stage, Stmt stmt) { +Stmt ApplyLoopOrder(const Stage &stage, + const std::unordered_map &dom_map, + const std::unordered_map &rebased, Stmt stmt) { + std::vector current_order; + PostOrderVisit(stmt, [¤t_order](const NodeRef &node) { + if (const For *op = node.as()) + current_order.push_back(op->loop_var.get()); + }); + std::reverse(current_order.begin(), current_order.end()); + auto &required_ord = stage->leaf_iter_vars; + CHECK_EQ(current_order.size(), required_ord.size()) << "Cannot reorder the loops!"; + std::unordered_map reorder; + bool need_reorder = false; + for (size_t i = 0; i < current_order.size(); ++i) { + auto ¤t = current_order[i]; + const IterVar &iter_var = required_ord[i]; + const IterVar &required = rebased.count(iter_var) ? 
rebased.find(iter_var)->second : iter_var; + CHECK(required->dom.defined() || dom_map.count(required)) << required << "\n"; + reorder[current] = required; + if (current != required->var.get()) { + need_reorder = true; + } + } + + class LoopReorder : public IRMutator { + const Stage &stage; + const std::unordered_map &dom_map; + const std::unordered_map &reorder; + + public: + LoopReorder(const Stage &stage, + const std::unordered_map &dom_map, + const std::unordered_map &reorder) + : stage(stage), dom_map(dom_map), reorder(reorder) {} + + Stmt Mutate_(const For *op, const Stmt &stmt) { + // Reorder from in to out + Stmt body_ = IRMutator::Mutate(op->body); + CHECK(reorder.count(op->loop_var.get())); + auto target = reorder.find(op->loop_var.get())->second; + if (body_.same_as(op->body) && op->loop_var.get() == target->var.get()) + return stmt; + const Stmt &body = op->body.same_as(body_) ? op->body : body_; + ForType for_type = IterVarTypeToForType(target->iter_type); + if (stage->iter_var_attrs.count(target)) { + for_type = IterVarTypeToForType(stage->iter_var_attrs[target]->iter_type); + } + const Range &range = target->dom.defined() ? target->dom : dom_map.find(target)->second; + return For::make(target->var, range->min, range->extent, + for_type, HalideIR::DeviceAPI::None, body); + } + }; + + if (need_reorder) + return LoopReorder(stage, dom_map, reorder).Mutate(stmt); + return stmt; } Stmt ApplySchedule(const Stage &stage, const std::unordered_map& dom_map, Stmt stmt) { + // Gather rebased variables + std::unordered_map rebased; + for (auto rel : stage->relations) { + if (auto rebase = rel.as()) { + rebased[rebase->rebased] = rebase->parent; + CHECK(rebase->parent->dom.defined()); + CHECK(dom_map.count(rebase->rebased)); + } + } stmt = ApplySplits(stage, dom_map, stmt); - stmt = ApplyLoopAnnotations(stage, stmt); - stmt = ApplyLoopOrder(stage, stmt); + stmt = ApplyLoopOrder(stage, dom_map, rebased, stmt); + stmt = ApplyLoopAnnotations(stage, rebased, stmt); return stmt; } @@ -340,6 +408,7 @@ std::vector GatherLoopVars(Stmt stmt) { res_.push_back(IterVarNode::make(dom, loop_var, ForTypeToIterVarType(op->for_type))); } }); + std::reverse(res_.begin(), res_.end()); return res_; } diff --git a/src/op/hybrid_op.h b/src/op/hybrid_op.h index efc61d9e09ec..766212ac9bf1 100644 --- a/src/op/hybrid_op.h +++ b/src/op/hybrid_op.h @@ -52,6 +52,27 @@ Stmt ApplySchedule(const Stage& stage, Stmt ApplySplits(const Stage &stage, const std::unordered_map& dom_map, Stmt stmt); + +/*! + * \brief Apply loop annotation in the schedule on the function body. + * \param stage The schedule information to be applied. + * \param rebased The map specifies the rebase, a.k.a rename, relationship of these variables. + * \param stmt The statement to be processed. + */ +Stmt ApplyLoopAnnotations(const Stage &stage, + const std::unordered_map& rebased, Stmt stmt); + +/*! + * \brief Apply loop order in the schedule on the function body. + * \param stage The schedule information to be applied. + * \param dom_map The extents of the iterative variables may be used. + * \param rebased The map specifies the rebase, a.k.a rename, relationship of these variables. + * \param stmt The statement to be processed. 
+ */ +Stmt ApplyLoopOrder(const Stage &stage, + const std::unordered_map &dom_map, + const std::unordered_map &rebased, Stmt stmt); + } // namespace op } // namespace tvm diff --git a/tests/python/unittest/test_hybrid_script.py b/tests/python/unittest/test_hybrid_script.py index 35c58ae41c4c..04ceece24620 100644 --- a/tests/python/unittest/test_hybrid_script.py +++ b/tests/python/unittest/test_hybrid_script.py @@ -3,7 +3,7 @@ from tvm.hybrid.intrin import HYBRID_GLOBALS @nose.tools.nottest -def run_and_check(func, args, var_dict={}, target='llvm'): +def run_and_check(func, args, var_dict={}, target='llvm', sch=None): def tvm_val_2_py_val(val): val = tvm.ir_pass.Substitute(val, var_dict) val = tvm.ir_pass.Simplify(val) @@ -13,8 +13,13 @@ def tvm_val_2_py_val(val): ctx = tvm.context(target, 0) op = None - outs = func(*tuple(tvm.convert(i) if isinstance(i, list) else i for i in args)) - op = outs[0].op if isinstance(outs, list) else outs.op + if sch is None: + outs = func(*tuple(tvm.convert(i) if isinstance(i, list) else i for i in args)) + op = outs[0].op if isinstance(outs, list) else outs.op + sch = tvm.create_schedule(op) + else: + op = sch.outputs[0] + outs = list(op.outputs) emu_args = [] nd_args = [] @@ -30,13 +35,13 @@ def tvm_val_2_py_val(val): assert isinstance(i, list) emu_args.append(numpy.array(i)) - sch = tvm.create_schedule(op) + compile_args = [i for i in args if isinstance(i, (tvm.tensor.Tensor, tvm.expr.Var))] + \ + (outs if isinstance(outs, list) else [outs]) module = tvm.build(sch, - [i for i in args if isinstance(i, (tvm.tensor.Tensor, tvm.expr.Var))] + \ - (outs if isinstance(outs, list) else [outs]), + compile_args, target=target) assert module - + out_tensors = [] for i in range(op.num_outputs): output = op.output(i) @@ -47,7 +52,7 @@ def tvm_val_2_py_val(val): ref_data = func(*emu_args) if isinstance(ref_data, numpy.ndarray): ref_data = [ref_data] - + module(*nd_args) for nd, np in zip(out_tensors, ref_data): @@ -595,29 +600,36 @@ def hoo(a, b): def test_schedule(): # Test perfect loop split + # Test loop reorder # Test loop annotation @script def outer_product(a, b): - c = output_tensor((128, 64), a.dtype) - for i in range(128): + c = output_tensor((64, 64), a.dtype) + for i in range(64): for j in range(64): c[i, j] = a[i] * b[j] return c - a = tvm.placeholder((128,), name='a') - b = tvm.placeholder((64,), name='b') + a = tvm.placeholder((64,), name='a', dtype='float32') + b = tvm.placeholder((64,), name='b', dtype='float32') c = outer_product(a, b) sch = tvm.create_schedule(c.op) - j, i = c.op.axis + i, j = c.op.axis + io, ii = sch[c].split(i, 4) + sch[c].parallel(ii) jo, ji = sch[c].split(j, 4) joo, joi = sch[c].split(jo, 4) sch[c].vectorize(ji) + sch[c].reorder(ii, io, joo, joi, ji) ir = tvm.lower(sch, [a, b, c], simple_mode=True) - print(ir) assert isinstance(ir, tvm.stmt.ProducerConsumer) ir = ir.body assert isinstance(ir, tvm.stmt.AttrStmt) ir = ir.body - assert ir.loop_var.name == 'i' + assert isinstance(ir, tvm.stmt.For) + assert ir.loop_var.name == 'i.inner' + ir = ir.body + assert isinstance(ir, tvm.stmt.For) + assert ir.loop_var.name == 'i.outer' ir = ir.body assert isinstance(ir, tvm.stmt.For) assert ir.loop_var.name == 'j.outer.outer' @@ -626,9 +638,20 @@ def outer_product(a, b): assert ir.loop_var.name == 'j.outer.inner' ir = ir.body + module = tvm.build(sch, [a, b, c]) + assert module + a = numpy.random.randn(64) + b = numpy.random.randn(64) + c = numpy.outer(a, b) + nd_a = tvm.ndarray.array(a.astype('float32')) + nd_b = 
tvm.ndarray.array(b.astype('float32')) + nd_c = tvm.ndarray.array(numpy.zeros((64, 64)).astype('float32')) + module(nd_a, nd_b, nd_c) + tvm.testing.assert_allclose(nd_c.asnumpy(), c, 1e-5, 1e-5) + #run_and_check(outer_product, [a, b], sch=sch) + # Test imperfect loop split # Test loop binds - # Test loop reorder if __name__ == "__main__": From b945d346ea4aa5621b743d32a9f7312342c3284c Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Fri, 11 Jan 2019 10:52:49 -0800 Subject: [PATCH 09/21] support bind to add some tests --- src/op/hybrid_op.cc | 79 +++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 32 deletions(-) diff --git a/src/op/hybrid_op.cc b/src/op/hybrid_op.cc index 3dcb29274ae4..58718c12c40c 100644 --- a/src/op/hybrid_op.cc +++ b/src/op/hybrid_op.cc @@ -68,8 +68,8 @@ Array HybridOpNode::InputTensors() const { } Operation HybridOpNode::ReplaceInputs( - const Operation& self, - const std::unordered_map& rmap) const { + const Operation &self, + const std::unordered_map &rmap) const { CHECK_EQ(self.operator->(), this); auto n = make_node(*this); n->body = op::ReplaceTensor(this->body, rmap); @@ -89,13 +89,13 @@ Operation HybridOpNode::ReplaceInputs( } void HybridOpNode::PropBoundToInputs( - const Operation& self, - const std::unordered_map& dom_map, + const Operation &self, + const std::unordered_map &dom_map, std::unordered_map* out_dom_map) const { for (Tensor t : this->inputs) { auto it = out_dom_map->find(t); if (it == out_dom_map->end()) continue; - TensorDom& dom = it->second; + TensorDom &dom = it->second; for (size_t i = 0; i < t->shape.size(); ++i) { dom.data[i].emplace_back(IntSet::range( Range::make_by_min_extent( @@ -105,8 +105,8 @@ void HybridOpNode::PropBoundToInputs( } void HybridOpNode::GatherBound( - const Operation& self, - const std::unordered_map& tensor_dom, + const Operation &self, + const std::unordered_map &tensor_dom, std::unordered_map* out_dom_map) const { for (auto iter_var : axis) { CHECK(!out_dom_map->count(iter_var)); @@ -115,9 +115,9 @@ void HybridOpNode::GatherBound( } Stmt HybridOpNode::BuildRealize( - const Stage& stage, - const std::unordered_map& realize_map, - const Stmt& body) const { + const Stage &stage, + const std::unordered_map &realize_map, + const Stmt &body) const { CHECK_EQ(stage->op.get(), this); Stmt realize_body = body; for (int k = 0; k < num_outputs(); ++k) { @@ -136,8 +136,8 @@ Stmt HybridOpNode::BuildRealize( } Stmt HybridOpNode::BuildProvide( - const Stage& stage, - const std::unordered_map& dom_map, + const Stage &stage, + const std::unordered_map &dom_map, bool debug_keep_trivial_loop) const { CHECK_EQ(stage->op.operator->(), this); Stmt ret = AttrStmt::make(make_zero(Int(32)), attr::extern_scope, 0, this->body); @@ -203,14 +203,14 @@ namespace op { Stmt ApplySplits(const Stage &stage, - const std::unordered_map& dom_map, Stmt stmt) { + const std::unordered_map &dom_map, Stmt stmt) { class LoopSpliter : public IRMutator { Expr factor; IterVar parent, inner, outer; public: LoopSpliter(const SplitNode *split, - const std::unordered_map& dom_map) : + const std::unordered_map &dom_map) : factor(split->factor) { auto &parent_ = split->parent; if (parent_->dom.defined()) { @@ -278,46 +278,61 @@ Stmt ApplySplits(const Stage &stage, } Stmt ApplyLoopAnnotations(const Stage &stage, - const std::unordered_map& rebased, Stmt stmt) { + const std::unordered_map &rebased, Stmt stmt) { class LoopAnnotator : public IRMutator { const Variable *var; - ForType for_type; + const IterVarAttr &attr; + public: - 
LoopAnnotator(const Variable *var_, ForType for_type_) : var(var_), for_type(for_type_) {} + LoopAnnotator(const Variable *var_, const IterVarAttr &attr_) : var(var_), attr(attr_) {} Stmt Mutate_(const For *op, const Stmt &stmt) { if (op->loop_var.get() == var) { - CHECK(for_type != op->for_type); - return For::make(op->loop_var, op->min, op->extent, - for_type, op->device_api, op->body); + if (attr->bind_thread.defined()) { + const auto &iter_var = attr->bind_thread; + if (iter_var->dom.defined()) { + CHECK(is_const_int(iter_var->dom->min, 0)); + CHECK(Equal(iter_var->dom->extent, op->extent)) + << "Thread extent and loop extent mismatch!\n"; + } + std::unordered_map rmap; + rmap[op->loop_var.get()] = iter_var; + Stmt body = ir::Substitute(op->body, rmap); + return AttrStmt::make(iter_var, "thread_extent", op->extent, body); + } else { + return For::make(op->loop_var, op->min, op->extent, + IterVarTypeToForType(attr->iter_type), op->device_api, op->body); + } } return IRMutator::Mutate_(op, stmt); } }; for (auto &iter_var : stage->leaf_iter_vars) { - bool equal = false; + bool need_change = false; int found = 0; const IterVar &actual = rebased.count(iter_var) ? rebased.find(iter_var)->second : iter_var; const Variable *var = actual->var.get(); ForType expected = IterVarTypeToForType(iter_var->iter_type); + IterVarAttr attr; if (stage->iter_var_attrs.count(iter_var)) { - expected = IterVarTypeToForType(stage->iter_var_attrs[iter_var]->iter_type); + attr = stage->iter_var_attrs[iter_var]; + expected = IterVarTypeToForType(attr->iter_type); } - PostOrderVisit(stmt, [&found, &var, &expected, &equal](const NodeRef &node) { + PostOrderVisit(stmt, [&found, &var, &attr, &expected, &need_change](const NodeRef &node) { if (const For *op = node.as()) { if (op->loop_var.get() == var) { ++found; - equal = expected == op->for_type; + need_change = expected != op->for_type || (attr.defined() && attr->bind_thread.defined()); } } }); CHECK_EQ(found, 1) << " iter var should be found exactly once!"; - if (!equal) { - stmt = LoopAnnotator(var, expected).Mutate(stmt); + if (need_change) { + stmt = LoopAnnotator(var, attr).Mutate(stmt); } } return stmt; @@ -364,7 +379,7 @@ Stmt ApplyLoopOrder(const Stage &stage, CHECK(reorder.count(op->loop_var.get())); auto target = reorder.find(op->loop_var.get())->second; if (body_.same_as(op->body) && op->loop_var.get() == target->var.get()) - return stmt; + return stmt; const Stmt &body = op->body.same_as(body_) ? 
op->body : body_; ForType for_type = IterVarTypeToForType(target->iter_type); if (stage->iter_var_attrs.count(target)) { @@ -383,7 +398,7 @@ Stmt ApplyLoopOrder(const Stage &stage, } Stmt ApplySchedule(const Stage &stage, - const std::unordered_map& dom_map, Stmt stmt) { + const std::unordered_map &dom_map, Stmt stmt) { // Gather rebased variables std::unordered_map rebased; for (auto rel : stage->relations) { @@ -415,10 +430,10 @@ std::vector GatherLoopVars(Stmt stmt) { // replacer to replace tensors' usage in Provide class ProviderReplacer : public ir::IRMutator { public: - explicit ProviderReplacer(const std::unordered_map& vmap) + explicit ProviderReplacer(const std::unordered_map &vmap) : vmap_(vmap) {} - Stmt Mutate_(const ir::Provide* op, const Stmt& s) { + Stmt Mutate_(const ir::Provide* op, const Stmt &s) { Tensor t = Operation(op->func.node_).output(op->value_index); auto it = vmap_.find(t); if (it != vmap_.end()) { @@ -434,11 +449,11 @@ class ProviderReplacer : public ir::IRMutator { bool found{false}; private: - const std::unordered_map& vmap_; + const std::unordered_map &vmap_; }; Stmt ReplaceProvideTensor(Stmt stmt, - const std::unordered_map& replace) { + const std::unordered_map &replace) { ProviderReplacer repl(replace); Stmt ret = repl.Mutate(stmt); return repl.found ? ret : stmt; From 2639873c94d220210257b9fefb3efcfe59fdd2e0 Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Fri, 11 Jan 2019 12:06:32 -0800 Subject: [PATCH 10/21] fused tested --- src/op/hybrid_op.cc | 71 ++++++++++++++++----- tests/python/unittest/test_hybrid_script.py | 37 ++++++----- 2 files changed, 74 insertions(+), 34 deletions(-) diff --git a/src/op/hybrid_op.cc b/src/op/hybrid_op.cc index 58718c12c40c..05d2a6ee0a26 100644 --- a/src/op/hybrid_op.cc +++ b/src/op/hybrid_op.cc @@ -209,9 +209,10 @@ Stmt ApplySplits(const Stage &stage, IterVar parent, inner, outer; public: + bool splitted; LoopSpliter(const SplitNode *split, const std::unordered_map &dom_map) : - factor(split->factor) { + factor(split->factor), splitted(false) { auto &parent_ = split->parent; if (parent_->dom.defined()) { CHECK(is_const_int(parent_->dom->min, 0)); @@ -248,29 +249,65 @@ Stmt ApplySplits(const Stage &stage, IterVarTypeToForType(inner->iter_type), op->device_api, ret); ret = For::make(outer->var, Expr(0), outer->dom->extent, IterVarTypeToForType(outer->iter_type), op->device_api, ret); + splitted = true; return ret; } return IRMutator::Mutate_(op, stmt); } }; - bool changed = true; - while (changed) { - changed = false; - for (auto &rel : stage->relations) { - if (const SplitNode* split = rel.as()) { - bool not_splited = false; - PostOrderVisit(stmt, [¬_splited, &split](const NodeRef &node) { - if (const Variable *var = node.as()) { - if (var == split->parent->var.get()) - not_splited = true; - } - }); - if (not_splited) { - stmt = LoopSpliter(split, dom_map).Mutate(stmt); - changed = true; - } + class LoopFuser : public IRMutator { + const IterVar &parent; + const Variable *inner; + const Variable *outer; + bool under_outer; + Expr extent; + + public: + bool fused; + LoopFuser(const FuseNode *fuse_) + : parent(fuse_->fused), inner(fuse_->inner->var.get()), + outer(fuse_->outer->var.get()), under_outer(false), + extent(0), fused(false) {} + + Stmt Mutate_(const For *op, const Stmt &stmt) { + if (op->loop_var.get() == inner) { + CHECK(under_outer); + std::unordered_map rmap; + rmap[op->loop_var.get()] = parent % op->extent; + extent = op->extent; + fused = true; + return ir::Substitute(op->body, rmap); + } else if 
(op->loop_var.get() == outer) { + under_outer = true; + Stmt body = IRMutator::Mutate(op->body); + std::unordered_map rmap; + rmap[op->loop_var.get()] = parent / extent; + body = ir::Substitute(body, rmap); + under_outer = false; + return For::make(parent->var, Expr(0), extent * op->extent, + op->for_type, op->device_api, body); + } else if (under_outer) { + Stmt body = IRMutator::Mutate(op->body); + std::unordered_map rmap; + rmap[op->loop_var.get()] = parent / extent % op->extent; + body = ir::Substitute(body, rmap); + extent = extent * op->extent; + return body; } + return IRMutator::Mutate(stmt); + } + }; + + for (auto &rel : stage->relations) { + if (const SplitNode *split = rel.as()) { + LoopSpliter Spliter(split, dom_map); + stmt = Spliter.Mutate(stmt); + CHECK(Spliter.splitted); + } else if (const FuseNode *fuse = rel.as()) { + LoopFuser Fuser(fuse); + stmt = Fuser.Mutate(stmt); + CHECK(Fuser.fused); } } diff --git a/tests/python/unittest/test_hybrid_script.py b/tests/python/unittest/test_hybrid_script.py index 04ceece24620..c1afbf1f5c38 100644 --- a/tests/python/unittest/test_hybrid_script.py +++ b/tests/python/unittest/test_hybrid_script.py @@ -3,7 +3,7 @@ from tvm.hybrid.intrin import HYBRID_GLOBALS @nose.tools.nottest -def run_and_check(func, args, var_dict={}, target='llvm', sch=None): +def run_and_check(func, args, var_dict={}, target='llvm', sch=None, outs=None): def tvm_val_2_py_val(val): val = tvm.ir_pass.Substitute(val, var_dict) val = tvm.ir_pass.Simplify(val) @@ -18,8 +18,9 @@ def tvm_val_2_py_val(val): op = outs[0].op if isinstance(outs, list) else outs.op sch = tvm.create_schedule(op) else: - op = sch.outputs[0] - outs = list(op.outputs) + assert outs is not None + assert isinstance(outs, list) + op = outs[0].op emu_args = [] nd_args = [] @@ -599,9 +600,6 @@ def hoo(a, b): run_and_check(hoo, [a, b]) def test_schedule(): - # Test perfect loop split - # Test loop reorder - # Test loop annotation @script def outer_product(a, b): c = output_tensor((64, 64), a.dtype) @@ -612,6 +610,10 @@ def outer_product(a, b): a = tvm.placeholder((64,), name='a', dtype='float32') b = tvm.placeholder((64,), name='b', dtype='float32') c = outer_product(a, b) + + # Test perfect loop split + # Test loop reorder + # Test loop annotation sch = tvm.create_schedule(c.op) i, j = c.op.axis io, ii = sch[c].split(i, 4) @@ -637,18 +639,19 @@ def outer_product(a, b): assert isinstance(ir, tvm.stmt.For) assert ir.loop_var.name == 'j.outer.inner' ir = ir.body + run_and_check(outer_product, [a, b], sch=sch, outs=[c]) - module = tvm.build(sch, [a, b, c]) - assert module - a = numpy.random.randn(64) - b = numpy.random.randn(64) - c = numpy.outer(a, b) - nd_a = tvm.ndarray.array(a.astype('float32')) - nd_b = tvm.ndarray.array(b.astype('float32')) - nd_c = tvm.ndarray.array(numpy.zeros((64, 64)).astype('float32')) - module(nd_a, nd_b, nd_c) - tvm.testing.assert_allclose(nd_c.asnumpy(), c, 1e-5, 1e-5) - #run_and_check(outer_product, [a, b], sch=sch) + # Test fuse + sch = tvm.create_schedule(c.op) + sch[c].fuse(c.op.axis[0], c.op.axis[1]) + ir = tvm.lower(sch, [a, b, c], simple_mode=True) + assert isinstance(ir, tvm.stmt.ProducerConsumer) + ir = ir.body + assert isinstance(ir, tvm.stmt.AttrStmt) + ir = ir.body + assert isinstance(ir, tvm.stmt.For) + assert ir.loop_var.name == 'i.j.fused' + run_and_check(outer_product, [a, b], sch=sch, outs=[c]) # Test imperfect loop split # Test loop binds From eac5f200903d1bb40ca16ccf34464318dbc318bc Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Fri, 11 Jan 2019 12:13:03 
-0800 Subject: [PATCH 11/21] imperfect loop testcase --- src/op/hybrid_op.cc | 6 ++++-- tests/python/unittest/test_hybrid_script.py | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/op/hybrid_op.cc b/src/op/hybrid_op.cc index 05d2a6ee0a26..e80546e586ce 100644 --- a/src/op/hybrid_op.cc +++ b/src/op/hybrid_op.cc @@ -262,14 +262,16 @@ Stmt ApplySplits(const Stage &stage, const Variable *outer; bool under_outer; Expr extent; - + public: bool fused; - LoopFuser(const FuseNode *fuse_) + explicit LoopFuser(const FuseNode *fuse_) : parent(fuse_->fused), inner(fuse_->inner->var.get()), outer(fuse_->outer->var.get()), under_outer(false), extent(0), fused(false) {} + // TODO: Handle imperfect loops + Stmt Mutate_(const For *op, const Stmt &stmt) { if (op->loop_var.get() == inner) { CHECK(under_outer); diff --git a/tests/python/unittest/test_hybrid_script.py b/tests/python/unittest/test_hybrid_script.py index c1afbf1f5c38..24f2a1600914 100644 --- a/tests/python/unittest/test_hybrid_script.py +++ b/tests/python/unittest/test_hybrid_script.py @@ -654,6 +654,11 @@ def outer_product(a, b): run_and_check(outer_product, [a, b], sch=sch, outs=[c]) # Test imperfect loop split + sch = tvm.create_schedule(c.op) + sch[c].split(c.op.axis[0], 3) + ir = tvm.lower(sch, [a, b, c], simple_mode=True) + run_and_check(outer_product, [a, b], sch=sch, outs=[c]) + # Test loop binds From 471af84e70d580a9f01b302ebea88d40a38d29fd Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Fri, 11 Jan 2019 12:45:30 -0800 Subject: [PATCH 12/21] fix lint --- src/op/hybrid_op.cc | 7 ++++--- src/op/hybrid_op.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/op/hybrid_op.cc b/src/op/hybrid_op.cc index e80546e586ce..c322c5d34ef1 100644 --- a/src/op/hybrid_op.cc +++ b/src/op/hybrid_op.cc @@ -202,7 +202,7 @@ Stmt HybridOpNode::BuildProvide( namespace op { -Stmt ApplySplits(const Stage &stage, +Stmt ApplyLoopShapes(const Stage &stage, const std::unordered_map &dom_map, Stmt stmt) { class LoopSpliter : public IRMutator { Expr factor; @@ -270,7 +270,7 @@ Stmt ApplySplits(const Stage &stage, outer(fuse_->outer->var.get()), under_outer(false), extent(0), fused(false) {} - // TODO: Handle imperfect loops + // TODO(@were): Handle imperfect loops Stmt Mutate_(const For *op, const Stmt &stmt) { if (op->loop_var.get() == inner) { @@ -447,13 +447,14 @@ Stmt ApplySchedule(const Stage &stage, CHECK(dom_map.count(rebase->rebased)); } } - stmt = ApplySplits(stage, dom_map, stmt); + stmt = ApplyLoopShapes(stage, dom_map, stmt); stmt = ApplyLoopOrder(stage, dom_map, rebased, stmt); stmt = ApplyLoopAnnotations(stage, rebased, stmt); return stmt; } std::vector GatherLoopVars(Stmt stmt) { + // TODO(@were): Write a comprehensive pass to analyze iter var types std::vector res_; PostOrderVisit(stmt, [&res_](const NodeRef &node) { if (const For *op = node.as()) { diff --git a/src/op/hybrid_op.h b/src/op/hybrid_op.h index 766212ac9bf1..ce75e6c37627 100644 --- a/src/op/hybrid_op.h +++ b/src/op/hybrid_op.h @@ -49,7 +49,7 @@ Stmt ApplySchedule(const Stage& stage, * \param dom_map The extents of the iterative variables may be used. * \param stmt The statement to be processed. 
*/ -Stmt ApplySplits(const Stage &stage, +Stmt ApplyLoopShapes(const Stage &stage, const std::unordered_map& dom_map, Stmt stmt); From 9553514c411841dde36a4001694c6f466ed0cc78 Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Fri, 11 Jan 2019 13:09:08 -0800 Subject: [PATCH 13/21] add bind testcase --- tests/python/unittest/test_hybrid_script.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/python/unittest/test_hybrid_script.py b/tests/python/unittest/test_hybrid_script.py index 24f2a1600914..f89123e346ec 100644 --- a/tests/python/unittest/test_hybrid_script.py +++ b/tests/python/unittest/test_hybrid_script.py @@ -288,9 +288,22 @@ def vec_add(a, b): a = tvm.placeholder((1000, ), dtype='float32', name='a') b = tvm.placeholder((1000, ), dtype='float32', name='b') - run_and_check(vec_add, [a, b], target='cuda') + @script + def raw(a, b): + c = output_tensor((1000, ), 'float32') + for i in range(1000): + c[i] = a[i] + b[i] + return c + + c = raw(a, b) + sch = tvm.create_schedule(c.op) + x = tvm.thread_axis('threadIdx.x') + sch[c].bind(c.op.axis[0], x) + run_and_check(raw, [a, b], sch=sch, outs=[c], target='cuda') + + def test_math_intrin(): @script def intrin_real(a): From eb7cee6079d1b85f2ec59d0fa756b4f2294170dc Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Fri, 11 Jan 2019 22:54:57 -0800 Subject: [PATCH 14/21] fix doc --- src/op/hybrid_op.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/op/hybrid_op.h b/src/op/hybrid_op.h index ce75e6c37627..c66b9e7e083a 100644 --- a/src/op/hybrid_op.h +++ b/src/op/hybrid_op.h @@ -44,7 +44,7 @@ Stmt ApplySchedule(const Stage& stage, const std::unordered_map& dom_map, Stmt stmt); /*! - * \brief Apply loop splits in the schedule on the function body. + * \brief Apply loop splits and fuses in the schedule on the function body. * \param stage The schedule information to be applied. * \param dom_map The extents of the iterative variables may be used. * \param stmt The statement to be processed. 
From f3860ba872a4fa1b55345c0609057eb7346f527d Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Sat, 12 Jan 2019 20:04:41 -0800 Subject: [PATCH 15/21] fix online edit typo --- tests/python/unittest/test_hybrid_script.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/python/unittest/test_hybrid_script.py b/tests/python/unittest/test_hybrid_script.py index f89123e346ec..7e08a7dc9ccc 100644 --- a/tests/python/unittest/test_hybrid_script.py +++ b/tests/python/unittest/test_hybrid_script.py @@ -303,6 +303,22 @@ def raw(a, b): sch[c].bind(c.op.axis[0], x) run_and_check(raw, [a, b], sch=sch, outs=[c], target='cuda') + # Test loop binds + @tvm.hybrid.script + def goo(a, b): + c = output_tensor(a.shape, a.dtype) + len_b = len(b) + for i in const_range(len_b * 2): + if i < len_b: + c[i] = a[i] + b[i] + else: + c[i - len_b] = a[i - len_b] + b[i - len_b] + return c + a = tvm.placeholder((5, ), name='a', dtype='int32') + b = [1, 2, 3, 4, 5] + c = goo(a, tvm.convert(b)) + sch = tvm.create_schedule(c.op) + run_and_check(goo, [a, b], sch=sch, outs=[c]) def test_math_intrin(): @script @@ -583,6 +599,7 @@ def foo(a, b): run_and_check(foo, [a, b]) @tvm.hybrid.script +<<<<<<< 52c435f9c55a53e2e8e10b3a9fe672d2f8718122 def goo(a, b): c = output_tensor(a.shape, a.dtype) len_b = len(b) @@ -599,6 +616,8 @@ def goo(a, b): run_and_check(goo, [a, b]) @tvm.hybrid.script +======= +>>>>>>> fix online edit typo def hoo(a, b): c = output_tensor(a.shape, a.dtype) len_b = len(b) From d7e0b5ae8e4f97a5711fdec93eec7afcc2c52335 Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Sun, 13 Jan 2019 10:51:03 -0800 Subject: [PATCH 16/21] resolve @mercymercy review --- src/op/hybrid_op.cc | 18 +++++------------- src/op/hybrid_op.h | 1 + 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/op/hybrid_op.cc b/src/op/hybrid_op.cc index c322c5d34ef1..55e4f61b9fad 100644 --- a/src/op/hybrid_op.cc +++ b/src/op/hybrid_op.cc @@ -206,23 +206,15 @@ Stmt ApplyLoopShapes(const Stage &stage, const std::unordered_map &dom_map, Stmt stmt) { class LoopSpliter : public IRMutator { Expr factor; - IterVar parent, inner, outer; + const Variable *parent; + IterVar inner, outer; public: bool splitted; LoopSpliter(const SplitNode *split, const std::unordered_map &dom_map) : factor(split->factor), splitted(false) { - auto &parent_ = split->parent; - if (parent_->dom.defined()) { - CHECK(is_const_int(parent_->dom->min, 0)); - parent = parent_; - } else { - CHECK(dom_map.count(parent_)); - auto &dom = dom_map.find(parent_)->second; - CHECK(is_const_int(dom->min, 0)); - parent = IterVarNode::make(dom, parent_->var, parent_->iter_type); - } + parent = split->parent->var.get(); auto &inner_ = split->inner; CHECK(dom_map.count(inner_)); @@ -239,11 +231,11 @@ Stmt ApplyLoopShapes(const Stage &stage, } Stmt Mutate_(const For *op, const Stmt &stmt) { - if (op->loop_var.get() == parent->var.get()) { + if (op->loop_var.get() == parent) { std::unordered_map rmap; rmap[op->loop_var.get()] = inner + outer * factor; Stmt ret = ir::Substitute(op->body, rmap); - Expr cond = likely(outer * factor < (parent->dom->extent - inner)); + Expr cond = likely(outer * factor < (op->extent - inner)); ret = IfThenElse::make(cond, ret); ret = For::make(inner->var, Expr(0), inner->dom->extent, IterVarTypeToForType(inner->iter_type), op->device_api, ret); diff --git a/src/op/hybrid_op.h b/src/op/hybrid_op.h index c66b9e7e083a..db988b45875f 100644 --- a/src/op/hybrid_op.h +++ b/src/op/hybrid_op.h @@ -38,6 +38,7 @@ Stmt ReplaceProvideTensor(Stmt stmt, /*! 
* \brief Apply the schedule manipulation on the function body. * \param stmt The statement to be processed. + * \param dom_map The extents of the iterative variables may be used. * \param stage The schedule information to be applied. */ Stmt ApplySchedule(const Stage& stage, From 4a187143260398344823bb3de8746292b55e5cd1 Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Sun, 13 Jan 2019 10:59:16 -0800 Subject: [PATCH 17/21] fix indent --- src/op/hybrid_op.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/op/hybrid_op.h b/src/op/hybrid_op.h index db988b45875f..892e420137d6 100644 --- a/src/op/hybrid_op.h +++ b/src/op/hybrid_op.h @@ -51,7 +51,7 @@ Stmt ApplySchedule(const Stage& stage, * \param stmt The statement to be processed. */ Stmt ApplyLoopShapes(const Stage &stage, - const std::unordered_map& dom_map, Stmt stmt); + const std::unordered_map& dom_map, Stmt stmt); /*! From f1a079df6a289e44c25d1dbe244ddebd1ca59e10 Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Mon, 14 Jan 2019 09:22:34 -0800 Subject: [PATCH 18/21] i should convince myself it is not flaky test first --- docs/langref/hybrid_script.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/langref/hybrid_script.rst b/docs/langref/hybrid_script.rst index 122bcd95e690..7043281fcafb 100644 --- a/docs/langref/hybrid_script.rst +++ b/docs/langref/hybrid_script.rst @@ -68,17 +68,23 @@ to LLVM module. Tuning ~~~~~~ -**Under construction, not supported yet.** - Follow up the example above, you can use some tvm like interfaces to tune the code: .. code-block:: python + i, j = c.op.axis sch = tvm.create_schedule(op) jo, ji = sch.split(j, 4) sch.vectorize(ji) -``split``, ``reorder``, and loop_annotation will be supported! +For now, you can use loop annotations (``unroll``, ``parallel``, ``vectorize``, and ``bind``), +loop manipulation (``split`` and ``fuse``), and ``reorder``. + +.. note:: + + This is a preliminary function, so users should be in charge of the correctness + of the functionality after tuning. Specifically, users should be careful when + fusing and reorderding imperfect loops. 
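For instance, with a two-dimensional hybrid op ``c`` as in the snippet above (a sketch only; ``a`` and ``b`` stand for its input placeholders and are assumed to be in scope), a split would be written as:

.. code-block:: python

    i, j = c.op.axis
    sch = tvm.create_schedule(c.op)
    # Split the inner loop; if its extent is not divisible by the factor,
    # the lowered body is guarded by a likely(...) condition instead of failing.
    jo, ji = sch[c].split(j, 4)
    # Fusing works the other way around, collapsing two adjacent loops into one:
    # fused = sch[c].fuse(i, jo)
    print(tvm.lower(sch, [a, b, c], simple_mode=True))
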
Loops ~~~~~ From c5fa89f91ea040a6042bf653edf7bbf2620e2af3 Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Mon, 14 Jan 2019 10:04:51 -0800 Subject: [PATCH 19/21] fix test hybrid --- tests/python/unittest/test_hybrid_script.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/python/unittest/test_hybrid_script.py b/tests/python/unittest/test_hybrid_script.py index 7e08a7dc9ccc..a54fec3a7bf7 100644 --- a/tests/python/unittest/test_hybrid_script.py +++ b/tests/python/unittest/test_hybrid_script.py @@ -599,7 +599,6 @@ def foo(a, b): run_and_check(foo, [a, b]) @tvm.hybrid.script -<<<<<<< 52c435f9c55a53e2e8e10b3a9fe672d2f8718122 def goo(a, b): c = output_tensor(a.shape, a.dtype) len_b = len(b) @@ -616,8 +615,6 @@ def goo(a, b): run_and_check(goo, [a, b]) @tvm.hybrid.script -======= ->>>>>>> fix online edit typo def hoo(a, b): c = output_tensor(a.shape, a.dtype) len_b = len(b) From ea12b62c01f8e7c5308966cc0a084c8b620e6caf Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Mon, 14 Jan 2019 10:19:06 -0800 Subject: [PATCH 20/21] how many flaky test are you expecting; i ball ball u to let me pass --- src/op/hybrid_op.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/op/hybrid_op.cc b/src/op/hybrid_op.cc index 55e4f61b9fad..acd7b5737c5f 100644 --- a/src/op/hybrid_op.cc +++ b/src/op/hybrid_op.cc @@ -118,6 +118,7 @@ Stmt HybridOpNode::BuildRealize( const Stage &stage, const std::unordered_map &realize_map, const Stmt &body) const { + // TODO(@were): Add attribute inject here and remove it from hybrid parser. CHECK_EQ(stage->op.get(), this); Stmt realize_body = body; for (int k = 0; k < num_outputs(); ++k) { @@ -430,6 +431,7 @@ Stmt ApplyLoopOrder(const Stage &stage, Stmt ApplySchedule(const Stage &stage, const std::unordered_map &dom_map, Stmt stmt) { + // TODO(@were): Eliminate loop rebase in script parser and move the burden here // Gather rebased variables std::unordered_map rebased; for (auto rel : stage->relations) { From 30f2a247c0485a06ac106cab50ec8f9cff087ddb Mon Sep 17 00:00:00 2001 From: Jian Weng Date: Mon, 14 Jan 2019 12:39:00 -0800 Subject: [PATCH 21/21] rebase halide... --- 3rdparty/HalideIR | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/HalideIR b/3rdparty/HalideIR index a08e26e5a97f..6e7c1f046fda 160000 --- a/3rdparty/HalideIR +++ b/3rdparty/HalideIR @@ -1 +1 @@ -Subproject commit a08e26e5a97f4ef4d566a42f6c78704b3f9c7b8a +Subproject commit 6e7c1f046fda536562dc80977e93324fee2324bd
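
With the series applied, a hybrid op can be split, fused, reordered, annotated, and bound like a regular compute op. A minimal end-to-end sketch of the imperfect-split case exercised by the new testcases (the kernel body and shapes here are illustrative rather than copied verbatim from the test file):

    import tvm
    from tvm.hybrid import script

    @script
    def outer_product(a, b):
        c = output_tensor((100, 99), 'float32')
        for i in range(100):
            for j in range(99):
                c[i, j] = a[i] * b[j]
        return c

    a = tvm.placeholder((100, ), dtype='float32', name='a')
    b = tvm.placeholder((99, ), dtype='float32', name='b')
    c = outer_product(a, b)

    sch = tvm.create_schedule(c.op)
    # 100 is not divisible by 3, so ApplyLoopShapes keeps the split correct by
    # guarding the loop body with likely(outer * 3 + inner < 100).
    sch[c].split(c.op.axis[0], 3)
    print(tvm.lower(sch, [a, b, c], simple_mode=True))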