diff --git a/include/tvm/schedule_pass.h b/include/tvm/schedule_pass.h
index b3f64db1eee9..189b999a253d 100644
--- a/include/tvm/schedule_pass.h
+++ b/include/tvm/schedule_pass.h
@@ -40,6 +40,16 @@ Stmt ScheduleOps(Schedule s, Map<IterVar, Range> dom_map);
  */
 void AutoInlineElemWise(Schedule sch);
 
+/*!
+ * \brief To automatically inline operations with injective writes
+ *   (i.e. writes without reduction or sequential loops). Note
+ *   that in this case, guarantees about contiguity, transpose, stride,
+ *   alignment and memory footprint in general do not hold.
+ *
+ * \param sch The schedule to be inlined.
+ */
+void AutoInlineInjective(Schedule sch);
+
 }  // namespace schedule
 }  // namespace tvm
 #endif  // TVM_SCHEDULE_PASS_H_
diff --git a/src/schedule/auto_inline_elem_wise.cc b/src/schedule/auto_inline_elem_wise.cc
index 9fd073c0ac7a..1dc1ebbd9959 100644
--- a/src/schedule/auto_inline_elem_wise.cc
+++ b/src/schedule/auto_inline_elem_wise.cc
@@ -60,5 +60,61 @@ void AutoInlineElemWise(Schedule sch) {
   }
 }
 
+/*!
+ * \brief Whether an operation is a broadcast.
+ *
+ * The detection is not implemented yet, so every operation is rejected.
+ *
+ * \param op The operation to inspect.
+ * \return false for every operation until the detection is implemented.
+ */
+bool IsBroadcast(const Operation& op) {
+  if (const ComputeOpNode* compute = op.as<ComputeOpNode>()) {
+    if (compute->reduce_axis.size()) {
+      return false;
+    }
+    // TODO(nicolasvasilache): Implement Me
+  }
+  return false;
+}
+
+/*!
+ * \brief Inline every stage that is not scheduled yet, is not an output and
+ *   whose operation is accepted by IsBroadcast.
+ *
+ * \param sch The schedule to be inlined.
+ */
+void AutoInlineBroadcast(Schedule sch) {
+  for (Stage s : sch->stages) {
+    if (!s.is_scheduled() && IsBroadcast(s->op) && !s->is_output) {
+      s.compute_inline();
+    }
+  }
+}
+
+/*!
+ * \brief Whether an operation is treated as injective, i.e. it is a
+ *   ComputeOpNode without any reduce axis.
+ *
+ * \param op The operation to inspect.
+ * \return true for a compute operation without reduce axis, false otherwise.
+ */
+bool IsInjective(const Operation& op) {
+  if (const ComputeOpNode* compute = op.as<ComputeOpNode>()) {
+    return compute->reduce_axis.size() == 0;
+  }
+  return false;
+}
+
+// Inlines every stage that is not scheduled yet, is not an output and whose
+// operation is injective according to IsInjective.
+void AutoInlineInjective(Schedule sch) {
+  for (Stage s : sch->stages) {
+    if (!s.is_scheduled() && IsInjective(s->op) && !s->is_output) {
+      s.compute_inline();
+    }
+  }
+}
+
 }  // namespace schedule
 }  // namespace tvm
diff --git a/topi/include/topi/broadcast.h b/topi/include/topi/broadcast.h
new file mode 100644
index 000000000000..0a9f885c1b12
--- /dev/null
+++ b/topi/include/topi/broadcast.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2017 by Contributors
+ * \brief Broadcast op constructions
+ * \file broadcast.h
+ */
+#ifndef TOPI_BROADCAST_H_
+#define TOPI_BROADCAST_H_
+
+#include <topi/detail/broadcast.h>
+
+namespace topi {
+
+/*!
+ * \brief Broadcast a tensor to the given output shape.
+ *
+ * A CHECK fails when output_shape has fewer dimensions than I, or when it
+ * differs from the shape obtained by broadcasting it against the shape of I.
+ *
+ * \param I The input tensor.
+ * \param output_shape The shape to broadcast to.
+ * \return A tensor of shape output_shape that reads I at the broadcast indices.
+ */
+inline tvm::Tensor broadcast_to(const tvm::Tensor& I,
+                                const tvm::Array<tvm::Expr>& output_shape) {
+  CHECK_GE(output_shape.size(), I->shape.size())
+      << "Not a broadcast, output dimensionality smaller than input.\noutput: "
+      << output_shape << "\nvs\ninput: " << I;
+  auto bh = detail::BroadcastShape(output_shape, I->shape);
+  CHECK_EQ(output_shape.size(), bh.common_shape.size());
+  for (size_t i = 0; i < output_shape.size(); ++i) {
+    CHECK(tvm::ir::Equal(output_shape[i], bh.common_shape[i]));
+  }
+  // I->shape is the second shape given to BroadcastShape, hence bh.vars2.
+  auto l = [&](tvm::Array<tvm::Var> ovars) {
+    return I(detail::InputIndexFromBroadcast(ovars, I, bh.vars2, bh.all_vars));
+  };
+  return tvm::compute(
+      tvm::Array<tvm::Expr>(bh.common_shape.begin(), bh.common_shape.end()), l);
+}
+
+/*!
+ * \brief Compute A + B element-wise with broadcasting.
+ * \param A The left operand.
+ * \param B The right operand.
+ * \return The tensor built by detail::WithBroadcast from the two operands.
+ */
+inline tvm::Tensor broadcast_add(const tvm::Tensor& A, const tvm::Tensor& B) {
+  auto l = [&](tvm::Expr a, tvm::Expr b) { return a + b; };
+  return detail::WithBroadcast(l, A, B);
+}
+
+/*!
+ * \brief Compute A - B element-wise with broadcasting.
+ * \param A The left operand.
+ * \param B The right operand.
+ * \return The tensor built by detail::WithBroadcast from the two operands.
+ */
+inline tvm::Tensor broadcast_sub(const tvm::Tensor& A, const tvm::Tensor& B) {
+  auto l = [&](tvm::Expr a, tvm::Expr b) { return a - b; };
+  return detail::WithBroadcast(l, A, B);
+}
+
+/*!
+ * \brief Compute A * B element-wise with broadcasting.
+ * \param A The left operand.
+ * \param B The right operand.
+ * \return The tensor built by detail::WithBroadcast from the two operands.
+ */
+inline tvm::Tensor broadcast_mul(const tvm::Tensor& A, const tvm::Tensor& B) {
+  auto l = [&](tvm::Expr a, tvm::Expr b) { return a * b; };
+  return detail::WithBroadcast(l, A, B);
+}
+
+/*!
+ * \brief Compute A / B element-wise with broadcasting.
+ * \param A The left operand.
+ * \param B The right operand.
+ * \return The tensor built by detail::WithBroadcast from the two operands.
+ */
+inline tvm::Tensor broadcast_div(const tvm::Tensor& A, const tvm::Tensor& B) {
+  auto l = [&](tvm::Expr a, tvm::Expr b) { return a / b; };
+  return detail::WithBroadcast(l, A, B);
+}
+
+/*!
+ * \brief Compute A % B element-wise with broadcasting.
+ * \param A The left operand.
+ * \param B The right operand.
+ * \return The tensor built by detail::WithBroadcast from the two operands.
+ */
+inline tvm::Tensor broadcast_mod(const tvm::Tensor& A, const tvm::Tensor& B) {
+  auto l = [&](tvm::Expr a, tvm::Expr b) { return a % b; };
+  return detail::WithBroadcast(l, A, B);
+}
+
+}  // namespace topi
+
+#endif  // TOPI_BROADCAST_H_
diff --git a/topi/include/topi/detail/broadcast.h b/topi/include/topi/detail/broadcast.h
new file mode 100644
index 000000000000..a1b760ca21ac
--- /dev/null
+++ b/topi/include/topi/detail/broadcast.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 by Contributors
+ * \brief Detail broadcast.
+ * \file broadcast.h
+ */
+#ifndef TOPI_DETAIL_BROADCAST_H_
+#define TOPI_DETAIL_BROADCAST_H_
+
+#include <algorithm>
+#include <deque>
+
+#include "tvm/ir_pass.h"
+#include "tvm/tvm.h"
+
+namespace topi {
+namespace detail {
+
+/*! \brief Result of broadcasting two shapes against each other. */
+struct BroadcastHelper {
+  /*! \brief The broadcast shape, outermost dimension first. */
+  std::deque<tvm::Expr> common_shape;
+  /*! \brief One fresh variable per dimension of common_shape. */
+  std::deque<tvm::Var> all_vars;
+  /*! \brief The variables of all_vars that index the first shape. */
+  std::deque<tvm::Var> vars1;
+  /*! \brief The variables of all_vars that index the second shape. */
+  std::deque<tvm::Var> vars2;
+};
+
+/*!
+ * \brief Broadcast two shapes, aligning them on their trailing dimensions.
+ *
+ * Two aligned dimensions are compatible when they are equal or when one of
+ * them is 1; any other combination fails a CHECK. The leading dimensions of
+ * the longer shape are copied unchanged.
+ *
+ * \param shape1 The first shape.
+ * \param shape2 The second shape.
+ * \return The broadcast shape and the variables indexing each input shape.
+ */
+inline BroadcastHelper BroadcastShape(const tvm::Array<tvm::Expr>& shape1,
+                                      const tvm::Array<tvm::Expr>& shape2) {
+  BroadcastHelper bh;
+  int s1_size = shape1.size();
+  int s2_size = shape2.size();
+  tvm::Expr one(1);
+  int i;
+  // Walk the dimensions both shapes have, innermost first.
+  for (i = 1; i <= std::min(s1_size, s2_size); ++i) {
+    bh.all_vars.push_front(tvm::Var());
+    if (tvm::ir::Equal(shape1[s1_size - i], shape2[s2_size - i])) {
+      bh.common_shape.push_front(shape1[s1_size - i]);
+      bh.vars1.push_front(bh.all_vars[0]);
+      bh.vars2.push_front(bh.all_vars[0]);
+    } else if (tvm::ir::Equal(one, shape1[s1_size - i])) {
+      CHECK(!tvm::ir::Equal(one, shape2[s2_size - i]));
+      bh.common_shape.push_front(shape2[s2_size - i]);
+      bh.vars2.push_front(bh.all_vars[0]);
+    } else if (tvm::ir::Equal(one, shape2[s2_size - i])) {
+      bh.common_shape.push_front(shape1[s1_size - i]);
+      bh.vars1.push_front(bh.all_vars[0]);
+    } else {
+      CHECK(false) << "Incompatible broadcast dims: " << shape1[s1_size - i]
+                   << " and " << shape2[s2_size - i] << " in: "
+                   << tvm::Array<tvm::Expr>(shape1.begin(), shape1.end())
+                   << " and "
+                   << tvm::Array<tvm::Expr>(shape2.begin(), shape2.end());
+    }
+  }
+  // Remaining dimensions whether on shape1 or shape2 can always be completed
+  auto max_size = std::max(s1_size, s2_size);
+  auto& shape = (s1_size > s2_size) ? shape1 : shape2;
+  auto& vars = (s1_size > s2_size) ? bh.vars1 : bh.vars2;
+  for (; i <= max_size; ++i) {
+    bh.all_vars.push_front(tvm::Var());
+    bh.common_shape.push_front(shape[max_size - i]);
+    vars.push_front(bh.all_vars[0]);
+  }
+  return bh;
+}
+
+/*!
+ * \brief Build the indices used to read an input of a broadcast operation.
+ *
+ * \param ovars The variables iterating over the broadcast output.
+ * \param T The input tensor to index.
+ * \param my_vars The broadcast variables that index T.
+ * \param all_vars All the broadcast variables, one per output dimension.
+ * \return One index per dimension of T: the output variable where T has its
+ *   own variable, zero where a dimension of T is broadcast.
+ */
+inline tvm::Array<tvm::Expr> InputIndexFromBroadcast(
+    const tvm::Array<tvm::Var>& ovars, const tvm::Tensor& T,
+    const std::deque<tvm::Var>& my_vars, const std::deque<tvm::Var>& all_vars) {
+  tvm::Array<tvm::Expr> ivars;
+  CHECK_EQ(ovars.size(), all_vars.size());
+  // N^2, could use a map but NBD..
+  const size_t expected_dims = T->shape.size();
+  for (size_t i = 0; i < ovars.size(); ++i) {
+    bool found = false;
+    for (size_t j = 0; j < my_vars.size(); ++j) {
+      if (all_vars[i].same_as(my_vars[j])) {
+        ivars.push_back(ovars[i]);
+        found = true;
+        break;
+      }
+    }
+    // Only inject 0 once we are within the trailing dimensions that T has
+    // (i.e. T must have extent 1 there)
+    if (!found && (ovars.size() - i) <= expected_dims) {
+      ivars.push_back(tvm::make_zero(ovars[i].type()));
+    }
+  }
+  CHECK(expected_dims == ivars.size());
+  return ivars;
+}
+
+
+/*!
+ * \brief Apply a binary operation to two tensors with broadcasting.
+ *
+ * \param op Callable combining one element of A with one element of B.
+ * \param A The first operand.
+ * \param B The second operand.
+ * \return A tensor of the broadcast shape of A and B holding op(A, B).
+ */
+template <typename FBinaryExpr>
+inline tvm::Tensor WithBroadcast(FBinaryExpr op, const tvm::Tensor& A,
+                                 const tvm::Tensor& B) {
+  auto bh = BroadcastShape(A->shape, B->shape);
+  auto l = [&](tvm::Array<tvm::Var> ovars) {
+    return op(A(InputIndexFromBroadcast(ovars, A, bh.vars1, bh.all_vars)),
+              B(InputIndexFromBroadcast(ovars, B, bh.vars2, bh.all_vars)));
+  };
+  return tvm::compute(
+      tvm::Array<tvm::Expr>(bh.common_shape.begin(), bh.common_shape.end()), l);
+}
+
+}  // namespace detail
+}  // namespace topi
+
+#endif  // TOPI_DETAIL_BROADCAST_H_
diff --git a/topi/include/topi/ewise.h b/topi/include/topi/ewise.h
index 2909e726fe07..8de4d1f4f320 100644
--- a/topi/include/topi/ewise.h
+++ b/topi/include/topi/ewise.h
@@ -1,6 +1,6 @@
 /*!
* Copyright (c) 2017 by Contributors - * \file topi.h + * \file ewise.h * \brief Elementwise op constructions */ #ifndef TOPI_EWISE_H_ @@ -12,16 +12,17 @@ namespace topi { using namespace tvm; // Unary intrinsic operators -#define TOPI_DECLARE_UNARY_OP(OpName) \ - inline Tensor OpName(const Tensor& x) { \ - return compute(x->shape, [&](const Array& i) { \ - return ::tvm::OpName(x(i)); \ - }); \ +#define TOPI_DECLARE_UNARY_OP(OpName) \ + inline Tensor OpName(const Tensor& x) { \ + return compute(x->shape, [&](const Array& i) { \ + return ::tvm::OpName(x(i)); \ + }, "tensor", "ewise"); \ } TOPI_DECLARE_UNARY_OP(exp); TOPI_DECLARE_UNARY_OP(tanh); TOPI_DECLARE_UNARY_OP(sigmoid); TOPI_DECLARE_UNARY_OP(sqrt); + } // namespace topi #endif // TOPI_EWISE_H_ diff --git a/topi/include/topi/nn.h b/topi/include/topi/nn.h new file mode 100644 index 000000000000..dfd51e82f4ab --- /dev/null +++ b/topi/include/topi/nn.h @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2017 by Contributors + * \brief NN op constructions + * \file nn.h + */ +#ifndef TOPI_NN_H_ +#define TOPI_NN_H_ + +#include + +#include "tvm/ir.h" +#include "tvm/ir_pass.h" +#include "tvm/tvm.h" + +namespace topi { +namespace detail { + +template +tvm::Expr Map(const tvm::Array& exprs, T op) { + CHECK_GE(exprs.size(), 1); + tvm::Expr res = exprs[0]; + for (int i = 1; i < exprs.size(); ++i) { + res = op(res, exprs[i]); + } + return res; +} + +} // namespace detail + +template +inline tvm::Tensor relu(const tvm::Tensor& x, T threshold = static_cast(0)) { + return tvm::compute( + x->shape, + [&](const tvm::Array& i) { return tvm::max(x(i), threshold); }, + "tensor", "ewise"); +} + +inline tvm::Tensor pad( + const tvm::Tensor& t, const tvm::Array& pad_before, + tvm::Array pad_after = tvm::Array()) { + if (pad_after.size() < pad_before.size()) { + for (int i = pad_after.size(); i < pad_before.size(); ++i) { + pad_after.push_back(pad_before[i]); + } + } + CHECK_GE(pad_before.size(), 1); + CHECK_EQ(pad_before.size(), 
pad_after.size()); + tvm::Array output_shape; + for (int i = 0; i < t->shape.size(); ++i) { + if (i >= pad_before.size()) { + output_shape.push_back(t->shape[i]); + } else { + output_shape.push_back( + tvm::ir::Simplify(t->shape[i] + pad_before[i] + pad_after[i])); + } + } + auto l = [&](tvm::Array ovars) { + tvm::Array indices; + tvm::Array sel; + for (int i = 0; i < t->shape.size(); ++i) { + if (i >= pad_before.size()) { + indices.push_back(ovars[i]); + continue; + } + if (!tvm::ir::Equal(pad_before[i], 0)) { + sel.push_back(ovars[i] >= pad_before[i]); + indices.push_back(ovars[i] - pad_before[i]); + } else { + indices.push_back(ovars[i]); + } + if (!tvm::ir::Equal(pad_after[i], 0)) { + sel.push_back(tvm::ir::Simplify(ovars[i] < pad_before[i] + t->shape[i])); + } + } + return tvm::select(detail::Map(sel, tvm::ir::And::make), t(indices), 0); + }; + return tvm::compute(output_shape, l, "tensor", "ewise"); +} + +// Returns a compute that calculates a row-major matrix multiplication: +// A(i, k) * B(k, j), if trans_a == trans_b +// the usual transposed combinations, otherwise +inline tvm::Tensor matmult(const tvm::Tensor& A, const tvm::Tensor& B, + bool trans_a = false, bool trans_b = false) { + tvm::Array output_shape{A->shape[trans_a ? 1 : 0], + B->shape[trans_b ? 0 : 1]}; + auto k = tvm::reduce_axis(tvm::Range{0, A->shape[trans_a ? 0 : 1]}, "k"); + auto l = [&](tvm::Var i, tvm::Var j) { + return tvm::sum((trans_a ? A[k][i] : A[i][k]) * (trans_b ? 
B[j][k] : B[k][j]), + {k}); + }; + return tvm::compute(output_shape, l); +} + +inline tvm::Tensor conv2d_nchw(const tvm::Tensor& I, const tvm::Tensor& W, + int pad_h = 0, int pad_w = 0, int stride_h = 1, + int stride_w = 1) { + CHECK_EQ(4, I->shape.size()); + CHECK_EQ(4, W->shape.size()); + auto pH = I->shape[2]; + auto pW = I->shape[3]; + tvm::Array output_shape{ + I->shape[0], // B + W->shape[1], // O + (I->shape[2] - W->shape[2] + 2 * pad_h) / stride_h + 1, // H + (I->shape[3] - W->shape[3] + 2 * pad_w) / stride_w + 1 // W + }; + auto i = tvm::reduce_axis(tvm::Range{0, I->shape[1]}, "i"); + auto kh = tvm::reduce_axis(tvm::Range{0, W->shape[2]}, "kh"); + auto kw = tvm::reduce_axis(tvm::Range{0, W->shape[3]}, "kw"); + auto T = (pad_h == 0 && pad_w == 0) + ? I + : pad(I, {tvm::Expr(0), tvm::Expr(0), pad_h, pad_w}); + auto l = [&](tvm::Var b, tvm::Var o, tvm::Var h, tvm::Var w) { + return tvm::sum( + T(b, i, stride_h * h + kh, stride_w * w + kw) * W(i, o, kh, kw), + {i, kh, kw}); + }; + return tvm::compute(output_shape, l); +} + +inline tvm::Tensor conv2d_hwcn(const tvm::Tensor& I, const tvm::Tensor& W, + int pad_h = 0, int pad_w = 0, int stride_h = 1, + int stride_w = 1) { + CHECK_EQ(4, I->shape.size()); + CHECK_EQ(4, W->shape.size()); + auto pH = I->shape[2]; + auto pW = I->shape[3]; + tvm::Array output_shape{ + (I->shape[2] - W->shape[2] + 2 * pad_h) / stride_h + 1, // H + (I->shape[3] - W->shape[3] + 2 * pad_w) / stride_w + 1, // W + I->shape[2], // B + W->shape[3] // O + }; + auto i = tvm::reduce_axis(tvm::Range{0, I->shape[3]}, "i"); + auto kh = tvm::reduce_axis(tvm::Range{0, W->shape[0]}, "kh"); + auto kw = tvm::reduce_axis(tvm::Range{0, W->shape[1]}, "kw"); + auto T = (pad_h == 0 && pad_w == 0) ? 
I : pad(I, {pad_h, pad_w}); + auto l = [&](tvm::Var b, tvm::Var o, tvm::Var h, tvm::Var w) { + return tvm::sum( + T(stride_h * h + kh, stride_w * w + kw, i, b) * W(kh, kw, i, o), + {i, kh, kw}); + }; + return tvm::compute(output_shape, l); +} + +inline tvm::Tensor depthwise_conv2d_nchw(const tvm::Tensor& I, + const tvm::Tensor& W, int pad_h = 0, + int pad_w = 0, int stride_h = 1, + int stride_w = 1) { + CHECK_EQ(4, I->shape.size()); + CHECK_EQ(4, W->shape.size()); + auto pH = I->shape[2]; + auto pW = I->shape[3]; + auto pCM = W->shape[1]; // channel_multiplier + tvm::Array output_shape{ + I->shape[0], // B + W->shape[1], // O + (I->shape[2] - W->shape[2] + 2 * pad_h) / stride_h + 1, // H + (I->shape[3] - W->shape[3] + 2 * pad_w) / stride_w + 1 // W + }; + auto i = tvm::reduce_axis(tvm::Range{0, I->shape[1]}, "i"); + auto kh = tvm::reduce_axis(tvm::Range{0, W->shape[2]}, "kh"); + auto kw = tvm::reduce_axis(tvm::Range{0, W->shape[3]}, "kw"); + auto T = (pad_h == 0 && pad_w == 0) + ? I + : pad(I, {tvm::Expr(0), tvm::Expr(0), pad_h, pad_w}); + auto l = [&](tvm::Var b, tvm::Var o, tvm::Var h, tvm::Var w) { + return tvm::sum(T(b, i / pCM, stride_h * h + kh, stride_w * w + kw) * + W(i / pCM, o % pCM, kh, kw), + {i, kh, kw}); + }; + return tvm::compute(output_shape, l); +} + +inline tvm::Tensor group_conv2d_ngchw(const tvm::Tensor& I, + const tvm::Tensor& W, int pad_h = 0, + int pad_w = 0, int stride_h = 1, + int stride_w = 1) { + CHECK_EQ(5, I->shape.size()); + CHECK_EQ(5, W->shape.size()); + auto pH = I->shape[2]; + auto pW = I->shape[3]; + tvm::Array output_shape{ + I->shape[0], // B + I->shape[1], // G + W->shape[2], // O + (I->shape[3] - W->shape[3] + 2 * pad_h) / stride_h + 1, // H + (I->shape[4] - W->shape[4] + 2 * pad_w) / stride_w + 1 // W + }; + auto i = tvm::reduce_axis(tvm::Range{0, I->shape[2]}, "i"); + auto kh = tvm::reduce_axis(tvm::Range{0, W->shape[3]}, "kh"); + auto kw = tvm::reduce_axis(tvm::Range{0, W->shape[4]}, "kw"); + + auto T = (pad_h == 0 && pad_w 
== 0) + ? I + : pad(I, {tvm::Expr(0), tvm::Expr(0), tvm::Expr(0), pad_h, pad_w}); + auto l = [&](tvm::Var b, tvm::Var g, tvm::Var o, tvm::Var h, tvm::Var w) { + return tvm::sum( + I(b, g, i, stride_h * h + kh, stride_w * w + kw) * W(g, i, o, kh, kw), + {i, kh, kw}); + }; + return tvm::compute(output_shape, l); +} + +} // namespace topi +#endif // TOPI_NN_H_