Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions python/tvm/autotvm/feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,19 @@
import tvm._ffi

from tvm.target import Target
from tvm.te import schedule
from tvm.driver import build_module


def ana_lower(sch, args, binds=None, simple_mode=True):
"""Do lower while keeping all axes in IR
i.e. Do not eliminate loop with extent of 1, do not vectorize, unroll or inject virtual threads
"""
binds, _ = build_module.get_binds(args, compact=False, binds=binds)
sch = sch.normalize()
# Phase 0
bounds = schedule.InferBound(sch)
stmt = schedule.ScheduleOps(sch, bounds, True)
func = schedule.SchedulePostProcToPrimFunc(args, stmt, None)
mod = tvm.IRModule.from_expr(func._move())
context = tvm.transform.PassContext(config={"tir.debug_keep_trivial_loop": True})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need tir.debug_keep_trivial_loop?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This deliberately keeps the loop iterators in-place, even if they have extent=1, rather than the default behavior of replacing trivial iterators with a Let statement. As a result, the itervars can be examined for optimization parameters (e.g. in xgboost).

Longer term, I'd prefer having it always generate the loops with a lowering pass to identify/simplify the trivial loops, but that's a later item.

with context:
mod = build_module.schedule_to_module(sch, args, binds=binds)

mod = tvm.tir.transform.StorageFlatten(64)(mod._move())
mod = tvm.tir.transform.Simplify()(mod._move())
assert simple_mode
Expand Down
5 changes: 5 additions & 0 deletions python/tvm/driver/build_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ def schedule_to_module(
binds: Optional[Mapping[tensor.Tensor, Buffer]] = None,
) -> IRModule:
"""According to the given schedule, form a function.

This is a low-level function intended for testing purposes, and
does not apply any optimization passes. In general, `tvm.lower`
and `tvm.build` should be used instead.

Parameters
----------
sch : tvm.te.schedule.Schedule
Expand Down
15 changes: 5 additions & 10 deletions python/tvm/relay/backend/contrib/ethosu/tir/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import tvm
from tvm import relay
from tvm.relay.expr_functor import ExprMutator
from tvm.driver.build_module import get_binds
from tvm.driver.build_module import schedule_to_module

from .passes import ReplaceOperators, RemoveZeroStores, EncodeConstants
from .scheduler import schedule
Expand Down Expand Up @@ -64,22 +64,17 @@ def lower_ethosu(sch, args, const_dict, name="main"):
"no_unroll_loop_with_extent_one": True,
},
"tir.UnrollLoop": {"auto_max_depth": -1},
"tir.noalias": True,
"tir.debug_keep_trivial_loop": True,
}
# Merge two configs
curr_cfg = {**curr_cfg, **tir_compiler_cfg}

sch = sch.normalize()
bounds = tvm.te.schedule.InferBound(sch)
stmt = tvm.te.schedule.ScheduleOps(sch, bounds, True)

compact = tvm.te.schedule.VerifyCompactBuffer(stmt)
binds, arg_list = get_binds(args, compact, None)
func = tvm.te.schedule.SchedulePostProcToPrimFunc(arg_list, stmt, binds)

func = func.with_attr("global_symbol", name)
func = func.with_attr("tir.noalias", True)
mod = tvm.IRModule({name: func})
with tvm.transform.PassContext(config=curr_cfg):
mod = schedule_to_module(sch, args, name)

Copy link
Contributor

@electriclilies electriclilies Oct 14, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like that you've removed this python logic, but why call schedule_to_primfunc and then wrap the func in an IRModule? Why not use schedule_to_module?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Due to silliness and lack of pattern recognition on my part, since I was only looking for cases that could be replaced with schedule_to_primfunc at that point. Changing it to schedule_to_module, and thank you for catching it!

mod = tvm.tir.transform.Simplify()(mod)
mod = tvm.tir.transform.StorageFlatten(64)(mod)
mod = tvm.tir.transform.UnrollLoop()(mod)
Expand Down
38 changes: 10 additions & 28 deletions src/auto_scheduler/feature.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <tvm/auto_scheduler/feature.h>
#include <tvm/auto_scheduler/measure.h>
#include <tvm/auto_scheduler/measure_record.h>
#include <tvm/driver/driver_api.h>
#include <tvm/runtime/registry.h>
#include <tvm/support/parallel_for.h>
#include <tvm/te/operation.h>
Expand All @@ -44,13 +45,6 @@
#include "search_policy/utils.h"
#include "utils.h"

namespace tvm {
// import the function from driver_api.cc
void GetBinds(const Array<te::Tensor>& args, bool compact,
const std::unordered_map<te::Tensor, tir::Buffer>& binds,
Map<te::Tensor, tir::Buffer>* out_binds, Array<ObjectRef>* out_arg_list);
} // namespace tvm

namespace tvm {
namespace auto_scheduler {

Expand Down Expand Up @@ -1268,35 +1262,25 @@ void GetPerStoreFeaturesWorkerFunc(const SearchTask& task, const State& state, i
Array<te::Tensor> tensors;

std::tie(sch, tensors) = task->compute_dag.ApplySteps(state->transform_steps);

// When inlining, replace const matrices with const values.
// Produces wrong IR, but good enough for feature extraction, and
// can improve the speed of feature extraction/search. Must be
// called before ScheduleToModule to have an effect.
sch = sch.normalize_for_feature_extraction();
auto bounds = te::InferBound(sch);

try {
auto stmt = te::ScheduleOps(sch, bounds, false);
Map<te::Tensor, te::Buffer> out_binds;
Array<ObjectRef> out_arg_list;
bool compact = te::VerifyCompactBuffer(stmt);
const std::string& name = "main";
GlobalVar global_var(name);

// Copied from driver_api.cc::lower
auto pass_ctx = tvm::transform::PassContext::Current();
GetBinds(tensors, compact, std::unordered_map<te::Tensor, te::Buffer>(), &out_binds,
&out_arg_list);
tir::PrimFunc f = te::SchedulePostProcToPrimFunc(out_arg_list, std::move(stmt), out_binds);
f = WithAttr(std::move(f), "global_symbol", runtime::String(name));

bool noalias = pass_ctx->GetConfig<Bool>("tir.noalias", Bool(true)).value();
auto mod = ScheduleToModule(sch, Array<ObjectRef>{tensors.begin(), tensors.end()}, name,
std::unordered_map<te::Tensor, te::Buffer>());

bool disable_vectorize =
pass_ctx->GetConfig<Bool>("tir.disable_vectorize", Bool(false)).value();
bool instrument_bound_checkers =
pass_ctx->GetConfig<Bool>("tir.instrument_bound_checkers", Bool(false)).value();

if (noalias) {
f = WithAttr(std::move(f), "tir.noalias", Bool(true));
}
auto mod = IRModule(Map<GlobalVar, BaseFunc>({{global_var, f}}));

if (IsGPUTask(task)) {
auto pass_list = Array<tvm::transform::Pass>();
// Phase 0
Expand All @@ -1323,9 +1307,7 @@ void GetPerStoreFeaturesWorkerFunc(const SearchTask& task, const State& state, i
const auto& optimize =
tir::transform::Sequential(Array<tvm::transform::Pass>{tir::transform::Simplify()});
mod = optimize(std::move(mod));
const auto& it = mod->functions.find(global_var);
ICHECK(it != mod->functions.end());
const auto& prim_func = (*it).second.as<PrimFuncNode>();
PrimFunc prim_func = Downcast<PrimFunc>(mod->Lookup(name));
GetPerStoreFeature(prim_func->body, task->hardware_params->cache_line_bytes, max_n_bufs,
feature);
} catch (Error& e) {
Expand Down
19 changes: 10 additions & 9 deletions src/driver/driver_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ TVM_REGISTER_PASS_CONFIG_OPTION("tir.disable_assert", Bool);
TVM_REGISTER_PASS_CONFIG_OPTION("tir.disable_vectorize", Bool);
TVM_REGISTER_PASS_CONFIG_OPTION("tir.is_entry_func", Bool);
TVM_REGISTER_PASS_CONFIG_OPTION("tir.add_lower_pass", Array<Array<ObjectRef>>);
TVM_REGISTER_PASS_CONFIG_OPTION("tir.debug_keep_trivial_loop", Bool);

using runtime::PackedFunc;
using runtime::TVMArgs;
Expand Down Expand Up @@ -287,24 +288,24 @@ IRModule ApplyPasses(IRModule mod, transform::Sequential seq) {
return mod;
}

// Convert te schedule to IRModule
IRModule ScheduleToModule(te::Schedule sch, const Array<ObjectRef>& args, const std::string& name,
const std::unordered_map<te::Tensor, tir::Buffer>& binds) {
// Convert te schedule to IRModule
Array<ObjectRef> out_arg_list;
transform::PassContext pass_ctx = transform::PassContext::Current();

sch = sch.normalize();

transform::PassContext pass_ctx = transform::PassContext::Current();
bool debug_keep_trivial_loop =
pass_ctx->GetConfig<Bool>("tir.debug_keep_trivial_loop", Bool(false)).value();

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does debug_keep_trivial_loop do?

Copy link
Contributor Author

@Lunderberg Lunderberg Oct 15, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This tells ScheduleOps to keep loops that have an extent of 1, where the default behavior is to replace trivial loops with a Let statement. (impl) The only place that it is used is in lower_ethosu to maintain the previous behavior of keeping the trivial loops. That said, I haven't looked into why the trivial loops are kept in that case.

Edit: And also used in the autotvm feature extraction

// Before TIR transformation.
Map<tir::IterVar, Range> bounds = te::InferBound(sch);
tir::Stmt stmt = te::ScheduleOps(sch, std::move(bounds), false);
tir::Stmt stmt = te::ScheduleOps(sch, te::InferBound(sch), debug_keep_trivial_loop);
bool compact = te::VerifyCompactBuffer(stmt);

Map<te::Tensor, tir::Buffer> out_binds;
Array<ObjectRef> out_arg_list;
GetBinds(args, compact, binds, &out_binds, &out_arg_list);

// Build the function
// At this point binds is only te::Tensors
// Build the function, converting from te::Tensor to tir::Buffer
tir::PrimFunc f = te::SchedulePostProcToPrimFunc(out_arg_list, std::move(stmt), out_binds);
f = WithAttr(std::move(f), "global_symbol", runtime::String(name));

Expand All @@ -325,7 +326,7 @@ TVM_REGISTER_GLOBAL("driver.schedule_to_module")
const Map<te::Tensor, tir::Buffer>& binds) {
std::unordered_map<te::Tensor, tir::Buffer> c_binds;
// Check to make sure binds is not null before doing the conversion;
if (binds.get() != nullptr) {
if (binds.defined()) {
for (auto kv : binds) {
c_binds.insert({kv.first, kv.second});
}
Expand Down
8 changes: 3 additions & 5 deletions tests/python/integration/test_reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
# specific language governing permissions and limitations
# under the License.
import pytest
import numpy as np

import tvm
from tvm import te, topi
import numpy as np
from tvm.driver.build_module import schedule_to_module
import tvm.testing
import tvm.topi.testing

Expand Down Expand Up @@ -532,10 +533,7 @@ def test_reduce_storage_reuse():
target = tvm.target.Target("cuda")

def run_passes(sch, args):
bounds = tvm.te.schedule.InferBound(sch)
stmt = tvm.te.schedule.ScheduleOps(sch, bounds)
func = tvm.te.schedule.SchedulePostProcToPrimFunc(args, stmt, None)
mod = tvm.IRModule.from_expr(func)
mod = schedule_to_module(sch, args)
mod = tvm.tir.transform.Apply(lambda f: f.with_attr("target", target))(mod)
return tvm.transform.Sequential(
[
Expand Down
26 changes: 10 additions & 16 deletions tests/python/unittest/test_te_schedule_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import numpy as np

import tvm
from tvm import te
import numpy as np
from tvm.driver.build_module import schedule_to_module


def test_schedule0():
Expand All @@ -26,11 +28,8 @@ def test_schedule0():
A1 = te.compute((m, l), lambda i, j: A[i, j], name="A1")
s = te.create_schedule(A1.op)

bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
stmt = tvm.te.schedule.ScheduleOps(s, bounds)
func = tvm.te.schedule.SchedulePostProcToPrimFunc([A, A1], stmt, None)
assert isinstance(func, tvm.tir.PrimFunc)
mod = schedule_to_module(s, [A, A1])
assert isinstance(mod["main"], tvm.tir.PrimFunc)


def test_schedule1():
Expand All @@ -42,12 +41,9 @@ def test_schedule1():
s = te.create_schedule(A1.op)
xo, xi = s[A1].split(A1.op.axis[0], 8)
s[A1].pragma(xo, "auto_unroll_max_step", 10)
bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
stmt = tvm.te.schedule.ScheduleOps(s, bounds)

func = tvm.te.schedule.SchedulePostProcToPrimFunc([A, A1], stmt, None)
assert isinstance(func, tvm.tir.PrimFunc)
mod = schedule_to_module(s, [A, A1])
assert isinstance(mod["main"], tvm.tir.PrimFunc)


def test_schedule2():
Expand All @@ -60,11 +56,9 @@ def test_schedule2():
s = te.create_schedule(A2.op)
xo, xi = s[A2].split(A2.op.axis[0], 8)
s[A1].compute_at(s[A2], xo)
bounds = tvm.te.schedule.InferBound(s)
assert isinstance(bounds, tvm.container.Map)
stmt = tvm.te.schedule.ScheduleOps(s, bounds)
func = tvm.te.schedule.SchedulePostProcToPrimFunc([A, A2], stmt, None)
assert isinstance(func, tvm.tir.PrimFunc)

mod = schedule_to_module(s, [A, A2])
assert isinstance(mod["main"], tvm.tir.PrimFunc)


def test_schedule_scan():
Expand Down
18 changes: 4 additions & 14 deletions tests/python/unittest/test_tir_transform_inject_copy_intrin.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import tvm
import tvm.testing
from tvm import te
from tvm.driver.build_module import schedule_to_module


def test_copy2d():
Expand Down Expand Up @@ -53,11 +54,7 @@ def test_copy_pad():
)
s = te.create_schedule(B.op)
s[B].pragma(B.op.axis[0], "memcpy")
bounds = tvm.te.schedule.InferBound(s)
stmt = tvm.te.schedule.ScheduleOps(s, bounds)

func = tvm.te.schedule.SchedulePostProcToPrimFunc([A, B], stmt, None)
mod = tvm.IRModule.from_expr(func)
mod = schedule_to_module(s, [A, B])
mod = tvm.tir.transform.StorageFlatten(64)(mod)

def cb(src, dst, pad_before, pad_after, pad_value):
Expand All @@ -77,11 +74,7 @@ def test_single_point_test():
B = te.compute((1,), lambda i: A[i], name="B")
s = te.create_schedule(B.op)
s[B].pragma(B.op.axis[0], "memcpy")
bounds = tvm.te.schedule.InferBound(s)
stmt = tvm.te.schedule.ScheduleOps(s, bounds)

func = tvm.te.schedule.SchedulePostProcToPrimFunc([A, B], stmt, None)
mod = tvm.IRModule.from_expr(func)
mod = schedule_to_module(s, [A, B])
mod = tvm.tir.transform.StorageFlatten(64)(mod)

def cb(src, dst, pad_before, pad_after, pad_value):
Expand All @@ -105,11 +98,8 @@ def test_copy_pad_split():
xo, xi = s[B].split(B.op.axis[0], factor=4)
s[Apad].compute_at(s[B], xo)
s[Apad].pragma(s[Apad].op.axis[0], "memcpy")
bounds = tvm.te.schedule.InferBound(s)
stmt = tvm.te.schedule.ScheduleOps(s, bounds)

func = tvm.te.schedule.SchedulePostProcToPrimFunc([A, B], stmt, None)
mod = tvm.IRModule.from_expr(func)
mod = schedule_to_module(s, [A, B])
mod = tvm.tir.transform.StorageFlatten(64)(mod._move())
mod = tvm.tir.transform.Simplify()(mod._move())

Expand Down
9 changes: 4 additions & 5 deletions tests/python/unittest/test_tir_transform_make_packed_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import numpy

import tvm
from tvm import te
import numpy
from tvm.driver.build_module import schedule_to_module


def test_makeapi():
Expand All @@ -27,10 +29,7 @@ def test_makeapi():
C = te.compute(A.shape, lambda *i: A(*i) + B(*i), name="C")
s = te.create_schedule(C.op)

bounds = tvm.te.schedule.InferBound(s)
stmt = tvm.te.schedule.ScheduleOps(s, bounds)
func = tvm.te.schedule.SchedulePostProcToPrimFunc([n, A, B, C], stmt, None)
mod = tvm.IRModule.from_expr(func)
mod = schedule_to_module(s, [n, A, B, C])
mod = tvm.tir.transform.StorageFlatten(64)(mod)
mod = tvm.tir.transform.Apply(
lambda f: f.with_attr(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,17 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import tvm
from tvm import te
import numpy as np

import tvm
import tvm.testing
from tvm import te
from tvm.driver.build_module import schedule_to_module
from tvm.topi.math import cast


def run_passes(sch, args):
bounds = tvm.te.schedule.InferBound(sch)
assert isinstance(bounds, tvm.container.Map)
stmt = tvm.te.schedule.ScheduleOps(sch, bounds)

func = tvm.te.schedule.SchedulePostProcToPrimFunc(args, stmt, None)
mod = tvm.IRModule.from_expr(func)
mod = schedule_to_module(sch, args)
return tvm.transform.Sequential(
[
tvm.tir.transform.StorageFlatten(64),
Expand Down
Loading