Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions include/tvm/ir.h
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,17 @@ constexpr const char* tvm_stack_make_array = "tvm_stack_make_array";
* }
*/
constexpr const char* tvm_call_packed = "tvm_call_packed";
/*!
 * \brief Trace intrinsic: invoke a packed trace action. See pseudo code.
 *
 *  The first argument is the name of the packed function to call; the
 *  remaining arguments are handed to it. The traced value is expected to be
 *  the last argument.
 *
 *  int tvm_call_trace_packed(name, TVMValue* args) {
 *     ModuleNode* env = GetCurrentEnv();
 *     const PackedFunc* f = env->GetFuncFromEnv(name);
 *     (*f)(args, type_code_of(args), len(args));
 *     return 0;
 *  }
 */
constexpr const char* tvm_call_trace_packed = "tvm_call_trace_packed";
/*!
* \brief See pseudo code
* Mark the content as thread local context, can get optimized
Expand Down Expand Up @@ -422,6 +433,25 @@ constexpr const char* tvm_thread_context = "tvm_thread_context";
* }
*/
constexpr const char* tvm_call_packed_lowered = "tvm_call_packed_lowered";
/*!
 * \brief Lowered form of the trace intrinsic. The storage for the argument
 *  values and their type codes is allocated explicitly, and the packed
 *  function's return value is written to slot `end` of the stacks.
 *  The lowered call additionally carries the traced value as its last
 *  argument.
 *
 *  int tvm_call_trace_packed_lowered(name,
 *                                    TVMValue* value_stack,
 *                                    int* tcode_stack,
 *                                    int begin,
 *                                    int end,
 *                                    traced_value) {
 *     ModuleNode* env = GetCurrentEnv();
 *     const PackedFunc* f = env->GetFuncFromEnv(name);
 *     f->CallPacked(TVMArgs(value_stack[begin:end],
 *                           tcode_stack[begin:end]),
 *                   TVMRetValue(value_stack + end, tcode_stack + end));
 *  }
 */
constexpr const char* tvm_call_trace_packed_lowered =
    "tvm_call_trace_packed_lowered";
/*!
* \brief See pseudo code
*
Expand Down
36 changes: 36 additions & 0 deletions python/tvm/intrin.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,42 @@ def _rule_float_direct(op):
return call_pure_extern(op.dtype, op.name, *op.args)
return None

@_register_func("tvm.default_trace_action")
def _tvm_default_trace_action(*args):
    """Default trace action: print every received argument as one list."""
    received = list(args)
    print(received)

def trace(args, trace_action="tvm.default_trace_action"):
    """Build a call expression that traces tensor data at runtime.

    The value being traced must be the last element of ``args``; its dtype
    becomes the dtype of the resulting call. The packed function named by
    ``trace_action`` is passed as the first call argument; when no action
    is given, ``tvm.default_trace_action`` is used.

    Parameters
    ----------
    args : list of Expr or Buffers.
        Positional arguments. Buffers are packed before being passed on.

    trace_action : str.
        The name of the trace action.

    Returns
    -------
    call : Expr
        The call expression.

    Raises
    ------
    Exception
        If ``args`` is not a list.

    See Also
    --------
    tvm.call_packed : Creates packed function.
    """
    if not isinstance(args, list):
        raise Exception("tvm.trace consumes the args as list type")
    # The action name goes first, followed by the (possibly packed) arguments.
    packed_args = [trace_action]
    for item in args:
        if isinstance(item, _Buffer):
            packed_args.append(_pack_buffer(item))
        else:
            packed_args.append(item)
    traced_dtype = args[-1].dtype
    return _make.Call(
        traced_dtype, "tvm_call_trace_packed", packed_args, _Call.Intrinsic, None, 0)

# opencl pattern for exp
register_intrin_rule("opencl", "exp", _rule_float_direct, override=True)
# default pattern for exp
Expand Down
99 changes: 70 additions & 29 deletions src/codegen/llvm/codegen_cpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -526,42 +526,81 @@ llvm::Value* CodeGenCPU::GetPackedFuncHandle(const std::string& fname) {
return phi;
}

// NOTE(review): this span appears to interleave the body of CreateCallPacked
// with the MakeCallPacked helper (several locals are declared twice) --
// confirm against the actual file before relying on statement order.
llvm::Value* CodeGenCPU::CreateCallPacked(const Call* op) {
CHECK_EQ(op->args.size(), 5U);
std::string func_name = op->args[0].as<StringImm>()->value;
llvm::Value* handle = GetPackedFuncHandle(func_name);
// MakeCallPacked: emits a call through RuntimeTVMFuncCall() to the packed
// function named by args[0]. args[1] and args[2] are evaluated as the value
// stack and the type-code stack; entries [begin, end) are passed as the
// arguments and entry `end` is passed as the return slot. The loaded and
// cast result is stored to *rvalue, the address of the return type code is
// stored to *ret_tcode, and the block returned by CheckCallSuccess is handed
// back to the caller.
llvm::BasicBlock *
CodeGenCPU::MakeCallPacked(const Array<Expr> &args, llvm::Value **rvalue,
llvm::Value **ret_tcode, const Type &r_type,
const int64_t begin, const int64_t end) {
using llvm::BasicBlock;
std::string func_name = args[0].as<StringImm>()->value;
llvm::Value *handle = GetPackedFuncHandle(func_name);
// call the function
int64_t begin = op->args[3].as<IntImm>()->value;
int64_t end = op->args[4].as<IntImm>()->value;
// Number of arguments forwarded to the packed function.
int64_t nargs = end - begin;
CHECK_GE(nargs, 0);
llvm::Value* stack_value = MakeValue(op->args[1]);
llvm::Value* stack_tcode = MakeValue(op->args[2]);
llvm::Value* arg_value = builder_->CreateInBoundsGEP(
builder_->CreatePointerCast(
stack_value, t_tvm_value_->getPointerTo()), ConstInt32(begin));
llvm::Value* arg_tcode = CreateBufferPtr(
Int(32), stack_tcode, ConstInt32(begin));
llvm::Value* ret_value = builder_->CreateInBoundsGEP(
builder_->CreatePointerCast(
stack_value, t_tvm_value_->getPointerTo()), ConstInt32(end));
llvm::Value* ret_tcode = CreateBufferPtr(
Int(32), stack_tcode, ConstInt32(end));
CheckCallSuccess(
builder_->CreateCall(
RuntimeTVMFuncCall(),
{handle, arg_value, arg_tcode, ConstInt32(nargs),
ret_value, ret_tcode}));
Type r_type = op->type;
// Evaluate the value stack and the type-code stack expressions.
llvm::Value *stack_value = MakeValue(args[1]);
llvm::Value *stack_tcode = MakeValue(args[2]);
// Address of the first argument value (stack entry `begin`).
llvm::Value *arg_value = builder_->CreateInBoundsGEP(
builder_->CreatePointerCast(stack_value, t_tvm_value_->getPointerTo()),
ConstInt32(begin));
// Address of the first argument type code.
llvm::Value *arg_tcode =
CreateBufferPtr(Int(32), stack_tcode, ConstInt32(begin));
// Address of the return value slot (stack entry `end`).
llvm::Value *ret_value = builder_->CreateInBoundsGEP(
builder_->CreatePointerCast(stack_value, t_tvm_value_->getPointerTo()),
ConstInt32(end));
// Address of the return type code; exposed to the caller through the
// out-parameter.
*ret_tcode = CreateBufferPtr(Int(32), stack_tcode, ConstInt32(end));
BasicBlock *end_block = CheckCallSuccess(builder_->CreateCall(
RuntimeTVMFuncCall(), {handle, arg_value, arg_tcode, ConstInt32(nargs),
ret_value, *ret_tcode}));
// The return slot is read using the API type of the requested result type
// and then cast to the requested type.
Type r_api_type = ir::APIType(r_type);
llvm::Value* rvalue =
builder_->CreateAlignedLoad(
builder_->CreatePointerCast(
ret_value, LLVMType(r_api_type)->getPointerTo()), 8);
rvalue = CreateCast(r_api_type, r_type, rvalue);
*rvalue = builder_->CreateAlignedLoad(
builder_->CreatePointerCast(ret_value,
LLVMType(r_api_type)->getPointerTo()),
8);
*rvalue = CreateCast(r_api_type, r_type, *rvalue);
return end_block;
}

// Lower a packed call with five arguments (function name, value stack,
// type-code stack, begin index, end index) and return the value loaded from
// the call's return slot.
llvm::Value *CodeGenCPU::CreateCallPacked(const Call *op) {
  CHECK_EQ(op->args.size(), 5U);
  // The stack window is given by the two immediate integer arguments.
  const int64_t stack_begin = op->args[3].as<IntImm>()->value;
  const int64_t stack_end = op->args[4].as<IntImm>()->value;
  llvm::Value *call_result = nullptr;
  // Required by the helper's interface; not needed for a plain packed call.
  llvm::Value *result_tcode_ptr = nullptr;
  MakeCallPacked(op->args, &call_result, &result_tcode_ptr, op->type,
                 stack_begin, stack_end);
  return call_result;
}

// Lower a trace call with six arguments: function name, value stack,
// type-code stack, begin index, end index and the traced value.
//
// The packed trace action is called first. If the type code it wrote to the
// return slot is not kNull, the result of this expression is the action's
// return value; otherwise it is the original traced value.
llvm::Value *CodeGenCPU::CreateCallTracePacked(const Call *op) {
  using llvm::BasicBlock;
  CHECK_EQ(op->args.size(), 6U);
  llvm::Value *rvalue = nullptr;
  llvm::Value *ret_tcode = nullptr;
  MakeCallPacked(op->args, &rvalue, &ret_tcode, op->type,
                 op->args[3].as<IntImm>()->value,
                 op->args[4].as<IntImm>()->value);
  // Get traced value.
  llvm::Value *traced_value = MakeValue(op->args[5]);
  // The update_block handles case when we need to update the return value.
  BasicBlock *update_block =
      BasicBlock::Create(*ctx_, "update_block", function_);
  // The continue_block handles case when we need to return original
  // traced value.
  BasicBlock *continue_block =
      BasicBlock::Create(*ctx_, "continue_block", function_);
  // ret_tcode points at an element of the int32 type-code stack, so only
  // 4-byte alignment can be promised for this load.
  llvm::Value *ret_tcode_value = builder_->CreateAlignedLoad(ret_tcode, 4);
  // Check the ret_type_code and create cmp instruction.
  llvm::Value *cmp = builder_->CreateICmpNE(
      ret_tcode_value, llvm::ConstantInt::get(t_int_, kNull));
  // The predecessor of continue_block on the "not updated" edge is the block
  // that holds the conditional branch. Evaluating the traced value above may
  // have moved the insert point, so query the builder instead of assuming
  // the block returned by MakeCallPacked.
  BasicBlock *branch_block = builder_->GetInsertBlock();
  builder_->CreateCondBr(cmp, update_block, continue_block);
  builder_->SetInsertPoint(update_block);
  builder_->CreateBr(continue_block);
  builder_->SetInsertPoint(continue_block);
  // The return value depends on from what bb we come from.
  llvm::PHINode *phi_rvalue = builder_->CreatePHI(traced_value->getType(), 2);
  phi_rvalue->addIncoming(rvalue, update_block);
  phi_rvalue->addIncoming(traced_value, branch_block);
  return phi_rvalue;
}

llvm::Value* CodeGenCPU::RuntimeTVMFuncCall() {
if (f_tvm_func_call_ != nullptr) return f_tvm_func_call_;
return GetContextPtr(gv_tvm_func_call_);
Expand Down Expand Up @@ -608,6 +647,8 @@ void CodeGenCPU::AddStartupFunction() {
llvm::Value* CodeGenCPU::CreateIntrinsic(const Call* op) {
if (op->is_intrinsic(intrinsic::tvm_call_packed_lowered)) {
return CreateCallPacked(op);
} else if (op->is_intrinsic(intrinsic::tvm_call_trace_packed_lowered)) {
return CreateCallTracePacked(op);
} else if (op->is_intrinsic(intrinsic::tvm_static_handle)) {
return CreateStaticHandle();
} else if (op->is_intrinsic(intrinsic::tvm_throw_last_error)) {
Expand Down
7 changes: 7 additions & 0 deletions src/codegen/llvm/codegen_cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,15 @@ class CodeGenCPU : public CodeGenLLVM {
void UnpackClosureData(llvm::Value*cdata,
const Array<Var>& fields,
std::unordered_map<const Variable*, llvm::Value*>* vmap);
// Make packed call.
// args holds the lowered call arguments: args[0] is the function name,
// args[1] the value stack and args[2] the type-code stack. Stack entries
// [begin, end) are passed as arguments and entry `end` is the return slot.
// On return *rvalue is the loaded return value cast to r_type and
// *ret_tcode is the address of the return type code. The returned block is
// the one produced by the call-success check.
llvm::BasicBlock *MakeCallPacked(const Array<Expr> &args,
llvm::Value **rvalue,
llvm::Value **ret_tcode, const Type &r_type,
const int64_t begin, const int64_t end);
// Create call into tvm packed function.
// Expects the five-argument lowered form and returns the value loaded from
// the call's return slot.
llvm::Value* CreateCallPacked(const Call* op);
// Create trace call into tvm packed function.
// Expects the six-argument lowered form whose last argument is the traced
// value. The result is the packed function's return value when its return
// type code is not kNull, and the traced value otherwise.
llvm::Value* CreateCallTracePacked(const Call *op);
// Create static initialization
void CreateStaticInit(const std::string& init_fname, const Stmt& body);
// Create parallel launch
Expand Down
53 changes: 52 additions & 1 deletion src/pass/lower_tvm_builtin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ class BuiltinLower : public IRMutator {
stmt = IRMutator::Mutate(stmt);
CHECK_EQ(run_shape_stack_, 0);
CHECK_EQ(run_array_stack_, 0);
CHECK_EQ(run_arg_stack_, 0);
while (prep_seq_.size() != 0) {
stmt = Block::make(prep_seq_.back(), stmt);
prep_seq_.pop_back();
Expand Down Expand Up @@ -140,6 +139,8 @@ class BuiltinLower : public IRMutator {
Expr Mutate_(const Call* op, const Expr &e) final {
if (op->is_intrinsic(intrinsic::tvm_call_packed)) {
return MakeCallPacked(op, e);
} else if (op->is_intrinsic(intrinsic::tvm_call_trace_packed)) {
return MakeCallTracePacked(op, e);
} else if (op->is_intrinsic(intrinsic::tvm_stack_make_shape)) {
return MakeShape(op, e);
} else if (op->is_intrinsic(intrinsic::tvm_stack_make_array)) {
Expand Down Expand Up @@ -256,6 +257,56 @@ class BuiltinLower : public IRMutator {
packed_args, Call::Intrinsic);
}

// Lower a tvm_call_trace_packed call into tvm_call_trace_packed_lowered.
//
// Every argument after the function name is stored into the shared value
// and type-code stacks through statements appended to prep_seq_. The
// lowered call receives the function name, both stacks, the [begin, end)
// window that was filled, and the traced value (the last argument).
Expr MakeCallTracePacked(const Call *op, const Expr &e) {
  // Remember the current stack tops so they can be restored afterwards.
  const size_t saved_shape_top = run_shape_stack_;
  const size_t saved_array_top = run_array_stack_;
  const size_t first_slot = run_arg_stack_;
  const size_t num_call_args = op->args.size();
  run_arg_stack_ += op->args.size();
  CHECK_GT(num_call_args, 0);
  // Lower the arguments themselves before packing them.
  Expr lowered = IRMutator::Mutate_(op, e);
  op = lowered.as<Call>();
  // args[0] is the function name, so packing starts at index 1.
  for (size_t i = 1; i < op->args.size(); ++i) {
    const size_t slot = first_slot + i - 1;
    Expr slot_index = ConstInt32(slot);
    Expr value = op->args[i];
    Type value_type = value.type();
    Type api_type = APIType(value_type);
    if (value_type != api_type) {
      value = Cast::make(api_type, value);
    }
    prep_seq_.emplace_back(TVMStructSet(
        stack_value_, static_cast<int>(slot),
        intrinsic::kTVMValueContent, value));
    int type_code = api_type.code();
    CHECK(!IsArrayHandle(value)) << "Trace does not support Buffers";
    prep_seq_.emplace_back(
        Store::make(stack_tcode_, ConstInt32(type_code),
                    slot_index, const_true(1)));
  }
  // Record the high-water marks, then restore the shape and array stacks.
  max_arg_stack_ = std::max(max_arg_stack_, run_arg_stack_);
  max_shape_stack_ = std::max(max_shape_stack_, run_shape_stack_);
  max_array_stack_ = std::max(max_array_stack_, run_array_stack_);
  run_shape_stack_ = saved_shape_top;
  run_array_stack_ = saved_array_top;
  // Update the top of the stack, so we can use more than one
  // packed function's arguments with the one stack.
  run_arg_stack_ = first_slot + num_call_args - 1;
  Array<Expr> lowered_args = {
    op->args[0],
    stack_value_,
    stack_tcode_,
    ConstInt32(first_slot),
    ConstInt32(first_slot + op->args.size() - 1),
    // Pass traced value.
    op->args[num_call_args - 1]
  };
  return Call::make(
      op->type, intrinsic::tvm_call_trace_packed_lowered,
      lowered_args, Call::Intrinsic);
}

private:
bool IsArrayHandle(const Expr& arg) {
// specially set array handle.
Expand Down
Loading