diff --git a/apps/README.md b/apps/README.md index f513b68d1892..da5347da66e9 100644 --- a/apps/README.md +++ b/apps/README.md @@ -3,9 +3,9 @@ This folder contains various extension projects using TVM, they also serve as examples on how to use TVM in your own project. If you are interested in writing optimized kernels with TVM, checkout [TOPI: TVM Operator Inventory](../topi). +If you are interested in end-to-end deep learning model compilation, check out [NNVM Compiler](https://github.com/dmlc/nnvm). - [extension](extension) How to extend TVM C++ api along with python API. -- [graph_executor](graph_executor) Build nnvm graph executor with TVM. - [ios_rpc](ios_rpc) iOS RPC server. - [android_rpc](android_rpc) Android RPC server. -- [howto_deploy](howto_depploy) Tutorial on how to deploy TVM with minimum code dependency. +- [howto_deploy](howto_deploy) Tutorial on how to deploy TVM with minimum code dependency. \ No newline at end of file diff --git a/apps/graph_executor/Makefile b/apps/graph_executor/Makefile deleted file mode 100644 index e9969ba833e4..000000000000 --- a/apps/graph_executor/Makefile +++ /dev/null @@ -1,61 +0,0 @@ -# Minimum Makefile for the extension package -TVM_ROOT=$(shell cd ../..; pwd) -NNVM_PATH=nnvm -DMLC_CORE=${TVM_ROOT}/dmlc-core - -PKG_CFLAGS = -std=c++11 -O2 -fPIC\ - -I${TVM_ROOT}/include\ - -I${DMLC_CORE}/include\ - -I${TVM_ROOT}/dlpack/include\ - -I${TVM_ROOT}/HalideIR/src - -PKG_LDFLAGS = -UNAME_S := $(shell uname -s) - -ifeq ($(UNAME_S), Darwin) - PKG_LDFLAGS += -undefined dynamic_lookup - WHOLE_ARCH= -all_load - NO_WHOLE_ARCH= -noall_load -else - WHOLE_ARCH= --whole-archive - NO_WHOLE_ARCH= --no-whole-archive -endif - -NNVM_CONTRIB_SRC = $(wildcard src/*.cc) -NNVM_CONTRIB_OBJ = $(patsubst src/%.cc, build/%.o, $(NNVM_CONTRIB_SRC)) - -include $(DMLC_CORE)/make/dmlc.mk - -ALL_DEP = $(NNVM_CONTRIB_OBJ) - -PKG_CFLAGS += -I${NNVM_PATH}/include -ALL_DEP += ${DMLC_CORE}/libdmlc.a ${NNVM_PATH}/lib/libnnvm.a - -.PHONY: clean all - -all: lib/libtvm_graph_exec.so - -nnvm: - git clone https://github.com/dmlc/nnvm --recursive - -nnvm/lib/libnnvm.a: | nnvm - + cd nnvm; make ; cd - - -$(DMLC_CORE)/libdmlc.a: - + cd $(DMLC_CORE); make libdmlc.a; cd $(TVM_ROOT) - -build/%.o: src/%.cc | nnvm - @mkdir -p $(@D) - $(CXX) $(PKG_CFLAGS) -MM -MT build/$*.o $< >build/$*.d - $(CXX) -c $(PKG_CFLAGS) -c $< -o $@ - -lib/libtvm_graph_exec.so: $(ALL_DEP) - @mkdir -p $(@D) - $(CXX) $(PKG_CFLAGS) -shared -o $@ $(filter %.o, $^) $(PKG_LDFLAGS) \ - -Wl,${WHOLE_ARCH} $(filter %.a, $^) -Wl,${NO_WHOLE_ARCH} $(PKG_LDFLAGS) - -clean: - $(RM) -rf build lib bin *~ */*~ */*/*~ */*/*/*~ */*.o */*/*.o */*/*/*.o */*.d */*/*.d */*/*/*.d - --include build/*.d --include build/*/*.d diff --git a/apps/graph_executor/README.md b/apps/graph_executor/README.md deleted file mode 100644 index 59519e2d24f3..000000000000 --- a/apps/graph_executor/README.md +++ /dev/null @@ -1,6 +0,0 @@ -Example Graph Executor -====================== -This folder contains a minimum example of graph executor library based on TVM and NNVM. -It demonstrates how to build a computation graph compilation and execution framework. - -- The to build library, need to clone and build into root of the repo.
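As the updated README notes, end-to-end model compilation now lives in NNVM Compiler rather than in the deleted graph_executor example. For reference, a minimal sketch of the replacement workflow, mirroring the removed test_executor.py below; it assumes the nnvm.compiler and tvm.contrib.graph_runtime APIs of that era, so exact names may differ between releases.

    # Sketch: end-to-end compilation with NNVM Compiler, standing in for
    # the deleted tvm_graph example. nnvm.compiler.build and
    # graph_runtime.create are the era's documented entry points.
    import numpy as np
    import nnvm.compiler
    import nnvm.symbol as sym
    import tvm
    from tvm.contrib import graph_runtime

    x = sym.Variable('x')
    y = sym.Variable('y')
    z = sym.exp(y + x)
    shape = (10, 128)
    # Compile the symbolic graph into a deployable module for LLVM CPU.
    graph, lib, params = nnvm.compiler.build(
        z, target='llvm', shape={'x': shape, 'y': shape})
    # Create the runtime, feed inputs, run, and fetch the output.
    m = graph_runtime.create(graph, lib, tvm.cpu(0))
    na = tvm.nd.array(np.ones(shape).astype('float32'))
    nb = tvm.nd.array(np.ones(shape).astype('float32'))
    m.set_input('x', na)
    m.set_input('y', nb)
    m.run()
    out = m.get_output(0, tvm.nd.empty(shape))
    np.testing.assert_allclose(
        out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy()))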
diff --git a/apps/graph_executor/python/tvm_graph/__init__.py b/apps/graph_executor/python/tvm_graph/__init__.py deleted file mode 100644 index 286aef3f30d9..000000000000 --- a/apps/graph_executor/python/tvm_graph/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -"""The graph build library""" -from __future__ import absolute_import as _abs -import tvm -from . import _base -from nnvm.symbol import * -from . import op_tvm_def -from .build import build, bind, save_params, compile_graph, remote_load_exec - - diff --git a/apps/graph_executor/python/tvm_graph/_base.py b/apps/graph_executor/python/tvm_graph/_base.py deleted file mode 100644 index 3005e0e5a435..000000000000 --- a/apps/graph_executor/python/tvm_graph/_base.py +++ /dev/null @@ -1,20 +0,0 @@ -from __future__ import absolute_import as _abs -import os -import sys - -if sys.version_info[0] == 3: - import builtins as __builtin__ -else: - import __builtin__ - -curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) - -if hasattr(__builtin__, "NNVM_BASE_PATH"): - assert __builtin__.NNVM_BASE_PATH == curr_path -else: - __builtin__.NNVM_BASE_PATH = curr_path - -if hasattr(__builtin__, "NNVM_LIBRARY_NAME"): - assert __builtin__.NNVM_LIBRARY_NAME == curr_path -else: - __builtin__.NNVM_LIBRARY_NAME = "libtvm_graph_exec" diff --git a/apps/graph_executor/python/tvm_graph/build.py b/apps/graph_executor/python/tvm_graph/build.py deleted file mode 100644 index 424595f684ce..000000000000 --- a/apps/graph_executor/python/tvm_graph/build.py +++ /dev/null @@ -1,95 +0,0 @@ -"""Logics related to build.""" -import nnvm.graph as graph -import tvm -import json - -DTYPE_DICT = { - "float32": 0 -} - -_create_exec = tvm.get_global_func("tvm_graph._create_executor") - -def build(sym, target, shape, dtype="float32"): - # Do shape inference in python. 
- g = graph.create(sym) - jgraph = json.loads(g.apply('SaveJSON').json_attr('json')) - jnodes = jgraph['nodes'] - jnode_row_ptr = jgraph['node_row_ptr'] - nindex = {n['name']: i for i, n in enumerate(jnodes)} - list_shape = [[]] * jnode_row_ptr[-1] - list_dtype = [DTYPE_DICT[dtype]] * jnode_row_ptr[-1] - for k, v in shape.items(): - list_shape[jnode_row_ptr[nindex[k]]] = v - g._set_json_attr("shape", list_shape, 'list_shape') - g._set_json_attr("dtype", list_dtype, 'list_int') - g._set_json_attr("target", target, 'str') - g = g.apply("InferShape").apply("InferType") - g = g.apply("GraphPartition").apply("GraphFuse") - return g - - -def bind(g, ctx): - m = _create_exec(g.handle, ctx.device_type, ctx.device_id) - return m - -_get_module = tvm.get_global_func("tvm_graph._get_module_from_graph") - -def compile_graph(lib_fname, sym, target, shape, dtype="float32"): - g = build(sym, target, shape, dtype) - m = _get_module(g.handle) - m.save(lib_fname) - json_str = g.apply('SaveJSON').json_attr('json') - return json_str - -@tvm.register_func("tvm_graph.lower") -def _lower(sch, inputs, func_name): - f = tvm.lower(sch, inputs, name=func_name) - return f if isinstance( - f, (tvm.container.Array, tuple, list)) else [f] - - -@tvm.register_func("tvm_graph.build_target") -def _build(funcs, target): - return tvm.build(funcs, target=target) - - -_save_param_dict = tvm.get_global_func("tvm_graph._save_param_dict") - -def save_params(fname, params): - args = [] - args.append(fname) - args.append(len(params)) - for kv in params.items(): - args.append(kv[0]) - args.append(kv[1]) - _save_param_dict(*args) - - -def remote_load_exec(sess, sym_json, remote_module_name, param_blob, ctx): - """Load a remote graph executor, with the local files. - Parameters - ---------- - sym_json : str - The symbol json file. - - remote_module_fname : str - The relative library location to remote temp folder. The - library need to be uploaded first. - - param_blob : bytes or bytearray - The binary file to the local parameters. - - Returns - ------- - exec : GraphExecutor - The remote graph executor containing remote function. - """ - if "load_executor" not in sess._remote_funcs: - sess._remote_funcs["load_executor"] = sess.get_function("tvm_graph._load_executor") - assert ctx.device_type / tvm.contrib.rpc.RPC_SESS_MASK == sess._tbl_index + 1 - device_type = ctx.device_type % tvm.contrib.rpc.RPC_SESS_MASK - return sess._remote_funcs["load_executor"](sym_json, - remote_module_name, - bytearray(param_blob), - device_type, - ctx.device_id) diff --git a/apps/graph_executor/python/tvm_graph/op_tvm_def.py b/apps/graph_executor/python/tvm_graph/op_tvm_def.py deleted file mode 100644 index 96da5cde9e07..000000000000 --- a/apps/graph_executor/python/tvm_graph/op_tvm_def.py +++ /dev/null @@ -1,16 +0,0 @@ -"""NNVM operator definitions.""" -import tvm - -@tvm.register_func("tvm_graph.compute.add") -def compute_add(a, b): - return tvm.compute(a.shape, lambda *i: a(*i) + b(*i)) - -@tvm.register_func("tvm_graph.compute.exp") -def compute_exp(a): - return tvm.compute(a.shape, lambda *i: tvm.exp(a(*i))) - -@tvm.register_func("tvm_graph.schedule.ewise") -def schedule_ewise(outs, target): - s = tvm.create_schedule([x.op for x in outs]) - tvm.schedule.AutoInlineElemWise(s) - return s diff --git a/apps/graph_executor/src/graph_executor.cc b/apps/graph_executor/src/graph_executor.cc deleted file mode 100644 index d0a755f3e443..000000000000 --- a/apps/graph_executor/src/graph_executor.cc +++ /dev/null @@ -1,375 +0,0 @@ -/*! 
- * Copyright (c) 2017 by Contributors - * \file graph_executor.cc - */ -#include "./graph_executor.h" - -namespace tvm { -namespace contrib { - -PackedFunc GraphExecutor::GetFunction( - const std::string& name, - const std::shared_ptr& sptr_to_self) { - // return member functions during query. - if (name == "set_input") { - return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - if (args[0].type_code() == kStr) { - this->SetInput(this->GetIndex(args[0]), args[1]); - } else { - this->SetInput(args[0], args[1]); - } - }); - } else if (name == "get_output") { - return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - this->GetOutput(args[0], args[1]); - }); - } else if (name == "run") { - return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - this->Run(); - }); - } else if (name == "load_params") { - return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { - this->LoadParamsFromBlob(args[0]); - }); - } else { - return PackedFunc(); - } -} - -GraphExecutor::~GraphExecutor() { - for (DLTensor* t : storage_pool_) { - TVM_CCALL(TVMArrayFree(t)); - } -} - -void GraphExecutor::Run() { - // setup the array and requirements. - for (size_t i = 0; i < op_execs_.size(); ++i) { - if (op_execs_[i]) op_execs_[i](); - } -} - -void GraphExecutor::Init(const nnvm::Graph& g, TVMContext ctx) { - graph_ = g; - ctx_ = ctx; - module_ = g.GetAttr("module"); - this->SetupNameIndex(); - this->SetupStorage(); - this->SetupOpExecs(); -} - -int GraphExecutor::GetIndex(std::string name) { - CHECK(name_idx_.count(name)) - << name << " is not in the graph."; - return name_idx_.at(name); -} - -void GraphExecutor::SetInput(int index, DLTensor* data_in) { - const auto& idx = graph_.indexed_graph(); - CHECK_LT(static_cast(index), idx.input_nodes().size()); - uint32_t eid = idx.entry_id(idx.input_nodes()[index], 0); - TVM_CCALL(TVMArrayCopyFromTo(data_in, &data_entry_[eid], nullptr)); -} - -void GraphExecutor::GetOutput(int index, DLTensor* data_out) { - const auto& idx = graph_.indexed_graph(); - CHECK_LT(static_cast(index), idx.outputs().size()); - uint32_t eid = idx.entry_id(idx.outputs()[index]); - TVM_CCALL(TVMArrayCopyFromTo(&data_entry_[eid], data_out, nullptr)); -} - -bool LoadDLTensor(dmlc::Stream* strm, DLTensor* tensor) { - uint64_t header, reserved; - CHECK(strm->Read(&header, sizeof(header))) - << "Invalid DLTensor file format"; - CHECK(strm->Read(&reserved, sizeof(reserved))) - << "Invalid DLTensor file format"; - CHECK(header == kTVMNDArrayMagic) - << "Invalid DLTensor file format"; - - CHECK(strm->Read(&tensor->ctx, sizeof(tensor->ctx))) - << "Invalid DLTensor file format"; - CHECK(strm->Read(&tensor->ndim, sizeof(tensor->ndim))) - << "Invalid DLTensor file format"; - CHECK(strm->Read(&tensor->dtype, sizeof(tensor->dtype))) - << "Invalid DLTensor file format"; - - int ndim = tensor->ndim; - CHECK(strm->Read(tensor->shape, sizeof(int64_t) * ndim)) - << "Invalid DLTensor file format"; - - int64_t size = 1; - int type_size = tensor->dtype.bits / 8; - for (int i = 0; i < ndim; ++i) { - size *= tensor->shape[i]; - } - int64_t data_byte_size; - CHECK(strm->Read(&data_byte_size, sizeof(data_byte_size))) - << "Invalid DLTensor file format"; - CHECK(data_byte_size == type_size * size) - << "Invalid DLTensor file format"; - CHECK(strm->Read(tensor->data, type_size * size)) - << "Invalid DLTensor file format"; - return true; -} - -void GraphExecutor::LoadParams(dmlc::Stream *strm) { - uint64_t header, reserved; - CHECK(strm->Read(&header)) - << "Invalid 
parameters file format"; - CHECK(header == kTVMNDArrayListMagic) - << "Invalid parameters file format"; - CHECK(strm->Read(&reserved)) - << "Invalid parameters file format"; - - std::vector names; - CHECK(strm->Read(&names)) - << "Invalid parameters file format"; - - std::unordered_map name_eid; - const auto& idx = graph_.indexed_graph(); - for (int nid : idx.input_nodes()) { - name_eid.emplace(idx[nid].source->attrs.name, idx.entry_id(nid, 0)); - } - - uint64_t sz; - strm->Read(&sz, sizeof(sz)); - size_t size = static_cast(sz); - CHECK(size == names.size()) - << "Invalid parameters file format"; - for (size_t i = 0; i < size; ++i) { - auto iter = name_eid.find(names[i]); - CHECK(iter != name_eid.end()); - CHECK(LoadDLTensor(strm, &data_entry_[iter->second])) - << "Invalid parameters file format"; - } -} - -void GraphExecutor::LoadParamsFromBlob(std::string param_blob) { - dmlc::MemoryStringStream strm(¶m_blob); - this->LoadParams(&strm); -} - -void GraphExecutor::SetupNameIndex() { - nnvm::Symbol s; - s.outputs = graph_.outputs; - std::vector input_names = s.ListInputNames(nnvm::Symbol::kAll); - for (size_t i = 0; i < input_names.size(); ++i) { - name_idx_[input_names[i]] = i; - } -} - -void GraphExecutor::SetupStorage() { - const auto& idx = graph_.indexed_graph(); - // Grab saved optimization plan from graph. - auto vstorage = graph_.MoveCopyAttr("storage_id"); - const auto& vtype = graph_.GetAttr("dltype"); - data_shape_ = graph_.GetAttr("shape"); - data_entry_.resize(idx.num_node_entries()); - - // Find the maximum space size. - int max_id = 0; - for (size_t i = 0; i < data_shape_.size(); ++i) { - max_id = std::max(vstorage[i] + 1, max_id); - } - for (const auto& e : idx.input_nodes()) { - vstorage[idx.entry_id(e, 0)] = max_id++; - } - // size of each storage pool entry - std::vector pool_entry_bytes; - // Find the maximum space size. - for (size_t i = 0; i < data_shape_.size(); ++i) { - int storage_id = vstorage[i]; - size_t size = data_shape_[i].Size(); - CHECK_GE(storage_id, 0) << "Do not support runtime shape op"; - - DLDataType t = vtype[i]; - size_t bits = t.bits * t.lanes; - CHECK_EQ(bits % 8U, 0U); - size_t bytes = (bits / 8U) * size; - - size_t sid = static_cast(storage_id); - if (sid >= pool_entry_bytes.size()) { - pool_entry_bytes.resize(sid + 1, 0); - } - pool_entry_bytes[sid] = std::max(pool_entry_bytes[sid], bytes); - } - // Allocate the space. - for (size_t i = 0; i < pool_entry_bytes.size(); ++i) { - TShape shape{static_cast(pool_entry_bytes[i] + 3) / 4}; - DLTensor* tensor; - TVM_CCALL(TVMArrayAlloc( - shape.data(), 1, kFloat, 32, 1, ctx_.device_type, ctx_.device_id, &tensor)); - storage_pool_.push_back(tensor); - } - // Assign the pooled entries. - for (size_t i = 0; i < data_entry_.size(); ++i) { - int storage_id = vstorage[i]; - data_entry_[i] = *storage_pool_[storage_id]; - data_entry_[i].shape = const_cast(data_shape_[i].data()); - data_entry_[i].ndim = data_shape_[i].ndim(); - data_entry_[i].dtype = vtype[i]; - } -} - -void GraphExecutor::SetupOpExecs() { - static const nnvm::Op* tvm_op = nnvm::Op::Get("tvm_op"); - const auto& idx = graph_.indexed_graph(); - op_execs_.resize(idx.num_nodes()); - // setup the array and requirements. 
- for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) continue; - std::vector args; - for (const auto& e : inode.inputs) { - args.push_back(data_entry_[idx.entry_id(e)]); - } - for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) { - uint32_t eid = idx.entry_id(nid, index); - args.push_back(data_entry_[eid]); - } - CHECK_EQ(inode.source->op(), tvm_op) - << "transform the graph to tvm op"; - op_execs_[nid] = CreateTVMOp( - inode.source->attrs, args, inode.inputs.size()); - } -} - -FOpExec GraphExecutor::CreateTVMOp(const nnvm::NodeAttrs& attrs, - std::vector args, - size_t num_inputs) { - struct OpArgs { - std::vector args; - std::vector arg_values; - std::vector arg_tcodes; - std::vector shape_data; - }; - auto it = attrs.dict.find("func_name"); - CHECK(it != attrs.dict.end()) - << "tvm_op must need func_name attr"; - bool flatten = (attrs.dict.at("flatten_data") == "1"); - std::shared_ptr arg_ptr = std::make_shared(); - // setup address. - arg_ptr->args = std::move(args); - if (flatten) { - arg_ptr->shape_data.resize(arg_ptr->args.size()); - } - for (size_t i = 0; i < arg_ptr->args.size(); ++i) { - TVMValue v; - DLTensor* t = &(arg_ptr->args[i]); - v.v_handle = t; - arg_ptr->arg_values.push_back(v); - arg_ptr->arg_tcodes.push_back(kArrayHandle); - if (flatten) { - int64_t s = 1; - arg_ptr->shape_data[i] = std::accumulate( - t->shape, t->shape + t->ndim, 1, std::multiplies()); - t->ndim = 1; - t->shape = &(arg_ptr->shape_data[i]); - } - } - // get compiled function from module. - runtime::PackedFunc pf = module_.GetFunction(it->second, false); - CHECK(pf != nullptr) << "no such function in module: " << it->second; - auto fexec = [arg_ptr, pf] () { - runtime::TVMRetValue rv; - runtime::TVMArgs targs(arg_ptr->arg_values.data(), - arg_ptr->arg_tcodes.data(), - static_cast(arg_ptr->arg_values.size())); - pf.CallPacked(targs, &rv); - }; - return fexec; -} - -/*! 
\brief Parse keyword arguments as PType arguments and save to parsed */ -template -inline void ParamParser(nnvm::NodeAttrs* attrs) { - PType param; - try { - param.Init(attrs->dict); - } catch (const dmlc::ParamError& e) { - std::ostringstream os; - os << e.what(); - os << ", in operator " << attrs->op->name << "(" - << "name=\"" << attrs->name << "\""; - for (const auto& k : attrs->dict) { - os << ", " << k.first << "=\"" << k.second << "\""; - } - os << ")"; - throw dmlc::ParamError(os.str()); - } - attrs->parsed = std::move(param); -} - -DMLC_REGISTER_PARAMETER(TVMOpParam); - -// ewise tvm op -NNVM_REGISTER_OP(tvm_op) -.set_attr_parser(ParamParser) -.set_num_inputs([](const NodeAttrs& attrs) { - const TVMOpParam& param = nnvm::get(attrs.parsed); - return param.num_inputs; - }) -.set_num_outputs([](const NodeAttrs& attrs) { - const TVMOpParam& param = nnvm::get(attrs.parsed); - return param.num_outputs; - }); - -TVM_REGISTER_GLOBAL("tvm_graph._load_executor") -.set_body([](TVMArgs args, TVMRetValue *rv) { - std::string sym_json = args[0]; - std::string lib_fname = args[1]; - std::string param_blob = args[2]; - TVMContext ctx; - ctx.device_type = static_cast(args[3].operator int()); - ctx.device_id = args[4]; - - // load graph from json string - nnvm::Graph g; - g.attrs["json"] = std::make_shared(sym_json); - g = nnvm::ApplyPass(std::move(g), "LoadJSON"); - - // load module from file - static const PackedFunc* fsys_load_ = nullptr; - if (fsys_load_ == nullptr) { - fsys_load_ = runtime::Registry::Get("tvm.contrib.rpc.server.load_module"); - CHECK(fsys_load_ != nullptr); - } - runtime::Module m = (*fsys_load_)(lib_fname); - g.attrs["module"] = std::make_shared(m); - - std::shared_ptr exec = - std::make_shared(); - exec->Init(g, ctx); - - // load params form stream of string - exec->LoadParamsFromBlob(std::move(param_blob)); - - *rv = tvm::runtime::Module(exec); - }); -} // namespace contrib -} // namespace tvm - -namespace dmlc { -namespace json { - -template<> -struct Handler { - static void Write(JSONWriter *writer, const DLDataType& data) { - std::vector tmp({data.code, data.bits, data.lanes}); - writer->Write(tmp); - } - - static void Read(JSONReader *reader, DLDataType* data) { - std::vector tmp; - reader->Read(&tmp); - data->code = tmp[0]; - data->bits = tmp[1]; - data->lanes = tmp[2]; - } -}; - -DMLC_JSON_ENABLE_ANY(std::vector, list_dltype); - -} // namespace dmlc -} // namespace json diff --git a/apps/graph_executor/src/graph_executor.h b/apps/graph_executor/src/graph_executor.h deleted file mode 100644 index 3953646c271a..000000000000 --- a/apps/graph_executor/src/graph_executor.h +++ /dev/null @@ -1,119 +0,0 @@ -/*! - * Copyright (c) 2017 by Contributors - * \file graph_executor.h - */ -#ifndef TVM_GRAPH_EXECUTOR_H_ -#define TVM_GRAPH_EXECUTOR_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace tvm { -namespace contrib { - -using tvm::runtime::TVMArgs; -using tvm::runtime::TVMRetValue; -using tvm::runtime::PackedFunc; -using nnvm::StorageVector; -using nnvm::ShapeVector; -using nnvm::TShape; -using nnvm::NodeAttrs; - -/*! \brief DLPack compatible data types */ -using DLTypeVector = std::vector; - -/*! \brief The executor function */ -using FOpExec = std::function; - -/*! 
\brief macro to do C API call */ -#define TVM_CCALL(func) \ - { \ - int ret = (func); \ - CHECK_EQ(ret, 0) \ - << TVMGetLastError(); \ - } - -constexpr uint64_t kTVMNDArrayMagic = 0xDD5E40F096B4A13F; -constexpr uint64_t kTVMNDArrayListMagic = 0xF7E58D4F05049CB7; - -/*! \brief Graph Executor with TVM runtime */ -class GraphExecutor : public runtime::ModuleNode { - public: - const char* type_key() const { - return "GraphExecutor"; - } - PackedFunc GetFunction( - const std::string& name, - const std::shared_ptr& sptr_to_self); - // Destructor - ~GraphExecutor(); - // Setup with a given graph - void Init(const nnvm::Graph& g, TVMContext ctx); - // Get index of variable - int GetIndex(std::string name); - // Copy data to index-th input - void SetInput(int index, DLTensor* data_in); - // Copy index-th output to data_out - void GetOutput(int index, DLTensor* data_out); - // Load parameters from stream - void LoadParams(dmlc::Stream* strm); - // Load parameters from binary file blob - void LoadParamsFromBlob(std::string param_blob); - // Execute the graph. - void Run(); - - private: - // functions - void SetupNameIndex(); - void SetupStorage(); - void SetupOpExecs(); - // Constructor to create TVM op - FOpExec CreateTVMOp(const nnvm::NodeAttrs& attrs, - std::vector inputs, - size_t num_inputs); - // The graph to be executed. - nnvm::Graph graph_; - // The execution context - TVMContext ctx_; - // Common storage pool - std::vector storage_pool_; - // The data shape - std::vector data_shape_; - // The data entry - std::vector data_entry_; - // The operation lambda on each node - std::vector op_execs_; - // The code module. - tvm::runtime::Module module_; - std::unordered_map name_idx_; -}; - - -struct TVMOpParam : public dmlc::Parameter { - std::string func_name; - uint32_t num_inputs; - uint32_t num_outputs; - bool flatten_data; - DMLC_DECLARE_PARAMETER(TVMOpParam) { - DMLC_DECLARE_FIELD(func_name); - DMLC_DECLARE_FIELD(num_inputs) - .set_default(1); - DMLC_DECLARE_FIELD(num_outputs) - .set_default(1); - DMLC_DECLARE_FIELD(flatten_data) - .set_default(false); - } -}; -} // namespace contrib -} // namespace tvm - -#endif // TVM_GRAPH_EXECUTOR_H_ diff --git a/apps/graph_executor/src/graph_executor_ext.cc b/apps/graph_executor/src/graph_executor_ext.cc deleted file mode 100644 index 2ab2a8354b9f..000000000000 --- a/apps/graph_executor/src/graph_executor_ext.cc +++ /dev/null @@ -1,87 +0,0 @@ -/*! 
- * Copyright (c) 2017 by Contributors - * \file graph_executor_ext.cc - */ -#include "./graph_executor.h" - -namespace tvm { -namespace contrib { - -bool SaveDLTensor(dmlc::Stream* strm, DLTensor* tensor) { - uint64_t header = kTVMNDArrayMagic, reserved = 0; - strm->Write(&header, sizeof(header)); - strm->Write(&reserved, sizeof(reserved)); - - strm->Write(&tensor->ctx, sizeof(tensor->ctx)); - strm->Write(&tensor->ndim, sizeof(tensor->ndim)); - strm->Write(&tensor->dtype, sizeof(tensor->dtype)); - - int ndim = tensor->ndim; - strm->Write(tensor->shape, sizeof(int64_t) * ndim); - - int type_size = tensor->dtype.bits / 8; - int64_t size = 1; - for (int i = 0; i < ndim; ++i) { - size *= tensor->shape[i]; - } - int64_t data_byte_size = type_size * size; - strm->Write(&data_byte_size, sizeof(data_byte_size)); - strm->Write(tensor->data, data_byte_size); - return true; -} - -TVM_REGISTER_GLOBAL("tvm_graph._save_param_dict") -.set_body([](TVMArgs args, TVMRetValue *rv) { - std::string fname = args[0]; - int num_params = args[1]; - std::vector names; - names.reserve(num_params); - std::vector arrays; - arrays.reserve(num_params); - for (int i = 2; i < (2 + 2*num_params); i += 2) { - names.emplace_back(args[i].operator std::string()); - arrays.emplace_back(args[i+1].operator DLTensor*()); - } - - std::unique_ptr fo(dmlc::Stream::Create(fname.c_str(), "w")); - uint64_t header = kTVMNDArrayListMagic, reserved = 0; - fo->Write(&header, sizeof(header)); - fo->Write(&reserved, sizeof(reserved)); - - fo->Write(names); - { - uint64_t sz = static_cast(arrays.size()); - fo->Write(&sz, sizeof(sz)); - for (size_t i = 0; i < sz; ++i) { - SaveDLTensor(fo.get(), arrays[i]); - } - } - }); - -// Create executor -tvm::runtime::Module CreateExecutor(nnvm::Graph g, TVMContext ctx) { - std::shared_ptr exec = - std::make_shared(); - exec->Init(g, ctx); - return tvm::runtime::Module(exec); -} - -TVM_REGISTER_GLOBAL("tvm_graph._create_executor") -.set_body([](TVMArgs args, TVMRetValue *rv) { - void* graph_handle = args[0]; - int device_type = args[1]; - int device_id = args[2]; - TVMContext ctx{static_cast(device_type), device_id}; - nnvm::Graph g = static_cast(graph_handle)[0]; - *rv = CreateExecutor(g, ctx); - }); - - -TVM_REGISTER_GLOBAL("tvm_graph._get_module_from_graph") -.set_body([](TVMArgs args, TVMRetValue *rv) { - void* graph_handle = args[0]; - nnvm::Graph* g = static_cast(graph_handle); - *rv = g->MoveCopyAttr("module"); - }); -} // namespace contrib -} // namespace tvm diff --git a/apps/graph_executor/src/graph_handle.cc b/apps/graph_executor/src/graph_handle.cc deleted file mode 100644 index 3754946a2e85..000000000000 --- a/apps/graph_executor/src/graph_handle.cc +++ /dev/null @@ -1,19 +0,0 @@ -/*! - * Copyright (c) 2017 by Contributors - * \file graph_handle.cc - */ -#include -#include "./graph_handle.h" - -namespace tvm { - -TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) -.set_dispatch([](const GraphHandleNode *op, IRPrinter *p) { - p->stream << "graph-handle(" - << "handle=0x" << std::hex - << reinterpret_cast(op->graph_handle) << ")"; -}); - -TVM_REGISTER_NODE_TYPE(GraphHandleNode); - -} // namespace tvm diff --git a/apps/graph_executor/src/graph_handle.h b/apps/graph_executor/src/graph_handle.h deleted file mode 100644 index f23e1a2b3759..000000000000 --- a/apps/graph_executor/src/graph_handle.h +++ /dev/null @@ -1,33 +0,0 @@ -/*! - * Copyright (c) 2017 by Contributors - * \file graph.h - * \brief Data structure about computational graph. 
- */ -#ifndef TVM_GRAPH_HANDLE_H_ -#define TVM_GRAPH_HANDLE_H_ - -#include -#include - -namespace tvm { - -/*! - * \brief Computational graph handle. - * Use GraphHandle as its container type - */ -struct GraphHandleNode : public Node { - void *graph_handle; - - void VisitAttrs(AttrVisitor* v) final { - v->Visit("graph_handle", &graph_handle); - } - - static constexpr const char* _type_key = "GraphHandle"; - TVM_DECLARE_NODE_TYPE_INFO(GraphHandleNode, Node); -}; - -/*! \brief Defines graph handle */ -TVM_DEFINE_NODE_REF(GraphHandle, GraphHandleNode); - -} // namespace tvm -#endif // TVM_GRAPH_HANDLE_H_ diff --git a/apps/graph_executor/src/graph_pass.cc b/apps/graph_executor/src/graph_pass.cc deleted file mode 100644 index c70e7a01d889..000000000000 --- a/apps/graph_executor/src/graph_pass.cc +++ /dev/null @@ -1,606 +0,0 @@ -/*! - * Copyright (c) 2017 by Contributors - * \file Additional optimization pass of NNVM. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "./op_attr_types.h" - -namespace tvm { -namespace contrib { - -using nnvm::any; -using nnvm::IndexedGraph; - -// The single fuse rule. -enum class FuseRule { - kUknown, - kFuseToMaster, - kRealize -}; - -DLDataType GetDLType(int type_flag) { - if (type_flag == 0) return Type2TVMType(Float(32)); - LOG(FATAL) << "unknown type_flag=" << type_flag; - return Type2TVMType(Float(32)); -} - -// Partition the graph into segments -// Each segment will be compiled into one operator. -// Need also mark the property of the segment. -nnvm::Graph GraphPartition(nnvm::Graph g) { - // setup ref counter - const IndexedGraph& idx = g.indexed_graph(); - // Get attributes from the graph - const ShapeVector& shape_vec = g.GetAttr("shape"); - const DTypeVector& dtype_vec = g.GetAttr("dtype"); - // Transform to dltype - // In future, directly fo type inference in dltype. - DLTypeVector dltype_vec = DLTypeVector(dtype_vec.size()); - for (size_t i = 0; i < dtype_vec.size(); ++i) { - dltype_vec[i] = GetDLType(dtype_vec[i]); - } - - // Reference counter of each op node - // For now, always store result when an op is referred more than once. - std::vector ref_count(idx.num_nodes(), 0); - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) continue; - for (const auto& e : inode.inputs) { - ++ref_count[e.node_id]; - } - } - for (const auto& e : idx.outputs()) { - // this line will realize all the outputs - ref_count[e.node_id] += 2; - } - // Pattern fo the subgraph - std::vector pattern_vec(idx.num_nodes(), kExtern); - // Whether node can be fused to parent. - std::vector fuse_vec(idx.num_nodes(), FuseRule::kUknown); - // Master node id of fusion segment. 
- std::vector master_vec(idx.num_nodes(), -1); - // Operator pattern - static auto& op_pattern = nnvm::Op::GetAttr("TOpPattern"); - - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) { - fuse_vec[nid] = FuseRule::kRealize; continue; - } - TOpPattern pt = op_pattern.get(inode.source->op(), kExtern); - - if (pt <= kBroadcast) { - int chosen_master = -1; - bool ewise = inode.source->num_outputs() == 1; - for (const auto& e : inode.inputs) { - if (fuse_vec[e.node_id] == FuseRule::kUknown) { - TOpPattern ipt = pattern_vec[e.node_id]; - if (ipt != kElemWise) ewise = false; - if (ipt <= kBroadcast) { - fuse_vec[e.node_id] = FuseRule::kFuseToMaster; - } else if (ipt == kComplex && chosen_master == -1 && - shape_vec[idx.entry_id(nid, 0)] == shape_vec[idx.entry_id(e)]) { - chosen_master = master_vec[e.node_id]; - fuse_vec[e.node_id] = FuseRule::kFuseToMaster; - } else { - fuse_vec[e.node_id] = FuseRule::kRealize; - } - } - if (ewise) { - if (shape_vec[idx.entry_id(nid, 0)] != shape_vec[idx.entry_id(e)]) { - ewise = false; - } - } - } - master_vec[nid] = chosen_master; - if (chosen_master != -1) { - pt = kComplex; - } else { - pt = ewise ? kElemWise : kBroadcast; - } - } else { - master_vec[nid] = nid; - for (const auto& e : inode.inputs) { - if (fuse_vec[e.node_id] == FuseRule::kUknown) { - fuse_vec[e.node_id] = FuseRule::kRealize; - if (master_vec[e.node_id] == -1) { - master_vec[e.node_id] = e.node_id; - } - } - } - } - - pattern_vec[nid] = pt; - if (ref_count[nid] > 1) { - fuse_vec[nid] = FuseRule::kRealize; - if (master_vec[nid] == -1) { - master_vec[nid] = nid; - } - } - } - - - // point to the group root id of each node - std::vector group_vec(idx.num_nodes(), -1); - for (uint32_t i = idx.num_nodes(); i != 0; --i) { - uint32_t nid = i - 1; - const auto& inode = idx[nid]; - if (group_vec[nid] == -1) { - group_vec[nid] = nid; - } - // propagate the group id. - for (const auto& e : inode.inputs) { - if (fuse_vec[e.node_id] == FuseRule::kFuseToMaster) { - CHECK(group_vec[e.node_id] == -1|| - group_vec[e.node_id] == group_vec[nid]); - group_vec[e.node_id] = group_vec[nid]; - } - } - } - g.attrs["group_root"] = std::make_shared(std::move(group_vec)); - g.attrs["group_master"] = std::make_shared(std::move(master_vec)); - g.attrs["pattern"] = std::make_shared(std::move(pattern_vec)); - g.attrs["dltype"] = std::make_shared(std::move(dltype_vec)); - return g; -} - -NNVM_REGISTER_PASS(GraphPartition) -.set_body(GraphPartition) -.depend_graph_attr("shape") -.depend_graph_attr("dtype") -.provide_graph_attr("dltype"); - -struct NodeEntryHash { - size_t operator()(const IndexedGraph::NodeEntry& e) const { - return e.node_id; - } -}; - -struct NodeEntryEqual { - size_t operator()(const IndexedGraph::NodeEntry& a, - const IndexedGraph::NodeEntry& b) const { - return a.node_id == b.node_id && a.index == b.index; - } -}; - -// Auxiliary data structure for representing fused op. -struct FuseEntry { - // The inputs - std::vector inputs; - // The input map - std::unordered_map imap; - // Output tensors - Array outputs; - // Placeholder for inputs - Array placeholder; - // Computing schedule - Schedule schedule; - // Function name - std::string func_name; -}; - -// Fuse the partitioned graph into segments. -// Create a new graph with fused noded. -// Also inheritate attribute shape, dltype from previous graph. 
-nnvm::Graph GraphFuse(nnvm::Graph g) { - // setup ref counter - const IndexedGraph& idx = g.indexed_graph(); - // Get attributes from the graph - const ShapeVector& shape_vec = g.GetAttr("shape"); - const DLTypeVector& dltype_vec = g.GetAttr("dltype"); - const DTypeVector& dtype_vec = g.GetAttr("dtype"); - const std::vector& group_vec = g.GetAttr >("group_root"); - const std::vector& master_vec = g.GetAttr >("group_master"); - const std::vector& pattern_vec = - g.GetAttr >("pattern"); - std::string target = g.GetAttr("target"); - std::vector fuse_vec(idx.num_nodes()); - // setup inputs and placeholder. - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) continue; - CHECK_GE(group_vec[nid], 0); - int root_id = group_vec[nid]; - FuseEntry& fe = fuse_vec[root_id]; - TOpPattern pt = pattern_vec[root_id]; - for (const auto& e : inode.inputs) { - if (group_vec[e.node_id] != root_id && fe.imap.count(e) == 0) { - Array shape; - if (pt == kElemWise) { - // elementwise support flatten - int64_t prod = 1; - for (int64_t x : shape_vec[idx.entry_id(e)]) { - prod *= x; - } - CHECK_LE(prod, static_cast(std::numeric_limits::max())); - shape.push_back(make_const(Int(32), prod)); - } else { - for (int64_t x : shape_vec[idx.entry_id(e)]) { - CHECK_LE(x, static_cast(std::numeric_limits::max())); - shape.push_back(make_const(Int(32), x)); - } - } - std::ostringstream os_name; - os_name << "input" << fe.inputs.size(); - Tensor data = placeholder( - shape, TVMType2Type(dltype_vec[idx.entry_id(e)]), - os_name.str()); - fe.imap[e] = data; - fe.inputs.push_back(e); - fe.placeholder.push_back(data); - } - } - } - // Setup the Tensor - std::vector tensor_vec(idx.num_node_entries()); - static auto& fcompute = - nnvm::Op::GetAttr("FTVMCompute"); - static auto& fschedule = - nnvm::Op::GetAttr("FTVMSchedule"); - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) continue; - int root_id = group_vec[nid]; - FuseEntry& fe = fuse_vec[root_id]; - Array inputs; - // input loading - for (const auto& e : inode.inputs) { - if (group_vec[e.node_id] != root_id) { - auto it = fe.imap.find(e); - CHECK(it != fe.imap.end()); - inputs.push_back(it->second); - } else { - Tensor t = tensor_vec[idx.entry_id(e)]; - CHECK(t.defined()); - inputs.push_back(t); - } - } - Array out = fcompute[inode.source->op()]( - inode.source->attrs, inputs); - CHECK_EQ(out.size(), inode.source->num_outputs()); - - // schedule on root node, and use master's schedule - if (nid != root_id) { - for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) { - uint32_t eid = idx.entry_id(nid, index); - tensor_vec[eid] = out[index]; - } - } else { - fe.outputs = out; - int master = master_vec[root_id]; - CHECK_GE(master, 0); - fe.schedule = fschedule[idx[master].source->op()]( - idx[master].source->attrs, fe.outputs, target); - std::ostringstream os; - os << idx[master].source->attrs.name + "_id" << nid; - fe.func_name = os.str(); - } - } - static const PackedFunc& flower = GetPackedFunc("tvm_graph.lower"); - static const PackedFunc& fbuild = GetPackedFunc("tvm_graph.build_target"); - Array funcs; - for (const FuseEntry& fe : fuse_vec) { - if (fe.schedule.defined()) { - Array args = fe.placeholder; - for (tvm::Tensor x : fe.outputs) { - args.push_back(x); - } - Array ret = flower(fe.schedule, args, fe.func_name); - for (LoweredFunc x : ret) { - funcs.push_back(x); - } - } - } - tvm::runtime::Module module = 
fbuild(funcs, target); - // Final step: Remap the node, with given attribute - const nnvm::Op* tvm_op = nnvm::Op::Get("tvm_op"); - - std::unordered_map old_new; - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - if (inode.source->is_variable()) { - nnvm::NodePtr np = nnvm::Node::Create(); - np->attrs = inode.source->attrs; - old_new[nid] = np; - } else { - int root_id = group_vec[nid]; - if (nid != root_id) continue; - FuseEntry& fe = fuse_vec[root_id]; - nnvm::NodePtr np = nnvm::Node::Create(); - np->attrs.op = tvm_op; - np->attrs.name = inode.source->attrs.name; - np->attrs.dict["num_inputs"] = std::to_string(fe.inputs.size()); - np->attrs.dict["num_outputs"] = std::to_string(fe.outputs.size()); - np->attrs.dict["func_name"] = fuse_vec[nid].func_name; - np->attrs.dict["flatten_data"] = std::to_string(pattern_vec[nid] == kElemWise); - np->op()->attr_parser(&(np->attrs)); - for (const auto& e : fe.inputs) { - auto it = old_new.find(e.node_id); - CHECK(it != old_new.end()) - << "cannot find node_id=" << e.node_id; - np->inputs.emplace_back( - nnvm::NodeEntry{it->second, e.index, e.version}); - } - for (const uint32_t node_id : inode.control_deps) { - auto it = old_new.find(node_id); - CHECK(it != old_new.end()); - np->control_deps.emplace_back(it->second); - } - old_new[nid] = np; - } - } - - nnvm::Graph ret; - for (const auto& e : idx.outputs()) { - auto it = old_new.find(group_vec[e.node_id]); - CHECK(it != old_new.end()) - << "cannot find node_id=" << e.node_id; - ret.outputs.emplace_back( - nnvm::NodeEntry{it->second, e.index, e.version}); - } - const IndexedGraph& new_idx = ret.indexed_graph(); - ShapeVector new_shape_vec = ShapeVector(new_idx.num_node_entries(), TShape()); - DTypeVector new_dtype_vec = DTypeVector(new_idx.num_node_entries()); - DLTypeVector new_dltype_vec = DLTypeVector(new_idx.num_node_entries()); - for (const auto& kv : old_new) { - uint32_t nid = kv.first; - const auto& inode = idx[nid]; - for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) { - uint32_t new_eid = new_idx.entry_id(new_idx.node_id(kv.second.get()), i); - uint32_t old_eid = idx.entry_id(nid, i); - new_shape_vec[new_eid] = shape_vec[old_eid]; - new_dtype_vec[new_eid] = dtype_vec[old_eid]; - new_dltype_vec[new_eid] = dltype_vec[old_eid]; - } - } - ret.attrs["shape"] = std::make_shared(std::move(new_shape_vec)); - ret.attrs["dtype"] = std::make_shared(std::move(new_dtype_vec)); - ret.attrs["dltype"] = std::make_shared(std::move(new_dltype_vec)); - ret.attrs["module"] = std::make_shared(std::move(module)); - ret = nnvm::ApplyPass(ret, "PlanMemory"); - return ret; -} - -NNVM_REGISTER_PASS(GraphFuse) -.set_body(GraphFuse); - -const TLayoutInfo& GetDefaultLayout() { - static TLayoutInfo default_layout = "default"; - return default_layout; -} - -nnvm::NodePtr CreateLayoutTransformNode(const std::string& src, - const std::string& dst) { - static const nnvm::Op* trans_op = nnvm::Op::Get("layout_transform"); - static int count = 0; - nnvm::NodePtr n = nnvm::Node::Create(); - n->attrs.op = trans_op; - n->attrs.name = src + "_to_" + dst + std::to_string(count++); - n->attrs.dict["src_layout"] = src; - n->attrs.dict["dst_layout"] = dst; - n->op()->attr_parser(&(n->attrs)); - return n; -} - -/*! - * \brief A simple layout transform pass that will - * insert layout transform nodes automatically. 
- */ -nnvm::Graph LayoutTransform(nnvm::Graph src) { - static auto& op_layout_request = - nnvm::Op::GetAttr("FTVMLayoutRequest"); - static auto& op_vecop = - nnvm::Op::GetAttr("FTVMVectorizedOp"); - static auto& op_pattern = nnvm::Op::GetAttr("TOpPattern"); - - const ShapeVector& shape_vec = src.GetAttr("shape"); - const std::vector& input_layouts = - src.GetAttr >("layout"); - - const IndexedGraph& idx = src.indexed_graph(); - std::vector produce_vec(idx.num_node_entries(), GetDefaultLayout()); - std::vector mirror_vec(idx.num_nodes(), nullptr); - - // use op pattern to decide whether an op is map - auto is_map_op = [&](size_t nid) { - TOpPattern pt = op_pattern.get(idx[nid].source->op(), kExtern); - bool is_map = (pt <= kBroadcast); - if (pt == kBroadcast) { - for (const auto& e : idx[nid].inputs) { - if (shape_vec[idx.entry_id(nid, 0)] != shape_vec[idx.entry_id(e)]) { - is_map = false; - break; - } - } - } - return is_map; - }; - - for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { - const auto& inode = idx[nid]; - nnvm::NodePtr new_node = nnvm::Node::Create(); - *new_node = *(inode.source); - if (new_node->is_variable()) { - auto input_iter = std::find( - idx.input_nodes().cbegin(), idx.input_nodes().cend(), nid); - CHECK(input_iter != idx.input_nodes().cend()); - size_t input_id = std::distance(idx.input_nodes().cbegin(), input_iter); - produce_vec[idx.entry_id(nid, 0)] = input_layouts[input_id]; - mirror_vec[nid] = new_node; - continue; - } - - if (op_vecop.count(inode.source->op())) { - new_node = op_vecop[inode.source->op()](inode.source); - new_node->inputs.resize(new_node->num_inputs()); - } - - // set up output and input layouts - std::vector request_ilayouts(new_node->num_inputs(), GetDefaultLayout()); - if (op_layout_request.count(new_node->op())) { - std::vector produce_olayouts(new_node->num_outputs(), GetDefaultLayout()); - CHECK(op_layout_request[new_node->op()](new_node->attrs, &request_ilayouts, &produce_olayouts)) - << "Layout request fail"; - - CHECK_EQ(request_ilayouts.size(), new_node->num_inputs()); - CHECK_EQ(produce_olayouts.size(), new_node->num_outputs()); - for (size_t i = 0; i < new_node->num_outputs(); ++i) { - produce_vec[idx.entry_id(nid, i)] = produce_olayouts[i]; - } - } - - bool map_layout = is_map_op(nid); - if (map_layout) { - const TLayoutInfo& layout = produce_vec[idx.entry_id(inode.inputs[0])]; - for (const auto& e : inode.inputs) { - if (produce_vec[idx.entry_id(e)] != layout) { - map_layout = false; - break; - } - } - if (map_layout) { - for (size_t i = 0; i < inode.source->num_outputs(); ++i) { - produce_vec[idx.entry_id(nid, i)] = layout; - } - } - } - - for (size_t i = 0; i < inode.inputs.size(); ++i) { - const auto& e = inode.inputs[i]; - const nnvm::NodePtr& in = mirror_vec[e.node_id]; - new_node->inputs[i] = - nnvm::NodeEntry{in, e.index, e.version}; - - TLayoutInfo produce = produce_vec[idx.entry_id(e)]; - TLayoutInfo request = request_ilayouts[i]; - if (!map_layout && (produce != request)) { - nnvm::NodePtr tnode = CreateLayoutTransformNode(produce, request); - tnode->attrs.name = - idx[e.node_id].source->attrs.name + "_" + request; - tnode->inputs.emplace_back(new_node->inputs[i]); - new_node->inputs[i] = nnvm::NodeEntry{tnode, 0, 0}; - } - } - mirror_vec[nid] = new_node; - } - - std::vector outputs; - for (const auto& e : idx.outputs()) { - TLayoutInfo produce = produce_vec[idx.entry_id(e)]; - if (produce != GetDefaultLayout()) { - nnvm::NodePtr tnode = CreateLayoutTransformNode(produce, GetDefaultLayout()); - tnode->attrs.name = - 
idx[e.node_id].source->attrs.name + "_default"; - tnode->inputs.emplace_back( - nnvm::NodeEntry{mirror_vec[e.node_id], e.index, e.version}); - outputs.emplace_back(nnvm::NodeEntry{tnode, 0, 0}); - } else { - outputs.emplace_back( - nnvm::NodeEntry{mirror_vec[e.node_id], e.index, e.version}); - } - } - - nnvm::Graph ret; - ret.outputs = std::move(outputs); - return ret; -} - -NNVM_REGISTER_PASS(LayoutTransform) -.set_body(LayoutTransform); - -DMLC_REGISTER_PARAMETER(LayoutTransformParam); - -/*! \brief Parse keyword arguments as PType arguments and save to parsed */ -template -inline void ParamParser(nnvm::NodeAttrs* attrs) { - PType param; - try { - param.Init(attrs->dict); - } catch (const dmlc::ParamError& e) { - std::ostringstream os; - os << e.what(); - os << ", in operator " << attrs->op->name << "(" - << "name=\"" << attrs->name << "\""; - for (const auto& k : attrs->dict) { - os << ", " << k.first << "=\"" << k.second << "\""; - } - os << ")"; - throw dmlc::ParamError(os.str()); - } - attrs->parsed = std::move(param); -} - -NNVM_REGISTER_OP(layout_transform) -.set_attr_parser(ParamParser) -.set_num_inputs(1) -.set_num_outputs(1) -.add_argument("data", "NDArray-or-Symbol", "Input data") -.add_arguments(LayoutTransformParam::__FIELDS__()); - - -nnvm::Graph PruneGraph(nnvm::Graph src) { - const auto& params = src.GetAttr >("params"); - - std::unordered_set pruned; - nnvm::NodeEntryMap entry_var; - DFSVisit(src.outputs, [&](const nnvm::NodePtr& n) { - bool can_be_pruned = true; - if (n->is_variable()) { - if (params.count(n->attrs.name)) { - pruned.emplace(n.get()); - } - can_be_pruned = false; - } - - for (const auto& e : n->inputs) { - if (!pruned.count(e.node.get())) { - can_be_pruned = false; - } - } - if (can_be_pruned) { - pruned.emplace(n.get()); - } else { - // scan again to find edge nodes, skip variables - for (auto& e : n->inputs) { - if (!e.node->is_variable() && pruned.count(e.node.get())) { - if (!entry_var.count(e)) { - nnvm::NodePtr var = nnvm::Node::Create(); - var->attrs.name = e.node->attrs.name + "_output" + std::to_string(e.index); - entry_var.emplace(e, var); - } - e = nnvm::NodeEntry{entry_var.at(e), 0, 0}; - } - } - } - }); - - nnvm::Graph pre_graph; - pre_graph.outputs.reserve(entry_var.size()); - std::vector output_names; - output_names.reserve(entry_var.size()); - for (auto kv : entry_var) { - if (kv.first.node->is_variable()) continue; - pre_graph.outputs.emplace_back(kv.first); - output_names.emplace_back(kv.second->attrs.name); - } - - pre_graph.attrs["pruned_params"] = - std::make_shared(std::move(output_names)); - src.attrs["pre_graph"] = - std::make_shared(std::move(pre_graph)); - return src; -} - -NNVM_REGISTER_PASS(PruneGraph) -.set_body(PruneGraph); -} // namespace contrib -} // namespace tvm diff --git a/apps/graph_executor/src/op_attr_types.h b/apps/graph_executor/src/op_attr_types.h deleted file mode 100644 index a851473ca336..000000000000 --- a/apps/graph_executor/src/op_attr_types.h +++ /dev/null @@ -1,121 +0,0 @@ -/*! - * Copyright (c) 2016 by Contributors - * \file op_attr_types.h - * \brief The Expr and related elements in DataFlow construction. - */ -#ifndef TVM_OP_ATTR_TYPES_H_ -#define TVM_OP_ATTR_TYPES_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace tvm { -namespace contrib { - -using runtime::PackedFunc; -using nnvm::StorageVector; -using nnvm::ShapeVector; -using nnvm::DTypeVector; -using nnvm::TShape; -using nnvm::NodeAttrs; - -/*! 
\brief DLPack compatible data types */ -using DLTypeVector = std::vector; -/*! - * \brief Computation description interface - * \param attrs The attribute of the node. - * \param inputs The input tensors(placeholders) - * \return The output description of the tensor. - */ -using FTVMCompute = std::function< - Array - (const NodeAttrs& attrs, const Array& inputs)>; - -/*! - * \brief Build the computation schedule for - * op whose root is at current op. - * \param attrs The attribute of the node. - * \param outs The output tensors. - * \param target The build target. - * \return schedule The computation schedule. - */ -using FTVMSchedule = std::function< - Schedule(const NodeAttrs& attrs, - const Array& outs, - const std::string& target)>; - -/*! \brief Layout Information. */ -using TLayoutInfo = std::string; - -/*! - * \brief The producer consumer function of node layout - * \param attrs The attribute of the node. - * \param ilayouts The input layouts that the node request. - * \param olayouts The output layouts that the node produce. - * \return bool The success flag. - */ -using FTVMLayoutRequest = std::function *ilayouts, - std::vector *olayouts)>; - -/*! \brief The default layout. */ -const TLayoutInfo& GetDefaultLayout(); - -/*! \brief Parameters of layout transform operator */ -struct LayoutTransformParam : public dmlc::Parameter { - std::string src_layout; - std::string dst_layout; - DMLC_DECLARE_PARAMETER(LayoutTransformParam) { - DMLC_DECLARE_FIELD(src_layout); - DMLC_DECLARE_FIELD(dst_layout); - } -}; - -/*! \brief Transform from normal operator to vectorized operator */ -using FTVMVectorizedOp = std::function; - -// The storage result of op -enum OpPatternKind : int { - // Elementwise operation - kElemWise, - // Broadcast operation - kBroadcast, - // Complex operation, can fuse bcast in input/outputs - // but cannot chain another complex op - kComplex, - // Extern operation, cannot fuse anything. - kExtern -}; - -using TOpPattern = int; - -/*! - * \brief Get PackedFunction from global registry and - * report error if it does not exist - * \param name The name of the function. - * \return The created PackedFunc. - */ -inline const PackedFunc& GetPackedFunc(const std::string& name) { - const PackedFunc* pf = tvm::runtime::Registry::Get(name); - CHECK(pf != nullptr) << "Cannot find function " << name << " in registry"; - return *pf; -} - -/*! - * \brief Create a Graph execution module by a given graph and the code module. - * \param g The graph to be executed. - * \param m The tvm module containing the functions. - * \return The created executor module. - */ -tvm::runtime::Module CreateExecutor(nnvm::Graph g); -} // namespace contrib -} // namespace tvm -#endif // TVM_OP_ATTR_TYPES_H_ diff --git a/apps/graph_executor/src/op_decl.cc b/apps/graph_executor/src/op_decl.cc deleted file mode 100644 index 989db15cebd9..000000000000 --- a/apps/graph_executor/src/op_decl.cc +++ /dev/null @@ -1,42 +0,0 @@ -/*! - * Copyright (c) 2017 by Contributors - * \file Operator Declarations. 
- */ -#include -#include -#include "./op_attr_types.h" - -namespace tvm { -namespace contrib { - -using namespace nnvm; - -inline bool SameShape(const NodeAttrs& attrs, - std::vector *ishape, - std::vector *oshape) { - if (ishape->size() == 0 || (*ishape)[0].ndim() == 0) return false; - for (TShape& pshape : *oshape) { - pshape = (*ishape)[0]; - } - for (TShape& pshape : *ishape) { - pshape = (*ishape)[0]; - } - return true; -} - -NNVM_REGISTER_OP_GROUP(ElementwiseOpAttr) -.set_attr("TOpPattern", kBroadcast) -.set_attr("FInferShape", SameShape); - -NNVM_REGISTER_OP(__add_symbol__) -.describe("add two data together") -.set_num_inputs(2) -.include("ElementwiseOpAttr"); - -NNVM_REGISTER_OP(exp) -.describe("Take exp") -.set_num_inputs(1) -.include("ElementwiseOpAttr"); - -} // namespace contrib -} // namespace tvm diff --git a/apps/graph_executor/src/op_tvm_def.cc b/apps/graph_executor/src/op_tvm_def.cc deleted file mode 100644 index 508fe185f132..000000000000 --- a/apps/graph_executor/src/op_tvm_def.cc +++ /dev/null @@ -1,47 +0,0 @@ -/*! - * Copyright (c) 2017 by Contributors - * \file Operator defintions in TVM. - */ -#include -#include -#include "./op_attr_types.h" - -namespace tvm { -namespace contrib { - -using namespace nnvm; - -Array -ComputeAdd(const NodeAttrs& attrs, - const Array& inputs) { - static const PackedFunc& pf = GetPackedFunc("tvm_graph.compute.add"); - CHECK_EQ(inputs.size(), 2U); - Tensor ret = pf(inputs[0], inputs[1]); - return {ret}; -} - -Array -ComputeExp(const NodeAttrs& attrs, - const Array& inputs) { - static const PackedFunc& pf = GetPackedFunc("tvm_graph.compute.exp"); - CHECK_EQ(inputs.size(), 1U); - Tensor ret = pf(inputs[0]); - return {ret}; -} - -Schedule ScheduleEWise(const NodeAttrs& attrs, - const Array& outs, - const std::string& target) { - static const PackedFunc& pf = GetPackedFunc("tvm_graph.schedule.ewise"); - return pf(outs, target); -} - -NNVM_REGISTER_OP(__add_symbol__) -.set_attr("FTVMCompute", ComputeAdd) -.set_attr("FTVMSchedule", ScheduleEWise); - -NNVM_REGISTER_OP(exp) -.set_attr("FTVMCompute", ComputeExp) -.set_attr("FTVMSchedule", ScheduleEWise); -} // namespace contrib -} // namespace tvm diff --git a/apps/graph_executor/tests/test_executor.py b/apps/graph_executor/tests/test_executor.py deleted file mode 100644 index 3d89f26caa14..000000000000 --- a/apps/graph_executor/tests/test_executor.py +++ /dev/null @@ -1,32 +0,0 @@ -import tvm_graph as tg -import numpy as np -import tvm - -def test_compile(): - x = tg.Variable('x') - y = tg.Variable('y') - z = tg.exp(y + x) - shape = (10, 128) - dtype = tvm.float32 - g = tg.build(z, "llvm", - shape={'x': shape, - 'y': shape}) - m = tg.bind(g, tvm.cpu(0)) - # get member functions - set_input, run, get_output = m['set_input'], m['run'], m['get_output'] - na = tvm.nd.array(np.ones(shape).astype(dtype)) - nb = tvm.nd.array(np.ones(shape).astype(dtype)) - # set inputs - set_input('x', na) - set_input('y', nb) - # execute - run() - # get outputs - out = tvm.nd.array(np.zeros(shape).astype(dtype)) - get_output(0, out) - np.testing.assert_allclose( - out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy())) - -if __name__ == "__main__": - test_compile() - diff --git a/apps/graph_executor/tests/test_rpc_executor.py b/apps/graph_executor/tests/test_rpc_executor.py deleted file mode 100644 index 65cd2512ebae..000000000000 --- a/apps/graph_executor/tests/test_rpc_executor.py +++ /dev/null @@ -1,56 +0,0 @@ -import tvm -from tvm.contrib import util, rpc -import tvm_graph as tg -import numpy as np -import os - -def 
test_rpc_executor(): - host = 'localhost' - port = 9091 - server = rpc.Server(host, port) - - tmp = util.tempdir() - sym_fname = tmp.relpath('net.json') - lib_fname = tmp.relpath('net.o') - param_fname = tmp.relpath('net.param') - - x = tg.Variable('x') - y = tg.Variable('y') - sym = tg.exp(y + x) + tg.exp(x + y) - - shape = (10, 128) - dtype = tvm.float32 - na = tvm.nd.array(np.ones(shape).astype(dtype)) - nb = tvm.nd.array(np.ones(shape).astype(dtype)) - tg.save_params(param_fname, {'x': na, 'y': nb}) - - remote = rpc.connect(host, port) - ctx = remote.cpu(0) - - target = "llvm" - shapes = {'x': shape, 'y': shape} - - sym_json = tg.compile_graph(lib_fname, sym, target, shapes) - remote.upload(lib_fname) - param_blob = bytearray(open(param_fname, "rb").read()) - - rm = tg.remote_load_exec(remote, - sym_json, - os.path.basename(lib_fname), - param_blob, - ctx) - - run, get_output = rm['run'], rm['get_output'] - - nc = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx) - run() - get_output(0, nc) - - npa = na.asnumpy() - npb = nb.asnumpy() - np.testing.assert_allclose(nc.asnumpy(), - np.exp(npa + npb) + np.exp(npb + npa)) - server.terminate() - -if __name__ == "__main__": - test_rpc_executor() diff --git a/apps/graph_executor/tests/test_save_load.py b/apps/graph_executor/tests/test_save_load.py deleted file mode 100644 index 4b5631c8f45e..000000000000 --- a/apps/graph_executor/tests/test_save_load.py +++ /dev/null @@ -1,38 +0,0 @@ -import tvm_graph as tg -import numpy as np -import tvm - -def test_save_load(): - shape = (10, 128) - dtype = tvm.float32 - na = tvm.nd.array(np.ones(shape).astype(dtype)) - nb = tvm.nd.array(np.ones(shape).astype(dtype)) - - x = tg.Variable('x') - y = tg.Variable('y') - z = tg.exp(y + x) - - g = tg.build(z, "llvm", shape={'x': shape, 'y': shape}) - m0 = tg.bind(g, tvm.cpu(0)) - set_input0, run0, get_output0 = m0['set_input'], m0['run'], m0['get_output'] - set_input0(0, na) - set_input0(1, nb) - run0() - out0 = tvm.nd.array(np.zeros(shape).astype(dtype)) - get_output0(0, out0) - - tg.save_params('test.params', {'x': na, 'y': nb}) - - # create another executor - m1 = tg.bind(g, tvm.cpu(0)) - load_params1 = m1['load_params'] - load_params1(bytearray(open('test.params', 'rb').read())) - - run1, get_output1 = m1['run'], m1['get_output'] - run1() - out1 = tvm.nd.array(np.zeros(shape).astype(dtype)) - get_output1(0, out1) - np.testing.assert_allclose(out0.asnumpy(), out1.asnumpy()) - -if __name__ == "__main__": - test_save_load() diff --git a/src/codegen/llvm/codegen_llvm.cc b/src/codegen/llvm/codegen_llvm.cc index 941b045a026a..b130230c5ed7 100644 --- a/src/codegen/llvm/codegen_llvm.cc +++ b/src/codegen/llvm/codegen_llvm.cc @@ -525,27 +525,20 @@ llvm::Value* CodeGenLLVM::CreateCallExtern(const Call* op) { llvm::Value* CodeGenLLVM::CreateIntrinsic(const Call* op) { if (op->is_intrinsic("llvm_intrin")) { - CHECK_GE(op->args.size(), 1U); + CHECK_GE(op->args.size(), 2U); llvm::Intrinsic::ID id = static_cast<llvm::Intrinsic::ID>( op->args[0].as<UIntImm>()->value); + uint64_t num_signature = op->args[1].as<UIntImm>()->value; std::vector<llvm::Value*> arg_value; - std::vector<llvm::Type*> arg_type; - for (size_t i = 1; i < op->args.size(); ++i) { + std::vector<llvm::Type*> sig_type; + for (size_t i = 2; i < op->args.size(); ++i) { arg_value.push_back(MakeValue(op->args[i])); - arg_type.push_back(arg_value.back()->getType()); + if (i - 2 < num_signature) { + sig_type.push_back(arg_value.back()->getType()); + } } llvm::Function* f = llvm::Intrinsic::getDeclaration( - module_.get(), id, arg_type); - return builder_->CreateCall(f, arg_value); - } else if 
(op->is_intrinsic("llvm_builtin")) { - CHECK_GE(op->args.size(), 1U); - llvm::Intrinsic::ID id = static_cast<llvm::Intrinsic::ID>( - op->args[0].as<UIntImm>()->value); - std::vector<llvm::Value*> arg_value; - for (size_t i = 1; i < op->args.size(); ++i) { - arg_value.push_back(MakeValue(op->args[i])); - } - llvm::Function* f = llvm::Intrinsic::getDeclaration(module_.get(), id, {}); + module_.get(), id, sig_type); return builder_->CreateCall(f, arg_value); } else if (op->is_intrinsic(Call::bitwise_and)) { return builder_->CreateAnd(MakeValue(op->args[0]), MakeValue(op->args[1])); diff --git a/src/codegen/llvm/intrin_rule_llvm.cc b/src/codegen/llvm/intrin_rule_llvm.cc index 2fb2b3c0f288..dd5ce9847b40 100644 --- a/src/codegen/llvm/intrin_rule_llvm.cc +++ b/src/codegen/llvm/intrin_rule_llvm.cc @@ -16,25 +16,8 @@ namespace llvm { using namespace ir; -template<unsigned id> -inline void DispatchLLVMBuildin(const TVMArgs& targs, TVMRetValue* rv) { - Expr e = targs[0]; - const Call* call = e.as<Call>(); - CHECK(call != nullptr); - Array<Expr> cargs; - // intrin id. - cargs.push_back(UIntImm::make(UInt(32), id)); - for (Expr arg : call->args) { - cargs.push_back(arg); - } - *rv = Call::make( - call->type, "llvm_builtin", cargs, Call::Intrinsic); -} - -TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.prefetch") -.set_body(DispatchLLVMBuildin<::llvm::Intrinsic::prefetch>); - -template<unsigned id> +// num_signature is the number of leading arguments used to query the intrinsic signature +template<unsigned id, int num_signature> inline void DispatchLLVMPureIntrin(const TVMArgs& targs, TVMRetValue* rv) { Expr e = targs[0]; const Call* call = e.as<Call>(); @@ -42,6 +25,8 @@ inline void DispatchLLVMPureIntrin(const TVMArgs& targs, TVMRetValue* rv) { Array<Expr> cargs; // intrin id. cargs.push_back(UIntImm::make(UInt(32), id)); + cargs.push_back(UIntImm::make(UInt(32), num_signature)); + for (Expr arg : call->args) { cargs.push_back(arg); } @@ -49,7 +34,7 @@ inline void DispatchLLVMPureIntrin(const TVMArgs& targs, TVMRetValue* rv) { call->type, "llvm_intrin", cargs, Call::PureIntrinsic); } -template<unsigned id> +template<unsigned id, int num_signature> inline void DispatchLLVMIntrin(const TVMArgs& targs, TVMRetValue* rv) { Expr e = targs[0]; const Call* call = e.as<Call>(); @@ -57,6 +42,7 @@ inline void DispatchLLVMIntrin(const TVMArgs& targs, TVMRetValue* rv) { Array<Expr> cargs; // intrin id. 
cargs.push_back(UIntImm::make(UInt(32), id)); + cargs.push_back(UIntImm::make(UInt(32), num_signature)); for (Expr arg : call->args) { cargs.push_back(arg); } @@ -64,20 +50,23 @@ inline void DispatchLLVMIntrin(const TVMArgs& targs, TVMRetValue* rv) { call->type, "llvm_intrin", cargs, Call::Intrinsic); } +TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.prefetch") +.set_body(DispatchLLVMIntrin<::llvm::Intrinsic::prefetch, 0>); + TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.exp") -.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp>); +.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>); TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.fma") -.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd>); +.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 1>); TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.log") -.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::log>); +.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1>); TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.sqrt") -.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt>); +.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1>); TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.pow") -.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow>); +.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 1>); } // namespace llvm } // namespace codegen diff --git a/tests/scripts/task_python_integration.sh b/tests/scripts/task_python_integration.sh index 8dcf6cfd8ae1..757f2429ad32 100755 --- a/tests/scripts/task_python_integration.sh +++ b/tests/scripts/task_python_integration.sh @@ -1,6 +1,5 @@ #!/bin/bash export PYTHONPATH=python:apps/extension/python -export PYTHONPATH=${PYTHONPATH}:apps/graph_executor/python:apps/graph_executor/nnvm/python export LD_LIBRARY_PATH=lib:${LD_LIBRARY_PATH} rm -rf python/tvm/*.pyc python/tvm/*/*.pyc @@ -14,12 +13,6 @@ make || exit -1 cd ../.. python -m nose -v apps/extension/tests || exit -1 -# Test NNVM integration -cd apps/graph_executor -make || exit -1 -cd ../.. -python -m nose -v apps/graph_executor/tests || exit -1 - TVM_FFI=cython python -m nose -v tests/python/integration || exit -1 TVM_FFI=ctypes python3 -m nose -v tests/python/integration || exit -1 TVM_FFI=cython python -m nose -v tests/python/contrib || exit -1
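With the intrinsic changes above, a lowered llvm_intrin call now carries the intrinsic id, then num_signature, then the actual arguments; the types of the first num_signature arguments resolve overloaded LLVM intrinsics (llvm.exp becomes llvm.exp.f32 for a float32 operand), while num_signature = 0 declares a non-overloaded intrinsic such as llvm.prefetch. A minimal sketch that exercises this path through the tvm.intrin.rule.llvm.exp rule, written against the 0.x-era TVM Python API (tvm.placeholder, tvm.compute), so the names are assumptions for later releases:

    # Sketch: tvm.exp in a compute expression lowers through the
    # "tvm.intrin.rule.llvm.exp" rule registered above, which now tags the
    # llvm_intrin call with num_signature=1 so codegen declares the
    # overload matching the operand type (llvm.exp.f32 here).
    import numpy as np
    import tvm

    n = 128
    A = tvm.placeholder((n,), name='A', dtype='float32')
    B = tvm.compute((n,), lambda i: tvm.exp(A[i]), name='B')
    s = tvm.create_schedule(B.op)
    f = tvm.build(s, [A, B], target='llvm')

    ctx = tvm.cpu(0)
    a = tvm.nd.array(np.random.uniform(size=n).astype('float32'), ctx)
    b = tvm.nd.array(np.zeros(n, dtype='float32'), ctx)
    f(a, b)
    np.testing.assert_allclose(b.asnumpy(), np.exp(a.asnumpy()), rtol=1e-5)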