From abef4957038138e617721bbe44458a4d508554e7 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 15 Oct 2017 13:37:09 -0700 Subject: [PATCH 1/3] [CODEGEN] Allow link additional module --- python/tvm/__init__.py | 4 +- python/tvm/contrib/nvcc.py | 60 ++++++++++++++++++++++++++++ python/tvm/contrib/rocm.py | 1 + src/codegen/llvm/codegen_llvm.cc | 11 +++++ src/codegen/llvm/codegen_llvm.h | 8 +++- src/codegen/llvm/codegen_nvptx.cc | 22 +++++++++- src/codegen/llvm/intrin_rule_llvm.cc | 42 +------------------ src/codegen/llvm/intrin_rule_llvm.h | 56 ++++++++++++++++++++++++++ src/codegen/llvm/llvm_common.h | 2 + 9 files changed, 162 insertions(+), 44 deletions(-) create mode 100644 src/codegen/llvm/intrin_rule_llvm.h diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py index 90ac45988bcb..e23eed7168dc 100644 --- a/python/tvm/__init__.py +++ b/python/tvm/__init__.py @@ -30,4 +30,6 @@ from .schedule import create_schedule from .build_module import build, lower, build_config from .tag import tag_scope -from .contrib import rocm as _rocm + +# Contrib initializers +from .contrib import rocm as _rocm, nvcc as _nvcc diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py index 9651466b723d..3242619c30f8 100644 --- a/python/tvm/contrib/nvcc.py +++ b/python/tvm/contrib/nvcc.py @@ -3,8 +3,12 @@ from __future__ import absolute_import as _abs import subprocess +import os +import warnings from . import util from .. import ndarray as nd +from ..api import register_func + def compile_cuda(code, target="ptx", @@ -72,3 +76,59 @@ def compile_cuda(code, raise RuntimeError(msg) return bytearray(open(file_target, "rb").read()) + + +def find_cuda_path(): + """Utility function to find cuda path + + Returns + ------- + path : str + Path to cuda root. + """ + if "CUDA_PATH" in os.environ: + return os.environ["CUDA_PATH"] + cmd = ["which", "nvcc"] + proc = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + (out, _) = proc.communicate() + if proc.returncode == 0: + return os.path.abspath(os.path.join(out.strip(), "../..")) + cuda_path = "/usr/local/cuda" + if os.path.exists(os.path.join(cuda_path, "bin/nvcc")): + return cuda_path + raise RuntimeError("Cannot find cuda path") + + +def find_libdevice_path(arch): + """Utility function to find libdevice + + Parameters + ---------- + arch : int + The compute architecture in int + """ + cuda_path = find_cuda_path() + lib_path = os.path.join(cuda_path, "nvvm/libdevice") + selected_ver = 0 + selected_path = None + + for fn in os.listdir(lib_path): + if not fn.startswith("libdevice"): + continue + ver = int(fn.split(".")[-3].split("_")[-1]) + if ver > selected_ver and ver <= arch: + selected_ver = ver + selected_path = fn + if selected_path is None: + raise RuntimeError("Cannot find libdevice for arch {}".format(arch)) + return os.path.join(lib_path, selected_path) + + +@register_func("tvm_callback_libdevice_path") +def callback_libdevice_path(arch): + try: + return find_libdevice_path(arch) + except RuntimeError: + warnings.warn("Cannot find libdevice path") + return "" diff --git a/python/tvm/contrib/rocm.py b/python/tvm/contrib/rocm.py index c367aef24e21..ee956c85e0ed 100644 --- a/python/tvm/contrib/rocm.py +++ b/python/tvm/contrib/rocm.py @@ -26,6 +26,7 @@ def rocm_link(in_file, out_file): msg += str(out) raise RuntimeError(msg) + @register_func("tvm_callback_rocm_link") def callback_rocm_link(obj_bin): """Links object file generated from LLVM to HSA Code Object diff --git a/src/codegen/llvm/codegen_llvm.cc b/src/codegen/llvm/codegen_llvm.cc index 299e2d8483b0..cb2eae40eaeb 100644 --- a/src/codegen/llvm/codegen_llvm.cc +++ b/src/codegen/llvm/codegen_llvm.cc @@ -147,10 +147,21 @@ void CodeGenLLVM::AddFunctionInternal(const LoweredFunc& f, bool ret_void) { std::unique_ptr CodeGenLLVM::Finish() { this->AddStartupFunction(); + // link modules + for (size_t i = 0; i < link_modules_.size(); ++i) { + CHECK(!llvm::Linker::linkModules(*module_, std::move(link_modules_[i]))) + << "Failed to link modules"; + } + link_modules_.clear(); + // optimize this->Optimize(); return std::move(module_); } +void CodeGenLLVM::AddLinkModule(std::unique_ptr&& mod) { + link_modules_.emplace_back(std::move(mod)); +} + void CodeGenLLVM::AddMainFunction(const std::string& entry_func_name) { LOG(FATAL) << "not implemented"; } diff --git a/src/codegen/llvm/codegen_llvm.h b/src/codegen/llvm/codegen_llvm.h index 631c42f7b226..e4a0b24d381a 100644 --- a/src/codegen/llvm/codegen_llvm.h +++ b/src/codegen/llvm/codegen_llvm.h @@ -66,6 +66,11 @@ class CodeGenLLVM : * \return the created module. */ virtual std::unique_ptr Finish(); + /*! + * \brief Add mod to be linked with the generated module + * \param mod The module to be linked. + */ + void AddLinkModule(std::unique_ptr&& mod); /*! * \brief Create Value for expression e * \param e The expression to be created value for. @@ -227,7 +232,8 @@ class CodeGenLLVM : llvm::MDNode* md_very_likely_branch_{nullptr}; llvm::MDNode* md_tbaa_root_{nullptr}; llvm::MDNode* md_tbaa_alias_set_{nullptr}; - + // modules to be linked. + std::vector > link_modules_; /*! \brief native vector bits of current targetx*/ int native_vector_bits_{0}; /*! \brief the storage scope of allocation */ diff --git a/src/codegen/llvm/codegen_nvptx.cc b/src/codegen/llvm/codegen_nvptx.cc index ede882895f95..d147709ff1a2 100644 --- a/src/codegen/llvm/codegen_nvptx.cc +++ b/src/codegen/llvm/codegen_nvptx.cc @@ -153,9 +153,10 @@ inline int DetectCUDAComputeVersion() { runtime::Module BuildNVPTX(Array funcs, std::string target) { CHECK(target.length() >= 5 && target.substr(0, 5) == "nvptx"); + int compute_ver = DetectCUDAComputeVersion(); std::ostringstream config; config << "-mtriple=nvptx64-nvidia-cuda -mcpu=sm_" - << DetectCUDAComputeVersion() + << compute_ver << target.substr(5, target.length() - 5); llvm::TargetMachine* tm = GetLLVMTargetMachine(config.str()); std::unique_ptr cg(new CodeGenNVPTX()); @@ -164,6 +165,25 @@ runtime::Module BuildNVPTX(Array funcs, std::string target) { for (LoweredFunc f : funcs) { cg->AddFunction(f); } + + const auto* flibdevice_path = + tvm::runtime::Registry::Get("tvm_callback_libdevice_path"); + if (flibdevice_path != nullptr) { + std::string path = (*flibdevice_path)(compute_ver); + if (path.length() != 0) { + llvm::SMDiagnostic err; + std::unique_ptr mlib = llvm::parseIRFile(path, err, *ctx); + if (mlib.get() == nullptr) { + std::string msg = err.getMessage(); + LOG(FATAL) << "Fail to load bitcode file " << path << "\n" + << "line " << err.getLineNo() << ":" << msg; + } + mlib->setTargetTriple(tm->getTargetTriple().str()); + mlib->setDataLayout(tm->createDataLayout()); + // TODO(tqchen) libdevice linking not yet working. + // cg->AddLinkModule(std::move(mlib)); + } + } std::unique_ptr module = cg->Finish(); llvm::SmallString<8> data_ptx, data_ll; llvm::raw_svector_ostream dest_ptx(data_ptx), dest_ll(data_ll); diff --git a/src/codegen/llvm/intrin_rule_llvm.cc b/src/codegen/llvm/intrin_rule_llvm.cc index dd5ce9847b40..63bcca0985a0 100644 --- a/src/codegen/llvm/intrin_rule_llvm.cc +++ b/src/codegen/llvm/intrin_rule_llvm.cc @@ -4,52 +4,12 @@ */ #ifdef TVM_LLVM_VERSION -#include -#include -#include -#include -#include "./llvm_common.h" +#include "./intrin_rule_llvm.h" namespace tvm { namespace codegen { namespace llvm { -using namespace ir; - -// num_signature means number of arguments used to query signature -template -inline void DispatchLLVMPureIntrin(const TVMArgs& targs, TVMRetValue* rv) { - Expr e = targs[0]; - const Call* call = e.as(); - CHECK(call != nullptr); - Array cargs; - // intrin id. - cargs.push_back(UIntImm::make(UInt(32), id)); - cargs.push_back(UIntImm::make(UInt(32), num_signature)); - - for (Expr arg : call->args) { - cargs.push_back(arg); - } - *rv = Call::make( - call->type, "llvm_intrin", cargs, Call::PureIntrinsic); -} - -template -inline void DispatchLLVMIntrin(const TVMArgs& targs, TVMRetValue* rv) { - Expr e = targs[0]; - const Call* call = e.as(); - CHECK(call != nullptr); - Array cargs; - // intrin id. - cargs.push_back(UIntImm::make(UInt(32), id)); - cargs.push_back(UIntImm::make(UInt(32), num_signature)); - for (Expr arg : call->args) { - cargs.push_back(arg); - } - *rv = Call::make( - call->type, "llvm_intrin", cargs, Call::Intrinsic); -} - TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.prefetch") .set_body(DispatchLLVMIntrin<::llvm::Intrinsic::prefetch, 0>); diff --git a/src/codegen/llvm/intrin_rule_llvm.h b/src/codegen/llvm/intrin_rule_llvm.h new file mode 100644 index 000000000000..85641cb178e7 --- /dev/null +++ b/src/codegen/llvm/intrin_rule_llvm.h @@ -0,0 +1,56 @@ +/*! + * Copyright (c) 2017 by Contributors + * \file intrin_rule_llvm.h + * \brief Common utilities for llvm intrinsics. + */ +#ifndef TVM_CODEGEN_LLVM_INTRIN_RULE_LLVM_H_ +#define TVM_CODEGEN_LLVM_INTRIN_RULE_LLVM_H_ +#ifdef TVM_LLVM_VERSION + +#include +#include +#include +#include +#include "./llvm_common.h" + +namespace tvm { +namespace codegen { +// num_signature means number of arguments used to query signature +template +inline void DispatchLLVMPureIntrin(const TVMArgs& targs, TVMRetValue* rv) { + Expr e = targs[0]; + const ir::Call* call = e.as(); + CHECK(call != nullptr); + Array cargs; + // intrin id. + cargs.push_back(ir::UIntImm::make(UInt(32), id)); + cargs.push_back(ir::UIntImm::make(UInt(32), num_signature)); + + for (Expr arg : call->args) { + cargs.push_back(arg); + } + *rv = ir::Call::make( + call->type, "llvm_intrin", cargs, ir::Call::PureIntrinsic); +} + +template +inline void DispatchLLVMIntrin(const TVMArgs& targs, TVMRetValue* rv) { + Expr e = targs[0]; + const ir::Call* call = e.as(); + CHECK(call != nullptr); + Array cargs; + // intrin id. + cargs.push_back(ir::UIntImm::make(UInt(32), id)); + cargs.push_back(ir::UIntImm::make(UInt(32), num_signature)); + for (Expr arg : call->args) { + cargs.push_back(arg); + } + *rv = ir::Call::make( + call->type, "llvm_intrin", cargs, ir::Call::Intrinsic); +} + +} // namespace codegen +} // namespace tvm + +#endif // LLVM_VERSION +#endif // TVM_CODEGEN_LLVM_INTRIN_RULE_LLVM_H_ diff --git a/src/codegen/llvm/llvm_common.h b/src/codegen/llvm/llvm_common.h index da905f4709e0..11ff66d8ca38 100644 --- a/src/codegen/llvm/llvm_common.h +++ b/src/codegen/llvm/llvm_common.h @@ -43,6 +43,8 @@ #include #include +#include + #include #include From 962aa9a5bfa119cf1cb58d285ef3cb0c6a68fbe8 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 15 Oct 2017 13:51:21 -0700 Subject: [PATCH 2/3] fix py3 --- python/tvm/contrib/nvcc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py index 3242619c30f8..1267b14fc46d 100644 --- a/python/tvm/contrib/nvcc.py +++ b/python/tvm/contrib/nvcc.py @@ -8,7 +8,7 @@ from . import util from .. import ndarray as nd from ..api import register_func - +from .._ffi.base import py_str def compile_cuda(code, target="ptx", @@ -92,8 +92,9 @@ def find_cuda_path(): proc = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) (out, _) = proc.communicate() + out = py_str(out) if proc.returncode == 0: - return os.path.abspath(os.path.join(out.strip(), "../..")) + return os.path.abspath(os.path.join(str(out).strip(), "../..")) cuda_path = "/usr/local/cuda" if os.path.exists(os.path.join(cuda_path, "bin/nvcc")): return cuda_path @@ -125,7 +126,6 @@ def find_libdevice_path(arch): return os.path.join(lib_path, selected_path) -@register_func("tvm_callback_libdevice_path") def callback_libdevice_path(arch): try: return find_libdevice_path(arch) From 610b4ca7fd319deed9008dab0b8b825b10bb4306 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sun, 15 Oct 2017 13:59:47 -0700 Subject: [PATCH 3/3] add register back --- python/tvm/contrib/nvcc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py index 1267b14fc46d..ac8dbf65b2bc 100644 --- a/python/tvm/contrib/nvcc.py +++ b/python/tvm/contrib/nvcc.py @@ -101,6 +101,7 @@ def find_cuda_path(): raise RuntimeError("Cannot find cuda path") +@register_func("tvm_callback_libdevice_path") def find_libdevice_path(arch): """Utility function to find libdevice