From 73726a89e6afb4c9aa118c943674b01c31597886 Mon Sep 17 00:00:00 2001 From: tobe Date: Tue, 3 Dec 2019 19:49:22 +0800 Subject: [PATCH 01/30] Add implementation of TVMDSOOp --- CMakeLists.txt | 2 + cmake/config.cmake | 3 + cmake/modules/contrib/TFOP.cmake | 45 ++++ python/tvm/contrib/tf_op/__init__.py | 20 ++ python/tvm/contrib/tf_op/module.py | 104 +++++++++ src/contrib/tf_op/index_seq.h | 44 ++++ src/contrib/tf_op/tvm_dso_op_kernels.cc | 285 ++++++++++++++++++++++++ src/contrib/tf_op/tvm_dso_ops.cc | 91 ++++++++ 8 files changed, 594 insertions(+) create mode 100644 cmake/modules/contrib/TFOP.cmake create mode 100644 python/tvm/contrib/tf_op/__init__.py create mode 100644 python/tvm/contrib/tf_op/module.py create mode 100644 src/contrib/tf_op/index_seq.h create mode 100644 src/contrib/tf_op/tvm_dso_op_kernels.cc create mode 100644 src/contrib/tf_op/tvm_dso_ops.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index bf18ffc9e856..2ec625d0dbed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,6 +42,7 @@ tvm_option(USE_MSVC_MT "Build with MT" OFF) tvm_option(USE_MICRO "Build with Micro" OFF) tvm_option(INSTALL_DEV "Install compiler infrastructure" OFF) tvm_option(HIDE_PRIVATE_SYMBOLS "Compile with -fvisibility=hidden." OFF) +tvm_option(USE_TFOP "Build with TensorFlow TVMDSOOp" OFF) # 3rdparty libraries tvm_option(DLPACK_PATH "Path to DLPACK" "3rdparty/dlpack/include") @@ -257,6 +258,7 @@ include(cmake/modules/contrib/MicroStandaloneRuntime.cmake) include(cmake/modules/contrib/Sort.cmake) include(cmake/modules/contrib/NNPack.cmake) include(cmake/modules/contrib/HybridDump.cmake) +include(cmake/modules/contrib/TFOP.cmake) if(NOT MSVC) include(CheckCXXCompilerFlag) diff --git a/cmake/config.cmake b/cmake/config.cmake index dba1b44e676e..01918fb2f974 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -184,3 +184,6 @@ set(USE_VTA_FPGA OFF) # Whether to build the example external runtime module set(USE_EXAMPLE_EXT_RUNTIME OFF) + +# Whether to build the TensorFlow TVMDSOOp module +set(USE_TFOP OFF) diff --git a/cmake/modules/contrib/TFOP.cmake b/cmake/modules/contrib/TFOP.cmake new file mode 100644 index 000000000000..52d017c08e16 --- /dev/null +++ b/cmake/modules/contrib/TFOP.cmake @@ -0,0 +1,45 @@ + +if(NOT USE_TFOP STREQUAL "OFF") + + if ("${TVM_HOME}" STREQUAL "") + message(FATAL_ERROR "TVM_HOME is not defined") + else() + message("Use TVM_HOME=\"${TVM_HOME}\"") + endif() + + include_directories(${TVM_HOME}/include) + include_directories(${TVM_HOME}/3rdparty/dlpack/include) + include_directories(${TVM_HOME}/3rdparty/dmlc-core/include) + + execute_process(COMMAND python -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_compile_flags()))" + OUTPUT_VARIABLE TF_COMPILE_FLAGS_STR + RESULT_VARIABLE TF_STATUS) + if (NOT ${TF_STATUS} EQUAL 0) + message(FATAL_ERROR "Fail to get TensorFlow compile flags") + endif() + + execute_process(COMMAND python -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_link_flags()))" + OUTPUT_VARIABLE TF_LINK_FLAGS_STR + RESULT_VARIABLE TF_STATUS) + if (NOT ${TF_STATUS} EQUAL 0) + message(FATAL_ERROR "Fail to get TensorFlow link flags") + endif() + + string(REGEX REPLACE "\n" " " TF_FLAGS "${TF_COMPILE_FLAGS} ${TF_LINK_FLAGS}") + message("Use TensorFlow flags=\"${TF_FLAGS}\"") + separate_arguments(TF_COMPILE_FLAGS UNIX_COMMAND ${TF_COMPILE_FLAGS_STR}) + separate_arguments(TF_LINK_FLAGS UNIX_COMMAND ${TF_LINK_FLAGS_STR}) + + + set(OP_LIBRARY_NAME tvm_dso_op) + file(GLOB_RECURSE TFTVM_SRCS src/*.cc) + add_library(${OP_LIBRARY_NAME} SHARED 
${TFTVM_SRCS}) + set_target_properties(${OP_LIBRARY_NAME} PROPERTIES PREFIX "") + + set(TFTVM_COMPILE_FLAGS -O2 -ldl -g) + set(TFTVM_LINK_FLAGS -ltvm_runtime -L${TVM_HOME}/build) + target_compile_options(${OP_LIBRARY_NAME} PUBLIC ${TFTVM_COMPILE_FLAGS} ${TF_COMPILE_FLAGS}) + target_link_options(${OP_LIBRARY_NAME} PUBLIC ${TFTVM_LINK_FLAGS} ${TF_LINK_FLAGS}) + +endif() + diff --git a/python/tvm/contrib/tf_op/__init__.py b/python/tvm/contrib/tf_op/__init__.py new file mode 100644 index 000000000000..9f80b266cb91 --- /dev/null +++ b/python/tvm/contrib/tf_op/__init__.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from . import module + +Module = module.Module diff --git a/python/tvm/contrib/tf_op/module.py b/python/tvm/contrib/tf_op/module.py new file mode 100644 index 000000000000..9a9e1f386fc0 --- /dev/null +++ b/python/tvm/contrib/tf_op/module.py @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+import tensorflow as tf
+from tensorflow.python.framework import load_library
+
+
+class Module():
+
+    def __init__(self, lib_path):
+        self.lib_path = lib_path
+
+    def func(self, name, output_dtype=None, output_shape=None):
+        return Func(self.lib_path, name, output_dtype, output_shape)
+
+    def __getitem__(self, func_name):
+        return self.func(func_name)
+
+
+class Func():
+
+    def __init__(self, lib_path, func_name, output_dtype, output_shape):
+        self.lib_path = lib_path
+        self.func_name = func_name
+        self.output_dtype = output_dtype
+
+        # const(0) indicate invalid dynamic shape
+        self.dynamic_output_shape = tf.constant(0, tf.int64)
+        self.static_output_shape = None
+        self.has_static_output_shape = False  # extra flag is required
+
+        if self._is_static_shape(output_shape):
+            self.static_output_shape = output_shape
+            self.has_static_output_shape = True
+        elif output_shape is not None:
+            self.dynamic_output_shape = self._pack_shape_tensor(output_shape)
+
+        # TODO: support non-xpu device
+        #self.device = device
+        # delay initialization to called first time, where num input arguments is known
+        self.tvm_dso_op = None
+        self.module = load_library.load_op_library('tvm_dso_op.so')
+
+    def apply(self, *params):
+        if self.tvm_dso_op is None:
+            num_inputs = len(params)
+            self.tvm_dso_op = getattr(self.module, "tvm_dso_op%s" % num_inputs)
+
+        return self.tvm_dso_op(*params,
+                               dynamic_output_shape=self.dynamic_output_shape,
+                               static_output_shape=self.static_output_shape,
+                               has_static_output_shape=self.has_static_output_shape,
+                               lib_path=self.lib_path,
+                               func_name=self.func_name,
+                               output_dtype=self.output_dtype)
+
+    def __call__(self, *params):
+        return self.apply(*params)
+
+    def _is_static_shape(self, shape):
+        if shape is None or not isinstance(shape, list):
+            return False
+        for d in shape:
+            if not isinstance(d, int):
+                return False
+            if d < 0:
+                raise Exception("Negative dimension is illegal: %d" % d)
+        return True
+
+    def _pack_shape_tensor(self, shape):
+        if isinstance(shape, tf.Tensor):
+            if shape.dtype == tf.int32:
+                shape = tf.cast(shape, tf.int64)
+            return shape
+        elif isinstance(shape, list):
+            shape_dims = []
+            for d in shape:
+                if isinstance(d, int):
+                    shape_dims.append(tf.constant(d, tf.int64))
+                elif isinstance(d, tf.Tensor) and len(d.shape) == 0:
+                    if d.dtype == tf.int32:
+                        d = tf.cast(d, tf.int64)
+                    shape_dims.append(d)
+                else:
+                    raise TypeError("Input shape dimension is neither scala tensor nor int")
+            return tf.stack(shape_dims)
+        else:
+            raise TypeError("Input shape is neither tensor nor list")
+
+
+
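For orientation, a usage sketch of the Module/Func API added above (illustrative only, not part of the patch): the library name "ops.so" and the function name "vector_add" are hypothetical stand-ins for whatever the user exported from TVM; the rest is the API exactly as defined in module.py.

    import tensorflow as tf
    from tvm.contrib import tf_op

    # Hypothetical kernel library exported from TVM ahead of time, e.g.
    # fadd = tvm.build(s, [A, B, C], "llvm", name="vector_add")
    # fadd.export_library("ops.so")
    module = tf_op.Module("ops.so")
    vector_add = module.func("vector_add", output_shape=[4], output_dtype=tf.float32)

    x = tf.constant([1.0, 2.0, 3.0, 4.0])
    y = tf.constant([4.0, 3.0, 2.0, 1.0])
    z = vector_add(x, y)  # dispatches to the TvmDsoOp2 kernel registered below

With two tensor arguments, Func.apply() resolves getattr(self.module, "tvm_dso_op2"), and because output_shape=[4] is a static list it is passed through the static_output_shape attribute rather than the dynamic shape input.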
diff --git a/src/contrib/tf_op/index_seq.h b/src/contrib/tf_op/index_seq.h
new file mode 100644
index 000000000000..3d01e405d239
--- /dev/null
+++ b/src/contrib/tf_op/index_seq.h
@@ -0,0 +1,44 @@
+/**
+ * Refer to std::index_sequence (since c++14)
+ * Utilities to invoke variadic function with template
+ */
+#ifndef __TFTVM_INDEX_SEQ__
+#define __TFTVM_INDEX_SEQ__
+
+template <std::size_t ... Idx>
+struct IndexSeq {};
+
+template <std::size_t N, std::size_t ... Tail>
+struct IndexSeqHelper : public IndexSeqHelper<N - 1U, N - 1U, Tail ...> {};
+
+template <std::size_t ... Tail>
+struct IndexSeqHelper<0U, Tail ...> {
+  using type = IndexSeq<Tail ...>;
+};
+
+template <std::size_t N>
+using make_index_sequence = typename IndexSeqHelper<N>::type;
+
+
+template <typename F, typename T, std::size_t N, std::size_t ... Idx>
+decltype(auto) apply_variadic_impl(F f, T(&t)[N], IndexSeq<Idx ...>) {
+  return f(t[Idx]...);
+}
+
+template <typename F, typename T, std::size_t N>
+decltype(auto) apply_variadic(F f, T(&t)[N]) {
+  return apply_variadic_impl(f, t, make_index_sequence<N>{});
+}
+
+template <typename F, typename T, std::size_t N, std::size_t ... Idx>
+decltype(auto) apply_variadic_by_ptrs_impl(F f, T(&t)[N], IndexSeq<Idx ...>) {
+  return f(&t[Idx]...);
+}
+
+template <typename F, typename T, std::size_t N>
+decltype(auto) apply_variadic_by_ptrs(F f, T(&t)[N]) {
+  return apply_variadic_by_ptrs_impl(f, t, make_index_sequence<N>{});
+}
+
+#endif
+
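These helpers exist so a function can be called with every element of a fixed-size array as a separate argument. A self-contained sketch of the expansion, where sum3 is a made-up stand-in for the packed function that the kernel file below actually passes in:

    #include <cstddef>  // std::size_t, used by index_seq.h
    #include <cstdio>
    #include "index_seq.h"

    // Stand-in callee: takes three pointers, the way the TVM packed
    // function ultimately receives three DLTensor* arguments.
    int sum3(int* a, int* b, int* c) { return *a + *b + *c; }

    int main() {
        int values[3] = {1, 2, 3};
        // make_index_sequence<3> is IndexSeq<0, 1, 2>, so the call below
        // expands to sum3(&values[0], &values[1], &values[2]).
        printf("%d\n", apply_variadic_by_ptrs(sum3, values));  // prints 6
        return 0;
    }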
diff --git a/src/contrib/tf_op/tvm_dso_op_kernels.cc b/src/contrib/tf_op/tvm_dso_op_kernels.cc
new file mode 100644
index 000000000000..178b8abc210e
--- /dev/null
+++ b/src/contrib/tf_op/tvm_dso_op_kernels.cc
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <cuda_runtime.h>
+#include <dlpack/dlpack.h>
+#include <tvm/runtime/module.h>
+#include <tvm/runtime/packed_func.h>
+#include <tvm/runtime/registry.h>
+#include <string>
+#include <vector>
+#include "tensorflow/core/framework/op_kernel.h"
+
+#include "index_seq.h"
+
+using namespace tensorflow;
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+typedef gtl::InlinedVector<int64, 4> ShapeContainer;
+
+
+template <typename DEVICE_TYPE>
+class TVMDSOOpTrait;
+
+
+class TensorAsBuf {
+  public:
+    Tensor inline_tensor;
+    Tensor* tensor;
+
+    size_t size;
+    size_t offset;
+
+    int device_type;
+
+    char* origin_buf;
+    char* buf;
+
+    void CopyToOrigin() {
+        if (buf == origin_buf) {
+            return;
+        }
+        if (device_type == kDLCPU) {
+            memcpy(origin_buf, buf + offset, size);
+        } else {
+            cudaMemcpy(origin_buf, buf + offset, size, cudaMemcpyDeviceToDevice);
+        }
+    }
+
+    void CopyFromOrigin() {
+        if (buf == origin_buf) {
+            return;
+        }
+        if (device_type == kDLCPU) {
+            memcpy(buf + offset, origin_buf, size);
+        } else {
+            cudaMemcpy(buf + offset, origin_buf, size, cudaMemcpyDeviceToDevice);
+        }
+    }
+};
+
+
+int GetDLPackDtype(const Tensor& tf_tensor, DLDataType* res) {
+    auto dtype = tf_tensor.dtype();
+    if (dtype == DT_FLOAT) {
+        res->code = kDLFloat;
+        res->bits = 32;
+        res->lanes = 1;
+    } else if (dtype == DT_INT64) {
+        res->code = kDLInt;
+        res->bits = 64;
+        res->lanes = 1;
+    } else if (dtype == DT_INT32) {
+        res->code = kDLInt;
+        res->bits = 32;
+        res->lanes = 1;
+    } else {
+        return -1;
+    }
+    return 0;
+}
+
+
+void EnsureAlignment(OpKernelContext* ctx, const Tensor& tensor, TensorAsBuf* out) {
+    char* buf = (char*) tensor.tensor_data().data();
+    out->origin_buf = buf;
+    out->size = tensor.TotalBytes();
+
+    int alignment = 64;
+    char* aligned = (char*)(((uint64_t)buf + alignment - 1) & (~ (alignment - 1)));
+    if (buf == aligned) {
+        out->tensor = const_cast<Tensor*>(&tensor);
+        out->buf = buf;
+        out->offset = 0;
+    } else {
+        TensorShape buf_shape;
+        int64 dims[1] = { (int64)(tensor.TotalBytes() + alignment) };
+        TensorShapeUtils::MakeShape(dims, 1, &buf_shape);
+
+        out->tensor = &out->inline_tensor;
+        ctx->allocate_temp(tensor.dtype(), buf_shape, out->tensor);
+
+        buf = (char*)(out->tensor->tensor_data().data());
+        char* buf_aligned = (char*)(((uint64_t)buf + alignment) & (~ (alignment - 1)));
+        out->buf = buf;
+        out->offset = buf_aligned - buf;
+    }
+}
+
+
+int MakeDLTensor(const TensorAsBuf& src, const DLContext& ctx, int64_t* tf_shape, DLTensor* out) {
+    DLDataType dlpack_type;
+    const Tensor& tensor = *src.tensor;
+
+    int status = GetDLPackDtype(tensor, &dlpack_type);
+    if (status != 0) {
+        return status;
+    }
+    out->ctx = ctx;
+    out->ndim = tensor.shape().dims();
+    out->shape = tf_shape;
+    out->strides = NULL;
+    out->byte_offset = 0;
+    out->dtype = dlpack_type;
+    out->data = src.buf + src.offset;
+    return 0;
+}
+
+
+template <>
+class TVMDSOOpTrait<CPUDevice> {
+  public:
+    static const int device_type = kDLCPU;
+
+    static int device_id(OpKernelContext* context) {
+        return 0;
+    }
+
+};
+
+
+template <>
+class TVMDSOOpTrait<GPUDevice> {
+  public:
+    static const int device_type = kDLGPU;
+
+    static int device_id(OpKernelContext* context) {
+        auto device_base = context->device();
+        auto gpu_device_info = device_base->tensorflow_gpu_device_info();
+        return gpu_device_info->gpu_id;
+    }
+};
+
+
+template <typename DEVICE_TYPE, int NUM_INPUTS>
+class TVMDSOOp : public OpKernel {
+
+private:
+    tvm::runtime::PackedFunc tvm_func;
+    string lib_path;
+    string func_name;
+
+    DataType output_dtype;
+
+    bool has_static_output_shape;
+    std::vector<int64> static_output_shape;
+
+    void initAttributes(OpKernelConstruction* context) {
+        context->GetAttr("lib_path", &lib_path);
+        context->GetAttr("func_name", &func_name);
+        context->GetAttr("output_dtype", &output_dtype);
+
+        context->GetAttr("has_static_output_shape", &has_static_output_shape);
+        context->GetAttr("static_output_shape", &static_output_shape);
+    }
+
+  public:
+    explicit TVMDSOOp(OpKernelConstruction* context) : OpKernel(context) {
+
+        // Get attr
+        initAttributes(context);
+
+        // Load TVM function from dynamic library
+        tvm::runtime::Module mod_dylib = tvm::runtime::Module::LoadFromFile(lib_path);
+        LOG(INFO) << "Verify dynamic loading from " << lib_path << " device_type=" << TVMDSOOpTrait<DEVICE_TYPE>::device_type;
+        tvm_func = mod_dylib.GetFunction(func_name);
+        CHECK(tvm_func != nullptr);
+    }
+
+    void Compute(OpKernelContext* context) override {
+
+        DLTensor args[NUM_INPUTS + 1];
+        TensorAsBuf buf_info[NUM_INPUTS];
+        ShapeContainer shapes[NUM_INPUTS];
+
+        int status;
+        int device_id = TVMDSOOpTrait<DEVICE_TYPE>::device_id(context);
+        int device_type = TVMDSOOpTrait<DEVICE_TYPE>::device_type;
+
+        DLContext dl_ctx = { DLDeviceType(device_type), device_id };
+
+        // Get output shape
+        TensorShape output_shape;
+        auto& output_shape_tensor = context->input(NUM_INPUTS);
+        if (has_static_output_shape) {
+            // use static output shape
+            const int64* dims = static_output_shape.data();
+            TensorShapeUtils::MakeShape(dims, static_output_shape.size(), &output_shape);
+        } else if (output_shape_tensor.dims() == 1) {
+            // use shape tensor values as output shape
+            const int64* dims = output_shape_tensor.flat<int64>().data();
+            TensorShapeUtils::MakeShape(dims, 1, &output_shape);
+        } else {
+            // use input tensor shape by default
+            output_shape = context->input(0).shape();
+        }
+
+        for (int i = 0; i < NUM_INPUTS; ++i) {
+            // Grab the input tensor
+            auto& input_tensor = context->input(i);
+
+            // Create shape container, should keep ref during execution
+            shapes[i] = input_tensor.shape().dim_sizes();
+            auto shape_ptr = (int64_t*) shapes[i].data();
+
+            TensorAsBuf& input = buf_info[i];
+            input.device_type = device_type;
+
+            EnsureAlignment(context, input_tensor, &input);
+            input.CopyFromOrigin();
+
+            status = MakeDLTensor(input, dl_ctx, shape_ptr, &args[i]);
+            OP_REQUIRES(context, status == 0, Status(error::INTERNAL, "Fail to create dlpack tensor for input"));
+        }
+
+        // Allocate output tensor
+        Tensor* output_tensor;
+        OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output_tensor));
+        auto output_shape_dim_buf = output_tensor->shape().dim_sizes();  // should keep alive on stack
+        auto output_shape_ptr = (int64_t*) output_shape_dim_buf.data();
+
+        TensorAsBuf output;
+        output.device_type = device_type;
+        EnsureAlignment(context, *output_tensor, &output);
+
+        status = MakeDLTensor(output, dl_ctx, output_shape_ptr, &args[NUM_INPUTS]);
+        OP_REQUIRES(context, status == 0, Status(error::INTERNAL, "Fail to create dlpack tensor for output"));
+
+        apply_variadic_by_ptrs(tvm_func, args);
+
+        output.CopyToOrigin();
+    }
+};
+
+
+
+#define REGISTER_TFTVM_KERNEL(n) \
+    REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n).Device(DEVICE_CPU), TVMDSOOp<CPUDevice, n>); \
+    REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n).Device(DEVICE_GPU), TVMDSOOp<GPUDevice, n>); \
+
+REGISTER_TFTVM_KERNEL(1)
+REGISTER_TFTVM_KERNEL(2)
+REGISTER_TFTVM_KERNEL(3)
+REGISTER_TFTVM_KERNEL(4)
+REGISTER_TFTVM_KERNEL(5)
+REGISTER_TFTVM_KERNEL(6)
+REGISTER_TFTVM_KERNEL(7)
+REGISTER_TFTVM_KERNEL(8)
+
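A note on the pointer arithmetic in EnsureAlignment above: rounding up to a 64-byte boundary is the usual power-of-two mask trick, and the temporary-buffer branch allocates TotalBytes() plus a full alignment's worth of slack precisely so that advancing by `+ alignment` cannot run past the end of the allocation. The rounding itself, in isolation:

    #include <cassert>
    #include <cstdint>

    // Round addr up to the next alignment-byte boundary; alignment must
    // be a power of two (the kernel above uses 64).
    uint64_t align_up(uint64_t addr, uint64_t alignment) {
        return (addr + alignment - 1) & ~(alignment - 1);
    }

    int main() {
        assert(align_up(128, 64) == 128);  // already aligned: unchanged
        assert(align_up(129, 64) == 192);  // rounded up to next boundary
        return 0;
    }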
diff --git a/src/contrib/tf_op/tvm_dso_ops.cc b/src/contrib/tf_op/tvm_dso_ops.cc
new file mode 100644
index 000000000000..8369e928599b
--- /dev/null
+++ b/src/contrib/tf_op/tvm_dso_ops.cc
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "tensorflow/core/framework/op.h"
+
+using namespace tensorflow;
+
+#define REGISTER_TFTVM_OP(n) REGISTER_OP("TvmDsoOp" #n) \
+    .Output("output: output_dtype") \
+    .Attr("lib_path: string") \
+    .Attr("func_name: string") \
+    .Attr("output_dtype: {int32, int64, float} = DT_FLOAT") \
+    .Attr("static_output_shape: list(int) >= 0 = []") \
+    .Attr("has_static_output_shape: bool") \
+
+
+REGISTER_TFTVM_OP(1)
+    .Input("input: T").Attr("T: type") \
+    .Input("dynamic_output_shape: int64");
+
+REGISTER_TFTVM_OP(2)
+    .Input("input1: T1").Attr("T1: type")
+    .Input("input2: T2").Attr("T2: type")
+    .Input("dynamic_output_shape: int64");
+
+REGISTER_TFTVM_OP(3)
+    .Input("input1: T1").Attr("T1: type")
+    .Input("input2: T2").Attr("T2: type")
+    .Input("input3: T3").Attr("T3: type")
+    .Input("dynamic_output_shape: int64");
+
+REGISTER_TFTVM_OP(4)
+    .Input("input1: T1").Attr("T1: type")
+    .Input("input2: T2").Attr("T2: type")
+    .Input("input3: T3").Attr("T3: type")
+    .Input("input4: T4").Attr("T4: type")
+    .Input("dynamic_output_shape: int64");
+
+REGISTER_TFTVM_OP(5)
+    .Input("input1: T1").Attr("T1: type")
+    .Input("input2: T2").Attr("T2: type")
+    .Input("input3: T3").Attr("T3: type")
+    .Input("input4: T4").Attr("T4: type")
+    .Input("input5: T5").Attr("T5: type")
+    .Input("dynamic_output_shape: int64");
+
+REGISTER_TFTVM_OP(6)
+    .Input("input1: T1").Attr("T1: type")
+    .Input("input2: T2").Attr("T2: type")
+    .Input("input3: T3").Attr("T3: type")
+    .Input("input4: T4").Attr("T4: type")
+    .Input("input5: T5").Attr("T5: type")
+    .Input("input6: T6").Attr("T6: type")
+    .Input("dynamic_output_shape: int64");
+
+REGISTER_TFTVM_OP(7)
+    .Input("input1: T1").Attr("T1: type")
+    .Input("input2: T2").Attr("T2: type")
+    .Input("input3: T3").Attr("T3: type")
+    .Input("input4: T4").Attr("T4: type")
+    .Input("input5: T5").Attr("T5: type")
+    .Input("input6: T6").Attr("T6: type")
+    .Input("input7: T7").Attr("T7: type")
+    .Input("dynamic_output_shape: int64");
+
+REGISTER_TFTVM_OP(8)
+    .Input("input1: T1").Attr("T1: type")
+    .Input("input2: T2").Attr("T2: type")
+    .Input("input3: T3").Attr("T3: type")
+    .Input("input4: T4").Attr("T4: type")
+    .Input("input5: T5").Attr("T5: type")
+    .Input("input6: T6").Attr("T6: type")
+    .Input("input7: T7").Attr("T7: type")
+    .Input("input8: T8").Attr("T8: type")
+    .Input("dynamic_output_shape: int64");
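To make the macro above easier to read, this is REGISTER_TFTVM_OP(2) written out by hand after expansion — the same registration the code above already performs, shown only for reference, not an additional op:

    REGISTER_OP("TvmDsoOp2")
        .Output("output: output_dtype")
        .Attr("lib_path: string")
        .Attr("func_name: string")
        .Attr("output_dtype: {int32, int64, float} = DT_FLOAT")
        .Attr("static_output_shape: list(int) >= 0 = []")
        .Attr("has_static_output_shape: bool")
        .Input("input1: T1").Attr("T1: type")
        .Input("input2: T2").Attr("T2: type")
        .Input("dynamic_output_shape: int64");

Every variant thus carries a trailing int64 shape input in addition to its data inputs, which is why the Python wrapper always supplies dynamic_output_shape even when the static-shape attribute is used.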
From 9c3f732d1a57179ab3e3f24ec02de55e5d9d7ddf Mon Sep 17 00:00:00 2001
From: baoxinqi
Date: Wed, 4 Dec 2019 12:04:24 +0800
Subject: [PATCH 02/30] feat: Update cmake script to work with c++11 and in-repo build

---
 cmake/modules/contrib/TFOP.cmake | 24 ++++++++++++------------
 src/contrib/tf_op/index_seq.h    | 16 ++++++++--------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/cmake/modules/contrib/TFOP.cmake b/cmake/modules/contrib/TFOP.cmake
index 52d017c08e16..8720d4b308dd 100644
--- a/cmake/modules/contrib/TFOP.cmake
+++ b/cmake/modules/contrib/TFOP.cmake
@@ -1,15 +1,16 @@
 
 if(NOT USE_TFOP STREQUAL "OFF")
 
-  if ("${TVM_HOME}" STREQUAL "")
-    message(FATAL_ERROR "TVM_HOME is not defined")
-  else()
-    message("Use TVM_HOME=\"${TVM_HOME}\"")
-  endif()
-
-  include_directories(${TVM_HOME}/include)
-  include_directories(${TVM_HOME}/3rdparty/dlpack/include)
-  include_directories(${TVM_HOME}/3rdparty/dmlc-core/include)
+  # If want build this directly comment out below lines.
+  # if ("${TVM_HOME}" STREQUAL "")
+  #   message(FATAL_ERROR "TVM_HOME is not defined")
+  # else()
+  #   message("Use TVM_HOME=\"${TVM_HOME}\"")
+  #endif()
+  # include_directories(${TVM_HOME}/include)
+  # include_directories(${TVM_HOME}/3rdparty/dlpack/include)
+  # include_directories(${TVM_HOME}/3rdparty/dmlc-core/include)
+  # set(TFTVM_LINK_FLAGS -ltvm_runtime -L${TVM_HOME}/build)
 
   execute_process(COMMAND python -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_compile_flags()))"
     OUTPUT_VARIABLE TF_COMPILE_FLAGS_STR
     RESULT_VARIABLE TF_STATUS)
@@ -26,18 +27,17 @@ if(NOT USE_TFOP STREQUAL "OFF")
   endif()
 
   string(REGEX REPLACE "\n" " " TF_FLAGS "${TF_COMPILE_FLAGS} ${TF_LINK_FLAGS}")
-  message("Use TensorFlow flags=\"${TF_FLAGS}\"")
   separate_arguments(TF_COMPILE_FLAGS UNIX_COMMAND ${TF_COMPILE_FLAGS_STR})
   separate_arguments(TF_LINK_FLAGS UNIX_COMMAND ${TF_LINK_FLAGS_STR})
 
 
   set(OP_LIBRARY_NAME tvm_dso_op)
-  file(GLOB_RECURSE TFTVM_SRCS src/*.cc)
+  file(GLOB_RECURSE TFTVM_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/src/contrib/tf_op/*.cc)
   add_library(${OP_LIBRARY_NAME} SHARED ${TFTVM_SRCS})
   set_target_properties(${OP_LIBRARY_NAME} PROPERTIES PREFIX "")
 
   set(TFTVM_COMPILE_FLAGS -O2 -ldl -g)
-  set(TFTVM_LINK_FLAGS -ltvm_runtime -L${TVM_HOME}/build)
+  set(TFTVM_LINK_FLAGS -ltvm_runtime)
   target_compile_options(${OP_LIBRARY_NAME} PUBLIC ${TFTVM_COMPILE_FLAGS} ${TF_COMPILE_FLAGS})
   target_link_options(${OP_LIBRARY_NAME} PUBLIC ${TFTVM_LINK_FLAGS} ${TF_LINK_FLAGS})
 
diff --git a/src/contrib/tf_op/index_seq.h b/src/contrib/tf_op/index_seq.h
index 3d01e405d239..6a0479e609c3 100644
--- a/src/contrib/tf_op/index_seq.h
+++ b/src/contrib/tf_op/index_seq.h
@@ -21,23 +21,23 @@ using make_index_sequence = typename IndexSeqHelper<N>::type;
 
 
 template <typename F, typename T, std::size_t N, std::size_t ... Idx>
-decltype(auto) apply_variadic_impl(F f, T(&t)[N], IndexSeq<Idx ...>) {
-  return f(t[Idx]...);
+void apply_variadic_impl(F f, T(&t)[N], IndexSeq<Idx ...>) {
+  f(t[Idx]...);
 }
 
 template <typename F, typename T, std::size_t N>
-decltype(auto) apply_variadic(F f, T(&t)[N]) {
-  return apply_variadic_impl(f, t, make_index_sequence<N>{});
+void apply_variadic(F f, T(&t)[N]) {
+  apply_variadic_impl(f, t, make_index_sequence<N>{});
 }
 
 template <typename F, typename T, std::size_t N, std::size_t ... Idx>
-decltype(auto) apply_variadic_by_ptrs_impl(F f, T(&t)[N], IndexSeq<Idx ...>) {
-  return f(&t[Idx]...);
+void apply_variadic_by_ptrs_impl(F f, T(&t)[N], IndexSeq<Idx ...>) {
+  f(&t[Idx]...);
 }
 
 template <typename F, typename T, std::size_t N>
-decltype(auto) apply_variadic_by_ptrs(F f, T(&t)[N]) {
-  return apply_variadic_by_ptrs_impl(f, t, make_index_sequence<N>{});
+void apply_variadic_by_ptrs(F f, T(&t)[N]) {
+  apply_variadic_by_ptrs_impl(f, t, make_index_sequence<N>{});
 }
 
 #endif

From 83612178f95400f284a5e658600f583960c3352f Mon Sep 17 00:00:00 2001
From: baoxinqi
Date: Wed, 4 Dec 2019 12:32:39 +0800
Subject: [PATCH 03/30] feat: Use libtvm as oplib dependency

---
 cmake/modules/contrib/TFOP.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/modules/contrib/TFOP.cmake b/cmake/modules/contrib/TFOP.cmake
index 8720d4b308dd..2517414208fe 100644
--- a/cmake/modules/contrib/TFOP.cmake
+++ b/cmake/modules/contrib/TFOP.cmake
@@ -35,9 +35,9 @@ if(NOT USE_TFOP STREQUAL "OFF")
   file(GLOB_RECURSE TFTVM_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/src/contrib/tf_op/*.cc)
   add_library(${OP_LIBRARY_NAME} SHARED ${TFTVM_SRCS})
   set_target_properties(${OP_LIBRARY_NAME} PROPERTIES PREFIX "")
+  add_dependencies(${OP_LIBRARY_NAME} tvm)
 
   set(TFTVM_COMPILE_FLAGS -O2 -ldl -g)
-  set(TFTVM_LINK_FLAGS -ltvm_runtime)
   target_compile_options(${OP_LIBRARY_NAME} PUBLIC ${TFTVM_COMPILE_FLAGS} ${TF_COMPILE_FLAGS})
   target_link_options(${OP_LIBRARY_NAME} PUBLIC ${TFTVM_LINK_FLAGS} ${TF_LINK_FLAGS})

From
ef387fd607c1ae0e87215d4f82b0bca791ddcb33 Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Wed, 4 Dec 2019 15:42:02 +0800 Subject: [PATCH 04/30] fix: Add missing link dependency to libtvm --- cmake/modules/contrib/TFOP.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/modules/contrib/TFOP.cmake b/cmake/modules/contrib/TFOP.cmake index 2517414208fe..46ae422b1bf2 100644 --- a/cmake/modules/contrib/TFOP.cmake +++ b/cmake/modules/contrib/TFOP.cmake @@ -35,6 +35,7 @@ if(NOT USE_TFOP STREQUAL "OFF") file(GLOB_RECURSE TFTVM_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/src/contrib/tf_op/*.cc) add_library(${OP_LIBRARY_NAME} SHARED ${TFTVM_SRCS}) set_target_properties(${OP_LIBRARY_NAME} PROPERTIES PREFIX "") + set(TFTVM_LINK_FLAGS -ltvm -L${CMAKE_CURRENT_BINARY_DIR}) add_dependencies(${OP_LIBRARY_NAME} tvm) set(TFTVM_COMPILE_FLAGS -O2 -ldl -g) From 056d4b1f1dcfce94c698e4b435d66176441c3f23 Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Thu, 2 Jan 2020 18:10:36 +0800 Subject: [PATCH 05/30] feat: Update tf tvmdso op by review comments --- CMakeLists.txt | 4 +- .../contrib/{TFOP.cmake => TF_TVMDSOOP.cmake} | 10 +- python/tvm/contrib/tf_op/module.py | 18 ++++ src/codegen/build_module.cc | 1 + src/contrib/tf_op/index_seq.h | 21 +++- src/contrib/tf_op/tvm_dso_op_kernels.cc | 96 +++++++++---------- 6 files changed, 91 insertions(+), 59 deletions(-) rename cmake/modules/contrib/{TFOP.cmake => TF_TVMDSOOP.cmake} (80%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2ec625d0dbed..2afd78424437 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,7 +42,7 @@ tvm_option(USE_MSVC_MT "Build with MT" OFF) tvm_option(USE_MICRO "Build with Micro" OFF) tvm_option(INSTALL_DEV "Install compiler infrastructure" OFF) tvm_option(HIDE_PRIVATE_SYMBOLS "Compile with -fvisibility=hidden." OFF) -tvm_option(USE_TFOP "Build with TensorFlow TVMDSOOp" OFF) +tvm_option(USE_TF_TVMDSOOP "Build with TensorFlow TVMDSOOp" OFF) # 3rdparty libraries tvm_option(DLPACK_PATH "Path to DLPACK" "3rdparty/dlpack/include") @@ -258,7 +258,7 @@ include(cmake/modules/contrib/MicroStandaloneRuntime.cmake) include(cmake/modules/contrib/Sort.cmake) include(cmake/modules/contrib/NNPack.cmake) include(cmake/modules/contrib/HybridDump.cmake) -include(cmake/modules/contrib/TFOP.cmake) +include(cmake/modules/contrib/TF_TVMDSOOP.cmake) if(NOT MSVC) include(CheckCXXCompilerFlag) diff --git a/cmake/modules/contrib/TFOP.cmake b/cmake/modules/contrib/TF_TVMDSOOP.cmake similarity index 80% rename from cmake/modules/contrib/TFOP.cmake rename to cmake/modules/contrib/TF_TVMDSOOP.cmake index 46ae422b1bf2..f104ee618f1b 100644 --- a/cmake/modules/contrib/TFOP.cmake +++ b/cmake/modules/contrib/TF_TVMDSOOP.cmake @@ -1,5 +1,5 @@ -if(NOT USE_TFOP STREQUAL "OFF") +if(NOT USE_TF_TVMDSOOP STREQUAL "OFF") # If want build this directly comment out below lines. 
# if ("${TVM_HOME}" STREQUAL "") @@ -12,14 +12,16 @@ if(NOT USE_TFOP STREQUAL "OFF") # include_directories(${TVM_HOME}/3rdparty/dmlc-core/include) # set(TFTVM_LINK_FLAGS -ltvm_runtime -L${TVM_HOME}/build) - execute_process(COMMAND python -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_compile_flags()))" + find_package(Python COMPONENTS Interpreter) + + execute_process(COMMAND ${Python_EXECUTABLE} -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_compile_flags()))" OUTPUT_VARIABLE TF_COMPILE_FLAGS_STR RESULT_VARIABLE TF_STATUS) if (NOT ${TF_STATUS} EQUAL 0) message(FATAL_ERROR "Fail to get TensorFlow compile flags") endif() - execute_process(COMMAND python -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_link_flags()))" + execute_process(COMMAND ${Python_EXECUTABLE} -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_link_flags()))" OUTPUT_VARIABLE TF_LINK_FLAGS_STR RESULT_VARIABLE TF_STATUS) if (NOT ${TF_STATUS} EQUAL 0) @@ -38,7 +40,7 @@ if(NOT USE_TFOP STREQUAL "OFF") set(TFTVM_LINK_FLAGS -ltvm -L${CMAKE_CURRENT_BINARY_DIR}) add_dependencies(${OP_LIBRARY_NAME} tvm) - set(TFTVM_COMPILE_FLAGS -O2 -ldl -g) + # set(TFTVM_COMPILE_FLAGS ${CMAKE_CXX_FLAGS}) target_compile_options(${OP_LIBRARY_NAME} PUBLIC ${TFTVM_COMPILE_FLAGS} ${TF_COMPILE_FLAGS}) target_link_options(${OP_LIBRARY_NAME} PUBLIC ${TFTVM_LINK_FLAGS} ${TF_LINK_FLAGS}) diff --git a/python/tvm/contrib/tf_op/module.py b/python/tvm/contrib/tf_op/module.py index 9a9e1f386fc0..e31778cf06f8 100644 --- a/python/tvm/contrib/tf_op/module.py +++ b/python/tvm/contrib/tf_op/module.py @@ -19,11 +19,28 @@ class Module(): + """Module container of TensorFlow TVMDSO op which wraps exported + TVM op implementation library to be called on TensorFlow side""" def __init__(self, lib_path): self.lib_path = lib_path def func(self, name, output_dtype=None, output_shape=None): + """Get tvm op function wrapped as TensorFlow tensor to tensor function + + Parameters + ---------- + name: str + function name + output_dtype: str or TensorFlow datatype + Output datatype, default is float32 + output_shape: List of integer/tf scalar tensor or tf shape tensor + Output shape, default the same with first input's shape + + Returns + ---------- + Func object that act as TensorFlow tensor to tensor function. + """ return Func(self.lib_path, name, output_dtype, output_shape) def __getitem__(self, func_name): @@ -31,6 +48,7 @@ def __getitem__(self, func_name): class Func(): + """Function object that act as TensorFlow tensor to tensor function.""" def __init__(self, lib_path, func_name, output_dtype, output_shape): self.lib_path = lib_path diff --git a/src/codegen/build_module.cc b/src/codegen/build_module.cc index 80fd57af66f9..0fb761d768b0 100644 --- a/src/codegen/build_module.cc +++ b/src/codegen/build_module.cc @@ -572,6 +572,7 @@ runtime::Module build(const Map>& inputs, auto& fhost = host_dev_funcs[0]; auto& fdevice = host_dev_funcs[1]; // Get the module for a certain target. + printf("build device module: %s\n", it.first->device_name.c_str()); runtime::Module mdev = DeviceBuild(fdevice, it.first); for (const auto& it : fhost) { fhost_all.push_back(it); diff --git a/src/contrib/tf_op/index_seq.h b/src/contrib/tf_op/index_seq.h index 6a0479e609c3..98fcd717b224 100644 --- a/src/contrib/tf_op/index_seq.h +++ b/src/contrib/tf_op/index_seq.h @@ -1,4 +1,23 @@ -/** +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* * Refer to std::index_sequence (since c++14) * Utilities to invoke variadic function with template */ diff --git a/src/contrib/tf_op/tvm_dso_op_kernels.cc b/src/contrib/tf_op/tvm_dso_op_kernels.cc index 178b8abc210e..a1c991c1d67e 100644 --- a/src/contrib/tf_op/tvm_dso_op_kernels.cc +++ b/src/contrib/tf_op/tvm_dso_op_kernels.cc @@ -28,11 +28,10 @@ #include "index_seq.h" -using namespace tensorflow; typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; -typedef gtl::InlinedVector ShapeContainer; +typedef tensorflow::gtl::InlinedVector ShapeContainer; template @@ -41,8 +40,8 @@ class TVMDSOOpTrait; class TensorAsBuf { public: - Tensor inline_tensor; - Tensor* tensor; + tensorflow::Tensor inline_tensor; + tensorflow::Tensor* tensor; size_t size; size_t offset; @@ -76,28 +75,22 @@ class TensorAsBuf { }; -int GetDLPackDtype(const Tensor& tf_tensor, DLDataType* res) { +tensorflow::Status GetDLPackDtype(const tensorflow::Tensor& tf_tensor, DLDataType* res) { auto dtype = tf_tensor.dtype(); - if (dtype == DT_FLOAT) { - res->code = kDLFloat; - res->bits = 32; - res->lanes = 1; - } else if (dtype == DT_INT64) { - res->code = kDLInt; - res->bits = 64; - res->lanes = 1; - } else if (dtype == DT_INT32) { - res->code = kDLInt; - res->bits = 32; - res->lanes = 1; + if (dtype == tensorflow::DT_FLOAT) { + *res = {kDLFloat, 32, 1}; + } else if (dtype == tensorflow::DT_INT64) { + *res = {kDLInt, 64, 1}; + } else if (dtype == tensorflow::DT_INT32) { + *res = {kDLInt, 32, 1}; } else { - return -1; + return tensorflow::Status(tensorflow::error::INTERNAL, "Fail to get dlpack datatype"); } - return 0; + return tensorflow::Status::OK(); } -void EnsureAlignment(OpKernelContext* ctx, const Tensor& tensor, TensorAsBuf* out) { +void EnsureAlignment(tensorflow::OpKernelContext* ctx, const tensorflow::Tensor& tensor, TensorAsBuf* out) { char* buf = (char*) tensor.tensor_data().data(); out->origin_buf = buf; out->size = tensor.TotalBytes(); @@ -105,13 +98,13 @@ void EnsureAlignment(OpKernelContext* ctx, const Tensor& tensor, TensorAsBuf* ou int alignment = 64; char* aligned = (char*)(((uint64_t)buf + alignment - 1) & (~ (alignment - 1))); if (buf == aligned) { - out->tensor = const_cast(&tensor); + out->tensor = const_cast(&tensor); out->buf = buf; out->offset = 0; } else { - TensorShape buf_shape; - int64 dims[1] = { (int64)(tensor.TotalBytes() + alignment) }; - TensorShapeUtils::MakeShape(dims, 1, &buf_shape); + tensorflow::TensorShape buf_shape; + tensorflow::int64 dims[1] = { (tensorflow::int64)(tensor.TotalBytes() + alignment) }; + tensorflow::TensorShapeUtils::MakeShape(dims, 1, &buf_shape); out->tensor = &out->inline_tensor; ctx->allocate_temp(tensor.dtype(), buf_shape, out->tensor); @@ -124,22 +117,22 @@ void EnsureAlignment(OpKernelContext* ctx, const Tensor& tensor, TensorAsBuf* ou } 
-int MakeDLTensor(const TensorAsBuf& src, const DLContext& ctx, int64_t* tf_shape, DLTensor* out) { +tensorflow::Status MakeDLTensor(const TensorAsBuf& src, const DLContext& ctx, int64_t* tf_shape, DLTensor* out) { DLDataType dlpack_type; - const Tensor& tensor = *src.tensor; + const tensorflow::Tensor& tensor = *src.tensor; - int status = GetDLPackDtype(tensor, &dlpack_type); - if (status != 0) { + auto status = GetDLPackDtype(tensor, &dlpack_type); + if (! status.ok()) { return status; } out->ctx = ctx; out->ndim = tensor.shape().dims(); out->shape = tf_shape; - out->strides = NULL; + out->strides = nullptr; out->byte_offset = 0; out->dtype = dlpack_type; out->data = src.buf + src.offset; - return 0; + return tensorflow::Status::OK(); } @@ -148,7 +141,7 @@ class TVMDSOOpTrait { public: static const int device_type = kDLCPU; - static int device_id(OpKernelContext* context) { + static int device_id(tensorflow::OpKernelContext* context) { return 0; } @@ -160,7 +153,7 @@ class TVMDSOOpTrait { public: static const int device_type = kDLGPU; - static int device_id(OpKernelContext* context) { + static int device_id(tensorflow::OpKernelContext* context) { auto device_base = context->device(); auto gpu_device_info = device_base->tensorflow_gpu_device_info(); return gpu_device_info->gpu_id; @@ -169,19 +162,19 @@ class TVMDSOOpTrait { template -class TVMDSOOp : public OpKernel { +class TVMDSOOp : public tensorflow::OpKernel { private: tvm::runtime::PackedFunc tvm_func; - string lib_path; - string func_name; + std::string lib_path; + std::string func_name; - DataType output_dtype; + tensorflow::DataType output_dtype; bool has_static_output_shape; - std::vector static_output_shape; + std::vector static_output_shape; - void initAttributes(OpKernelConstruction* context) { + void initAttributes(tensorflow::OpKernelConstruction* context) { context->GetAttr("lib_path", &lib_path); context->GetAttr("func_name", &func_name); context->GetAttr("output_dtype", &output_dtype); @@ -191,41 +184,40 @@ class TVMDSOOp : public OpKernel { } public: - explicit TVMDSOOp(OpKernelConstruction* context) : OpKernel(context) { + explicit TVMDSOOp(tensorflow::OpKernelConstruction* context) : tensorflow::OpKernel(context) { // Get attr initAttributes(context); // Load TVM function from dynamic library tvm::runtime::Module mod_dylib = tvm::runtime::Module::LoadFromFile(lib_path); - LOG(INFO) << "Verify dynamic loading from " << lib_path << " device_type=" << TVMDSOOpTrait::device_type; tvm_func = mod_dylib.GetFunction(func_name); CHECK(tvm_func != nullptr); } - void Compute(OpKernelContext* context) override { + void Compute(tensorflow::OpKernelContext* context) override { DLTensor args[NUM_INPUTS + 1]; TensorAsBuf buf_info[NUM_INPUTS]; ShapeContainer shapes[NUM_INPUTS]; - int status; + tensorflow::Status status; int device_id = TVMDSOOpTrait::device_id(context); int device_type = TVMDSOOpTrait::device_type; DLContext dl_ctx = { DLDeviceType(device_type), device_id }; // Get output shape - TensorShape output_shape; + tensorflow::TensorShape output_shape; auto& output_shape_tensor = context->input(NUM_INPUTS); if (has_static_output_shape) { // use static output shape - const int64* dims = static_output_shape.data(); - TensorShapeUtils::MakeShape(dims, static_output_shape.size(), &output_shape); + const tensorflow::int64* dims = static_output_shape.data(); + tensorflow::TensorShapeUtils::MakeShape(dims, static_output_shape.size(), &output_shape); } else if (output_shape_tensor.dims() == 1) { // use shape tensor values as output 
shape - const int64* dims = output_shape_tensor.flat().data(); - TensorShapeUtils::MakeShape(dims, 1, &output_shape); + const tensorflow::int64* dims = output_shape_tensor.flat().data(); + tensorflow::TensorShapeUtils::MakeShape(dims, 1, &output_shape); } else { // use input tensor shape by default output_shape = context->input(0).shape(); @@ -246,11 +238,11 @@ class TVMDSOOp : public OpKernel { input.CopyFromOrigin(); status = MakeDLTensor(input, dl_ctx, shape_ptr, &args[i]); - OP_REQUIRES(context, status == 0, Status(error::INTERNAL, "Fail to create dlpack tensor for input")); + OP_REQUIRES_OK(context, status); } // Allocate output tensor - Tensor* output_tensor; + tensorflow::Tensor* output_tensor; OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output_tensor)); auto output_shape_dim_buf = output_tensor->shape().dim_sizes(); // should keep alive on stack auto output_shape_ptr = (int64_t*) output_shape_dim_buf.data(); @@ -260,7 +252,7 @@ class TVMDSOOp : public OpKernel { EnsureAlignment(context, *output_tensor, &output); status = MakeDLTensor(output, dl_ctx, output_shape_ptr, &args[NUM_INPUTS]); - OP_REQUIRES(context, status == 0, Status(error::INTERNAL, "Fail to create dlpack tensor for output")); + OP_REQUIRES_OK(context, status); apply_variadic_by_ptrs(tvm_func, args); @@ -271,8 +263,8 @@ class TVMDSOOp : public OpKernel { #define REGISTER_TFTVM_KERNEL(n) \ - REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n).Device(DEVICE_CPU), TVMDSOOp); \ - REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n).Device(DEVICE_GPU), TVMDSOOp); \ + REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n).Device(tensorflow::DEVICE_CPU), TVMDSOOp); \ + REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n).Device(tensorflow::DEVICE_GPU), TVMDSOOp); \ REGISTER_TFTVM_KERNEL(1) REGISTER_TFTVM_KERNEL(2) From 69df54930b6393a16dfd4b07641d64519db0a26c Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Thu, 16 Jan 2020 16:21:08 +0800 Subject: [PATCH 06/30] fix: Update with pr comments --- cmake/modules/contrib/TF_TVMDSOOP.cmake | 28 ++++++++++++++----------- python/tvm/contrib/tf_op/module.py | 6 ++---- src/contrib/tf_op/tvm_dso_op_kernels.cc | 18 ++++++++++++++-- 3 files changed, 34 insertions(+), 18 deletions(-) diff --git a/cmake/modules/contrib/TF_TVMDSOOP.cmake b/cmake/modules/contrib/TF_TVMDSOOP.cmake index f104ee618f1b..97e5dd4d2190 100644 --- a/cmake/modules/contrib/TF_TVMDSOOP.cmake +++ b/cmake/modules/contrib/TF_TVMDSOOP.cmake @@ -1,17 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. if(NOT USE_TF_TVMDSOOP STREQUAL "OFF") - - # If want build this directly comment out below lines. 
- # if ("${TVM_HOME}" STREQUAL "") - # message(FATAL_ERROR "TVM_HOME is not defined") - # else() - # message("Use TVM_HOME=\"${TVM_HOME}\"") - #endif() - # include_directories(${TVM_HOME}/include) - # include_directories(${TVM_HOME}/3rdparty/dlpack/include) - # include_directories(${TVM_HOME}/3rdparty/dmlc-core/include) - # set(TFTVM_LINK_FLAGS -ltvm_runtime -L${TVM_HOME}/build) - find_package(Python COMPONENTS Interpreter) execute_process(COMMAND ${Python_EXECUTABLE} -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_compile_flags()))" diff --git a/python/tvm/contrib/tf_op/module.py b/python/tvm/contrib/tf_op/module.py index e31778cf06f8..fd977bd218f8 100644 --- a/python/tvm/contrib/tf_op/module.py +++ b/python/tvm/contrib/tf_op/module.py @@ -66,9 +66,7 @@ def __init__(self, lib_path, func_name, output_dtype, output_shape): elif output_shape is not None: self.dynamic_output_shape = self._pack_shape_tensor(output_shape) - # TODO: support non-xpu device - #self.device = device - # delay initialization to called first time, where num input arguments is known + # delay op initialization to where Func.apply() get called first time self.tvm_dso_op = None self.module = load_library.load_op_library('tvm_dso_op.so') @@ -113,7 +111,7 @@ def _pack_shape_tensor(self, shape): d = tf.cast(d, tf.int64) shape_dims.append(d) else: - raise TypeError("Input shape dimension is neither scala tensor nor int") + raise TypeError("Input shape dimension is neither scalar tensor nor int") return tf.stack(shape_dims) else: raise TypeError("Input shape is neither tensor nor list") diff --git a/src/contrib/tf_op/tvm_dso_op_kernels.cc b/src/contrib/tf_op/tvm_dso_op_kernels.cc index a1c991c1d67e..9f6b1d0c285b 100644 --- a/src/contrib/tf_op/tvm_dso_op_kernels.cc +++ b/src/contrib/tf_op/tvm_dso_op_kernels.cc @@ -34,10 +34,16 @@ typedef Eigen::GpuDevice GPUDevice; typedef tensorflow::gtl::InlinedVector ShapeContainer; +// Op utility trait for diffrent device type template template class TVMDSOOpTrait; +// Buffer information used for actual computation. +// Each buffer is associated with one TensorFlow tensor +// whose underlying buffer is record into "origin_buf". +// For input tensor, we copy data from origin_buf to buf +// and for output tensor, copy data from buf to origin_buf class TensorAsBuf { public: tensorflow::Tensor inline_tensor; @@ -57,8 +63,11 @@ class TensorAsBuf { } if (device_type == kDLCPU) { memcpy(origin_buf, buf + offset, size); - } else { + } else if (device_type == kDLGPU) { cudaMemcpy(origin_buf, buf + offset, size, cudaMemcpyDeviceToDevice); + } else { + LOG(FATAL) << "Only support CPU and CUDA now. Device " + << device_type << " is not implemented currently"; } } @@ -68,8 +77,11 @@ class TensorAsBuf { } if (device_type == kDLCPU) { memcpy(buf + offset, origin_buf, size); - } else { + } else if (device_type == kDLGPU) { cudaMemcpy(buf + offset, origin_buf, size, cudaMemcpyDeviceToDevice); + } else { + LOG(FATAL) << "Only support CPU and CUDA now. 
Device " + << device_type << " is not implemented currently"; } } }; @@ -90,6 +102,7 @@ tensorflow::Status GetDLPackDtype(const tensorflow::Tensor& tf_tensor, DLDataTyp } +// Ensure buffer used for actual computation take 64byte alignment void EnsureAlignment(tensorflow::OpKernelContext* ctx, const tensorflow::Tensor& tensor, TensorAsBuf* out) { char* buf = (char*) tensor.tensor_data().data(); out->origin_buf = buf; @@ -117,6 +130,7 @@ void EnsureAlignment(tensorflow::OpKernelContext* ctx, const tensorflow::Tensor& } +// Create DLPack tensor from TensorFlow tensor tensorflow::Status MakeDLTensor(const TensorAsBuf& src, const DLContext& ctx, int64_t* tf_shape, DLTensor* out) { DLDataType dlpack_type; const tensorflow::Tensor& tensor = *src.tensor; From c2e72a6a2e7426af690bbef6004c1f8cca285154 Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Thu, 16 Jan 2020 17:23:17 +0800 Subject: [PATCH 07/30] fix: Fix lint --- cmake/config.cmake | 2 +- python/tvm/contrib/tf_op/__init__.py | 2 +- python/tvm/contrib/tf_op/module.py | 178 ++++++++++++------------ src/contrib/tf_op/index_seq.h | 6 +- src/contrib/tf_op/tvm_dso_op_kernels.cc | 124 ++++++++++------- src/contrib/tf_op/tvm_dso_ops.cc | 2 - 6 files changed, 164 insertions(+), 150 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 65556998d185..bece89261f5a 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -195,4 +195,4 @@ set(USE_VTA_FPGA OFF) set(USE_EXAMPLE_EXT_RUNTIME OFF) # Whether to build the TensorFlow TVMDSOOp module -set(USE_TFOP OFF) +set(USE_TF_TVMDSOOP OFF) diff --git a/python/tvm/contrib/tf_op/__init__.py b/python/tvm/contrib/tf_op/__init__.py index 9f80b266cb91..cabd8b716e7d 100644 --- a/python/tvm/contrib/tf_op/__init__.py +++ b/python/tvm/contrib/tf_op/__init__.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - +"""Module container of TensorFlow TVMDSO op""" from . import module Module = module.Module diff --git a/python/tvm/contrib/tf_op/module.py b/python/tvm/contrib/tf_op/module.py index fd977bd218f8..ed2622fd3cff 100644 --- a/python/tvm/contrib/tf_op/module.py +++ b/python/tvm/contrib/tf_op/module.py @@ -14,107 +14,105 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
+"""Module container of TensorFlow TVMDSO op""" import tensorflow as tf from tensorflow.python.framework import load_library class Module(): - """Module container of TensorFlow TVMDSO op which wraps exported - TVM op implementation library to be called on TensorFlow side""" + """Module container of TensorFlow TVMDSO op which wraps exported + TVM op implementation library to be called on TensorFlow side""" - def __init__(self, lib_path): - self.lib_path = lib_path + def __init__(self, lib_path): + self.lib_path = lib_path - def func(self, name, output_dtype=None, output_shape=None): - """Get tvm op function wrapped as TensorFlow tensor to tensor function + def func(self, name, output_dtype=None, output_shape=None): + """Get tvm op function wrapped as TensorFlow tensor to tensor function - Parameters - ---------- - name: str - function name - output_dtype: str or TensorFlow datatype - Output datatype, default is float32 - output_shape: List of integer/tf scalar tensor or tf shape tensor - Output shape, default the same with first input's shape + Parameters + ---------- + name: str + function name + output_dtype: str or TensorFlow datatype + Output datatype, default is float32 + output_shape: List of integer/tf scalar tensor or tf shape tensor + Output shape, default the same with first input's shape - Returns - ---------- - Func object that act as TensorFlow tensor to tensor function. - """ - return Func(self.lib_path, name, output_dtype, output_shape) + Returns + ---------- + Func object that act as TensorFlow tensor to tensor function. + """ + return Func(self.lib_path, name, output_dtype, output_shape) - def __getitem__(self, func_name): - return self.func(func_name) + def __getitem__(self, func_name): + return self.func(func_name) class Func(): - """Function object that act as TensorFlow tensor to tensor function.""" - - def __init__(self, lib_path, func_name, output_dtype, output_shape): - self.lib_path = lib_path - self.func_name = func_name - self.output_dtype = output_dtype - - # const(0) indicate invalid dynamic shape - self.dynamic_output_shape = tf.constant(0, tf.int64) - self.static_output_shape = None - self.has_static_output_shape = False # extra flag is required - - if self._is_static_shape(output_shape): - self.static_output_shape = output_shape - self.has_static_output_shape = True - elif output_shape is not None: - self.dynamic_output_shape = self._pack_shape_tensor(output_shape) - - # delay op initialization to where Func.apply() get called first time - self.tvm_dso_op = None - self.module = load_library.load_op_library('tvm_dso_op.so') - - def apply(self, *params): - if self.tvm_dso_op is None: - num_inputs = len(params) - self.tvm_dso_op = getattr(self.module, "tvm_dso_op%s" % num_inputs) - - return self.tvm_dso_op(*params, - dynamic_output_shape=self.dynamic_output_shape, - static_output_shape=self.static_output_shape, - has_static_output_shape=self.has_static_output_shape, - lib_path=self.lib_path, - func_name=self.func_name, - output_dtype=self.output_dtype) - - def __call__(self, *params): - return self.apply(*params) - - def _is_static_shape(self, shape): - if shape is None or not isinstance(shape, list): - return False - for d in shape: - if not isinstance(d, int): - return False - if d < 0: - raise Exception("Negative dimension is illegal: %d" % d) - return True - - def _pack_shape_tensor(self, shape): - if isinstance(shape, tf.Tensor): - if shape.dtype == tf.int32: - shape = tf.cast(shape, tf.int64) - return shape - elif isinstance(shape, list): - shape_dims = [] 
- for d in shape: - if isinstance(d, int): - shape_dims.append(tf.constant(d, tf.int64)) - elif isinstance(d, tf.Tensor) and len(d.shape) == 0: - if d.dtype == tf.int32: - d = tf.cast(d, tf.int64) - shape_dims.append(d) + """Function object that act as TensorFlow tensor to tensor function.""" + + def __init__(self, lib_path, func_name, output_dtype, output_shape): + self.lib_path = lib_path + self.func_name = func_name + self.output_dtype = output_dtype + + # const(0) indicate invalid dynamic shape + self.dynamic_output_shape = tf.constant(0, tf.int64) + self.static_output_shape = None + self.has_static_output_shape = False # extra flag is required + + if self._is_static_shape(output_shape): + self.static_output_shape = output_shape + self.has_static_output_shape = True + elif output_shape is not None: + self.dynamic_output_shape = self._pack_shape_tensor(output_shape) + + # delay op initialization to where Func.apply() get called first time + self.tvm_dso_op = None + self.module = load_library.load_op_library('tvm_dso_op.so') + + def apply(self, *params): + if self.tvm_dso_op is None: + num_inputs = len(params) + self.tvm_dso_op = getattr(self.module, "tvm_dso_op%s" % num_inputs) + + return self.tvm_dso_op(*params, + dynamic_output_shape=self.dynamic_output_shape, + static_output_shape=self.static_output_shape, + has_static_output_shape=self.has_static_output_shape, + lib_path=self.lib_path, + func_name=self.func_name, + output_dtype=self.output_dtype) + + def __call__(self, *params): + return self.apply(*params) + + def _is_static_shape(self, shape): + if shape is None or not isinstance(shape, list): + return False + for dim_value in shape: + if not isinstance(dim_value, int): + return False + if dim_value < 0: + raise Exception("Negative dimension is illegal: %d" % dim_value) + return True + + def _pack_shape_tensor(self, shape): + if isinstance(shape, tf.Tensor): + if shape.dtype == tf.int32: + shape = tf.cast(shape, tf.int64) + return shape + elif isinstance(shape, list): + shape_dims = [] + for dim_value in shape: + if isinstance(dim_value, int): + shape_dims.append(tf.constant(dim_value, tf.int64)) + elif isinstance(dim_value, tf.Tensor) and dim_value.shape.rank == 0: + if dim_value.dtype == tf.int32: + dim_value = tf.cast(dim_value, tf.int64) + shape_dims.append(dim_value) + else: + raise TypeError("Input shape dimension is neither scalar tensor nor int") + return tf.stack(shape_dims) else: - raise TypeError("Input shape dimension is neither scalar tensor nor int") - return tf.stack(shape_dims) - else: - raise TypeError("Input shape is neither tensor nor list") - - - + raise TypeError("Input shape is neither tensor nor list") diff --git a/src/contrib/tf_op/index_seq.h b/src/contrib/tf_op/index_seq.h index 98fcd717b224..7a9a423a5977 100644 --- a/src/contrib/tf_op/index_seq.h +++ b/src/contrib/tf_op/index_seq.h @@ -21,8 +21,8 @@ * Refer to std::index_sequence (since c++14) * Utilities to invoke variadic function with template */ -#ifndef __TFTVM_INDEX_SEQ__ -#define __TFTVM_INDEX_SEQ__ +#ifndef TVM_CONTRIB_TF_OP_INDEX_SEQ_H_ +#define TVM_CONTRIB_TF_OP_INDEX_SEQ_H_ template struct IndexSeq {}; @@ -59,5 +59,5 @@ void apply_variadic_by_ptrs(F f, T(&t)[N]) { apply_variadic_by_ptrs_impl(f, t, make_index_sequence{}); } -#endif +#endif // TVM_CONTRIB_TF_OP_INDEX_SEQ_H_ diff --git a/src/contrib/tf_op/tvm_dso_op_kernels.cc b/src/contrib/tf_op/tvm_dso_op_kernels.cc index 9f6b1d0c285b..363de7199307 100644 --- a/src/contrib/tf_op/tvm_dso_op_kernels.cc +++ 
b/src/contrib/tf_op/tvm_dso_op_kernels.cc @@ -17,15 +17,15 @@ * under the License. */ -#include #include #include + #include #include #include #include -#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/op_kernel.h" #include "index_seq.h" @@ -33,6 +33,10 @@ typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; typedef tensorflow::gtl::InlinedVector<tensorflow::int64, 4> ShapeContainer; +using tensorflow::OpKernel; +using tensorflow::OpKernelConstruction; +using tensorflow::OpKernelContext; + // Op utility trait for different device type template template <typename DEVICE_TYPE> @@ -45,7 +49,7 @@ class TVMDSOOpTrait; // For input tensor, we copy data from origin_buf to buf // and for output tensor, copy data from buf to origin_buf class TensorAsBuf { - public: + public: tensorflow::Tensor inline_tensor; tensorflow::Tensor* tensor; @@ -54,7 +58,7 @@ class TensorAsBuf { int device_type; - char* origin_buf; + char* origin_buf; char* buf; void CopyToOrigin() { @@ -62,9 +66,10 @@ class TensorAsBuf { return; } if (device_type == kDLCPU) { - memcpy(origin_buf, buf + offset, size); + memcpy(origin_buf, buf + offset, size); } else if (device_type == kDLGPU) { - cudaMemcpy(origin_buf, buf + offset, size, cudaMemcpyDeviceToDevice); + cudaMemcpy(origin_buf, buf + offset, + size, cudaMemcpyDeviceToDevice); } else { LOG(FATAL) << "Only support CPU and CUDA now. Device " << device_type << " is not implemented currently"; @@ -76,9 +81,10 @@ class TensorAsBuf { return; } if (device_type == kDLCPU) { - memcpy(buf + offset, origin_buf, size); + memcpy(buf + offset, origin_buf, size); } else if (device_type == kDLGPU) { - cudaMemcpy(buf + offset, origin_buf, size, cudaMemcpyDeviceToDevice); + cudaMemcpy(buf + offset, origin_buf, + size, cudaMemcpyDeviceToDevice); } else { LOG(FATAL) << "Only support CPU and CUDA now. 
Device " << device_type << " is not implemented currently"; @@ -87,7 +93,8 @@ class TensorAsBuf { }; -tensorflow::Status GetDLPackDtype(const tensorflow::Tensor& tf_tensor, DLDataType* res) { +tensorflow::Status GetDLPackDtype(const tensorflow::Tensor& tf_tensor, + DLDataType* res) { auto dtype = tf_tensor.dtype(); if (dtype == tensorflow::DT_FLOAT) { *res = {kDLFloat, 32, 1}; @@ -96,34 +103,40 @@ tensorflow::Status GetDLPackDtype(const tensorflow::Tensor& tf_tensor, DLDataTyp } else if (dtype == tensorflow::DT_INT32) { *res = {kDLInt, 32, 1}; } else { - return tensorflow::Status(tensorflow::error::INTERNAL, "Fail to get dlpack datatype"); + return tensorflow::Status(tensorflow::error::INTERNAL, + "Fail to get dlpack datatype"); } return tensorflow::Status::OK(); } -// Ensure buffer used for actual computation takes 64-byte alignment -void EnsureAlignment(tensorflow::OpKernelContext* ctx, const tensorflow::Tensor& tensor, TensorAsBuf* out) { - char* buf = (char*) tensor.tensor_data().data(); +// Ensure buffer used for actual computation takes 64-byte alignment +void EnsureAlignment(OpKernelContext* ctx, + const tensorflow::Tensor& tensor, + TensorAsBuf* out) { + char* buf = const_cast<char*>(tensor.tensor_data().data()); out->origin_buf = buf; - out->size = tensor.TotalBytes(); + out->size = tensor.TotalBytes(); int alignment = 64; - char* aligned = (char*)(((uint64_t)buf + alignment - 1) & (~ (alignment - 1))); + char* aligned = reinterpret_cast<char*>( + ((uint64_t)buf + alignment - 1) & (~(alignment - 1))); if (buf == aligned) { out->tensor = const_cast<tensorflow::Tensor*>(&tensor); out->buf = buf; out->offset = 0; } else { tensorflow::TensorShape buf_shape; - tensorflow::int64 dims[1] = { (tensorflow::int64)(tensor.TotalBytes() + alignment) }; + tensorflow::int64 dims[1] = + { (tensorflow::int64)(tensor.TotalBytes() + alignment) }; tensorflow::TensorShapeUtils::MakeShape(dims, 1, &buf_shape); - + out->tensor = &out->inline_tensor; ctx->allocate_temp(tensor.dtype(), buf_shape, out->tensor); - - buf = (char*)(out->tensor->tensor_data().data()); - char* buf_aligned = (char*)(((uint64_t)buf + alignment) & (~ (alignment - 1))); + + buf = const_cast<char*>(out->tensor->tensor_data().data()); + char* buf_aligned = reinterpret_cast<char*>( + ((uint64_t)buf + alignment) & (~(alignment - 1))); out->buf = buf; out->offset = buf_aligned - buf; } @@ -131,12 +144,13 @@ void EnsureAlignment(tensorflow::OpKernelContext* ctx, const tensorflow::Tensor& // Create DLPack tensor from TensorFlow tensor -tensorflow::Status MakeDLTensor(const TensorAsBuf& src, const DLContext& ctx, int64_t* tf_shape, DLTensor* out) { +tensorflow::Status MakeDLTensor(const TensorAsBuf& src, const DLContext& ctx, + int64_t* tf_shape, DLTensor* out) { DLDataType dlpack_type; const tensorflow::Tensor& tensor = *src.tensor; auto status = GetDLPackDtype(tensor, &dlpack_type); - if (! 
status.ok()) { + if (!status.ok()) { return status; } out->ctx = ctx; @@ -144,7 +158,7 @@ tensorflow::Status MakeDLTensor(const TensorAsBuf& src, const DLContext& ctx, in out->shape = tf_shape; out->strides = nullptr; out->byte_offset = 0; - out->dtype = dlpack_type; + out->dtype = dlpack_type; out->data = src.buf + src.offset; return tensorflow::Status::OK(); } @@ -152,22 +166,21 @@ tensorflow::Status MakeDLTensor(const TensorAsBuf& src, const DLContext& ctx, in template <> class TVMDSOOpTrait<CPUDevice> { - public: + public: static const int device_type = kDLCPU; - static int device_id(tensorflow::OpKernelContext* context) { + static int device_id(OpKernelContext* context) { return 0; } - }; template <> class TVMDSOOpTrait<GPUDevice> { - public: + public: static const int device_type = kDLGPU; - static int device_id(tensorflow::OpKernelContext* context) { + static int device_id(OpKernelContext* context) { auto device_base = context->device(); auto gpu_device_info = device_base->tensorflow_gpu_device_info(); return gpu_device_info->gpu_id; } @@ -176,41 +189,39 @@ class TVMDSOOpTrait<GPUDevice> { template <typename DEVICE_TYPE, int NUM_INPUTS> -class TVMDSOOp : public tensorflow::OpKernel { - -private: +class TVMDSOOp : public OpKernel { + private: tvm::runtime::PackedFunc tvm_func; std::string lib_path; std::string func_name; tensorflow::DataType output_dtype; - + bool has_static_output_shape; std::vector<tensorflow::int64> static_output_shape; - void initAttributes(tensorflow::OpKernelConstruction* context) { + void initAttributes(OpKernelConstruction* context) { context->GetAttr("lib_path", &lib_path); context->GetAttr("func_name", &func_name); context->GetAttr("output_dtype", &output_dtype); - + context->GetAttr("has_static_output_shape", &has_static_output_shape); context->GetAttr("static_output_shape", &static_output_shape); } public: - explicit TVMDSOOp(tensorflow::OpKernelConstruction* context) : tensorflow::OpKernel(context) { - + explicit TVMDSOOp(OpKernelConstruction* context) : OpKernel(context) { // Get attr initAttributes(context); // Load TVM function from dynamic library - tvm::runtime::Module mod_dylib = tvm::runtime::Module::LoadFromFile(lib_path); + tvm::runtime::Module mod_dylib = + tvm::runtime::Module::LoadFromFile(lib_path); tvm_func = mod_dylib.GetFunction(func_name); CHECK(tvm_func != nullptr); } - - void Compute(tensorflow::OpKernelContext* context) override { + void Compute(tensorflow::OpKernelContext* context) override { DLTensor args[NUM_INPUTS + 1]; TensorAsBuf buf_info[NUM_INPUTS]; ShapeContainer shapes[NUM_INPUTS]; @@ -218,7 +229,7 @@ class TVMDSOOp : public tensorflow::OpKernel { tensorflow::Status status; int device_id = TVMDSOOpTrait<DEVICE_TYPE>::device_id(context); int device_type = TVMDSOOpTrait<DEVICE_TYPE>::device_type; - + DLContext dl_ctx = { DLDeviceType(device_type), device_id }; // Get output shape @@ -227,23 +238,25 @@ class TVMDSOOp : public tensorflow::OpKernel { if (has_static_output_shape) { // use static output shape const tensorflow::int64* dims = static_output_shape.data(); - tensorflow::TensorShapeUtils::MakeShape(dims, static_output_shape.size(), &output_shape); + tensorflow::TensorShapeUtils::MakeShape( + dims, static_output_shape.size(), &output_shape); } else if (output_shape_tensor.dims() == 1) { // use shape tensor values as output shape - const tensorflow::int64* dims = output_shape_tensor.flat<tensorflow::int64>().data(); + const tensorflow::int64* dims = + output_shape_tensor.flat<tensorflow::int64>().data(); tensorflow::TensorShapeUtils::MakeShape(dims, 1, &output_shape); } else { // use input tensor shape by default output_shape = context->input(0).shape(); } - + for (int i = 0; 
i < NUM_INPUTS; ++i) { // Grab the input tensor auto& input_tensor = context->input(i); // Create shape container, should keep ref during execution shapes[i] = input_tensor.shape().dim_sizes(); - auto shape_ptr = (int64_t*) shapes[i].data(); + auto shape_ptr = reinterpret_cast<int64_t*>(shapes[i].data()); TensorAsBuf& input = buf_info[i]; input.device_type = device_type; @@ -257,28 +270,33 @@ class TVMDSOOp : public tensorflow::OpKernel { // Allocate output tensor tensorflow::Tensor* output_tensor; - OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output_tensor)); - auto output_shape_dim_buf = output_tensor->shape().dim_sizes(); // should keep alive on stack - auto output_shape_ptr = (int64_t*) output_shape_dim_buf.data(); - + OP_REQUIRES_OK(context, context->allocate_output( + 0, output_shape, &output_tensor)); + // shape dimension buf should keep alive on stack + auto output_shape_dim_buf = output_tensor->shape().dim_sizes(); + auto output_shape_ptr = reinterpret_cast<int64_t*>( + output_shape_dim_buf.data()); + TensorAsBuf output; output.device_type = device_type; EnsureAlignment(context, *output_tensor, &output); status = MakeDLTensor(output, dl_ctx, output_shape_ptr, &args[NUM_INPUTS]); OP_REQUIRES_OK(context, status); - + apply_variadic_by_ptrs(tvm_func, args); - - output.CopyToOrigin(); + + output.CopyToOrigin(); } }; #define REGISTER_TFTVM_KERNEL(n) \ - REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n).Device(tensorflow::DEVICE_CPU), TVMDSOOp<CPUDevice, n>); \ - REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n).Device(tensorflow::DEVICE_GPU), TVMDSOOp<GPUDevice, n>); \ + REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n) \ + .Device(tensorflow::DEVICE_CPU), TVMDSOOp<CPUDevice, n>); \ + REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n) \ + .Device(tensorflow::DEVICE_GPU), TVMDSOOp<GPUDevice, n>); \ REGISTER_TFTVM_KERNEL(1) REGISTER_TFTVM_KERNEL(2) diff --git a/src/contrib/tf_op/tvm_dso_ops.cc b/src/contrib/tf_op/tvm_dso_ops.cc index 8369e928599b..dfb21b204b81 100644 --- a/src/contrib/tf_op/tvm_dso_ops.cc +++ b/src/contrib/tf_op/tvm_dso_ops.cc @@ -19,8 +19,6 @@ #include "tensorflow/core/framework/op.h" -using namespace tensorflow; #define REGISTER_TFTVM_OP(n) REGISTER_OP("TvmDsoOp" #n) \ .Output("output: output_dtype") \ .Attr("lib_path: string") \
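A note on the EnsureAlignment helper reworked above: TensorFlow hands the kernel whatever address its allocator produced, while the buffer passed to the TVM function is promised to sit on a 64-byte boundary, so a misaligned input is staged through a temporary tensor and copied back afterwards. Below is a minimal sketch of the rounding arithmetic, with plain Python integers standing in for pointer values; the names are illustrative and not part of the patch.

    # Power-of-two round-up, mirroring the C++ expression
    # (addr + alignment - 1) & ~(alignment - 1) in EnsureAlignment.
    ALIGNMENT = 64

    def round_up(addr):
        return (addr + ALIGNMENT - 1) & ~(ALIGNMENT - 1)

    assert round_up(128) == 128  # already aligned: use the buffer in place, offset 0
    assert round_up(130) == 192  # misaligned: stage through a temp tensor

When the address is already aligned the kernel uses the TensorFlow buffer directly; otherwise CopyFromOrigin and CopyToOrigin shuttle the bytes between the original buffer and the aligned scratch area before and after the TVM call.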
From 729a5dc60970dfc915f60c1430e69e7c2c8681ec Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Thu, 5 Mar 2020 14:34:32 +0800 Subject: [PATCH 08/30] feat: Add test script and fix gpu shape --- python/tvm/contrib/tf_op/test_tfop_module.py | 100 +++++++++++++++++++ src/codegen/build_module.cc | 1 - src/contrib/tf_op/tvm_dso_op_kernels.cc | 19 +++- 3 files changed, 116 insertions(+), 4 deletions(-) create mode 100644 python/tvm/contrib/tf_op/test_tfop_module.py diff --git a/python/tvm/contrib/tf_op/test_tfop_module.py b/python/tvm/contrib/tf_op/test_tfop_module.py new file mode 100644 index 000000000000..8f1f147b7968 --- /dev/null +++ b/python/tvm/contrib/tf_op/test_tfop_module.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Test script for tf op module""" +import tempfile +import os +import tensorflow as tf +import numpy as np +import tvm +from tvm.contrib import tf_op + + +def export_cpu_add_lib(): + """create cpu add op lib""" + n = tvm.var("n") + ph_a = tvm.placeholder((n,), name='ph_a') + ph_b = tvm.placeholder((n,), name='ph_b') + ph_c = tvm.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') + sched = tvm.create_schedule(ph_c.op) + fadd_dylib = tvm.build(sched, [ph_a, ph_b, ph_c], "llvm", name="vector_add") + lib_path = tempfile.mktemp("tvm_add_dll.so") + fadd_dylib.export_library(lib_path) + return lib_path + + +def export_gpu_add_lib(): + """create gpu add op lib""" + n = tvm.var("n") + ph_a = tvm.placeholder((n,), name='ph_a') + ph_b = tvm.placeholder((n,), name='ph_b') + ph_c = tvm.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') + sched = tvm.create_schedule(ph_c.op) + b_axis, t_axis = sched[ph_c].split(ph_c.op.axis[0], factor=64) + sched[ph_c].bind(b_axis, tvm.thread_axis("blockIdx.x")) + sched[ph_c].bind(t_axis, tvm.thread_axis("threadIdx.x")) + fadd_dylib = tvm.build(sched, [ph_a, ph_b, ph_c], "cuda", name="vector_add") + lib_path = tempfile.mktemp("tvm_add_cuda_dll.so") + fadd_dylib.export_library(lib_path) + return lib_path + + +def test_add(session, lib_path, tf_device): + """test add lib with TensorFlow wrapper""" + module = tf_op.Module(lib_path) + + left = tf.placeholder("float32", shape=[4]) + right = tf.placeholder("float32", shape=[4]) + + feed_dict = {left: [1.0, 2.0, 3.0, 4.0], right: [5.0, 6.0, 7.0, 8.0]} + expect = np.asarray([6.0, 8.0, 10.0, 12.0]) + + add1 = module.func("vector_add", output_shape=[4], output_dtype="float") + add2 = module.func("vector_add", output_shape=tf.shape(left), output_dtype="float") + add3 = module.func("vector_add", output_shape=[tf.shape(left)[0]], output_dtype="float") + + with tf.device(tf_device): + output1 = session.run(add1(left, right), feed_dict) + np.testing.assert_equal(output1, expect) + + output2 = session.run(add2(left, right), feed_dict) + np.testing.assert_equal(output2, expect) + + output3 = session.run(add3(left, right), feed_dict) + np.testing.assert_equal(output3, expect) + + +def main(): + """main test function""" + cpu_lib = None + gpu_lib = None + try: + cpu_lib = export_cpu_add_lib() + gpu_lib = export_gpu_add_lib() + with tf.Session() as session: + test_add(session, cpu_lib, "/cpu:0") + test_add(session, gpu_lib, "/gpu:0") + finally: + if cpu_lib is not None: + os.remove(cpu_lib) + if gpu_lib is not None: + os.remove(gpu_lib) + + +if __name__ == "__main__": + main() diff --git a/src/codegen/build_module.cc b/src/codegen/build_module.cc index 0fb761d768b0..80fd57af66f9 100644 --- a/src/codegen/build_module.cc +++ b/src/codegen/build_module.cc @@ -572,7 +572,6 @@ runtime::Module build(const Map<Target, Array<LoweredFunc>>& inputs, auto& fhost = host_dev_funcs[0]; auto& fdevice = host_dev_funcs[1]; // Get the module for a certain target. 
- printf("build device module: %s\n", it.first->device_name.c_str()); runtime::Module mdev = DeviceBuild(fdevice, it.first); for (const auto& it : fhost) { fhost_all.push_back(it); diff --git a/src/contrib/tf_op/tvm_dso_op_kernels.cc b/src/contrib/tf_op/tvm_dso_op_kernels.cc index 363de7199307..24cb8531ee4f 100644 --- a/src/contrib/tf_op/tvm_dso_op_kernels.cc +++ b/src/contrib/tf_op/tvm_dso_op_kernels.cc @@ -242,9 +242,22 @@ class TVMDSOOp : public OpKernel { dims, static_output_shape.size(), &output_shape); } else if (output_shape_tensor.dims() == 1) { // use shape tensor values as output shape - const tensorflow::int64* dims = - output_shape_tensor.flat<tensorflow::int64>().data(); - tensorflow::TensorShapeUtils::MakeShape(dims, 1, &output_shape); + tensorflow::int64 num_dims = output_shape_tensor.NumElements(); + if (TVMDSOOpTrait<DEVICE_TYPE>::device_type == kDLGPU) { + const tensorflow::int64* flat = + output_shape_tensor.flat<tensorflow::int64>().data(); + tensorflow::int64* dims = new tensorflow::int64[num_dims]; + cudaMemcpy(dims, flat, sizeof(tensorflow::int64) * num_dims, + cudaMemcpyDeviceToHost); + tensorflow::TensorShapeUtils::MakeShape( + dims, num_dims, &output_shape); + delete[] dims; + } else { + const tensorflow::int64* dims = + output_shape_tensor.flat<tensorflow::int64>().data(); + tensorflow::TensorShapeUtils::MakeShape( + dims, num_dims, &output_shape); + } } else { // use input tensor shape by default output_shape = context->input(0).shape();
+"""Test script for tf op module""" +import tempfile +import os +import tensorflow as tf +import numpy as np +import tvm +from tvm.contrib import tf_op + + +def export_cpu_add_lib(): + """create cpu add op lib""" + n = tvm.var("n") + ph_a = tvm.placeholder((n,), name='ph_a') + ph_b = tvm.placeholder((n,), name='ph_b') + ph_c = tvm.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') + sched = tvm.create_schedule(ph_c.op) + fadd_dylib = tvm.build(sched, [ph_a, ph_b, ph_c], "llvm", name="vector_add") + lib_path = tempfile.mktemp("tvm_add_dll.so") + fadd_dylib.export_library(lib_path) + return lib_path + + +def export_gpu_add_lib(): + """create gpu add op lib""" + n = tvm.var("n") + ph_a = tvm.placeholder((n,), name='ph_a') + ph_b = tvm.placeholder((n,), name='ph_b') + ph_c = tvm.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') + sched = tvm.create_schedule(ph_c.op) + b_axis, t_axis = sched[ph_c].split(ph_c.op.axis[0], factor=64) + sched[ph_c].bind(b_axis, tvm.thread_axis("blockIdx.x")) + sched[ph_c].bind(t_axis, tvm.thread_axis("threadIdx.x")) + fadd_dylib = tvm.build(sched, [ph_a, ph_b, ph_c], "cuda", name="vector_add") + lib_path = tempfile.mktemp("tvm_add_cuda_dll.so") + fadd_dylib.export_library(lib_path) + return lib_path + + +def test_add(session, lib_path, tf_device): + """test add lib with TensorFlow wrapper""" + module = tf_op.Module(lib_path) + + left = tf.placeholder("float32", shape=[4]) + right = tf.placeholder("float32", shape=[4]) + + feed_dict = {left: [1.0, 2.0, 3.0, 4.0], right: [5.0, 6.0, 7.0, 8.0]} + expect = np.asarray([6.0, 8.0, 10.0, 12.0]) + + add1 = module.func("vector_add", output_shape=[4], output_dtype="float") + add2 = module.func("vector_add", output_shape=tf.shape(left), output_dtype="float") + add3 = module.func("vector_add", output_shape=[tf.shape(left)[0]], output_dtype="float") + + with tf.device(tf_device): + output1 = session.run(add1(left, right), feed_dict) + np.testing.assert_equal(output1, expect) + + output2 = session.run(add2(left, right), feed_dict) + np.testing.assert_equal(output2, expect) + + output3 = session.run(add3(left, right), feed_dict) + np.testing.assert_equal(output3, expect) + + +def main(): + """main test function""" + cpu_lib = None + gpu_lib = None + try: + cpu_lib = export_cpu_add_lib() + gpu_lib = export_gpu_add_lib() + with tf.Session() as session: + test_add(session, cpu_lib, "/cpu:0") + test_add(session, gpu_lib, "/gpu:0") + finally: + if cpu_lib is not None: + os.remove(cpu_lib) + if gpu_lib is not None: + os.remove(gpu_lib) + + +if __name__ == "__main__": + main() diff --git a/src/codegen/build_module.cc b/src/codegen/build_module.cc index 0fb761d768b0..80fd57af66f9 100644 --- a/src/codegen/build_module.cc +++ b/src/codegen/build_module.cc @@ -572,7 +572,6 @@ runtime::Module build(const Map>& inputs, auto& fhost = host_dev_funcs[0]; auto& fdevice = host_dev_funcs[1]; // Get the module for a certain target. 
- printf("build device module: %s\n", it.first->device_name.c_str()); runtime::Module mdev = DeviceBuild(fdevice, it.first); for (const auto& it : fhost) { fhost_all.push_back(it); diff --git a/src/contrib/tf_op/tvm_dso_op_kernels.cc b/src/contrib/tf_op/tvm_dso_op_kernels.cc index 363de7199307..24cb8531ee4f 100644 --- a/src/contrib/tf_op/tvm_dso_op_kernels.cc +++ b/src/contrib/tf_op/tvm_dso_op_kernels.cc @@ -242,9 +242,22 @@ class TVMDSOOp : public OpKernel { dims, static_output_shape.size(), &output_shape); } else if (output_shape_tensor.dims() == 1) { // use shape tensor values as output shape - const tensorflow::int64* dims = - output_shape_tensor.flat().data(); - tensorflow::TensorShapeUtils::MakeShape(dims, 1, &output_shape); + tensorflow::int64 num_dims = output_shape_tensor.NumElements(); + if (TVMDSOOpTrait::device_type == kDLGPU) { + const tensorflow::int64* flat = + output_shape_tensor.flat().data(); + tensorflow::int64* dims = new tensorflow::int64[num_dims]; + cudaMemcpy(dims, flat, sizeof(tensorflow::int64) * num_dims, + cudaMemcpyDeviceToHost); + tensorflow::TensorShapeUtils::MakeShape( + dims, num_dims, &output_shape); + delete dims; + } else { + const tensorflow::int64* dims = + output_shape_tensor.flat().data(); + tensorflow::TensorShapeUtils::MakeShape( + dims, num_dims, &output_shape); + } } else { // use input tensor shape by default output_shape = context->input(0).shape(); From 0debcd682e99b4c04d1be091b0b0e5f9383d93f2 Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Fri, 6 Mar 2020 11:57:49 +0800 Subject: [PATCH 10/30] fix: Conditional build tftvm op for gpu --- cmake/modules/contrib/TF_TVMDSOOP.cmake | 4 ++ python/tvm/contrib/tf_op/module.py | 2 +- src/contrib/tf_op/tvm_dso_op_kernels.cc | 60 +++++++++++++++++-------- 3 files changed, 46 insertions(+), 20 deletions(-) diff --git a/cmake/modules/contrib/TF_TVMDSOOP.cmake b/cmake/modules/contrib/TF_TVMDSOOP.cmake index 97e5dd4d2190..ddd3b9c31c3d 100644 --- a/cmake/modules/contrib/TF_TVMDSOOP.cmake +++ b/cmake/modules/contrib/TF_TVMDSOOP.cmake @@ -24,6 +24,10 @@ if(NOT USE_TF_TVMDSOOP STREQUAL "OFF") if (NOT ${TF_STATUS} EQUAL 0) message(FATAL_ERROR "Fail to get TensorFlow compile flags") endif() + + if(NOT USE_CUDA STREQUAL "OFF") + add_definitions(-DTF_TVMDSOOP_ENABLE_GPU) + endif() execute_process(COMMAND ${Python_EXECUTABLE} -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_link_flags()))" OUTPUT_VARIABLE TF_LINK_FLAGS_STR diff --git a/python/tvm/contrib/tf_op/module.py b/python/tvm/contrib/tf_op/module.py index ed2622fd3cff..08384ad8b0ba 100644 --- a/python/tvm/contrib/tf_op/module.py +++ b/python/tvm/contrib/tf_op/module.py @@ -102,7 +102,7 @@ def _pack_shape_tensor(self, shape): if shape.dtype == tf.int32: shape = tf.cast(shape, tf.int64) return shape - elif isinstance(shape, list): + if isinstance(shape, list): shape_dims = [] for dim_value in shape: if isinstance(dim_value, int): diff --git a/src/contrib/tf_op/tvm_dso_op_kernels.cc b/src/contrib/tf_op/tvm_dso_op_kernels.cc index 24cb8531ee4f..c82d60db0e70 100644 --- a/src/contrib/tf_op/tvm_dso_op_kernels.cc +++ b/src/contrib/tf_op/tvm_dso_op_kernels.cc @@ -17,7 +17,9 @@ * under the License. 
*/ +#ifdef TF_TVMDSOOP_ENABLE_GPU #include <cuda_runtime.h> +#endif #include #include @@ -67,9 +69,11 @@ class TensorAsBuf { } if (device_type == kDLCPU) { memcpy(origin_buf, buf + offset, size); +#ifdef TF_TVMDSOOP_ENABLE_GPU } else if (device_type == kDLGPU) { cudaMemcpy(origin_buf, buf + offset, size, cudaMemcpyDeviceToDevice); +#endif } else { LOG(FATAL) << "Only support CPU and CUDA now. Device " << device_type << " is not implemented currently"; @@ -82,9 +86,11 @@ } if (device_type == kDLCPU) { memcpy(buf + offset, origin_buf, size); +#ifdef TF_TVMDSOOP_ENABLE_GPU } else if (device_type == kDLGPU) { cudaMemcpy(buf + offset, origin_buf, size, cudaMemcpyDeviceToDevice); +#endif } else { LOG(FATAL) << "Only support CPU and CUDA now. Device " << device_type << " is not implemented currently"; @@ -172,9 +178,19 @@ class TVMDSOOpTrait<CPUDevice> { static int device_id(OpKernelContext* context) { return 0; } -}; + static void make_shape_from_tensor( + const tensorflow::Tensor& shape_tensor, + tensorflow::TensorShape* output_shape) { + tensorflow::int64 num_dims = shape_tensor.NumElements(); + const tensorflow::int64* dims = + shape_tensor.flat<tensorflow::int64>().data(); + tensorflow::TensorShapeUtils::MakeShape( + dims, num_dims, output_shape); + } +}; +#ifdef TF_TVMDSOOP_ENABLE_GPU template <> class TVMDSOOpTrait<GPUDevice> { public: @@ -185,7 +201,22 @@ class TVMDSOOpTrait<GPUDevice> { auto device_base = context->device(); auto gpu_device_info = device_base->tensorflow_gpu_device_info(); return gpu_device_info->gpu_id; } + + static void make_shape_from_tensor( + const tensorflow::Tensor& shape_tensor, + tensorflow::TensorShape* output_shape) { + tensorflow::int64 num_dims = shape_tensor.NumElements(); + const tensorflow::int64* flat = + shape_tensor.flat<tensorflow::int64>().data(); + tensorflow::int64* dims = new tensorflow::int64[num_dims]; + cudaMemcpy(dims, flat, sizeof(tensorflow::int64) * num_dims, + cudaMemcpyDeviceToHost); + tensorflow::TensorShapeUtils::MakeShape( + dims, num_dims, output_shape); + delete[] dims; + } }; +#endif template <typename DEVICE_TYPE, int NUM_INPUTS> @@ -242,22 +273,8 @@ class TVMDSOOp : public OpKernel { dims, static_output_shape.size(), &output_shape); } else if (output_shape_tensor.dims() == 1) { // use shape tensor values as output shape - tensorflow::int64 num_dims = output_shape_tensor.NumElements(); - if (TVMDSOOpTrait<DEVICE_TYPE>::device_type == kDLGPU) { - const tensorflow::int64* flat = - output_shape_tensor.flat<tensorflow::int64>().data(); - tensorflow::int64* dims = new tensorflow::int64[num_dims]; - cudaMemcpy(dims, flat, sizeof(tensorflow::int64) * num_dims, - cudaMemcpyDeviceToHost); - tensorflow::TensorShapeUtils::MakeShape( - dims, num_dims, &output_shape); - delete[] dims; - } else { - const tensorflow::int64* dims = - output_shape_tensor.flat<tensorflow::int64>().data(); - tensorflow::TensorShapeUtils::MakeShape( - dims, num_dims, &output_shape); - } + TVMDSOOpTrait<DEVICE_TYPE>::make_shape_from_tensor( + output_shape_tensor, &output_shape); } else { // use input tensor shape by default output_shape = context->input(0).shape(); @@ -304,12 +321,17 @@ class TVMDSOOp : public OpKernel { }; - +#ifdef TF_TVMDSOOP_ENABLE_GPU #define REGISTER_TFTVM_KERNEL(n) \ REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n) \ .Device(tensorflow::DEVICE_CPU), TVMDSOOp<CPUDevice, n>); \ REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n) \ - .Device(tensorflow::DEVICE_GPU), TVMDSOOp<GPUDevice, n>); \ + .Device(tensorflow::DEVICE_GPU), TVMDSOOp<GPUDevice, n>); +#else +#define REGISTER_TFTVM_KERNEL(n) \ + REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n) \ + .Device(tensorflow::DEVICE_CPU), TVMDSOOp<CPUDevice, n>); +#endif REGISTER_TFTVM_KERNEL(1) REGISTER_TFTVM_KERNEL(2)
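With the GPU paths now fenced behind TF_TVMDSOOP_ENABLE_GPU, a CPU-only build registers just the DEVICE_CPU kernels while a CUDA build registers both. A quick way to sanity-check what a given build exposes is to load the library directly; this sketch assumes the shared object kept the prefix-less target name tvm_dso_op.so set by the CMake module:

    import tensorflow as tf

    lib = tf.load_op_library("tvm_dso_op.so")  # path/name assumption
    # REGISTER_OP("TvmDsoOp1") surfaces as a snake_case wrapper on the stub:
    print(lib.tvm_dso_op1)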
From 7eae33d2014a0e3cfa43ae2057f89b3ebfc013f7 Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Mon, 9 Mar 2020 17:44:04 +0800 Subject: [PATCH 12/30] fix: Fix pylint of tf_op module.py --- python/tvm/contrib/tf_op/module.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tvm/contrib/tf_op/module.py b/python/tvm/contrib/tf_op/module.py index 08384ad8b0ba..5e8892d6746b 100644 --- a/python/tvm/contrib/tf_op/module.py +++ b/python/tvm/contrib/tf_op/module.py @@ -101,8 +101,7 @@ def _pack_shape_tensor(self, shape): if isinstance(shape, tf.Tensor): if shape.dtype == tf.int32: shape = tf.cast(shape, tf.int64) - return shape - if isinstance(shape, list): + elif isinstance(shape, list): shape_dims = [] for dim_value in shape: if isinstance(dim_value, int): @@ -113,6 +112,7 @@ def
_pack_shape_tensor(self, shape): shape_dims.append(dim_value) else: raise TypeError("Input shape dimension is neither scalar tensor nor int") - return tf.stack(shape_dims) + shape = tf.stack(shape_dims) else: raise TypeError("Input shape is neither tensor nor list") + return shape
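The _pack_shape_tensor method patched above is what lets module.func take an output shape in several forms and normalize each to the int64 shape tensor the op expects. As a usage sketch, the three forms the test script exercises (module and left as defined there):

    add_static = module.func("vector_add", output_shape=[4],
                             output_dtype="float")   # plain list of ints
    add_dynamic = module.func("vector_add", output_shape=tf.shape(left),
                              output_dtype="float")  # int32/int64 shape tensor
    add_mixed = module.func("vector_add", output_shape=[tf.shape(left)[0]],
                            output_dtype="float")    # ints mixed with scalar tensors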
From 1fc54ecad79765d01c29af77747028b801f5a76d Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Tue, 10 Mar 2020 12:53:48 +0800 Subject: [PATCH 14/30] feat: Conditional enable gpu test for tftvm op --- python/tvm/contrib/tf_op/test_tfop_module.py | 33 +++++++++++++++----- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/python/tvm/contrib/tf_op/test_tfop_module.py b/python/tvm/contrib/tf_op/test_tfop_module.py index 8f1f147b7968..b75736e841c2 100644 --- a/python/tvm/contrib/tf_op/test_tfop_module.py +++ b/python/tvm/contrib/tf_op/test_tfop_module.py @@ -19,6 +19,7 @@ """Test script for tf op module""" import tempfile import os +import logging import tensorflow as tf import numpy as np import tvm @@ -32,7 +33,7 @@ def export_cpu_add_lib(): ph_b = tvm.placeholder((n,), name='ph_b') ph_c = tvm.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') sched = tvm.create_schedule(ph_c.op) - fadd_dylib = tvm.build(sched, [ph_a, ph_b, ph_c], "llvm", name="vector_add") + fadd_dylib = tvm.build(sched, [ph_a, ph_b, ph_c], "c", name="vector_add") lib_path = tempfile.mktemp("tvm_add_dll.so") fadd_dylib.export_library(lib_path) return lib_path @@ -79,22 +80,38 @@ def test_add(session, lib_path, tf_device): np.testing.assert_equal(output3, expect) -def main(): - """main test function""" +def cpu_test(session): + """test function for cpu""" cpu_lib = None - gpu_lib = None try: cpu_lib = export_cpu_add_lib() - gpu_lib = export_gpu_add_lib() - with tf.Session() as session: - test_add(session, cpu_lib, "/cpu:0") - test_add(session, gpu_lib, "/gpu:0") + test_add(session, cpu_lib, "/cpu:0") finally: if cpu_lib is not None: os.remove(cpu_lib) + + +def gpu_test(session): + """test function for gpu""" + gpu_lib = None + try: + gpu_lib = export_gpu_add_lib() + test_add(session, gpu_lib, "/gpu:0") + finally: if gpu_lib is not None: os.remove(gpu_lib) +def main(): + """main test function""" + with tf.Session() as session: + if tvm.module.enabled("cpu"): + logging.info("Test TensorFlow op on cpu kernel") + cpu_test(session) + if tvm.module.enabled("gpu"): + logging.info("Test TensorFlow op on gpu kernel") + gpu_test(session) + + if __name__ == "__main__": main()
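Gating the GPU half of the test on how TVM was built keeps the script runnable on CPU-only checkouts. Under pytest the same intent could also be expressed with a skip marker; a sketch, assuming tvm.runtime.enabled, the spelling the later patches in this series settle on:

    import pytest
    import tvm

    @pytest.mark.skipif(not tvm.runtime.enabled("gpu"),
                        reason="TVM was built without GPU support")
    def test_gpu_add():
        ...  # build the CUDA lib and run test_add on /gpu:0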
From 864c4a5265bc97c8a8c60abf04ca34dbfdcbfa0e Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Mon, 16 Mar 2020 18:37:13 +0800 Subject: [PATCH 16/30] feat: Add tf_tvmdsoop test script as an app test --- apps/tf_tvmdsoop/CMakeLists.txt | 32 ++++++++++++++++++ apps/tf_tvmdsoop/prepare_tfop_module.sh | 33 +++++++++++++++++++ .../tf_tvmdsoop/tests}/test_tfop_module.py | 0 cmake/modules/contrib/TF_TVMDSOOP.cmake | 5 ++- tests/scripts/task_python_integration.sh | 6 ++++ 5 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 apps/tf_tvmdsoop/CMakeLists.txt create mode 100644 apps/tf_tvmdsoop/prepare_tfop_module.sh rename {python/tvm/contrib/tf_op => apps/tf_tvmdsoop/tests}/test_tfop_module.py (100%) diff --git a/apps/tf_tvmdsoop/CMakeLists.txt b/apps/tf_tvmdsoop/CMakeLists.txt new file mode 100644 index 000000000000..ab9cc34e6fe0 --- /dev/null +++ b/apps/tf_tvmdsoop/CMakeLists.txt @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +cmake_minimum_required(VERSION 3.2) +project(tf_tvmdsoop C CXX) + +set(CMAKE_CURRENT_SOURCE_DIR ${TVM_ROOT}) + +include_directories(${TVM_ROOT}/3rdparty/dlpack/include/) +include_directories(${TVM_ROOT}/3rdparty/dmlc-core/include/) +include_directories(${TVM_ROOT}/include) + +link_directories(${TVM_ROOT}/build) + +include(${TVM_ROOT}/cmake/util/FindCUDA.cmake) +include(${TVM_ROOT}/cmake/modules/CUDA.cmake) + +set(BUILD_TVMDSOOP_ONLY ON) +include(${TVM_ROOT}/cmake/modules/contrib/TF_TVMDSOOP.cmake) diff --git a/apps/tf_tvmdsoop/prepare_tfop_module.sh b/apps/tf_tvmdsoop/prepare_tfop_module.sh new file mode 100644 index 000000000000..1081f3b5bf52 --- /dev/null +++ b/apps/tf_tvmdsoop/prepare_tfop_module.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +TVM_ROOT=$(cd $(dirname $0)/../..; pwd) +echo "TVM_ROOT=${TVM_ROOT}" + +export PYTHONPATH=${TVM_ROOT}/python + +python -c "import tvm; print(tvm.module.enabled('gpu'))" | grep -e 1 +if [[ $? -eq 0 ]]; then + echo "Build TF_TVMDSOOP with gpu support" + CMAKE_OPTIONS="-DUSE_CUDA=ON -DTVM_ROOT=${TVM_ROOT}" +else + CMAKE_OPTIONS="-DUSE_CUDA=OFF -DTVM_ROOT=${TVM_ROOT}" +fi + +mkdir -p build; cd build; cmake ..
${CMAKE_OPTIONS}; make + diff --git a/python/tvm/contrib/tf_op/test_tfop_module.py b/apps/tf_tvmdsoop/tests/test_tfop_module.py similarity index 100% rename from python/tvm/contrib/tf_op/test_tfop_module.py rename to apps/tf_tvmdsoop/tests/test_tfop_module.py diff --git a/cmake/modules/contrib/TF_TVMDSOOP.cmake b/cmake/modules/contrib/TF_TVMDSOOP.cmake index ddd3b9c31c3d..c5dd122dfb36 100644 --- a/cmake/modules/contrib/TF_TVMDSOOP.cmake +++ b/cmake/modules/contrib/TF_TVMDSOOP.cmake @@ -46,7 +46,10 @@ if(NOT USE_TF_TVMDSOOP STREQUAL "OFF") add_library(${OP_LIBRARY_NAME} SHARED ${TFTVM_SRCS}) set_target_properties(${OP_LIBRARY_NAME} PROPERTIES PREFIX "") set(TFTVM_LINK_FLAGS -ltvm -L${CMAKE_CURRENT_BINARY_DIR}) - add_dependencies(${OP_LIBRARY_NAME} tvm) + + if (NOT BUILD_TVMDSOOP_ONLY STREQUAL "ON") + add_dependencies(${OP_LIBRARY_NAME} tvm) + endif() # set(TFTVM_COMPILE_FLAGS ${CMAKE_CXX_FLAGS}) target_compile_options(${OP_LIBRARY_NAME} PUBLIC ${TFTVM_COMPILE_FLAGS} ${TF_COMPILE_FLAGS}) diff --git a/tests/scripts/task_python_integration.sh b/tests/scripts/task_python_integration.sh index 5c00fd9c8896..8b08c4fa8573 100755 --- a/tests/scripts/task_python_integration.sh +++ b/tests/scripts/task_python_integration.sh @@ -53,6 +53,12 @@ cd ../.. TVM_FFI=cython python3 -m pytest -v apps/dso_plugin_module TVM_FFI=ctypes python3 -m pytest -v apps/dso_plugin_module +# Test TensorFlow TVMDSOOP +cd apps/tf_tvmdsoop +sh prepare_tfop_module.sh +cd ../.. +TVM_FFI=ctypes LD_LIBRARY_PATH=apps/tf_tvmdsoop/build:$LD_LIBRARY_PATH \ + python3 -m pytest -v apps/tf_tvmdsoop/tests TVM_FFI=ctypes python3 -m pytest -v tests/python/integration TVM_FFI=ctypes python3 -m pytest -v tests/python/contrib From ec055117c7580a25c9687fe3df7bd2d4415a5eb8 Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Tue, 17 Mar 2020 17:59:29 +0800 Subject: [PATCH 17/30] fix: Fix gpu/cpu enabled check on tvm in test script --- apps/tf_tvmdsoop/prepare_tfop_module.sh | 4 +-- apps/tf_tvmdsoop/tests/test_tfop_module.py | 29 +++++++++++----------- tests/scripts/task_python_integration.sh | 2 +- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/apps/tf_tvmdsoop/prepare_tfop_module.sh b/apps/tf_tvmdsoop/prepare_tfop_module.sh index 1081f3b5bf52..3d1caf0c7274 100644 --- a/apps/tf_tvmdsoop/prepare_tfop_module.sh +++ b/apps/tf_tvmdsoop/prepare_tfop_module.sh @@ -21,8 +21,8 @@ echo "TVM_ROOT=${TVM_ROOT}" export PYTHONPATH=${TVM_ROOT}/python -python -c "import tvm; print(tvm.module.enabled('gpu'))" | grep -e 1 -if [[ $? -eq 0 ]]; then +python3 -c "import tvm; print(tvm.runtime.enabled('gpu'))" | grep -e 1 +if [ "$?" 
-eq 0 ]; then echo "Build TF_TVMDSOOP with gpu support" CMAKE_OPTIONS="-DUSE_CUDA=ON -DTVM_ROOT=${TVM_ROOT}" else diff --git a/apps/tf_tvmdsoop/tests/test_tfop_module.py b/apps/tf_tvmdsoop/tests/test_tfop_module.py index b75736e841c2..00c9370b2f4f 100644 --- a/apps/tf_tvmdsoop/tests/test_tfop_module.py +++ b/apps/tf_tvmdsoop/tests/test_tfop_module.py @@ -23,16 +23,17 @@ import tensorflow as tf import numpy as np import tvm +from tvm import te from tvm.contrib import tf_op def export_cpu_add_lib(): """create cpu add op lib""" - n = tvm.var("n") - ph_a = tvm.placeholder((n,), name='ph_a') - ph_b = tvm.placeholder((n,), name='ph_b') - ph_c = tvm.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') - sched = tvm.create_schedule(ph_c.op) + n = te.var("n") + ph_a = te.placeholder((n,), name='ph_a') + ph_b = te.placeholder((n,), name='ph_b') + ph_c = te.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') + sched = te.create_schedule(ph_c.op) fadd_dylib = tvm.build(sched, [ph_a, ph_b, ph_c], "c", name="vector_add") lib_path = tempfile.mktemp("tvm_add_dll.so") fadd_dylib.export_library(lib_path) @@ -41,14 +42,14 @@ def export_cpu_add_lib(): def export_gpu_add_lib(): """create gpu add op lib""" - n = tvm.var("n") - ph_a = tvm.placeholder((n,), name='ph_a') - ph_b = tvm.placeholder((n,), name='ph_b') - ph_c = tvm.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') - sched = tvm.create_schedule(ph_c.op) + n = te.var("n") + ph_a = te.placeholder((n,), name='ph_a') + ph_b = te.placeholder((n,), name='ph_b') + ph_c = te.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') + sched = te.create_schedule(ph_c.op) b_axis, t_axis = sched[ph_c].split(ph_c.op.axis[0], factor=64) - sched[ph_c].bind(b_axis, tvm.thread_axis("blockIdx.x")) - sched[ph_c].bind(t_axis, tvm.thread_axis("threadIdx.x")) + sched[ph_c].bind(b_axis, te.thread_axis("blockIdx.x")) + sched[ph_c].bind(t_axis, te.thread_axis("threadIdx.x")) fadd_dylib = tvm.build(sched, [ph_a, ph_b, ph_c], "cuda", name="vector_add") lib_path = tempfile.mktemp("tvm_add_cuda_dll.so") fadd_dylib.export_library(lib_path) @@ -105,10 +106,10 @@ def gpu_test(session): def main(): """main test function""" with tf.Session() as session: - if tvm.module.enabled("cpu"): + if tvm.runtime.enabled("cpu"): logging.info("Test TensorFlow op on cpu kernel") cpu_test(session) - if tvm.module.enabled("gpu"): + if tvm.runtime.enabled("gpu"): logging.info("Test TensorFlow op on gpu kernel") gpu_test(session) diff --git a/tests/scripts/task_python_integration.sh b/tests/scripts/task_python_integration.sh index 8b08c4fa8573..b7c3ba1e18ad 100755 --- a/tests/scripts/task_python_integration.sh +++ b/tests/scripts/task_python_integration.sh @@ -57,7 +57,7 @@ TVM_FFI=ctypes python3 -m pytest -v apps/dso_plugin_module cd apps/tf_tvmdsoop sh prepare_tfop_module.sh cd ../.. 
-TVM_FFI=ctypes LD_LIBRARY_PATH=apps/tf_tvmdsoop/build:$LD_LIBRARY_PATH \ +LD_LIBRARY_PATH=apps/tf_tvmdsoop/build:$LD_LIBRARY_PATH \ python3 -m pytest -v apps/tf_tvmdsoop/tests TVM_FFI=ctypes python3 -m pytest -v tests/python/integration From 51ed77956316793a8eed85486b91506020f89df0 Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Tue, 17 Mar 2020 18:45:50 +0800 Subject: [PATCH 18/30] fix: Make tf tvmdso op test script runnable with pytest --- ...ule.sh => prepare_and_test_tfop_module.sh} | 13 +- apps/tf_tvmdsoop/tests/test_tfop_module.py | 156 +++++++++--------- tests/scripts/task_python_integration.sh | 5 +- 3 files changed, 88 insertions(+), 86 deletions(-) rename apps/tf_tvmdsoop/{prepare_tfop_module.sh => prepare_and_test_tfop_module.sh} (82%) diff --git a/apps/tf_tvmdsoop/prepare_tfop_module.sh b/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh similarity index 82% rename from apps/tf_tvmdsoop/prepare_tfop_module.sh rename to apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh index 3d1caf0c7274..3a4a6937de7f 100644 --- a/apps/tf_tvmdsoop/prepare_tfop_module.sh +++ b/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh @@ -23,11 +23,14 @@ export PYTHONPATH=${TVM_ROOT}/python python3 -c "import tvm; print(tvm.runtime.enabled('gpu'))" | grep -e 1 if [ "$?" -eq 0 ]; then - echo "Build TF_TVMDSOOP with gpu support" + echo "Build TF_TVMDSOOP with gpu support and execute tests" CMAKE_OPTIONS="-DUSE_CUDA=ON -DTVM_ROOT=${TVM_ROOT}" -else - CMAKE_OPTIONS="-DUSE_CUDA=OFF -DTVM_ROOT=${TVM_ROOT}" -fi + + mkdir -p build + pushd build + cmake .. ${CMAKE_OPTIONS} && make + popd -mkdir -p build; cd build; cmake .. ${CMAKE_OPTIONS}; make + LD_LIBRARY_PATH=./build:$LD_LIBRARY_PATH python3 -m pytest -v ./tests +fi diff --git a/apps/tf_tvmdsoop/tests/test_tfop_module.py b/apps/tf_tvmdsoop/tests/test_tfop_module.py index 00c9370b2f4f..f2dee98ee01c 100644 --- a/apps/tf_tvmdsoop/tests/test_tfop_module.py +++ b/apps/tf_tvmdsoop/tests/test_tfop_module.py @@ -27,84 +27,84 @@ from tvm.contrib import tf_op -def export_cpu_add_lib(): - """create cpu add op lib""" - n = te.var("n") - ph_a = te.placeholder((n,), name='ph_a') - ph_b = te.placeholder((n,), name='ph_b') - ph_c = te.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') - sched = te.create_schedule(ph_c.op) - fadd_dylib = tvm.build(sched, [ph_a, ph_b, ph_c], "c", name="vector_add") - lib_path = tempfile.mktemp("tvm_add_dll.so") - fadd_dylib.export_library(lib_path) - return lib_path - - -def export_gpu_add_lib(): - """create gpu add op lib""" - n = te.var("n") - ph_a = te.placeholder((n,), name='ph_a') - ph_b = te.placeholder((n,), name='ph_b') - ph_c = te.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') - sched = te.create_schedule(ph_c.op) - b_axis, t_axis = sched[ph_c].split(ph_c.op.axis[0], factor=64) - sched[ph_c].bind(b_axis, te.thread_axis("blockIdx.x")) - sched[ph_c].bind(t_axis, te.thread_axis("threadIdx.x")) - fadd_dylib = tvm.build(sched, [ph_a, ph_b, ph_c], "cuda", name="vector_add") - lib_path = tempfile.mktemp("tvm_add_cuda_dll.so") - fadd_dylib.export_library(lib_path) - return lib_path - - -def test_add(session, lib_path, tf_device): - """test add lib with TensorFlow wrapper""" - module = tf_op.Module(lib_path) - - left = tf.placeholder("float32", shape=[4]) - right = tf.placeholder("float32", shape=[4]) - - feed_dict = {left: [1.0, 2.0, 3.0, 4.0], right: [5.0, 6.0, 7.0, 8.0]} - expect = np.asarray([6.0, 8.0, 10.0, 12.0]) - - add1 = module.func("vector_add", output_shape=[4], output_dtype="float") - add2 = 
module.func("vector_add", output_shape=tf.shape(left), output_dtype="float") - add3 = module.func("vector_add", output_shape=[tf.shape(left)[0]], output_dtype="float") - - with tf.device(tf_device): - output1 = session.run(add1(left, right), feed_dict) - np.testing.assert_equal(output1, expect) - - output2 = session.run(add2(left, right), feed_dict) - np.testing.assert_equal(output2, expect) - - output3 = session.run(add3(left, right), feed_dict) - np.testing.assert_equal(output3, expect) - - -def cpu_test(session): - """test function for cpu""" - cpu_lib = None - try: - cpu_lib = export_cpu_add_lib() - test_add(session, cpu_lib, "/cpu:0") - finally: - if cpu_lib is not None: - os.remove(cpu_lib) - - -def gpu_test(session): - """test function for gpu""" - gpu_lib = None - try: - gpu_lib = export_gpu_add_lib() - test_add(session, gpu_lib, "/gpu:0") - finally: - if gpu_lib is not None: - os.remove(gpu_lib) - - -def main(): +def test_use_tvmdso_op(): """main test function""" + + def export_cpu_add_lib(): + """create cpu add op lib""" + n = te.var("n") + ph_a = te.placeholder((n,), name='ph_a') + ph_b = te.placeholder((n,), name='ph_b') + ph_c = te.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') + sched = te.create_schedule(ph_c.op) + fadd_dylib = tvm.build(sched, [ph_a, ph_b, ph_c], "c", name="vector_add") + lib_path = tempfile.mktemp("tvm_add_dll.so") + fadd_dylib.export_library(lib_path) + return lib_path + + + def export_gpu_add_lib(): + """create gpu add op lib""" + n = te.var("n") + ph_a = te.placeholder((n,), name='ph_a') + ph_b = te.placeholder((n,), name='ph_b') + ph_c = te.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') + sched = te.create_schedule(ph_c.op) + b_axis, t_axis = sched[ph_c].split(ph_c.op.axis[0], factor=64) + sched[ph_c].bind(b_axis, te.thread_axis("blockIdx.x")) + sched[ph_c].bind(t_axis, te.thread_axis("threadIdx.x")) + fadd_dylib = tvm.build(sched, [ph_a, ph_b, ph_c], "cuda", name="vector_add") + lib_path = tempfile.mktemp("tvm_add_cuda_dll.so") + fadd_dylib.export_library(lib_path) + return lib_path + + + def test_add(session, lib_path, tf_device): + """test add lib with TensorFlow wrapper""" + module = tf_op.Module(lib_path) + + left = tf.placeholder("float32", shape=[4]) + right = tf.placeholder("float32", shape=[4]) + + feed_dict = {left: [1.0, 2.0, 3.0, 4.0], right: [5.0, 6.0, 7.0, 8.0]} + expect = np.asarray([6.0, 8.0, 10.0, 12.0]) + + add1 = module.func("vector_add", output_shape=[4], output_dtype="float") + add2 = module.func("vector_add", output_shape=tf.shape(left), output_dtype="float") + add3 = module.func("vector_add", output_shape=[tf.shape(left)[0]], output_dtype="float") + + with tf.device(tf_device): + output1 = session.run(add1(left, right), feed_dict) + np.testing.assert_equal(output1, expect) + + output2 = session.run(add2(left, right), feed_dict) + np.testing.assert_equal(output2, expect) + + output3 = session.run(add3(left, right), feed_dict) + np.testing.assert_equal(output3, expect) + + + def cpu_test(session): + """test function for cpu""" + cpu_lib = None + try: + cpu_lib = export_cpu_add_lib() + test_add(session, cpu_lib, "/cpu:0") + finally: + if cpu_lib is not None: + os.remove(cpu_lib) + + + def gpu_test(session): + """test function for gpu""" + gpu_lib = None + try: + gpu_lib = export_gpu_add_lib() + test_add(session, gpu_lib, "/gpu:0") + finally: + if gpu_lib is not None: + os.remove(gpu_lib) + with tf.Session() as session: if tvm.runtime.enabled("cpu"): logging.info("Test TensorFlow op on cpu kernel") @@ 
-115,4 +115,4 @@ def main(): if __name__ == "__main__": - main() + test_use_tvmdso_op() diff --git a/tests/scripts/task_python_integration.sh b/tests/scripts/task_python_integration.sh index b7c3ba1e18ad..3569505cb551 100755 --- a/tests/scripts/task_python_integration.sh +++ b/tests/scripts/task_python_integration.sh @@ -55,10 +55,9 @@ TVM_FFI=ctypes python3 -m pytest -v apps/dso_plugin_module # Test TensorFlow TVMDSOOP cd apps/tf_tvmdsoop -sh prepare_tfop_module.sh +TVM_FFI=cython sh prepare_and_test_tfop_module.sh +TVM_FFI=ctypes sh prepare_and_test_tfop_module.sh cd ../.. -LD_LIBRARY_PATH=apps/tf_tvmdsoop/build:$LD_LIBRARY_PATH \ - python3 -m pytest -v apps/tf_tvmdsoop/tests TVM_FFI=ctypes python3 -m pytest -v tests/python/integration TVM_FFI=ctypes python3 -m pytest -v tests/python/contrib From 685f7d049ed00cf4af1d1652c34e24c4368490de Mon Sep 17 00:00:00 2001 From: wrongtest Date: Tue, 17 Mar 2020 19:08:10 +0800 Subject: [PATCH 19/30] remove unused test script test_tfop_module.py --- python/tvm/contrib/tf_op/test_tfop_module.py | 117 ------------------- 1 file changed, 117 deletions(-) delete mode 100644 python/tvm/contrib/tf_op/test_tfop_module.py diff --git a/python/tvm/contrib/tf_op/test_tfop_module.py b/python/tvm/contrib/tf_op/test_tfop_module.py deleted file mode 100644 index b75736e841c2..000000000000 --- a/python/tvm/contrib/tf_op/test_tfop_module.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""Test script for tf op module""" -import tempfile -import os -import logging -import tensorflow as tf -import numpy as np -import tvm -from tvm.contrib import tf_op - - -def export_cpu_add_lib(): - """create cpu add op lib""" - n = tvm.var("n") - ph_a = tvm.placeholder((n,), name='ph_a') - ph_b = tvm.placeholder((n,), name='ph_b') - ph_c = tvm.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') - sched = tvm.create_schedule(ph_c.op) - fadd_dylib = tvm.build(sched, [ph_a, ph_b, ph_c], "c", name="vector_add") - lib_path = tempfile.mktemp("tvm_add_dll.so") - fadd_dylib.export_library(lib_path) - return lib_path - - -def export_gpu_add_lib(): - """create gpu add op lib""" - n = tvm.var("n") - ph_a = tvm.placeholder((n,), name='ph_a') - ph_b = tvm.placeholder((n,), name='ph_b') - ph_c = tvm.compute(ph_a.shape, lambda i: ph_a[i] + ph_b[i], name='ph_c') - sched = tvm.create_schedule(ph_c.op) - b_axis, t_axis = sched[ph_c].split(ph_c.op.axis[0], factor=64) - sched[ph_c].bind(b_axis, tvm.thread_axis("blockIdx.x")) - sched[ph_c].bind(t_axis, tvm.thread_axis("threadIdx.x")) - fadd_dylib = tvm.build(sched, [ph_a, ph_b, ph_c], "cuda", name="vector_add") - lib_path = tempfile.mktemp("tvm_add_cuda_dll.so") - fadd_dylib.export_library(lib_path) - return lib_path - - -def test_add(session, lib_path, tf_device): - """test add lib with TensorFlow wrapper""" - module = tf_op.Module(lib_path) - - left = tf.placeholder("float32", shape=[4]) - right = tf.placeholder("float32", shape=[4]) - - feed_dict = {left: [1.0, 2.0, 3.0, 4.0], right: [5.0, 6.0, 7.0, 8.0]} - expect = np.asarray([6.0, 8.0, 10.0, 12.0]) - - add1 = module.func("vector_add", output_shape=[4], output_dtype="float") - add2 = module.func("vector_add", output_shape=tf.shape(left), output_dtype="float") - add3 = module.func("vector_add", output_shape=[tf.shape(left)[0]], output_dtype="float") - - with tf.device(tf_device): - output1 = session.run(add1(left, right), feed_dict) - np.testing.assert_equal(output1, expect) - - output2 = session.run(add2(left, right), feed_dict) - np.testing.assert_equal(output2, expect) - - output3 = session.run(add3(left, right), feed_dict) - np.testing.assert_equal(output3, expect) - - -def cpu_test(session): - """test function for cpu""" - cpu_lib = None - try: - cpu_lib = export_cpu_add_lib() - test_add(session, cpu_lib, "/cpu:0") - finally: - if cpu_lib is not None: - os.remove(cpu_lib) - - -def gpu_test(session): - """test function for gpu""" - gpu_lib = None - try: - gpu_lib = export_gpu_add_lib() - test_add(session, gpu_lib, "/gpu:0") - finally: - if gpu_lib is not None: - os.remove(gpu_lib) - - -def main(): - """main test function""" - with tf.Session() as session: - if tvm.module.enabled("cpu"): - logging.info("Test TensorFlow op on cpu kernel") - cpu_test(session) - if tvm.module.enabled("gpu"): - logging.info("Test TensorFlow op on gpu kernel") - gpu_test(session) - - -if __name__ == "__main__": - main() From ea6328b92d775a756232c3e1918339b1cfb9df63 Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Wed, 18 Mar 2020 11:09:16 +0800 Subject: [PATCH 20/30] fix: Remove pushd & popd in tfdsoop test script --- apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh b/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh index 3a4a6937de7f..857bd9ed2b02 100644 --- a/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh +++ b/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh @@ -27,10 +27,8 @@ if [ "$?" 
-eq 0 ]; then CMAKE_OPTIONS="-DUSE_CUDA=ON -DTVM_ROOT=${TVM_ROOT}" mkdir -p build - pushd build - cmake .. ${CMAKE_OPTIONS} && make - popd - + cd build; cmake .. ${CMAKE_OPTIONS} && make + cd .. LD_LIBRARY_PATH=./build:$LD_LIBRARY_PATH python3 -m pytest -v ./tests fi From 0ae094261c2279888c76b29e59f8e740162c87f2 Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Wed, 18 Mar 2020 13:16:51 +0800 Subject: [PATCH 21/30] fix: Upgrade tftvmop use python3 to find TensorFlow --- cmake/modules/contrib/TF_TVMDSOOP.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/modules/contrib/TF_TVMDSOOP.cmake b/cmake/modules/contrib/TF_TVMDSOOP.cmake index c5dd122dfb36..a7a6b497ca9c 100644 --- a/cmake/modules/contrib/TF_TVMDSOOP.cmake +++ b/cmake/modules/contrib/TF_TVMDSOOP.cmake @@ -16,9 +16,9 @@ # under the License. if(NOT USE_TF_TVMDSOOP STREQUAL "OFF") - find_package(Python COMPONENTS Interpreter) + find_package(Python3 COMPONENTS Interpreter) - execute_process(COMMAND ${Python_EXECUTABLE} -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_compile_flags()))" + execute_process(COMMAND ${Python3_EXECUTABLE} -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_compile_flags()))" OUTPUT_VARIABLE TF_COMPILE_FLAGS_STR RESULT_VARIABLE TF_STATUS) if (NOT ${TF_STATUS} EQUAL 0) @@ -29,7 +29,7 @@ if(NOT USE_TF_TVMDSOOP STREQUAL "OFF") add_definitions(-DTF_TVMDSOOP_ENABLE_GPU) endif() - execute_process(COMMAND ${Python_EXECUTABLE} -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_link_flags()))" + execute_process(COMMAND ${Python3_EXECUTABLE} -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_link_flags()))" OUTPUT_VARIABLE TF_LINK_FLAGS_STR RESULT_VARIABLE TF_STATUS) if (NOT ${TF_STATUS} EQUAL 0) From 95947006ab8eff1161f320371c0ad26503e941de Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Wed, 18 Mar 2020 13:16:51 +0800 Subject: [PATCH 22/30] fix: Upgrade tftvmop use python3 to find TensorFlow --- apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh b/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh index 857bd9ed2b02..be33b57de67b 100644 --- a/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh +++ b/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh @@ -24,7 +24,7 @@ export PYTHONPATH=${TVM_ROOT}/python python3 -c "import tvm; print(tvm.runtime.enabled('gpu'))" | grep -e 1 if [ "$?" -eq 0 ]; then echo "Build TF_TVMDSOOP with gpu support and execute tests" - CMAKE_OPTIONS="-DUSE_CUDA=ON -DTVM_ROOT=${TVM_ROOT}" + CMAKE_OPTIONS="-DUSE_CUDA=ON -DPython3_EXECUTABLE=python3 -DTVM_ROOT=${TVM_ROOT}" mkdir -p build cd build; cmake .. 
${CMAKE_OPTIONS} && make From 8a5d2fbd444b5cd09762c6a4c235282fa8fcee4e Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Wed, 18 Mar 2020 16:20:05 +0800 Subject: [PATCH 23/30] fix: Change target_link_options to target_link_libraries --- cmake/modules/contrib/TF_TVMDSOOP.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/modules/contrib/TF_TVMDSOOP.cmake b/cmake/modules/contrib/TF_TVMDSOOP.cmake index a7a6b497ca9c..bf716267ed5e 100644 --- a/cmake/modules/contrib/TF_TVMDSOOP.cmake +++ b/cmake/modules/contrib/TF_TVMDSOOP.cmake @@ -53,7 +53,7 @@ if(NOT USE_TF_TVMDSOOP STREQUAL "OFF") # set(TFTVM_COMPILE_FLAGS ${CMAKE_CXX_FLAGS}) target_compile_options(${OP_LIBRARY_NAME} PUBLIC ${TFTVM_COMPILE_FLAGS} ${TF_COMPILE_FLAGS}) - target_link_options(${OP_LIBRARY_NAME} PUBLIC ${TFTVM_LINK_FLAGS} ${TF_LINK_FLAGS}) + target_link_libraries(${OP_LIBRARY_NAME} PUBLIC ${TFTVM_LINK_FLAGS} ${TF_LINK_FLAGS}) endif() From b8fbd2e2c17e30df68e927f3768fa3571b07056f Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Thu, 19 Mar 2020 10:56:53 +0800 Subject: [PATCH 24/30] fix: Add tftvmop build script's c++ option --- apps/tf_tvmdsoop/CMakeLists.txt | 1 + cmake/modules/contrib/TF_TVMDSOOP.cmake | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/tf_tvmdsoop/CMakeLists.txt b/apps/tf_tvmdsoop/CMakeLists.txt index ab9cc34e6fe0..a445d0108fd3 100644 --- a/apps/tf_tvmdsoop/CMakeLists.txt +++ b/apps/tf_tvmdsoop/CMakeLists.txt @@ -28,5 +28,6 @@ link_directories(${TVM_ROOT}/build) include(${TVM_ROOT}/cmake/util/FindCUDA.cmake) include(${TVM_ROOT}/cmake/modules/CUDA.cmake) +set(TFTVM_COMPILE_FLAGS -std=c++11) set(BUILD_TVMDSOOP_ONLY ON) include(${TVM_ROOT}/cmake/modules/contrib/TF_TVMDSOOP.cmake) diff --git a/cmake/modules/contrib/TF_TVMDSOOP.cmake b/cmake/modules/contrib/TF_TVMDSOOP.cmake index bf716267ed5e..e92822a397ae 100644 --- a/cmake/modules/contrib/TF_TVMDSOOP.cmake +++ b/cmake/modules/contrib/TF_TVMDSOOP.cmake @@ -51,7 +51,6 @@ if(NOT USE_TF_TVMDSOOP STREQUAL "OFF") add_dependencies(${OP_LIBRARY_NAME} tvm) endif() - # set(TFTVM_COMPILE_FLAGS ${CMAKE_CXX_FLAGS}) target_compile_options(${OP_LIBRARY_NAME} PUBLIC ${TFTVM_COMPILE_FLAGS} ${TF_COMPILE_FLAGS}) target_link_libraries(${OP_LIBRARY_NAME} PUBLIC ${TFTVM_LINK_FLAGS} ${TF_LINK_FLAGS}) From 380e1d7f6176e13df0501642c87502fa6aaaea2a Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Thu, 19 Mar 2020 11:50:00 +0800 Subject: [PATCH 25/30] fix: Add tvm library path to tf op test library path --- apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh b/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh index be33b57de67b..ef1b63771526 100644 --- a/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh +++ b/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh @@ -29,6 +29,6 @@ if [ "$?" -eq 0 ]; then mkdir -p build cd build; cmake .. ${CMAKE_OPTIONS} && make cd .. 
- LD_LIBRARY_PATH=./build:$LD_LIBRARY_PATH python3 -m pytest -v ./tests + LD_LIBRARY_PATH=${TVM_ROOT}/build:./build:$LD_LIBRARY_PATH python3 -m pytest -v ./tests fi From 0b3884d165ec6528df44768d0f0969b5120eb22c Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Thu, 19 Mar 2020 12:41:04 +0800 Subject: [PATCH 26/30] fix: Debug ci build for tftvm dso op --- apps/tf_tvmdsoop/CMakeLists.txt | 5 +++-- apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh | 4 ++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/apps/tf_tvmdsoop/CMakeLists.txt b/apps/tf_tvmdsoop/CMakeLists.txt index a445d0108fd3..cb601ef6d30d 100644 --- a/apps/tf_tvmdsoop/CMakeLists.txt +++ b/apps/tf_tvmdsoop/CMakeLists.txt @@ -17,7 +17,10 @@ cmake_minimum_required(VERSION 3.2) project(tf_tvmdsoop C CXX) +set(TFTVM_COMPILE_FLAGS -std=c++11) +set(BUILD_TVMDSOOP_ONLY ON) set(CMAKE_CURRENT_SOURCE_DIR ${TVM_ROOT}) +set(CMAKE_CURRENT_BINARY_DIR ${TVM_ROOT}/build) include_directories(${TVM_ROOT}/3rdparty/dlpack/include/) include_directories(${TVM_ROOT}/3rdparty/dmlc-core/include/) @@ -28,6 +31,4 @@ link_directories(${TVM_ROOT}/build) include(${TVM_ROOT}/cmake/util/FindCUDA.cmake) include(${TVM_ROOT}/cmake/modules/CUDA.cmake) -set(TFTVM_COMPILE_FLAGS -std=c++11) -set(BUILD_TVMDSOOP_ONLY ON) include(${TVM_ROOT}/cmake/modules/contrib/TF_TVMDSOOP.cmake) diff --git a/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh b/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh index ef1b63771526..f5bcb4797aa2 100644 --- a/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh +++ b/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh @@ -29,6 +29,10 @@ if [ "$?" -eq 0 ]; then mkdir -p build cd build; cmake .. ${CMAKE_OPTIONS} && make cd .. + + ldd ./build/tvm_dso_op.so + ls ${TVM_ROOT}/build + LD_LIBRARY_PATH=${TVM_ROOT}/build:./build:$LD_LIBRARY_PATH python3 -m pytest -v ./tests fi From 9fd18b823c5a2ff1ac9bfaad091bda84b19e00bf Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Wed, 1 Apr 2020 11:05:35 +0800 Subject: [PATCH 27/30] fix: Fix cmake error and skip tfop test --- cmake/config.cmake | 5 +++-- tests/scripts/task_python_integration.sh | 8 +++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 4e3f56116a1b..65c60df60d6d 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -118,7 +118,7 @@ set(USE_MICRO_STANDALONE_RUNTIME OFF) # - ON: enable llvm with cmake's find search # - OFF: disable llvm # - /path/to/llvm-config: enable specific LLVM when multiple llvm-dev is available. -set(USE_LLVM OFF) +set(USE_LLVM ON) #--------------------------------------------- # Contrib libraries @@ -203,4 +203,5 @@ set(USE_VTA_FPGA OFF) set(USE_EXAMPLE_EXT_RUNTIME OFF) # Whether to build the TensorFlow TVMDSOOp module -set(USE_TF_TVMDSOOP OFF) +set(USE_TF_TVMDSOOP ON) + diff --git a/tests/scripts/task_python_integration.sh b/tests/scripts/task_python_integration.sh index 3569505cb551..dcd8139abd81 100755 --- a/tests/scripts/task_python_integration.sh +++ b/tests/scripts/task_python_integration.sh @@ -53,11 +53,9 @@ cd ../.. TVM_FFI=cython python3 -m pytest -v apps/dso_plugin_module TVM_FFI=ctypes python3 -m pytest -v apps/dso_plugin_module -# Test TensorFlow TVMDSOOP -cd apps/tf_tvmdsoop -TVM_FFI=cython sh prepare_and_test_tfop_module.sh -TVM_FFI=ctypes sh prepare_and_test_tfop_module.sh -cd ../.. 
+# Do not enable TensorFlow op +# TVM_FFI=cython sh prepare_and_test_tfop_module.sh +# TVM_FFI=ctypes sh prepare_and_test_tfop_module.sh TVM_FFI=ctypes python3 -m pytest -v tests/python/integration TVM_FFI=ctypes python3 -m pytest -v tests/python/contrib From 8ac182f0b6c589fa952d9806704386836725a2e4 Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Thu, 2 Apr 2020 12:11:57 +0800 Subject: [PATCH 28/30] fix: Fix typo and indentation issues --- .../prepare_and_test_tfop_module.sh | 3 - python/tvm/contrib/tf_op/__init__.py | 2 +- python/tvm/contrib/tf_op/module.py | 10 +- src/contrib/tf_op/index_seq.h | 32 +- src/contrib/tf_op/tvm_dso_op_kernels.cc | 308 ++++++++---------- src/contrib/tf_op/tvm_dso_ops.cc | 129 +++++--- 6 files changed, 239 insertions(+), 245 deletions(-) diff --git a/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh b/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh index f5bcb4797aa2..2bde4f87c84e 100644 --- a/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh +++ b/apps/tf_tvmdsoop/prepare_and_test_tfop_module.sh @@ -30,9 +30,6 @@ if [ "$?" -eq 0 ]; then cd build; cmake .. ${CMAKE_OPTIONS} && make cd .. - ldd ./build/tvm_dso_op.so - ls ${TVM_ROOT}/build - LD_LIBRARY_PATH=${TVM_ROOT}/build:./build:$LD_LIBRARY_PATH python3 -m pytest -v ./tests fi diff --git a/python/tvm/contrib/tf_op/__init__.py b/python/tvm/contrib/tf_op/__init__.py index cabd8b716e7d..05d0ecc1ddc1 100644 --- a/python/tvm/contrib/tf_op/__init__.py +++ b/python/tvm/contrib/tf_op/__init__.py @@ -17,4 +17,4 @@ """Module container of TensorFlow TVMDSO op""" from . import module -Module = module.Module +OpModule = module.OpModule diff --git a/python/tvm/contrib/tf_op/module.py b/python/tvm/contrib/tf_op/module.py index 5e8892d6746b..446800c82a03 100644 --- a/python/tvm/contrib/tf_op/module.py +++ b/python/tvm/contrib/tf_op/module.py @@ -19,7 +19,7 @@ from tensorflow.python.framework import load_library -class Module(): +class OpModule(): """Module container of TensorFlow TVMDSO op which wraps exported TVM op implementation library to be called on TensorFlow side""" @@ -40,16 +40,16 @@ def func(self, name, output_dtype=None, output_shape=None): Returns ---------- - Func object that act as TensorFlow tensor to tensor function. + Func object that acts as TensorFlow tensor to tensor function. 
""" - return Func(self.lib_path, name, output_dtype, output_shape) + return TensorFunc(self.lib_path, name, output_dtype, output_shape) def __getitem__(self, func_name): return self.func(func_name) -class Func(): - """Function object that act as TensorFlow tensor to tensor function.""" +class TensorFunc(): + """Function object that acts as TensorFlow tensor to tensor function.""" def __init__(self, lib_path, func_name, output_dtype, output_shape): self.lib_path = lib_path diff --git a/src/contrib/tf_op/index_seq.h b/src/contrib/tf_op/index_seq.h index 7a9a423a5977..5448c1f5f42d 100644 --- a/src/contrib/tf_op/index_seq.h +++ b/src/contrib/tf_op/index_seq.h @@ -19,45 +19,43 @@ /* * Refer to std::index_sequence (since c++14) - * Utilities to invoke variadic function with template + * Utilities to invoke variadic function with template */ #ifndef TVM_CONTRIB_TF_OP_INDEX_SEQ_H_ #define TVM_CONTRIB_TF_OP_INDEX_SEQ_H_ -template +template struct IndexSeq {}; -template -struct IndexSeqHelper : public IndexSeqHelper {}; +template +struct IndexSeqHelper : public IndexSeqHelper {}; -template -struct IndexSeqHelper<0U, Tail ...> { - using type = IndexSeq; +template +struct IndexSeqHelper<0U, Tail...> { + using type = IndexSeq; }; template using make_index_sequence = typename IndexSeqHelper::type; - template -void apply_variadic_impl(F f, T(&t)[N], IndexSeq) { - f(t[Idx]...); +void apply_variadic_impl(F f, T (&t)[N], IndexSeq) { + f(t[Idx]...); } template -void apply_variadic(F f, T(&t)[N]) { - apply_variadic_impl(f, t, make_index_sequence{}); +void apply_variadic(F f, T (&t)[N]) { + apply_variadic_impl(f, t, make_index_sequence{}); } template -void apply_variadic_by_ptrs_impl(F f, T(&t)[N], IndexSeq) { - f(&t[Idx]...); +void apply_variadic_by_ptrs_impl(F f, T (&t)[N], IndexSeq) { + f(&t[Idx]...); } template -void apply_variadic_by_ptrs(F f, T(&t)[N]) { - apply_variadic_by_ptrs_impl(f, t, make_index_sequence{}); +void apply_variadic_by_ptrs(F f, T (&t)[N]) { + apply_variadic_by_ptrs_impl(f, t, make_index_sequence{}); } #endif // TVM_CONTRIB_TF_OP_INDEX_SEQ_H_ - diff --git a/src/contrib/tf_op/tvm_dso_op_kernels.cc b/src/contrib/tf_op/tvm_dso_op_kernels.cc index c82d60db0e70..03024da9a478 100644 --- a/src/contrib/tf_op/tvm_dso_op_kernels.cc +++ b/src/contrib/tf_op/tvm_dso_op_kernels.cc @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -21,15 +21,13 @@ #include #endif #include - -#include #include -#include +#include #include +#include -#include "tensorflow/core/framework/op_kernel.h" #include "index_seq.h" - +#include "tensorflow/core/framework/op_kernel.h" typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; @@ -39,12 +37,10 @@ using tensorflow::OpKernel; using tensorflow::OpKernelConstruction; using tensorflow::OpKernelContext; - // Op utility trait for diffrent device type template template class TVMDSOOpTrait; - // Buffer information used for actual computation. // Each buffer is associated with one TensorFlow tensor // whose underlying buffer is record into "origin_buf". 
@@ -52,173 +48,150 @@ class TVMDSOOpTrait; // and for output tensor, copy data from buf to origin_buf class TensorAsBuf { public: - tensorflow::Tensor inline_tensor; - tensorflow::Tensor* tensor; + tensorflow::Tensor inline_tensor; + tensorflow::Tensor* tensor; - size_t size; - size_t offset; + size_t size; + size_t offset; - int device_type; + int device_type; - char* origin_buf; - char* buf; + char* origin_buf; + char* buf; - void CopyToOrigin() { - if (buf == origin_buf) { - return; - } - if (device_type == kDLCPU) { - memcpy(origin_buf, buf + offset, size); + void CopyToOrigin() { + if (buf == origin_buf) { + return; + } + if (device_type == kDLCPU) { + memcpy(origin_buf, buf + offset, size); #ifdef TF_TVMDSOOP_ENABLE_GPU - } else if (device_type == kDLGPU) { - cudaMemcpy(origin_buf, buf + offset, - size, cudaMemcpyDeviceToDevice); + } else if (device_type == kDLGPU) { + cudaMemcpy(origin_buf, buf + offset, size, cudaMemcpyDeviceToDevice); #endif - } else { - LOG(FATAL) << "Only support CPU and CUDA now. Device " - << device_type << " is not implemented currently"; - } + } else { + LOG(FATAL) << "Only support CPU and CUDA now. Device " << device_type + << " is not implemented currently"; } + } - void CopyFromOrigin() { - if (buf == origin_buf) { - return; - } - if (device_type == kDLCPU) { - memcpy(buf + offset, origin_buf, size); + void CopyFromOrigin() { + if (buf == origin_buf) { + return; + } + if (device_type == kDLCPU) { + memcpy(buf + offset, origin_buf, size); #ifdef TF_TVMDSOOP_ENABLE_GPU - } else if (device_type == kDLGPU) { - cudaMemcpy(buf + offset, origin_buf, - size, cudaMemcpyDeviceToDevice); + } else if (device_type == kDLGPU) { + cudaMemcpy(buf + offset, origin_buf, size, cudaMemcpyDeviceToDevice); #endif - } else { - LOG(FATAL) << "Only support CPU and CUDA now. Device " - << device_type << " is not implemented currently"; - } + } else { + LOG(FATAL) << "Only support CPU and CUDA now. 
Device " << device_type + << " is not implemented currently"; } + } }; - -tensorflow::Status GetDLPackDtype(const tensorflow::Tensor& tf_tensor, - DLDataType* res) { - auto dtype = tf_tensor.dtype(); - if (dtype == tensorflow::DT_FLOAT) { - *res = {kDLFloat, 32, 1}; - } else if (dtype == tensorflow::DT_INT64) { - *res = {kDLInt, 64, 1}; - } else if (dtype == tensorflow::DT_INT32) { - *res = {kDLInt, 32, 1}; - } else { - return tensorflow::Status(tensorflow::error::INTERNAL, - "Fail to get dlpack datatype"); - } - return tensorflow::Status::OK(); +tensorflow::Status GetDLPackDtype(const tensorflow::Tensor& tf_tensor, DLDataType* res) { + auto dtype = tf_tensor.dtype(); + if (dtype == tensorflow::DT_FLOAT) { + *res = {kDLFloat, 32, 1}; + } else if (dtype == tensorflow::DT_INT64) { + *res = {kDLInt, 64, 1}; + } else if (dtype == tensorflow::DT_INT32) { + *res = {kDLInt, 32, 1}; + } else { + return tensorflow::Status(tensorflow::error::INTERNAL, "Fail to get dlpack datatype"); + } + return tensorflow::Status::OK(); } - // Ensure buffer used for actual computation take 64byte alignment -void EnsureAlignment(OpKernelContext* ctx, - const tensorflow::Tensor& tensor, - TensorAsBuf* out) { - char* buf = const_cast(tensor.tensor_data().data()); - out->origin_buf = buf; - out->size = tensor.TotalBytes(); - - int alignment = 64; - char* aligned = reinterpret_cast( - ((uint64_t)buf + alignment - 1) & (~(alignment - 1))); - if (buf == aligned) { - out->tensor = const_cast(&tensor); - out->buf = buf; - out->offset = 0; - } else { - tensorflow::TensorShape buf_shape; - tensorflow::int64 dims[1] = - { (tensorflow::int64)(tensor.TotalBytes() + alignment) }; - tensorflow::TensorShapeUtils::MakeShape(dims, 1, &buf_shape); - - out->tensor = &out->inline_tensor; - ctx->allocate_temp(tensor.dtype(), buf_shape, out->tensor); - - buf = const_cast(out->tensor->tensor_data().data()); - char* buf_aligned = reinterpret_cast( - ((uint64_t)buf + alignment) & (~(alignment - 1))); - out->buf = buf; - out->offset = buf_aligned - buf; - } +void EnsureAlignment(OpKernelContext* ctx, const tensorflow::Tensor& tensor, TensorAsBuf* out) { + char* buf = const_cast(tensor.tensor_data().data()); + out->origin_buf = buf; + out->size = tensor.TotalBytes(); + + int alignment = 64; + char* aligned = reinterpret_cast(((uint64_t)buf + alignment - 1) & (~(alignment - 1))); + if (buf == aligned) { + out->tensor = const_cast(&tensor); + out->buf = buf; + out->offset = 0; + } else { + tensorflow::TensorShape buf_shape; + tensorflow::int64 dims[1] = {(tensorflow::int64)(tensor.TotalBytes() + alignment)}; + tensorflow::TensorShapeUtils::MakeShape(dims, 1, &buf_shape); + + out->tensor = &out->inline_tensor; + ctx->allocate_temp(tensor.dtype(), buf_shape, out->tensor); + + buf = const_cast(out->tensor->tensor_data().data()); + char* buf_aligned = reinterpret_cast(((uint64_t)buf + alignment) & (~(alignment - 1))); + out->buf = buf; + out->offset = buf_aligned - buf; + } } - // Create DLPack tensor from TensorFlow tensor -tensorflow::Status MakeDLTensor(const TensorAsBuf& src, const DLContext& ctx, - int64_t* tf_shape, DLTensor* out) { - DLDataType dlpack_type; - const tensorflow::Tensor& tensor = *src.tensor; - - auto status = GetDLPackDtype(tensor, &dlpack_type); - if (!status.ok()) { - return status; - } - out->ctx = ctx; - out->ndim = tensor.shape().dims(); - out->shape = tf_shape; - out->strides = nullptr; - out->byte_offset = 0; - out->dtype = dlpack_type; - out->data = src.buf + src.offset; - return tensorflow::Status::OK(); 
+tensorflow::Status MakeDLTensor(const TensorAsBuf& src, const DLContext& ctx, int64_t* tf_shape, + DLTensor* out) { + DLDataType dlpack_type; + const tensorflow::Tensor& tensor = *src.tensor; + + auto status = GetDLPackDtype(tensor, &dlpack_type); + if (!status.ok()) { + return status; + } + out->ctx = ctx; + out->ndim = tensor.shape().dims(); + out->shape = tf_shape; + out->strides = nullptr; + out->byte_offset = 0; + out->dtype = dlpack_type; + out->data = src.buf + src.offset; + return tensorflow::Status::OK(); } - template <> class TVMDSOOpTrait { public: - static const int device_type = kDLCPU; + static const int device_type = kDLCPU; - static int device_id(OpKernelContext* context) { - return 0; - } + static int device_id(OpKernelContext* context) { return 0; } - static void make_shape_from_tensor( - const tensorflow::Tensor& shape_tensor, - tensorflow::TensorShape* output_shape) { - tensorflow::int64 num_dims = shape_tensor.NumElements(); - const tensorflow::int64* dims = - shape_tensor.flat().data(); - tensorflow::TensorShapeUtils::MakeShape( - dims, num_dims, output_shape); - } + static void make_shape_from_tensor(const tensorflow::Tensor& shape_tensor, + tensorflow::TensorShape* output_shape) { + tensorflow::int64 num_dims = shape_tensor.NumElements(); + const tensorflow::int64* dims = shape_tensor.flat().data(); + tensorflow::TensorShapeUtils::MakeShape(dims, num_dims, output_shape); + } }; #ifdef TF_TVMDSOOP_ENABLE_GPU template <> class TVMDSOOpTrait { public: - static const int device_type = kDLGPU; + static const int device_type = kDLGPU; - static int device_id(OpKernelContext* context) { - auto device_base = context->device(); - auto gpu_device_info = device_base->tensorflow_gpu_device_info(); - return gpu_device_info->gpu_id; - } + static int device_id(OpKernelContext* context) { + auto device_base = context->device(); + auto gpu_device_info = device_base->tensorflow_gpu_device_info(); + return gpu_device_info->gpu_id; + } - static void make_shape_from_tensor( - const tensorflow::Tensor& shape_tensor, - tensorflow::TensorShape* output_shape) { - tensorflow::int64 num_dims = shape_tensor.NumElements(); - const tensorflow::int64* flat = - shape_tensor.flat().data(); - tensorflow::int64* dims = new tensorflow::int64[num_dims]; - cudaMemcpy(dims, flat, sizeof(tensorflow::int64) * num_dims, - cudaMemcpyDeviceToHost); - tensorflow::TensorShapeUtils::MakeShape( - dims, num_dims, output_shape); - delete dims; - } + static void make_shape_from_tensor(const tensorflow::Tensor& shape_tensor, + tensorflow::TensorShape* output_shape) { + tensorflow::int64 num_dims = shape_tensor.NumElements(); + const tensorflow::int64* flat = shape_tensor.flat().data(); + tensorflow::int64* dims = new tensorflow::int64[num_dims]; + cudaMemcpy(dims, flat, sizeof(tensorflow::int64) * num_dims, cudaMemcpyDeviceToHost); + tensorflow::TensorShapeUtils::MakeShape(dims, num_dims, output_shape); + delete dims; + } }; #endif - template class TVMDSOOp : public OpKernel { private: @@ -246,8 +219,7 @@ class TVMDSOOp : public OpKernel { initAttributes(context); // Load TVM function from dynamic library - tvm::runtime::Module mod_dylib = - tvm::runtime::Module::LoadFromFile(lib_path); + tvm::runtime::Module mod_dylib = tvm::runtime::Module::LoadFromFile(lib_path); tvm_func = mod_dylib.GetFunction(func_name); CHECK(tvm_func != nullptr); } @@ -261,7 +233,7 @@ class TVMDSOOp : public OpKernel { int device_id = TVMDSOOpTrait::device_id(context); int device_type = TVMDSOOpTrait::device_type; - DLContext dl_ctx = { 
DLDeviceType(device_type), device_id };
+    DLContext dl_ctx = {DLDeviceType(device_type), device_id};
 
     // Get output shape
     tensorflow::TensorShape output_shape;
@@ -269,43 +241,39 @@ class TVMDSOOp : public OpKernel {
     if (has_static_output_shape) {
       // use static output shape
       const tensorflow::int64* dims = static_output_shape.data();
-      tensorflow::TensorShapeUtils::MakeShape(
-          dims, static_output_shape.size(), &output_shape);
+      tensorflow::TensorShapeUtils::MakeShape(dims, static_output_shape.size(), &output_shape);
     } else if (output_shape_tensor.dims() == 1) {
       // use shape tensor values as output shape
-      TVMDSOOpTrait<DEVICE_TYPE>::make_shape_from_tensor(
-          output_shape_tensor, &output_shape);
+      TVMDSOOpTrait<DEVICE_TYPE>::make_shape_from_tensor(output_shape_tensor, &output_shape);
     } else {
       // use input tensor shape by default
       output_shape = context->input(0).shape();
     }
 
     for (int i = 0; i < NUM_INPUTS; ++i) {
-        // Grab the input tensor
-        auto& input_tensor = context->input(i);
+      // Grab the input tensor
+      auto& input_tensor = context->input(i);
 
-        // Create shape container, should keep ref during execution
-        shapes[i] = input_tensor.shape().dim_sizes();
-        auto shape_ptr = reinterpret_cast<int64_t*>(shapes[i].data());
+      // Create shape container, should keep ref during execution
+      shapes[i] = input_tensor.shape().dim_sizes();
+      auto shape_ptr = reinterpret_cast<int64_t*>(shapes[i].data());
 
-        TensorAsBuf& input = buf_info[i];
-        input.device_type = device_type;
+      TensorAsBuf& input = buf_info[i];
+      input.device_type = device_type;
 
-        EnsureAlignment(context, input_tensor, &input);
-        input.CopyFromOrigin();
+      EnsureAlignment(context, input_tensor, &input);
+      input.CopyFromOrigin();
 
-        status = MakeDLTensor(input, dl_ctx, shape_ptr, &args[i]);
-        OP_REQUIRES_OK(context, status);
+      status = MakeDLTensor(input, dl_ctx, shape_ptr, &args[i]);
+      OP_REQUIRES_OK(context, status);
     }
 
     // Allocate output tensor
     tensorflow::Tensor* output_tensor;
-    OP_REQUIRES_OK(context, context->allocate_output(
-        0, output_shape, &output_tensor));
+    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output_tensor));
 
     // shape dimension buf should be kept alive on stack
     auto output_shape_dim_buf = output_tensor->shape().dim_sizes();
-    auto output_shape_ptr = reinterpret_cast<int64_t*>(
-        output_shape_dim_buf.data());
+    auto output_shape_ptr = reinterpret_cast<int64_t*>(output_shape_dim_buf.data());
 
     TensorAsBuf output;
     output.device_type = device_type;
@@ -320,17 +288,16 @@ class TVMDSOOp : public OpKernel {
     }
 };
 
-
 #ifdef TF_TVMDSOOP_ENABLE_GPU
-#define REGISTER_TFTVM_KERNEL(n) \
-    REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n) \
-        .Device(tensorflow::DEVICE_CPU), TVMDSOOp<CPUDevice, n>); \
-    REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n) \
-        .Device(tensorflow::DEVICE_GPU), TVMDSOOp<GPUDevice, n>);
+#define REGISTER_TFTVM_KERNEL(n)                                              \
+  REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n).Device(tensorflow::DEVICE_CPU), \
+                          TVMDSOOp<CPUDevice, n>);                            \
+  REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n).Device(tensorflow::DEVICE_GPU), \
+                          TVMDSOOp<GPUDevice, n>);
 #else
-#define REGISTER_TFTVM_KERNEL(n) \
-    REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n) \
-        .Device(tensorflow::DEVICE_CPU), TVMDSOOp<CPUDevice, n>);
+#define REGISTER_TFTVM_KERNEL(n)                                              \
+  REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n).Device(tensorflow::DEVICE_CPU), \
+                          TVMDSOOp<CPUDevice, n>);
 #endif
 
 REGISTER_TFTVM_KERNEL(1)
@@ -341,4 +308,3 @@ REGISTER_TFTVM_KERNEL(5)
 REGISTER_TFTVM_KERNEL(6)
 REGISTER_TFTVM_KERNEL(7)
 REGISTER_TFTVM_KERNEL(8)
-
diff --git a/src/contrib/tf_op/tvm_dso_ops.cc b/src/contrib/tf_op/tvm_dso_ops.cc
index dfb21b204b81..f228313949cb 100644
--- a/src/contrib/tf_op/tvm_dso_ops.cc
+++ 
b/src/contrib/tf_op/tvm_dso_ops.cc @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -19,71 +19,104 @@ #include "tensorflow/core/framework/op.h" -#define REGISTER_TFTVM_OP(n) REGISTER_OP("TvmDsoOp" #n) \ - .Output("output: output_dtype") \ - .Attr("lib_path: string") \ - .Attr("func_name: string") \ - .Attr("output_dtype: {int32, int64, float} = DT_FLOAT") \ - .Attr("static_output_shape: list(int) >= 0 = []") \ - .Attr("has_static_output_shape: bool") \ +#define REGISTER_TFTVM_OP(n) \ + REGISTER_OP("TvmDsoOp" #n) \ + .Output("output: output_dtype") \ + .Attr("lib_path: string") \ + .Attr("func_name: string") \ + .Attr("output_dtype: {int32, int64, float} = DT_FLOAT") \ + .Attr("static_output_shape: list(int) >= 0 = []") \ + .Attr("has_static_output_shape: bool") - -REGISTER_TFTVM_OP(1) - .Input("input: T").Attr("T: type") \ - .Input("dynamic_output_shape: int64"); +REGISTER_TFTVM_OP(1).Input("input: T").Attr("T: type").Input("dynamic_output_shape: int64"); REGISTER_TFTVM_OP(2) - .Input("input1: T1").Attr("T1: type") - .Input("input2: T2").Attr("T2: type") + .Input("input1: T1") + .Attr("T1: type") + .Input("input2: T2") + .Attr("T2: type") .Input("dynamic_output_shape: int64"); REGISTER_TFTVM_OP(3) - .Input("input1: T1").Attr("T1: type") - .Input("input2: T2").Attr("T2: type") - .Input("input3: T3").Attr("T3: type") + .Input("input1: T1") + .Attr("T1: type") + .Input("input2: T2") + .Attr("T2: type") + .Input("input3: T3") + .Attr("T3: type") .Input("dynamic_output_shape: int64"); REGISTER_TFTVM_OP(4) - .Input("input1: T1").Attr("T1: type") - .Input("input2: T2").Attr("T2: type") - .Input("input3: T3").Attr("T3: type") - .Input("input4: T4").Attr("T4: type") + .Input("input1: T1") + .Attr("T1: type") + .Input("input2: T2") + .Attr("T2: type") + .Input("input3: T3") + .Attr("T3: type") + .Input("input4: T4") + .Attr("T4: type") .Input("dynamic_output_shape: int64"); REGISTER_TFTVM_OP(5) - .Input("input1: T1").Attr("T1: type") - .Input("input2: T2").Attr("T2: type") - .Input("input3: T3").Attr("T3: type") - .Input("input4: T4").Attr("T4: type") - .Input("input5: T5").Attr("T5: type") + .Input("input1: T1") + .Attr("T1: type") + .Input("input2: T2") + .Attr("T2: type") + .Input("input3: T3") + .Attr("T3: type") + .Input("input4: T4") + .Attr("T4: type") + .Input("input5: T5") + .Attr("T5: type") .Input("dynamic_output_shape: int64"); REGISTER_TFTVM_OP(6) - .Input("input1: T1").Attr("T1: type") - .Input("input2: T2").Attr("T2: type") - .Input("input3: T3").Attr("T3: type") - .Input("input4: T4").Attr("T4: type") - .Input("input5: T5").Attr("T5: type") - .Input("input6: T6").Attr("T6: type") + .Input("input1: T1") + .Attr("T1: type") + .Input("input2: T2") + .Attr("T2: type") + .Input("input3: T3") + .Attr("T3: type") + .Input("input4: T4") + .Attr("T4: type") + .Input("input5: T5") + .Attr("T5: type") + .Input("input6: T6") + .Attr("T6: type") .Input("dynamic_output_shape: int64"); REGISTER_TFTVM_OP(7) - .Input("input1: T1").Attr("T1: type") - .Input("input2: T2").Attr("T2: type") - .Input("input3: T3").Attr("T3: type") - .Input("input4: T4").Attr("T4: type") - .Input("input5: T5").Attr("T5: type") - .Input("input6: 
T6").Attr("T6: type") - .Input("input7: T7").Attr("T7: type") + .Input("input1: T1") + .Attr("T1: type") + .Input("input2: T2") + .Attr("T2: type") + .Input("input3: T3") + .Attr("T3: type") + .Input("input4: T4") + .Attr("T4: type") + .Input("input5: T5") + .Attr("T5: type") + .Input("input6: T6") + .Attr("T6: type") + .Input("input7: T7") + .Attr("T7: type") .Input("dynamic_output_shape: int64"); REGISTER_TFTVM_OP(8) - .Input("input1: T1").Attr("T1: type") - .Input("input2: T2").Attr("T2: type") - .Input("input3: T3").Attr("T3: type") - .Input("input4: T4").Attr("T4: type") - .Input("input5: T5").Attr("T5: type") - .Input("input6: T6").Attr("T6: type") - .Input("input7: T7").Attr("T7: type") - .Input("input8: T8").Attr("T8: type") + .Input("input1: T1") + .Attr("T1: type") + .Input("input2: T2") + .Attr("T2: type") + .Input("input3: T3") + .Attr("T3: type") + .Input("input4: T4") + .Attr("T4: type") + .Input("input5: T5") + .Attr("T5: type") + .Input("input6: T6") + .Attr("T6: type") + .Input("input7: T7") + .Attr("T7: type") + .Input("input8: T8") + .Attr("T8: type") .Input("dynamic_output_shape: int64"); From 93436693abb12b114e0bb023ffdc3663359c2c00 Mon Sep 17 00:00:00 2001 From: baoxinqi Date: Fri, 3 Apr 2020 13:24:39 +0800 Subject: [PATCH 29/30] feat: Use TF list input op def --- apps/tf_tvmdsoop/tests/test_tfop_module.py | 2 +- python/tvm/contrib/tf_op/module.py | 9 +- src/contrib/tf_op/index_seq.h | 61 ----------- src/contrib/tf_op/tvm_dso_op_kernels.cc | 52 +++++----- src/contrib/tf_op/tvm_dso_ops.cc | 111 ++------------------- 5 files changed, 39 insertions(+), 196 deletions(-) delete mode 100644 src/contrib/tf_op/index_seq.h diff --git a/apps/tf_tvmdsoop/tests/test_tfop_module.py b/apps/tf_tvmdsoop/tests/test_tfop_module.py index f2dee98ee01c..1672b58fd60a 100644 --- a/apps/tf_tvmdsoop/tests/test_tfop_module.py +++ b/apps/tf_tvmdsoop/tests/test_tfop_module.py @@ -61,7 +61,7 @@ def export_gpu_add_lib(): def test_add(session, lib_path, tf_device): """test add lib with TensorFlow wrapper""" - module = tf_op.Module(lib_path) + module = tf_op.OpModule(lib_path) left = tf.placeholder("float32", shape=[4]) right = tf.placeholder("float32", shape=[4]) diff --git a/python/tvm/contrib/tf_op/module.py b/python/tvm/contrib/tf_op/module.py index 446800c82a03..f67f715a1be6 100644 --- a/python/tvm/contrib/tf_op/module.py +++ b/python/tvm/contrib/tf_op/module.py @@ -67,16 +67,11 @@ def __init__(self, lib_path, func_name, output_dtype, output_shape): elif output_shape is not None: self.dynamic_output_shape = self._pack_shape_tensor(output_shape) - # delay op initialization to where Func.apply() get called first time - self.tvm_dso_op = None self.module = load_library.load_op_library('tvm_dso_op.so') + self.tvm_dso_op = self.module.tvm_dso_op def apply(self, *params): - if self.tvm_dso_op is None: - num_inputs = len(params) - self.tvm_dso_op = getattr(self.module, "tvm_dso_op%s" % num_inputs) - - return self.tvm_dso_op(*params, + return self.tvm_dso_op(params, dynamic_output_shape=self.dynamic_output_shape, static_output_shape=self.static_output_shape, has_static_output_shape=self.has_static_output_shape, diff --git a/src/contrib/tf_op/index_seq.h b/src/contrib/tf_op/index_seq.h deleted file mode 100644 index 5448c1f5f42d..000000000000 --- a/src/contrib/tf_op/index_seq.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*
- * Refer to std::index_sequence (since c++14)
- * Utilities to invoke variadic function with template
- */
-#ifndef TVM_CONTRIB_TF_OP_INDEX_SEQ_H_
-#define TVM_CONTRIB_TF_OP_INDEX_SEQ_H_
-
-template <std::size_t... Tail>
-struct IndexSeq {};
-
-template <std::size_t N, std::size_t... Tail>
-struct IndexSeqHelper : public IndexSeqHelper<N - 1U, N - 1U, Tail...> {};
-
-template <std::size_t... Tail>
-struct IndexSeqHelper<0U, Tail...> {
-  using type = IndexSeq<Tail...>;
-};
-
-template <std::size_t N>
-using make_index_sequence = typename IndexSeqHelper<N>::type;
-
-template <typename F, typename T, std::size_t N, std::size_t... Idx>
-void apply_variadic_impl(F f, T (&t)[N], IndexSeq<Idx...>) {
-  f(t[Idx]...);
-}
-
-template <typename F, typename T, std::size_t N>
-void apply_variadic(F f, T (&t)[N]) {
-  apply_variadic_impl(f, t, make_index_sequence<N>{});
-}
-
-template <typename F, typename T, std::size_t N, std::size_t... Idx>
-void apply_variadic_by_ptrs_impl(F f, T (&t)[N], IndexSeq<Idx...>) {
-  f(&t[Idx]...);
-}
-
-template <typename F, typename T, std::size_t N>
-void apply_variadic_by_ptrs(F f, T (&t)[N]) {
-  apply_variadic_by_ptrs_impl(f, t, make_index_sequence<N>{});
-}
-
-#endif  // TVM_CONTRIB_TF_OP_INDEX_SEQ_H_
diff --git a/src/contrib/tf_op/tvm_dso_op_kernels.cc b/src/contrib/tf_op/tvm_dso_op_kernels.cc
index 03024da9a478..d74d8fb917e5 100644
--- a/src/contrib/tf_op/tvm_dso_op_kernels.cc
+++ b/src/contrib/tf_op/tvm_dso_op_kernels.cc
@@ -26,7 +26,6 @@
 #include 
 #include 
 
-#include "index_seq.h"
 #include "tensorflow/core/framework/op_kernel.h"
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -37,6 +36,10 @@ using tensorflow::OpKernel;
 using tensorflow::OpKernelConstruction;
 using tensorflow::OpKernelContext;
 
+using tvm::runtime::TVMArgs;
+using tvm::runtime::TVMArgsSetter;
+using tvm::runtime::TVMRetValue;
+
 // Op utility trait for different device type
 template <typename DEVICE_TYPE>
 class TVMDSOOpTrait;
@@ -192,7 +195,7 @@ class TVMDSOOpTrait<GPUDevice> {
 };
 #endif
 
-template <typename DEVICE_TYPE, int NUM_INPUTS>
+template <typename DEVICE_TYPE>
 class TVMDSOOp : public OpKernel {
  private:
   tvm::runtime::PackedFunc tvm_func;
@@ -225,9 +228,12 @@ class TVMDSOOp : public OpKernel {
   }
 
   void Compute(tensorflow::OpKernelContext* context) override {
-    DLTensor args[NUM_INPUTS + 1];
-    TensorAsBuf buf_info[NUM_INPUTS];
-    ShapeContainer shapes[NUM_INPUTS];
+    // the last input is output shape spec
+    const int num_inputs = context->num_inputs() - 1;
+    const int num_total_args = num_inputs + 1;
+    std::vector<DLTensor> args(num_total_args);
+    std::vector<TensorAsBuf> buf_info(num_inputs);
+    std::vector<ShapeContainer> shapes(num_inputs);
 
     tensorflow::Status status;
 
@@ -237,7 +243,7 @@ class TVMDSOOp : public OpKernel {
 
     // Get output shape
     tensorflow::TensorShape output_shape;
-    auto& output_shape_tensor = context->input(NUM_INPUTS);
+    auto& output_shape_tensor = context->input(num_inputs);
     if (has_static_output_shape) {
       // use static output shape
       const tensorflow::int64* dims = static_output_shape.data();
@@ -250,7 +256,7 @@ class TVMDSOOp : public OpKernel {
       output_shape = context->input(0).shape();
     }
 
-    for (int i = 0; i < NUM_INPUTS; ++i) {
+    for (int i = 0; i < num_inputs; ++i) {
       // Grab the input tensor
       auto& 
input_tensor = context->input(i);
 
@@ -279,32 +285,26 @@ class TVMDSOOp : public OpKernel {
     output.device_type = device_type;
     EnsureAlignment(context, *output_tensor, &output);
 
-    status = MakeDLTensor(output, dl_ctx, output_shape_ptr, &args[NUM_INPUTS]);
+    status = MakeDLTensor(output, dl_ctx, output_shape_ptr, &args[num_inputs]);
     OP_REQUIRES_OK(context, status);
 
-    apply_variadic_by_ptrs(tvm_func, args);
+    // Prepare PackedFunc arguments
+    std::vector<TVMValue> tvm_values(num_total_args);
+    std::vector<int> tvm_type_codes(num_total_args);
+    TVMArgsSetter setter(tvm_values.data(), tvm_type_codes.data());
+    for (int k = 0; k < num_total_args; ++k) {
+      setter(k, &args[k]);
+    }
+    TVMRetValue rv;
+    tvm_func.CallPacked(TVMArgs(tvm_values.data(), tvm_type_codes.data(), num_total_args), &rv);
 
     output.CopyToOrigin();
   }
 };
 
 #ifdef TF_TVMDSOOP_ENABLE_GPU
-#define REGISTER_TFTVM_KERNEL(n)                                              \
-  REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n).Device(tensorflow::DEVICE_CPU), \
-                          TVMDSOOp<CPUDevice, n>);                            \
-  REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n).Device(tensorflow::DEVICE_GPU), \
-                          TVMDSOOp<GPUDevice, n>);
+REGISTER_KERNEL_BUILDER(Name("TvmDsoOp").Device(tensorflow::DEVICE_CPU), TVMDSOOp<CPUDevice>);
+REGISTER_KERNEL_BUILDER(Name("TvmDsoOp").Device(tensorflow::DEVICE_GPU), TVMDSOOp<GPUDevice>);
 #else
-#define REGISTER_TFTVM_KERNEL(n)                                              \
-  REGISTER_KERNEL_BUILDER(Name("TvmDsoOp" #n).Device(tensorflow::DEVICE_CPU), \
-                          TVMDSOOp<CPUDevice, n>);
+REGISTER_KERNEL_BUILDER(Name("TvmDsoOp").Device(tensorflow::DEVICE_CPU), TVMDSOOp<CPUDevice>);
 #endif
-
-REGISTER_TFTVM_KERNEL(1)
-REGISTER_TFTVM_KERNEL(2)
-REGISTER_TFTVM_KERNEL(3)
-REGISTER_TFTVM_KERNEL(4)
-REGISTER_TFTVM_KERNEL(5)
-REGISTER_TFTVM_KERNEL(6)
-REGISTER_TFTVM_KERNEL(7)
-REGISTER_TFTVM_KERNEL(8)
diff --git a/src/contrib/tf_op/tvm_dso_ops.cc b/src/contrib/tf_op/tvm_dso_ops.cc
index f228313949cb..1183b2ef34b5 100644
--- a/src/contrib/tf_op/tvm_dso_ops.cc
+++ b/src/contrib/tf_op/tvm_dso_ops.cc
@@ -19,71 +19,13 @@
 
 #include "tensorflow/core/framework/op.h"
 
-#define REGISTER_TFTVM_OP(n)                                  \
-  REGISTER_OP("TvmDsoOp" #n)                                  \
-      .Output("output: output_dtype")                         \
-      .Attr("lib_path: string")                               \
-      .Attr("func_name: string")                              \
-      .Attr("output_dtype: {int32, int64, float} = DT_FLOAT") \
-      .Attr("static_output_shape: list(int) >= 0 = []")       \
-      .Attr("has_static_output_shape: bool")
-
-REGISTER_TFTVM_OP(1).Input("input: T").Attr("T: type").Input("dynamic_output_shape: int64");
-
-REGISTER_TFTVM_OP(2)
-    .Input("input1: T1")
-    .Attr("T1: type")
-    .Input("input2: T2")
-    .Attr("T2: type")
-    .Input("dynamic_output_shape: int64");
-
-REGISTER_TFTVM_OP(3)
-    .Input("input1: T1")
-    .Attr("T1: type")
-    .Input("input2: T2")
-    .Attr("T2: type")
-    .Input("input3: T3")
-    .Attr("T3: type")
-    .Input("dynamic_output_shape: int64");
-
-REGISTER_TFTVM_OP(4)
-    .Input("input1: T1")
-    .Attr("T1: type")
-    .Input("input2: T2")
-    .Attr("T2: type")
-    .Input("input3: T3")
-    .Attr("T3: type")
-    .Input("input4: T4")
-    .Attr("T4: type")
-    .Input("dynamic_output_shape: int64");
-
-REGISTER_TFTVM_OP(5)
-    .Input("input1: T1")
-    .Attr("T1: type")
-    .Input("input2: T2")
-    .Attr("T2: type")
-    .Input("input3: T3")
-    .Attr("T3: type")
-    .Input("input4: T4")
-    .Attr("T4: type")
-    .Input("input5: T5")
-    .Attr("T5: type")
-    .Input("dynamic_output_shape: int64");
-
-REGISTER_TFTVM_OP(6)
-    .Input("input1: T1")
-    .Attr("T1: type")
-    .Input("input2: T2")
-    .Attr("T2: type")
-    .Input("input3: T3")
-    .Attr("T3: type")
-    .Input("input4: T4")
-    .Attr("T4: type")
-    .Input("input5: T5")
-    .Attr("T5: type")
-    .Input("input6: T6")
-    .Attr("T6: type")
-    .Input("dynamic_output_shape: int64");
-
-REGISTER_TFTVM_OP(7)
-    
.Input("input1: T1") - .Attr("T1: type") - .Input("input2: T2") - .Attr("T2: type") - .Input("input3: T3") - .Attr("T3: type") - .Input("input4: T4") - .Attr("T4: type") - .Input("input5: T5") - .Attr("T5: type") - .Input("input6: T6") - .Attr("T6: type") - .Input("input7: T7") - .Attr("T7: type") - .Input("dynamic_output_shape: int64"); - -REGISTER_TFTVM_OP(8) - .Input("input1: T1") - .Attr("T1: type") - .Input("input2: T2") - .Attr("T2: type") - .Input("input3: T3") - .Attr("T3: type") - .Input("input4: T4") - .Attr("T4: type") - .Input("input5: T5") - .Attr("T5: type") - .Input("input6: T6") - .Attr("T6: type") - .Input("input7: T7") - .Attr("T7: type") - .Input("input8: T8") - .Attr("T8: type") - .Input("dynamic_output_shape: int64"); +REGISTER_OP("TvmDsoOp") + .Input("input_args: ListT") + .Attr("ListT: list({int8, int32, int64, float16, float32})") + .Input("dynamic_output_shape: int64") + .Output("output: output_dtype") + .Attr("lib_path: string") + .Attr("func_name: string") + .Attr("output_dtype: {int8, int32, int64, float16, float32} = DT_FLOAT") + .Attr("static_output_shape: list(int) >= 0 = []") + .Attr("has_static_output_shape: bool"); From 38af1ceb99555b491f86fdd5d5bdd59b7a51e86e Mon Sep 17 00:00:00 2001 From: wrongtest Date: Tue, 7 Apr 2020 12:54:55 +0800 Subject: [PATCH 30/30] fix: Fix style and unexpected changes --- cmake/config.cmake | 2 +- python/tvm/contrib/tf_op/module.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/config.cmake b/cmake/config.cmake index 55958e278be0..04b6c3b66ea4 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -118,7 +118,7 @@ set(USE_MICRO_STANDALONE_RUNTIME OFF) # - ON: enable llvm with cmake's find search # - OFF: disable llvm # - /path/to/llvm-config: enable specific LLVM when multiple llvm-dev is available. -set(USE_LLVM ON) +set(USE_LLVM OFF) #--------------------------------------------- # Contrib libraries diff --git a/python/tvm/contrib/tf_op/module.py b/python/tvm/contrib/tf_op/module.py index f67f715a1be6..f13670e39895 100644 --- a/python/tvm/contrib/tf_op/module.py +++ b/python/tvm/contrib/tf_op/module.py @@ -19,7 +19,7 @@ from tensorflow.python.framework import load_library -class OpModule(): +class OpModule: """Module container of TensorFlow TVMDSO op which wraps exported TVM op implementation library to be called on TensorFlow side""" @@ -48,7 +48,7 @@ def __getitem__(self, func_name): return self.func(func_name) -class TensorFunc(): +class TensorFunc: """Function object that acts as TensorFlow tensor to tensor function.""" def __init__(self, lib_path, func_name, output_dtype, output_shape):