From 0d39f88996ec5d55d6aae0cf3bccde548adad3da Mon Sep 17 00:00:00 2001 From: Pariksheet Date: Fri, 11 May 2018 15:11:33 +0530 Subject: [PATCH 01/12] CPP implementation of L2Norm and LRN ops --- topi/include/topi/cuda/nn.h | 107 +++++++++++++++++++++ topi/include/topi/nn/l2_norm.h | 46 +++++++++ topi/include/topi/nn/local_response_norm.h | 76 +++++++++++++++ topi/include/topi/rocm/nn.h | 41 ++++++++ topi/python/topi/cuda/nn.py | 60 ++---------- topi/python/topi/generic/nn.py | 11 ++- topi/python/topi/nn/l2_norm.py | 10 +- topi/python/topi/nn/local_response_norm.py | 28 +----- topi/python/topi/rocm/nn.py | 11 ++- topi/src/topi.cc | 39 ++++++++ topi/tests/python/test_topi_l2norm.py | 7 +- topi/tests/python/test_topi_lrn.py | 15 +-- topi/tests/python_cpp/test_topi_l2norm.py | 75 +++++++++++++++ topi/tests/python_cpp/test_topi_lrn.py | 101 +++++++++++++++++++ 14 files changed, 525 insertions(+), 102 deletions(-) create mode 100644 topi/include/topi/cuda/nn.h create mode 100644 topi/include/topi/nn/l2_norm.h create mode 100644 topi/include/topi/nn/local_response_norm.h create mode 100644 topi/include/topi/rocm/nn.h create mode 100644 topi/tests/python_cpp/test_topi_l2norm.py create mode 100644 topi/tests/python_cpp/test_topi_lrn.py diff --git a/topi/include/topi/cuda/nn.h b/topi/include/topi/cuda/nn.h new file mode 100644 index 000000000000..a425a47b5ce7 --- /dev/null +++ b/topi/include/topi/cuda/nn.h @@ -0,0 +1,107 @@ +/*! +* Copyright (c) 2018 by Contributors +* \file cuda/nn.h +* \brief CUDA schedule for lrn and l2 normalization operations +*/ +#ifndef TOPI_CUDA_NN_H_ +#define TOPI_CUDA_NN_H_ + +#include "tvm/tvm.h" +#include "tvm/build_module.h" +#include "topi/tags.h" + +namespace topi { +using namespace tvm; +namespace cuda { +/*! +* \brief Create a CUDA schedule for LRN +* +* \param target The target to generate a schedule for. +* \param outs The output tensors. +* +* \return A schedule for the given ops. +*/ +inline Schedule schedule_lrn(const Target &target, const Array& outs) { + Array out_ops; + for (auto t : outs) { + out_ops.push_back(t->op); + } + auto s = create_schedule(out_ops); + auto num_thread = 64; + auto block_x = tvm::thread_axis(Range(), "blockIdx.x"); + auto thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); + auto lrn = outs[0]; + auto sqr_sum_up = lrn->op->InputTensors()[1]; + auto sqr_sum = sqr_sum_up->op->InputTensors()[0]; + auto set_pad = sqr_sum->op->InputTensors()[0]; + s[set_pad].bind(set_pad->op.as()->axis[0], block_x); + auto rxk = sqr_sum->op.as()->reduce_axis[0]; + IterVar xko, xki; + s[sqr_sum].split(rxk, num_thread, &xko, &xki); + auto srf = s.rfactor(sqr_sum, xki)[0]; + s[sqr_sum].bind(s[sqr_sum]->op.as()->axis[0], block_x); + s[sqr_sum].bind(s[sqr_sum]->op.as()->reduce_axis[0], thread_x); + s[srf].compute_at(s[sqr_sum], s[sqr_sum]->op.as()->reduce_axis[0]); + s[sqr_sum_up].bind(sqr_sum_up->op.as()->axis[0], block_x); + IterVar xto, xti; + s[lrn].split_by_nparts(lrn->op.as()->axis[1], num_thread, &xto, &xti); + s[lrn].bind(lrn->op.as()->axis[0], block_x); + s[lrn].bind(xto, thread_x); + + return s; +} + +/*! +* \brief Create a CUDA schedule for L2 normalization +* +* \param target The target to generate a schedule for. +* \param outs The output tensors. +* +* \return A schedule for the given ops. 
+*/ +inline Schedule schedule_l2norm(const Target &target, const Array& outs) { + Array out_ops; + for (auto t : outs) { + out_ops.push_back(t->op); + } + auto s = create_schedule(out_ops); + + std::function traverse; + traverse = [&](const Operation& op) { + // Inline all one-to-one-mapping operators except the last stage (output) + if (is_injective(op->tag) || op->tag == "l2norm") { + if (!detail::contains(s->outputs, op)) { + s[op].compute_inline(); + } + for (auto tensor : op->InputTensors()) { + if (tensor->op->InputTensors().size() > 0) { + traverse(tensor->op); + } + } + } else if (op->tag == "comm_reduce") { + ScheduleReduce(target, op, s, false); + for (auto tensor : op->InputTensors()) { + traverse(tensor->op); + } + } else { + LOG(ERROR) << "Unsupported operator " << op->tag; + } + }; + + traverse(outs[0]->op); + auto num_thread = 64; + auto l2norm = outs[0]; + auto block_x = tvm::thread_axis(Range(), "blockIdx.x"); + auto thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); + IterVar xto, xti; + s[l2norm].split_by_nparts(l2norm->op.as()->axis[1], num_thread, &xto, &xti); + s[l2norm].bind(l2norm->op.as()->axis[0], block_x); + s[l2norm].bind(xto, thread_x); + return s; +} +} // namespace cuda +} // namespace topi +#endif // TOPI_CUDA_NN_H_ + + + diff --git a/topi/include/topi/nn/l2_norm.h b/topi/include/topi/nn/l2_norm.h new file mode 100644 index 000000000000..e1fe25cb6040 --- /dev/null +++ b/topi/include/topi/nn/l2_norm.h @@ -0,0 +1,46 @@ +/*! + * Copyright (c) 2018 by Contributors + * \brief l2 normalization op constructions + * \file nn/l2_norm.h + */ +#ifndef TOPI_NN_L2_NORM_H_ +#define TOPI_NN_L2_NORM_H_ + +#include +#include +#include "topi/tags.h" +#include "tvm/tvm.h" +namespace topi { +namespace nn { +using namespace tvm; + +/*! +* \brief L2 normalization inference operator +* +* \param data The input tensor. 4-D with shape [batch, channel, height, width] +* \param eps Epsilon to prevent div by 0 +* \param axis Axes over the normalization applied +* \param name The name of the operation +* \param tag The tag to mark the operation +* +* \return A Tensor whose op member is the l2 normalization operation +*/ +inline Tensor l2norm_instance(const Tensor& data, + float eps, + const Array& axis, + std::string name = "tensor", + std::string tag = "l2norm") { + CHECK_EQ(data->shape.size(), 4) << "L2 norm requires 4-D input"; + auto input_shape = data->shape; + Tensor dot_value = pow(data, static_cast(2.0)); + Tensor sum_value = topi::sum(dot_value, axis, true); + Tensor expand_sum = topi::broadcast_to(sum_value, input_shape); + return topi::broadcast_div(data, + topi::sqrt(tvm::compute(expand_sum->shape, + [&](const Array& i){ + return (max(expand_sum(i), eps)); + }, name = name, tag = tag))); +} +} // namespace nn +} // namespace topi +#endif // TOPI_NN_L2_NORM_H_ diff --git a/topi/include/topi/nn/local_response_norm.h b/topi/include/topi/nn/local_response_norm.h new file mode 100644 index 000000000000..0e31c7c0515d --- /dev/null +++ b/topi/include/topi/nn/local_response_norm.h @@ -0,0 +1,76 @@ +/*! + * Copyright (c) 2018 by Contributors + * \brief local response normalization op constructions + * \file nn/local_response_normalization.h + */ +#ifndef TOPI_NN_LOCAL_RESPONSE_NORM_H_ +#define TOPI_NN_LOCAL_RESPONSE_NORM_H_ + +#include + +#include "topi/tags.h" +#include "tvm/tvm.h" + +namespace topi { +namespace nn { +using namespace tvm; + +/*! +* \brief Local response normalization inference operator +* +* \param data The input tensor. 
4-D shape NCHW or NHWC +* \param size Integer to define normalisation window size +* \param axis Input data layout channel axis +* \param alpha Float scaling factor +* \param beta Exponent value +* \param bias Offset to avoid dividing by zero +* \param name The name of the operation +* \param tag The tag to mark the operation +* +* \return A Tensor whose op member is the Local response normalization operation +*/ +inline Tensor lrn(const Tensor& data, + int size, + int axis = 1, + float alpha = 0.0001, + float beta = 0.75, + float bias = 2, + std::string name = "tensor", + std::string tag = kBroadcast) { + CHECK_EQ(data->shape.size(), 4) << "LRN requires 4-D input"; + assert(size % 2 == 1); + assert(axis == 1 || axis == 3); + auto input_shape = data->shape; + Array pad_before{ 0, 0, 0, 0}; + Array pad_after{ 0, 0, 0, 0}; + pad_before.Set(axis, static_cast(size/2)); + pad_after.Set(axis, static_cast(size/2)); + auto pad_data = pad(data, pad_before, pad_after, 0, "pad_data"); + auto rxs = tvm::reduce_axis(Range(0, size), "rxs"); + Tensor sqr_sum; + if (axis == 1) { + sqr_sum = tvm::compute(input_shape, + [&](Var i, Var l, Var j, Var k) { + return tvm::sum(pad_data(i, l + rxs, j, k) * + pad_data(i, l + rxs, j, k), + {rxs}); + }); + } else if (axis == 3) { + sqr_sum = tvm::compute(input_shape, + [&](Var i, Var l, Var j, Var k) { + return tvm::sum(pad_data(i, l, j, k + rxs) * + pad_data(i, l, j, k + rxs), + {rxs}); + }); + } + auto sqrt_sum_up = tvm::compute(input_shape, + [&](Var i, Var j, Var k, Var l) { + return tvm::pow(bias + + (alpha * sqr_sum(i, j, k, l) / size), + beta); + }); + return topi::broadcast_div(data, sqrt_sum_up); +} +} // namespace nn +} // namespace topi +#endif // TOPI_NN_LOCAL_RESPONSE_NORM_H_ diff --git a/topi/include/topi/rocm/nn.h b/topi/include/topi/rocm/nn.h new file mode 100644 index 000000000000..98b74ad80593 --- /dev/null +++ b/topi/include/topi/rocm/nn.h @@ -0,0 +1,41 @@ +/*! +* Copyright (c) 2018 by Contributors +* \file rocm/nn.h +* \brief rocm schedule for lrn and l2 normalization operations +*/ +#ifndef TOPI_ROCM_NN_H_ +#define TOPI_ROCM_NN_H_ + +#include "tvm/tvm.h" +#include "tvm/build_module.h" +#include "topi/tags.h" + +namespace topi { +using namespace tvm; +namespace rocm { +/*! +* \brief Create a rocm schedule for LRN +* +* \param target The target to generate a schedule for. +* \param outs The output tensors. +* +* \return A schedule for the given ops. +*/ +inline Schedule schedule_lrn(const Target &target, const Array& outs) { + return topi::cuda::schedule_lrn(target, outs); +} + +/*! +* \brief Create a rocm schedule for L2 Normalization +* +* \param target The target to generate a schedule for. +* \param outs The output tensors. +* +* \return A schedule for the given ops. +*/ +inline Schedule schedule_l2norm(const Target &target, const Array& outs) { + return topi::cuda::schedule_l2norm(target, outs); +} +} // namespace rocm +} // namespace topi +#endif // TOPI_ROCM_NN_H_ diff --git a/topi/python/topi/cuda/nn.py b/topi/python/topi/cuda/nn.py index e8757970505b..c565698001fe 100644 --- a/topi/python/topi/cuda/nn.py +++ b/topi/python/topi/cuda/nn.py @@ -4,8 +4,7 @@ import tvm from .. import generic -from .. import tag -from .reduction import _schedule_reduce +from .. import cpp @generic.schedule_lrn.register(["cuda"]) def schedule_lrn(outs): @@ -22,28 +21,9 @@ def schedule_lrn(outs): sch: Schedule The computation schedule for the op. 
""" - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - num_thread = 64 - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") - - lrn = outs[0] - sqr_sum_up = lrn.op.input_tensors[1] - sqr_sum = sqr_sum_up.op.input_tensors[0] - set_pad = sqr_sum.op.input_tensors[0] - s[set_pad].bind(set_pad.op.axis[0], block_x) - rxk = sqr_sum.op.reduce_axis[0] - _, xki = s[sqr_sum].split(rxk, factor=num_thread) - srf = s.rfactor(sqr_sum, xki) - s[sqr_sum].bind(s[sqr_sum].op.axis[0], block_x) - s[sqr_sum].bind(s[sqr_sum].op.reduce_axis[0], thread_x) - s[srf].compute_at(s[sqr_sum], s[sqr_sum].op.reduce_axis[0]) - s[sqr_sum_up].bind(sqr_sum_up.op.axis[0], block_x) - xto, _ = s[lrn].split(lrn.op.axis[1], nparts=num_thread) - s[lrn].bind(lrn.op.axis[0], block_x) - s[lrn].bind(xto, thread_x) - return s + target = tvm.target.current_target(allow_none=False) + cpp_target = cpp.TEST_create_target(target.target_name) + return cpp.cuda.schedule_lrn(cpp_target, outs) @generic.schedule_l2norm.register(["cuda"]) def schedule_l2norm(outs): @@ -60,32 +40,6 @@ def schedule_l2norm(outs): sch: Schedule The computation schedule for the op. """ - outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs - s = tvm.create_schedule([x.op for x in outs]) - - def traverse(OP): - '''inline all one-to-one-mapping operators - except the last stage (output)''' - if tag.is_injective(OP.tag) or OP.tag == 'l2norm': - if OP not in s.outputs: - s[OP].compute_inline() - for tensor in OP.input_tensors: - if tensor.op.input_tensors: - traverse(tensor.op) - elif OP.tag == 'comm_reduce': - _schedule_reduce(OP, s, is_idx_reduce=False) - for tensor in OP.input_tensors: - traverse(tensor.op) - else: - raise RuntimeError("Unsupported operator tag: %s" % OP.tag) - traverse(outs[0].op) - - num_thread = 64 - l2norm = outs[0] - block_x = tvm.thread_axis("blockIdx.x") - thread_x = tvm.thread_axis((0, num_thread), "threadIdx.x") - xto, _ = s[l2norm].split(l2norm.op.axis[1], nparts=num_thread) - s[l2norm].bind(l2norm.op.axis[0], block_x) - s[l2norm].bind(xto, thread_x) - - return s + target = tvm.target.current_target(allow_none=False) + cpp_target = cpp.TEST_create_target(target.target_name) + return cpp.cuda.schedule_l2norm(cpp_target, outs) diff --git a/topi/python/topi/generic/nn.py b/topi/python/topi/generic/nn.py index 5a16d12206a3..7f67f8093856 100644 --- a/topi/python/topi/generic/nn.py +++ b/topi/python/topi/generic/nn.py @@ -2,7 +2,7 @@ """Generic nn operators""" from __future__ import absolute_import as _abs import tvm - +from .. import cpp def _default_schedule(outs, auto_inline): """Default schedule for llvm.""" @@ -273,8 +273,9 @@ def schedule_lrn(outs): sch: Schedule The computation schedule for the op. """ - return _default_schedule(outs, False) - + target = tvm.target.current_target(allow_none=False) + cpp_target = cpp.TEST_create_target(target.target_name) + return cpp.generic.default_schedule(cpp_target, outs, False) @tvm.target.generic_func def schedule_l2norm(outs): @@ -291,4 +292,6 @@ def schedule_l2norm(outs): sch: Schedule The computation schedule for the op. 
""" - return _default_schedule(outs, False) + target = tvm.target.current_target(allow_none=False) + cpp_target = cpp.TEST_create_target(target.target_name) + return cpp.generic.default_schedule(cpp_target, outs, False) diff --git a/topi/python/topi/nn/l2_norm.py b/topi/python/topi/nn/l2_norm.py index 6b5381a85599..964c8f8d264a 100644 --- a/topi/python/topi/nn/l2_norm.py +++ b/topi/python/topi/nn/l2_norm.py @@ -2,7 +2,7 @@ """TVM operator for l2norm""" from __future__ import absolute_import import tvm -import topi +from .. import cpp @tvm.target.generic_func def l2norm_instance(data, eps, axis=None): @@ -26,10 +26,4 @@ def l2norm_instance(data, eps, axis=None): output : tvm.Tensor 4-D output with same shape """ - assert len(data.shape) == 4, "only support 4-dim lrn" - dot_value = topi.cpp.pow(data, 2.0) - sum_value = topi.sum(dot_value, axis=axis, keepdims=True) - expand_sum = topi.broadcast_to(sum_value, data.shape) - return topi.broadcast_div(data, topi.sqrt(\ - tvm.compute(expand_sum.shape, lambda i, j, k, l:\ - tvm.max(expand_sum[i, j, k, l], eps), tag='l2norm'))) + return cpp.nn.l2norm_instance(data, eps, axis) diff --git a/topi/python/topi/nn/local_response_norm.py b/topi/python/topi/nn/local_response_norm.py index b44e02214acc..73eb41242513 100644 --- a/topi/python/topi/nn/local_response_norm.py +++ b/topi/python/topi/nn/local_response_norm.py @@ -2,8 +2,7 @@ """TVM operator for local response norm compute.""" from __future__ import absolute_import import tvm -import topi -from .pad import pad +from .. import cpp @tvm.target.generic_func def lrn(data, size, axis=1, alpha=0.0001, beta=0.75, bias=2): @@ -42,27 +41,4 @@ def lrn(data, size, axis=1, alpha=0.0001, beta=0.75, bias=2): output : tvm.Tensor 4-D output with same shape """ - assert len(data.shape) == 4, "only support 4-dim lrn" - assert (size % 2) == 1, "size should be odd number" - assert (axis == 1) or (axis == 3), "axis should 1 or 3 for NCHW and NHWC" - ##Add padding on left & right of size radius first - pad_after = pad_before = [0, 0, 0, 0] - pad_after[axis] = pad_before[axis] = (size//2) - pad_data = pad(data, pad_before, pad_after, name="pad_data") - - rxs = tvm.reduce_axis((0, size), name='rxs') - if axis == 1: - #NCHW layout - sqr_sum = tvm.compute(data.shape, lambda i, j, k, l: tvm.sum( - pad_data[i, j + rxs, k, l] * pad_data[i, j + rxs, k, l], - axis=rxs)) - elif axis == 3: - #NHWC layout - sqr_sum = tvm.compute(data.shape, lambda i, j, k, l: tvm.sum( - pad_data[i, j, k, l + rxs] * pad_data[i, j, k, l + rxs], - axis=rxs)) - - sqr_sum_up = tvm.compute(data.shape, lambda i, j, k, l: tvm.power( - (bias + (alpha * sqr_sum[i, j, k, l] / size)), beta)) - - return topi.broadcast_div(data, sqr_sum_up) + return cpp.nn.lrn(data, size, axis, alpha, beta, bias) diff --git a/topi/python/topi/rocm/nn.py b/topi/python/topi/rocm/nn.py index d9c529155f7b..ca935740f2e2 100644 --- a/topi/python/topi/rocm/nn.py +++ b/topi/python/topi/rocm/nn.py @@ -1,13 +1,18 @@ """scheduler for normalization functions on rocm backend""" from __future__ import absolute_import as _abs -import topi +import tvm from .. import generic +from .. 
import cpp @generic.schedule_lrn.register(["rocm", "gpu"]) def schedule_lrn(outs): - return topi.cuda.schedule_lrn(outs) + target = tvm.target.current_target(allow_none=False) + cpp_target = cpp.TEST_create_target(target.target_name) + return cpp.rocm.schedule_lrn(cpp_target, outs) @generic.schedule_l2norm.register(["rocm", "gpu"]) def schedule_l2norm(outs): - return topi.cuda.schedule_l2norm(outs) + target = tvm.target.current_target(allow_none=False) + cpp_target = cpp.TEST_create_target(target.target_name) + return cpp.rocm.schedule_l2norm(cpp_target, outs) diff --git a/topi/src/topi.cc b/topi/src/topi.cc index 4169f5f563ad..cd3ad66bb45a 100644 --- a/topi/src/topi.cc +++ b/topi/src/topi.cc @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include #include @@ -39,6 +41,7 @@ #include #include #include +#include #include #include @@ -46,6 +49,7 @@ #include #include +#include namespace topi { @@ -359,6 +363,20 @@ TVM_REGISTER_GLOBAL("topi.nn.log_softmax") *rv = nn::log_softmax(args[0]); }); +/* Ops from nn/l2_norm.h */ +TVM_REGISTER_GLOBAL("topi.nn.l2norm_instance") +.set_body([](TVMArgs args, TVMRetValue *rv) { + *rv = nn::l2norm_instance(args[0], static_cast(args[1]), args[2]); + }); + +TVM_REGISTER_GLOBAL("topi.nn.lrn") +.set_body([](TVMArgs args, TVMRetValue *rv) { + *rv = nn::lrn(args[0], args[1], args[2], + static_cast(args[3]), + static_cast(args[4]), + static_cast(args[5])); + }); + TVM_REGISTER_GLOBAL("topi.vision.reorg") .set_body([](TVMArgs args, TVMRetValue *rv) { *rv = vision::reorg(args[0], args[1]); @@ -435,6 +453,17 @@ TVM_REGISTER_GLOBAL("topi.rocm.schedule_region") .set_body([](TVMArgs args, TVMRetValue *rv) { *rv = topi::rocm::schedule_region(args[0], args[1]); }); + +TVM_REGISTER_GLOBAL("topi.rocm.schedule_lrn") +.set_body([](TVMArgs args, TVMRetValue *rv) { + *rv = topi::rocm::schedule_lrn(args[0], args[1]); + }); + +TVM_REGISTER_GLOBAL("topi.rocm.schedule_l2norm") +.set_body([](TVMArgs args, TVMRetValue *rv) { + *rv = topi::rocm::schedule_l2norm(args[0], args[1]); + }); + /* CUDA schedules */ TVM_REGISTER_GLOBAL("topi.cuda.dense_cuda") .set_body([](TVMArgs args, TVMRetValue *rv) { @@ -481,6 +510,16 @@ TVM_REGISTER_GLOBAL("topi.cuda.schedule_region") *rv = topi::cuda::schedule_region(args[0], args[1]); }); +TVM_REGISTER_GLOBAL("topi.cuda.schedule_lrn") +.set_body([](TVMArgs args, TVMRetValue *rv) { + *rv = topi::cuda::schedule_lrn(args[0], args[1]); + }); + +TVM_REGISTER_GLOBAL("topi.cuda.schedule_l2norm") +.set_body([](TVMArgs args, TVMRetValue *rv) { + *rv = topi::cuda::schedule_l2norm(args[0], args[1]); + }); + /*! \brief Builder function for instantiating schedules. 
*/ using FTVMScheduleBuilder = std::function< tvm::Schedule(const tvm::Target& target, const tvm::Array& outs)>; diff --git a/topi/tests/python/test_topi_l2norm.py b/topi/tests/python/test_topi_l2norm.py index 182099ff9367..999b82169149 100644 --- a/topi/tests/python/test_topi_l2norm.py +++ b/topi/tests/python/test_topi_l2norm.py @@ -47,7 +47,10 @@ def check_device(device): return print("Running on target: %s" % device) with tvm.target.create(device): - s = topi.generic.schedule_l2norm(B) + if device == 'llvm': + s = topi.generic.schedule_l2norm([B]) + else: + s = topi.cuda.schedule_l2norm([B]) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx) f = tvm.build(s, [A, B], device) @@ -59,7 +62,7 @@ def check_device(device): def test_l2norm(): verify_l2norm(1, 3, 20, 20, 0.001) - verify_l2norm(1, 3, 20, 20, 0.001, 1) + verify_l2norm(1, 3, 20, 20, 0.001, (1,)) verify_l2norm(1, 3, 20, 20, 0.001, (1, 2)) verify_l2norm(1, 3, 20, 20, 0.001, (2, 3)) verify_l2norm(1, 3, 20, 20, 0.001, (0, 3)) diff --git a/topi/tests/python/test_topi_lrn.py b/topi/tests/python/test_topi_lrn.py index 596e5747a6c5..69c3fa404b71 100644 --- a/topi/tests/python/test_topi_lrn.py +++ b/topi/tests/python/test_topi_lrn.py @@ -70,13 +70,16 @@ def verify_lrn(shape, size, axis, bias, alpha, beta): b_np = lrn_python(a_np, size, axis, bias, alpha, beta) def check_device(device): - ctx = tvm.context(device, 0) - if not ctx.exist: + if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device) return print("Running on target: %s" % device) with tvm.target.create(device): - s = topi.generic.schedule_lrn(B) + if device == 'llvm': + s = topi.generic.schedule_lrn([B]) + else: + s = topi.cuda.schedule_lrn([B]) + ctx = tvm.context(device, 0) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx) f = tvm.build(s, [A, B], device) @@ -87,9 +90,9 @@ def check_device(device): check_device(device) def test_lrn(): - verify_lrn((1, 3, 5, 5), 3, 1, 1, 1, 0.5) - verify_lrn((1, 3, 5, 5), 3, 3, 1, 1, 0.5) - verify_lrn((1, 3, 20, 20), 3, 1, 2, 1, 0.75) + verify_lrn((1, 3, 5, 5), 3, 1, 1.0, 1.0, 0.5) + verify_lrn((1, 3, 5, 5), 3, 3, 1.0, 1.0, 0.5) + verify_lrn((1, 3, 20, 20), 3, 1, 2.0, 1.0, 0.75) if __name__ == "__main__": test_lrn() diff --git a/topi/tests/python_cpp/test_topi_l2norm.py b/topi/tests/python_cpp/test_topi_l2norm.py new file mode 100644 index 000000000000..5e81406d2177 --- /dev/null +++ b/topi/tests/python_cpp/test_topi_l2norm.py @@ -0,0 +1,75 @@ +"""Test code for l2 normalization""" +import os +import numpy as np +import tvm +import topi +import logging +from topi.util import get_const_tuple + +def l2norm_instance_python(a_np, eps, axis=None): + """L2 norm operator in NCHW layout. 
+ + Parameters + ---------- + a_np : numpy.ndarray + 4-D with shape [batch, in_channel, in_height, in_width] + + eps : float + epsilon constant value + axis : list of int + axis over the normalization applied + + Returns + ------- + l2norm_out : np.ndarray + 4-D with shape [batch, out_channel, out_height, out_width] + """ + batch, axis1, axis2, axis3 = a_np.shape + sqr_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) + sqrt_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) + l2norm_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) + dot_value = np.power(a_np, 2.0) + sqr_sum = np.sum(dot_value, axis, keepdims=True) + sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) + return np.divide(a_np, sqrt_sum) + +def verify_l2norm(n, c, h, w, eps, axis=None): + '''Verify l2 normalization operator by comparing outputs from tvm and numpy implementation''' + A = tvm.placeholder((n, c, h, w), name='A') + B = topi.cpp.nn.l2norm_instance(A, eps, axis) + dtype = A.dtype + + a_np = np.random.uniform(size=(n, c, h, w)).astype(dtype) + b_np = l2norm_instance_python(a_np, eps, axis) + + def check_device(device): + if not tvm.module.enabled(device): + print("Skip because %s is not enabled" % device) + return + print("Running on target: %s" % device) + target = topi.cpp.TEST_create_target(device) + if device == "llvm": + s = topi.cpp.generic.default_schedule(target, [B], False) + else: + s = topi.cpp.cuda.schedule_l2norm(target, [B]) + ctx = tvm.context(device, 0) + a = tvm.nd.array(a_np, ctx) + b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) + func = tvm.build(s, [A, B], device, name="l2_norm") + func(a, b) + np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) + + for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm']: + check_device(device) + +def test_l2_norm(): + verify_l2norm(1, 3, 20, 20, 0.001) + verify_l2norm(1, 3, 20, 20, 0.001, (1,)) + verify_l2norm(1, 3, 20, 20, 0.001, (1, 2)) + verify_l2norm(1, 3, 20, 20, 0.001, (2, 3)) + verify_l2norm(1, 3, 20, 20, 0.001, (0, 3)) + verify_l2norm(1, 3, 20, 20, 0.001, (0, 2, 3)) + +if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) + test_l2_norm() diff --git a/topi/tests/python_cpp/test_topi_lrn.py b/topi/tests/python_cpp/test_topi_lrn.py new file mode 100644 index 000000000000..69c66c028c69 --- /dev/null +++ b/topi/tests/python_cpp/test_topi_lrn.py @@ -0,0 +1,101 @@ +"""Test code for LRN""" +import os +import numpy as np +import tvm +import topi +import logging +from topi.util import get_const_tuple + +def lrn_python(a_np, size, axis, bias, alpha, beta): + """Local response norm operator in NCHW layout. + + Parameters + ---------- + a_np : numpy.ndarray + 4-D with shape [batch, in_channel, in_height, in_width] + + size : int + normalisation window size + + axis : int + input data layout channel axis + + bias : float + offset to avoid dividing by 0. 
constant value + + alpha : float + contant valie + + beta : float + exponent constant value + + Returns + ------- + b_np : np.ndarray + 4-D with shape [batch, out_channel, out_height, out_width] + """ + axis0, axis1, axis2, axis3 = a_np.shape + radius = size // 2 + sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) + sqr_sum_up = np.zeros(shape=a_np.shape).astype(a_np.dtype) + lrn_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) + def sum_dot_values(i, j, k, l): + axis_size = a_np.shape[axis] + if (axis == 1): + #NCHW layout + sum_start = j-radius if j-radius >= 0 else 0 + sum_end = j+radius+1 if j+radius+1 < axis_size else axis_size + sqr_sum[i, j, k, l] = sum(a_np[i, sum_start:sum_end, k, l] * \ + a_np[i, sum_start:sum_end, k, l]) + elif (axis == 3): + #NHWC layout + sum_start = l-radius if l-radius >= 0 else 0 + sum_end = l+radius+1 if l+radius+1 < axis_size else axis_size + sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ + a_np[i, j, k, sum_start:sum_end]) + + for i in range(axis0): + for j in range(axis1): + for k in range(axis2): + for l in range(axis3): + sum_dot_values(i, j, k, l) + + sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) + return np.divide(a_np, sqr_sum_up) + +def verify_lrn(shape, size, axis, bias, alpha, beta): + '''Verify Local response normalization operator by comparing outputs from tvm and numpy implementation''' + A = tvm.placeholder(shape, name='A') + B = topi.cpp.nn.lrn(A, size, axis, alpha, beta, bias) + dtype = A.dtype + + a_np = np.random.uniform(size=shape).astype(dtype) + b_np = lrn_python(a_np, size, axis, bias, alpha, beta) + def check_device(device): + if not tvm.module.enabled(device): + print("Skip because %s is not enabled" % device) + return + print("Running on target: %s" % device) + target = topi.cpp.TEST_create_target(device) + if device == "llvm": + s = topi.cpp.generic.default_schedule(target, [B], False) + else: + s = topi.cpp.cuda.schedule_lrn(target, [B]) + ctx = tvm.context(device, 0) + a = tvm.nd.array(a_np, ctx) + b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx) + f = tvm.build(s, [A, B], device) + f(a, b) + np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-1) + + for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm']: + check_device(device) + +def test_lrn(): + verify_lrn((1, 3, 5, 5), 3, 3, 1.0, 1.0, 0.5) + verify_lrn((1, 3, 5, 5), 3, 3, 1.0, 1.0, 0.5) + verify_lrn((1, 3, 20, 20), 3, 1, 2.0, 1.0, 0.75) + +if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) + test_lrn() From d9e5e779dcd8f14c094fdc471ce2ffd3e38e54ea Mon Sep 17 00:00:00 2001 From: Pariksheet Date: Fri, 11 May 2018 15:42:25 +0530 Subject: [PATCH 02/12] Sanity check issue fixed --- topi/include/topi/nn.h | 2 +- topi/include/topi/nn/local_response_norm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/topi/include/topi/nn.h b/topi/include/topi/nn.h index 2459eb515707..f893e3bdaa6f 100644 --- a/topi/include/topi/nn.h +++ b/topi/include/topi/nn.h @@ -1,7 +1,7 @@ /*! * Copyright (c) 2017 by Contributors * \brief NN op constructions - * \file topi/nn.h + * \file */ #ifndef TOPI_NN_H_ #define TOPI_NN_H_ diff --git a/topi/include/topi/nn/local_response_norm.h b/topi/include/topi/nn/local_response_norm.h index 0e31c7c0515d..c6b044b78280 100644 --- a/topi/include/topi/nn/local_response_norm.h +++ b/topi/include/topi/nn/local_response_norm.h @@ -1,7 +1,7 @@ /*! 
* Copyright (c) 2018 by Contributors * \brief local response normalization op constructions - * \file nn/local_response_normalization.h + * \file nn/local_response_norm.h */ #ifndef TOPI_NN_LOCAL_RESPONSE_NORM_H_ #define TOPI_NN_LOCAL_RESPONSE_NORM_H_ From 9dfab018f10f18989941f31a226cbff7f636ad1b Mon Sep 17 00:00:00 2001 From: Pariksheet Date: Wed, 30 May 2018 14:06:11 +0530 Subject: [PATCH 03/12] nnvm support for lrn and l2norm ops added --- nnvm/include/nnvm/top/nn.h | 35 +++++ nnvm/python/nnvm/top/nn.py | 33 +++++ nnvm/src/top/nn/nn.cc | 46 +++++++ nnvm/tests/python/compiler/test_top_level1.py | 125 ++++++++++++++++++ 4 files changed, 239 insertions(+) diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index bbdb3b9c4f12..a1659cc3b0d3 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -368,6 +368,41 @@ struct NMSParam : public dmlc::Parameter { } }; +struct LrnParam : public dmlc::Parameter { + int size; + int axis; + float alpha; + float beta; + float bias; + + DMLC_DECLARE_PARAMETER(LrnParam) { + DMLC_DECLARE_FIELD(size) + .describe("The size of the local region to be considered for normalization."); + DMLC_DECLARE_FIELD(axis) + .describe("input data layout channel axis"); + DMLC_DECLARE_FIELD(alpha) + .describe("alpha constant."); + DMLC_DECLARE_FIELD(beta) + .describe("beta constant."); + DMLC_DECLARE_FIELD(bias) + .describe("bias constant."); + } + // constants + static const constexpr int kData = 0; +}; + +struct L2normParam : public dmlc::Parameter { + float eps; + Tuple axis; + + DMLC_DECLARE_PARAMETER(L2normParam) { + DMLC_DECLARE_FIELD(eps) + .describe("float type epsilon value."); + DMLC_DECLARE_FIELD(axis) + .describe("axis over the normalization applied"); + } +}; + } // namespace top } // namespace nnvm diff --git a/nnvm/python/nnvm/top/nn.py b/nnvm/python/nnvm/top/nn.py index b7e0d0952888..614d4982f715 100644 --- a/nnvm/python/nnvm/top/nn.py +++ b/nnvm/python/nnvm/top/nn.py @@ -243,3 +243,36 @@ def schedule_upsampling(_, outs, target): return topi.generic.schedule_injective(outs) reg.register_pattern("upsampling", OpPattern.INJECTIVE) + +@reg.register_compute("lrn") +def compute_lrn(attrs, inputs, _): + """Compute definition of lrn""" + size = attrs.get_int("size") + axis = attrs.get_int("axis") + alpha = attrs.get_float("alpha") + beta = attrs.get_float("beta") + bias = attrs.get_float("bias") + return topi.nn.lrn(inputs[0], size, axis, alpha, beta, bias) + +@reg.register_schedule("lrn") +def schedule_lrn(attrs, outs, target): + """Schedule definition of lrn""" + with tvm.target.create(target): + return topi.generic.schedule_lrn(outs) + +reg.register_pattern("lrn", OpPattern.OUT_ELEMWISE_FUSABLE) + +@reg.register_compute("l2norm") +def compute_l2norm(attrs, inputs, _): + """Compute definition of l2norm""" + eps = attrs.get_float("eps") + axis = attrs.get_int_tuple("axis") + return topi.nn.l2norm_instance(inputs[0], eps, axis) + +@reg.register_schedule("l2norm") +def schedule_l2norm(attrs, outs, target): + """Schedule definition of l2norm""" + with tvm.target.create(target): + return topi.generic.schedule_l2norm(outs) + +reg.register_pattern("l2norm", OpPattern.OUT_ELEMWISE_FUSABLE) \ No newline at end of file diff --git a/nnvm/src/top/nn/nn.cc b/nnvm/src/top/nn/nn.cc index cedfb210855e..b343ab695c05 100644 --- a/nnvm/src/top/nn/nn.cc +++ b/nnvm/src/top/nn/nn.cc @@ -712,5 +712,51 @@ the input array by output[n, c, h, w, C] = data[n, C*16+c, h, w] }) .set_support_level(1); +DMLC_REGISTER_PARAMETER(LrnParam); + +inline bool 
LrnInferShape(const nnvm::NodeAttrs& attrs, + std::vector* in_shape, + std::vector* out_shape) { + TShape dshape = (*in_shape)[0]; + TShape oshape = dshape; + + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); + return true; +} + +NNVM_REGISTER_OP(lrn) +.describe(R"code(LRN layer)code" NNVM_ADD_FILELINE) +.add_argument("data", "4D Tesndor", "Input data.") +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FInferShape", LrnInferShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_support_level(1); + +DMLC_REGISTER_PARAMETER(L2normParam); + +inline bool L2normInferShape(const nnvm::NodeAttrs& attrs, + std::vector* in_shape, + std::vector* out_shape) { + TShape dshape = (*in_shape)[0]; + TShape oshape = dshape; + + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape); + return true; +} + +NNVM_REGISTER_OP(l2norm) +.describe(R"code(L2NORM layer)code" NNVM_ADD_FILELINE) +.add_argument("data", "4D Tesndor", "Input data.") +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_num_inputs(1) +.set_num_outputs(1) +.set_attr("FInferShape", L2normInferShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_support_level(1); + } // namespace top } // namespace nnvm diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index 3058d6ccfc7b..5313ce3c601f 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -365,6 +365,129 @@ def forward(x): inputs = [('x', (1, 3, 28, 28), x)] helper(y, inputs, dtype, forward) +def verify_lrn(n, c, h, w, size, axis, bias, alpha, beta): + x = sym.Variable("x") + y = sym.lrn(x, size=size, axis=axis, bias=bias, alpha=alpha, beta=beta) + dtype = "float32" + dshape = (n, c, h, w) + x_np = np.random.uniform(size=dshape).astype(dtype) + + def lrn_python(a_np, size, axis, bias, alpha, beta): + """Local response norm operator in NCHW layout. + + Parameters + ---------- + a_np : numpy.ndarray + 4-D with shape [batch, in_channel, in_height, in_width] + + size : int + normalisation window size + + axis : int + input data layout channel axis + + bias : float + offset to avoid dividing by 0. 
constant value + + alpha : float + contant valie + + beta : float + exponent constant value + + Returns + ------- + b_np : np.ndarray + 4-D with shape [batch, out_channel, out_height, out_width] + """ + axis0, axis1, axis2, axis3 = a_np.shape + radius = size // 2 + sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) + sqr_sum_up = np.zeros(shape=a_np.shape).astype(a_np.dtype) + lrn_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) + def sum_dot_values(i, j, k, l): + axis_size = a_np.shape[axis] + if (axis == 1): + #NCHW layout + sum_start = j-radius if j-radius >= 0 else 0 + sum_end = j+radius+1 if j+radius+1 < axis_size else axis_size + sqr_sum[i, j, k, l] = sum(a_np[i, sum_start:sum_end, k, l] * \ + a_np[i, sum_start:sum_end, k, l]) + elif (axis == 3): + #NHWC layout + sum_start = l-radius if l-radius >= 0 else 0 + sum_end = l+radius+1 if l+radius+1 < axis_size else axis_size + sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ + a_np[i, j, k, sum_start:sum_end]) + + for i in range(axis0): + for j in range(axis1): + for k in range(axis2): + for l in range(axis3): + sum_dot_values(i, j, k, l) + + sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) + return np.divide(a_np, sqr_sum_up) + + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape}) + m = graph_runtime.create(graph, lib, ctx) + m.run(x=x_np) + out = m.get_output(0, tvm.nd.empty(dshape)) + out_np = np.zeros(shape=(n, c, h, w)).astype(dtype) + out_np = lrn_python(x_np, size, axis, bias, alpha, beta) + np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) + +def verify_l2norm(batch, channel, height, width, eps, axis): + x = sym.Variable("x") + y = sym.l2norm(x, eps=eps, axis=axis) + dtype = "float32" + dshape = (batch, channel, height, width) + x_np = np.random.uniform(size=dshape).astype(dtype) + + def l2norm_instance_python(a_np, eps, axis=None): + """L2 norm operator in NCHW layout. 
+ + Parameters + ---------- + a_np : numpy.ndarray + 4-D with shape [batch, in_channel, in_height, in_width] + + eps : float + epsilon constant value + axis : list of int + axis over the normalization applied + + Returns + ------- + l2norm_out : np.ndarray + 4-D with shape [batch, out_channel, out_height, out_width] + """ + batch, axis1, axis2, axis3 = a_np.shape + sqr_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) + sqrt_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) + l2norm_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) + dot_value = np.power(a_np, 2.0) + sqr_sum = np.sum(dot_value, axis, keepdims=True) + sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) + return np.divide(a_np, sqrt_sum) + + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape}) + m = graph_runtime.create(graph, lib, ctx) + m.run(x=x_np) + out = m.get_output(0, tvm.nd.empty(dshape)) + out_np = np.zeros(shape=(batch, channel, height, width)).astype(dtype) + out_np = l2norm_instance_python(x_np, eps, axis) + np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) + +def test_lrn(): + verify_lrn(1, 3, 20, 20, 3, 1, 1.0, 1.0, 0.5) + verify_lrn(1, 3, 20, 20, 3, 1, 2.0, 1.0, 0.75) + +def test_l2norm(): + verify_l2norm(1, 3, 20, 20, 0.001, (1,)) + verify_l2norm(1, 3, 20, 20, 0.001, (1, 2)) if __name__ == "__main__": test_split() @@ -384,3 +507,5 @@ def forward(x): test_softmax() test_squeeze() test_pad() + test_lrn() + test_l2norm() From 620f9efc1a7e37d150f9e46e373f5be2aa5b8874 Mon Sep 17 00:00:00 2001 From: Pariksheet Date: Wed, 30 May 2018 14:17:30 +0530 Subject: [PATCH 04/12] lint error fixed --- nnvm/python/nnvm/top/nn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nnvm/python/nnvm/top/nn.py b/nnvm/python/nnvm/top/nn.py index 614d4982f715..d6b413e1204d 100644 --- a/nnvm/python/nnvm/top/nn.py +++ b/nnvm/python/nnvm/top/nn.py @@ -275,4 +275,4 @@ def schedule_l2norm(attrs, outs, target): with tvm.target.create(target): return topi.generic.schedule_l2norm(outs) -reg.register_pattern("l2norm", OpPattern.OUT_ELEMWISE_FUSABLE) \ No newline at end of file +reg.register_pattern("l2norm", OpPattern.OUT_ELEMWISE_FUSABLE) From 8886fb91795b01c0f353e30b9b10a3e33d9005e6 Mon Sep 17 00:00:00 2001 From: Pariksheet Date: Mon, 4 Jun 2018 10:44:22 +0530 Subject: [PATCH 05/12] Build check --- nnvm/tests/python/compiler/test_top_level1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index 5313ce3c601f..a79345f63e24 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -373,7 +373,7 @@ def verify_lrn(n, c, h, w, size, axis, bias, alpha, beta): x_np = np.random.uniform(size=dshape).astype(dtype) def lrn_python(a_np, size, axis, bias, alpha, beta): - """Local response norm operator in NCHW layout. + """Local response norm operator numpy implementation. 
Parameters ---------- From c2dcc600cad9db780e44cf06f774b0df782d0e01 Mon Sep 17 00:00:00 2001 From: Pariksheet Date: Tue, 5 Jun 2018 18:48:25 +0530 Subject: [PATCH 06/12] build recheck --- nnvm/tests/python/compiler/test_top_level1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index a79345f63e24..fff7381a6f46 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -373,7 +373,7 @@ def verify_lrn(n, c, h, w, size, axis, bias, alpha, beta): x_np = np.random.uniform(size=dshape).astype(dtype) def lrn_python(a_np, size, axis, bias, alpha, beta): - """Local response norm operator numpy implementation. + """Local response normalization operator numpy implementation. Parameters ---------- From c41f9980b2d9260e0b8d7275d5b23f628e74556b Mon Sep 17 00:00:00 2001 From: Pariksheet Date: Thu, 7 Jun 2018 11:38:06 +0530 Subject: [PATCH 07/12] Review comments updated --- nnvm/include/nnvm/top/nn.h | 6 +-- nnvm/tests/python/compiler/test_top_level1.py | 2 +- .../topi/cuda/{nn.h => normalization.h} | 40 +++++++++---------- topi/include/topi/nn.h | 2 +- topi/include/topi/nn/l2_norm.h | 2 +- topi/include/topi/nn/local_response_norm.h | 4 +- .../topi/rocm/{nn.h => normalization.h} | 8 ++-- topi/src/topi.cc | 4 +- 8 files changed, 33 insertions(+), 35 deletions(-) rename topi/include/topi/cuda/{nn.h => normalization.h} (75%) rename topi/include/topi/rocm/{nn.h => normalization.h} (87%) diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index a1659cc3b0d3..ef2f3b2a829b 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -381,11 +381,11 @@ struct LrnParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(axis) .describe("input data layout channel axis"); DMLC_DECLARE_FIELD(alpha) - .describe("alpha constant."); + .describe("The scaling parameter."); DMLC_DECLARE_FIELD(beta) - .describe("beta constant."); + .describe("The exponent parameter."); DMLC_DECLARE_FIELD(bias) - .describe("bias constant."); + .describe("The offset parameter."); } // constants static const constexpr int kData = 0; diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index fff7381a6f46..683a2ec5dc35 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -390,7 +390,7 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): offset to avoid dividing by 0. constant value alpha : float - contant valie + contant value beta : float exponent constant value diff --git a/topi/include/topi/cuda/nn.h b/topi/include/topi/cuda/normalization.h similarity index 75% rename from topi/include/topi/cuda/nn.h rename to topi/include/topi/cuda/normalization.h index a425a47b5ce7..2816c99f8e25 100644 --- a/topi/include/topi/cuda/nn.h +++ b/topi/include/topi/cuda/normalization.h @@ -1,10 +1,10 @@ /*! 
* Copyright (c) 2018 by Contributors -* \file cuda/nn.h +* \file cuda/normalization.h * \brief CUDA schedule for lrn and l2 normalization operations */ -#ifndef TOPI_CUDA_NN_H_ -#define TOPI_CUDA_NN_H_ +#ifndef TOPI_CUDA_NORMALIZATION_H_ +#define TOPI_CUDA_NORMALIZATION_H_ #include "tvm/tvm.h" #include "tvm/build_module.h" @@ -26,19 +26,19 @@ inline Schedule schedule_lrn(const Target &target, const Array& outs) { for (auto t : outs) { out_ops.push_back(t->op); } - auto s = create_schedule(out_ops); - auto num_thread = 64; - auto block_x = tvm::thread_axis(Range(), "blockIdx.x"); - auto thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); - auto lrn = outs[0]; - auto sqr_sum_up = lrn->op->InputTensors()[1]; - auto sqr_sum = sqr_sum_up->op->InputTensors()[0]; - auto set_pad = sqr_sum->op->InputTensors()[0]; + Schedule s = create_schedule(out_ops); + int num_thread = 64; + IterVar block_x = tvm::thread_axis(Range(), "blockIdx.x"); + IterVar thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); + Tensor lrn = outs[0]; + Tensor sqr_sum_up = lrn->op->InputTensors()[1]; + Tensor sqr_sum = sqr_sum_up->op->InputTensors()[0]; + Tensor set_pad = sqr_sum->op->InputTensors()[0]; s[set_pad].bind(set_pad->op.as()->axis[0], block_x); - auto rxk = sqr_sum->op.as()->reduce_axis[0]; + IterVar rxk = sqr_sum->op.as()->reduce_axis[0]; IterVar xko, xki; s[sqr_sum].split(rxk, num_thread, &xko, &xki); - auto srf = s.rfactor(sqr_sum, xki)[0]; + Tensor srf = s.rfactor(sqr_sum, xki)[0]; s[sqr_sum].bind(s[sqr_sum]->op.as()->axis[0], block_x); s[sqr_sum].bind(s[sqr_sum]->op.as()->reduce_axis[0], thread_x); s[srf].compute_at(s[sqr_sum], s[sqr_sum]->op.as()->reduce_axis[0]); @@ -64,7 +64,7 @@ inline Schedule schedule_l2norm(const Target &target, const Array& outs) for (auto t : outs) { out_ops.push_back(t->op); } - auto s = create_schedule(out_ops); + Schedule s = create_schedule(out_ops); std::function traverse; traverse = [&](const Operation& op) { @@ -89,10 +89,10 @@ inline Schedule schedule_l2norm(const Target &target, const Array& outs) }; traverse(outs[0]->op); - auto num_thread = 64; - auto l2norm = outs[0]; - auto block_x = tvm::thread_axis(Range(), "blockIdx.x"); - auto thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); + int num_thread = 64; + Tensor l2norm = outs[0]; + IterVar block_x = tvm::thread_axis(Range(), "blockIdx.x"); + IterVar thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); IterVar xto, xti; s[l2norm].split_by_nparts(l2norm->op.as()->axis[1], num_thread, &xto, &xti); s[l2norm].bind(l2norm->op.as()->axis[0], block_x); @@ -101,7 +101,5 @@ inline Schedule schedule_l2norm(const Target &target, const Array& outs) } } // namespace cuda } // namespace topi -#endif // TOPI_CUDA_NN_H_ - - +#endif // TOPI_CUDA_NORMALIZATION_H_ diff --git a/topi/include/topi/nn.h b/topi/include/topi/nn.h index f893e3bdaa6f..2459eb515707 100644 --- a/topi/include/topi/nn.h +++ b/topi/include/topi/nn.h @@ -1,7 +1,7 @@ /*! * Copyright (c) 2017 by Contributors * \brief NN op constructions - * \file + * \file topi/nn.h */ #ifndef TOPI_NN_H_ #define TOPI_NN_H_ diff --git a/topi/include/topi/nn/l2_norm.h b/topi/include/topi/nn/l2_norm.h index e1fe25cb6040..c8f8be455591 100644 --- a/topi/include/topi/nn/l2_norm.h +++ b/topi/include/topi/nn/l2_norm.h @@ -15,7 +15,7 @@ namespace nn { using namespace tvm; /*! -* \brief L2 normalization inference operator +* \brief L2 normalization inference operator * * \param data The input tensor. 
4-D with shape [batch, channel, height, width] * \param eps Epsilon to prevent div by 0 diff --git a/topi/include/topi/nn/local_response_norm.h b/topi/include/topi/nn/local_response_norm.h index c6b044b78280..339fb6dae879 100644 --- a/topi/include/topi/nn/local_response_norm.h +++ b/topi/include/topi/nn/local_response_norm.h @@ -38,8 +38,8 @@ inline Tensor lrn(const Tensor& data, std::string name = "tensor", std::string tag = kBroadcast) { CHECK_EQ(data->shape.size(), 4) << "LRN requires 4-D input"; - assert(size % 2 == 1); - assert(axis == 1 || axis == 3); + CHECK_EQ(size % 2, 1) << "size should be odd number"; + CHECK_EQ((axis - 1) && (axis - 3), 0) << "axis should be 1 or 3 for NCHW and NHWC"; auto input_shape = data->shape; Array pad_before{ 0, 0, 0, 0}; Array pad_after{ 0, 0, 0, 0}; diff --git a/topi/include/topi/rocm/nn.h b/topi/include/topi/rocm/normalization.h similarity index 87% rename from topi/include/topi/rocm/nn.h rename to topi/include/topi/rocm/normalization.h index 98b74ad80593..8c6c31d462c4 100644 --- a/topi/include/topi/rocm/nn.h +++ b/topi/include/topi/rocm/normalization.h @@ -1,10 +1,10 @@ /*! * Copyright (c) 2018 by Contributors -* \file rocm/nn.h +* \file rocm/normalization.h * \brief rocm schedule for lrn and l2 normalization operations */ -#ifndef TOPI_ROCM_NN_H_ -#define TOPI_ROCM_NN_H_ +#ifndef TOPI_ROCM_NORMALIZATION_H_ +#define TOPI_ROCM_NORMALIZATION_H_ #include "tvm/tvm.h" #include "tvm/build_module.h" @@ -38,4 +38,4 @@ inline Schedule schedule_l2norm(const Target &target, const Array& outs) } } // namespace rocm } // namespace topi -#endif // TOPI_ROCM_NN_H_ +#endif // TOPI_ROCM_NORMALIZATION_H_ diff --git a/topi/src/topi.cc b/topi/src/topi.cc index cd3ad66bb45a..930d9d16260d 100644 --- a/topi/src/topi.cc +++ b/topi/src/topi.cc @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include @@ -49,7 +49,7 @@ #include #include -#include +#include namespace topi { From 5a44f63a1497d8709509b3162299f824bdec3fbf Mon Sep 17 00:00:00 2001 From: pariksheet Date: Fri, 15 Jun 2018 10:03:52 +0530 Subject: [PATCH 08/12] Review comments reworked --- nnvm/include/nnvm/top/nn.h | 12 ++-- nnvm/python/nnvm/top/nn.py | 18 +++--- nnvm/src/top/nn/nn.cc | 28 ++++----- nnvm/tests/python/compiler/test_top_level1.py | 63 +++++++++---------- topi/include/topi/cuda/normalization.h | 15 ++--- .../topi/nn/{l2_norm.h => l2_normalize.h} | 14 ++--- topi/include/topi/nn/local_response_norm.h | 2 +- topi/include/topi/rocm/normalization.h | 6 +- topi/python/topi/cuda/__init__.py | 2 +- topi/python/topi/cuda/nn.py | 10 +-- topi/python/topi/generic/nn.py | 6 +- topi/python/topi/nn/__init__.py | 2 +- .../topi/nn/{l2_norm.py => l2_normalize.py} | 8 +-- topi/python/topi/rocm/nn.py | 6 +- topi/src/topi.cc | 16 ++--- topi/tests/python/test_topi_l2norm.py | 46 +++++++------- topi/tests/python/test_topi_lrn.py | 11 ++-- topi/tests/python_cpp/test_topi_l2norm.py | 45 +++++++------ topi/tests/python_cpp/test_topi_lrn.py | 14 ++--- 19 files changed, 155 insertions(+), 169 deletions(-) rename topi/include/topi/nn/{l2_norm.h => l2_normalize.h} (80%) rename topi/python/topi/nn/{l2_norm.py => l2_normalize.py} (71%) diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index ef2f3b2a829b..6687535c4f85 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -368,14 +368,14 @@ struct NMSParam : public dmlc::Parameter { } }; -struct LrnParam : public dmlc::Parameter { +struct LRNParam : public dmlc::Parameter { int size; int axis; float alpha; float beta; 
float bias; - DMLC_DECLARE_PARAMETER(LrnParam) { + DMLC_DECLARE_PARAMETER(LRNParam) { DMLC_DECLARE_FIELD(size) .describe("The size of the local region to be considered for normalization."); DMLC_DECLARE_FIELD(axis) @@ -391,11 +391,11 @@ struct LrnParam : public dmlc::Parameter { static const constexpr int kData = 0; }; -struct L2normParam : public dmlc::Parameter { - float eps; - Tuple axis; +struct L2normalizeParam : public dmlc::Parameter { + float eps; + Tuple axis; - DMLC_DECLARE_PARAMETER(L2normParam) { + DMLC_DECLARE_PARAMETER(L2normalizeParam) { DMLC_DECLARE_FIELD(eps) .describe("float type epsilon value."); DMLC_DECLARE_FIELD(axis) diff --git a/nnvm/python/nnvm/top/nn.py b/nnvm/python/nnvm/top/nn.py index d6b413e1204d..37c014e705d9 100644 --- a/nnvm/python/nnvm/top/nn.py +++ b/nnvm/python/nnvm/top/nn.py @@ -262,17 +262,17 @@ def schedule_lrn(attrs, outs, target): reg.register_pattern("lrn", OpPattern.OUT_ELEMWISE_FUSABLE) -@reg.register_compute("l2norm") -def compute_l2norm(attrs, inputs, _): - """Compute definition of l2norm""" +@reg.register_compute("l2normalize") +def compute_l2normalize(attrs, inputs, _): + """Compute definition of l2normalize""" eps = attrs.get_float("eps") axis = attrs.get_int_tuple("axis") - return topi.nn.l2norm_instance(inputs[0], eps, axis) + return topi.nn.l2normalize_instance(inputs[0], eps, axis) -@reg.register_schedule("l2norm") -def schedule_l2norm(attrs, outs, target): - """Schedule definition of l2norm""" +@reg.register_schedule("l2normalize") +def schedule_l2normalize(attrs, outs, target): + """Schedule definition of l2normalize""" with tvm.target.create(target): - return topi.generic.schedule_l2norm(outs) + return topi.generic.schedule_l2normalize(outs) -reg.register_pattern("l2norm", OpPattern.OUT_ELEMWISE_FUSABLE) +reg.register_pattern("l2normalize", OpPattern.OUT_ELEMWISE_FUSABLE) diff --git a/nnvm/src/top/nn/nn.cc b/nnvm/src/top/nn/nn.cc index b343ab695c05..7f50f2b1f58a 100644 --- a/nnvm/src/top/nn/nn.cc +++ b/nnvm/src/top/nn/nn.cc @@ -712,9 +712,9 @@ the input array by output[n, c, h, w, C] = data[n, C*16+c, h, w] }) .set_support_level(1); -DMLC_REGISTER_PARAMETER(LrnParam); +DMLC_REGISTER_PARAMETER(LRNParam); -inline bool LrnInferShape(const nnvm::NodeAttrs& attrs, +inline bool LRNInferShape(const nnvm::NodeAttrs& attrs, std::vector* in_shape, std::vector* out_shape) { TShape dshape = (*in_shape)[0]; @@ -727,19 +727,19 @@ inline bool LrnInferShape(const nnvm::NodeAttrs& attrs, NNVM_REGISTER_OP(lrn) .describe(R"code(LRN layer)code" NNVM_ADD_FILELINE) .add_argument("data", "4D Tesndor", "Input data.") -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) .set_num_inputs(1) .set_num_outputs(1) -.set_attr("FInferShape", LrnInferShape) +.set_attr("FInferShape", LRNInferShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_support_level(1); -DMLC_REGISTER_PARAMETER(L2normParam); +DMLC_REGISTER_PARAMETER(L2normalizeParam); -inline bool L2normInferShape(const nnvm::NodeAttrs& attrs, - std::vector* in_shape, - std::vector* out_shape) { +inline bool L2normalizeInferShape(const nnvm::NodeAttrs& attrs, + std::vector* in_shape, + std::vector* out_shape) { TShape dshape = (*in_shape)[0]; TShape oshape = dshape; @@ -747,14 +747,14 @@ inline bool L2normInferShape(const nnvm::NodeAttrs& attrs, return true; } -NNVM_REGISTER_OP(l2norm) -.describe(R"code(L2NORM layer)code" NNVM_ADD_FILELINE) +NNVM_REGISTER_OP(l2normalize) +.describe(R"code(L2NORMALIZE 
layer)code" NNVM_ADD_FILELINE) .add_argument("data", "4D Tesndor", "Input data.") -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) .set_num_inputs(1) .set_num_outputs(1) -.set_attr("FInferShape", L2normInferShape) +.set_attr("FInferShape", L2normalizeInferShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_support_level(1); diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index 683a2ec5dc35..5cf661b412c4 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -365,15 +365,14 @@ def forward(x): inputs = [('x', (1, 3, 28, 28), x)] helper(y, inputs, dtype, forward) -def verify_lrn(n, c, h, w, size, axis, bias, alpha, beta): +def verify_lrn(ishape, size, axis, bias, alpha, beta): x = sym.Variable("x") y = sym.lrn(x, size=size, axis=axis, bias=bias, alpha=alpha, beta=beta) dtype = "float32" - dshape = (n, c, h, w) - x_np = np.random.uniform(size=dshape).astype(dtype) + x_np = np.random.uniform(size=ishape).astype(dtype) def lrn_python(a_np, size, axis, bias, alpha, beta): - """Local response normalization operator numpy implementation. + """Local response normalization operator in NCHW layout. Parameters ---------- @@ -381,7 +380,7 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): 4-D with shape [batch, in_channel, in_height, in_width] size : int - normalisation window size + normalization window size axis : int input data layout channel axis @@ -390,21 +389,19 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): offset to avoid dividing by 0. constant value alpha : float - contant value + constant value beta : float exponent constant value Returns ------- - b_np : np.ndarray + lrn_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ axis0, axis1, axis2, axis3 = a_np.shape radius = size // 2 sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - sqr_sum_up = np.zeros(shape=a_np.shape).astype(a_np.dtype) - lrn_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) def sum_dot_values(i, j, k, l): axis_size = a_np.shape[axis] if (axis == 1): @@ -427,26 +424,25 @@ def sum_dot_values(i, j, k, l): sum_dot_values(i, j, k, l) sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) - return np.divide(a_np, sqr_sum_up) + lrn_out = np.divide(a_np, sqr_sum_up) + return lrn_out for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape}) + graph, lib, _ = nnvm.compiler.build(y, target, {"x": ishape}) m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) - out = m.get_output(0, tvm.nd.empty(dshape)) - out_np = np.zeros(shape=(n, c, h, w)).astype(dtype) + out = m.get_output(0, tvm.nd.empty(ishape)) out_np = lrn_python(x_np, size, axis, bias, alpha, beta) np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) -def verify_l2norm(batch, channel, height, width, eps, axis): +def verify_l2normalize(ishape, eps, axis): x = sym.Variable("x") - y = sym.l2norm(x, eps=eps, axis=axis) + y = sym.l2normalize(x, eps=eps, axis=axis) dtype = "float32" - dshape = (batch, channel, height, width) - x_np = np.random.uniform(size=dshape).astype(dtype) + x_np = np.random.uniform(size=ishape).astype(dtype) - def l2norm_instance_python(a_np, eps, axis=None): - """L2 norm operator in NCHW layout. + def l2normalize_instance_python(a_np, eps, axis=None): + """L2 normalize operator in NCHW layout. 
Parameters ---------- @@ -460,34 +456,31 @@ def l2norm_instance_python(a_np, eps, axis=None): Returns ------- - l2norm_out : np.ndarray + l2normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - batch, axis1, axis2, axis3 = a_np.shape - sqr_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) - sqrt_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) - l2norm_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) + batch = a_np.shape[0] dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - return np.divide(a_np, sqrt_sum) + l2normalize_out = np.divide(a_np, sqrt_sum) + return l2normalize_out for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(y, target, {"x": dshape}) + graph, lib, _ = nnvm.compiler.build(y, target, {"x": ishape}) m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) - out = m.get_output(0, tvm.nd.empty(dshape)) - out_np = np.zeros(shape=(batch, channel, height, width)).astype(dtype) - out_np = l2norm_instance_python(x_np, eps, axis) + out = m.get_output(0, tvm.nd.empty(ishape)) + out_np = l2normalize_instance_python(x_np, eps, axis) np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) def test_lrn(): - verify_lrn(1, 3, 20, 20, 3, 1, 1.0, 1.0, 0.5) - verify_lrn(1, 3, 20, 20, 3, 1, 2.0, 1.0, 0.75) + verify_lrn((1, 3, 20, 20), 3, 1, 1.0, 1.0, 0.5) + verify_lrn((1, 3, 20, 20), 3, 1, 2.0, 1.0, 0.75) -def test_l2norm(): - verify_l2norm(1, 3, 20, 20, 0.001, (1,)) - verify_l2norm(1, 3, 20, 20, 0.001, (1, 2)) +def test_l2normalize(): + verify_l2normalize((1, 3, 20, 20), 0.001, (1,)) + verify_l2normalize((1, 3, 20, 20), 0.001, (1, 2)) if __name__ == "__main__": test_split() @@ -508,4 +501,4 @@ def test_l2norm(): test_squeeze() test_pad() test_lrn() - test_l2norm() + test_l2normalize() diff --git a/topi/include/topi/cuda/normalization.h b/topi/include/topi/cuda/normalization.h index 2816c99f8e25..d189893450e0 100644 --- a/topi/include/topi/cuda/normalization.h +++ b/topi/include/topi/cuda/normalization.h @@ -1,7 +1,7 @@ /*! * Copyright (c) 2018 by Contributors * \file cuda/normalization.h -* \brief CUDA schedule for lrn and l2 normalization operations +* \brief CUDA schedule for LRN and l2 normalization operations */ #ifndef TOPI_CUDA_NORMALIZATION_H_ #define TOPI_CUDA_NORMALIZATION_H_ @@ -59,7 +59,7 @@ inline Schedule schedule_lrn(const Target &target, const Array& outs) { * * \return A schedule for the given ops. 
*/ -inline Schedule schedule_l2norm(const Target &target, const Array& outs) { +inline Schedule schedule_l2normalize(const Target &target, const Array& outs) { Array out_ops; for (auto t : outs) { out_ops.push_back(t->op); @@ -69,7 +69,7 @@ inline Schedule schedule_l2norm(const Target &target, const Array& outs) std::function traverse; traverse = [&](const Operation& op) { // Inline all one-to-one-mapping operators except the last stage (output) - if (is_injective(op->tag) || op->tag == "l2norm") { + if (is_injective(op->tag) || op->tag == "l2normalize") { if (!detail::contains(s->outputs, op)) { s[op].compute_inline(); } @@ -90,13 +90,14 @@ inline Schedule schedule_l2norm(const Target &target, const Array& outs) traverse(outs[0]->op); int num_thread = 64; - Tensor l2norm = outs[0]; + Tensor l2normalize = outs[0]; IterVar block_x = tvm::thread_axis(Range(), "blockIdx.x"); IterVar thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); IterVar xto, xti; - s[l2norm].split_by_nparts(l2norm->op.as()->axis[1], num_thread, &xto, &xti); - s[l2norm].bind(l2norm->op.as()->axis[0], block_x); - s[l2norm].bind(xto, thread_x); + s[l2normalize].split_by_nparts(l2normalize->op.as()->axis[1], + num_thread, &xto, &xti); + s[l2normalize].bind(l2normalize->op.as()->axis[0], block_x); + s[l2normalize].bind(xto, thread_x); return s; } } // namespace cuda diff --git a/topi/include/topi/nn/l2_norm.h b/topi/include/topi/nn/l2_normalize.h similarity index 80% rename from topi/include/topi/nn/l2_norm.h rename to topi/include/topi/nn/l2_normalize.h index c8f8be455591..96f31782ff88 100644 --- a/topi/include/topi/nn/l2_norm.h +++ b/topi/include/topi/nn/l2_normalize.h @@ -1,10 +1,10 @@ /*! * Copyright (c) 2018 by Contributors * \brief l2 normalization op constructions - * \file nn/l2_norm.h + * \file nn/l2_normalize.h */ -#ifndef TOPI_NN_L2_NORM_H_ -#define TOPI_NN_L2_NORM_H_ +#ifndef TOPI_NN_L2_NORMALIZE_H_ +#define TOPI_NN_L2_NORMALIZE_H_ #include #include @@ -25,12 +25,12 @@ using namespace tvm; * * \return A Tensor whose op member is the l2 normalization operation */ -inline Tensor l2norm_instance(const Tensor& data, +inline Tensor l2normalize_instance(const Tensor& data, float eps, const Array& axis, std::string name = "tensor", - std::string tag = "l2norm") { - CHECK_EQ(data->shape.size(), 4) << "L2 norm requires 4-D input"; + std::string tag = "l2normalize") { + CHECK_EQ(data->shape.size(), 4) << "L2 normalization requires 4-D input"; auto input_shape = data->shape; Tensor dot_value = pow(data, static_cast(2.0)); Tensor sum_value = topi::sum(dot_value, axis, true); @@ -43,4 +43,4 @@ inline Tensor l2norm_instance(const Tensor& data, } } // namespace nn } // namespace topi -#endif // TOPI_NN_L2_NORM_H_ +#endif // TOPI_NN_L2_NORMALIZE_H_ diff --git a/topi/include/topi/nn/local_response_norm.h b/topi/include/topi/nn/local_response_norm.h index 339fb6dae879..c956a9c253dc 100644 --- a/topi/include/topi/nn/local_response_norm.h +++ b/topi/include/topi/nn/local_response_norm.h @@ -39,7 +39,7 @@ inline Tensor lrn(const Tensor& data, std::string tag = kBroadcast) { CHECK_EQ(data->shape.size(), 4) << "LRN requires 4-D input"; CHECK_EQ(size % 2, 1) << "size should be odd number"; - CHECK_EQ((axis - 1) && (axis - 3), 0) << "axis should be 1 or 3 for NCHW and NHWC"; + CHECK(axis == 1 || axis == 3) << "axis should be 1 or 3 for NCHW and NHWC"; auto input_shape = data->shape; Array pad_before{ 0, 0, 0, 0}; Array pad_after{ 0, 0, 0, 0}; diff --git a/topi/include/topi/rocm/normalization.h 
b/topi/include/topi/rocm/normalization.h index 8c6c31d462c4..609177f4b8ef 100644 --- a/topi/include/topi/rocm/normalization.h +++ b/topi/include/topi/rocm/normalization.h @@ -1,7 +1,7 @@ /*! * Copyright (c) 2018 by Contributors * \file rocm/normalization.h -* \brief rocm schedule for lrn and l2 normalization operations +* \brief rocm schedule for LRN and l2 normalization operations */ #ifndef TOPI_ROCM_NORMALIZATION_H_ #define TOPI_ROCM_NORMALIZATION_H_ @@ -33,8 +33,8 @@ inline Schedule schedule_lrn(const Target &target, const Array& outs) { * * \return A schedule for the given ops. */ -inline Schedule schedule_l2norm(const Target &target, const Array& outs) { - return topi::cuda::schedule_l2norm(target, outs); +inline Schedule schedule_l2normalize(const Target &target, const Array& outs) { + return topi::cuda::schedule_l2normalize(target, outs); } } // namespace rocm } // namespace topi diff --git a/topi/python/topi/cuda/__init__.py b/topi/python/topi/cuda/__init__.py index 3b0e38c4d3f4..a6bab999ccc6 100644 --- a/topi/python/topi/cuda/__init__.py +++ b/topi/python/topi/cuda/__init__.py @@ -17,4 +17,4 @@ from .extern import schedule_extern from .vision import schedule_region from .vision import schedule_reorg -from .nn import schedule_lrn, schedule_l2norm +from .nn import schedule_lrn, schedule_l2normalize diff --git a/topi/python/topi/cuda/nn.py b/topi/python/topi/cuda/nn.py index c565698001fe..39486afbf2cf 100644 --- a/topi/python/topi/cuda/nn.py +++ b/topi/python/topi/cuda/nn.py @@ -25,14 +25,14 @@ def schedule_lrn(outs): cpp_target = cpp.TEST_create_target(target.target_name) return cpp.cuda.schedule_lrn(cpp_target, outs) -@generic.schedule_l2norm.register(["cuda"]) -def schedule_l2norm(outs): - """Schedule for L2norm +@generic.schedule_l2normalize.register(["cuda"]) +def schedule_l2normalize(outs): + """Schedule for L2normalize Parameters ---------- outs: Array of Tensor - The computation graph description of L2norm + The computation graph description of L2normalize in the format of an array of tensors. Returns @@ -42,4 +42,4 @@ def schedule_l2norm(outs): """ target = tvm.target.current_target(allow_none=False) cpp_target = cpp.TEST_create_target(target.target_name) - return cpp.cuda.schedule_l2norm(cpp_target, outs) + return cpp.cuda.schedule_l2normalize(cpp_target, outs) diff --git a/topi/python/topi/generic/nn.py b/topi/python/topi/generic/nn.py index 7f67f8093856..892c90af7062 100644 --- a/topi/python/topi/generic/nn.py +++ b/topi/python/topi/generic/nn.py @@ -278,13 +278,13 @@ def schedule_lrn(outs): return cpp.generic.default_schedule(cpp_target, outs, False) @tvm.target.generic_func -def schedule_l2norm(outs): - """Schedule for l2norm +def schedule_l2normalize(outs): + """Schedule for l2normalize Parameters ---------- outs: Array of Tensor - The computation graph description of l2norm + The computation graph description of l2normalize in the format of an array of tensors. 
Returns diff --git a/topi/python/topi/nn/__init__.py b/topi/python/topi/nn/__init__.py index 056d1a76339a..7b6ee4a86836 100644 --- a/topi/python/topi/nn/__init__.py +++ b/topi/python/topi/nn/__init__.py @@ -16,4 +16,4 @@ from .bnn import * from .upsampling import * from .local_response_norm import * -from .l2_norm import * +from .l2_normalize import * diff --git a/topi/python/topi/nn/l2_norm.py b/topi/python/topi/nn/l2_normalize.py similarity index 71% rename from topi/python/topi/nn/l2_norm.py rename to topi/python/topi/nn/l2_normalize.py index 964c8f8d264a..8ac51c908841 100644 --- a/topi/python/topi/nn/l2_norm.py +++ b/topi/python/topi/nn/l2_normalize.py @@ -1,12 +1,12 @@ # pylint: disable=invalid-name -"""TVM operator for l2norm""" +"""TVM operator for l2normalize""" from __future__ import absolute_import import tvm from .. import cpp @tvm.target.generic_func -def l2norm_instance(data, eps, axis=None): - """Perform L2norm on the input data +def l2normalize_instance(data, eps, axis=None): + """Perform L2 normalization on the input data For axis=None, y(i, j) = x(i, j) / sqrt(max(sum(x^2), eps)) @@ -26,4 +26,4 @@ def l2norm_instance(data, eps, axis=None): output : tvm.Tensor 4-D output with same shape """ - return cpp.nn.l2norm_instance(data, eps, axis) + return cpp.nn.l2normalize_instance(data, eps, axis) diff --git a/topi/python/topi/rocm/nn.py b/topi/python/topi/rocm/nn.py index ca935740f2e2..994be565ff87 100644 --- a/topi/python/topi/rocm/nn.py +++ b/topi/python/topi/rocm/nn.py @@ -11,8 +11,8 @@ def schedule_lrn(outs): cpp_target = cpp.TEST_create_target(target.target_name) return cpp.rocm.schedule_lrn(cpp_target, outs) -@generic.schedule_l2norm.register(["rocm", "gpu"]) -def schedule_l2norm(outs): +@generic.schedule_l2normalize.register(["rocm", "gpu"]) +def schedule_l2normalize(outs): target = tvm.target.current_target(allow_none=False) cpp_target = cpp.TEST_create_target(target.target_name) - return cpp.rocm.schedule_l2norm(cpp_target, outs) + return cpp.rocm.schedule_l2normalize(cpp_target, outs) diff --git a/topi/src/topi.cc b/topi/src/topi.cc index 930d9d16260d..5a5c4f537d17 100644 --- a/topi/src/topi.cc +++ b/topi/src/topi.cc @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -363,10 +363,10 @@ TVM_REGISTER_GLOBAL("topi.nn.log_softmax") *rv = nn::log_softmax(args[0]); }); -/* Ops from nn/l2_norm.h */ -TVM_REGISTER_GLOBAL("topi.nn.l2norm_instance") +/* Ops from nn/l2_normalize.h */ +TVM_REGISTER_GLOBAL("topi.nn.l2normalize_instance") .set_body([](TVMArgs args, TVMRetValue *rv) { - *rv = nn::l2norm_instance(args[0], static_cast(args[1]), args[2]); + *rv = nn::l2normalize_instance(args[0], static_cast(args[1]), args[2]); }); TVM_REGISTER_GLOBAL("topi.nn.lrn") @@ -459,9 +459,9 @@ TVM_REGISTER_GLOBAL("topi.rocm.schedule_lrn") *rv = topi::rocm::schedule_lrn(args[0], args[1]); }); -TVM_REGISTER_GLOBAL("topi.rocm.schedule_l2norm") +TVM_REGISTER_GLOBAL("topi.rocm.schedule_l2normalize") .set_body([](TVMArgs args, TVMRetValue *rv) { - *rv = topi::rocm::schedule_l2norm(args[0], args[1]); + *rv = topi::rocm::schedule_l2normalize(args[0], args[1]); }); /* CUDA schedules */ @@ -515,9 +515,9 @@ TVM_REGISTER_GLOBAL("topi.cuda.schedule_lrn") *rv = topi::cuda::schedule_lrn(args[0], args[1]); }); -TVM_REGISTER_GLOBAL("topi.cuda.schedule_l2norm") +TVM_REGISTER_GLOBAL("topi.cuda.schedule_l2normalize") .set_body([](TVMArgs args, TVMRetValue *rv) { - *rv = topi::cuda::schedule_l2norm(args[0], args[1]); + *rv = topi::cuda::schedule_l2normalize(args[0], args[1]); }); 
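The TVM_REGISTER_GLOBAL entries above are what the Python wrappers in topi/python/topi/cuda/nn.py and topi/python/topi/rocm/nn.py reach through the topi.cpp FFI namespace. As a minimal sketch only, not part of this patch, the renamed schedules can also be resolved directly by their registered names; tvm.get_global_func and the library-loading behaviour of "import topi" are assumptions about this TVM build rather than anything introduced here:

    # Minimal sketch, not part of this patch: resolve the schedules
    # registered above by their global names.
    import tvm
    import topi  # assumed to load the TOPI library carrying these globals

    sched_lrn = tvm.get_global_func("topi.cuda.schedule_lrn")
    sched_l2 = tvm.get_global_func("topi.cuda.schedule_l2normalize")
    print(type(sched_lrn), type(sched_l2))  # both resolve to packed function handles

This is the same path the wrappers take when they call cpp.cuda.schedule_lrn(cpp_target, outs).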
/*! \brief Builder function for instantiating schedules. */ diff --git a/topi/tests/python/test_topi_l2norm.py b/topi/tests/python/test_topi_l2norm.py index 999b82169149..4f9a503bce7d 100644 --- a/topi/tests/python/test_topi_l2norm.py +++ b/topi/tests/python/test_topi_l2norm.py @@ -1,11 +1,11 @@ -"""Test code for L2 norm""" +"""Test code for L2 normalization""" import numpy as np import tvm import topi from topi.util import get_const_tuple -def l2norm_instance_python(a_np, eps, axis=None): - """L2 norm operator in NCHW layout. +def l2normalize_instance_python(a_np, eps, axis=None): + """L2 normalize operator in NCHW layout. Parameters ---------- @@ -19,26 +19,24 @@ def l2norm_instance_python(a_np, eps, axis=None): Returns ------- - l2norm_out : np.ndarray + l2normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - batch, axis1, axis2, axis3 = a_np.shape - sqr_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) - sqrt_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) - l2norm_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) + batch = a_np.shape[0] dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - return np.divide(a_np, sqrt_sum) + l2normalize_out = np.divide(a_np, sqrt_sum) + return l2normalize_out -def verify_l2norm(n, c, h, w, eps, axis=None): +def verify_l2normalize(ishape, eps, axis=None): - A = tvm.placeholder((n, c, h, w), name='A') - B = topi.nn.l2norm_instance(A, eps, axis) + A = tvm.placeholder(ishape, name='A') + B = topi.nn.l2normalize_instance(A, eps, axis) dtype = A.dtype - a_np = np.random.uniform(size=(n, c, h, w)).astype(dtype) - b_np = l2norm_instance_python(a_np, eps, axis) + a_np = np.random.uniform(size=ishape).astype(dtype) + b_np = l2normalize_instance_python(a_np, eps, axis) def check_device(device): ctx = tvm.context(device, 0) @@ -48,9 +46,9 @@ def check_device(device): print("Running on target: %s" % device) with tvm.target.create(device): if device == 'llvm': - s = topi.generic.schedule_l2norm([B]) + s = topi.generic.schedule_l2normalize([B]) else: - s = topi.cuda.schedule_l2norm([B]) + s = topi.cuda.schedule_l2normalize([B]) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx) f = tvm.build(s, [A, B], device) @@ -60,14 +58,14 @@ def check_device(device): for device in ['llvm', 'cuda', 'opencl', 'metal', 'rocm', 'vulkan']: check_device(device) -def test_l2norm(): - verify_l2norm(1, 3, 20, 20, 0.001) - verify_l2norm(1, 3, 20, 20, 0.001, (1,)) - verify_l2norm(1, 3, 20, 20, 0.001, (1, 2)) - verify_l2norm(1, 3, 20, 20, 0.001, (2, 3)) - verify_l2norm(1, 3, 20, 20, 0.001, (0, 3)) - verify_l2norm(1, 3, 20, 20, 0.001, (0, 2, 3)) +def test_l2normalize(): + verify_l2normalize((1, 3, 20, 20), 0.001) + verify_l2normalize((1, 3, 20, 20), 0.001, (1,)) + verify_l2normalize((1, 3, 20, 20), 0.001, (1, 2)) + verify_l2normalize((1, 3, 20, 20), 0.001, (2, 3)) + verify_l2normalize((1, 3, 20, 20), 0.001, (0, 3)) + verify_l2normalize((1, 3, 20, 20), 0.001, (0, 2, 3)) if __name__ == "__main__": - test_l2norm() + test_l2normalize() diff --git a/topi/tests/python/test_topi_lrn.py b/topi/tests/python/test_topi_lrn.py index 69c3fa404b71..5e49fe739103 100644 --- a/topi/tests/python/test_topi_lrn.py +++ b/topi/tests/python/test_topi_lrn.py @@ -13,7 +13,7 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): 4-D with shape [batch, in_channel, in_height, in_width] size : int - normalisation window size + 
normalization window size axis : int input data layout channel axis @@ -22,21 +22,19 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): offset to avoid dividing by 0. constant value alpha : float - contant valie + constant value beta : float exponent constant value Returns ------- - b_np : np.ndarray + lrn_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ axis0, axis1, axis2, axis3 = a_np.shape radius = size // 2 sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - sqr_sum_up = np.zeros(shape=a_np.shape).astype(a_np.dtype) - lrn_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) def sum_dot_values(i, j, k, l): axis_size = a_np.shape[axis] if (axis == 1): @@ -59,7 +57,8 @@ def sum_dot_values(i, j, k, l): sum_dot_values(i, j, k, l) sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) - return np.divide(a_np, sqr_sum_up) + lrn_out = np.divide(a_np, sqr_sum_up) + return lrn_out def verify_lrn(shape, size, axis, bias, alpha, beta): A = tvm.placeholder(shape, name='A') diff --git a/topi/tests/python_cpp/test_topi_l2norm.py b/topi/tests/python_cpp/test_topi_l2norm.py index 5e81406d2177..22ef06b90487 100644 --- a/topi/tests/python_cpp/test_topi_l2norm.py +++ b/topi/tests/python_cpp/test_topi_l2norm.py @@ -1,13 +1,12 @@ """Test code for l2 normalization""" -import os import numpy as np import tvm import topi import logging from topi.util import get_const_tuple -def l2norm_instance_python(a_np, eps, axis=None): - """L2 norm operator in NCHW layout. +def l2normalize_instance_python(a_np, eps, axis=None): + """L2 normalize operator in NCHW layout. Parameters ---------- @@ -21,26 +20,24 @@ def l2norm_instance_python(a_np, eps, axis=None): Returns ------- - l2norm_out : np.ndarray + l2normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - batch, axis1, axis2, axis3 = a_np.shape - sqr_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) - sqrt_sum = np.zeros(shape=(batch,)).astype(a_np.dtype) - l2norm_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) + batch = a_np.shape[0] dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - return np.divide(a_np, sqrt_sum) + l2normalize_out = np.divide(a_np, sqrt_sum) + return l2normalize_out -def verify_l2norm(n, c, h, w, eps, axis=None): +def verify_l2normalize(shape, eps, axis=None): '''Verify l2 normalization operator by comparing outputs from tvm and numpy implementation''' - A = tvm.placeholder((n, c, h, w), name='A') - B = topi.cpp.nn.l2norm_instance(A, eps, axis) + A = tvm.placeholder(shape, name='A') + B = topi.cpp.nn.l2normalize_instance(A, eps, axis) dtype = A.dtype - a_np = np.random.uniform(size=(n, c, h, w)).astype(dtype) - b_np = l2norm_instance_python(a_np, eps, axis) + a_np = np.random.uniform(size=shape).astype(dtype) + b_np = l2normalize_instance_python(a_np, eps, axis) def check_device(device): if not tvm.module.enabled(device): @@ -51,25 +48,25 @@ def check_device(device): if device == "llvm": s = topi.cpp.generic.default_schedule(target, [B], False) else: - s = topi.cpp.cuda.schedule_l2norm(target, [B]) + s = topi.cpp.cuda.schedule_l2normalize(target, [B]) ctx = tvm.context(device, 0) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) - func = tvm.build(s, [A, B], device, name="l2_norm") + func = tvm.build(s, [A, B], device, name="l2_normalize") func(a, b) np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) 
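The reference l2norm_instance_python compared against above computes a / sqrt(max(sum(a*a, axis), eps)), so whenever the squared sum along the chosen axes exceeds eps each normalized slice comes out with unit L2 norm. A small numpy sketch of that property, using an illustrative tensor and axis that are not taken from the test:

    # Sketch only: same formula as the numpy reference, on a tiny tensor.
    import numpy as np

    a = np.arange(24, dtype="float32").reshape(1, 2, 3, 4)  # illustrative values
    eps, axis = 0.001, (1,)

    sqr_sum = np.sum(a * a, axis=axis, keepdims=True)
    out = a / np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a.shape), eps))

    # Every (batch, height, width) position now has unit norm over channels.
    assert np.allclose(np.sqrt(np.sum(out * out, axis=axis)), 1.0, atol=1e-5)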
for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm']: check_device(device) -def test_l2_norm(): - verify_l2norm(1, 3, 20, 20, 0.001) - verify_l2norm(1, 3, 20, 20, 0.001, (1,)) - verify_l2norm(1, 3, 20, 20, 0.001, (1, 2)) - verify_l2norm(1, 3, 20, 20, 0.001, (2, 3)) - verify_l2norm(1, 3, 20, 20, 0.001, (0, 3)) - verify_l2norm(1, 3, 20, 20, 0.001, (0, 2, 3)) +def test_l2_normalize(): + verify_l2normalize((1, 3, 20, 20), 0.001) + verify_l2normalize((1, 3, 20, 20), 0.001, (1,)) + verify_l2normalize((1, 3, 20, 20), 0.001, (1, 2)) + verify_l2normalize((1, 3, 20, 20), 0.001, (2, 3)) + verify_l2normalize((1, 3, 20, 20), 0.001, (0, 3)) + verify_l2normalize((1, 3, 20, 20), 0.001, (0, 2, 3)) if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) - test_l2_norm() + test_l2_normalize() diff --git a/topi/tests/python_cpp/test_topi_lrn.py b/topi/tests/python_cpp/test_topi_lrn.py index 69c66c028c69..0af9ca9b8ac4 100644 --- a/topi/tests/python_cpp/test_topi_lrn.py +++ b/topi/tests/python_cpp/test_topi_lrn.py @@ -1,5 +1,4 @@ """Test code for LRN""" -import os import numpy as np import tvm import topi @@ -7,7 +6,7 @@ from topi.util import get_const_tuple def lrn_python(a_np, size, axis, bias, alpha, beta): - """Local response norm operator in NCHW layout. + """Local response normalization operator in NCHW layout. Parameters ---------- @@ -15,7 +14,7 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): 4-D with shape [batch, in_channel, in_height, in_width] size : int - normalisation window size + normalization window size axis : int input data layout channel axis @@ -24,21 +23,19 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): offset to avoid dividing by 0. constant value alpha : float - contant valie + constant value beta : float exponent constant value Returns ------- - b_np : np.ndarray + lrn_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ axis0, axis1, axis2, axis3 = a_np.shape radius = size // 2 sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - sqr_sum_up = np.zeros(shape=a_np.shape).astype(a_np.dtype) - lrn_out = np.zeros(shape=a_np.shape).astype(a_np.dtype) def sum_dot_values(i, j, k, l): axis_size = a_np.shape[axis] if (axis == 1): @@ -61,7 +58,8 @@ def sum_dot_values(i, j, k, l): sum_dot_values(i, j, k, l) sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) - return np.divide(a_np, sqr_sum_up) + lrn_out = np.divide(a_np, sqr_sum_up) + return lrn_out def verify_lrn(shape, size, axis, bias, alpha, beta): '''Verify Local response normalization operator by comparing outputs from tvm and numpy implementation''' From 88a12f5cf8bd5300fba77bb2293145a8b59518ae Mon Sep 17 00:00:00 2001 From: pariksheet Date: Wed, 20 Jun 2018 15:17:26 +0530 Subject: [PATCH 09/12] Review comments addressed --- nnvm/python/nnvm/top/nn.py | 16 +++++----- nnvm/src/top/nn/nn.cc | 3 +- nnvm/tests/python/compiler/test_top_level1.py | 31 ++++++++++++++++--- topi/include/topi/cuda/normalization.h | 4 +-- topi/include/topi/nn/l2_normalize.h | 4 +-- topi/include/topi/rocm/normalization.h | 4 +-- topi/python/topi/cuda/__init__.py | 2 +- topi/python/topi/cuda/nn.py | 6 ++-- topi/python/topi/generic/nn.py | 2 +- topi/python/topi/nn/l2_normalize.py | 4 +-- topi/python/topi/rocm/nn.py | 6 ++-- topi/src/topi.cc | 12 +++---- topi/tests/python/test_topi_l2norm.py | 11 +++---- topi/tests/python_cpp/test_topi_l2norm.py | 9 +++--- 14 files changed, 68 insertions(+), 46 deletions(-) diff --git a/nnvm/python/nnvm/top/nn.py b/nnvm/python/nnvm/top/nn.py index 
37c014e705d9..b0678a1edfb3 100644 --- a/nnvm/python/nnvm/top/nn.py +++ b/nnvm/python/nnvm/top/nn.py @@ -260,19 +260,19 @@ def schedule_lrn(attrs, outs, target): with tvm.target.create(target): return topi.generic.schedule_lrn(outs) -reg.register_pattern("lrn", OpPattern.OUT_ELEMWISE_FUSABLE) +reg.register_pattern("lrn", OpPattern.OPAQUE) -@reg.register_compute("l2normalize") -def compute_l2normalize(attrs, inputs, _): +@reg.register_compute("l2_normalize") +def compute_l2_normalize(attrs, inputs, _): """Compute definition of l2normalize""" eps = attrs.get_float("eps") axis = attrs.get_int_tuple("axis") - return topi.nn.l2normalize_instance(inputs[0], eps, axis) + return topi.nn.l2_normalize(inputs[0], eps, axis) -@reg.register_schedule("l2normalize") -def schedule_l2normalize(attrs, outs, target): +@reg.register_schedule("l2_normalize") +def schedule_l2_normalize(attrs, outs, target): """Schedule definition of l2normalize""" with tvm.target.create(target): - return topi.generic.schedule_l2normalize(outs) + return topi.generic.schedule_l2_normalize(outs) -reg.register_pattern("l2normalize", OpPattern.OUT_ELEMWISE_FUSABLE) +reg.register_pattern("l2_normalize", OpPattern.OUT_ELEMWISE_FUSABLE) diff --git a/nnvm/src/top/nn/nn.cc b/nnvm/src/top/nn/nn.cc index 7f50f2b1f58a..e78e22edf272 100644 --- a/nnvm/src/top/nn/nn.cc +++ b/nnvm/src/top/nn/nn.cc @@ -747,7 +747,7 @@ inline bool L2normalizeInferShape(const nnvm::NodeAttrs& attrs, return true; } -NNVM_REGISTER_OP(l2normalize) +NNVM_REGISTER_OP(l2_normalize) .describe(R"code(L2NORMALIZE layer)code" NNVM_ADD_FILELINE) .add_argument("data", "4D Tesndor", "Input data.") .set_attr_parser(ParamParser) @@ -756,6 +756,7 @@ NNVM_REGISTER_OP(l2normalize) .set_num_outputs(1) .set_attr("FInferShape", L2normalizeInferShape) .set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>) .set_support_level(1); } // namespace top diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index 5cf661b412c4..a6cc475920f7 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -435,13 +435,25 @@ def sum_dot_values(i, j, k, l): out_np = lrn_python(x_np, size, axis, bias, alpha, beta) np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) + #Checking LRN op followed by elementwise op relu + z = sym.relu(y) + x_np = np.random.uniform(low=-10.0, high=10.0, size=ishape).astype(dtype) + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(z, target, {"x": ishape}) + m = graph_runtime.create(graph, lib, ctx) + m.run(x=x_np) + out = m.get_output(0, tvm.nd.empty(ishape)) + out_np = lrn_python(x_np, size, axis, bias, alpha, beta) + out_np = (out_np > 0) * out_np + np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) + def verify_l2normalize(ishape, eps, axis): x = sym.Variable("x") - y = sym.l2normalize(x, eps=eps, axis=axis) + y = sym.l2_normalize(x, eps=eps, axis=axis) dtype = "float32" x_np = np.random.uniform(size=ishape).astype(dtype) - def l2normalize_instance_python(a_np, eps, axis=None): + def l2normalize_python(a_np, eps, axis=None): """L2 normalize operator in NCHW layout. 
Parameters @@ -459,7 +471,6 @@ def l2normalize_instance_python(a_np, eps, axis=None): l2normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - batch = a_np.shape[0] dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) @@ -471,7 +482,19 @@ def l2normalize_instance_python(a_np, eps, axis=None): m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) out = m.get_output(0, tvm.nd.empty(ishape)) - out_np = l2normalize_instance_python(x_np, eps, axis) + out_np = l2normalize_python(x_np, eps, axis) + np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) + + #Checking L2 normalization op followed by elementwise op relu + z = sym.relu(y) + x_np = np.random.uniform(low=-10.0, high=10.0, size=ishape).astype(dtype) + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(z, target, {"x": ishape}) + m = graph_runtime.create(graph, lib, ctx) + m.run(x=x_np) + out = m.get_output(0, tvm.nd.empty(ishape)) + out_np = l2normalize_python(x_np, eps, axis) + out_np = (out_np > 0) * out_np np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) def test_lrn(): diff --git a/topi/include/topi/cuda/normalization.h b/topi/include/topi/cuda/normalization.h index d189893450e0..4ffbf521379f 100644 --- a/topi/include/topi/cuda/normalization.h +++ b/topi/include/topi/cuda/normalization.h @@ -59,7 +59,7 @@ inline Schedule schedule_lrn(const Target &target, const Array& outs) { * * \return A schedule for the given ops. */ -inline Schedule schedule_l2normalize(const Target &target, const Array& outs) { +inline Schedule schedule_l2_normalize(const Target &target, const Array& outs) { Array out_ops; for (auto t : outs) { out_ops.push_back(t->op); @@ -69,7 +69,7 @@ inline Schedule schedule_l2normalize(const Target &target, const Array& std::function traverse; traverse = [&](const Operation& op) { // Inline all one-to-one-mapping operators except the last stage (output) - if (is_injective(op->tag) || op->tag == "l2normalize") { + if (is_injective(op->tag) || op->tag == "l2_normalize") { if (!detail::contains(s->outputs, op)) { s[op].compute_inline(); } diff --git a/topi/include/topi/nn/l2_normalize.h b/topi/include/topi/nn/l2_normalize.h index 96f31782ff88..079c6d467561 100644 --- a/topi/include/topi/nn/l2_normalize.h +++ b/topi/include/topi/nn/l2_normalize.h @@ -25,11 +25,11 @@ using namespace tvm; * * \return A Tensor whose op member is the l2 normalization operation */ -inline Tensor l2normalize_instance(const Tensor& data, +inline Tensor l2_normalize(const Tensor& data, float eps, const Array& axis, std::string name = "tensor", - std::string tag = "l2normalize") { + std::string tag = "l2_normalize") { CHECK_EQ(data->shape.size(), 4) << "L2 normalization requires 4-D input"; auto input_shape = data->shape; Tensor dot_value = pow(data, static_cast(2.0)); diff --git a/topi/include/topi/rocm/normalization.h b/topi/include/topi/rocm/normalization.h index 609177f4b8ef..b12e64aba963 100644 --- a/topi/include/topi/rocm/normalization.h +++ b/topi/include/topi/rocm/normalization.h @@ -33,8 +33,8 @@ inline Schedule schedule_lrn(const Target &target, const Array& outs) { * * \return A schedule for the given ops. 
*/ -inline Schedule schedule_l2normalize(const Target &target, const Array& outs) { - return topi::cuda::schedule_l2normalize(target, outs); +inline Schedule schedule_l2_normalize(const Target &target, const Array& outs) { + return topi::cuda::schedule_l2_normalize(target, outs); } } // namespace rocm } // namespace topi diff --git a/topi/python/topi/cuda/__init__.py b/topi/python/topi/cuda/__init__.py index a6bab999ccc6..dbf00ebeb52b 100644 --- a/topi/python/topi/cuda/__init__.py +++ b/topi/python/topi/cuda/__init__.py @@ -17,4 +17,4 @@ from .extern import schedule_extern from .vision import schedule_region from .vision import schedule_reorg -from .nn import schedule_lrn, schedule_l2normalize +from .nn import schedule_lrn, schedule_l2_normalize diff --git a/topi/python/topi/cuda/nn.py b/topi/python/topi/cuda/nn.py index 39486afbf2cf..da88235084b6 100644 --- a/topi/python/topi/cuda/nn.py +++ b/topi/python/topi/cuda/nn.py @@ -25,8 +25,8 @@ def schedule_lrn(outs): cpp_target = cpp.TEST_create_target(target.target_name) return cpp.cuda.schedule_lrn(cpp_target, outs) -@generic.schedule_l2normalize.register(["cuda"]) -def schedule_l2normalize(outs): +@generic.schedule_l2_normalize.register(["cuda"]) +def schedule_l2_normalize(outs): """Schedule for L2normalize Parameters @@ -42,4 +42,4 @@ def schedule_l2normalize(outs): """ target = tvm.target.current_target(allow_none=False) cpp_target = cpp.TEST_create_target(target.target_name) - return cpp.cuda.schedule_l2normalize(cpp_target, outs) + return cpp.cuda.schedule_l2_normalize(cpp_target, outs) diff --git a/topi/python/topi/generic/nn.py b/topi/python/topi/generic/nn.py index 892c90af7062..ff61deac13d5 100644 --- a/topi/python/topi/generic/nn.py +++ b/topi/python/topi/generic/nn.py @@ -278,7 +278,7 @@ def schedule_lrn(outs): return cpp.generic.default_schedule(cpp_target, outs, False) @tvm.target.generic_func -def schedule_l2normalize(outs): +def schedule_l2_normalize(outs): """Schedule for l2normalize Parameters diff --git a/topi/python/topi/nn/l2_normalize.py b/topi/python/topi/nn/l2_normalize.py index 8ac51c908841..0ae104ce715f 100644 --- a/topi/python/topi/nn/l2_normalize.py +++ b/topi/python/topi/nn/l2_normalize.py @@ -5,7 +5,7 @@ from .. 
import cpp @tvm.target.generic_func -def l2normalize_instance(data, eps, axis=None): +def l2_normalize(data, eps, axis=None): """Perform L2 normalization on the input data For axis=None, y(i, j) = x(i, j) / sqrt(max(sum(x^2), eps)) @@ -26,4 +26,4 @@ def l2normalize_instance(data, eps, axis=None): output : tvm.Tensor 4-D output with same shape """ - return cpp.nn.l2normalize_instance(data, eps, axis) + return cpp.nn.l2_normalize(data, eps, axis) diff --git a/topi/python/topi/rocm/nn.py b/topi/python/topi/rocm/nn.py index 994be565ff87..5a9b2ad84db0 100644 --- a/topi/python/topi/rocm/nn.py +++ b/topi/python/topi/rocm/nn.py @@ -11,8 +11,8 @@ def schedule_lrn(outs): cpp_target = cpp.TEST_create_target(target.target_name) return cpp.rocm.schedule_lrn(cpp_target, outs) -@generic.schedule_l2normalize.register(["rocm", "gpu"]) -def schedule_l2normalize(outs): +@generic.schedule_l2_normalize.register(["rocm", "gpu"]) +def schedule_l2_normalize(outs): target = tvm.target.current_target(allow_none=False) cpp_target = cpp.TEST_create_target(target.target_name) - return cpp.rocm.schedule_l2normalize(cpp_target, outs) + return cpp.rocm.schedule_l2_normalize(cpp_target, outs) diff --git a/topi/src/topi.cc b/topi/src/topi.cc index 5a5c4f537d17..9f2ecacd11a4 100644 --- a/topi/src/topi.cc +++ b/topi/src/topi.cc @@ -364,9 +364,9 @@ TVM_REGISTER_GLOBAL("topi.nn.log_softmax") }); /* Ops from nn/l2_normalize.h */ -TVM_REGISTER_GLOBAL("topi.nn.l2normalize_instance") +TVM_REGISTER_GLOBAL("topi.nn.l2_normalize") .set_body([](TVMArgs args, TVMRetValue *rv) { - *rv = nn::l2normalize_instance(args[0], static_cast(args[1]), args[2]); + *rv = nn::l2_normalize(args[0], static_cast(args[1]), args[2]); }); TVM_REGISTER_GLOBAL("topi.nn.lrn") @@ -459,9 +459,9 @@ TVM_REGISTER_GLOBAL("topi.rocm.schedule_lrn") *rv = topi::rocm::schedule_lrn(args[0], args[1]); }); -TVM_REGISTER_GLOBAL("topi.rocm.schedule_l2normalize") +TVM_REGISTER_GLOBAL("topi.rocm.schedule_l2_normalize") .set_body([](TVMArgs args, TVMRetValue *rv) { - *rv = topi::rocm::schedule_l2normalize(args[0], args[1]); + *rv = topi::rocm::schedule_l2_normalize(args[0], args[1]); }); /* CUDA schedules */ @@ -515,9 +515,9 @@ TVM_REGISTER_GLOBAL("topi.cuda.schedule_lrn") *rv = topi::cuda::schedule_lrn(args[0], args[1]); }); -TVM_REGISTER_GLOBAL("topi.cuda.schedule_l2normalize") +TVM_REGISTER_GLOBAL("topi.cuda.schedule_l2_normalize") .set_body([](TVMArgs args, TVMRetValue *rv) { - *rv = topi::cuda::schedule_l2normalize(args[0], args[1]); + *rv = topi::cuda::schedule_l2_normalize(args[0], args[1]); }); /*! \brief Builder function for instantiating schedules. */ diff --git a/topi/tests/python/test_topi_l2norm.py b/topi/tests/python/test_topi_l2norm.py index 4f9a503bce7d..f86bc7c5f324 100644 --- a/topi/tests/python/test_topi_l2norm.py +++ b/topi/tests/python/test_topi_l2norm.py @@ -4,7 +4,7 @@ import topi from topi.util import get_const_tuple -def l2normalize_instance_python(a_np, eps, axis=None): +def l2normalize_python(a_np, eps, axis=None): """L2 normalize operator in NCHW layout. 
Parameters @@ -22,7 +22,6 @@ def l2normalize_instance_python(a_np, eps, axis=None): l2normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - batch = a_np.shape[0] dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) @@ -32,11 +31,11 @@ def l2normalize_instance_python(a_np, eps, axis=None): def verify_l2normalize(ishape, eps, axis=None): A = tvm.placeholder(ishape, name='A') - B = topi.nn.l2normalize_instance(A, eps, axis) + B = topi.nn.l2_normalize(A, eps, axis) dtype = A.dtype a_np = np.random.uniform(size=ishape).astype(dtype) - b_np = l2normalize_instance_python(a_np, eps, axis) + b_np = l2normalize_python(a_np, eps, axis) def check_device(device): ctx = tvm.context(device, 0) @@ -46,9 +45,9 @@ def check_device(device): print("Running on target: %s" % device) with tvm.target.create(device): if device == 'llvm': - s = topi.generic.schedule_l2normalize([B]) + s = topi.generic.schedule_l2_normalize([B]) else: - s = topi.cuda.schedule_l2normalize([B]) + s = topi.cuda.schedule_l2_normalize([B]) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx) f = tvm.build(s, [A, B], device) diff --git a/topi/tests/python_cpp/test_topi_l2norm.py b/topi/tests/python_cpp/test_topi_l2norm.py index 22ef06b90487..b1ede2d8085e 100644 --- a/topi/tests/python_cpp/test_topi_l2norm.py +++ b/topi/tests/python_cpp/test_topi_l2norm.py @@ -5,7 +5,7 @@ import logging from topi.util import get_const_tuple -def l2normalize_instance_python(a_np, eps, axis=None): +def l2normalize_python(a_np, eps, axis=None): """L2 normalize operator in NCHW layout. Parameters @@ -23,7 +23,6 @@ def l2normalize_instance_python(a_np, eps, axis=None): l2normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - batch = a_np.shape[0] dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) @@ -33,11 +32,11 @@ def l2normalize_instance_python(a_np, eps, axis=None): def verify_l2normalize(shape, eps, axis=None): '''Verify l2 normalization operator by comparing outputs from tvm and numpy implementation''' A = tvm.placeholder(shape, name='A') - B = topi.cpp.nn.l2normalize_instance(A, eps, axis) + B = topi.cpp.nn.l2_normalize(A, eps, axis) dtype = A.dtype a_np = np.random.uniform(size=shape).astype(dtype) - b_np = l2normalize_instance_python(a_np, eps, axis) + b_np = l2normalize_python(a_np, eps, axis) def check_device(device): if not tvm.module.enabled(device): @@ -48,7 +47,7 @@ def check_device(device): if device == "llvm": s = topi.cpp.generic.default_schedule(target, [B], False) else: - s = topi.cpp.cuda.schedule_l2normalize(target, [B]) + s = topi.cpp.cuda.schedule_l2_normalize(target, [B]) ctx = tvm.context(device, 0) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) From be94a2edc91c1e5ef8507afa2252aec5f813aa4e Mon Sep 17 00:00:00 2001 From: pariksheet Date: Thu, 21 Jun 2018 09:59:05 +0530 Subject: [PATCH 10/12] Consistent l2_normalize name --- nnvm/include/nnvm/top/nn.h | 4 +-- nnvm/python/nnvm/top/nn.py | 4 +-- nnvm/src/top/nn/nn.cc | 10 +++---- nnvm/tests/python/compiler/test_top_level1.py | 22 +++++++-------- topi/include/topi/cuda/normalization.h | 8 +++--- topi/python/topi/cuda/nn.py | 4 +-- topi/python/topi/generic/nn.py | 4 +-- topi/python/topi/nn/l2_normalize.py | 2 +- 
topi/tests/python/test_topi_l2norm.py | 28 +++++++++---------- topi/tests/python_cpp/test_topi_l2norm.py | 24 ++++++++-------- 10 files changed, 55 insertions(+), 55 deletions(-) diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index 6687535c4f85..0e0b03a72ebb 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -391,11 +391,11 @@ struct LRNParam : public dmlc::Parameter { static const constexpr int kData = 0; }; -struct L2normalizeParam : public dmlc::Parameter { +struct L2NormalizeParam : public dmlc::Parameter { float eps; Tuple axis; - DMLC_DECLARE_PARAMETER(L2normalizeParam) { + DMLC_DECLARE_PARAMETER(L2NormalizeParam) { DMLC_DECLARE_FIELD(eps) .describe("float type epsilon value."); DMLC_DECLARE_FIELD(axis) diff --git a/nnvm/python/nnvm/top/nn.py b/nnvm/python/nnvm/top/nn.py index b0678a1edfb3..5bfabdac2c8d 100644 --- a/nnvm/python/nnvm/top/nn.py +++ b/nnvm/python/nnvm/top/nn.py @@ -264,14 +264,14 @@ def schedule_lrn(attrs, outs, target): @reg.register_compute("l2_normalize") def compute_l2_normalize(attrs, inputs, _): - """Compute definition of l2normalize""" + """Compute definition of l2 normalize""" eps = attrs.get_float("eps") axis = attrs.get_int_tuple("axis") return topi.nn.l2_normalize(inputs[0], eps, axis) @reg.register_schedule("l2_normalize") def schedule_l2_normalize(attrs, outs, target): - """Schedule definition of l2normalize""" + """Schedule definition of l2 normalize""" with tvm.target.create(target): return topi.generic.schedule_l2_normalize(outs) diff --git a/nnvm/src/top/nn/nn.cc b/nnvm/src/top/nn/nn.cc index e78e22edf272..432f6e7a0b41 100644 --- a/nnvm/src/top/nn/nn.cc +++ b/nnvm/src/top/nn/nn.cc @@ -735,9 +735,9 @@ NNVM_REGISTER_OP(lrn) .set_attr("FInferType", ElemwiseType<1, 1>) .set_support_level(1); -DMLC_REGISTER_PARAMETER(L2normalizeParam); +DMLC_REGISTER_PARAMETER(L2NormalizeParam); -inline bool L2normalizeInferShape(const nnvm::NodeAttrs& attrs, +inline bool L2NormalizeInferShape(const nnvm::NodeAttrs& attrs, std::vector* in_shape, std::vector* out_shape) { TShape dshape = (*in_shape)[0]; @@ -750,11 +750,11 @@ inline bool L2normalizeInferShape(const nnvm::NodeAttrs& attrs, NNVM_REGISTER_OP(l2_normalize) .describe(R"code(L2NORMALIZE layer)code" NNVM_ADD_FILELINE) .add_argument("data", "4D Tesndor", "Input data.") -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) .set_num_inputs(1) .set_num_outputs(1) -.set_attr("FInferShape", L2normalizeInferShape) +.set_attr("FInferShape", L2NormalizeInferShape) .set_attr("FInferType", ElemwiseType<1, 1>) .set_attr("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>) .set_support_level(1); diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index a6cc475920f7..1fa7f5d87675 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -447,13 +447,13 @@ def sum_dot_values(i, j, k, l): out_np = (out_np > 0) * out_np np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) -def verify_l2normalize(ishape, eps, axis): +def verify_l2_normalize(ishape, eps, axis): x = sym.Variable("x") y = sym.l2_normalize(x, eps=eps, axis=axis) dtype = "float32" x_np = np.random.uniform(size=ishape).astype(dtype) - def l2normalize_python(a_np, eps, axis=None): + def l2_normalize_python(a_np, eps, axis=None): """L2 normalize operator in NCHW layout. 
Parameters @@ -468,21 +468,21 @@ def l2normalize_python(a_np, eps, axis=None): Returns ------- - l2normalize_out : np.ndarray + l2_normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - l2normalize_out = np.divide(a_np, sqrt_sum) - return l2normalize_out + l2_normalize_out = np.divide(a_np, sqrt_sum) + return l2_normalize_out for target, ctx in ctx_list(): graph, lib, _ = nnvm.compiler.build(y, target, {"x": ishape}) m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) out = m.get_output(0, tvm.nd.empty(ishape)) - out_np = l2normalize_python(x_np, eps, axis) + out_np = l2_normalize_python(x_np, eps, axis) np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) #Checking L2 normalization op followed by elementwise op relu @@ -493,7 +493,7 @@ def l2normalize_python(a_np, eps, axis=None): m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) out = m.get_output(0, tvm.nd.empty(ishape)) - out_np = l2normalize_python(x_np, eps, axis) + out_np = l2_normalize_python(x_np, eps, axis) out_np = (out_np > 0) * out_np np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) @@ -501,9 +501,9 @@ def test_lrn(): verify_lrn((1, 3, 20, 20), 3, 1, 1.0, 1.0, 0.5) verify_lrn((1, 3, 20, 20), 3, 1, 2.0, 1.0, 0.75) -def test_l2normalize(): - verify_l2normalize((1, 3, 20, 20), 0.001, (1,)) - verify_l2normalize((1, 3, 20, 20), 0.001, (1, 2)) +def test_l2_normalize(): + verify_l2_normalize((1, 3, 20, 20), 0.001, (1,)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (1, 2)) if __name__ == "__main__": test_split() @@ -524,4 +524,4 @@ def test_l2normalize(): test_squeeze() test_pad() test_lrn() - test_l2normalize() + test_l2_normalize() diff --git a/topi/include/topi/cuda/normalization.h b/topi/include/topi/cuda/normalization.h index 4ffbf521379f..91578c46d266 100644 --- a/topi/include/topi/cuda/normalization.h +++ b/topi/include/topi/cuda/normalization.h @@ -90,14 +90,14 @@ inline Schedule schedule_l2_normalize(const Target &target, const Array& traverse(outs[0]->op); int num_thread = 64; - Tensor l2normalize = outs[0]; + Tensor l2_normalize = outs[0]; IterVar block_x = tvm::thread_axis(Range(), "blockIdx.x"); IterVar thread_x = tvm::thread_axis(Range(0, num_thread), "threadIdx.x"); IterVar xto, xti; - s[l2normalize].split_by_nparts(l2normalize->op.as()->axis[1], + s[l2_normalize].split_by_nparts(l2_normalize->op.as()->axis[1], num_thread, &xto, &xti); - s[l2normalize].bind(l2normalize->op.as()->axis[0], block_x); - s[l2normalize].bind(xto, thread_x); + s[l2_normalize].bind(l2_normalize->op.as()->axis[0], block_x); + s[l2_normalize].bind(xto, thread_x); return s; } } // namespace cuda diff --git a/topi/python/topi/cuda/nn.py b/topi/python/topi/cuda/nn.py index da88235084b6..b503b2dad50f 100644 --- a/topi/python/topi/cuda/nn.py +++ b/topi/python/topi/cuda/nn.py @@ -27,12 +27,12 @@ def schedule_lrn(outs): @generic.schedule_l2_normalize.register(["cuda"]) def schedule_l2_normalize(outs): - """Schedule for L2normalize + """Schedule for L2 normalize Parameters ---------- outs: Array of Tensor - The computation graph description of L2normalize + The computation graph description of L2 normalize in the format of an array of tensors. 
Returns diff --git a/topi/python/topi/generic/nn.py b/topi/python/topi/generic/nn.py index ff61deac13d5..8f2f8612c7fa 100644 --- a/topi/python/topi/generic/nn.py +++ b/topi/python/topi/generic/nn.py @@ -279,12 +279,12 @@ def schedule_lrn(outs): @tvm.target.generic_func def schedule_l2_normalize(outs): - """Schedule for l2normalize + """Schedule for l2 normalize Parameters ---------- outs: Array of Tensor - The computation graph description of l2normalize + The computation graph description of l2 normalize in the format of an array of tensors. Returns diff --git a/topi/python/topi/nn/l2_normalize.py b/topi/python/topi/nn/l2_normalize.py index 0ae104ce715f..951084379eec 100644 --- a/topi/python/topi/nn/l2_normalize.py +++ b/topi/python/topi/nn/l2_normalize.py @@ -1,5 +1,5 @@ # pylint: disable=invalid-name -"""TVM operator for l2normalize""" +"""TVM operator for l2 normalize""" from __future__ import absolute_import import tvm from .. import cpp diff --git a/topi/tests/python/test_topi_l2norm.py b/topi/tests/python/test_topi_l2norm.py index f86bc7c5f324..3c17005a56e1 100644 --- a/topi/tests/python/test_topi_l2norm.py +++ b/topi/tests/python/test_topi_l2norm.py @@ -4,7 +4,7 @@ import topi from topi.util import get_const_tuple -def l2normalize_python(a_np, eps, axis=None): +def l2_normalize_python(a_np, eps, axis=None): """L2 normalize operator in NCHW layout. Parameters @@ -19,23 +19,23 @@ def l2normalize_python(a_np, eps, axis=None): Returns ------- - l2normalize_out : np.ndarray + l2_normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - l2normalize_out = np.divide(a_np, sqrt_sum) - return l2normalize_out + l2_normalize_out = np.divide(a_np, sqrt_sum) + return l2_normalize_out -def verify_l2normalize(ishape, eps, axis=None): +def verify_l2_normalize(ishape, eps, axis=None): A = tvm.placeholder(ishape, name='A') B = topi.nn.l2_normalize(A, eps, axis) dtype = A.dtype a_np = np.random.uniform(size=ishape).astype(dtype) - b_np = l2normalize_python(a_np, eps, axis) + b_np = l2_normalize_python(a_np, eps, axis) def check_device(device): ctx = tvm.context(device, 0) @@ -57,14 +57,14 @@ def check_device(device): for device in ['llvm', 'cuda', 'opencl', 'metal', 'rocm', 'vulkan']: check_device(device) -def test_l2normalize(): - verify_l2normalize((1, 3, 20, 20), 0.001) - verify_l2normalize((1, 3, 20, 20), 0.001, (1,)) - verify_l2normalize((1, 3, 20, 20), 0.001, (1, 2)) - verify_l2normalize((1, 3, 20, 20), 0.001, (2, 3)) - verify_l2normalize((1, 3, 20, 20), 0.001, (0, 3)) - verify_l2normalize((1, 3, 20, 20), 0.001, (0, 2, 3)) +def test_l2_normalize(): + verify_l2_normalize((1, 3, 20, 20), 0.001) + verify_l2_normalize((1, 3, 20, 20), 0.001, (1,)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (1, 2)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (2, 3)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (0, 3)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (0, 2, 3)) if __name__ == "__main__": - test_l2normalize() + test_l2_normalize() diff --git a/topi/tests/python_cpp/test_topi_l2norm.py b/topi/tests/python_cpp/test_topi_l2norm.py index b1ede2d8085e..74396def5d76 100644 --- a/topi/tests/python_cpp/test_topi_l2norm.py +++ b/topi/tests/python_cpp/test_topi_l2norm.py @@ -5,7 +5,7 @@ import logging from topi.util import get_const_tuple -def l2normalize_python(a_np, eps, axis=None): +def l2_normalize_python(a_np, eps, 
axis=None): """L2 normalize operator in NCHW layout. Parameters @@ -20,23 +20,23 @@ def l2normalize_python(a_np, eps, axis=None): Returns ------- - l2normalize_out : np.ndarray + l2_normalize_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ dot_value = np.power(a_np, 2.0) sqr_sum = np.sum(dot_value, axis, keepdims=True) sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - l2normalize_out = np.divide(a_np, sqrt_sum) - return l2normalize_out + l2_normalize_out = np.divide(a_np, sqrt_sum) + return l2_normalize_out -def verify_l2normalize(shape, eps, axis=None): +def verify_l2_normalize(shape, eps, axis=None): '''Verify l2 normalization operator by comparing outputs from tvm and numpy implementation''' A = tvm.placeholder(shape, name='A') B = topi.cpp.nn.l2_normalize(A, eps, axis) dtype = A.dtype a_np = np.random.uniform(size=shape).astype(dtype) - b_np = l2normalize_python(a_np, eps, axis) + b_np = l2_normalize_python(a_np, eps, axis) def check_device(device): if not tvm.module.enabled(device): @@ -59,12 +59,12 @@ def check_device(device): check_device(device) def test_l2_normalize(): - verify_l2normalize((1, 3, 20, 20), 0.001) - verify_l2normalize((1, 3, 20, 20), 0.001, (1,)) - verify_l2normalize((1, 3, 20, 20), 0.001, (1, 2)) - verify_l2normalize((1, 3, 20, 20), 0.001, (2, 3)) - verify_l2normalize((1, 3, 20, 20), 0.001, (0, 3)) - verify_l2normalize((1, 3, 20, 20), 0.001, (0, 2, 3)) + verify_l2_normalize((1, 3, 20, 20), 0.001) + verify_l2_normalize((1, 3, 20, 20), 0.001, (1,)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (1, 2)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (2, 3)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (0, 3)) + verify_l2_normalize((1, 3, 20, 20), 0.001, (0, 2, 3)) if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) From e5bc18bb2eea68f58201feea56020bf77b5265ab Mon Sep 17 00:00:00 2001 From: pariksheet Date: Thu, 21 Jun 2018 16:19:05 +0530 Subject: [PATCH 11/12] Modified lrn_python function --- nnvm/src/top/nn/nn.cc | 4 ++-- nnvm/tests/python/compiler/test_top_level1.py | 11 ++--------- topi/tests/python/test_topi_lrn.py | 10 ++-------- topi/tests/python_cpp/test_topi_lrn.py | 10 ++-------- 4 files changed, 8 insertions(+), 27 deletions(-) diff --git a/nnvm/src/top/nn/nn.cc b/nnvm/src/top/nn/nn.cc index 432f6e7a0b41..ab47ae521224 100644 --- a/nnvm/src/top/nn/nn.cc +++ b/nnvm/src/top/nn/nn.cc @@ -726,7 +726,7 @@ inline bool LRNInferShape(const nnvm::NodeAttrs& attrs, NNVM_REGISTER_OP(lrn) .describe(R"code(LRN layer)code" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tesndor", "Input data.") +.add_argument("data", "4D Tensor", "Input data.") .set_attr_parser(ParamParser) .set_attr("FGetAttrDict", ParamGetAttrDict) .set_num_inputs(1) @@ -749,7 +749,7 @@ inline bool L2NormalizeInferShape(const nnvm::NodeAttrs& attrs, NNVM_REGISTER_OP(l2_normalize) .describe(R"code(L2NORMALIZE layer)code" NNVM_ADD_FILELINE) -.add_argument("data", "4D Tesndor", "Input data.") +.add_argument("data", "4D Tensor", "Input data.") .set_attr_parser(ParamParser) .set_attr("FGetAttrDict", ParamGetAttrDict) .set_num_inputs(1) diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index 1fa7f5d87675..678f8a8ccc46 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -5,7 +5,7 @@ import nnvm.symbol as sym import nnvm.compiler from nnvm.testing.config import ctx_list - +from itertools import product def helper(symbol, 
inputs, dtype, np_forward, np_backward=None, need_input=True, need_head_grads=True): @@ -399,10 +399,9 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): lrn_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - axis0, axis1, axis2, axis3 = a_np.shape radius = size // 2 sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - def sum_dot_values(i, j, k, l): + for i, j, k, l in product(*[range(_axis) for _axis in a_np.shape]): axis_size = a_np.shape[axis] if (axis == 1): #NCHW layout @@ -417,12 +416,6 @@ def sum_dot_values(i, j, k, l): sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ a_np[i, j, k, sum_start:sum_end]) - for i in range(axis0): - for j in range(axis1): - for k in range(axis2): - for l in range(axis3): - sum_dot_values(i, j, k, l) - sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) lrn_out = np.divide(a_np, sqr_sum_up) return lrn_out diff --git a/topi/tests/python/test_topi_lrn.py b/topi/tests/python/test_topi_lrn.py index 5e49fe739103..51446ebf712d 100644 --- a/topi/tests/python/test_topi_lrn.py +++ b/topi/tests/python/test_topi_lrn.py @@ -3,6 +3,7 @@ import tvm import topi from topi.util import get_const_tuple +from itertools import product def lrn_python(a_np, size, axis, bias, alpha, beta): """Local response norm operator in NCHW layout. @@ -32,10 +33,9 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): lrn_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - axis0, axis1, axis2, axis3 = a_np.shape radius = size // 2 sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - def sum_dot_values(i, j, k, l): + for i, j, k, l in product(*[range(_axis) for _axis in a_np.shape]): axis_size = a_np.shape[axis] if (axis == 1): #NCHW layout @@ -50,12 +50,6 @@ def sum_dot_values(i, j, k, l): sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ a_np[i, j, k, sum_start:sum_end]) - for i in range(axis0): - for j in range(axis1): - for k in range(axis2): - for l in range(axis3): - sum_dot_values(i, j, k, l) - sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) lrn_out = np.divide(a_np, sqr_sum_up) return lrn_out diff --git a/topi/tests/python_cpp/test_topi_lrn.py b/topi/tests/python_cpp/test_topi_lrn.py index 0af9ca9b8ac4..6f62e5143e27 100644 --- a/topi/tests/python_cpp/test_topi_lrn.py +++ b/topi/tests/python_cpp/test_topi_lrn.py @@ -4,6 +4,7 @@ import topi import logging from topi.util import get_const_tuple +from itertools import product def lrn_python(a_np, size, axis, bias, alpha, beta): """Local response normalization operator in NCHW layout. 
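The rewrite above, mirrored in test_top_level1.py and topi/tests/python/test_topi_lrn.py, folds the four explicit nested loops of the reference lrn_python into a single loop over itertools.product, leaving the per-element sqr_sum computation unchanged. A self-contained sketch of why the two forms are equivalent, with an illustrative shape that is not taken from the patch:

    # Sketch: product over the per-axis ranges visits exactly the same
    # (i, j, k, l) tuples, in the same order, as four nested for-loops.
    from itertools import product

    shape = (1, 3, 2, 2)  # illustrative only

    nested = [(i, j, k, l)
              for i in range(shape[0])
              for j in range(shape[1])
              for k in range(shape[2])
              for l in range(shape[3])]
    flattened = list(product(*[range(n) for n in shape]))

    assert nested == flattened  # identical index tuples and iteration order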
@@ -33,10 +34,9 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): lrn_out : np.ndarray 4-D with shape [batch, out_channel, out_height, out_width] """ - axis0, axis1, axis2, axis3 = a_np.shape radius = size // 2 sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - def sum_dot_values(i, j, k, l): + for i, j, k, l in product(*[range(_axis) for _axis in a_np.shape]): axis_size = a_np.shape[axis] if (axis == 1): #NCHW layout @@ -51,12 +51,6 @@ def sum_dot_values(i, j, k, l): sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ a_np[i, j, k, sum_start:sum_end]) - for i in range(axis0): - for j in range(axis1): - for k in range(axis2): - for l in range(axis3): - sum_dot_values(i, j, k, l) - sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) lrn_out = np.divide(a_np, sqr_sum_up) return lrn_out From c2ca9c25b330c44bd4280920694bce681d98be77 Mon Sep 17 00:00:00 2001 From: pariksheet Date: Fri, 22 Jun 2018 09:53:13 +0530 Subject: [PATCH 12/12] Moved lrn_python and l2_normalize_python to topi.testing --- nnvm/tests/python/compiler/test_top_level1.py | 82 +------------------ topi/python/topi/testing/__init__.py | 2 + .../topi/testing/l2_normalize_python.py | 27 ++++++ topi/python/topi/testing/lrn_python.py | 53 ++++++++++++ topi/tests/python/test_topi_l2norm.py | 27 +----- topi/tests/python/test_topi_lrn.py | 53 +----------- topi/tests/python_cpp/test_topi_l2norm.py | 27 +----- topi/tests/python_cpp/test_topi_lrn.py | 53 +----------- 8 files changed, 94 insertions(+), 230 deletions(-) create mode 100644 topi/python/topi/testing/l2_normalize_python.py create mode 100644 topi/python/topi/testing/lrn_python.py diff --git a/nnvm/tests/python/compiler/test_top_level1.py b/nnvm/tests/python/compiler/test_top_level1.py index 678f8a8ccc46..37798d37f400 100644 --- a/nnvm/tests/python/compiler/test_top_level1.py +++ b/nnvm/tests/python/compiler/test_top_level1.py @@ -5,7 +5,6 @@ import nnvm.symbol as sym import nnvm.compiler from nnvm.testing.config import ctx_list -from itertools import product def helper(symbol, inputs, dtype, np_forward, np_backward=None, need_input=True, need_head_grads=True): @@ -371,61 +370,12 @@ def verify_lrn(ishape, size, axis, bias, alpha, beta): dtype = "float32" x_np = np.random.uniform(size=ishape).astype(dtype) - def lrn_python(a_np, size, axis, bias, alpha, beta): - """Local response normalization operator in NCHW layout. - - Parameters - ---------- - a_np : numpy.ndarray - 4-D with shape [batch, in_channel, in_height, in_width] - - size : int - normalization window size - - axis : int - input data layout channel axis - - bias : float - offset to avoid dividing by 0. 
constant value - - alpha : float - constant value - - beta : float - exponent constant value - - Returns - ------- - lrn_out : np.ndarray - 4-D with shape [batch, out_channel, out_height, out_width] - """ - radius = size // 2 - sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - for i, j, k, l in product(*[range(_axis) for _axis in a_np.shape]): - axis_size = a_np.shape[axis] - if (axis == 1): - #NCHW layout - sum_start = j-radius if j-radius >= 0 else 0 - sum_end = j+radius+1 if j+radius+1 < axis_size else axis_size - sqr_sum[i, j, k, l] = sum(a_np[i, sum_start:sum_end, k, l] * \ - a_np[i, sum_start:sum_end, k, l]) - elif (axis == 3): - #NHWC layout - sum_start = l-radius if l-radius >= 0 else 0 - sum_end = l+radius+1 if l+radius+1 < axis_size else axis_size - sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ - a_np[i, j, k, sum_start:sum_end]) - - sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) - lrn_out = np.divide(a_np, sqr_sum_up) - return lrn_out - for target, ctx in ctx_list(): graph, lib, _ = nnvm.compiler.build(y, target, {"x": ishape}) m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) out = m.get_output(0, tvm.nd.empty(ishape)) - out_np = lrn_python(x_np, size, axis, bias, alpha, beta) + out_np = topi.testing.lrn_python(x_np, size, axis, bias, alpha, beta) np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) #Checking LRN op followed by elementwise op relu @@ -436,7 +386,7 @@ def lrn_python(a_np, size, axis, bias, alpha, beta): m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) out = m.get_output(0, tvm.nd.empty(ishape)) - out_np = lrn_python(x_np, size, axis, bias, alpha, beta) + out_np = topi.testing.lrn_python(x_np, size, axis, bias, alpha, beta) out_np = (out_np > 0) * out_np np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) @@ -446,36 +396,12 @@ def verify_l2_normalize(ishape, eps, axis): dtype = "float32" x_np = np.random.uniform(size=ishape).astype(dtype) - def l2_normalize_python(a_np, eps, axis=None): - """L2 normalize operator in NCHW layout. 
- - Parameters - ---------- - a_np : numpy.ndarray - 4-D with shape [batch, in_channel, in_height, in_width] - - eps : float - epsilon constant value - axis : list of int - axis over the normalization applied - - Returns - ------- - l2_normalize_out : np.ndarray - 4-D with shape [batch, out_channel, out_height, out_width] - """ - dot_value = np.power(a_np, 2.0) - sqr_sum = np.sum(dot_value, axis, keepdims=True) - sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - l2_normalize_out = np.divide(a_np, sqrt_sum) - return l2_normalize_out - for target, ctx in ctx_list(): graph, lib, _ = nnvm.compiler.build(y, target, {"x": ishape}) m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) out = m.get_output(0, tvm.nd.empty(ishape)) - out_np = l2_normalize_python(x_np, eps, axis) + out_np = topi.testing.l2_normalize_python(x_np, eps, axis) np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) #Checking L2 normalization op followed by elementwise op relu @@ -486,7 +412,7 @@ def l2_normalize_python(a_np, eps, axis=None): m = graph_runtime.create(graph, lib, ctx) m.run(x=x_np) out = m.get_output(0, tvm.nd.empty(ishape)) - out_np = l2_normalize_python(x_np, eps, axis) + out_np = topi.testing.l2_normalize_python(x_np, eps, axis) out_np = (out_np > 0) * out_np np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5) diff --git a/topi/python/topi/testing/__init__.py b/topi/python/topi/testing/__init__.py index 3731040e3a85..c91eea7958ea 100644 --- a/topi/python/topi/testing/__init__.py +++ b/topi/python/topi/testing/__init__.py @@ -16,3 +16,5 @@ from .reorg_python import reorg_python from .region_python import region_python from .shortcut_python import shortcut_python +from .lrn_python import lrn_python +from .l2_normalize_python import l2_normalize_python diff --git a/topi/python/topi/testing/l2_normalize_python.py b/topi/python/topi/testing/l2_normalize_python.py new file mode 100644 index 000000000000..98f1843233a7 --- /dev/null +++ b/topi/python/topi/testing/l2_normalize_python.py @@ -0,0 +1,27 @@ +# pylint: disable=invalid-name, line-too-long, unused-variable, too-many-locals +"""L2 normalize in python""" +import numpy as np + +def l2_normalize_python(a_np, eps, axis=None): + """L2 normalize operator in NCHW layout. + + Parameters + ---------- + a_np : numpy.ndarray + 4-D with shape [batch, in_channel, in_height, in_width] + + eps : float + epsilon constant value + axis : list of int + axis over the normalization applied + + Returns + ------- + l2_normalize_out : np.ndarray + 4-D with shape [batch, out_channel, out_height, out_width] + """ + dot_value = np.power(a_np, 2.0) + sqr_sum = np.sum(dot_value, axis, keepdims=True) + sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) + l2_normalize_out = np.divide(a_np, sqrt_sum) + return l2_normalize_out diff --git a/topi/python/topi/testing/lrn_python.py b/topi/python/topi/testing/lrn_python.py new file mode 100644 index 000000000000..4e44e8bcb635 --- /dev/null +++ b/topi/python/topi/testing/lrn_python.py @@ -0,0 +1,53 @@ +# pylint: disable=invalid-name, line-too-long, unused-variable, too-many-locals +"""LRN in python""" +from itertools import product +import numpy as np + +def lrn_python(a_np, size, axis, bias, alpha, beta): + """Local response normalization operator in NCHW layout. 
+ + Parameters + ---------- + a_np : numpy.ndarray + 4-D with shape [batch, in_channel, in_height, in_width] + + size : int + normalization window size + + axis : int + input data layout channel axis + + bias : float + offset to avoid dividing by 0. constant value + + alpha : float + constant value + + beta : float + exponent constant value + + Returns + ------- + lrn_out : np.ndarray + 4-D with shape [batch, out_channel, out_height, out_width] + """ + radius = size // 2 + sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) + for i, j, k, l in product(*[range(_axis) for _axis in a_np.shape]): + axis_size = a_np.shape[axis] + if axis == 1: + #NCHW layout + sum_start = j-radius if j-radius >= 0 else 0 + sum_end = j+radius+1 if j+radius+1 < axis_size else axis_size + sqr_sum[i, j, k, l] = sum(a_np[i, sum_start:sum_end, k, l] * \ + a_np[i, sum_start:sum_end, k, l]) + elif axis == 3: + #NHWC layout + sum_start = l-radius if l-radius >= 0 else 0 + sum_end = l+radius+1 if l+radius+1 < axis_size else axis_size + sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ + a_np[i, j, k, sum_start:sum_end]) + + sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) + lrn_out = np.divide(a_np, sqr_sum_up) + return lrn_out diff --git a/topi/tests/python/test_topi_l2norm.py b/topi/tests/python/test_topi_l2norm.py index 3c17005a56e1..b27a1dc27e72 100644 --- a/topi/tests/python/test_topi_l2norm.py +++ b/topi/tests/python/test_topi_l2norm.py @@ -3,30 +3,7 @@ import tvm import topi from topi.util import get_const_tuple - -def l2_normalize_python(a_np, eps, axis=None): - """L2 normalize operator in NCHW layout. - - Parameters - ---------- - a_np : numpy.ndarray - 4-D with shape [batch, in_channel, in_height, in_width] - - eps : float - epsilon constant value - axis : list of int - axis over the normalization applied - - Returns - ------- - l2_normalize_out : np.ndarray - 4-D with shape [batch, out_channel, out_height, out_width] - """ - dot_value = np.power(a_np, 2.0) - sqr_sum = np.sum(dot_value, axis, keepdims=True) - sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - l2_normalize_out = np.divide(a_np, sqrt_sum) - return l2_normalize_out +import topi.testing def verify_l2_normalize(ishape, eps, axis=None): @@ -35,7 +12,7 @@ def verify_l2_normalize(ishape, eps, axis=None): dtype = A.dtype a_np = np.random.uniform(size=ishape).astype(dtype) - b_np = l2_normalize_python(a_np, eps, axis) + b_np = topi.testing.l2_normalize_python(a_np, eps, axis) def check_device(device): ctx = tvm.context(device, 0) diff --git a/topi/tests/python/test_topi_lrn.py b/topi/tests/python/test_topi_lrn.py index 51446ebf712d..7d62aefe5f55 100644 --- a/topi/tests/python/test_topi_lrn.py +++ b/topi/tests/python/test_topi_lrn.py @@ -3,56 +3,7 @@ import tvm import topi from topi.util import get_const_tuple -from itertools import product - -def lrn_python(a_np, size, axis, bias, alpha, beta): - """Local response norm operator in NCHW layout. - - Parameters - ---------- - a_np : numpy.ndarray - 4-D with shape [batch, in_channel, in_height, in_width] - - size : int - normalization window size - - axis : int - input data layout channel axis - - bias : float - offset to avoid dividing by 0. 
constant value - - alpha : float - constant value - - beta : float - exponent constant value - - Returns - ------- - lrn_out : np.ndarray - 4-D with shape [batch, out_channel, out_height, out_width] - """ - radius = size // 2 - sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - for i, j, k, l in product(*[range(_axis) for _axis in a_np.shape]): - axis_size = a_np.shape[axis] - if (axis == 1): - #NCHW layout - sum_start = j-radius if j-radius >= 0 else 0 - sum_end = j+radius+1 if j+radius+1 < axis_size else axis_size - sqr_sum[i, j, k, l] = sum(a_np[i, sum_start:sum_end, k, l] * \ - a_np[i, sum_start:sum_end, k, l]) - elif (axis == 3): - #NHWC layout - sum_start = l-radius if l-radius >= 0 else 0 - sum_end = l+radius+1 if l+radius+1 < axis_size else axis_size - sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ - a_np[i, j, k, sum_start:sum_end]) - - sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) - lrn_out = np.divide(a_np, sqr_sum_up) - return lrn_out +import topi.testing def verify_lrn(shape, size, axis, bias, alpha, beta): A = tvm.placeholder(shape, name='A') @@ -60,7 +11,7 @@ def verify_lrn(shape, size, axis, bias, alpha, beta): dtype = A.dtype a_np = np.random.uniform(size=shape).astype(dtype) - b_np = lrn_python(a_np, size, axis, bias, alpha, beta) + b_np = topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta) def check_device(device): if not tvm.module.enabled(device): diff --git a/topi/tests/python_cpp/test_topi_l2norm.py b/topi/tests/python_cpp/test_topi_l2norm.py index 74396def5d76..08799f76c5c3 100644 --- a/topi/tests/python_cpp/test_topi_l2norm.py +++ b/topi/tests/python_cpp/test_topi_l2norm.py @@ -4,30 +4,7 @@ import topi import logging from topi.util import get_const_tuple - -def l2_normalize_python(a_np, eps, axis=None): - """L2 normalize operator in NCHW layout. - - Parameters - ---------- - a_np : numpy.ndarray - 4-D with shape [batch, in_channel, in_height, in_width] - - eps : float - epsilon constant value - axis : list of int - axis over the normalization applied - - Returns - ------- - l2_normalize_out : np.ndarray - 4-D with shape [batch, out_channel, out_height, out_width] - """ - dot_value = np.power(a_np, 2.0) - sqr_sum = np.sum(dot_value, axis, keepdims=True) - sqrt_sum = np.sqrt(np.maximum(np.broadcast_to(sqr_sum, a_np.shape), eps)) - l2_normalize_out = np.divide(a_np, sqrt_sum) - return l2_normalize_out +import topi.testing def verify_l2_normalize(shape, eps, axis=None): '''Verify l2 normalization operator by comparing outputs from tvm and numpy implementation''' @@ -36,7 +13,7 @@ def verify_l2_normalize(shape, eps, axis=None): dtype = A.dtype a_np = np.random.uniform(size=shape).astype(dtype) - b_np = l2_normalize_python(a_np, eps, axis) + b_np = topi.testing.l2_normalize_python(a_np, eps, axis) def check_device(device): if not tvm.module.enabled(device): diff --git a/topi/tests/python_cpp/test_topi_lrn.py b/topi/tests/python_cpp/test_topi_lrn.py index 6f62e5143e27..d685643a9406 100644 --- a/topi/tests/python_cpp/test_topi_lrn.py +++ b/topi/tests/python_cpp/test_topi_lrn.py @@ -4,56 +4,7 @@ import topi import logging from topi.util import get_const_tuple -from itertools import product - -def lrn_python(a_np, size, axis, bias, alpha, beta): - """Local response normalization operator in NCHW layout. 
- - Parameters - ---------- - a_np : numpy.ndarray - 4-D with shape [batch, in_channel, in_height, in_width] - - size : int - normalization window size - - axis : int - input data layout channel axis - - bias : float - offset to avoid dividing by 0. constant value - - alpha : float - constant value - - beta : float - exponent constant value - - Returns - ------- - lrn_out : np.ndarray - 4-D with shape [batch, out_channel, out_height, out_width] - """ - radius = size // 2 - sqr_sum = np.zeros(shape=a_np.shape).astype(a_np.dtype) - for i, j, k, l in product(*[range(_axis) for _axis in a_np.shape]): - axis_size = a_np.shape[axis] - if (axis == 1): - #NCHW layout - sum_start = j-radius if j-radius >= 0 else 0 - sum_end = j+radius+1 if j+radius+1 < axis_size else axis_size - sqr_sum[i, j, k, l] = sum(a_np[i, sum_start:sum_end, k, l] * \ - a_np[i, sum_start:sum_end, k, l]) - elif (axis == 3): - #NHWC layout - sum_start = l-radius if l-radius >= 0 else 0 - sum_end = l+radius+1 if l+radius+1 < axis_size else axis_size - sqr_sum[i, j, k, l] = sum(a_np[i, j, k, sum_start:sum_end] * \ - a_np[i, j, k, sum_start:sum_end]) - - sqr_sum_up = np.power((bias + (alpha * sqr_sum /size)), beta) - lrn_out = np.divide(a_np, sqr_sum_up) - return lrn_out +import topi.testing def verify_lrn(shape, size, axis, bias, alpha, beta): '''Verify Local response normalization operator by comparing outputs from tvm and numpy implementation''' @@ -62,7 +13,7 @@ def verify_lrn(shape, size, axis, bias, alpha, beta): dtype = A.dtype a_np = np.random.uniform(size=shape).astype(dtype) - b_np = lrn_python(a_np, size, axis, bias, alpha, beta) + b_np = topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta) def check_device(device): if not tvm.module.enabled(device): print("Skip because %s is not enabled" % device)
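A minimal standalone sketch of the relocated reference helpers (illustrative only, not part of any patch above): it relies solely on the topi.testing exports and call signatures introduced by PATCH 12/12, while the concrete tensor shape and LRN/L2 parameter values are arbitrary examples chosen for the check.

    import numpy as np
    import topi.testing

    # Arbitrary NCHW input for the reference implementations.
    a_np = np.random.uniform(size=(1, 3, 5, 5)).astype("float32")

    # L2 normalization over the channel axis: with a negligible eps, every
    # (h, w) position should end up with unit L2 norm across channels.
    b_np = topi.testing.l2_normalize_python(a_np, 1e-20, axis=(1,))
    np.testing.assert_allclose(np.sum(b_np * b_np, axis=1),
                               np.ones((1, 5, 5)),
                               rtol=1e-5, atol=1e-5)

    # LRN with a window of size 1 collapses to the closed form
    # a_np / (bias + alpha * a_np**2 / size)**beta, which gives a
    # convenient cross-check of lrn_python.
    size, axis, bias, alpha, beta = 1, 1, 2.0, 0.0001, 0.75
    c_np = topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta)
    expected = a_np / np.power(bias + alpha * a_np * a_np / size, beta)
    np.testing.assert_allclose(c_np, expected, rtol=1e-5, atol=1e-5)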